2 /********************************************************************
4 * Copyright (c) 2001-2011, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7 /*******************************************************************************
11 *******************************************************************************/
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_COLLATION
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
29 #include "unicode/ustring.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
44 #define LEN(a) (sizeof(a)/sizeof(a[0]))
46 #define MAX_TOKEN_LEN 16
48 typedef UCollationResult
tst_strcoll(void *collator
, const int object
,
49 const UChar
*source
, const int sLen
,
50 const UChar
*target
, const int tLen
);
54 const static char cnt1
[][10] = {
69 const static char cnt2
[][10] = {
81 static void IncompleteCntTest(void)
83 UErrorCode status
= U_ZERO_ERROR
;
88 UCollator
*coll
= NULL
;
89 uint32_t i
= 0, j
= 0;
92 u_uastrcpy(temp
, " & Z < ABC < Q < B");
94 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
96 if(U_SUCCESS(status
)) {
97 size
= sizeof(cnt1
)/sizeof(cnt1
[0]);
98 for(i
= 0; i
< size
-1; i
++) {
99 for(j
= i
+1; j
< size
; j
++) {
100 UCollationElements
*iter
;
101 u_uastrcpy(t1
, cnt1
[i
]);
102 u_uastrcpy(t2
, cnt1
[j
]);
103 doTest(coll
, t1
, t2
, UCOL_LESS
);
104 /* synwee : added collation element iterator test */
105 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
106 if (U_FAILURE(status
)) {
107 log_err("Creation of iterator failed\n");
111 ucol_closeElements(iter
);
119 u_uastrcpy(temp
, " & Z < DAVIS < MARK <DAV");
120 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
122 if(U_SUCCESS(status
)) {
123 size
= sizeof(cnt2
)/sizeof(cnt2
[0]);
124 for(i
= 0; i
< size
-1; i
++) {
125 for(j
= i
+1; j
< size
; j
++) {
126 UCollationElements
*iter
;
127 u_uastrcpy(t1
, cnt2
[i
]);
128 u_uastrcpy(t2
, cnt2
[j
]);
129 doTest(coll
, t1
, t2
, UCOL_LESS
);
131 /* synwee : added collation element iterator test */
132 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
133 if (U_FAILURE(status
)) {
134 log_err("Creation of iterator failed\n");
138 ucol_closeElements(iter
);
148 const static char shifted
[][20] = {
160 const static UCollationResult shiftedTert
[] = {
172 const static char nonignorable
[][20] = {
184 static void BlackBirdTest(void) {
185 UErrorCode status
= U_ZERO_ERROR
;
189 uint32_t i
= 0, j
= 0;
191 UCollator
*coll
= ucol_open("en_US", &status
);
193 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
194 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
196 if(U_SUCCESS(status
)) {
197 size
= sizeof(nonignorable
)/sizeof(nonignorable
[0]);
198 for(i
= 0; i
< size
-1; i
++) {
199 for(j
= i
+1; j
< size
; j
++) {
200 u_uastrcpy(t1
, nonignorable
[i
]);
201 u_uastrcpy(t2
, nonignorable
[j
]);
202 doTest(coll
, t1
, t2
, UCOL_LESS
);
207 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
208 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
210 if(U_SUCCESS(status
)) {
211 size
= sizeof(shifted
)/sizeof(shifted
[0]);
212 for(i
= 0; i
< size
-1; i
++) {
213 for(j
= i
+1; j
< size
; j
++) {
214 u_uastrcpy(t1
, shifted
[i
]);
215 u_uastrcpy(t2
, shifted
[j
]);
216 doTest(coll
, t1
, t2
, UCOL_LESS
);
221 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
222 if(U_SUCCESS(status
)) {
223 size
= sizeof(shifted
)/sizeof(shifted
[0]);
224 for(i
= 1; i
< size
; i
++) {
225 u_uastrcpy(t1
, shifted
[i
-1]);
226 u_uastrcpy(t2
, shifted
[i
]);
227 doTest(coll
, t1
, t2
, shiftedTert
[i
]);
234 const static UChar testSourceCases
[][MAX_TOKEN_LEN
] = {
235 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
236 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
237 {0x0041/*'A'*/, 0x0300, 0x0000},
238 {0x00C0, 0x0301, 0x0000},
239 /* this would work with forced normalization */
240 {0x00C0, 0x0316, 0x0000}
243 const static UChar testTargetCases
[][MAX_TOKEN_LEN
] = {
244 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
247 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
248 /* this would work with forced normalization */
249 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
252 const static UCollationResult results
[] = {
260 static void FunkyATest(void)
264 UErrorCode status
= U_ZERO_ERROR
;
265 UCollator
*myCollation
;
266 myCollation
= ucol_open("en_US", &status
);
267 if(U_FAILURE(status
)){
268 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
271 log_verbose("Testing some A letters, for some reason\n");
272 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
273 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
274 for (i
= 0; i
< 4 ; i
++)
276 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
278 ucol_close(myCollation
);
281 UColAttributeValue caseFirst
[] = {
288 UColAttributeValue alternateHandling
[] = {
293 UColAttributeValue caseLevel
[] = {
298 UColAttributeValue strengths
[] = {
307 static const char * strengthsC
[] = {
315 static const char * caseFirstC
[] = {
322 static const char * alternateHandlingC
[] = {
323 "UCOL_NON_IGNORABLE",
327 static const char * caseLevelC
[] = {
332 /* not used currently - does not test only prints */
333 static void PrintMarkDavis(void)
335 UErrorCode status
= U_ZERO_ERROR
;
337 uint8_t sortkey
[256];
338 UCollator
*coll
= ucol_open("en_US", &status
);
339 uint32_t h
,i
,j
,k
, sortkeysize
;
344 log_verbose("PrintMarkDavis");
346 u_uastrcpy(m
, "Mark Davis");
352 for(i
= 0; i
<sizem
; i
++) {
353 fprintf(stderr
, "\\u%04X ", m
[i
]);
355 fprintf(stderr
, "\n");
357 for(h
= 0; h
<sizeof(caseFirst
)/sizeof(caseFirst
[0]); h
++) {
358 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, caseFirst
[i
], &status
);
359 fprintf(stderr
, "caseFirst: %s\n", caseFirstC
[h
]);
361 for(i
= 0; i
<sizeof(alternateHandling
)/sizeof(alternateHandling
[0]); i
++) {
362 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, alternateHandling
[i
], &status
);
363 fprintf(stderr
, " AltHandling: %s\n", alternateHandlingC
[i
]);
365 for(j
= 0; j
<sizeof(caseLevel
)/sizeof(caseLevel
[0]); j
++) {
366 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, caseLevel
[j
], &status
);
367 fprintf(stderr
, " caseLevel: %s\n", caseLevelC
[j
]);
369 for(k
= 0; k
<sizeof(strengths
)/sizeof(strengths
[0]); k
++) {
370 ucol_setAttribute(coll
, UCOL_STRENGTH
, strengths
[k
], &status
);
371 sortkeysize
= ucol_getSortKey(coll
, m
, sizem
, sortkey
, 256);
372 fprintf(stderr
, " strength: %s\n Sortkey: ", strengthsC
[k
]);
373 fprintf(stderr
, "%s\n", ucol_sortKeyToString(coll
, sortkey
, buffer
, &len
));
384 static void BillFairmanTest(void) {
386 ** check for actual locale via ICU resource bundles
388 ** lp points to the original locale ("fr_FR_....")
391 UResourceBundle
*lr
,*cr
;
392 UErrorCode lec
= U_ZERO_ERROR
;
393 const char *lp
= "fr_FR_you_ll_never_find_this_locale";
395 log_verbose("BillFairmanTest\n");
397 lr
= ures_open(NULL
,lp
,&lec
);
399 cr
= ures_getByKey(lr
,"collations",0,&lec
);
401 lp
= ures_getLocaleByType(cr
, ULOC_ACTUAL_LOCALE
, &lec
);
403 if (U_SUCCESS(lec
)) {
404 if(strcmp(lp
, "fr") != 0) {
405 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp
);
415 static void testPrimary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
416 UChar source
[256] = { '\0'};
417 UChar target
[256] = { '\0'};
421 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
422 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
424 /*log_verbose("Testing primary\n");*/
426 doTest(col
, p
, q
, UCOL_LESS
);
428 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
430 if(result!=UCOL_LESS){
431 aescstrdup(p,utfSource,256);
432 aescstrdup(q,utfTarget,256);
433 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
437 u_strcpy(source
+1,p
);
439 u_strcpy(target
+1,q
);
440 doTest(col
, source
, target
, UCOL_LESS
);
442 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
446 static void testSecondary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
447 UChar source
[256] = { '\0'};
448 UChar target
[256] = { '\0'};
450 /*log_verbose("Testing secondary\n");*/
452 doTest(col
, p
, q
, UCOL_LESS
);
454 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
457 u_strcpy(source
+1,p
);
459 u_strcpy(target
+1,q
);
461 doTest(col
, source
, target
, UCOL_LESS
);
463 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
468 source
[u_strlen(p
)] = 0x62;
469 source
[u_strlen(p
)+1] = 0;
473 target
[u_strlen(q
)] = 0x61;
474 target
[u_strlen(q
)+1] = 0;
476 doTest(col
, source
, target
, UCOL_GREATER
);
479 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
483 static void testTertiary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
484 UChar source
[256] = { '\0'};
485 UChar target
[256] = { '\0'};
487 /*log_verbose("Testing tertiary\n");*/
489 doTest(col
, p
, q
, UCOL_LESS
);
491 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
494 u_strcpy(source
+1,p
);
496 u_strcpy(target
+1,q
);
498 doTest(col
, source
, target
, UCOL_LESS
);
500 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
504 source
[u_strlen(p
)] = 0xE0;
505 source
[u_strlen(p
)+1] = 0;
508 target
[u_strlen(q
)] = 0x61;
509 target
[u_strlen(q
)+1] = 0;
511 doTest(col
, source
, target
, UCOL_GREATER
);
514 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
518 static void testEquality(UCollator
* col
, const UChar
* p
,const UChar
* q
){
520 UChar source[256] = { '\0'};
521 UChar target[256] = { '\0'};
524 doTest(col
, p
, q
, UCOL_EQUAL
);
526 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
530 static void testCollator(UCollator
*coll
, UErrorCode
*status
) {
531 const UChar
*rules
= NULL
, *current
= NULL
;
533 uint32_t strength
= 0;
534 uint32_t chOffset
= 0; uint32_t chLen
= 0;
535 uint32_t exOffset
= 0; uint32_t exLen
= 0;
536 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
537 uint32_t firstEx
= 0;
538 /* uint32_t rExpsLen = 0; */
539 uint32_t firstLen
= 0;
540 UBool varT
= FALSE
; UBool top_
= TRUE
;
542 UBool startOfRules
= TRUE
;
543 UBool lastReset
= FALSE
;
544 UBool before
= FALSE
;
545 uint32_t beforeStrength
= 0;
553 UChar
*rulesCopy
= NULL
;
554 UParseError parseError
;
556 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
560 rules
= ucol_getRules(coll
, &ruleLen
);
561 if(U_SUCCESS(*status
) && ruleLen
> 0) {
562 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
563 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
564 src
.current
= src
.source
= rulesCopy
;
565 src
.end
= rulesCopy
+ruleLen
;
566 src
.extraCurrent
= src
.end
;
567 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
568 *first
= *second
= 0;
570 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
571 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
572 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
,&parseError
, status
)) != NULL
) {
573 strength
= src
.parsedToken
.strength
;
574 chOffset
= src
.parsedToken
.charsOffset
;
575 chLen
= src
.parsedToken
.charsLen
;
576 exOffset
= src
.parsedToken
.extensionOffset
;
577 exLen
= src
.parsedToken
.extensionLen
;
578 prefixOffset
= src
.parsedToken
.prefixOffset
;
579 prefixLen
= src
.parsedToken
.prefixLen
;
580 specs
= src
.parsedToken
.flags
;
582 startOfRules
= FALSE
;
583 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
584 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
585 if(top_
) { /* if reset is on top, the sequence is broken. We should have an empty string */
588 u_strncpy(second
,src
.source
+chOffset
, chLen
);
591 if(exLen
> 0 && firstEx
== 0) {
592 u_strncat(first
, src
.source
+exOffset
, exLen
);
593 first
[firstLen
+exLen
] = 0;
596 if(lastReset
== TRUE
&& prefixLen
!= 0) {
597 u_strncpy(first
+prefixLen
, first
, firstLen
);
598 u_strncpy(first
, src
.source
+prefixOffset
, prefixLen
);
599 first
[firstLen
+prefixLen
] = 0;
600 firstLen
= firstLen
+prefixLen
;
603 if(before
== TRUE
) { /* swap first and second */
604 u_strcpy(tempB
, first
);
605 u_strcpy(first
, second
);
606 u_strcpy(second
, tempB
);
615 if(beforeStrength
< strength
) {
616 strength
= beforeStrength
;
624 testEquality(coll
,first
,second
);
627 testPrimary(coll
,first
,second
);
630 testSecondary(coll
,first
,second
);
633 testTertiary(coll
,first
,second
);
637 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
639 beforeStrength
= (specs
& UCOL_TOK_BEFORE
)-1;
646 if(before
== TRUE
&& strength
!= UCOL_TOK_RESET
) { /* first and second were swapped */
651 u_strcpy(first
, second
);
654 uprv_free(src
.source
);
658 static UCollationResult
ucaTest(void *collator
, const int object
, const UChar
*source
, const int sLen
, const UChar
*target
, const int tLen
) {
659 UCollator
*UCA
= (UCollator
*)collator
;
660 return ucol_strcoll(UCA
, source
, sLen
, target
, tLen
);
664 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
666 LCID lcid = (LCID)collator;
667 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
674 static UCollationResult
swampEarlier(tst_strcoll
* func
, void *collator
, int opts
,
676 const UChar
*s
, const uint32_t sLen
,
677 const UChar
*t
, const uint32_t tLen
) {
678 UChar source
[256] = {0};
679 UChar target
[256] = {0};
682 u_strcpy(source
+1, s
);
684 u_strcpy(target
+1, t
);
686 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
689 static UCollationResult
swampLater(tst_strcoll
* func
, void *collator
, int opts
,
691 const UChar
*s
, const uint32_t sLen
,
692 const UChar
*t
, const uint32_t tLen
) {
693 UChar source
[256] = {0};
694 UChar target
[256] = {0};
701 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
704 static uint32_t probeStrength(tst_strcoll
* func
, void *collator
, int opts
,
705 const UChar
*s
, const uint32_t sLen
,
706 const UChar
*t
, const uint32_t tLen
,
707 UCollationResult result
) {
708 /*UChar fPrimary = 0x6d;*/
709 /*UChar sPrimary = 0x6e;*/
710 UChar fSecondary
= 0x310d;
711 UChar sSecondary
= 0x31a3;
712 UChar fTertiary
= 0x310f;
713 UChar sTertiary
= 0x31b7;
715 UCollationResult oposite
;
716 if(result
== UCOL_EQUAL
) {
717 return UCOL_IDENTICAL
;
718 } else if(result
== UCOL_GREATER
) {
721 oposite
= UCOL_GREATER
;
724 if(swampEarlier(func
, collator
, opts
, sSecondary
, fSecondary
, s
, sLen
, t
, tLen
) == result
) {
726 } else if((swampEarlier(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == result
) &&
727 (swampEarlier(func
, collator
, opts
, 0x310f, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
728 return UCOL_SECONDARY
;
729 } else if((swampLater(func
, collator
, opts
, sTertiary
, fTertiary
, s
, sLen
, t
, tLen
) == result
) &&
730 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
731 return UCOL_TERTIARY
;
732 } else if((swampLater(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == oposite
) &&
733 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == oposite
)) {
734 return UCOL_QUATERNARY
;
736 return UCOL_IDENTICAL
;
740 static char *getRelationSymbol(UCollationResult res
, uint32_t strength
, char *buffer
) {
743 if(res
== UCOL_EQUAL
|| strength
== 0xdeadbeef) {
747 } else if(res
== UCOL_GREATER
) {
748 for(i
= 0; i
<strength
+1; i
++) {
751 buffer
[strength
+1] = '\0';
753 for(i
= 0; i
<strength
+1; i
++) {
756 buffer
[strength
+1] = '\0';
764 static void logFailure (const char *platform
, const char *test
,
765 const UChar
*source
, const uint32_t sLen
,
766 const UChar
*target
, const uint32_t tLen
,
767 UCollationResult realRes
, uint32_t realStrength
,
768 UCollationResult expRes
, uint32_t expStrength
, UBool error
) {
772 char sEsc
[256], s
[256], tEsc
[256], t
[256], b
[256], output
[512], relation
[256];
773 static int32_t maxOutputLength
= 0;
774 int32_t outputLength
;
776 *sEsc
= *tEsc
= *s
= *t
= 0;
778 log_err("Difference between expected and generated order. Run test with -v for more info\n");
779 } else if(getTestOption(VERBOSITY_OPTION
) == 0) {
782 for(i
= 0; i
<sLen
; i
++) {
783 sprintf(b
, "%04X", source
[i
]);
788 if(source
[i
] < 0x80) {
789 sprintf(b
, "(%c)", source
[i
]);
793 for(i
= 0; i
<tLen
; i
++) {
794 sprintf(b
, "%04X", target
[i
]);
799 if(target
[i
] < 0x80) {
800 sprintf(b
, "(%c)", target
[i
]);
805 strcpy(output, "[[ ");
806 strcat(output, sEsc);
807 strcat(output, getRelationSymbol(expRes, expStrength, relation));
808 strcat(output, tEsc);
810 strcat(output, " : ");
812 strcat(output, sEsc);
813 strcat(output, getRelationSymbol(realRes, realStrength, relation));
814 strcat(output, tEsc);
815 strcat(output, " ]] ");
817 log_verbose("%s", output);
821 strcpy(output
, "DIFF: ");
824 strcat(output
, " : ");
827 strcat(output
, test
);
828 strcat(output
, ": ");
830 strcat(output
, sEsc
);
831 strcat(output
, getRelationSymbol(expRes
, expStrength
, relation
));
832 strcat(output
, tEsc
);
836 strcat(output
, platform
);
837 strcat(output
, ": ");
839 strcat(output
, sEsc
);
840 strcat(output
, getRelationSymbol(realRes
, realStrength
, relation
));
841 strcat(output
, tEsc
);
843 outputLength
= (int32_t)strlen(output
);
844 if(outputLength
> maxOutputLength
) {
845 maxOutputLength
= outputLength
;
846 U_ASSERT(outputLength
< sizeof(output
));
849 log_verbose("%s\n", output
);
854 static void printOutRules(const UChar *rules) {
855 uint32_t len = u_strlen(rules);
860 fprintf(stdout, "Rules:");
862 for(i = 0; i<len; i++) {
863 if(rules[i]<0x7f && rules[i]>=0x20) {
864 toPrint = (char)rules[i];
867 fprintf(stdout, "\n&");
868 } else if(toPrint == ';') {
869 fprintf(stdout, "<<");
871 } else if(toPrint == ',') {
872 fprintf(stdout, "<<<");
875 fprintf(stdout, "%c", toPrint);
878 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
879 fprintf(stdout, "\\u%04X", rules[i]);
883 fprintf(stdout, "\n");
893 static uint32_t testSwitch(tst_strcoll
* func
, void *collator
, int opts
, uint32_t strength
, const UChar
*first
, const UChar
*second
, const char* msg
, UBool error
) {
895 UCollationResult realResult
;
896 uint32_t realStrength
;
898 uint32_t sLen
= u_strlen(first
);
899 uint32_t tLen
= u_strlen(second
);
901 realResult
= func(collator
, opts
, first
, sLen
, second
, tLen
);
902 realStrength
= probeStrength(func
, collator
, opts
, first
, sLen
, second
, tLen
, realResult
);
904 if(strength
== UCOL_IDENTICAL
&& realResult
!= UCOL_IDENTICAL
) {
905 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_EQUAL
, strength
, error
);
907 } else if(realResult
!= UCOL_LESS
|| realStrength
!= strength
) {
908 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_LESS
, strength
, error
);
915 static void testAgainstUCA(UCollator
*coll
, UCollator
*UCA
, const char *refName
, UBool error
, UErrorCode
*status
) {
916 const UChar
*rules
= NULL
, *current
= NULL
;
918 uint32_t strength
= 0;
919 uint32_t chOffset
= 0; uint32_t chLen
= 0;
920 uint32_t exOffset
= 0; uint32_t exLen
= 0;
921 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
922 /* uint32_t rExpsLen = 0; */
923 uint32_t firstLen
= 0, secondLen
= 0;
924 UBool varT
= FALSE
; UBool top_
= TRUE
;
926 UBool startOfRules
= TRUE
;
932 UChar
*rulesCopy
= NULL
;
934 uint32_t UCAdiff
= 0;
935 uint32_t Windiff
= 1;
936 UParseError parseError
;
938 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
941 rules
= ucol_getRules(coll
, &ruleLen
);
943 /*printOutRules(rules);*/
945 if(U_SUCCESS(*status
) && ruleLen
> 0) {
946 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
947 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
948 src
.current
= src
.source
= rulesCopy
;
949 src
.end
= rulesCopy
+ruleLen
;
950 src
.extraCurrent
= src
.end
;
951 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
952 *first
= *second
= 0;
954 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
955 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
956 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
957 strength
= src
.parsedToken
.strength
;
958 chOffset
= src
.parsedToken
.charsOffset
;
959 chLen
= src
.parsedToken
.charsLen
;
960 exOffset
= src
.parsedToken
.extensionOffset
;
961 exLen
= src
.parsedToken
.extensionLen
;
962 prefixOffset
= src
.parsedToken
.prefixOffset
;
963 prefixLen
= src
.parsedToken
.prefixLen
;
964 specs
= src
.parsedToken
.flags
;
966 startOfRules
= FALSE
;
967 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
968 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
970 u_strncpy(second
,src
.source
+chOffset
, chLen
);
975 u_strncat(first
, src
.source
+exOffset
, exLen
);
976 first
[firstLen
+exLen
] = 0;
980 if(strength
!= UCOL_TOK_RESET
) {
981 if((*first
<0x3400 || *first
>=0xa000) && (*second
<0x3400 || *second
>=0xa000)) {
982 UCAdiff
+= testSwitch(&ucaTest
, (void *)UCA
, 0, strength
, first
, second
, refName
, error
);
983 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
989 u_strcpy(first
, second
);
992 if(UCAdiff
!= 0 && Windiff
!= 0) {
996 log_verbose("No immediate difference with %s!\n", refName
);
999 log_verbose("No immediate difference with Win32!\n");
1001 uprv_free(src
.source
);
/*
 * Takes two CEs (lead and continuation) and
 * compares them as CEs should be compared:
 * primary vs. primary, secondary vs. secondary
 * tertiary vs. tertiary
 *
 *   s1, s2  lead and continuation CE of the first element
 *   t1, t2  lead and continuation CE of the second element
 *
 * Returns 0 when both pairs are equal, a negative value when the first
 * element sorts before the second, and a positive value otherwise.  A lower
 * level is consulted only when all higher levels tie.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t s = 0, t = 0;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* Primary weight: upper 16 bits of the lead CE followed by the upper
       16 bits of the continuation CE. */
    s = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    t = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(s != t) {
        return (s < t) ? -1 : 1;
    }

    /* Secondary weight: bits 8..15 of the lead CE followed by bits 8..15 of
       the continuation CE. */
    s = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    t = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(s != t) {
        return (s < t) ? -1 : 1;
    }

    /* Tertiary weight: low byte of the lead CE followed by the low byte of
       the continuation CE. */
    s = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    t = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    if(s != t) {
        return (s < t) ? -1 : 1;
    }

    return 0;
}
/*
 * CE bounds recorded for one indirect (symbolic) reset position: a start
 * CE and a limit CE, each stored as a lead value plus a continuation value.
 */
typedef struct {
    uint32_t startCE;
    uint32_t startContCE;
    uint32_t limitCE;
    uint32_t limitContCE;
} indirectBoundaries;
1049 /* these values are used for finding CE values for indirect positioning. */
1050 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1051 /* values. It only works for resets and you cannot tailor indirect names */
1052 /* An indirect name can define either an anchor point or a range. An */
1053 /* anchor point behaves in exactly the same way as a code point in reset */
1054 /* would, except that it cannot be tailored. A range (we currently only */
1055 /* know for the [top] range will explicitly set the upper bound for */
1056 /* generated CEs, thus allowing for better control over how many CEs can */
1057 /* be squeezed between in the range without performance penalty. */
1058 /* In that respect, we use [top] for tailoring of locales that use CJK */
1059 /* characters. Other indirect values are currently a pure convenience, */
1060 /* they can be used to assure that the CEs will be always positioned in */
1061 /* the same place relative to a point with known properties (e.g. first */
1062 /* primary ignorable). */
1063 static indirectBoundaries ucolIndirectBoundaries
[15];
1064 static UBool indirectBoundariesSet
= FALSE
;
1065 static void setIndirectBoundaries(uint32_t indexR
, uint32_t *start
, uint32_t *end
) {
1066 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1067 /* to initalize here. */
1068 ucolIndirectBoundaries
[indexR
].startCE
= start
[0];
1069 ucolIndirectBoundaries
[indexR
].startContCE
= start
[1];
1071 ucolIndirectBoundaries
[indexR
].limitCE
= end
[0];
1072 ucolIndirectBoundaries
[indexR
].limitContCE
= end
[1];
1074 ucolIndirectBoundaries
[indexR
].limitCE
= 0;
1075 ucolIndirectBoundaries
[indexR
].limitContCE
= 0;
1079 static void testCEs(UCollator
*coll
, UErrorCode
*status
) {
1080 const UChar
*rules
= NULL
, *current
= NULL
;
1081 int32_t ruleLen
= 0;
1083 uint32_t strength
= 0;
1084 uint32_t maxStrength
= UCOL_IDENTICAL
;
1085 uint32_t baseCE
, baseContCE
, nextCE
, nextContCE
, currCE
, currContCE
;
1087 uint32_t lastContCE
;
1090 uint32_t chOffset
= 0; uint32_t chLen
= 0;
1091 uint32_t exOffset
= 0; uint32_t exLen
= 0;
1092 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
1093 uint32_t oldOffset
= 0;
1095 /* uint32_t rExpsLen = 0; */
1096 /* uint32_t firstLen = 0; */
1098 UBool varT
= FALSE
; UBool top_
= TRUE
;
1099 UBool startOfRules
= TRUE
;
1100 UBool before
= FALSE
;
1101 UColTokenParser src
;
1103 UParseError parseError
;
1104 UChar
*rulesCopy
= NULL
;
1105 collIterate
*c
= uprv_new_collIterate(status
);
1106 UCAConstants
*consts
= NULL
;
1107 uint32_t UCOL_RESET_TOP_VALUE
, /*UCOL_RESET_TOP_CONT, */
1108 UCOL_NEXT_TOP_VALUE
, UCOL_NEXT_TOP_CONT
;
1110 UCollator
*UCA
= ucol_open("root", status
);
1112 if (U_FAILURE(*status
)) {
1113 log_err("Could not open root collator %s\n", u_errorName(*status
));
1114 uprv_delete_collIterate(c
);
1118 colLoc
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, status
);
1119 if (U_FAILURE(*status
)) {
1120 log_err("Could not get collator name: %s\n", u_errorName(*status
));
1122 uprv_delete_collIterate(c
);
1126 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
1128 consts
= (UCAConstants
*)((uint8_t *)UCA
->image
+ UCA
->image
->UCAConsts
);
1129 UCOL_RESET_TOP_VALUE
= consts
->UCA_LAST_NON_VARIABLE
[0];
1130 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1131 UCOL_NEXT_TOP_VALUE
= consts
->UCA_FIRST_IMPLICIT
[0];
1132 UCOL_NEXT_TOP_CONT
= consts
->UCA_FIRST_IMPLICIT
[1];
1134 baseCE
=baseContCE
=nextCE
=nextContCE
=currCE
=currContCE
=lastCE
=lastContCE
= UCOL_NOT_FOUND
;
1138 rules
= ucol_getRules(coll
, &ruleLen
);
1140 src
.invUCA
= ucol_initInverseUCA(status
);
1142 if(indirectBoundariesSet
== FALSE
) {
1143 /* UCOL_RESET_TOP_VALUE */
1144 setIndirectBoundaries(0, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1145 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1146 setIndirectBoundaries(1, consts
->UCA_FIRST_PRIMARY_IGNORABLE
, 0);
1147 /* UCOL_LAST_PRIMARY_IGNORABLE */
1148 setIndirectBoundaries(2, consts
->UCA_LAST_PRIMARY_IGNORABLE
, 0);
1149 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1150 setIndirectBoundaries(3, consts
->UCA_FIRST_SECONDARY_IGNORABLE
, 0);
1151 /* UCOL_LAST_SECONDARY_IGNORABLE */
1152 setIndirectBoundaries(4, consts
->UCA_LAST_SECONDARY_IGNORABLE
, 0);
1153 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1154 setIndirectBoundaries(5, consts
->UCA_FIRST_TERTIARY_IGNORABLE
, 0);
1155 /* UCOL_LAST_TERTIARY_IGNORABLE */
1156 setIndirectBoundaries(6, consts
->UCA_LAST_TERTIARY_IGNORABLE
, 0);
1157 /* UCOL_FIRST_VARIABLE */
1158 setIndirectBoundaries(7, consts
->UCA_FIRST_VARIABLE
, 0);
1159 /* UCOL_LAST_VARIABLE */
1160 setIndirectBoundaries(8, consts
->UCA_LAST_VARIABLE
, 0);
1161 /* UCOL_FIRST_NON_VARIABLE */
1162 setIndirectBoundaries(9, consts
->UCA_FIRST_NON_VARIABLE
, 0);
1163 /* UCOL_LAST_NON_VARIABLE */
1164 setIndirectBoundaries(10, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1165 /* UCOL_FIRST_IMPLICIT */
1166 setIndirectBoundaries(11, consts
->UCA_FIRST_IMPLICIT
, 0);
1167 /* UCOL_LAST_IMPLICIT */
1168 setIndirectBoundaries(12, consts
->UCA_LAST_IMPLICIT
, consts
->UCA_FIRST_TRAILING
);
1169 /* UCOL_FIRST_TRAILING */
1170 setIndirectBoundaries(13, consts
->UCA_FIRST_TRAILING
, 0);
1171 /* UCOL_LAST_TRAILING */
1172 setIndirectBoundaries(14, consts
->UCA_LAST_TRAILING
, 0);
1173 ucolIndirectBoundaries
[14].limitCE
= (consts
->UCA_PRIMARY_SPECIAL_MIN
<<24);
1174 indirectBoundariesSet
= TRUE
;
1178 if(U_SUCCESS(*status
) && ruleLen
> 0) {
1179 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
1180 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
1181 src
.current
= src
.source
= rulesCopy
;
1182 src
.end
= rulesCopy
+ruleLen
;
1183 src
.extraCurrent
= src
.end
;
1184 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
1186 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1187 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1188 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
1189 strength
= src
.parsedToken
.strength
;
1190 chOffset
= src
.parsedToken
.charsOffset
;
1191 chLen
= src
.parsedToken
.charsLen
;
1192 exOffset
= src
.parsedToken
.extensionOffset
;
1193 exLen
= src
.parsedToken
.extensionLen
;
1194 prefixOffset
= src
.parsedToken
.prefixOffset
;
1195 prefixLen
= src
.parsedToken
.prefixLen
;
1196 specs
= src
.parsedToken
.flags
;
1198 startOfRules
= FALSE
;
1199 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
1200 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
1202 uprv_init_collIterate(coll
, src
.source
+chOffset
, chLen
, c
, status
);
1204 currCE
= ucol_getNextCE(coll
, c
, status
);
1205 if(currCE
== 0 && UCOL_ISTHAIPREVOWEL(*(src
.source
+chOffset
))) {
1206 log_verbose("Thai prevowel detected. Will pick next CE\n");
1207 currCE
= ucol_getNextCE(coll
, c
, status
);
1210 currContCE
= ucol_getNextCE(coll
, c
, status
);
1211 if(!isContinuation(currContCE
)) {
1215 /* we need to repack CEs here */
1217 if(strength
== UCOL_TOK_RESET
) {
1218 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
1220 int32_t tokenIndex
= src
.parsedToken
.indirectIndex
;
1222 nextCE
= baseCE
= currCE
= ucolIndirectBoundaries
[tokenIndex
].startCE
;
1223 nextContCE
= baseContCE
= currContCE
= ucolIndirectBoundaries
[tokenIndex
].startContCE
;
1225 nextCE
= baseCE
= currCE
;
1226 nextContCE
= baseContCE
= currContCE
;
1228 maxStrength
= UCOL_IDENTICAL
;
1230 if(strength
< maxStrength
) {
1231 maxStrength
= strength
;
1232 if(baseCE
== UCOL_RESET_TOP_VALUE
) {
1233 log_verbose("Resetting to [top]\n");
1234 nextCE
= UCOL_NEXT_TOP_VALUE
;
1235 nextContCE
= UCOL_NEXT_TOP_CONT
;
1237 result
= ucol_inv_getNextCE(&src
, baseCE
& 0xFFFFFF3F, baseContCE
, &nextCE
, &nextContCE
, maxStrength
);
1240 if(ucol_isTailored(coll
, *(src
.source
+oldOffset
), status
)) {
1241 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src
.source
+oldOffset
));
1244 log_err("%s: couldn't find the CE\n", colLoc
);
1250 currCE
&= 0xFFFFFF3F;
1251 currContCE
&= 0xFFFFFFBF;
1253 if(maxStrength
== UCOL_IDENTICAL
) {
1254 if(baseCE
!= currCE
|| baseContCE
!= currContCE
) {
1255 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc
);
1258 if(strength
== UCOL_IDENTICAL
) {
1259 if(lastCE
!= currCE
|| lastContCE
!= currContCE
) {
1260 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc
);
1263 if(compareCEs(currCE
, currContCE
, nextCE
, nextContCE
) > 0) {
1264 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1265 log_err("%s: current CE is not less than base CE\n", colLoc
);
1268 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) < 0) {
1269 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1270 log_err("%s: sequence of generated CEs is broken\n", colLoc
);
1274 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) > 0) {
1275 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1276 log_err("%s: sequence of generated CEs is broken\n", colLoc
);
1284 oldOffset
= chOffset
;
1285 lastCE
= currCE
& 0xFFFFFF3F;
1286 lastContCE
= currContCE
& 0xFFFFFFBF;
1288 uprv_free(src
.source
);
1291 uprv_delete_collIterate(c
);
/* Hard-coded locale names. The tests visible in this part of the file
 * (TestCollations, RamsRulesTest) enumerate locales with uloc_getAvailable()
 * instead of reading this table.
 * NOTE(review): the end of the initializer is not visible in this listing. */
1295 /* these locales are now picked from index RB */
1296 static const char* localesToTest
[] = {
1297 "ar", "bg", "ca", "cs", "da",
1298 "el", "en_BE", "en_US_POSIX",
1299 "es", "et", "fi", "fr", "hi",
1300 "hr", "hu", "is", "iw", "ja",
1301 "ko", "lt", "lv", "mk", "mt",
1302 "nb", "nn", "nn_NO", "pl", "ro",
1303 "ru", "sh", "sk", "sl", "sq",
1304 "sr", "sv", "th", "tr", "uk",
/* Escaped collation rule strings consumed by RamsRulesTest: each entry is
 * passed through u_unescape(), opened with ucol_openRules() and then handed
 * to testCollator() and testCEs(). The trailing comment on each row looks
 * like an earlier spelling of the same rule.
 * NOTE(review): the end of the initializer is not visible in this listing. */
1309 static const char* rulesToTest
[] = {
1311 "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
1313 /* Cui Mins rules */
1314 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
1315 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1316 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
1317 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1318 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
1319 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
1320 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
1324 static void TestCollations(void) {
1325 int32_t noOfLoc
= uloc_countAvailable();
1326 int32_t i
= 0, j
= 0;
1328 UErrorCode status
= U_ZERO_ERROR
;
1334 const char *locName
= NULL
;
1335 UCollator
*coll
= NULL
;
1336 UCollator
*UCA
= ucol_open("", &status
);
1337 UColAttributeValue oldStrength
= ucol_getAttribute(UCA
, UCOL_STRENGTH
, &status
);
1338 if (U_FAILURE(status
)) {
1339 log_err_status(status
, "Could not open UCA collator %s\n", u_errorName(status
));
1342 ucol_setAttribute(UCA
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
1344 for(i
= 0; i
<noOfLoc
; i
++) {
1345 status
= U_ZERO_ERROR
;
1346 locName
= uloc_getAvailable(i
);
1347 if(uprv_strcmp("ja", locName
) == 0) {
1348 log_verbose("Don't know how to test prefixes\n");
1351 if(hasCollationElements(locName
)) {
1352 nameSize
= uloc_getDisplayName(locName
, NULL
, name
, 256, &status
);
1353 for(j
= 0; j
<nameSize
; j
++) {
1354 cName
[j
] = (char)name
[j
];
1356 cName
[nameSize
] = 0;
1357 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1358 coll
= ucol_open(locName
, &status
);
1359 if(U_SUCCESS(status
)) {
1360 testAgainstUCA(coll
, UCA
, "UCA", FALSE
, &status
);
1363 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName
, u_errorName(status
));
1364 status
= U_ZERO_ERROR
;
1368 ucol_setAttribute(UCA
, UCOL_STRENGTH
, oldStrength
, &status
);
1372 static void RamsRulesTest(void) {
1373 UErrorCode status
= U_ZERO_ERROR
;
1375 UCollator
*coll
= NULL
;
1378 int32_t noOfLoc
= uloc_countAvailable();
1379 const char *locName
= NULL
;
1381 log_verbose("RamsRulesTest\n");
1383 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1384 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1388 for(i
= 0; i
<noOfLoc
; i
++) {
1389 locName
= uloc_getAvailable(i
);
1390 if(hasCollationElements(locName
)) {
1391 if (uprv_strcmp("ja", locName
)==0) {
1392 log_verbose("Don't know how to test Japanese because of prefixes\n");
1395 if (uprv_strcmp("de__PHONEBOOK", locName
)==0) {
1396 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1399 if (uprv_strcmp("bn", locName
)==0 ||
1400 uprv_strcmp("en_US_POSIX", locName
)==0 ||
1401 uprv_strcmp("km", locName
)==0 ||
1402 uprv_strcmp("km_KH", locName
)==0 ||
1403 uprv_strcmp("my", locName
)==0 ||
1404 uprv_strcmp("si", locName
)==0 ||
1405 uprv_strcmp("si_LK", locName
)==0 ||
1406 uprv_strcmp("zh", locName
)==0 ||
1407 uprv_strcmp("zh_Hant", locName
)==0
1409 log_verbose("Don't know how to test %s. "
1410 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName
);
1413 log_verbose("Testing locale %s\n", locName
);
1414 status
= U_ZERO_ERROR
;
1415 coll
= ucol_open(locName
, &status
);
1416 if(U_SUCCESS(status
)) {
1417 if((status
!= U_USING_DEFAULT_WARNING
) && (status
!= U_USING_FALLBACK_WARNING
)) {
1418 if(coll
->image
->jamoSpecial
== TRUE
) {
1419 log_err("%s has special JAMOs\n", locName
);
1421 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_OFF
, &status
);
1422 testCollator(coll
, &status
);
1423 testCEs(coll
, &status
);
1425 log_verbose("Skipping %s: %s\n", locName
, u_errorName(status
));
1429 log_err("Could not open %s: %s\n", locName
, u_errorName(status
));
1434 for(i
= 0; i
<sizeof(rulesToTest
)/sizeof(rulesToTest
[0]); i
++) {
1435 log_verbose("Testing rule: %s\n", rulesToTest
[i
]);
1436 ruleLen
= u_unescape(rulesToTest
[i
], rule
, 2048);
1437 status
= U_ZERO_ERROR
;
1438 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1439 if(U_SUCCESS(status
)) {
1440 testCollator(coll
, &status
);
1441 testCEs(coll
, &status
);
1444 log_err_status(status
, "Could not test rule: %s: '%s'\n", u_errorName(status
), rulesToTest
[i
]);
1450 static void IsTailoredTest(void) {
1451 UErrorCode status
= U_ZERO_ERROR
;
1453 UCollator
*coll
= NULL
;
1455 UChar tailored
[2048];
1456 UChar notTailored
[2048];
1457 uint32_t ruleLen
, tailoredLen
, notTailoredLen
;
1459 log_verbose("IsTailoredTest\n");
1461 u_uastrcpy(rule
, "&Z < A, B, C;c < d");
1462 ruleLen
= u_strlen(rule
);
1464 u_uastrcpy(tailored
, "ABCcd");
1465 tailoredLen
= u_strlen(tailored
);
1467 u_uastrcpy(notTailored
, "ZabD");
1468 notTailoredLen
= u_strlen(notTailored
);
1470 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1471 if(U_SUCCESS(status
)) {
1472 for(i
= 0; i
<tailoredLen
; i
++) {
1473 if(!ucol_isTailored(coll
, tailored
[i
], &status
)) {
1474 log_err("%i: %04X should be tailored - it is reported as not\n", i
, tailored
[i
]);
1477 for(i
= 0; i
<notTailoredLen
; i
++) {
1478 if(ucol_isTailored(coll
, notTailored
[i
], &status
)) {
1479 log_err("%i: %04X should not be tailored - it is reported as it is\n", i
, notTailored
[i
]);
1485 log_err_status(status
, "Can't tailor rules\n");
1488 status
= U_ZERO_ERROR
;
1489 coll
= ucol_open("ja", &status
);
1490 if(!ucol_isTailored(coll
, 0x4E9C, &status
)) {
1491 log_err_status(status
, "0x4E9C should be tailored - it is reported as not\n");
/* Escaped test strings for TestChMove, which opens the "cs" collator and
 * expects every entry to compare UCOL_LESS than every later entry.
 * NOTE(review): several rows of the initializer are not visible in this
 * listing. */
1497 const static char chTest
[][20] = {
1500 "ca", "cb", "cx", "cy", "CZ",
1501 "c\\u030C", "C\\u030C",
1504 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
1505 "ch", "cH", "Ch", "CH",
1506 "cha", "charly", "che", "chh", "chch", "chr",
1509 "r\\u030C", "R\\u030C",
1512 "s\\u030C", "S\\u030C",
1514 "z\\u030C", "Z\\u030C"
1517 static void TestChMove(void) {
1518 UChar t1
[256] = {0};
1519 UChar t2
[256] = {0};
1521 uint32_t i
= 0, j
= 0;
1523 UErrorCode status
= U_ZERO_ERROR
;
1525 UCollator
*coll
= ucol_open("cs", &status
);
1527 if(U_SUCCESS(status
)) {
1528 size
= sizeof(chTest
)/sizeof(chTest
[0]);
1529 for(i
= 0; i
< size
-1; i
++) {
1530 for(j
= i
+1; j
< size
; j
++) {
1531 u_unescape(chTest
[i
], t1
, 256);
1532 u_unescape(chTest
[j
], t2
, 256);
1533 doTest(coll
, t1
, t2
, UCOL_LESS
);
1538 log_data_err("Can't open collator");
/* Test strings referenced from TestImplicitTailoring; the only visible use
 * is in what looks like commented-out code there — TODO confirm.
 * NOTE(review): the rows of this initializer are not visible in this listing. */
1546 const static char impTest
[][20] = {
1556 static void TestImplicitTailoring(void) {
1557 static const struct {
1559 const char *data
[10];
1562 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1563 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1564 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1565 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1570 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
1571 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
1575 UChar t1[256] = {0};
1576 UChar t2[256] = {0};
1578 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1580 uint32_t i = 0, j = 0;
1582 uint32_t ruleLen = 0;
1583 UErrorCode status = U_ZERO_ERROR;
1584 UCollator *coll = NULL;
1585 ruleLen = u_unescape(rule, t1, 256);
1587 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1589 if(U_SUCCESS(status)) {
1590 size = sizeof(impTest)/sizeof(impTest[0]);
1591 for(i = 0; i < size-1; i++) {
1592 for(j = i+1; j < size; j++) {
1593 u_unescape(impTest[i], t1, 256);
1594 u_unescape(impTest[j], t2, 256);
1595 doTest(coll, t1, t2, UCOL_LESS);
1600 log_err("Can't open collator");
1606 static void TestFCDProblem(void) {
1607 UChar t1
[256] = {0};
1608 UChar t2
[256] = {0};
1610 const char *s1
= "\\u0430\\u0306\\u0325";
1611 const char *s2
= "\\u04D1\\u0325";
1613 UErrorCode status
= U_ZERO_ERROR
;
1614 UCollator
*coll
= ucol_open("", &status
);
1615 u_unescape(s1
, t1
, 256);
1616 u_unescape(s2
, t2
, 256);
1618 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
1619 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
1621 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
1622 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
1628 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1629 We're only using NFC/NFD in this test.
1631 #define NORM_BUFFER_TEST_LEN 18
1634 UChar NFC
[NORM_BUFFER_TEST_LEN
];
1635 UChar NFD
[NORM_BUFFER_TEST_LEN
];
/*
 * TestComposeDecompose: walks every code point of the set
 * [[:NFD_Inert=false:][:NFC_Inert=false:]], normalizes it to NFC and NFD,
 * and keeps the cases whose forms differ from each other or from the input.
 * For each kept case it checks with ucol_equal() that the NFC and NFD forms
 * collate as equal, first under the root collator and then under every
 * available locale collator for which hasCollationElements() is true.
 * NOTE(review): this listing omits some source lines (declarations, closing
 * braces, else/return lines); the comments here describe only visible code.
 */
1638 static void TestComposeDecompose(void) {
1639 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1640 static const UChar UNICODESET_STR
[] = {
1641 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1642 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1643 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1646 int32_t i
= 0, j
= 0;
1648 UErrorCode status
= U_ZERO_ERROR
;
1649 const char *locName
= NULL
;
1653 uint32_t noCases
= 0;
1654 UCollator
*coll
= NULL
;
1656 UChar comp
[NORM_BUFFER_TEST_LEN
];
1658 UCollationElements
*iter
;
1659 USet
*charsToTest
= uset_openPattern(UNICODESET_STR
, -1, &status
);
1660 int32_t charsToTestSize
;
1662 noOfLoc
= uloc_countAvailable();
1664 coll
= ucol_open("", &status
);
1665 if (U_FAILURE(status
)) {
1666 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status
));
1669 charsToTestSize
= uset_size(charsToTest
);
1670 if (charsToTestSize
<= 0) {
1671 log_err("Set was zero. Missing data?\n");
/* NOTE(review): no NULL check is visible for either malloc result below. */
1674 t
= malloc(charsToTestSize
* sizeof(tester
*));
1675 t
[0] = (tester
*)malloc(sizeof(tester
));
1676 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize
);
1678 for(u
= 0; u
< charsToTestSize
; u
++) {
1679 UChar32 ch
= uset_charAt(charsToTest
, u
);
1681 UTF_APPEND_CHAR_UNSAFE(comp
, len
, ch
);
1682 nfcSize
= unorm_normalize(comp
, len
, UNORM_NFC
, 0, t
[noCases
]->NFC
, NORM_BUFFER_TEST_LEN
, &status
);
1683 nfdSize
= unorm_normalize(comp
, len
, UNORM_NFD
, 0, t
[noCases
]->NFD
, NORM_BUFFER_TEST_LEN
, &status
);
/* Keep this code point only if NFC != NFD, or NFD differs from the input. */
1685 if(nfcSize
!= nfdSize
|| (uprv_memcmp(t
[noCases
]->NFC
, t
[noCases
]->NFD
, nfcSize
* sizeof(UChar
)) != 0)
1686 || (len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0))) {
1688 if(len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0)) {
1689 u_strncpy(t
[noCases
]->NFC
, comp
, len
);
1690 t
[noCases
]->NFC
[len
] = 0;
1693 t
[noCases
] = (tester
*)malloc(sizeof(tester
));
1694 uprv_memset(t
[noCases
], 0, sizeof(tester
));
1697 log_verbose("Testing %d/%d of possible test cases\n", noCases
, charsToTestSize
);
1698 uset_close(charsToTest
);
1701 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1702 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1703 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t
[u
]->u
);
1704 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1708 for(u = 0; u < charsToTestSize; u++) {
1710 log_verbose("%08X ", u);
1712 uprv_memset(t[noCases], 0, sizeof(tester));
1715 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1717 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1718 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1719 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1720 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1726 log_verbose("Testing locales, number of cases = %i\n", noCases
);
1727 for(i
= 0; i
<noOfLoc
; i
++) {
1728 status
= U_ZERO_ERROR
;
1729 locName
= uloc_getAvailable(i
);
1730 if(hasCollationElements(locName
)) {
1733 int32_t nameSize
= uloc_getDisplayName(locName
, NULL
, name
, sizeof(cName
), &status
);
1735 for(j
= 0; j
<nameSize
; j
++) {
1736 cName
[j
] = (char)name
[j
];
1738 cName
[nameSize
] = 0;
1739 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1741 coll
= ucol_open(locName
, &status
);
1742 ucol_setStrength(coll
, UCOL_IDENTICAL
);
/* NOTE(review): u still holds its value from the preceding loop here, so
 * t[u] is presumably the spare zeroed tester slot — confirm. */
1743 iter
= ucol_openElements(coll
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1745 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1746 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1747 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t
[u
]->u
, cName
);
1748 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1749 log_verbose("Testing NFC\n");
1750 ucol_setText(iter
, t
[u
]->NFC
, u_strlen(t
[u
]->NFC
), &status
);
1752 log_verbose("Testing NFD\n");
1753 ucol_setText(iter
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1757 ucol_closeElements(iter
);
/* Inclusive bound: the loop also visits the extra slot t[noCases]. The loop
 * body is not visible in this listing. */
1761 for(u
= 0; u
<= (UChar32
)noCases
; u
++) {
1767 static void TestEmptyRule(void) {
1768 UErrorCode status
= U_ZERO_ERROR
;
1769 UChar rulez
[] = { 0 };
1770 UCollator
*coll
= ucol_openRules(rulez
, 0, UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
1775 static void TestUCARules(void) {
1776 UErrorCode status
= U_ZERO_ERROR
;
1779 uint32_t ruleLen
= 0;
1780 UCollator
*UCAfromRules
= NULL
;
1781 UCollator
*coll
= ucol_open("", &status
);
1782 if(status
== U_FILE_ACCESS_ERROR
) {
1783 log_data_err("Is your data around?\n");
1785 } else if(U_FAILURE(status
)) {
1786 log_err("Error opening collator\n");
1789 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, 256);
1791 log_verbose("TestUCARules\n");
1793 rules
= (UChar
*)malloc((ruleLen
+1)*sizeof(UChar
));
1794 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, ruleLen
);
1796 log_verbose("Rules length is %d\n", ruleLen
);
1797 UCAfromRules
= ucol_openRules(rules
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1798 if(U_SUCCESS(status
)) {
1799 ucol_close(UCAfromRules
);
1801 log_verbose("Unable to create a collator from UCARules!\n");
1804 u_unescape(blah, b, 256);
1805 ucol_getSortKey(coll, b, 1, res, 256);
1814 /* Pinyin tonal order */
1816 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1817 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1818 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1819 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1820 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1821 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1822 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1825 However, in testing we got the following order:
1826 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1827 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1828 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1830 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1831 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1832 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1834 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
/*
 * Checks [before 1] resets for the pinyin tone vowels: the tailoring places
 * the macron/acute/caron/grave forms immediately before each base vowel and
 * the u-diaeresis tone forms after 'u'. genericRulesStarter() verifies that
 * the strings below sort in exactly the listed order.
 */
static void TestBefore(void) {
    static const char *expectedOrder[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    static const char tailoring[] =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(tailoring, expectedOrder, LEN(expectedOrder));
}
/* Passes the pinyin tone-vowel strings below, in their expected order, to
 * genericLocaleStarter() for the "zh" locale.
 * NOTE(review): part of the data initializer is not visible in this listing. */
1857 /* superseded by TestBeforePinyin */
1858 static void TestJ784(void) {
1859 const static char *data
[] = {
1860 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
1861 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
1862 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
1863 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
1864 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
1866 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
1868 genericLocaleStarter("zh", data
, sizeof(data
)/sizeof(data
[0]));
/* Passes a local data table to genericLocaleStarter() for the "lv" locale.
 * NOTE(review): the rows of the data initializer are not visible in this
 * listing. */
1873 /* superseded by the changes to the lv locale */
1874 static void TestJ831(void) {
1875 const static char *data
[] = {
1881 genericLocaleStarter("lv", data
, sizeof(data
)/sizeof(data
[0]));
/* Runs the same data table twice: through genericLocaleStarter() for the
 * "fr" locale, and through genericRulesStarter() with the rule
 * "[backwards 2]&A<<\u00e6/e<<<\u00c6/E".
 * NOTE(review): the rows of the data initializer are not visible in this
 * listing. */
1885 static void TestJ815(void) {
1886 const static char *data
[] = {
1902 genericLocaleStarter("fr", data
, sizeof(data
)/sizeof(data
[0]));
1903 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data
, sizeof(data
)/sizeof(data
[0]));
1908 "& a < b < c < d& r < c", "& a < b < d& r < c",
1909 "& a < b < c < d& c < m", "& a < b < c < m < d",
1910 "& a < b < c < d& a < m", "& a < m < b < c < d",
1911 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1912 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1913 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1914 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1915 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1916 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
/*
 * TestRedundantRules: each tests[] entry holds a rule string containing a
 * redundant or overriding reset, the simplified rule string it is expected
 * to be equivalent to, and strings listed in expected sort order.
 * For every entry the test opens a collator from each rule string, compares
 * the two with testAgainstUCA() in error-reporting (TRUE) mode, closes both,
 * and then runs genericRulesStarter() on the original rule with the data.
 * Fix in this revision: the "[before 2]" entry had a doubled comma before
 * its length (",, 6"), which is a syntax error whenever that entry is
 * compiled; it now reads ", 6".
 * NOTE(review): this listing omits some source lines (braces, some table
 * rows, preprocessor lines); the comments describe only visible code.
 */
1918 static void TestRedundantRules(void) {
1921 static const struct {
1923 const char *expectedRules
;
1924 const char *testdata
[8];
1925 uint32_t testdatalen
;
1927 /* this test conflicts with positioning of CODAN placeholder */
1929 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1933 /* this test conflicts with the [before x] syntax tightening */
1935 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1939 /* this test conflicts with the [before x] syntax tightening */
1941 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1942 "& a <<< x < b <<< c << d <<< e",
1943 {"a", "x", "b", "c", "d", "e"}, 6
1946 "& a < b < c < d& [before 1] c < m",
1947 "& a < b < m < c < d",
1948 {"a", "b", "m", "c", "d"}, 5
1951 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1952 "& a < b <<< c << d <<< x <<< e",
1953 {"a", "b", "c", "d", "x", "e"}, 6
1955 /* this test conflicts with the [before x] syntax tightening */
1957 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1958 "& a < b <<< c <<< x << d <<< e",
1959 {"a", "b", "c", "x", "d", "e"}, 6
1962 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1963 "& a < b <<< c << d <<< e <<< f < x < g",
1964 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1967 "& a <<< b << c < d& a < m",
1968 "& a <<< b << c < m < d",
1969 {"a", "b", "c", "m", "d"}, 5
1972 "&a<b<<b\\u0301 &z<b",
1974 {"a", "b\\u0301", "z", "b"}, 4
1987 "& a < b < c < d& r < c",
1988 "& a < b < d& r < c",
1992 "& a < b < c < d& r < c",
1993 "& a < b < d& r < c",
1997 "& a < b < c < d& c < m",
1998 "& a < b < c < m < d",
1999 {"a", "b", "c", "m", "d"}, 5
2002 "& a < b < c < d& a < m",
2003 "& a < m < b < c < d",
2004 {"a", "m", "b", "c", "d"}, 5
2009 UCollator
*credundant
= NULL
;
2010 UCollator
*cresulting
= NULL
;
2011 UErrorCode status
= U_ZERO_ERROR
;
2012 UChar rlz
[2048] = { 0 };
2015 for(i
= 0; i
<sizeof(tests
)/sizeof(tests
[0]); i
++) {
2016 log_verbose("testing rule %s, expected to be %s\n", tests
[i
].rules
, tests
[i
].expectedRules
);
2017 rlen
= u_unescape(tests
[i
].rules
, rlz
, 2048);
2019 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2020 if(status
== U_FILE_ACCESS_ERROR
) {
2021 log_data_err("Is your data around?\n");
2023 } else if(U_FAILURE(status
)) {
2024 log_err("Error opening collator\n");
2028 rlen
= u_unescape(tests
[i
].expectedRules
, rlz
, 2048);
2029 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2031 testAgainstUCA(cresulting
, credundant
, "expected", TRUE
, &status
);
2033 ucol_close(credundant
);
2034 ucol_close(cresulting
);
2036 log_verbose("testing using data\n");
2038 genericRulesStarter(tests
[i
].rules
, tests
[i
].testdata
, tests
[i
].testdatalen
);
/*
 * TestExpansionSyntax: rules[] uses a multi-character reset ("&AE ..."),
 * and expectedRules[] spells the same tailoring with explicit "/ E"
 * expansions. For each index the test opens a collator from both strings,
 * compares them with testAgainstUCA() in informational (FALSE) mode, closes
 * both, and then runs genericRulesStarter() with the rule and its
 * testdata row.
 * NOTE(review): this listing omits some source lines (declarations, braces,
 * the testdatalen values); the comments describe only visible code.
 */
2043 static void TestExpansionSyntax(void) {
2046 const static char *rules
[] = {
2047 "&AE <<< a << b <<< c &d <<< f",
2048 "&AE <<< a <<< b << c << d < e < f <<< g",
2049 "&AE <<< B <<< C / D <<< F"
2052 const static char *expectedRules
[] = {
2053 "&A <<< a / E << b / E <<< c /E &d <<< f",
2054 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2055 "&A <<< B / E <<< C / ED <<< F / E"
2058 const static char *testdata
[][8] = {
2059 {"AE", "a", "b", "c"},
2060 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2061 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2064 const static uint32_t testdatalen
[] = {
2072 UCollator
*credundant
= NULL
;
2073 UCollator
*cresulting
= NULL
;
2074 UErrorCode status
= U_ZERO_ERROR
;
2075 UChar rlz
[2048] = { 0 };
2078 for(i
= 0; i
<sizeof(rules
)/sizeof(rules
[0]); i
++) {
2079 log_verbose("testing rule %s, expected to be %s\n", rules
[i
], expectedRules
[i
]);
2080 rlen
= u_unescape(rules
[i
], rlz
, 2048);
2082 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2083 if(status
== U_FILE_ACCESS_ERROR
) {
2084 log_data_err("Is your data around?\n");
2086 } else if(U_FAILURE(status
)) {
2087 log_err("Error opening collator\n");
/* The same rlz buffer is reused for the expected rule string. */
2090 rlen
= u_unescape(expectedRules
[i
], rlz
, 2048);
2091 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2093 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2094 /* as a hard error test, but only in information mode */
2095 testAgainstUCA(cresulting
, credundant
, "expected", FALSE
, &status
);
2097 ucol_close(credundant
);
2098 ucol_close(cresulting
);
2100 log_verbose("testing using data\n");
2102 genericRulesStarter(rules
[i
], testdata
[i
], testdatalen
[i
]);
/*
 * TestCase: checks the UCOL_CASE_FIRST / UCOL_CASE_LEVEL attribute
 * combinations listed in caseTestAttributes[]. For each combination k it
 * compares every pair testCase[i], testCase[j] with i < j and expects
 * caseTestResults[k][3*i+j-1]. This is done first on the "en_US" collator
 * and then on a collator built from gRules; the second pass also opens and
 * closes a collation element iterator on each string. Finally the mixed
 * case tables lowerFirst[] / upperFirst[] are run through
 * genericRulesStarter() with [casefirst ...] and [caselevel on] rules.
 * NOTE(review): this listing omits some source lines (braces, declarations,
 * the rows of lowerFirst/upperFirst); comments describe only visible code.
 */
2106 static void TestCase(void)
2108 const static UChar gRules
[MAX_TOKEN_LEN
] =
2109 /*" & 0 < 1,\u2461<a,A"*/
2110 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2112 const static UChar testCase
[][MAX_TOKEN_LEN
] =
2114 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2115 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2116 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2117 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2120 const static UCollationResult caseTestResults
[][9] =
2122 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
2123 { UCOL_GREATER
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
},
2124 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_GREATER
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
2125 { UCOL_GREATER
, UCOL_LESS
, UCOL_GREATER
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
}
2128 const static UColAttributeValue caseTestAttributes
[][2] =
2130 { UCOL_LOWER_FIRST
, UCOL_OFF
},
2131 { UCOL_UPPER_FIRST
, UCOL_OFF
},
2132 { UCOL_LOWER_FIRST
, UCOL_ON
},
2133 { UCOL_UPPER_FIRST
, UCOL_ON
}
2136 UErrorCode status
= U_ZERO_ERROR
;
2137 UCollationElements
*iter
;
2138 UCollator
*myCollation
;
2139 myCollation
= ucol_open("en_US", &status
);
2141 if(U_FAILURE(status
)){
2142 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2145 log_verbose("Testing different case settings\n");
2146 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2148 for(k
= 0; k
<4; k
++) {
2149 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2150 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2151 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes
[k
][0], caseTestAttributes
[k
][1]);
2152 for (i
= 0; i
< 3 ; i
++) {
2153 for(j
= i
+1; j
<4; j
++) {
/* 3*i+j-1 maps the pair (i,j), i<j<4, to columns 0,1,2,4,5,8 of the
 * expected-result row; columns 3, 6 and 7 are never read here. */
2154 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2158 ucol_close(myCollation
);
2160 myCollation
= ucol_openRules(gRules
, u_strlen(gRules
), UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
2161 if(U_FAILURE(status
)){
2162 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2165 log_verbose("Testing different case settings with custom rules\n");
2166 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2168 for(k
= 0; k
<4; k
++) {
2169 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2170 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2171 for (i
= 0; i
< 3 ; i
++) {
2172 for(j
= i
+1; j
<4; j
++) {
2173 log_verbose("k:%d, i:%d, j:%d\n", k
, i
, j
);
2174 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2175 iter
=ucol_openElements(myCollation
, testCase
[i
], u_strlen(testCase
[i
]), &status
);
2177 ucol_closeElements(iter
);
2178 iter
=ucol_openElements(myCollation
, testCase
[j
], u_strlen(testCase
[j
]), &status
);
2180 ucol_closeElements(iter
);
2184 ucol_close(myCollation
);
2186 const static char *lowerFirst
[] = {
2202 const static char *upperFirst
[] = {
2217 log_verbose("mixed case test\n");
2218 log_verbose("lower first, case level off\n");
2219 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2220 log_verbose("upper first, case level off\n");
2221 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
2222 log_verbose("lower first, case level on\n");
2223 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2224 log_verbose("upper first, case level on\n");
2225 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
/*
 * TestIncrementalNormalize: exercises string comparison and sort keys on
 * non-normalized input with the "en_US" collator and normalization on.
 * Visible sub-tests:
 *   1. long strings of mixed combining marks (strA forward, strB reversed)
 *      must compare equal at tertiary and identical strength;
 *   2. a non-normal sequence running to the end of the string;
 *   3. a non-normal sequence terminated by a surrogate pair;
 *   4. embedded NULs with explicit lengths versus NUL-terminated (-1) calls,
 *      for ucol_strcoll() and for sort keys compared with strcmp();
 *   5. the same with NULs inside non-normal strings;
 *   6. NUL as the base of a non-normal combining sequence.
 * The QUICK_OPTION test option is saved in myQ and written back with
 * setTestOption() after test 1.
 * NOTE(review): this listing omits some source lines (declarations, braces,
 * and the conditions guarding several log_err calls); the comments describe
 * only visible code.
 */
2230 static void TestIncrementalNormalize(void) {
2232 /*UChar baseA =0x61;*/
2234 /* UChar baseB = 0x42;*/
2235 static const UChar ccMix
[] = {0x316, 0x321, 0x300};
2236 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2238 0x316 is combining grave accent below, cc=220
2239 0x321 is combining palatalized hook below, cc=202
2240 0x300 is combining grave accent, cc=230
2243 #define MAXSLEN 2000
2244 /*int maxSLen = 64000;*/
2249 UErrorCode status
= U_ZERO_ERROR
;
2250 UCollationResult result
;
2252 int32_t myQ
= getTestOption(QUICK_OPTION
);
2254 if(getTestOption(QUICK_OPTION
) < 0) {
2255 setTestOption(QUICK_OPTION
, 1);
2259 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2260 /* most buffers along the way.*/
2261 UChar strA
[MAXSLEN
+1];
2262 UChar strB
[MAXSLEN
+1];
2264 coll
= ucol_open("en_US", &status
);
2265 if(status
== U_FILE_ACCESS_ERROR
) {
2266 log_data_err("Is your data around?\n");
2268 } else if(U_FAILURE(status
)) {
2269 log_err("Error opening collator\n");
2272 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2274 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2275 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2276 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2277 for (sLen
= 500; sLen
<501; sLen
++) {
2278 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
/* strA gets the combining-mark cycle in forward order, strB the same marks
 * in reverse order. */
2281 for (i
=1; i
<=sLen
-1; i
++) {
2282 strA
[i
] = ccMix
[i
% 3];
2283 strB
[sLen
-i
] = ccMix
[i
% 3];
2288 ucol_setStrength(coll
, UCOL_TERTIARY
); /* Do test with default strength, which runs*/
2289 doTest(coll
, strA
, strB
, UCOL_EQUAL
); /* optimized functions in the impl*/
2290 ucol_setStrength(coll
, UCOL_IDENTICAL
); /* Do again with the slow, general impl.*/
2291 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2295 setTestOption(QUICK_OPTION
, myQ
);
2298 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2299 /* of the string. Checks a couple of edge cases.*/
2302 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0};
2303 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0};
2304 ucol_setStrength(coll
, UCOL_TERTIARY
);
2305 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2308 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2312 * test below used a code point from Desseret, which sorts differently
2315 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2316 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2317 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2318 ucol_setStrength(coll
, UCOL_TERTIARY
);
2319 doTest(coll
, strA
, strB
, UCOL_GREATER
);
2322 /* Test 4: Embedded nulls do not terminate a string when length is specified.*/
2325 static const UChar strA
[] = {0x41, 0x00, 0x42, 0x00};
2326 static const UChar strB
[] = {0x41, 0x00, 0x00, 0x00};
2333 /* there used to be -3 here. Hmmmm.... */
2334 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2335 result
= ucol_strcoll(coll
, strA
, 3, strB
, 3);
2336 if (result
!= UCOL_GREATER
) {
2337 log_err("ERROR 1 in test 4\n");
/* With length -1 both strings stop at their first NUL, i.e. after 0x41. */
2339 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2340 if (result
!= UCOL_EQUAL
) {
2341 log_err("ERROR 2 in test 4\n");
2344 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2345 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2346 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2347 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2349 r
= strcmp(sortKeyA
, sortKeyAz
);
2351 log_err("Error 3 in test 4\n");
2353 r
= strcmp(sortKeyA
, sortKeyB
);
2355 log_err("Error 4 in test 4\n");
2357 r
= strcmp(sortKeyAz
, sortKeyBz
);
2359 log_err("Error 5 in test 4\n");
2362 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2363 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2364 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2365 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2366 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2368 r
= strcmp(sortKeyA
, sortKeyAz
);
2370 log_err("Error 6 in test 4\n");
2372 r
= strcmp(sortKeyA
, sortKeyB
);
2374 log_err("Error 7 in test 4\n");
2376 r
= strcmp(sortKeyAz
, sortKeyBz
);
2378 log_err("Error 8 in test 4\n");
2380 ucol_setStrength(coll
, UCOL_TERTIARY
);
2384 /* Test 5: Null characters in non-normal source strings.*/
2387 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2388 static const UChar strB
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2395 result
= ucol_strcoll(coll
, strA
, 6, strB
, 6);
2396 if (result
!= UCOL_GREATER
) {
2397 log_err("ERROR 1 in test 5\n");
2399 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2400 if (result
!= UCOL_EQUAL
) {
2401 log_err("ERROR 2 in test 5\n");
2404 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2405 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2406 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2407 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2409 r
= strcmp(sortKeyA
, sortKeyAz
);
2411 log_err("Error 3 in test 5\n");
2413 r
= strcmp(sortKeyA
, sortKeyB
);
2415 log_err("Error 4 in test 5\n");
2417 r
= strcmp(sortKeyAz
, sortKeyBz
);
2419 log_err("Error 5 in test 5\n");
2422 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2423 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2424 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2425 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2426 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2428 r
= strcmp(sortKeyA
, sortKeyAz
);
2430 log_err("Error 6 in test 5\n");
2432 r
= strcmp(sortKeyA
, sortKeyB
);
2434 log_err("Error 7 in test 5\n");
2436 r
= strcmp(sortKeyAz
, sortKeyBz
);
2438 log_err("Error 8 in test 5\n");
2440 ucol_setStrength(coll
, UCOL_TERTIARY
);
2444 /* Test 6: Null character as base of a non-normal combining sequence.*/
2447 static const UChar strA
[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2448 static const UChar strB
[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2450 result
= ucol_strcoll(coll
, strA
, 5, strB
, 5);
2451 if (result
!= UCOL_LESS
) {
2452 log_err("Error 1 in test 6\n");
2454 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2455 if (result
!= UCOL_EQUAL
) {
2456 log_err("Error 2 in test 6\n");
2466 static void TestGetCaseBit(void) {
2467 static const char *caseBitData
[] = {
2468 "a", "A", "ch", "Ch", "CH",
2469 "\\uFF9E", "\\u0009"
2472 static const uint8_t results
[] = {
2473 UCOL_LOWER_CASE
, UCOL_UPPER_CASE
, UCOL_LOWER_CASE
, UCOL_MIXED_CASE
, UCOL_UPPER_CASE
,
2474 UCOL_UPPER_CASE
, UCOL_LOWER_CASE
2477 uint32_t i
, blen
= 0;
2479 UErrorCode status
= U_ZERO_ERROR
;
2480 UCollator
*UCA
= ucol_open("", &status
);
2483 for(i
= 0; i
<sizeof(results
)/sizeof(results
[0]); i
++) {
2484 blen
= u_unescape(caseBitData
[i
], b
, 256);
2485 res
= ucol_uprv_getCaseBits(UCA
, b
, blen
, &status
);
2486 if(results
[i
] != res
) {
2487 log_err("Expected case = %02X, got %02X for %04X\n", results
[i
], res
, b
[0]);
2493 static void TestHangulTailoring(void) {
2494 static const char *koreanData
[] = {
2495 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2496 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2497 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2498 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2499 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2500 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2504 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2505 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2506 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2507 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2508 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2509 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2512 UErrorCode status
= U_ZERO_ERROR
;
2513 UChar rlz
[2048] = { 0 };
2514 uint32_t rlen
= u_unescape(rules
, rlz
, 2048);
2516 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
2517 if(status
== U_FILE_ACCESS_ERROR
) {
2518 log_data_err("Is your data around?\n");
2520 } else if(U_FAILURE(status
)) {
2521 log_err("Error opening collator\n");
2525 log_verbose("Using start of korean rules\n");
2527 if(U_SUCCESS(status
)) {
2528 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2530 log_err("Unable to open collator with rules %s\n", rules
);
2533 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2534 ((UCATableHeader
*)coll
->image
)->jamoSpecial
= TRUE
; /* don't try this at home */
2535 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2539 log_verbose("Using ko__LOTUS locale\n");
2540 genericLocaleStarter("ko__LOTUS", koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2543 static void TestCompressOverlap(void) {
2546 UErrorCode status
= U_ZERO_ERROR
;
2553 coll
= ucol_open("", &status
);
2555 if (U_FAILURE(status
)) {
2556 log_err_status(status
, "Collator can't be created -> %s\n", u_errorName(status
));
2559 while (count
< 149) {
2560 secstr
[count
] = 0x0020; /* [06, 05, 05] */
2561 tertstr
[count
] = 0x0020;
2565 /* top down compression ----------------------------------- */
2566 secstr
[count
] = 0x0332; /* [, 87, 05] */
2567 tertstr
[count
] = 0x3000; /* [06, 05, 07] */
2569 /* no compression secstr should have 150 secondary bytes, tertstr should
2570 have 150 tertiary bytes.
2571 with correct overlapping compression, secstr should have 4 secondary
2572 bytes, tertstr should have > 2 tertiary bytes */
2573 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
2574 tempptr
= uprv_strchr(result
, 1) + 1;
2575 while (*(tempptr
+ 1) != 1) {
2576 /* the last secondary collation element is not checked since it is not
2577 part of the compression */
2578 if (*tempptr
< UCOL_COMMON_TOP2
- UCOL_TOP_COUNT2
) {
2579 log_err("Secondary compression overlapped\n");
2584 /* tertiary top/bottom/common for en_US is similar to the secondary
2585 top/bottom/common */
2586 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
2587 tempptr
= uprv_strrchr(result
, 1) + 1;
2588 while (*(tempptr
+ 1) != 0) {
2589 /* the last secondary collation element is not checked since it is not
2590 part of the compression */
2591 if (*tempptr
< coll
->tertiaryTop
- coll
->tertiaryTopCount
) {
2592 log_err("Tertiary compression overlapped\n");
2597 /* bottom up compression ------------------------------------- */
2600 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
2601 tempptr
= uprv_strchr(result
, 1) + 1;
2602 while (*(tempptr
+ 1) != 1) {
2603 /* the last secondary collation element is not checked since it is not
2604 part of the compression */
2605 if (*tempptr
> UCOL_COMMON_BOT2
+ UCOL_BOT_COUNT2
) {
2606 log_err("Secondary compression overlapped\n");
2611 /* tertiary top/bottom/common for en_US is similar to the secondary
2612 top/bottom/common */
2613 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
2614 tempptr
= uprv_strrchr(result
, 1) + 1;
2615 while (*(tempptr
+ 1) != 0) {
2616 /* the last secondary collation element is not checked since it is not
2617 part of the compression */
2618 if (*tempptr
> coll
->tertiaryBottom
+ coll
->tertiaryBottomCount
) {
2619 log_err("Tertiary compression overlapped\n");
2627 static void TestCyrillicTailoring(void) {
2628 static const char *test
[] = {
2634 /* Russian overrides contractions, so this test is not valid anymore */
2635 /*genericLocaleStarter("ru", test, 3);*/
2637 genericLocaleStarter("root", test
, 3);
2638 genericRulesStarter("&\\u0410 = \\u0410", test
, 3);
2639 genericRulesStarter("&Z < \\u0410", test
, 3);
2640 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test
, 3);
2641 genericRulesStarter("&Z < \\u0410 < \\u04d0", test
, 3);
2642 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test
, 3);
2643 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test
, 3);
2646 static void TestSuppressContractions(void) {
2648 static const char *testNoCont2
[] = {
2653 static const char *testNoCont
[] = {
2656 "\\uFF21\\u0410\\u0302"
2659 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont
, 3);
2660 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2
, 3);
2663 static void TestContraction(void) {
2664 const static char *testrules
[] = {
2666 "&A = A\\u0306/\\u0306",
2669 const static UChar testdata
[][2] = {
2670 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2671 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2672 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2674 const static UChar testdata2
[][2] = {
2675 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2676 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2677 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2679 const static char *testrules3
[] = {
2680 "&z < xyz &xyzw << B",
2681 "&z < xyz &xyz << B / w",
2682 "&z < ch &achm << B",
2683 "&z < ch &a << B / chm",
2684 "&\\ud800\\udc00w << B",
2685 "&\\ud800\\udc00 << B / w",
2686 "&a\\ud800\\udc00m << B",
2687 "&a << B / \\ud800\\udc00m",
2690 UErrorCode status
= U_ZERO_ERROR
;
2692 UChar rule
[256] = {0};
2696 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2697 UCollationElements
*iter1
;
2699 log_verbose("Rule %s for testing\n", testrules
[i
]);
2700 rlen
= u_unescape(testrules
[i
], rule
, 32);
2701 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2702 if (U_FAILURE(status
)) {
2703 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
2706 iter1
= ucol_openElements(coll
, testdata
[i
], 2, &status
);
2707 if (U_FAILURE(status
)) {
2708 log_err("Collation iterator creation failed\n");
2712 UCollationElements
*iter2
= ucol_openElements(coll
,
2716 if (U_FAILURE(status
)) {
2717 log_err("Collation iterator creation failed\n");
2720 ce
= ucol_next(iter2
, &status
);
2721 while (ce
!= UCOL_NULLORDER
) {
2722 if ((uint32_t)ucol_next(iter1
, &status
) != ce
) {
2723 log_err("Collation elements in contraction split does not match\n");
2726 ce
= ucol_next(iter2
, &status
);
2729 ucol_closeElements(iter2
);
2731 if (ucol_next(iter1
, &status
) != UCOL_NULLORDER
) {
2732 log_err("Collation elements not exhausted\n");
2735 ucol_closeElements(iter1
);
2739 rlen
= u_unescape("& a < b < c < ch < d & c = ch / h", rule
, 256);
2740 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2741 if (ucol_strcoll(coll
, testdata2
[0], 2, testdata2
[1], 2) != UCOL_LESS
) {
2742 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2743 testdata2
[0][0], testdata2
[0][1], testdata2
[1][0],
2747 if (ucol_strcoll(coll
, testdata2
[1], 2, testdata2
[2], 2) != UCOL_LESS
) {
2748 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2749 testdata2
[1][0], testdata2
[1][1], testdata2
[2][0],
2755 for (i
= 0; i
< sizeof(testrules3
) / sizeof(testrules3
[0]); i
+= 2) {
2758 UCollationElements
*iter1
,
2760 UChar ch
= 0x0042 /* 'B' */;
2762 rlen
= u_unescape(testrules3
[i
], rule
, 32);
2763 coll1
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2764 rlen
= u_unescape(testrules3
[i
+ 1], rule
, 32);
2765 coll2
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2766 if (U_FAILURE(status
)) {
2767 log_err("Collator creation failed %s\n", testrules
[i
]);
2770 iter1
= ucol_openElements(coll1
, &ch
, 1, &status
);
2771 iter2
= ucol_openElements(coll2
, &ch
, 1, &status
);
2772 if (U_FAILURE(status
)) {
2773 log_err("Collation iterator creation failed\n");
2776 ce
= ucol_next(iter1
, &status
);
2777 if (U_FAILURE(status
)) {
2778 log_err("Retrieving ces failed\n");
2781 while (ce
!= UCOL_NULLORDER
) {
2782 if (ce
!= (uint32_t)ucol_next(iter2
, &status
)) {
2783 log_err("CEs does not match\n");
2786 ce
= ucol_next(iter1
, &status
);
2787 if (U_FAILURE(status
)) {
2788 log_err("Retrieving ces failed\n");
2792 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
2793 log_err("CEs not exhausted\n");
2796 ucol_closeElements(iter1
);
2797 ucol_closeElements(iter2
);
2803 static void TestExpansion(void) {
2804 const static char *testrules
[] = {
2805 "&J << K / B & K << M",
2808 const static UChar testdata
[][3] = {
2809 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2810 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2811 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2812 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2813 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2814 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2817 UErrorCode status
= U_ZERO_ERROR
;
2819 UChar rule
[256] = {0};
2823 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2825 log_verbose("Rule %s for testing\n", testrules
[i
]);
2826 rlen
= u_unescape(testrules
[i
], rule
, 32);
2827 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2828 if (U_FAILURE(status
)) {
2829 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
2833 for (j
= 0; j
< 5; j
++) {
2834 doTest(coll
, testdata
[j
], testdata
[j
+ 1], UCOL_LESS
);
2841 /* this test exercises the current limitations of the engine */
2842 /* it always fails, so it is disabled by default */
2843 static void TestLimitations(void) {
2844 /* recursive expansions */
2846 static const char *rule
= "&a=b/c&d=c/e";
2847 static const char *tlimit01
[] = {"add","b","adf"};
2848 static const char *tlimit02
[] = {"aa","b","af"};
2849 log_verbose("recursive expansions\n");
2850 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2851 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2853 /* contractions spanning expansions */
2855 static const char *rule
= "&a<<<c/e&g<<<eh";
2856 static const char *tlimit01
[] = {"ad","c","af","f","ch","h"};
2857 static const char *tlimit02
[] = {"ad","c","ch","af","f","h"};
2858 log_verbose("contractions spanning expansions\n");
2859 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2860 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2862 /* normalization: nulls in contractions */
2864 static const char *rule
= "&a<<<\\u0000\\u0302";
2865 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2866 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
2867 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2868 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2869 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2871 log_verbose("NULL in contractions\n");
2872 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2873 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2874 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2875 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2878 /* normalization: contractions spanning normalization */
2880 static const char *rule
= "&a<<<\\u0000\\u0302";
2881 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2882 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
2883 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2884 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2885 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2887 log_verbose("contractions spanning normalization\n");
2888 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2889 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2890 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2891 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2896 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2897 static const char *rule
= "&\\u2010<x<[variable top]=z";
2898 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
2899 static const char *tlimit01
[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
2900 static const char *tlimit02
[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
2901 static const char *tlimit03
[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
2902 static const UColAttribute att
[] = { UCOL_ALTERNATE_HANDLING
, UCOL_STRENGTH
};
2903 static const UColAttributeValue valOn
[] = { UCOL_SHIFTED
, UCOL_QUATERNARY
};
2904 static const UColAttributeValue valOff
[] = { UCOL_NON_IGNORABLE
, UCOL_TERTIARY
};
2906 log_verbose("variable top\n");
2907 genericRulesStarterWithOptions(rule
, tlimit03
, sizeof(tlimit03
)/sizeof(tlimit03
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2908 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2909 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2910 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
2911 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
2916 static const char *rule
= "&c<ch<<<cH<<<Ch<<<CH";
2917 static const char *tlimit01
[] = {"c","CH","Ch","cH","ch"};
2918 static const char *tlimit02
[] = {"c","CH","cH","Ch","ch"};
2919 static const UColAttribute att
[] = { UCOL_CASE_FIRST
};
2920 static const UColAttributeValue valOn
[] = { UCOL_UPPER_FIRST
};
2921 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2922 log_verbose("case level\n");
2923 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2924 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2925 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2926 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2932 static void TestBocsuCoverage(void) {
2933 UErrorCode status
= U_ZERO_ERROR
;
2934 const char *testString
= "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2935 UChar test
[256] = {0};
2936 uint32_t tlen
= u_unescape(testString
, test
, 32);
2937 uint8_t key
[256] = {0};
2940 UCollator
*coll
= ucol_open("", &status
);
2941 if(U_SUCCESS(status
)) {
2942 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
2944 klen
= ucol_getSortKey(coll
, test
, tlen
, key
, 256);
2948 log_data_err("Couldn't open UCA\n");
2952 static void TestVariableTopSetting(void) {
2953 UErrorCode status
= U_ZERO_ERROR
;
2954 const UChar
*current
= NULL
;
2955 uint32_t varTopOriginal
= 0, varTop1
, varTop2
;
2956 UCollator
*coll
= ucol_open("", &status
);
2957 if(U_SUCCESS(status
)) {
2959 uint32_t strength
= 0;
2961 uint32_t chOffset
= 0;
2963 uint32_t exOffset
= 0;
2965 uint32_t oldChOffset
= 0;
2966 uint32_t oldChLen
= 0;
2967 uint32_t oldExOffset
= 0;
2968 uint32_t oldExLen
= 0;
2969 uint32_t prefixOffset
= 0;
2970 uint32_t prefixLen
= 0;
2972 UBool startOfRules
= TRUE
;
2973 UColTokenParser src
;
2976 UChar
*rulesCopy
= NULL
;
2979 UCollationResult result
;
2981 UChar first
[256] = { 0 };
2982 UChar second
[256] = { 0 };
2983 UParseError parseError
;
2984 int32_t myQ
= getTestOption(QUICK_OPTION
);
2986 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
2990 if(getTestOption(QUICK_OPTION
) <= 0) {
2991 setTestOption(QUICK_OPTION
, 1);
2994 /* this test will fail when normalization is turned on */
2995 /* therefore we always turn off exhaustive mode for it */
2997 log_verbose("Slide variable top over UCARules\n");
2998 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, 0);
2999 rulesCopy
= (UChar
*)uprv_malloc((rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
3000 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
);
3002 if(U_SUCCESS(status
) && rulesLen
> 0) {
3003 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
3004 src
.current
= src
.source
= rulesCopy
;
3005 src
.end
= rulesCopy
+rulesLen
;
3006 src
.extraCurrent
= src
.end
;
3007 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
3009 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3010 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3011 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,&status
)) != NULL
) {
3012 strength
= src
.parsedToken
.strength
;
3013 chOffset
= src
.parsedToken
.charsOffset
;
3014 chLen
= src
.parsedToken
.charsLen
;
3015 exOffset
= src
.parsedToken
.extensionOffset
;
3016 exLen
= src
.parsedToken
.extensionLen
;
3017 prefixOffset
= src
.parsedToken
.prefixOffset
;
3018 prefixLen
= src
.parsedToken
.prefixLen
;
3019 specs
= src
.parsedToken
.flags
;
3021 startOfRules
= FALSE
;
3023 log_verbose("%04X %d ", *(src
.source
+chOffset
), chLen
);
3025 if(strength
== UCOL_PRIMARY
) {
3026 status
= U_ZERO_ERROR
;
3027 varTopOriginal
= ucol_getVariableTop(coll
, &status
);
3028 varTop1
= ucol_setVariableTop(coll
, src
.source
+oldChOffset
, oldChLen
, &status
);
3029 if(U_FAILURE(status
)) {
3033 uint32_t CE
= UCOL_NO_MORE_CES
;
3035 /* before we start screaming, let's see if there is a problem with the rules */
3036 UErrorCode collIterateStatus
= U_ZERO_ERROR
;
3037 collIterate
*s
= uprv_new_collIterate(&collIterateStatus
);
3038 uprv_init_collIterate(coll
, src
.source
+oldChOffset
, oldChLen
, s
, &collIterateStatus
);
3040 CE
= ucol_getNextCE(coll
, s
, &status
);
3042 for(i
= 0; i
< oldChLen
; i
++) {
3043 j
= sprintf(buf
, "%04X ", *(src
.source
+oldChOffset
+i
));
3046 if(status
== U_PRIMARY_TOO_LONG_ERROR
) {
3047 log_verbose("= Expected failure for %s =", buffer
);
3049 if(uprv_collIterateAtEnd(s
)) {
3050 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3051 oldChOffset
, u_errorName(status
), buffer
);
3053 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3057 uprv_delete_collIterate(s
);
3059 varTop2
= ucol_getVariableTop(coll
, &status
);
3060 if((varTop1
& 0xFFFF0000) != (varTop2
& 0xFFFF0000)) {
3061 log_err("cannot retrieve set varTop value!\n");
3065 if((varTop1
& 0xFFFF0000) > 0 && oldExLen
== 0) {
3067 u_strncpy(first
, src
.source
+oldChOffset
, oldChLen
);
3068 u_strncpy(first
+oldChLen
, src
.source
+chOffset
, chLen
);
3069 u_strncpy(first
+oldChLen
+chLen
, src
.source
+oldChOffset
, oldChLen
);
3070 first
[2*oldChLen
+chLen
] = 0;
3073 u_strncpy(second
, src
.source
+chOffset
, chLen
);
3075 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3076 u_strncpy(second
, src
.source
+oldExOffset
, oldExLen
);
3077 u_strncpy(second
+oldChLen
, src
.source
+chOffset
, chLen
);
3078 u_strncpy(second
+oldChLen
+chLen
, src
.source
+oldExOffset
, oldExLen
);
3079 second
[2*oldExLen
+chLen
] = 0;
3081 result
= ucol_strcoll(coll
, first
, -1, second
, -1);
3082 if(result
== UCOL_EQUAL
) {
3083 doTest(coll
, first
, second
, UCOL_EQUAL
);
3085 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src
.source
+oldChOffset
), *(src
.source
+chOffset
));
3089 if(strength
!= UCOL_TOK_RESET
) {
3090 oldChOffset
= chOffset
;
3092 oldExOffset
= exOffset
;
3096 status
= U_ZERO_ERROR
;
3099 log_err("Unexpected failure getting rules %s\n", u_errorName(status
));
3102 if (U_FAILURE(status
)) {
3103 log_err("Error parsing rules %s\n", u_errorName(status
));
3106 status
= U_ZERO_ERROR
;
3109 setTestOption(QUICK_OPTION
, myQ
);
3111 log_verbose("Testing setting variable top to contractions\n");
3113 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3114 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3115 UChar
*conts
= (UChar
*)((uint8_t *)coll
->image
+ coll
->image
->contractionUCACombos
);
3116 while(*conts
!= 0) {
3117 if((*(conts
+2) == 0) || (*(conts
+1)==0)) { /* contracts or pre-context contractions */
3118 varTop1
= ucol_setVariableTop(coll
, conts
, -1, &status
);
3120 varTop1
= ucol_setVariableTop(coll
, conts
, 3, &status
);
3122 if(U_FAILURE(status
)) {
3123 if(status
== U_PRIMARY_TOO_LONG_ERROR
) {
3124 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3125 * therefore it is not an error when it complains about them. */
3126 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3127 *conts
, *(conts
+1), *(conts
+2));
3129 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3130 *conts
, *(conts
+1), *(conts
+2), u_errorName(status
));
3132 status
= U_ZERO_ERROR
;
3137 status
= U_ZERO_ERROR
;
3143 ucol_setVariableTop(coll
, first
, -1, &status
);
3145 if(U_SUCCESS(status
)) {
3146 log_err("Invalid contraction succeded in setting variable top!\n");
3151 log_verbose("Test restoring variable top\n");
3153 status
= U_ZERO_ERROR
;
3154 ucol_restoreVariableTop(coll
, varTopOriginal
, &status
);
3155 if(varTopOriginal
!= ucol_getVariableTop(coll
, &status
)) {
3156 log_err("Couldn't restore old variable top\n");
3159 log_verbose("Testing calling with error set\n");
3161 status
= U_INTERNAL_PROGRAM_ERROR
;
3162 varTop1
= ucol_setVariableTop(coll
, first
, 1, &status
);
3163 varTop2
= ucol_getVariableTop(coll
, &status
);
3164 ucol_restoreVariableTop(coll
, varTop2
, &status
);
3165 varTop1
= ucol_setVariableTop(NULL
, first
, 1, &status
);
3166 varTop2
= ucol_getVariableTop(NULL
, &status
);
3167 ucol_restoreVariableTop(NULL
, varTop2
, &status
);
3168 if(status
!= U_INTERNAL_PROGRAM_ERROR
) {
3169 log_err("Bad reaction to passed error!\n");
3171 uprv_free(src
.source
);
3174 log_data_err("Couldn't open UCA collator\n");
3179 static void TestNonChars(void) {
3180 static const char *test
[] = {
3181 "\\u0000", /* ignorable */
3182 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
3183 "\\uFDD0", "\\uFDEF",
3184 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3185 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3186 "\\U0003FFFE", "\\U0003FFFF",
3187 "\\U0004FFFE", "\\U0004FFFF",
3188 "\\U0005FFFE", "\\U0005FFFF",
3189 "\\U0006FFFE", "\\U0006FFFF",
3190 "\\U0007FFFE", "\\U0007FFFF",
3191 "\\U0008FFFE", "\\U0008FFFF",
3192 "\\U0009FFFE", "\\U0009FFFF",
3193 "\\U000AFFFE", "\\U000AFFFF",
3194 "\\U000BFFFE", "\\U000BFFFF",
3195 "\\U000CFFFE", "\\U000CFFFF",
3196 "\\U000DFFFE", "\\U000DFFFF",
3197 "\\U000EFFFE", "\\U000EFFFF",
3198 "\\U000FFFFE", "\\U000FFFFF",
3199 "\\U0010FFFE", "\\U0010FFFF",
3200 "\\uFFFF" /* special character with maximum primary weight */
3202 UErrorCode status
= U_ZERO_ERROR
;
3203 UCollator
*coll
= ucol_open("en_US", &status
);
3205 log_verbose("Test non characters\n");
3207 if(U_SUCCESS(status
)) {
3208 genericOrderingTestWithResult(coll
, test
, 35, UCOL_LESS
);
3210 log_err_status(status
, "Unable to open collator\n");
3216 static void TestExtremeCompression(void) {
3217 static char *test
[4];
3218 int32_t j
= 0, i
= 0;
3220 for(i
= 0; i
<4; i
++) {
3221 test
[i
] = (char *)malloc(2048*sizeof(char));
3224 for(j
= 20; j
< 500; j
++) {
3225 for(i
= 0; i
<4; i
++) {
3226 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
3227 test
[i
][j
-1] = (char)('a'+i
);
3230 genericLocaleStarter("en_US", (const char **)test
, 4);
3234 for(i
= 0; i
<4; i
++) {
3240 static void TestExtremeCompression(void) {
3241 static char *test
[4];
3242 int32_t j
= 0, i
= 0;
3243 UErrorCode status
= U_ZERO_ERROR
;
3244 UCollator
*coll
= ucol_open("en_US", status
);
3245 for(i
= 0; i
<4; i
++) {
3246 test
[i
] = (char *)malloc(2048*sizeof(char));
3248 for(j
= 10; j
< 2048; j
++) {
3249 for(i
= 0; i
<4; i
++) {
3250 uprv_memset(test
[i
], 'a', (j
-2)*sizeof(char));
3251 test
[i
][j
-1] = (char)('a'+i
);
3255 genericLocaleStarter("en_US", (const char **)test
, 4);
3257 for(j
= 10; j
< 2048; j
++) {
3258 for(i
= 0; i
<1; i
++) {
3259 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
3263 for(i
= 0; i
<4; i
++) {
3269 static void TestSurrogates(void) {
3270 static const char *test
[] = {
3271 "z","\\ud900\\udc25", "\\ud805\\udc50",
3272 "\\ud800\\udc00y", "\\ud800\\udc00r",
3273 "\\ud800\\udc00f", "\\ud800\\udc00",
3274 "\\ud800\\udc00c", "\\ud800\\udc00b",
3275 "\\ud800\\udc00fa", "\\ud800\\udc00fb",
3280 static const char *rule
=
3281 "&z < \\ud900\\udc25 < \\ud805\\udc50"
3282 "< \\ud800\\udc00y < \\ud800\\udc00r"
3283 "< \\ud800\\udc00f << \\ud800\\udc00"
3284 "< \\ud800\\udc00fa << \\ud800\\udc00fb"
3285 "< \\ud800\\udc00a < c < b" ;
3287 genericRulesStarter(rule
, test
, 14);
3290 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3291 static void TestPrefix(void) {
3294 static const struct {
3296 const char *data
[50];
3306 "&z<<<\\ud900\\udc25|a",
3307 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3311 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3312 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3316 /* This test uses data supplied by Masashiko Maedera to test the */
3317 /* implementation of the JIS X 4061 collation order */
3318 static void TestNewJapanese(void) {
3320 static const char * const test1
[] = {
3321 "\\u30b7\\u30e3\\u30fc\\u30ec",
3322 "\\u30b7\\u30e3\\u30a4",
3323 "\\u30b7\\u30e4\\u30a3",
3324 "\\u30b7\\u30e3\\u30ec",
3325 "\\u3061\\u3087\\u3053",
3326 "\\u3061\\u3088\\u3053",
3327 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3328 "\\u3066\\u30fc\\u305f",
3329 "\\u30c6\\u30fc\\u30bf",
3330 "\\u30c6\\u30a7\\u30bf",
3331 "\\u3066\\u3048\\u305f",
3332 "\\u3067\\u30fc\\u305f",
3333 "\\u30c7\\u30fc\\u30bf",
3334 "\\u30c7\\u30a7\\u30bf",
3335 "\\u3067\\u3048\\u305f",
3336 "\\u3066\\u30fc\\u305f\\u30fc",
3337 "\\u30c6\\u30fc\\u30bf\\u30a1",
3338 "\\u30c6\\u30a7\\u30bf\\u30fc",
3339 "\\u3066\\u3047\\u305f\\u3041",
3340 "\\u3066\\u3048\\u305f\\u30fc",
3341 "\\u3067\\u30fc\\u305f\\u30fc",
3342 "\\u30c7\\u30fc\\u30bf\\u30a1",
3343 "\\u3067\\u30a7\\u305f\\u30a1",
3344 "\\u30c7\\u3047\\u30bf\\u3041",
3345 "\\u30c7\\u30a8\\u30bf\\u30a2",
3347 "\\u3073\\u3085\\u3042",
3348 "\\u3074\\u3085\\u3042",
3349 "\\u3073\\u3085\\u3042\\u30fc",
3350 "\\u30d3\\u30e5\\u30a2\\u30fc",
3351 "\\u3074\\u3085\\u3042\\u30fc",
3352 "\\u30d4\\u30e5\\u30a2\\u30fc",
3353 "\\u30d2\\u30e5\\u30a6",
3354 "\\u30d2\\u30e6\\u30a6",
3355 "\\u30d4\\u30e5\\u30a6\\u30a2",
3356 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3357 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3358 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3359 "\\u3072\\u3085\\u3093",
3360 "\\u3074\\u3085\\u3093",
3361 "\\u3075\\u30fc\\u308a",
3362 "\\u30d5\\u30fc\\u30ea",
3363 "\\u3075\\u3045\\u308a",
3364 "\\u3075\\u30a5\\u308a",
3365 "\\u3075\\u30a5\\u30ea",
3366 "\\u30d5\\u30a6\\u30ea",
3367 "\\u3076\\u30fc\\u308a",
3368 "\\u30d6\\u30fc\\u30ea",
3369 "\\u3076\\u3045\\u308a",
3370 "\\u30d6\\u30a5\\u308a",
3371 "\\u3077\\u3046\\u308a",
3372 "\\u30d7\\u30a6\\u30ea",
3373 "\\u3075\\u30fc\\u308a\\u30fc",
3374 "\\u30d5\\u30a5\\u30ea\\u30fc",
3375 "\\u3075\\u30a5\\u308a\\u30a3",
3376 "\\u30d5\\u3045\\u308a\\u3043",
3377 "\\u30d5\\u30a6\\u30ea\\u30fc",
3378 "\\u3075\\u3046\\u308a\\u3043",
3379 "\\u30d6\\u30a6\\u30ea\\u30a4",
3380 "\\u3077\\u30fc\\u308a\\u30fc",
3381 "\\u3077\\u30a5\\u308a\\u30a4",
3382 "\\u3077\\u3046\\u308a\\u30fc",
3383 "\\u30d7\\u30a6\\u30ea\\u30a4",
3399 static const char *test2
[] = {
3400 "\\u306f\\u309d", /* H\\u309d */
3401 "\\u30cf\\u30fd", /* K\\u30fd */
3402 "\\u306f\\u306f", /* HH */
3403 "\\u306f\\u30cf", /* HK */
3404 "\\u30cf\\u30cf", /* KK */
3405 "\\u306f\\u309e", /* H\\u309e */
3406 "\\u30cf\\u30fe", /* K\\u30fe */
3407 "\\u306f\\u3070", /* HH\\u309b */
3408 "\\u30cf\\u30d0", /* KK\\u309b */
3409 "\\u306f\\u3071", /* HH\\u309c */
3410 "\\u30cf\\u3071", /* KH\\u309c */
3411 "\\u30cf\\u30d1", /* KK\\u309c */
3412 "\\u3070\\u309d", /* H\\u309b\\u309d */
3413 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3414 "\\u3070\\u306f", /* H\\u309bH */
3415 "\\u30d0\\u30cf", /* K\\u309bK */
3416 "\\u3070\\u309e", /* H\\u309b\\u309e */
3417 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3418 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3419 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3420 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3421 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3422 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3423 "\\u3071\\u309d", /* H\\u309c\\u309d */
3424 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3425 "\\u3071\\u306f", /* H\\u309cH */
3426 "\\u30d1\\u30cf", /* K\\u309cK */
3427 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3428 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3429 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3430 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3431 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3434 static const char *test3[] = {
3462 "\\u30b7\\u30e3\\u30fc\\u30ec",
3465 static const UColAttribute att
[] = { UCOL_STRENGTH
};
3466 static const UColAttributeValue val
[] = { UCOL_QUATERNARY
};
3468 static const UColAttribute attShifted
[] = { UCOL_STRENGTH
, UCOL_ALTERNATE_HANDLING
};
3469 static const UColAttributeValue valShifted
[] = { UCOL_QUATERNARY
, UCOL_SHIFTED
};
3471 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), att
, val
, 1);
3472 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), att
, val
, 1);
3473 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3474 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), attShifted
, valShifted
, 2);
3475 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), attShifted
, valShifted
, 2);
3478 static void TestStrCollIdenticalPrefix(void) {
3479 const char* rule
= "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3480 const char* test
[] = {
3484 genericRulesStarterWithResult(rule
, test
, sizeof(test
)/sizeof(test
[0]), UCOL_EQUAL
);
3486 /* Contractions should have all their canonically equivalent */
3487 /* strings included */
3488 static void TestContractionClosure(void) {
3489 static const struct {
3491 const char *data
[10];
3494 { "&b=\\u00e4\\u00e4",
3495 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3497 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3502 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3503 genericRulesStarterWithResult(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
, UCOL_EQUAL
);
3507 /* This test also fails */
3508 static void TestBeforePrefixFailure(void) {
3509 static const struct {
3511 const char *data
[10];
3515 "&[before 3]\\uff41 <<< x",
3516 {"x", "\\uff41"}, 2 },
3517 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3518 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3519 "&[before 3]\\u30a7<<<\\u30a9",
3520 {"\\u30a9", "\\u30a7"}, 2 },
3521 { "&[before 3]\\u30a7<<<\\u30a9"
3522 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3523 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3524 {"\\u30a9", "\\u30a7"}, 2 },
3529 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3530 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3535 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3536 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3537 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3539 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3540 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3541 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3542 const char* test
[] = {
3543 "\\u30c6\\u30fc\\u30bf",
3544 "\\u30c6\\u30a7\\u30bf",
3546 genericRulesStarter(rule1
, test
, sizeof(test
)/sizeof(test
[0]));
3547 genericRulesStarter(rule2
, test
, sizeof(test
)/sizeof(test
[0]));
3548 /* this piece of code should be in some sort of verbose mode */
3549 /* it gets the collation elements for elements and prints them */
3550 /* This is useful when trying to see whether the problem is */
3552 UErrorCode status
= U_ZERO_ERROR
;
3554 UCollationElements
*it
= NULL
;
3557 uint32_t uStringLen
;
3558 UCollator
*coll
= NULL
;
3560 uStringLen
= u_unescape(rule1
, string
, 256);
3562 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3564 /*coll = ucol_open("ja_JP_JIS", &status);*/
3565 it
= ucol_openElements(coll
, string
, 0, &status
);
3567 for(i
= 0; i
< sizeof(test
)/sizeof(test
[0]); i
++) {
3568 log_verbose("%s\n", test
[i
]);
3569 uStringLen
= u_unescape(test
[i
], string
, 256);
3570 ucol_setText(it
, string
, uStringLen
, &status
);
3572 while((CE
=ucol_next(it
, &status
)) != UCOL_NULLORDER
) {
3573 log_verbose("%08X\n", CE
);
3579 ucol_closeElements(it
);
3585 static void TestPrefixCompose(void) {
3587 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3589 const char* test[] = {
3590 "\\u30c6\\u30fc\\u30bf",
3591 "\\u30c6\\u30a7\\u30bf",
3595 UErrorCode status
= U_ZERO_ERROR
;
3597 /*UCollationElements *it = NULL;*/
3600 uint32_t uStringLen
;
3601 UCollator
*coll
= NULL
;
3603 uStringLen
= u_unescape(rule1
, string
, 256);
3605 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3613 [last variable] last variable value
3614 [last primary ignorable] largest CE for primary ignorable
3615 [last secondary ignorable] largest CE for secondary ignorable
3616 [last tertiary ignorable] largest CE for tertiary ignorable
3617 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
/*
 * Table-driven test of rule options such as [before N] and the
 * [first ...]/[last ...] reset positions.  Each table entry pairs a rule
 * string with an array of up to 10 strings and a count; the loop at the end
 * hands every entry to genericRulesStarter().
 */
3620 static void TestRuleOptions(void) {
3621 /* values here are hardcoded and are correct for the current UCA
3622 * when the UCA changes, one might be forced to change these
3627 * These strings contain the last character before [variable top]
3628 * and the first and second characters (by primary weights) after it.
3629 * See FractionalUCA.txt. For example:
3630 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3631 [variable top = 0C FE]
3632 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3634 00B4; [0D 0C, 05, 05]
3636 * Note: Starting with UCA 6.0, the [variable top] collation element
3637 * is not the weight of any character or string,
3638 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3640 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3641 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
3642 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3645 * This string has to match the character that has the [last regular] weight
3646 * which changes with each UCA version.
3647 * See the bottom of FractionalUCA.txt which says something like
3648 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3650 * Note: Starting with UCA 6.0, the [last regular] collation element
3651 * is not the weight of any character or string,
3652 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3654 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3656 static const struct {
3658 const char *data
[10];
3661 /* - all befores here amount to zero */
3662 { "&[before 3][first tertiary ignorable]<<<a",
3663 { "\\u0000", "a"}, 2
3664 }, /* you cannot go before first tertiary ignorable */
3666 { "&[before 3][last tertiary ignorable]<<<a",
3667 { "\\u0000", "a"}, 2
3668 }, /* you cannot go before last tertiary ignorable */
3670 { "&[before 3][first secondary ignorable]<<<a",
3671 { "\\u0000", "a"}, 2
3672 }, /* you cannot go before first secondary ignorable */
3674 { "&[before 3][last secondary ignorable]<<<a",
3675 { "\\u0000", "a"}, 2
3676 }, /* you cannot go before last secondary ignorable */
3678 /* 'normal' befores */
3680 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3681 { "c", "b", "\\u0332", "a" }, 4
3684 /* we don't have a code point that corresponds to
3685 * the last primary ignorable
3687 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3688 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3691 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3692 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3695 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3696 { LAST_VARIABLE_CHAR_STRING
, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING
}, 5
3699 { "&[first regular]<a"
3700 "&[before 1][first regular]<b",
3701 { "b", FIRST_REGULAR_CHAR_STRING
, "a", SECOND_REGULAR_CHAR_STRING
}, 4
3704 { "&[before 1][last regular]<b"
3705 "&[last regular]<a",
3706 { LAST_REGULAR_CHAR_STRING
, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3709 { "&[before 1][first implicit]<b"
3710 "&[first implicit]<a",
3711 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3714 { "&[before 1][last implicit]<b"
3715 "&[last implicit]<a",
3716 { "b", "\\U0010FFFD", "a" }, 3
3719 { "&[last variable]<z"
3720 "&[last primary ignorable]<x"
3721 "&[last secondary ignorable]<<y"
3722 "&[last tertiary ignorable]<<<w"
/* NOTE(review): eight strings are listed below but the count is 7, so the
 * final "u" is never handed to genericRulesStarter() - confirm this is intended. */
3724 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING
, "z", "u"}, 7
/* run every table entry through the generic rule-based ordering check */
3730 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3731 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3736 static void TestOptimize(void) {
3737 /* this is not really a test - just trying out
3738 * whether copying of UCA contents will fail
3739 * Cannot really test, since the functionality
3742 static const struct {
3744 const char *data
[10];
3747 /* - all befores here amount to zero */
3748 { "[optimize [\\uAC00-\\uD7FF]]",
3753 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3754 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3759 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3760 weiv ucol_strcollIter?
3761 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3762 weiv these are the input strings?
3763 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3764 weiv will check - could be a problem with utf-8 iterator
3765 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3767 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3768 weiv that doesn't sound right
3769 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3770 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3771 cycheng@ca.ibm.c... yes
3772 weiv and then do the comparison
3773 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3774 weiv utf-16 strings look like a little endian ones in the example you sent me
3775 weiv It could be a bug - let me try to test it out
3776 cycheng@ca.ibm.c... ok
3777 cycheng@ca.ibm.c... we can wait till the conf. call
3778 cycheng@ca.ibm.c... next week
3779 weiv that would be great
3781 weiv I might be wrong
3782 weiv let me play with it some more
3783 cycheng@ca.ibm.c... ok
3784 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3785 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3786 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3788 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3790 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3793 static void Alexis(void) {
3794 UErrorCode status
= U_ZERO_ERROR
;
3795 UCollator
*coll
= ucol_open("", &status
);
3798 const char utf16be
[2][4] = {
3799 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
3800 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
3803 const char utf8
[2][4] = {
3804 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
3805 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
3808 UCharIterator iterU161
, iterU162
;
3809 UCharIterator iterU81
, iterU82
;
3811 UCollationResult resU16
, resU8
;
3813 uiter_setUTF16BE(&iterU161
, utf16be
[0], 4);
3814 uiter_setUTF16BE(&iterU162
, utf16be
[1], 4);
3816 uiter_setUTF8(&iterU81
, utf8
[0], 4);
3817 uiter_setUTF8(&iterU82
, utf8
[1], 4);
3819 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3821 resU16
= ucol_strcollIter(coll
, &iterU161
, &iterU162
, &status
);
3822 resU8
= ucol_strcollIter(coll
, &iterU81
, &iterU82
, &status
);
3825 if(resU16
!= resU8
) {
3826 log_err("different results\n");
3833 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3834 static void Alexis2(void) {
3835 UErrorCode status
= U_ZERO_ERROR
;
3836 UChar U16Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3837 char U16BESource
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16BETarget
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3838 char U8Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U8Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3839 int32_t U16LenS
= 0, U16LenT
= 0, U16BELenS
= 0, U16BELenT
= 0, U8LenS
= 0, U8LenT
= 0;
3841 UConverter
*conv
= NULL
;
3843 UCharIterator U16BEItS
, U16BEItT
;
3844 UCharIterator U8ItS
, U8ItT
;
3846 UCollationResult resU16
, resU16BE
, resU8
;
3848 static const char* const pairs
[][2] = {
3849 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3850 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3851 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3852 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3853 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3854 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3855 { "\\u0020", "\\u0020\\u0000"}
3857 5F20 (my result here)
3859 5F20 (your result here)
3865 UCollator
*coll
= ucol_open("", &status
);
3866 if(status
== U_FILE_ACCESS_ERROR
) {
3867 log_data_err("Is your data around?\n");
3869 } else if(U_FAILURE(status
)) {
3870 log_err("Error opening collator\n");
3873 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3874 conv
= ucnv_open("UTF16BE", &status
);
3875 for(i
= 0; i
< sizeof(pairs
)/sizeof(pairs
[0]); i
++) {
3876 U16LenS
= u_unescape(pairs
[i
][0], U16Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3877 U16LenT
= u_unescape(pairs
[i
][1], U16Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3879 resU16
= ucol_strcoll(coll
, U16Source
, U16LenS
, U16Target
, U16LenT
);
3881 log_verbose("Result of strcoll is %i\n", resU16
);
3883 U16BELenS
= ucnv_fromUChars(conv
, U16BESource
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Source
, U16LenS
, &status
);
3884 U16BELenT
= ucnv_fromUChars(conv
, U16BETarget
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Target
, U16LenT
, &status
);
3886 /* use the original sizes, as the result from converter is in bytes */
3887 uiter_setUTF16BE(&U16BEItS
, U16BESource
, U16LenS
);
3888 uiter_setUTF16BE(&U16BEItT
, U16BETarget
, U16LenT
);
3890 resU16BE
= ucol_strcollIter(coll
, &U16BEItS
, &U16BEItT
, &status
);
3892 log_verbose("Result of U16BE is %i\n", resU16BE
);
3894 if(resU16
!= resU16BE
) {
3895 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
3898 u_strToUTF8(U8Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenS
, U16Source
, U16LenS
, &status
);
3899 u_strToUTF8(U8Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenT
, U16Target
, U16LenT
, &status
);
3901 uiter_setUTF8(&U8ItS
, U8Source
, U8LenS
);
3902 uiter_setUTF8(&U8ItT
, U8Target
, U8LenT
);
3904 resU8
= ucol_strcollIter(coll
, &U8ItS
, &U8ItT
, &status
);
3906 if(resU16
!= resU8
) {
3907 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
/*
 * Runs three hex-encoded strings through u_parseUTF8() and u_strFromUTF8(),
 * logs the resulting UTF-16 code units in verbose mode, and then checks with
 * doTest() that every string compares UCOL_LESS against each later string,
 * using the collator opened with the empty locale name.
 */
3916 static void TestHebrewUCA(void) {
3917 UErrorCode status
= U_ZERO_ERROR
;
3918 static const char *first
[] = {
3919 "d790d6b8d79cd795d6bcd7a9",
3920 "d790d79cd79ed7a7d799d799d7a1",
3921 "d790d6b4d79ed795d6bcd7a9",
3924 char utf8String
[3][256];
3925 UChar utf16String
[3][256];
3927 int32_t i
= 0, j
= 0;
3928 int32_t sizeUTF8
[3];
3929 int32_t sizeUTF16
[3];
3931 UCollator
*coll
= ucol_open("", &status
);
3932 if (U_FAILURE(status
)) {
3933 log_err_status(status
, "Could not open UCA collation %s\n", u_errorName(status
));
3936 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3938 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0]); i
++) {
3939 sizeUTF8
[i
] = u_parseUTF8(first
[i
], -1, utf8String
[i
], 256, &status
);
3940 u_strFromUTF8(utf16String
[i
], 256, &sizeUTF16
[i
], utf8String
[i
], sizeUTF8
[i
], &status
);
/* the "%i" conversion needs a matching argument: log the index of the string */
3941 log_verbose("%i: ", (int)i);
3942 for(j
= 0; j
< sizeUTF16
[i
]; j
++) {
3943 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3944 log_verbose("%04X", utf16String
[i
][j
]);
/* every string must sort strictly before each string that follows it */
3948 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0])-1; i
++) {
3949 for(j
= i
+ 1; j
< sizeof(first
)/sizeof(first
[0]); j
++) {
3950 doTest(coll
, utf16String
[i
], utf16String
[j
], UCOL_LESS
);
3958 static void TestPartialSortKeyTermination(void) {
3959 static const char* cases
[] = {
3960 "\\u1234\\u1234\\udc00",
3961 "\\udc00\\ud800\\ud800"
3964 int32_t i
= sizeof(UCollator
);
3966 UErrorCode status
= U_ZERO_ERROR
;
3968 UCollator
*coll
= ucol_open("", &status
);
3972 UChar currCase
[256];
3974 int32_t pKeyLen
= 0;
3978 for(i
= 0; i
< sizeof(cases
)/sizeof(cases
[0]); i
++) {
3979 uint32_t state
[2] = {0, 0};
3980 length
= u_unescape(cases
[i
], currCase
, 256);
3981 uiter_setString(&iter
, currCase
, length
);
3982 pKeyLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, key
, 256, &status
);
3984 log_verbose("Done\n");
3990 static void TestSettings(void) {
3991 static const char* cases
[] = {
3996 static const char* locales
[] = {
4001 UErrorCode status
= U_ZERO_ERROR
;
4003 int32_t i
= 0, j
= 0;
4005 UChar source
[256], target
[256];
4006 int32_t sLen
= 0, tLen
= 0;
4008 UCollator
*collateObject
= NULL
;
4009 for(i
= 0; i
< sizeof(locales
)/sizeof(locales
[0]); i
++) {
4010 collateObject
= ucol_open(locales
[i
], &status
);
4011 ucol_setStrength(collateObject
, UCOL_PRIMARY
);
4012 ucol_setAttribute(collateObject
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
4013 for(j
= 1; j
< sizeof(cases
)/sizeof(cases
[0]); j
++) {
4014 sLen
= u_unescape(cases
[j
-1], source
, 256);
4016 tLen
= u_unescape(cases
[j
], target
, 256);
4018 doTest(collateObject
, source
, target
, UCOL_EQUAL
);
4020 ucol_close(collateObject
);
4024 static int32_t TestEqualsForCollator(const char* locName
, UCollator
*source
, UCollator
*target
) {
4025 UErrorCode status
= U_ZERO_ERROR
;
4026 int32_t errorNo
= 0;
4027 /*const UChar *sourceRules = NULL;*/
4028 /*int32_t sourceRulesLen = 0;*/
4029 UColAttributeValue french
= UCOL_OFF
;
4030 int32_t cloneSize
= 0;
4032 if(!ucol_equals(source
, target
)) {
4033 log_err("Same collators, different address not equal\n");
4037 if(uprv_strcmp(ucol_getLocaleByType(source
, ULOC_REQUESTED_LOCALE
, &status
), ucol_getLocaleByType(source
, ULOC_ACTUAL_LOCALE
, &status
)) == 0) {
4038 /* currently, safeClone is implemented through getRules/openRules
4039 * so it is the same as the test below - I will comment that test out.
4042 target
= ucol_safeClone(source
, NULL
, &cloneSize
, &status
);
4043 if(U_FAILURE(status
)) {
4044 log_err("Error creating clone\n");
4048 if(!ucol_equals(source
, target
)) {
4049 log_err("Collator different from it's clone\n");
4052 french
= ucol_getAttribute(source
, UCOL_FRENCH_COLLATION
, &status
);
4053 if(french
== UCOL_ON
) {
4054 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
4056 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
4058 if(U_FAILURE(status
)) {
4059 log_err("Error setting attributes\n");
4063 if(ucol_equals(source
, target
)) {
4064 log_err("Collators same even when options changed\n");
4068 /* commented out since safeClone uses exactly the same technique */
4070 sourceRules = ucol_getRules(source, &sourceRulesLen);
4071 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4072 if(U_FAILURE(status)) {
4073 log_err("Error instantiating target from rules\n");
4077 if(!ucol_equals(source, target)) {
4078 log_err("Collator different from collator that was created from the same rules\n");
4088 static void TestEquals(void) {
4089 /* ucol_equals is not currently a public API. There is a chance that it will become
4090 * something like this, but currently it is only used by RuleBasedCollator::operator==
4092 /* test whether the two collators instantiated from the same locale are equal */
4093 UErrorCode status
= U_ZERO_ERROR
;
4094 UParseError parseError
;
4095 int32_t noOfLoc
= uloc_countAvailable();
4096 const char *locName
= NULL
;
4097 UCollator
*source
= NULL
, *target
= NULL
;
4100 const char* rules
[] = {
4101 "&l < lj <<< Lj <<< LJ",
4102 "&n < nj <<< Nj <<< NJ",
4107 const char* badRules[] = {
4109 "&n < nj <<< nJ <<< NJ",
4111 "&AE <<< \\u00c4 <<< x"
4115 UChar sourceRules
[1024], targetRules
[1024];
4116 int32_t sourceRulesSize
= 0, targetRulesSize
= 0;
4117 int32_t rulesSize
= sizeof(rules
)/sizeof(rules
[0]);
4119 for(i
= 0; i
< rulesSize
; i
++) {
4120 sourceRulesSize
+= u_unescape(rules
[i
], sourceRules
+sourceRulesSize
, 1024 - sourceRulesSize
);
4121 targetRulesSize
+= u_unescape(rules
[rulesSize
-i
-1], targetRules
+targetRulesSize
, 1024 - targetRulesSize
);
4124 source
= ucol_openRules(sourceRules
, sourceRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
4125 if(status
== U_FILE_ACCESS_ERROR
) {
4126 log_data_err("Is your data around?\n");
4128 } else if(U_FAILURE(status
)) {
4129 log_err("Error opening collator\n");
4132 target
= ucol_openRules(targetRules
, targetRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
4133 if(!ucol_equals(source
, target
)) {
4134 log_err("Equivalent collators not equal!\n");
4139 source
= ucol_open("root", &status
);
4140 target
= ucol_open("root", &status
);
4141 log_verbose("Testing root\n");
4142 if(!ucol_equals(source
, source
)) {
4143 log_err("Same collator not equal\n");
4145 if(TestEqualsForCollator(locName
, source
, target
)) {
4146 log_err("Errors for root\n", locName
);
4150 for(i
= 0; i
<noOfLoc
; i
++) {
4151 status
= U_ZERO_ERROR
;
4152 locName
= uloc_getAvailable(i
);
4153 /*if(hasCollationElements(locName)) {*/
4154 log_verbose("Testing equality for locale %s\n", locName
);
4155 source
= ucol_open(locName
, &status
);
4156 target
= ucol_open(locName
, &status
);
4157 if (U_FAILURE(status
)) {
4158 log_err("Error opening collator for locale %s %s\n", locName
, u_errorName(status
));
4161 if(TestEqualsForCollator(locName
, source
, target
)) {
4162 log_err("Errors for locale %s\n", locName
);
4169 static void TestJ2726(void) {
4170 UChar a
[2] = { 0x61, 0x00 }; /*"a"*/
4171 UChar aSpace
[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4172 UChar spaceA
[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4173 UErrorCode status
= U_ZERO_ERROR
;
4174 UCollator
*coll
= ucol_open("en", &status
);
4175 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
4176 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4177 doTest(coll
, a
, aSpace
, UCOL_EQUAL
);
4178 doTest(coll
, aSpace
, a
, UCOL_EQUAL
);
4179 doTest(coll
, a
, spaceA
, UCOL_EQUAL
);
4180 doTest(coll
, spaceA
, a
, UCOL_EQUAL
);
4181 doTest(coll
, spaceA
, aSpace
, UCOL_EQUAL
);
4182 doTest(coll
, aSpace
, spaceA
, UCOL_EQUAL
);
4186 static void NullRule(void) {
4188 UErrorCode status
= U_ZERO_ERROR
;
4189 UCollator
*coll
= ucol_openRules(r
, 1, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4190 if(U_SUCCESS(status
)) {
4191 log_err("This should have been an error!\n");
4194 status
= U_ZERO_ERROR
;
4196 coll
= ucol_openRules(r
, 0, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4197 if(U_FAILURE(status
)) {
4198 log_err_status(status
, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status
));
4205 * Test for CollationElementIterator previous and next for the whole set of
4206 * unicode characters with normalization on.
4208 static void TestNumericCollation(void)
4210 UErrorCode status
= U_ZERO_ERROR
;
4212 const static char *basicTestStrings
[]={
4225 const static char *preZeroTestStrings
[]={
4233 "avery000000010000",
4236 const static char *thirtyTwoBitNumericStrings
[]={
4243 const static char *longNumericStrings
[]={
4244 /* Some of these sort out of the order that would be expected if digits-as-numbers handled arbitrarily-long digit strings.
4245 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4246 are treated as multiple collation elements. */
4247 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4248 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4249 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4250 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4251 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4252 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4253 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4254 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4255 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4256 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4259 const static char *supplementaryDigits
[] = {
4260 "\\uD835\\uDFCE", /* 0 */
4261 "\\uD835\\uDFCF", /* 1 */
4262 "\\uD835\\uDFD0", /* 2 */
4263 "\\uD835\\uDFD1", /* 3 */
4264 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4265 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4266 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4267 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4268 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4269 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4272 const static char *foreignDigits
[] = {
4287 const static char *evenZeroes
[] = {
4294 UColAttribute att
= UCOL_NUMERIC_COLLATION
;
4295 UColAttributeValue val
= UCOL_ON
;
4297 /* Open our collator. */
4298 UCollator
* coll
= ucol_open("root", &status
);
4299 if (U_FAILURE(status
)){
4300 log_err_status(status
, "ERROR: in using ucol_open() -> %s\n",
4301 myErrorName(status
));
4304 genericLocaleStarterWithOptions("root", basicTestStrings
, sizeof(basicTestStrings
)/sizeof(basicTestStrings
[0]), &att
, &val
, 1);
4305 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings
, sizeof(thirtyTwoBitNumericStrings
)/sizeof(thirtyTwoBitNumericStrings
[0]), &att
, &val
, 1);
4306 genericLocaleStarterWithOptions("root", longNumericStrings
, sizeof(longNumericStrings
)/sizeof(longNumericStrings
[0]), &att
, &val
, 1);
4307 genericLocaleStarterWithOptions("en_US", foreignDigits
, sizeof(foreignDigits
)/sizeof(foreignDigits
[0]), &att
, &val
, 1);
4308 genericLocaleStarterWithOptions("root", supplementaryDigits
, sizeof(supplementaryDigits
)/sizeof(supplementaryDigits
[0]), &att
, &val
, 1);
4309 genericLocaleStarterWithOptions("root", evenZeroes
, sizeof(evenZeroes
)/sizeof(evenZeroes
[0]), &att
, &val
, 1);
4311 /* Setting up our collator to do digits. */
4312 ucol_setAttribute(coll
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
4313 if (U_FAILURE(status
)){
4314 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4315 myErrorName(status
));
4320 Testing that prepended zeroes still yield the correct collation behavior.
4321 We expect that every element in our strings array will be equal.
4323 genericOrderingTestWithResult(coll
, preZeroTestStrings
, sizeof(preZeroTestStrings
)/sizeof(preZeroTestStrings
[0]), UCOL_EQUAL
);
4328 static void TestTibetanConformance(void)
4330 const char* test
[] = {
4331 "\\u0FB2\\u0591\\u0F71\\u0061",
4332 "\\u0FB2\\u0F71\\u0061"
4335 UErrorCode status
= U_ZERO_ERROR
;
4336 UCollator
*coll
= ucol_open("", &status
);
4340 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4341 if (U_SUCCESS(status
)) {
4342 u_unescape(test
[0], source
, 100);
4343 u_unescape(test
[1], target
, 100);
4344 doTest(coll
, source
, target
, UCOL_EQUAL
);
4345 result
= ucol_strcoll(coll
, source
, -1, target
, -1);
4346 log_verbose("result %d\n", result
);
4347 if (UCOL_EQUAL
!= result
) {
4348 log_err("Tibetan comparison error\n");
4353 genericLocaleStarterWithResult("", test
, 2, UCOL_EQUAL
);
4356 static void TestPinyinProblem(void) {
4357 static const char *test
[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
4358 genericLocaleStarter("zh__PINYIN", test
, sizeof(test
)/sizeof(test
[0]));
/*
 * Upper bound and byte masks used by showImplicit() and
 * TestImplicitGeneration().
 * The masks are plain constant expressions with no trailing semicolon, so
 * they can be used inside larger expressions as well as at the end of a
 * statement.
 */
#define TST_UCOL_MAX_INPUT 0x220001
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4367 static void showImplicit(UChar32 i
) {
4368 if (i
>= 0 && i
<= TST_UCOL_MAX_INPUT
) {
4369 log_verbose("%08X\t%08X\n", i
, uprv_uca_getImplicitFromRaw(i
));
/*
 * Walks every raw value i in [0, TST_UCOL_MAX_INPUT], converts it with
 * uprv_uca_getImplicitFromRaw() and checks that uprv_uca_getRawFromImplicit()
 * maps the result back to i. Every value strictly between the previous and the
 * current result is expected to map back to -1. Finally checks the
 * raw <-> code point round trip for each i.
 * NOTE(review): this listing omits some original lines (e.g. the declarations
 * of `last` and `current`, the body of the spot-check branch and the closing
 * braces), so the text below is not compilable as shown.
 */
4373 static void TestImplicitGeneration(void) {
4374 UErrorCode status
= U_ZERO_ERROR
;
4377 UChar32 i
= 0, j
= 0;
4378 UChar32 roundtrip
= 0;
4379 UChar32 lastBottom
= 0;
4380 UChar32 currentBottom
= 0;
4381 UChar32 lastTop
= 0;
4382 UChar32 currentTop
= 0;
/* The root collator is opened first; failure is reported via log_err_status. */
4384 UCollator
*coll
= ucol_open("root", &status
);
4385 if(U_FAILURE(status
)) {
4386 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
/* Result is discarded; presumably a smoke call for one fixed value -- TODO confirm. */
4390 uprv_uca_getRawFromImplicit(0xE20303E7);
4392 for (i
= 0; i
<= TST_UCOL_MAX_INPUT
; ++i
) {
4393 current
= uprv_uca_getImplicitFromRaw(i
) & fourBytes
;
4395 /* check that it round-trips AND that all intervening ones are illegal*/
4396 roundtrip
= uprv_uca_getRawFromImplicit(current
);
4397 if (roundtrip
!= i
) {
4398 log_err("No roundtrip %08X\n", i
);
/* Every value in the gap (last, current) must be rejected, i.e. map to -1. */
4401 for (j
= last
+ 1; j
< current
; ++j
) {
4402 roundtrip
= uprv_uca_getRawFromImplicit(j
);
4403 /* raise an error if it *doesn't* find an error*/
4404 if (roundtrip
!= -1) {
4405 log_err("Fails to recognize illegal %08X\n", j
);
4409 /* now do other consistency checks*/
4410 lastBottom
= last
& bottomByte
;
4411 currentBottom
= current
& bottomByte
;
4412 lastTop
= last
& topByte
;
4413 currentTop
= current
& topByte
;
4415 /* print out some values for spot-checking*/
4416 if (lastTop
!= currentTop
|| i
== 0x10000 || i
== 0x110000) {
4426 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i
)) != i
) {
4427 log_err("No raw <-> code point roundtrip for 0x%08X\n", i
);
/* Log the last three values of the tested range for manual inspection. */
4430 showImplicit(TST_UCOL_MAX_INPUT
-2);
4431 showImplicit(TST_UCOL_MAX_INPUT
-1);
4432 showImplicit(TST_UCOL_MAX_INPUT
);
/**
 * Iterate through the given iterator, checking to see that all the strings
 * in the expected array are present.
 * @param expected array of strings we expect to see, or NULL
 * @param expectedCount number of elements of expected, or 0
 */
/*
 * Enumerates `iter`, logging each string under the label `msg`, and records in
 * a bit mask which entries of `expected` were encountered. Afterwards it
 * asserts that the number of strings read equals uenum_count() and logs an
 * error for every expected string that was not seen.
 * Returns -1 when uenum_count() fails.
 * NOTE(review): the listing omits some original lines (the `iter` parameter,
 * the enumeration loop header, the mask update and the final return), so the
 * normal return value cannot be stated from here.
 */
4442 static int32_t checkUEnumeration(const char* msg
,
4444 const char** expected
,
4445 int32_t expectedCount
) {
4446 UErrorCode ec
= U_ZERO_ERROR
;
4447 int32_t i
= 0, n
, j
, bit
;
4448 int32_t seenMask
= 0;
/* One bit of the 32-bit seenMask is used per expected string, hence the bound. */
4450 U_ASSERT(expectedCount
>= 0 && expectedCount
< 31); /* [sic] 31 not 32 */
4451 n
= uenum_count(iter
, &ec
);
4452 if (!assertSuccess("count", &ec
)) return -1;
4453 log_verbose("%s = [", msg
);
/* Enumeration stops on the first error or when uenum_next() returns NULL. */
4455 const char* s
= uenum_next(iter
, NULL
, &ec
);
4456 if (!assertSuccess("snext", &ec
) || s
== NULL
) break;
4457 if (i
!= 0) log_verbose(",");
4458 log_verbose("%s", s
);
4459 /* check expected list */
4460 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
4461 if ((seenMask
&bit
) == 0 &&
4462 uprv_strcmp(s
, expected
[j
]) == 0) {
4468 log_verbose("] (%d)\n", i
);
4469 assertTrue("count verified", i
==n
);
4470 /* did we see all expected strings? */
4471 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
4472 if ((seenMask
&bit
)!=0) {
4473 log_verbose("Ok: \"%s\" seen\n", expected
[j
]);
4475 log_err("FAIL: \"%s\" not seen\n", expected
[j
]);
4482 * Test new API added for separate collation tree.
/*
 * Exercises the collation enumeration APIs: ucol_openAvailableLocales()
 * (only when UCONFIG_NO_SERVICE is off), ucol_getKeywords(),
 * ucol_getKeywordValues() -- once with a clean error code and once with a
 * warning code preset -- and ucol_getFunctionalEquivalent() for "de" and
 * "de_DE". Each enumeration is checked with checkUEnumeration() against a
 * short list of strings that must be present.
 * NOTE(review): the listing omits some original lines (declarations of n, loc
 * and isAvailable, the branch structure around the log_data_err calls, and
 * the tails of the ucol_getFunctionalEquivalent calls).
 */
4484 static void TestSeparateTrees(void) {
4485 UErrorCode ec
= U_ZERO_ERROR
;
4486 UEnumeration
*e
= NULL
;
/* Strings that must appear in the respective enumerations (not exhaustive lists). */
4491 static const char* AVAIL
[] = { "en", "de" };
4493 static const char* KW
[] = { "collation" };
4495 static const char* KWVAL
[] = { "phonebook", "stroke" };
4497 #if !UCONFIG_NO_SERVICE
4498 e
= ucol_openAvailableLocales(&ec
);
4500 assertSuccess("ucol_openAvailableLocales", &ec
);
4501 assertTrue("ucol_openAvailableLocales!=0", e
!=0);
4502 n
= checkUEnumeration("ucol_openAvailableLocales", e
, AVAIL
, LEN(AVAIL
));
4503 /* Don't need to check n because we check list */
4506 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec
));
4510 e
= ucol_getKeywords(&ec
);
4512 assertSuccess("ucol_getKeywords", &ec
);
4513 assertTrue("ucol_getKeywords!=0", e
!=0);
4514 n
= checkUEnumeration("ucol_getKeywords", e
, KW
, LEN(KW
));
4515 /* Don't need to check n because we check list */
4518 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec
));
4521 e
= ucol_getKeywordValues(KW
[0], &ec
);
4523 assertSuccess("ucol_getKeywordValues", &ec
);
4524 assertTrue("ucol_getKeywordValues!=0", e
!=0);
4525 n
= checkUEnumeration("ucol_getKeywordValues", e
, KWVAL
, LEN(KWVAL
));
4526 /* Don't need to check n because we check list */
4529 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec
));
4532 /* Try setting a warning before calling ucol_getKeywordValues */
4533 ec
= U_USING_FALLBACK_WARNING
;
4534 e
= ucol_getKeywordValues(KW
[0], &ec
);
4535 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec
)) {
4536 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e
!=0);
4537 n
= checkUEnumeration("ucol_getKeywordValues [with warning code set]", e
, KWVAL
, LEN(KWVAL
));
4538 /* Don't need to check n because we check list */
/* NOTE(review): the next four lines quote an API prototype; presumably they sat
   inside a comment whose delimiters this listing dropped. Note that the quoted
   prototype has no keyword parameter, while the calls below pass "collation"
   before the locale. */
4543 U_DRAFT int32_t U_EXPORT2
4544 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4545 const char* locale, UBool* isAvailable,
4546 UErrorCode* status);
/* Both "de" and "de_DE" are expected to resolve to "de" and be reported available. */
4549 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de",
4551 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
4552 assertEquals("getFunctionalEquivalent(de)", "de", loc
);
4553 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4554 isAvailable
== TRUE
);
4557 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de_DE",
4559 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
4560 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc
);
4561 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4562 isAvailable
== TRUE
);
4566 /* supersedes TestJ784 */
/*
 * Runs two string lists (test, test2) through genericRulesStarter() with a
 * "[before 2]" tailoring for accented a/e/i/o/u plus a "&U" chain, and through
 * genericLocaleStarter() with the "zh" locale.
 * NOTE(review): the contents of test[] and test2[] and the end of the rules
 * initializer are not present in this listing.
 */
4567 static void TestBeforePinyin(void) {
4568 const static char rules
[] = {
4569 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
4570 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
4571 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
4572 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
4573 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
4574 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
4577 const static char *test
[] = {
4588 const static char *test2
[] = {
/* Each list is checked twice: against the explicit rules and against locale "zh". */
4621 genericRulesStarter(rules
, test
, sizeof(test
)/sizeof(test
[0]));
4622 genericLocaleStarter("zh", test
, sizeof(test
)/sizeof(test
[0]));
4623 genericRulesStarter(rules
, test2
, sizeof(test2
)/sizeof(test2
[0]));
4624 genericLocaleStarter("zh", test2
, sizeof(test2
)/sizeof(test2
[0]));
/*
 * Table-driven check of which relation operators are accepted after a
 * "[before N]" reset. Per the table, "[before 1]" is expected to succeed only
 * with "<", "[before 2]" only with "<<" and "[before 3]" only with "<<<";
 * every other combination (including "=" and the malformed "[before I]") is
 * expected to yield U_INVALID_FORMAT_ERROR from ucol_openRules().
 * NOTE(review): the listing omits the struct's rules member, the array name
 * line, some local declarations and whatever cleanup follows each iteration.
 */
4627 static void TestBeforeTightening(void) {
4628 static const struct {
4630 UErrorCode expectedStatus
;
4632 { "&[before 1]a<x", U_ZERO_ERROR
},
4633 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR
},
4634 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR
},
4635 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR
},
4636 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR
},
4637 { "&[before 2]a<<x",U_ZERO_ERROR
},
4638 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR
},
4639 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR
},
4640 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR
},
4641 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR
},
4642 { "&[before 3]a<<<x",U_ZERO_ERROR
},
4643 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR
},
4644 { "&[before I]a = x",U_INVALID_FORMAT_ERROR
}
4649 UErrorCode status
= U_ZERO_ERROR
;
4650 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
4653 UCollator
*coll
= NULL
;
4656 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
/* Unescape the rule text into rlz and try to build a collator from it. */
4657 rlen
= u_unescape(tests
[i
].rules
, rlz
, RULE_BUFFER_LEN
);
4658 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
/* The exact status code is compared, not merely success/failure. */
4659 if(status
!= tests
[i
].expectedStatus
) {
4660 log_err_status(status
, "Opening a collator with rules %s returned error code %s, expected %s\n",
4661 tests
[i
].rules
, u_errorName(status
), u_errorName(tests
[i
].expectedStatus
));
/* Reset so that an expected failure does not leak into the next iteration. */
4664 status
= U_ZERO_ERROR
;
/*
 * Expected orderings for the "[before N]" rules exercised by TestMoreBefore.
 * Each entry gives the rule text followed by the ordering it should produce.
 *
 * &m < a
 * &[before 1] a < x <<< X << q <<< Q < z
 * assert: m <<< M < x <<< X << q <<< Q < z < a < n
 *
 * &m < a
 * &[before 2] a << x <<< X << q <<< Q < z
 * assert: m <<< M < x <<< X << q <<< Q << a < z < n
 *
 * &m < a
 * &[before 3] a <<< x <<< X << q <<< Q < z
 * assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
 *
 * &m << a
 * &[before 1] a < x <<< X << q <<< Q < z
 * assert: x <<< X << q <<< Q < z < m <<< M << a < n
 *
 * &m << a
 * &[before 2] a << x <<< X << q <<< Q < z
 * assert: m <<< M << x <<< X << q <<< Q << a < z < n
 *
 * &m << a
 * &[before 3] a <<< x <<< X << q <<< Q < z
 * assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
 *
 * &m <<< a
 * &[before 1] a < x <<< X << q <<< Q < z
 * assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
 *
 * &m <<< a
 * &[before 2] a << x <<< X << q <<< Q < z
 * assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
 *
 * &m <<< a
 * &[before 3] a <<< x <<< X << q <<< Q < z
 * assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
 *
 * &[before 1] s < x <<< X << q <<< Q < z
 * assert: r <<< R < x <<< X << q <<< Q < z < s < n
 *
 * &[before 2] s << x <<< X << q <<< Q < z
 * assert: r <<< R < x <<< X << q <<< Q << s < z < n
 *
 * &[before 3] s <<< x <<< X << q <<< Q < z
 * assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
 *
 * &[before 1] \u24DC < x <<< X << q <<< Q < z
 * assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
 *
 * &[before 2] \u24DC << x <<< X << q <<< Q < z
 * assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
 *
 * &[before 3] \u24DC <<< x <<< X << q <<< Q < z
 * assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
 */
4731 /* requires features not yet supported */
/*
 * Table-driven test: each entry pairs a tailoring rule that uses "[before N]"
 * with the string order it is expected to produce (9 strings per entry), and
 * every entry is handed to genericRulesStarter().
 * NOTE(review): the listing omits the struct's rules and size members, the
 * array name line and the declaration of i.
 */
4732 static void TestMoreBefore(void) {
4733 static const struct {
4735 const char* order
[16];
4738 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4739 { "m","M","x","X","q","Q","z","a","n" }, 9},
4740 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4741 { "m","M","x","X","q","Q","a","z","n" }, 9},
4742 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4743 { "m","M","x","X","a","q","Q","z","n" }, 9},
4744 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4745 { "x","X","q","Q","z","m","M","a","n" }, 9},
4746 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4747 { "m","M","x","X","q","Q","a","z","n" }, 9},
4748 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4749 { "m","M","x","X","a","q","Q","z","n" }, 9},
4750 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4751 { "x","X","q","Q","z","n","m","a","M" }, 9},
4752 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4753 { "x","X","q","Q","m","a","M","z","n" }, 9},
4754 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4755 { "m","x","X","a","M","q","Q","z","n" }, 9},
4756 { "&[before 1] s < x <<< X << q <<< Q < z",
4757 { "r","R","x","X","q","Q","z","s","n" }, 9},
4758 { "&[before 2] s << x <<< X << q <<< Q < z",
4759 { "r","R","x","X","q","Q","s","z","n" }, 9},
4760 { "&[before 3] s <<< x <<< X << q <<< Q < z",
4761 { "r","R","x","X","s","q","Q","z","n" }, 9},
4762 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4763 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4764 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4765 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4766 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4767 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
/* Run every rule/order pair through the shared rules-based checker. */
4772 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
4773 genericRulesStarter(tests
[i
].rules
, tests
[i
].order
, tests
[i
].size
);
/*
 * Builds a collator from the rule "&a <<< '\u0000'" (written with an escaped
 * backslash and unescaped by u_unescape()) and compares the one-unit strings
 * U+0001 and U+0000 with explicit lengths. The comparison is expected to be
 * UCOL_LESS.
 * NOTE(review): the declaration of rlen and the closing braces/cleanup are not
 * present in this listing.
 */
4778 static void TestTailorNULL( void ) {
4779 const static char* rule
= "&a <<< '\\u0000'";
4780 UErrorCode status
= U_ZERO_ERROR
;
4781 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
/* Despite its name, `a` holds code unit 1, not the letter 'a'. */
4783 UChar a
= 1, null
= 0;
4784 UCollationResult res
= UCOL_EQUAL
;
4786 UCollator
*coll
= NULL
;
4789 rlen
= u_unescape(rule
, rlz
, RULE_BUFFER_LEN
);
4790 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
4792 if(U_FAILURE(status
)) {
4793 log_err_status(status
, "Could not open default collator! -> %s\n", u_errorName(status
));
/* Lengths are passed explicitly (1) because the second string is a lone NUL. */
4795 res
= ucol_strcoll(coll
, &a
, 1, &null
, 1);
4797 if(res
!= UCOL_LESS
) {
4798 log_err("NULL was not tailored properly!\n");
/*
 * Checks the order "B", "b", "Bb", "bB" on the root locale with quaternary
 * strength and upper-case-first, via genericLocaleStarterWithOptions().
 * NOTE(review): the return-type line and the braces are not present in this
 * listing.
 */
4806 TestUpperFirstQuaternary(void)
4808 const char* tests
[] = { "B", "b", "Bb", "bB" };
/* att[k] is set to attVals[k]; the two arrays are parallel. */
4809 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_FIRST
};
4810 UColAttributeValue attVals
[] = { UCOL_QUATERNARY
, UCOL_UPPER_FIRST
};
4811 genericLocaleStarterWithOptions("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
/*
 * Case-level checks with primary strength (the function's name line is not
 * present in this listing; presumably TestJ4960 -- TODO confirm).
 * Three scenarios are run through the generic starters:
 *   1. "\u00e2T" vs "aT" with primary strength + case level, expected EQUAL;
 *   2. "a" vs "A" with the same attributes, using the default expected result
 *      of genericLocaleStarterWithOptions();
 *   3. "a" vs "A" under a rule making both equal to the first tertiary
 *      ignorable, with case level on, expected EQUAL.
 */
4817 const char* tests
[] = { "\\u00e2T", "aT" };
4818 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_LEVEL
};
4819 UColAttributeValue attVals
[] = { UCOL_PRIMARY
, UCOL_ON
};
4820 const char* tests2
[] = { "a", "A" };
4821 const char* rule
= "&[first tertiary ignorable]=A=a";
4822 UColAttribute att2
[] = { UCOL_CASE_LEVEL
};
4823 UColAttributeValue attVals2
[] = { UCOL_ON
};
4824 /* Test whether we correctly ignore primary ignorables on case level when */
4825 /* we have only primary & case level */
4826 genericLocaleStarterWithOptionsAndResult("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]), UCOL_EQUAL
);
4827 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4828 /* and case level */
4829 genericLocaleStarterWithOptions("root", tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
4830 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4831 genericRulesStarterWithOptionsAndResult(rule
, tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att2
, attVals2
, sizeof(att2
)/sizeof(att2
[0]), UCOL_EQUAL
);
/*
 * Sort-key termination check (the function's name line is not present in this
 * listing; presumably TestJ5223 -- TODO confirm).
 * The key length is queried with a NULL buffer, the last byte position is
 * pre-filled with the marker 0xAA, and ucol_getSortKey() is called twice:
 * per the comments below, once with a buffer one byte larger than needed and
 * once with exactly the needed size. In both cases the marker must have been
 * overwritten.
 * NOTE(review): the declaration of ustr, the capacity arguments of the two
 * ucol_getSortKey() calls and the cleanup code are not present in this listing.
 */
4837 static const char *test
= "this is a test string";
4839 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
4840 unsigned char sortkey
[256];
4841 int32_t sortkey_length
;
4842 UErrorCode status
= U_ZERO_ERROR
;
4843 static UCollator
*coll
= NULL
;
4844 coll
= ucol_open("root", &status
);
4845 if(U_FAILURE(status
)) {
4846 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
/* Strength is set to primary twice, through both the dedicated setter and the
   generic attribute setter. */
4849 ucol_setStrength(coll
, UCOL_PRIMARY
);
4850 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4851 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4852 if (U_FAILURE(status
)) {
4853 log_err("Failed setting atributes\n");
/* NULL buffer with capacity 0: only the required key length is returned. */
4856 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, NULL
, 0);
/* NOTE(review): this early return happens after the collator was opened and no
   ucol_close() is visible before it -- confirm whether coll leaks here. */
4857 if (sortkey_length
> 256) return;
4859 /* we mark the position where the null byte should be written in advance */
4860 sortkey
[sortkey_length
-1] = 0xAA;
4862 /* we set the buffer size one byte higher than needed */
4863 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
4866 /* no error occurs (for me) */
4867 if (sortkey
[sortkey_length
-1] == 0xAA) {
4868 log_err("Hit bug at first try\n");
4871 /* we mark the position where the null byte should be written again */
4872 sortkey
[sortkey_length
-1] = 0xAA;
4874 /* this time we set the buffer size to the exact amount needed */
4875 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
4878 /* now the trailing null byte is not written */
4879 if (sortkey
[sortkey_length
-1] == 0xAA) {
4880 log_err("Hit bug at second try\n");
4886 /* Regression test for Thai partial sort key problem */
/*
 * Hands two Thai strings that differ only in the second-to-last code unit
 * (U+0E47 vs U+0E48) to genericLocaleStarter() with locale "th".
 * NOTE(review): the function's name line is not present in this listing
 * (presumably TestJ5232 -- TODO confirm).
 */
4890 const static char *test
[] = {
4891 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
4892 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
4895 genericLocaleStarter("th", test
, sizeof(test
)/sizeof(test
[0]));
/*
 * Checks the order "a", "y" under a rule that tailors the contraction "Ny" and
 * places "a" after the first secondary ignorable, via genericRulesStarter().
 * NOTE(review): the function's name line is not present in this listing
 * (presumably TestJ5367 -- TODO confirm).
 */
4901 const static char *test
[] = { "a", "y" };
4902 const char* rules
= "&Ny << Y &[first secondary ignorable] <<< a";
4903 genericRulesStarter(rules
, test
, sizeof(test
)/sizeof(test
[0]));
/*
 * Canonical-equivalence checks for strings with several combining marks
 * (the function's name line is not present in this listing; presumably
 * TestVI5913 -- TODO confirm). Sections, in order:
 *   - "vi" collator: pairs from tData must compare equal; sort keys are logged;
 *   - "ro" collator: same idea on other tData pairs;
 *   - three rule-based collators (rule, rule2, rule3): every variant in
 *     tailorData / tailorData2 / tailorData3 must yield the same sort key as a
 *     reference entry.
 * NOTE(review): the listing omits some original lines (declarations of i and j,
 * two tData rows, closing braces and any ucol_close() calls), so resource
 * handling cannot be judged from here.
 */
4909 UErrorCode status
= U_ZERO_ERROR
;
4911 UCollator
*coll
=NULL
;
4912 uint8_t resColl
[100], expColl
[100];
4913 int32_t rLen
, tLen
, ruleLen
, sLen
, kLen
;
4914 UChar rule
[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni (0x62 is 'b') */
4915 UChar rule2
[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4916 UChar rule3
[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4917 static const UChar tData
[][20]={
4919 {0x0041, 0x0323, 0x0302, 0},
4920 {0x1EA0, 0x0302, 0},
4921 {0x00C2, 0x0323, 0},
4922 {0x1ED8, 0}, /* O with dot and circumflex */
4923 {0x1ECC, 0x0302, 0},
4925 {0x1EA1, 0x0306, 0},
4927 static const UChar tailorData
[][20]={
4928 {0x1FA2, 0}, /* Omega with 3 combining marks */
4929 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4930 {0x1FF3, 0x0313, 0x0300, 0},
4931 {0x1F60, 0x0300, 0x0345, 0},
4932 {0x1F62, 0x0345, 0},
4933 {0x1FA0, 0x0300, 0},
4935 static const UChar tailorData2
[][20]={
4936 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4937 {0x0073, 0x0323, 0x030C, 0},
4938 {0x0073, 0x030C, 0x0323, 0},
4940 static const UChar tailorData3
[][20]={
4941 {0x007a, 0}, /* z */
4942 {0x0061, 0x0065, 0}, /* a + e */
4943 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4944 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4945 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4946 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4947 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4948 {0x00EA, 0}, /* e with circumflex */
4951 /* Test Vietnamese sort. */
4952 coll
= ucol_open("vi", &status
);
4953 if(U_FAILURE(status
)) {
4954 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
4957 log_verbose("\n\nVI collation:");
4958 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[2], u_strlen(tData
[2])) ) {
4959 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4961 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[3], u_strlen(tData
[3])) ) {
4962 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4964 if ( !ucol_equal(coll
, tData
[5], u_strlen(tData
[5]), tData
[4], u_strlen(tData
[4])) ) {
4965 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4967 if ( !ucol_equal(coll
, tData
[7], u_strlen(tData
[7]), tData
[6], u_strlen(tData
[6])) ) {
4968 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
/* Dump the sort key of every tData entry at verbose level.
   NOTE(review): "%s" is handed a UChar array here and in the loops below;
   confirm the log helpers treat it as intended. */
4971 for (j
=0; j
<8; j
++) {
4972 tLen
= u_strlen(tData
[j
]);
4973 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
4974 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
4975 for(i
= 0; i
<rLen
; i
++) {
4976 log_verbose(" %02X", resColl
[i
]);
4982 /* Test Romanian sort. */
/* NOTE(review): no status check is visible after this ucol_open() call. */
4983 coll
= ucol_open("ro", &status
);
4984 log_verbose("\n\nRO collation:");
4985 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[1], u_strlen(tData
[1])) ) {
4986 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4988 if ( !ucol_equal(coll
, tData
[4], u_strlen(tData
[4]), tData
[5], u_strlen(tData
[5])) ) {
4989 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4991 if ( !ucol_equal(coll
, tData
[6], u_strlen(tData
[6]), tData
[7], u_strlen(tData
[7])) ) {
4992 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4995 for (j
=4; j
<8; j
++) {
4996 tLen
= u_strlen(tData
[j
]);
4997 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
4998 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
4999 for(i
= 0; i
<rLen
; i
++) {
5000 log_verbose(" %02X", resColl
[i
]);
5005 /* Test the precomposed Greek character with 3 combining marks. */
5006 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5007 ruleLen
= u_strlen(rule
);
5008 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5009 if (U_FAILURE(status
)) {
5010 log_err("ucol_openRules failed with %s\n", u_errorName(status
));
/* Every variant spelling must compare equal to tailorData[0]. */
5013 sLen
= u_strlen(tailorData
[0]);
5014 for (j
=1; j
<6; j
++) {
5015 tLen
= u_strlen(tailorData
[j
]);
5016 if ( !ucol_equal(coll
, tailorData
[0], sLen
, tailorData
[j
], tLen
)) {
5017 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j
, tailorData
[j
]);
5020 /* Test getSortKey. */
/* expColl holds the reference key; each variant's key must match it byte for byte. */
5021 tLen
= u_strlen(tailorData
[0]);
5022 kLen
=ucol_getSortKey(coll
, tailorData
[0], tLen
, expColl
, 100);
5023 for (j
=0; j
<6; j
++) {
5024 tLen
= u_strlen(tailorData
[j
]);
5025 rLen
= ucol_getSortKey(coll
, tailorData
[j
], tLen
, resColl
, 100);
5026 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
5027 log_err("\n Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5028 for(i
= 0; i
<rLen
; i
++) {
5029 log_err(" %02X", resColl
[i
]);
5035 log_verbose("\n\nTailoring test for s with caron:");
5036 ruleLen
= u_strlen(rule2
);
/* NOTE(review): no status check is visible after this ucol_openRules() call. */
5037 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5038 tLen
= u_strlen(tailorData2
[0]);
5039 kLen
=ucol_getSortKey(coll
, tailorData2
[0], tLen
, expColl
, 100);
5040 for (j
=1; j
<3; j
++) {
5041 tLen
= u_strlen(tailorData2
[j
]);
5042 rLen
= ucol_getSortKey(coll
, tailorData2
[j
], tLen
, resColl
, 100);
5043 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
/* NOTE(review): the message prints tailorData[j], but the key under test comes
   from tailorData2[j]. */
5044 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5045 for(i
= 0; i
<rLen
; i
++) {
5046 log_err(" %02X", resColl
[i
]);
5052 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5053 ruleLen
= u_strlen(rule3
);
/* NOTE(review): no status check is visible after this ucol_openRules() call. */
5054 coll
= ucol_openRules(rule3
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
/* Reference key is taken from tailorData3[3]; entries 4 and 5 must match it. */
5055 tLen
= u_strlen(tailorData3
[3]);
5056 kLen
=ucol_getSortKey(coll
, tailorData3
[3], tLen
, expColl
, 100);
5057 for (j
=4; j
<6; j
++) {
5058 tLen
= u_strlen(tailorData3
[j
]);
5059 rLen
= ucol_getSortKey(coll
, tailorData3
[j
], tLen
, resColl
, 100);
5061 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
/* NOTE(review): as above, tailorData[j] is printed although tailorData3[j] is
   the string under test (also in the verbose message further down). */
5062 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5063 for(i
= 0; i
<rLen
; i
++) {
5064 log_err(" %02X", resColl
[i
]);
5068 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5069 for(i
= 0; i
<rLen
; i
++) {
5070 log_verbose(" %02X", resColl
[i
]);
/*
 * Tailors "a" and "b" relative to the [last/first primary ignorable] and
 * [last/first secondary ignorable] positions, then compares the leading bytes
 * of the resulting sort keys against hard-coded reference byte arrays; an
 * error is logged when a key compares lower than its reference.
 * NOTE(review): the listing omits some original lines (the return-type line,
 * the declaration of i, the rows of tData1 and tData2, closing braces and any
 * ucol_close() calls).
 */
5077 TestTailor6179(void)
5079 UErrorCode status
= U_ZERO_ERROR
;
5081 UCollator
*coll
=NULL
;
5082 uint8_t resColl
[100];
5083 int32_t rLen
, tLen
, ruleLen
;
5084 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5085 UChar rule1
[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5086 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5087 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5088 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5089 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5090 UChar rule2
[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5091 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5092 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5093 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5094 0x3C,0x3C,0x20,0x62,0};
5096 UChar tData1
[][20]={
5101 UChar tData2
[][20]={
5108 * These values from FractionalUCA.txt will change,
5109 * and need to be updated here.
/* The two secondary arrays are declared with 6 elements but list only 5 values;
   the remaining element is zero-initialized. */
5111 uint8_t firstPrimaryIgnCE
[6]={1, 87, 1, 5, 1, 0};
5112 uint8_t lastPrimaryIgnCE
[6]={1, 0xE3, 0xC9, 1, 5, 0};
5113 uint8_t firstSecondaryIgnCE
[6]={1, 1, 0x3f, 0x03, 0};
5114 uint8_t lastSecondaryIgnCE
[6]={1, 1, 0x3f, 0x03, 0};
5116 /* Test [Last Primary ignorable] */
5118 log_verbose("\n\nTailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b ");
5119 ruleLen
= u_strlen(rule1
);
5120 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5121 if (U_FAILURE(status
)) {
5122 log_err_status(status
, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status
));
/* Only the first min(rLen, 6) bytes of the key are compared with the reference. */
5125 tLen
= u_strlen(tData1
[0]);
5126 rLen
= ucol_getSortKey(coll
, tData1
[0], tLen
, resColl
, 100);
5127 if (uprv_memcmp(resColl
, lastPrimaryIgnCE
, uprv_min(rLen
,6)) < 0) {
5128 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData1
[0], rLen
);
5129 for(i
= 0; i
<rLen
; i
++) {
5130 log_err(" %02X", resColl
[i
]);
5133 tLen
= u_strlen(tData1
[1]);
5134 rLen
= ucol_getSortKey(coll
, tData1
[1], tLen
, resColl
, 100);
5135 if (uprv_memcmp(resColl
, firstPrimaryIgnCE
, uprv_min(rLen
, 6)) < 0) {
5136 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData1
[1], rLen
);
5137 for(i
= 0; i
<rLen
; i
++) {
5138 log_err(" %02X", resColl
[i
]);
5144 /* Test [Last Secondary ignorable] */
5145 log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b ");
/* NOTE(review): ruleLen is computed from rule1, but the collator on the next
   line is opened from rule2. The two rule strings have different lengths, so
   rule2 is passed with the wrong length -- looks like a copy/paste slip. */
5146 ruleLen
= u_strlen(rule1
);
5147 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5148 if (U_FAILURE(status
)) {
/* NOTE(review): the message still says "primary" in the secondary section. */
5149 log_err("Tailoring test: &[last primary ignorable] failed!");
5152 tLen
= u_strlen(tData2
[0]);
5153 rLen
= ucol_getSortKey(coll
, tData2
[0], tLen
, resColl
, 100);
5154 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 0, tData2
[0], rLen
);
5155 for(i
= 0; i
<rLen
; i
++) {
5156 log_verbose(" %02X", resColl
[i
]);
/* Compared prefix lengths differ between the two secondary checks (3 vs 4). */
5158 if (uprv_memcmp(resColl
, lastSecondaryIgnCE
, uprv_min(rLen
, 3)) < 0) {
5159 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData2
[0], rLen
);
5160 for(i
= 0; i
<rLen
; i
++) {
5161 log_err(" %02X", resColl
[i
]);
5164 tLen
= u_strlen(tData2
[1]);
5165 rLen
= ucol_getSortKey(coll
, tData2
[1], tLen
, resColl
, 100);
5166 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 1, tData2
[1], rLen
);
5167 for(i
= 0; i
<rLen
; i
++) {
5168 log_verbose(" %02X", resColl
[i
]);
5170 if (uprv_memcmp(resColl
, firstSecondaryIgnCE
, uprv_min(rLen
, 4)) < 0) {
5171 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData2
[1], rLen
);
5172 for(i
= 0; i
<rLen
; i
++) {
5173 log_err(" %02X", resColl
[i
]);
/*
 * Sort-key ordering checks for strings involving the middle dot (U+00B7 /
 * U+0387), alone and after l/L. The same 11-entry data table is run against
 * four collators ("en", "ja", and two rule-based ones); each key is compared
 * with the previous entry's key and an error is logged when the order is not
 * the expected one. With the last collator, entry 3 is expected to sort
 * before (not after) its predecessor.
 * NOTE(review): the listing omits some original lines (the return-type line,
 * declarations of i and j, two rows of tData1, the argument lines of the
 * "Expecting ..." messages, closing braces and any ucol_close() calls).
 */
5180 TestUCAPrecontext(void)
5182 UErrorCode status
= U_ZERO_ERROR
;
5184 UCollator
*coll
=NULL
;
5185 uint8_t resColl
[100], prevColl
[100];
5186 int32_t rLen
, tLen
, ruleLen
;
5187 UChar rule1
[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
/* NOTE(review): 0x4C is upper-case 'L', although the comments here and on one
   tData1 row write a lower-case "l". */
5188 UChar rule2
[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5189 /* & l middle-dot << a a is an expansion. */
5191 UChar tData1
[][20]={
5192 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5193 { 0x387, 0}, /* standalone middle dot(0x387) */
5196 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
5197 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5198 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5199 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5200 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5201 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5202 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
5205 log_verbose("\n\nEN collation:");
5206 coll
= ucol_open("en", &status
);
5207 if (U_FAILURE(status
)) {
5208 log_err_status(status
, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status
));
/* Keys are compared with strcmp(), i.e. as NUL-terminated byte strings;
   prevColl is refreshed at the end of each iteration (rLen+1 bytes copied). */
5211 for (j
=0; j
<11; j
++) {
5212 tLen
= u_strlen(tData1
[j
]);
5213 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5214 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5215 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5218 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5219 for(i
= 0; i
<rLen
; i
++) {
5220 log_verbose(" %02X", resColl
[i
]);
5222 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
5227 log_verbose("\n\nJA collation:");
5228 coll
= ucol_open("ja", &status
);
5229 if (U_FAILURE(status
)) {
5230 log_err("Tailoring test: &z <<a|- failed!");
5233 for (j
=0; j
<11; j
++) {
5234 tLen
= u_strlen(tData1
[j
]);
5235 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5236 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5237 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5240 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5241 for(i
= 0; i
<rLen
; i
++) {
5242 log_verbose(" %02X", resColl
[i
]);
5244 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
5249 log_verbose("\n\nTailoring test: & middle dot < a ");
5250 ruleLen
= u_strlen(rule1
);
5251 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5252 if (U_FAILURE(status
)) {
5253 log_err("Tailoring test: & middle dot < a failed!");
5256 for (j
=0; j
<11; j
++) {
5257 tLen
= u_strlen(tData1
[j
]);
5258 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5259 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5260 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5263 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5264 for(i
= 0; i
<rLen
; i
++) {
5265 log_verbose(" %02X", resColl
[i
]);
5267 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
5272 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5273 ruleLen
= u_strlen(rule2
);
5274 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5275 if (U_FAILURE(status
)) {
5276 log_err("Tailoring test: & l middle-dot << a failed!");
/* Same walk as above, except that entry 3 must sort before entry 2. */
5279 for (j
=0; j
<11; j
++) {
5280 tLen
= u_strlen(tData1
[j
]);
5281 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5282 if ((j
>0) && (j
!=3) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5283 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5286 if ((j
==3)&&(strcmp((char *)resColl
, (char *)prevColl
)>0)) {
5287 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5290 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5291 for(i
= 0; i
<rLen
; i
++) {
5292 log_verbose(" %02X", resColl
[i
]);
5294 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/*
 * Calls ucol_getSortKey() for the single character U+4E00 with a destination
 * buffer of only one byte (root collator, primary strength, normalization on)
 * and expects the returned length to be 4 even though the buffer is too small.
 * NOTE(review): the return-type line, the declaration of ustr and the cleanup
 * code are not present in this listing.
 */
5300 TestOutOfBuffer5468(void)
5302 static const char *test
= "\\u4e00";
5304 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
/* Deliberately undersized destination buffer. */
5305 unsigned char shortKeyBuf
[1];
5306 int32_t sortkey_length
;
5307 UErrorCode status
= U_ZERO_ERROR
;
5308 static UCollator
*coll
= NULL
;
5310 coll
= ucol_open("root", &status
);
5311 if(U_FAILURE(status
)) {
5312 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
5315 ucol_setStrength(coll
, UCOL_PRIMARY
);
5316 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
5317 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
5318 if (U_FAILURE(status
)) {
5319 log_err("Failed setting atributes\n");
/* The returned value is checked against the full key length (4), not against
   the one-byte capacity that was passed in. */
5323 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, shortKeyBuf
, sizeof(shortKeyBuf
));
5324 if (sortkey_length
!= 4) {
5325 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length
);
5327 log_verbose("length of sortKey is %d", sortkey_length
);
/*
 * TSKC_DATA_SIZE: number of prefixes of data[] that are tested (lengths 1..5).
 * TSKC_BUF_SIZE:  capacity in bytes of each sort-key buffer.
 */
5331 #define TSKC_DATA_SIZE 5
5332 #define TSKC_BUF_SIZE 50
/*
 * For each prefix of data[] (length i+1) computes a full sort key with
 * ucol_getSortKey() and a partial sort key with ucol_nextSortKeyPart() using
 * the collator opened from the short string "LEN_S4", then compares the
 * results with those of every shorter prefix. An error is logged when the
 * full-key comparison flag and the partial-key comparison flag disagree.
 * NOTE(review): the listing omits some original lines (the return-type line,
 * declarations of ucol, i, i2 and j, closing braces and cleanup).
 */
5334 TestSortKeyConsistency(void)
5336 UErrorCode icuRC
= U_ZERO_ERROR
;
5338 UChar data
[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5340 uint8_t bufFull
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
5341 uint8_t bufPart
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
5344 ucol
= ucol_openFromShortString("LEN_S4", FALSE
, NULL
, &icuRC
);
5345 if (U_FAILURE(icuRC
))
5347 log_err_status(icuRC
, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC
));
5351 for (i
= 0; i
< TSKC_DATA_SIZE
; i
++)
5353 UCharIterator uiter
;
5354 uint32_t state
[2] = { 0, 0 };
5355 int32_t dataLen
= i
+1;
/* Zero both buffers so that bytes past the key end compare equal later on. */
5356 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
5357 bufFull
[i
][j
] = bufPart
[i
][j
] = 0;
5360 ucol_getSortKey(ucol
, data
, dataLen
, bufFull
[i
], TSKC_BUF_SIZE
);
5362 /* Partial sort key */
5363 uiter_setString(&uiter
, data
, dataLen
);
5364 ucol_nextSortKeyPart(ucol
, &uiter
, state
, bufPart
[i
], TSKC_BUF_SIZE
, &icuRC
);
5365 if (U_FAILURE(icuRC
))
5367 log_err("ucol_nextSortKeyPart failed\n");
5372 for (i2
=0; i2
<i
; i2
++)
5374 UBool fullMatch
= TRUE
;
5375 UBool partMatch
= TRUE
;
/* NOTE(review): despite the names, the flags are accumulated with "!=", so each
   stays TRUE only while every byte position differs between the two keys --
   confirm whether "==" was intended. */
5376 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
5378 fullMatch
= fullMatch
&& (bufFull
[i
][j
] != bufFull
[i2
][j
]);
5379 partMatch
= partMatch
&& (bufPart
[i
][j
] != bufPart
[i2
][j
]);
5381 if (fullMatch
!= partMatch
) {
5382 log_err(fullMatch
? "full key was consistent, but partial key changed\n"
5383 : "partial key was consistent, but full key changed\n");
5390 /*=============================================*/
/*
 * Opens a collator from the short string "LHR_AN_CX_EX_FX_HX_NX_S3" and
 * requests a partial sort key for a two-unit text whose second unit (0xD81A)
 * lies in the lead-surrogate range and is not followed by a trail surrogate.
 * An error is logged when the returned length equals the full buffer
 * capacity (32).
 * NOTE(review): the declarations of ucol and iter, the end of the
 * ucol_nextSortKeyPart() call and the cleanup code are not present in this
 * listing.
 */
5395 static void TestCroatianSortKey(void) {
5396 const char* collString
= "LHR_AN_CX_EX_FX_HX_NX_S3";
5397 UErrorCode status
= U_ZERO_ERROR
;
5401 static const UChar text
[] = { 0x0044, 0xD81A };
/* Number of UChar units in text (2); the array has no terminating 0. */
5403 size_t length
= sizeof(text
)/sizeof(*text
);
5405 uint8_t textSortKey
[32];
5406 size_t lenSortKey
= 32;
5407 size_t actualSortKeyLen
;
5408 uint32_t uStateInfo
[2] = { 0, 0 };
5410 ucol
= ucol_openFromShortString(collString
, FALSE
, NULL
, &status
);
5411 if (U_FAILURE(status
)) {
5412 log_err_status(status
, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status
));
5416 uiter_setString(&iter
, text
, length
);
5418 actualSortKeyLen
= ucol_nextSortKeyPart(
5419 ucol
, &iter
, (uint32_t*)uStateInfo
,
5420 textSortKey
, lenSortKey
, &status
/* A result that fills the whole buffer is treated as the failure symptom. */
5423 if (actualSortKeyLen
== lenSortKey
) {
5424 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5431 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5432 * they are both Hiragana and Katakana
5434 #define SORTKEYLEN 50
5435 static void TestHiragana(void) {
5436 UErrorCode status
= U_ZERO_ERROR
;
5438 UCollationResult strcollresult
;
5439 UChar data1
[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5440 UChar data2
[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5441 int32_t data1Len
= sizeof(data1
)/sizeof(*data1
);
5442 int32_t data2Len
= sizeof(data2
)/sizeof(*data2
);
5444 uint8_t sortKey1
[SORTKEYLEN
];
5445 uint8_t sortKey2
[SORTKEYLEN
];
5447 UCharIterator uiter1
;
5448 UCharIterator uiter2
;
5449 uint32_t state1
[2] = { 0, 0 };
5450 uint32_t state2
[2] = { 0, 0 };
5454 ucol
= ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE
, NULL
,
5456 if (U_FAILURE(status
)) {
5457 log_err_status(status
, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status
));
5461 /* Start of full sort keys */
5462 /* Full sort key1 */
5463 keySize1
= ucol_getSortKey(ucol
, data1
, data1Len
, sortKey1
, SORTKEYLEN
);
5464 /* Full sort key2 */
5465 keySize2
= ucol_getSortKey(ucol
, data2
, data2Len
, sortKey2
, SORTKEYLEN
);
5466 if (keySize1
== keySize2
) {
5467 for (i
= 0; i
< keySize1
; i
++) {
5468 if (sortKey1
[i
] != sortKey2
[i
]) {
5469 log_err("Full sort keys are different. Should be equal.");
5473 log_err("Full sort keys sizes doesn't match: %d %d", keySize1
, keySize2
);
5475 /* End of full sort keys */
5477 /* Start of partial sort keys */
5478 /* Partial sort key1 */
5479 uiter_setString(&uiter1
, data1
, data1Len
);
5480 keySize1
= ucol_nextSortKeyPart(ucol
, &uiter1
, state1
, sortKey1
, SORTKEYLEN
, &status
);
5481 /* Partial sort key2 */
5482 uiter_setString(&uiter2
, data2
, data2Len
);
5483 keySize2
= ucol_nextSortKeyPart(ucol
, &uiter2
, state2
, sortKey2
, SORTKEYLEN
, &status
);
5484 if (U_SUCCESS(status
) && keySize1
== keySize2
) {
5485 for (j
= 0; j
< keySize1
; j
++) {
5486 if (sortKey1
[j
] != sortKey2
[j
]) {
5487 log_err("Partial sort keys are different. Should be equal");
5491 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status
), keySize1
, keySize2
);
5493 /* End of partial sort keys */
5495 /* Start of strcoll */
5496 /* Use ucol_strcoll() to determine ordering */
5497 strcollresult
= ucol_strcoll(ucol
, data1
, data1Len
, data2
, data2Len
);
5498 if (strcollresult
!= UCOL_EQUAL
) {
5499 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5505 /* Convenient struct for running collation tests */
5507 const UChar source
[MAX_TOKEN_LEN
]; /* String on left */
5508 const UChar target
[MAX_TOKEN_LEN
]; /* String on right */
5509 UCollationResult result
; /* -1, 0 or +1, depending on collation */
5513 * Utility function to test one collation test case.
5514 * @param testcases Array of test cases.
5515 * @param n_testcases Size of the array testcases.
5516 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
5517 * @param n_rules Size of the array str_rules.
5519 static void doTestOneTestCase(const OneTestCase testcases
[],
5521 const char* str_rules
[],
5524 int rule_no
, testcase_no
;
5527 UErrorCode status
= U_ZERO_ERROR
;
5528 UParseError parse_error
;
5529 UCollator
*myCollation
;
5531 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
5533 length
= u_unescape(str_rules
[rule_no
], rule
, 500);
5535 log_err("ERROR: The rule cannot be unescaped: %s\n");
5538 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
5539 if(U_FAILURE(status
)){
5540 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5543 log_verbose("Testing the <<* syntax\n");
5544 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
5545 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5546 for (testcase_no
= 0; testcase_no
< n_testcases
; ++testcase_no
) {
5548 testcases
[testcase_no
].source
,
5549 testcases
[testcase_no
].target
,
5550 testcases
[testcase_no
].result
5553 ucol_close(myCollation
);
5557 const static OneTestCase rangeTestcases
[] = {
5558 { {0x0061}, {0x0062}, UCOL_LESS
}, /* "a" < "b" */
5559 { {0x0062}, {0x0063}, UCOL_LESS
}, /* "b" < "c" */
5560 { {0x0061}, {0x0063}, UCOL_LESS
}, /* "a" < "c" */
5562 { {0x0062}, {0x006b}, UCOL_LESS
}, /* "b" << "k" */
5563 { {0x006b}, {0x006c}, UCOL_LESS
}, /* "k" << "l" */
5564 { {0x0062}, {0x006c}, UCOL_LESS
}, /* "b" << "l" */
5565 { {0x0061}, {0x006c}, UCOL_LESS
}, /* "a" < "l" */
5566 { {0x0061}, {0x006d}, UCOL_LESS
}, /* "a" < "m" */
5568 { {0x0079}, {0x006d}, UCOL_LESS
}, /* "y" < "f" */
5569 { {0x0079}, {0x0067}, UCOL_LESS
}, /* "y" < "g" */
5570 { {0x0061}, {0x0068}, UCOL_LESS
}, /* "y" < "h" */
5571 { {0x0061}, {0x0065}, UCOL_LESS
}, /* "g" < "e" */
5573 { {0x0061}, {0x0031}, UCOL_EQUAL
}, /* "a" = "1" */
5574 { {0x0061}, {0x0032}, UCOL_EQUAL
}, /* "a" = "2" */
5575 { {0x0061}, {0x0033}, UCOL_EQUAL
}, /* "a" = "3" */
5576 { {0x0061}, {0x0066}, UCOL_LESS
}, /* "a" < "f" */
5577 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS
}, /* "la" < "123" */
5578 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL
}, /* "aaa" = "123" */
5579 { {0x0062}, {0x007a}, UCOL_LESS
}, /* "b" < "z" */
5580 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS
}, /* "azm" = "2yc" */
5583 static int nRangeTestcases
= LEN(rangeTestcases
);
5585 const static OneTestCase rangeTestcasesSupplemental
[] = {
5586 { {0xfffe}, {0xffff}, UCOL_LESS
}, /* U+FFFE < U+FFFF */
5587 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS
}, /* U+FFFF < U+10000 */
5588 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+10000 < U+10001 */
5589 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+FFFE < U+10001 */
5590 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
5591 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
5592 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+FFFE < U+10001 */
5595 static int nRangeTestcasesSupplemental
= LEN(rangeTestcasesSupplemental
);
5597 const static OneTestCase rangeTestcasesQwerty
[] = {
5598 { {0x0071}, {0x0077}, UCOL_LESS
}, /* "q" < "w" */
5599 { {0x0077}, {0x0065}, UCOL_LESS
}, /* "w" < "e" */
5601 { {0x0079}, {0x0075}, UCOL_LESS
}, /* "y" < "u" */
5602 { {0x0071}, {0x0075}, UCOL_LESS
}, /* "q" << "u" */
5604 { {0x0074}, {0x0069}, UCOL_LESS
}, /* "t" << "i" */
5605 { {0x006f}, {0x0070}, UCOL_LESS
}, /* "o" << "p" */
5607 { {0x0079}, {0x0065}, UCOL_LESS
}, /* "y" < "e" */
5608 { {0x0069}, {0x0075}, UCOL_LESS
}, /* "i" < "u" */
5610 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5611 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS
}, /* "quest" < "were" */
5612 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5613 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS
}, /* "quack" < "quest" */
5616 static int nRangeTestcasesQwerty
= LEN(rangeTestcasesQwerty
);
5618 static void TestSameStrengthList(void)
5620 const char* strRules
[] = {
5622 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
5625 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5627 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5630 static void TestSameStrengthListQuoted(void)
5632 const char* strRules
[] = {
5633 /* Lists with quoted characters */
5634 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5635 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5637 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5638 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5640 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
5641 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5643 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5646 static void TestSameStrengthListSupplemental(void)
5648 const char* strRules
[] = {
5649 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5650 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5651 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5652 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5654 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, LEN(strRules
));
5657 static void TestSameStrengthListQwerty(void)
5659 const char* strRules
[] = {
5660 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5661 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5662 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5663 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5664 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5666 /* Quoted characters also will work if two quoted characters are not consecutive. */
5667 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5669 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5670 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5673 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, LEN(strRules
));
5676 static void TestSameStrengthListQuotedQwerty(void)
5678 const char* strRules
[] = {
5679 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5680 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5681 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
5683 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5684 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5686 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, LEN(strRules
));
5689 static void TestSameStrengthListRanges(void)
5691 const char* strRules
[] = {
5692 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5694 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5697 static void TestSameStrengthListSupplementalRanges(void)
5699 const char* strRules
[] = {
5700 "&\\ufffe<*\\uffff-\\U00010002",
5702 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, LEN(strRules
));
5705 static void TestSpecialCharacters(void)
5707 const char* strRules
[] = {
5709 "&';'<'+'<','<'-'<'&'<'*'",
5718 const static OneTestCase specialCharacterStrings
[] = {
5719 { {0x003b}, {0x002b}, UCOL_LESS
}, /* ; < + */
5720 { {0x002b}, {0x002c}, UCOL_LESS
}, /* + < , */
5721 { {0x002c}, {0x002d}, UCOL_LESS
}, /* , < - */
5722 { {0x002d}, {0x0026}, UCOL_LESS
}, /* - < & */
5724 doTestOneTestCase(specialCharacterStrings
, LEN(specialCharacterStrings
), strRules
, LEN(strRules
));
5727 static void TestPrivateUseCharacters(void)
5729 const char* strRules
[] = {
5731 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5732 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5735 const static OneTestCase privateUseCharacterStrings
[] = {
5736 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5737 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5738 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5739 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5740 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5741 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5743 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
5746 static void TestPrivateUseCharactersInList(void)
5748 const char* strRules
[] = {
5750 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5751 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5752 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5755 const static OneTestCase privateUseCharacterStrings
[] = {
5756 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5757 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5758 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5759 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5760 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5761 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5763 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
5766 static void TestPrivateUseCharactersInRange(void)
5768 const char* strRules
[] = {
5770 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5771 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5772 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5775 const static OneTestCase privateUseCharacterStrings
[] = {
5776 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5777 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5778 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5779 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5780 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5781 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5783 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
5786 static void TestInvalidListsAndRanges(void)
5788 const char* invalidRules
[] = {
5789 /* Range not in starred expression */
5790 "&\\ufffe<\\uffff-\\U00010002",
5792 /* Range without start */
5795 /* Range without end */
5798 /* More than one hyphen */
5801 /* Range in the wrong order */
5807 UErrorCode status
= U_ZERO_ERROR
;
5808 UParseError parse_error
;
5809 int n_rules
= LEN(invalidRules
);
5812 UCollator
*myCollation
;
5814 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
5816 length
= u_unescape(invalidRules
[rule_no
], rule
, 500);
5818 log_err("ERROR: The rule cannot be unescaped: %s\n");
5821 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
5822 if(!U_FAILURE(status
)){
5823 log_err("ERROR: Could not cause a failure as expected: \n");
5825 status
= U_ZERO_ERROR
;
5830 * This test ensures that characters placed before a character in a different script have the same lead byte
5831 * in their collation key before and after script reordering.
5833 static void TestBeforeRuleWithScriptReordering(void)
5836 UErrorCode status
= U_ZERO_ERROR
;
5837 UCollator
*myCollation
;
5838 char srules
[500] = "&[before 1]\\u03b1 < \\u0e01";
5840 uint32_t rulesLength
= 0;
5841 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
5842 UCollationResult collResult
;
5844 uint8_t baseKey
[256];
5845 uint32_t baseKeyLength
;
5846 uint8_t beforeKey
[256];
5847 uint32_t beforeKeyLength
;
5849 UChar base
[] = { 0x03b1 }; /* base */
5850 int32_t baseLen
= sizeof(base
)/sizeof(*base
);
5852 UChar before
[] = { 0x0e01 }; /* ko kai */
5853 int32_t beforeLen
= sizeof(before
)/sizeof(*before
);
5855 /*UChar *data[] = { before, base };
5856 genericRulesStarter(srules, data, 2);*/
5858 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5861 /* build collator */
5862 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5864 rulesLength
= u_unescape(srules
, rules
, LEN(rules
));
5865 myCollation
= ucol_openRules(rules
, rulesLength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5866 if(U_FAILURE(status
)) {
5867 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5871 /* check collation results - before rule applied but not script reordering */
5872 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
5873 if (collResult
!= UCOL_GREATER
) {
5874 log_err("Collation result not correct before script reordering = %d\n", collResult
);
5877 /* check the lead byte of the collation keys before script reordering */
5878 baseKeyLength
= ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
5879 beforeKeyLength
= ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
5880 if (baseKey
[0] != beforeKey
[0]) {
5881 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
5884 /* reorder the scripts */
5885 ucol_setReorderCodes(myCollation
, reorderCodes
, 1, &status
);
5886 if(U_FAILURE(status
)) {
5887 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
5891 /* check collation results - before rule applied and after script reordering */
5892 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
5893 if (collResult
!= UCOL_GREATER
) {
5894 log_err("Collation result not correct after script reordering = %d\n", collResult
);
5897 /* check the lead byte of the collation keys after script reordering */
5898 ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
5899 ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
5900 if (baseKey
[0] != beforeKey
[0]) {
5901 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
5904 ucol_close(myCollation
);
5908 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5910 static void TestNonLeadBytesDuringCollationReordering(void)
5912 UErrorCode status
= U_ZERO_ERROR
;
5913 UCollator
*myCollation
;
5914 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
5915 UCollationResult collResult
;
5917 uint8_t baseKey
[256];
5918 uint32_t baseKeyLength
;
5919 uint8_t reorderKey
[256];
5920 uint32_t reorderKeyLength
;
5922 UChar testString
[] = { 0x03b1, 0x03b2, 0x03b3 };
5927 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5929 /* build collator tertiary */
5930 myCollation
= ucol_open("", &status
);
5931 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5932 if(U_FAILURE(status
)) {
5933 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5936 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), baseKey
, 256);
5938 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
5939 if(U_FAILURE(status
)) {
5940 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5943 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), reorderKey
, 256);
5945 if (baseKeyLength
!= reorderKeyLength
) {
5946 log_err("Key lengths not the same during reordering.\n", collResult
);
5950 for (i
= 1; i
< baseKeyLength
; i
++) {
5951 if (baseKey
[i
] != reorderKey
[i
]) {
5952 log_err("Collation key bytes not the same at position %d.\n", i
);
5956 ucol_close(myCollation
);
5958 /* build collator quaternary */
5959 myCollation
= ucol_open("", &status
);
5960 ucol_setStrength(myCollation
, UCOL_QUATERNARY
);
5961 if(U_FAILURE(status
)) {
5962 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5965 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), baseKey
, 256);
5967 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
5968 if(U_FAILURE(status
)) {
5969 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5972 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), reorderKey
, 256);
5974 if (baseKeyLength
!= reorderKeyLength
) {
5975 log_err("Key lengths not the same during reordering.\n", collResult
);
5979 for (i
= 1; i
< baseKeyLength
; i
++) {
5980 if (baseKey
[i
] != reorderKey
[i
]) {
5981 log_err("Collation key bytes not the same at position %d.\n", i
);
5985 ucol_close(myCollation
);
5989 * Test reordering API.
5991 static void TestReorderingAPI(void)
5993 UErrorCode status
= U_ZERO_ERROR
;
5994 UCollator
*myCollation
;
5995 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
5996 UCollationResult collResult
;
5997 int32_t retrievedReorderCodesLength
;
5998 UChar greekString
[] = { 0x03b1 };
5999 UChar punctuationString
[] = { 0x203e };
6001 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6003 /* build collator tertiary */
6004 myCollation
= ucol_open("", &status
);
6005 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
6006 if(U_FAILURE(status
)) {
6007 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6011 /* set the reorderding */
6012 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6013 if (U_FAILURE(status
)) {
6014 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
6018 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6019 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
6020 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
6023 status
= U_ZERO_ERROR
;
6024 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6025 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6028 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6029 if (collResult
!= UCOL_LESS
) {
6030 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
6034 /* clear the reordering */
6035 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
6036 if (U_FAILURE(status
)) {
6037 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
6041 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6042 if (retrievedReorderCodesLength
!= 0) {
6043 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
6047 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6048 if (collResult
!= UCOL_GREATER
) {
6049 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
6053 ucol_close(myCollation
);
6057 * Utility function to test one collation reordering test case.
6058 * @param testcases Array of test cases.
6059 * @param n_testcases Size of the array testcases.
6060 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
6061 * @param n_rules Size of the array str_rules.
6063 static void doTestOneReorderingAPITestCase(const OneTestCase testCases
[], uint32_t testCasesLen
, const int32_t reorderTokens
[], int32_t reorderTokensLen
)
6066 UErrorCode status
= U_ZERO_ERROR
;
6067 UCollator
*myCollation
;
6069 for (testCaseNum
= 0; testCaseNum
< testCasesLen
; ++testCaseNum
) {
6070 myCollation
= ucol_open("", &status
);
6071 if (U_FAILURE(status
)) {
6072 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6075 ucol_setReorderCodes(myCollation
, reorderTokens
, reorderTokensLen
, &status
);
6076 if(U_FAILURE(status
)) {
6077 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
6081 for (testCaseNum
= 0; testCaseNum
< testCasesLen
; ++testCaseNum
) {
6083 testCases
[testCaseNum
].source
,
6084 testCases
[testCaseNum
].target
,
6085 testCases
[testCaseNum
].result
6088 ucol_close(myCollation
);
6092 static void TestGreekFirstReorder(void)
6094 const char* strRules
[] = {
6098 const int32_t apiRules
[] = {
6102 const static OneTestCase privateUseCharacterStrings
[] = {
6103 { {0x0391}, {0x0391}, UCOL_EQUAL
},
6104 { {0x0041}, {0x0391}, UCOL_GREATER
},
6105 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER
},
6106 { {0x0060}, {0x0391}, UCOL_LESS
},
6107 { {0x0391}, {0xe2dc}, UCOL_LESS
},
6108 { {0x0391}, {0x0060}, UCOL_GREATER
},
6111 /* Test rules creation */
6112 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6114 /* Test collation reordering API */
6115 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6118 static void TestGreekLastReorder(void)
6120 const char* strRules
[] = {
6121 "[reorder Zzzz Grek]"
6124 const int32_t apiRules
[] = {
6125 USCRIPT_UNKNOWN
, USCRIPT_GREEK
6128 const static OneTestCase privateUseCharacterStrings
[] = {
6129 { {0x0391}, {0x0391}, UCOL_EQUAL
},
6130 { {0x0041}, {0x0391}, UCOL_LESS
},
6131 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS
},
6132 { {0x0060}, {0x0391}, UCOL_LESS
},
6133 { {0x0391}, {0xe2dc}, UCOL_GREATER
},
6136 /* Test rules creation */
6137 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6139 /* Test collation reordering API */
6140 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6143 static void TestNonScriptReorder(void)
6145 const char* strRules
[] = {
6146 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6149 const int32_t apiRules
[] = {
6150 USCRIPT_GREEK
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
,
6151 UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SPACE
, USCRIPT_UNKNOWN
,
6152 UCOL_REORDER_CODE_CURRENCY
6155 const static OneTestCase privateUseCharacterStrings
[] = {
6156 { {0x0391}, {0x0041}, UCOL_LESS
},
6157 { {0x0041}, {0x0391}, UCOL_GREATER
},
6158 { {0x0060}, {0x0041}, UCOL_LESS
},
6159 { {0x0060}, {0x0391}, UCOL_GREATER
},
6160 { {0x0024}, {0x0041}, UCOL_GREATER
},
6163 /* Test rules creation */
6164 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6166 /* Test collation reordering API */
6167 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6170 static void TestHaniReorder(void)
6172 const char* strRules
[] = {
6175 const int32_t apiRules
[] = {
6179 const static OneTestCase privateUseCharacterStrings
[] = {
6180 { {0x4e00}, {0x0041}, UCOL_LESS
},
6181 { {0x4e00}, {0x0060}, UCOL_GREATER
},
6182 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
6183 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
6184 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
6185 { {0xfa27}, {0x0041}, UCOL_LESS
},
6186 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
6189 /* Test rules creation */
6190 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6192 /* Test collation reordering API */
6193 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6196 static void TestMultipleReorder()
6198 const char* strRules
[] = {
6199 "[reorder Grek Zzzz DIGIT Latn Hani]"
6202 const int32_t apiRules
[] = {
6203 USCRIPT_GREEK
, USCRIPT_UNKNOWN
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
, USCRIPT_HAN
6206 const static OneTestCase collationTestCases
[] = {
6207 { {0x0391}, {0x0041}, UCOL_LESS
},
6208 { {0x0031}, {0x0041}, UCOL_LESS
},
6209 { {0x0041}, {0x4e00}, UCOL_LESS
},
6212 /* Test rules creation */
6213 doTestOneTestCase(collationTestCases
, LEN(collationTestCases
), strRules
, LEN(strRules
));
6215 /* Test collation reordering API */
6216 doTestOneReorderingAPITestCase(collationTestCases
, LEN(collationTestCases
), apiRules
, LEN(apiRules
));
/*
 * Compares two zero-terminated byte strings.
 * Returns 0 when they are identical up to and including the terminating zero
 * byte, -1 when the first differing byte of a is smaller, and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Both strings ended together: stop before reading past them. */
            return 0;
        }
        ++a;
        ++b;
    }
    return (*a < *b ? -1 : 1);
}
6229 static void TestImport(void)
6233 UCollator
* viescoll
;
6234 UCollator
* importviescoll
;
6236 UErrorCode status
= U_ZERO_ERROR
;
6238 int32_t viruleslength
;
6240 int32_t esruleslength
;
6242 int32_t viesruleslength
;
6243 char srules
[500] = "[import vi][import es]";
6245 uint32_t length
= 0;
6258 USet
* importTailoredSet
;
6261 vicoll
= ucol_open("vi", &status
);
6262 if(U_FAILURE(status
)){
6263 log_err_status(status
, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status
));
6267 virules
= (UChar
*) ucol_getRules(vicoll
, &viruleslength
);
6268 escoll
= ucol_open("es", &status
);
6269 esrules
= (UChar
*) ucol_getRules(escoll
, &esruleslength
);
6270 viesrules
= (UChar
*)uprv_malloc((viruleslength
+esruleslength
+1)*sizeof(UChar
*));
6272 u_strcat(viesrules
, virules
);
6273 u_strcat(viesrules
, esrules
);
6274 viesruleslength
= viruleslength
+ esruleslength
;
6275 viescoll
= ucol_openRules(viesrules
, viesruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6277 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6278 length
= u_unescape(srules
, rules
, 500);
6279 importviescoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6280 if(U_FAILURE(status
)){
6281 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6285 tailoredSet
= ucol_getTailoredSet(viescoll
, &status
);
6286 importTailoredSet
= ucol_getTailoredSet(importviescoll
, &status
);
6288 if(!uset_equals(tailoredSet
, importTailoredSet
)){
6289 log_err("Tailored sets not equal");
6292 uset_close(importTailoredSet
);
6294 itemCount
= uset_getItemCount(tailoredSet
);
6296 for( i
= 0; i
< itemCount
; i
++){
6297 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
6299 for (; start
<= end
; start
++){
6301 U16_APPEND(str
, k
, 500, start
, b
);
6302 ucol_getSortKey(viescoll
, str
, 1, sk1
, 500);
6303 ucol_getSortKey(importviescoll
, str
, 1, sk2
, 500);
6304 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6305 log_err("Sort key for %s not equal\n", str
);
6310 ucol_getSortKey(viescoll
, str
, strLength
, sk1
, 500);
6311 ucol_getSortKey(importviescoll
, str
, strLength
, sk2
, 500);
6312 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6313 log_err("ZZSort key for %s not equal\n", str
);
6320 uset_close(tailoredSet
);
6322 uprv_free(viesrules
);
6326 ucol_close(viescoll
);
6327 ucol_close(importviescoll
);
6330 static void TestImportWithType(void)
6334 UCollator
* videcoll
;
6335 UCollator
* importvidecoll
;
6337 UErrorCode status
= U_ZERO_ERROR
;
6338 const UChar
* virules
;
6339 int32_t viruleslength
;
6340 const UChar
* derules
;
6341 int32_t deruleslength
;
6343 int32_t videruleslength
;
6344 const char srules
[500] = "[import vi][import de-u-co-phonebk]";
6346 uint32_t length
= 0;
6358 USet
* importTailoredSet
;
6360 vicoll
= ucol_open("vi", &status
);
6361 if(U_FAILURE(status
)){
6362 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6365 virules
= ucol_getRules(vicoll
, &viruleslength
);
6366 /* decoll = ucol_open("de@collation=phonebook", &status); */
6367 decoll
= ucol_open("de-u-co-phonebk", &status
);
6368 if(U_FAILURE(status
)){
6369 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6374 derules
= ucol_getRules(decoll
, &deruleslength
);
6375 viderules
= (UChar
*)uprv_malloc((viruleslength
+deruleslength
+1)*sizeof(UChar
*));
6377 u_strcat(viderules
, virules
);
6378 u_strcat(viderules
, derules
);
6379 videruleslength
= viruleslength
+ deruleslength
;
6380 videcoll
= ucol_openRules(viderules
, videruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6382 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6383 length
= u_unescape(srules
, rules
, 500);
6384 importvidecoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6385 if(U_FAILURE(status
)){
6386 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6390 tailoredSet
= ucol_getTailoredSet(videcoll
, &status
);
6391 importTailoredSet
= ucol_getTailoredSet(importvidecoll
, &status
);
6393 if(!uset_equals(tailoredSet
, importTailoredSet
)){
6394 log_err("Tailored sets not equal");
6397 uset_close(importTailoredSet
);
6399 itemCount
= uset_getItemCount(tailoredSet
);
6401 for( i
= 0; i
< itemCount
; i
++){
6402 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
6404 for (; start
<= end
; start
++){
6406 U16_APPEND_UNSAFE(str
, k
, start
);
6407 ucol_getSortKey(videcoll
, str
, 1, sk1
, 500);
6408 ucol_getSortKey(importvidecoll
, str
, 1, sk2
, 500);
6409 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6410 log_err("Sort key for %s not equal\n", str
);
6415 ucol_getSortKey(videcoll
, str
, strLength
, sk1
, 500);
6416 ucol_getSortKey(importvidecoll
, str
, strLength
, sk2
, 500);
6417 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6418 log_err("Sort key for %s not equal\n", str
);
6425 uset_close(tailoredSet
);
6427 uprv_free(viderules
);
6429 ucol_close(videcoll
);
6430 ucol_close(importvidecoll
);
/*
 * TEST(fn): passes the test function fn to addTest() together with a path
 * built from the literal prefix "tscoll/cmsccoll/" and the stringized
 * function name. The expansion refers to a variable named `root`, so the
 * macro can only be used where such a variable is in scope.
 */
#define TEST(x) \
    addTest(root, &x, "tscoll/cmsccoll/" #x)
6439 void addMiscCollTest(TestNode
** root
)
6441 TEST(TestRuleOptions
);
6442 TEST(TestBeforePrefixFailure
);
6443 TEST(TestContractionClosure
);
6444 TEST(TestPrefixCompose
);
6445 TEST(TestStrCollIdenticalPrefix
);
6447 TEST(TestNewJapanese
);
6448 /*TEST(TestLimitations);*/
6450 TEST(TestExtremeCompression
);
6451 TEST(TestSurrogates
);
6452 TEST(TestVariableTopSetting
);
6453 TEST(TestBocsuCoverage
);
6454 TEST(TestCyrillicTailoring
);
6456 TEST(IncompleteCntTest
);
6457 TEST(BlackBirdTest
);
6459 TEST(BillFairmanTest
);
6460 TEST(RamsRulesTest
);
6461 TEST(IsTailoredTest
);
6462 TEST(TestCollations
);
6464 TEST(TestImplicitTailoring
);
6465 TEST(TestFCDProblem
);
6466 TEST(TestEmptyRule
);
6467 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
6469 /*TEST(TestJ831);*/ /* we changed lv locale */
6471 TEST(TestRedundantRules
);
6472 TEST(TestExpansionSyntax
);
6473 TEST(TestHangulTailoring
);
6475 TEST(TestIncrementalNormalize
);
6476 TEST(TestComposeDecompose
);
6477 TEST(TestCompressOverlap
);
6478 TEST(TestContraction
);
6479 TEST(TestExpansion
);
6480 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
6481 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
6483 TEST(TestSuppressContractions
);
6485 TEST(TestHebrewUCA
);
6486 TEST(TestPartialSortKeyTermination
);
6491 TEST(TestNumericCollation
);
6492 TEST(TestTibetanConformance
);
6493 TEST(TestPinyinProblem
);
6494 TEST(TestImplicitGeneration
);
6495 TEST(TestSeparateTrees
);
6496 TEST(TestBeforePinyin
);
6497 TEST(TestBeforeTightening
);
6498 /*TEST(TestMoreBefore);*/
6499 TEST(TestTailorNULL
);
6500 TEST(TestUpperFirstQuaternary
);
6506 TEST(TestSortKeyConsistency
);
6507 TEST(TestVI5913
); /* VI, RO tailored rules */
6508 TEST(TestCroatianSortKey
);
6509 TEST(TestTailor6179
);
6510 TEST(TestUCAPrecontext
);
6511 TEST(TestOutOfBuffer5468
);
6512 TEST(TestSameStrengthList
);
6514 TEST(TestSameStrengthListQuoted
);
6515 TEST(TestSameStrengthListSupplemental
);
6516 TEST(TestSameStrengthListQwerty
);
6517 TEST(TestSameStrengthListQuotedQwerty
);
6518 TEST(TestSameStrengthListRanges
);
6519 TEST(TestSameStrengthListSupplementalRanges
);
6520 TEST(TestSpecialCharacters
);
6521 TEST(TestPrivateUseCharacters
);
6522 TEST(TestPrivateUseCharactersInList
);
6523 TEST(TestPrivateUseCharactersInRange
);
6524 TEST(TestInvalidListsAndRanges
);
6526 TEST(TestImportWithType
);
6528 TEST(TestBeforeRuleWithScriptReordering
);
6529 TEST(TestNonLeadBytesDuringCollationReordering
);
6530 TEST(TestReorderingAPI
);
6531 TEST(TestGreekFirstReorder
);
6532 TEST(TestGreekLastReorder
);
6533 TEST(TestNonScriptReorder
);
6534 TEST(TestHaniReorder
);
6535 TEST(TestMultipleReorder
);
6538 #endif /* #if !UCONFIG_NO_COLLATION */