1 /********************************************************************
3 * Copyright (c) 2001-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
10 *******************************************************************************/
12 * These are the tests specific to ICU 1.8 and above, that I didn't know where
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_COLLATION
22 #include "unicode/ucol.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/uloc.h"
28 #include "unicode/ustring.h"
35 #include "unicode/parseerr.h"
36 #include "unicode/ucnv.h"
39 #define LEN(a) (sizeof(a)/sizeof(a[0]))
41 #define MAX_TOKEN_LEN 16
/* Common signature for the string-comparison callbacks used in this file
 * (ucaTest has this shape): an opaque collator handle, an integer option
 * word, and two UChar strings passed with explicit lengths. The int return
 * value carries the comparison outcome. */
43 typedef int tst_strcoll(void *collator
, const int object
,
44 const UChar
*source
, const int sLen
,
45 const UChar
*target
, const int tLen
);
49 const static char cnt1
[][10] = {
64 const static char cnt2
[][10] = {
/* Exercises two small rule-built collators. For each one, every pair (i, j)
 * with i < j taken from the matching string table (cnt1 first, then cnt2) is
 * converted to UChar with u_uastrcpy and handed to doTest with UCOL_LESS as
 * the expected outcome. A collation element iterator is also opened on the
 * second string of each pair and closed again; a failure to open it is
 * reported through log_err. */
76 static void IncompleteCntTest(void)
78 UErrorCode status
= U_ZERO_ERROR
;
83 UCollator
*coll
= NULL
;
84 uint32_t i
= 0, j
= 0;
87 u_uastrcpy(temp
, " & Z < ABC < Q < B");
89 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
91 if(U_SUCCESS(status
)) {
92 size
= sizeof(cnt1
)/sizeof(cnt1
[0]);
93 for(i
= 0; i
< size
-1; i
++) {
94 for(j
= i
+1; j
< size
; j
++) {
95 UCollationElements
*iter
;
96 u_uastrcpy(t1
, cnt1
[i
]);
97 u_uastrcpy(t2
, cnt1
[j
]);
98 doTest(coll
, t1
, t2
, UCOL_LESS
);
99 /* synwee : added collation element iterator test */
100 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
101 if (U_FAILURE(status
)) {
102 log_err("Creation of iterator failed\n");
106 ucol_closeElements(iter
);
/* Second pass: same pairwise check with a different rule string and cnt2. */
/* NOTE(review): coll is assigned a new collator below; this listing does not
 * show the first collator being closed beforehand - confirm it is. */
114 u_uastrcpy(temp
, " & Z < DAVIS < MARK <DAV");
115 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
117 if(U_SUCCESS(status
)) {
118 size
= sizeof(cnt2
)/sizeof(cnt2
[0]);
119 for(i
= 0; i
< size
-1; i
++) {
120 for(j
= i
+1; j
< size
; j
++) {
121 UCollationElements
*iter
;
122 u_uastrcpy(t1
, cnt2
[i
]);
123 u_uastrcpy(t2
, cnt2
[j
]);
124 doTest(coll
, t1
, t2
, UCOL_LESS
);
126 /* synwee : added collation element iterator test */
127 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
128 if (U_FAILURE(status
)) {
129 log_err("Creation of iterator failed\n");
133 ucol_closeElements(iter
);
143 const static char shifted
[][20] = {
155 const static UCollationResult shiftedTert
[] = {
167 const static char nonignorable
[][20] = {
/* Checks the ordering of the `nonignorable` and `shifted` string tables with
 * an en_US collator in three configurations:
 *   1. normalization off, alternate handling non-ignorable: every pair i < j
 *      of `nonignorable` must compare UCOL_LESS;
 *   2. alternate handling shifted, quaternary strength: every pair i < j of
 *      `shifted` must compare UCOL_LESS;
 *   3. tertiary strength (alternate handling is not changed again in the
 *      visible code): each adjacent pair shifted[i-1], shifted[i] must
 *      compare as shiftedTert[i] says.
 * Each stage only runs when the preceding ucol_setAttribute calls left
 * status in a success state. */
179 static void BlackBirdTest(void) {
180 UErrorCode status
= U_ZERO_ERROR
;
184 uint32_t i
= 0, j
= 0;
186 UCollator
*coll
= ucol_open("en_US", &status
);
188 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
189 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
191 if(U_SUCCESS(status
)) {
192 size
= sizeof(nonignorable
)/sizeof(nonignorable
[0]);
193 for(i
= 0; i
< size
-1; i
++) {
194 for(j
= i
+1; j
< size
; j
++) {
195 u_uastrcpy(t1
, nonignorable
[i
]);
196 u_uastrcpy(t2
, nonignorable
[j
]);
197 doTest(coll
, t1
, t2
, UCOL_LESS
);
/* Stage 2: shifted alternates at quaternary strength. */
202 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
203 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
205 if(U_SUCCESS(status
)) {
206 size
= sizeof(shifted
)/sizeof(shifted
[0]);
207 for(i
= 0; i
< size
-1; i
++) {
208 for(j
= i
+1; j
< size
; j
++) {
209 u_uastrcpy(t1
, shifted
[i
]);
210 u_uastrcpy(t2
, shifted
[j
]);
211 doTest(coll
, t1
, t2
, UCOL_LESS
);
/* Stage 3: drop to tertiary strength and compare neighbours only, using the
 * per-index expectations in shiftedTert. */
216 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
217 if(U_SUCCESS(status
)) {
218 size
= sizeof(shifted
)/sizeof(shifted
[0]);
219 for(i
= 1; i
< size
; i
++) {
220 u_uastrcpy(t1
, shifted
[i
-1]);
221 u_uastrcpy(t2
, shifted
[i
]);
222 doTest(coll
, t1
, t2
, shiftedTert
[i
]);
229 const static UChar testSourceCases
[][MAX_TOKEN_LEN
] = {
230 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
231 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
232 {0x0041/*'A'*/, 0x0300, 0x0000},
233 {0x00C0, 0x0301, 0x0000},
234 /* this would work with forced normalization */
235 {0x00C0, 0x0316, 0x0000}
238 const static UChar testTargetCases
[][MAX_TOKEN_LEN
] = {
239 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
240 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
242 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
243 /* this would work with forced normalization */
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
247 const static UCollationResult results
[] = {
/* Opens an en_US collator, turns normalization on, sets tertiary strength and
 * compares testSourceCases[i] with testTargetCases[i] for i = 0..3, expecting
 * results[i] each time. The collator is closed at the end. A failure to open
 * the collator is reported with log_err. */
/* NOTE(review): the loop bound is the literal 4 rather than a count derived
 * from the tables, so any table row beyond index 3 is not exercised - confirm
 * that is intended. */
255 static void FunkyATest(void)
259 UErrorCode status
= U_ZERO_ERROR
;
260 UCollator
*myCollation
;
261 myCollation
= ucol_open("en_US", &status
);
262 if(U_FAILURE(status
)){
263 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
266 log_verbose("Testing some A letters, for some reason\n");
267 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
268 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
269 for (i
= 0; i
< 4 ; i
++)
271 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
273 ucol_close(myCollation
);
276 UColAttributeValue caseFirst
[] = {
283 UColAttributeValue alternateHandling
[] = {
288 UColAttributeValue caseLevel
[] = {
293 UColAttributeValue strengths
[] = {
302 static const char * strengthsC
[] = {
310 static const char * caseFirstC
[] = {
317 static const char * alternateHandlingC
[] = {
318 "UCOL_NON_IGNORABLE",
322 static const char * caseLevelC
[] = {
327 /* not used currently - does not test only prints */
/* Diagnostic helper: writes the code units of the string m to stderr as
 * \uXXXX escapes, then iterates over every combination of the caseFirst,
 * alternateHandling, caseLevel and strengths tables, applies each value to an
 * en_US collator with ucol_setAttribute, and prints the attribute names
 * (from the parallel *C name tables) followed by the resulting sort key as
 * rendered by ucol_sortKeyToString. Nothing is asserted. */
328 static void PrintMarkDavis(void)
330 UErrorCode status
= U_ZERO_ERROR
;
332 uint8_t sortkey
[256];
333 UCollator
*coll
= ucol_open("en_US", &status
);
334 uint32_t h
,i
,j
,k
, sortkeysize
;
339 log_verbose("PrintMarkDavis");
341 u_uastrcpy(m
, "Mark Davis");
347 for(i
= 0; i
<sizem
; i
++) {
348 fprintf(stderr
, "\\u%04X ", m
[i
]);
350 fprintf(stderr
, "\n");
352 for(h
= 0; h
<sizeof(caseFirst
)/sizeof(caseFirst
[0]); h
++) {
/* NOTE(review): caseFirst is indexed with i here although this loop runs on
 * h (and the name printed below uses caseFirstC[h]). i still holds whatever
 * an earlier/inner loop left in it, which can be past the end of caseFirst.
 * This looks like it should be caseFirst[h] - confirm and fix. */
353 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, caseFirst
[i
], &status
);
354 fprintf(stderr
, "caseFirst: %s\n", caseFirstC
[h
]);
356 for(i
= 0; i
<sizeof(alternateHandling
)/sizeof(alternateHandling
[0]); i
++) {
357 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, alternateHandling
[i
], &status
);
358 fprintf(stderr
, " AltHandling: %s\n", alternateHandlingC
[i
]);
360 for(j
= 0; j
<sizeof(caseLevel
)/sizeof(caseLevel
[0]); j
++) {
361 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, caseLevel
[j
], &status
);
362 fprintf(stderr
, " caseLevel: %s\n", caseLevelC
[j
]);
364 for(k
= 0; k
<sizeof(strengths
)/sizeof(strengths
[0]); k
++) {
365 ucol_setAttribute(coll
, UCOL_STRENGTH
, strengths
[k
], &status
);
/* The sort key is written into the fixed 256-byte sortkey buffer. */
366 sortkeysize
= ucol_getSortKey(coll
, m
, sizem
, sortkey
, 256);
367 fprintf(stderr
, " strength: %s\n Sortkey: ", strengthsC
[k
]);
368 fprintf(stderr
, "%s\n", ucol_sortKeyToString(coll
, sortkey
, buffer
, &len
));
/* Opens a resource bundle for a locale name that does not exist
 * ("fr_FR_you_ll_never_find_this_locale"), fetches its "collations" item,
 * asks that item for its locale and, if all calls succeeded, reports an error
 * unless the locale string is exactly "fr". */
/* NOTE(review): this listing does not show lr and cr being closed - confirm
 * both bundles are released. */
379 static void BillFairmanTest(void) {
381 ** check for actual locale via ICU resource bundles
383 ** lp points to the original locale ("fr_FR_....")
386 UResourceBundle
*lr
,*cr
;
387 UErrorCode lec
= U_ZERO_ERROR
;
388 const char *lp
= "fr_FR_you_ll_never_find_this_locale";
390 log_verbose("BillFairmanTest\n");
/* All three calls share the error code lec; it is only inspected once,
 * after the last call. */
392 lr
= ures_open(NULL
,lp
,&lec
);
394 cr
= ures_getByKey(lr
,"collations",0,&lec
);
396 lp
= ures_getLocale(cr
,&lec
);
398 if (U_SUCCESS(lec
)) {
399 if(strcmp(lp
, "fr") != 0) {
400 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp
);
/* Checks a primary-level difference between p and q with collator col:
 * doTest(col, p, q, UCOL_LESS) is called on the strings as given, and again
 * on local 256-element buffers into which p and q were copied starting at
 * index 1 (what is stored at index 0 is not visible in this listing).
 *
 * Several statements in this function (the preP/preQ definitions and the
 * ucol_strcoll / aescstrdup / fprintf fallback) presumably belong to a
 * disabled region whose delimiters are missing from this listing - confirm
 * against the real file.
 * NOTE(review): if that region is ever revived, preQ is computed from *p
 * rather than *q, which looks like a copy/paste slip.
 * NOTE(review): u_strcpy into source+1 / target+1 is unbounded; p and q must
 * be shorter than 255 code units. */
410 static void testPrimary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
411 UChar source
[256] = { '\0'};
412 UChar target
[256] = { '\0'};
416 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
417 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
419 /*log_verbose("Testing primary\n");*/
421 doTest(col
, p
, q
, UCOL_LESS
);
423 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
425 if(result!=UCOL_LESS){
426 aescstrdup(p,utfSource,256);
427 aescstrdup(q,utfTarget,256);
428 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
432 u_strcpy(source
+1,p
);
434 u_strcpy(target
+1,q
);
435 doTest(col
, source
, target
, UCOL_LESS
);
437 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
/* Checks a secondary-level difference between p and q with collator col in
 * three steps, each through doTest:
 *   1. p vs q must be UCOL_LESS;
 *   2. copies of p and q placed at index 1 of local 256-element buffers must
 *      be UCOL_LESS (the value stored at index 0 is not visible here);
 *   3. after writing 0x62 at index u_strlen(p) of source and 0x61 at index
 *      u_strlen(q) of target (each followed by a terminator), the result
 *      must be UCOL_GREATER.
 * The fprintf(file, ...) statements presumably belong to disabled code whose
 * delimiters are missing from this listing - confirm against the real file.
 * NOTE(review): the buffer writes are unbounded; p and q must be shorter
 * than 255 code units. */
441 static void testSecondary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
442 UChar source
[256] = { '\0'};
443 UChar target
[256] = { '\0'};
445 /*log_verbose("Testing secondary\n");*/
447 doTest(col
, p
, q
, UCOL_LESS
);
449 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
452 u_strcpy(source
+1,p
);
454 u_strcpy(target
+1,q
);
456 doTest(col
, source
, target
, UCOL_LESS
);
458 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
463 source
[u_strlen(p
)] = 0x62;
464 source
[u_strlen(p
)+1] = 0;
468 target
[u_strlen(q
)] = 0x61;
469 target
[u_strlen(q
)+1] = 0;
471 doTest(col
, source
, target
, UCOL_GREATER
);
474 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
/* Checks a tertiary-level difference between p and q with collator col in
 * three steps, each through doTest:
 *   1. p vs q must be UCOL_LESS;
 *   2. copies of p and q placed at index 1 of local 256-element buffers must
 *      be UCOL_LESS (the value stored at index 0 is not visible here);
 *   3. after writing 0xE0 at index u_strlen(p) of source and 0x61 at index
 *      u_strlen(q) of target (each followed by a terminator), the result
 *      must be UCOL_GREATER.
 * The fprintf(file, ...) statements presumably belong to disabled code whose
 * delimiters are missing from this listing - confirm against the real file.
 * NOTE(review): the buffer writes are unbounded; p and q must be shorter
 * than 255 code units. */
478 static void testTertiary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
479 UChar source
[256] = { '\0'};
480 UChar target
[256] = { '\0'};
482 /*log_verbose("Testing tertiary\n");*/
484 doTest(col
, p
, q
, UCOL_LESS
);
486 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
489 u_strcpy(source
+1,p
);
491 u_strcpy(target
+1,q
);
493 doTest(col
, source
, target
, UCOL_LESS
);
495 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
499 source
[u_strlen(p
)] = 0xE0;
500 source
[u_strlen(p
)+1] = 0;
503 target
[u_strlen(q
)] = 0x61;
504 target
[u_strlen(q
)+1] = 0;
506 doTest(col
, source
, target
, UCOL_GREATER
);
509 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
/* Checks that p and q compare UCOL_EQUAL under collator col, via doTest.
 * The local buffer declarations and the fprintf statement presumably belong
 * to disabled code whose delimiters are missing from this listing - confirm
 * against the real file. */
513 static void testEquality(UCollator
* col
, const UChar
* p
,const UChar
* q
){
515 UChar source[256] = { '\0'};
516 UChar target[256] = { '\0'};
519 doTest(col
, p
, q
, UCOL_EQUAL
);
521 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
/* Walks the tailoring rules of coll one token at a time and checks the
 * relation between consecutive tailored strings.
 *
 * The rule text returned by ucol_getRules is copied into a heap buffer with
 * UCOL_TOK_EXTRA_RULE_SPACE_SIZE spare UChars and parsed with
 * ucol_tok_parseNextToken. For every token, `second` is filled from the
 * token's characters, extensions/prefixes are folded into `first`, and the
 * pair is handed to one of testEquality / testPrimary / testSecondary /
 * testTertiary (the dispatch condition is not visible in this listing;
 * presumably the token strength). `second` then becomes the new `first`.
 * Tokens flagged UCOL_TOK_BEFORE cause first and second to be swapped around
 * the comparison.
 *
 * coll   - collator whose rules are checked.
 * status - in/out ICU error code; nothing is parsed unless it indicates
 *          success and the rule length is positive. */
525 static void testCollator(UCollator
*coll
, UErrorCode
*status
) {
526 const UChar
*rules
= NULL
, *current
= NULL
;
528 uint32_t strength
= 0;
529 uint32_t chOffset
= 0; uint32_t chLen
= 0;
530 uint32_t exOffset
= 0; uint32_t exLen
= 0;
531 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
532 uint32_t firstEx
= 0;
533 /* uint32_t rExpsLen = 0; */
534 uint32_t firstLen
= 0;
535 UBool varT
= FALSE
; UBool top_
= TRUE
;
537 UBool startOfRules
= TRUE
;
538 UBool lastReset
= FALSE
;
539 UBool before
= FALSE
;
540 uint32_t beforeStrength
= 0;
548 UChar
*rulesCopy
= NULL
;
549 UParseError parseError
;
552 rules
= ucol_getRules(coll
, &ruleLen
);
553 if(U_SUCCESS(*status
) && ruleLen
> 0) {
/* NOTE(review): the malloc result is used by uprv_memcpy without a NULL
 * check, and no matching free is visible in this listing - confirm. */
554 rulesCopy
= (UChar
*)malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
555 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
/* Point the token parser at the copy; the spare space after the rules is
 * exposed through extraCurrent/extraEnd. */
556 src
.current
= src
.source
= rulesCopy
;
557 src
.end
= rulesCopy
+ruleLen
;
558 src
.extraCurrent
= src
.end
;
559 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
560 *first
= *second
= 0;
562 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
,&parseError
, status
)) != NULL
) {
/* Snapshot the fields of the token that was just parsed. */
563 strength
= src
.parsedToken
.strength
;
564 chOffset
= src
.parsedToken
.charsOffset
;
565 chLen
= src
.parsedToken
.charsLen
;
566 exOffset
= src
.parsedToken
.extensionOffset
;
567 exLen
= src
.parsedToken
.extensionLen
;
568 prefixOffset
= src
.parsedToken
.prefixOffset
;
569 prefixLen
= src
.parsedToken
.prefixLen
;
570 specs
= src
.parsedToken
.flags
;
572 startOfRules
= FALSE
;
573 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
574 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
575 if(top_
) { /* if reset is on top, the sequence is broken. We should have an empty string */
578 u_strncpy(second
,rulesCopy
+chOffset
, chLen
);
581 if(exLen
> 0 && firstEx
== 0) {
582 u_strncat(first
, rulesCopy
+exOffset
, exLen
);
583 first
[firstLen
+exLen
] = 0;
586 if(lastReset
== TRUE
&& prefixLen
!= 0) {
/* Make room for the prefix at the front of `first`, then copy it in. */
/* NOTE(review): the first u_strncpy copies from `first` into
 * `first+prefixLen`, i.e. source and destination overlap; confirm that
 * u_strncpy is safe for overlapping ranges or switch to a memmove-style
 * copy. */
587 u_strncpy(first
+prefixLen
, first
, firstLen
);
588 u_strncpy(first
, rulesCopy
+prefixOffset
, prefixLen
);
589 first
[firstLen
+prefixLen
] = 0;
590 firstLen
= firstLen
+prefixLen
;
593 if(before
== TRUE
) { /* swap first and second */
594 u_strcpy(tempB
, first
);
595 u_strcpy(first
, second
);
596 u_strcpy(second
, tempB
);
605 if(beforeStrength
< strength
) {
606 strength
= beforeStrength
;
614 testEquality(coll
,first
,second
);
617 testPrimary(coll
,first
,second
);
620 testSecondary(coll
,first
,second
);
623 testTertiary(coll
,first
,second
);
627 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
629 beforeStrength
= (specs
& UCOL_TOK_BEFORE
)-1;
636 if(before
== TRUE
&& strength
!= UCOL_TOK_RESET
) { /* first and second were swapped */
641 u_strcpy(first
, second
);
/* tst_strcoll-shaped adapter around ucol_strcoll: treats `collator` as a
 * UCollator pointer and returns the result of comparing source (sLen units)
 * with target (tLen units). The `object` argument is not used. */
648 static int ucaTest(void *collator
, const int object
, const UChar
*source
, const int sLen
, const UChar
*target
, const int tLen
) {
649 UCollator
*UCA
= (UCollator
*)collator
;
650 return ucol_strcoll(UCA
, source
, sLen
, target
, tLen
);
654 static int winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
656 LCID lcid = (LCID)collator;
657 return CompareString(lcid, 0, source, sLen, target, tLen);
/* Calls the comparison callback `func` on copies of s and t that have been
 * shifted one position to the right inside zero-initialised 256-element
 * buffers, passing lengths sLen+1 and tLen+1. What is written to index 0 of
 * each buffer is not visible in this listing.
 * NOTE(review): callers in probeStrength pass two extra UChar arguments
 * between opts and s; the parameter line that receives them is missing from
 * this listing - presumably they are the leading code units stored at
 * index 0. Confirm against the real file.
 * NOTE(review): u_strcpy into source+1 / target+1 is unbounded; s and t must
 * be shorter than 255 code units. */
664 static UCollationResult
swampEarlier(tst_strcoll
* func
, void *collator
, int opts
,
666 const UChar
*s
, const uint32_t sLen
,
667 const UChar
*t
, const uint32_t tLen
) {
668 UChar source
[256] = {0};
669 UChar target
[256] = {0};
672 u_strcpy(source
+1, s
);
674 u_strcpy(target
+1, t
);
676 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
/* Counterpart of swampEarlier: calls `func` on two zero-initialised
 * 256-element buffers with lengths sLen+1 and tLen+1. The statements that
 * fill the buffers are missing from this listing; going by the name and the
 * +1 lengths, presumably s and t are copied in and one extra code unit is
 * appended after each - confirm against the real file.
 * NOTE(review): callers in probeStrength pass two extra UChar arguments
 * between opts and s; the parameter line that receives them is missing from
 * this listing. */
679 static UCollationResult
swampLater(tst_strcoll
* func
, void *collator
, int opts
,
681 const UChar
*s
, const uint32_t sLen
,
682 const UChar
*t
, const uint32_t tLen
) {
683 UChar source
[256] = {0};
684 UChar target
[256] = {0};
691 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
/* Estimates at which strength level s and t differ, given the `result` that
 * `func` already produced for them.
 *
 * UCOL_EQUAL maps straight to UCOL_IDENTICAL. Otherwise the pair is
 * re-compared through swampEarlier / swampLater with probe code units
 * (fSecondary/sSecondary, fTertiary/sTertiary) supplied in both orders, and
 * the first probe whose outcome matches decides the answer: the visible
 * returns are UCOL_SECONDARY, UCOL_TERTIARY and UCOL_QUATERNARY, with
 * UCOL_IDENTICAL as the fall-through. The return of the very first branch is
 * missing from this listing (presumably UCOL_PRIMARY).
 *
 * `oposite` holds the reverse of `result`; only the assignment of
 * UCOL_GREATER is visible here. The literal 0x310f used in some calls has
 * the same value as fTertiary. */
694 static uint32_t probeStrength(tst_strcoll
* func
, void *collator
, int opts
,
695 const UChar
*s
, const uint32_t sLen
,
696 const UChar
*t
, const uint32_t tLen
,
697 UCollationResult result
) {
698 /*UChar fPrimary = 0x6d;*/
699 /*UChar sPrimary = 0x6e;*/
700 UChar fSecondary
= 0x310d;
701 UChar sSecondary
= 0x31a3;
702 UChar fTertiary
= 0x310f;
703 UChar sTertiary
= 0x31b7;
705 UCollationResult oposite
;
706 if(result
== UCOL_EQUAL
) {
707 return UCOL_IDENTICAL
;
708 } else if(result
== UCOL_GREATER
) {
711 oposite
= UCOL_GREATER
;
/* Probe 1: the original result survives a secondary-different prefix. */
714 if(swampEarlier(func
, collator
, opts
, sSecondary
, fSecondary
, s
, sLen
, t
, tLen
) == result
) {
/* Probe 2: the original result survives a tertiary-different prefix in both
 * orders. */
716 } else if((swampEarlier(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == result
) &&
717 (swampEarlier(func
, collator
, opts
, 0x310f, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
718 return UCOL_SECONDARY
;
/* Probe 3: the original result survives a tertiary-different suffix in both
 * orders. */
719 } else if((swampLater(func
, collator
, opts
, sTertiary
, fTertiary
, s
, sLen
, t
, tLen
) == result
) &&
720 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
721 return UCOL_TERTIARY
;
/* Probe 4: a tertiary-different suffix flips the result in both orders. */
722 } else if((swampLater(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == oposite
) &&
723 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == oposite
)) {
724 return UCOL_QUATERNARY
;
726 return UCOL_IDENTICAL
;
/* Builds a short textual relation marker in `buffer` for a comparison result
 * `res` at `strength`.
 *
 * Three cases are distinguished: res == UCOL_EQUAL or the sentinel strength
 * 0xdeadbeef; res == UCOL_GREATER; and everything else. In the latter two
 * the loop runs strength+1 times and the buffer is terminated at index
 * strength+1, so the marker is strength+1 characters long. The characters
 * written and the return statement are missing from this listing
 * (presumably `buffer` is returned).
 * NOTE(review): `buffer` must have room for strength+2 chars; no size is
 * passed in. */
730 static char *getRelationSymbol(UCollationResult res
, uint32_t strength
, char *buffer
) {
733 if(res
== UCOL_EQUAL
|| strength
== 0xdeadbeef) {
737 } else if(res
== UCOL_GREATER
) {
738 for(i
= 0; i
<strength
+1; i
++) {
741 buffer
[strength
+1] = '\0';
743 for(i
= 0; i
<strength
+1; i
++) {
746 buffer
[strength
+1] = '\0';
/* Reports a mismatch between the expected and the actual relation of two
 * strings.
 *
 * platform, test         - labels appended to the report line.
 * source/sLen, target/tLen - the two strings that were compared.
 * realRes, realStrength  - what the comparison produced.
 * expRes, expStrength    - what the rules said it should be.
 * error                  - presumably selects log_err over verbose-only
 *                          output; the controlling condition is missing from
 *                          this listing.
 *
 * Each code unit of source and target is formatted as four hex digits (and
 * additionally as "(%c)" when it is below 0x80). The report line starts with
 * "DIFF: ", then `test`, the expected relation, `platform` and the actual
 * relation (relations rendered by getRelationSymbol), and is emitted with
 * log_verbose. The longest line seen so far is tracked in the static
 * maxOutputLength.
 *
 * The strcpy/strcat run starting with "[[ " presumably belongs to disabled
 * code whose delimiters are missing from this listing.
 * NOTE(review): all formatting goes through unbounded sprintf/strcat into
 * fixed-size stack buffers (256 and 512 chars). */
754 static void logFailure (const char *platform
, const char *test
,
755 const UChar
*source
, const uint32_t sLen
,
756 const UChar
*target
, const uint32_t tLen
,
757 UCollationResult realRes
, uint32_t realStrength
,
758 UCollationResult expRes
, uint32_t expStrength
, UBool error
) {
762 char sEsc
[256], s
[256], tEsc
[256], t
[256], b
[256], output
[512], relation
[256];
763 static int32_t maxOutputLength
= 0;
764 int32_t outputLength
;
766 *sEsc
= *tEsc
= *s
= *t
= 0;
768 log_err("Difference between expected and generated order. Run test with -v for more info\n");
769 } else if(VERBOSITY
== 0) {
772 for(i
= 0; i
<sLen
; i
++) {
773 sprintf(b
, "%04X", source
[i
]);
778 if(source
[i
] < 0x80) {
779 sprintf(b
, "(%c)", source
[i
]);
783 for(i
= 0; i
<tLen
; i
++) {
784 sprintf(b
, "%04X", target
[i
]);
789 if(target
[i
] < 0x80) {
790 sprintf(b
, "(%c)", target
[i
]);
795 strcpy(output, "[[ ");
796 strcat(output, sEsc);
797 strcat(output, getRelationSymbol(expRes, expStrength, relation));
798 strcat(output, tEsc);
800 strcat(output, " : ");
802 strcat(output, sEsc);
803 strcat(output, getRelationSymbol(realRes, realStrength, relation));
804 strcat(output, tEsc);
805 strcat(output, " ]] ");
807 log_verbose("%s", output);
811 strcpy(output
, "DIFF: ");
814 strcat(output
, " : ");
817 strcat(output
, test
);
818 strcat(output
, ": ");
820 strcat(output
, sEsc
);
821 strcat(output
, getRelationSymbol(expRes
, expStrength
, relation
));
822 strcat(output
, tEsc
);
826 strcat(output
, platform
);
827 strcat(output
, ": ");
829 strcat(output
, sEsc
);
830 strcat(output
, getRelationSymbol(realRes
, realStrength
, relation
));
831 strcat(output
, tEsc
);
/* NOTE(review): the size assertion below runs only after `output` has been
 * fully written, so it can detect an overflow but not prevent it. */
833 outputLength
= (int32_t)strlen(output
);
834 if(outputLength
> maxOutputLength
) {
835 maxOutputLength
= outputLength
;
836 U_ASSERT(outputLength
< sizeof(output
));
839 log_verbose("%s\n", output
);
844 static void printOutRules(const UChar *rules) {
845 uint32_t len = u_strlen(rules);
850 fprintf(stdout, "Rules:");
852 for(i = 0; i<len; i++) {
853 if(rules[i]<0x7f && rules[i]>=0x20) {
854 toPrint = (char)rules[i];
857 fprintf(stdout, "\n&");
858 } else if(toPrint == ';') {
859 fprintf(stdout, "<<");
861 } else if(toPrint == ',') {
862 fprintf(stdout, "<<<");
865 fprintf(stdout, "%c", toPrint);
868 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
869 fprintf(stdout, "\\u%04X", rules[i]);
873 fprintf(stdout, "\n");
/* Compares `first` and `second` through the callback `func`, estimates the
 * strength of the difference with probeStrength, and calls logFailure when
 * the outcome does not match what the rule token demanded:
 *   - for an expected strength of UCOL_IDENTICAL, a failure is logged with
 *     UCOL_EQUAL as the expected result;
 *   - otherwise a failure is logged unless the result is UCOL_LESS at
 *     exactly `strength`.
 * `msg` is forwarded to logFailure as the platform label and `error` as its
 * error flag. The return statement is missing from this listing; callers add
 * the return value to a difference counter. */
883 static uint32_t testSwitch(tst_strcoll
* func
, void *collator
, int opts
, uint32_t strength
, const UChar
*first
, const UChar
*second
, const char* msg
, UBool error
) {
885 UCollationResult realResult
;
886 uint32_t realStrength
;
888 uint32_t sLen
= u_strlen(first
);
889 uint32_t tLen
= u_strlen(second
);
891 realResult
= func(collator
, opts
, first
, sLen
, second
, tLen
);
892 realStrength
= probeStrength(func
, collator
, opts
, first
, sLen
, second
, tLen
, realResult
);
/* NOTE(review): realResult is a UCollationResult but is compared with the
 * strength constant UCOL_IDENTICAL here, while the expectation handed to
 * logFailure is UCOL_EQUAL. This looks like it was meant to be
 * `realResult != UCOL_EQUAL` - confirm. */
894 if(strength
== UCOL_IDENTICAL
&& realResult
!= UCOL_IDENTICAL
) {
895 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_EQUAL
, strength
, error
);
897 } else if(realResult
!= UCOL_LESS
|| realStrength
!= strength
) {
898 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_LESS
, strength
, error
);
/* Replays the tailoring rules of `coll` against a reference collator `UCA`.
 *
 * The rule text is copied into a heap buffer (with
 * UCOL_TOK_EXTRA_RULE_SPACE_SIZE spare UChars) and parsed token by token.
 * For each non-reset token whose `first` and `second` strings both start
 * outside the range 0x3400..0x9FFF, testSwitch is run with ucaTest on the
 * reference collator and its return value is added to UCAdiff. `second` then
 * becomes the new `first`.
 *
 * coll    - collator whose rules are replayed.
 * UCA     - reference collator used for the comparisons.
 * refName - label for the reference, used in log output.
 * error   - forwarded to testSwitch.
 * status  - in/out ICU error code; nothing is parsed unless it indicates
 *           success and the rule length is positive. */
905 static void testAgainstUCA(UCollator
*coll
, UCollator
*UCA
, const char *refName
, UBool error
, UErrorCode
*status
) {
906 const UChar
*rules
= NULL
, *current
= NULL
;
908 uint32_t strength
= 0;
909 uint32_t chOffset
= 0; uint32_t chLen
= 0;
910 uint32_t exOffset
= 0; uint32_t exLen
= 0;
911 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
912 /* uint32_t rExpsLen = 0; */
913 uint32_t firstLen
= 0, secondLen
= 0;
914 UBool varT
= FALSE
; UBool top_
= TRUE
;
916 UBool startOfRules
= TRUE
;
922 UChar
*rulesCopy
= NULL
;
924 uint32_t UCAdiff
= 0;
/* Windiff starts at 1 and the only statement that would update it (the
 * winTest call further down) is commented out. */
925 uint32_t Windiff
= 1;
926 UParseError parseError
;
930 rules
= ucol_getRules(coll
, &ruleLen
);
932 /*printOutRules(rules);*/
934 if(U_SUCCESS(*status
) && ruleLen
> 0) {
/* NOTE(review): the malloc result is used by uprv_memcpy without a NULL
 * check, and no matching free is visible in this listing - confirm. */
935 rulesCopy
= (UChar
*)malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
936 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
937 src
.current
= src
.source
= rulesCopy
;
938 src
.end
= rulesCopy
+ruleLen
;
939 src
.extraCurrent
= src
.end
;
940 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
941 *first
= *second
= 0;
943 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
/* Snapshot the fields of the token that was just parsed. */
944 strength
= src
.parsedToken
.strength
;
945 chOffset
= src
.parsedToken
.charsOffset
;
946 chLen
= src
.parsedToken
.charsLen
;
947 exOffset
= src
.parsedToken
.extensionOffset
;
948 exLen
= src
.parsedToken
.extensionLen
;
949 prefixOffset
= src
.parsedToken
.prefixOffset
;
950 prefixLen
= src
.parsedToken
.prefixLen
;
951 specs
= src
.parsedToken
.flags
;
953 startOfRules
= FALSE
;
954 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
955 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
957 u_strncpy(second
,rulesCopy
+chOffset
, chLen
);
962 u_strncat(first
, rulesCopy
+exOffset
, exLen
);
963 first
[firstLen
+exLen
] = 0;
967 if(strength
!= UCOL_TOK_RESET
) {
/* Pairs where either string starts in 0x3400..0x9FFF are skipped. */
968 if((*first
<0x3400 || *first
>=0xa000) && (*second
<0x3400 || *second
>=0xa000)) {
969 UCAdiff
+= testSwitch(&ucaTest
, (void *)UCA
, 0, strength
, first
, second
, refName
, error
);
970 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
976 u_strcpy(first
, second
);
/* NOTE(review): with Windiff never updated by visible code, the Windiff half
 * of this condition is always true. */
979 if(UCAdiff
!= 0 && Windiff
!= 0) {
983 log_verbose("No immediate difference with %s!\n", refName
);
986 log_verbose("No immediate difference with Win32!\n");
993 * Takes two CEs (lead and continuation) and
994 * compares them as CEs should be compared:
995 * primary vs. primary, secondary vs. secondary
996 * tertiary vs. tertiary
/* Compares collation element pair (s1, s2) with pair (t1, t2), where the
 * second member of each pair is the continuation CE.
 *
 * After an all-equal shortcut, three packed values are built level by level:
 *   - bits 31..16 of the lead CE in the high half, bits 31..16 of the
 *     continuation in the low half;
 *   - bits 15..8 of the lead CE kept in place, bits 15..8 of the
 *     continuation shifted down into bits 7..0 (">>" binds tighter than "|",
 *     so only the continuation term is shifted);
 *   - bits 7..0 of the lead CE shifted up into bits 15..8, bits 7..0 of the
 *     continuation kept in place.
 * The comparisons of s against t and the return statements are missing from
 * this listing; callers test the result with "> 0" and "< 0". */
998 static int32_t compareCEs(uint32_t s1
, uint32_t s2
,
999 uint32_t t1
, uint32_t t2
) {
1000 uint32_t s
= 0, t
= 0;
1001 if(s1
== t1
&& s2
== t2
) {
1004 s
= (s1
& 0xFFFF0000)|((s2
& 0xFFFF0000)>>16);
1005 t
= (t1
& 0xFFFF0000)|((t2
& 0xFFFF0000)>>16);
1011 s
= (s1
& 0x0000FF00) | (s2
& 0x0000FF00)>>8;
1012 t
= (t1
& 0x0000FF00) | (t2
& 0x0000FF00)>>8;
1018 s
= (s1
& 0x000000FF)<<8 | (s2
& 0x000000FF);
1019 t
= (t1
& 0x000000FF)<<8 | (t2
& 0x000000FF);
1031 uint32_t startContCE
;
1033 uint32_t limitContCE
;
1034 } indirectBoundaries
;
1036 /* these values are used for finding CE values for indirect positioning. */
1037 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1038 /* values. It only works for resets and you cannot tailor indirect names */
1039 /* An indirect name can define either an anchor point or a range. An */
1040 /* anchor point behaves in exactly the same way as a code point in reset */
1041 /* would, except that it cannot be tailored. A range (we currently only */
1042 /* know of the [top] range) will explicitly set the upper bound for */
1043 /* generated CEs, thus allowing for better control over how many CEs can */
1044 /* be squeezed between in the range without performance penalty. */
1045 /* In that respect, we use [top] for tailoring of locales that use CJK */
1046 /* characters. Other indirect values are currently a pure convenience, */
1047 /* they can be used to assure that the CEs will be always positioned in */
1048 /* the same place relative to a point with known properties (e.g. first */
1049 /* primary ignorable). */
1050 static indirectBoundaries ucolIndirectBoundaries
[15];
1051 static UBool indirectBoundariesSet
= FALSE
;
/* Fills entry `indexR` of the ucolIndirectBoundaries table.
 *
 * indexR - table slot to write (the table has 15 entries; no range check is
 *          done here).
 * start  - two-element array: lead CE and continuation CE of the start.
 * end    - two-element array with the limit CEs. Callers pass 0 when there
 *          is no limit; the visible code has one path that copies
 *          end[0]/end[1] and one that stores 0 for both limit fields (the
 *          selecting condition is missing from this listing, presumably a
 *          NULL test on `end`). */
1052 static void setIndirectBoundaries(uint32_t indexR
, uint32_t *start
, uint32_t *end
) {
1054 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1055 /* to initialize here. */
1056 ucolIndirectBoundaries
[indexR
].startCE
= start
[0];
1057 ucolIndirectBoundaries
[indexR
].startContCE
= start
[1];
1059 ucolIndirectBoundaries
[indexR
].limitCE
= end
[0];
1060 ucolIndirectBoundaries
[indexR
].limitContCE
= end
[1];
1062 ucolIndirectBoundaries
[indexR
].limitCE
= 0;
1063 ucolIndirectBoundaries
[indexR
].limitContCE
= 0;
/* Checks the collation elements that `coll` generates for its own tailoring
 * rules.
 *
 * On first use the ucolIndirectBoundaries table is populated from the
 * UCAConstants block of the "root" collator. The rules are then copied into
 * a heap buffer and parsed token by token. For each token the first CE and
 * the following CE of the token's characters are fetched with
 * ucol_getNextCE. A reset token establishes the base CE pair (taken from the
 * indirect-boundaries table for one visible branch, from the fetched CEs for
 * the other). For other tokens the next CE after the base is looked up with
 * ucol_inv_getNextCE (or taken from the [top] constants) and the current CE
 * pair is checked with compareCEs against that bound and against the
 * previously seen pair; violations are reported through log_err.
 *
 * coll   - collator under test.
 * status - in/out ICU error code, also used for opening the root collator. */
1067 static void testCEs(UCollator
*coll
, UErrorCode
*status
) {
1069 const UChar
*rules
= NULL
, *current
= NULL
;
1070 int32_t ruleLen
= 0;
1072 uint32_t strength
= 0;
1073 uint32_t maxStrength
= UCOL_IDENTICAL
;
1074 uint32_t baseCE
, baseContCE
, nextCE
, nextContCE
, currCE
, currContCE
;
1076 uint32_t lastContCE
;
1079 uint32_t chOffset
= 0; uint32_t chLen
= 0;
1080 uint32_t exOffset
= 0; uint32_t exLen
= 0;
1081 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
1082 uint32_t oldOffset
= 0;
1084 /* uint32_t rExpsLen = 0; */
1085 /* uint32_t firstLen = 0; */
1087 UBool varT
= FALSE
; UBool top_
= TRUE
;
1088 UBool startOfRules
= TRUE
;
1089 UBool before
= FALSE
;
1090 UColTokenParser src
;
1092 UParseError parseError
;
1093 UChar
*rulesCopy
= NULL
;
1095 UCollator
*UCA
= ucol_open("root", status
);
/* NOTE(review): UCA is dereferenced on the next line before *status has been
 * checked; if ucol_open failed this may be a NULL dereference - confirm. */
1096 UCAConstants
*consts
= (UCAConstants
*)((uint8_t *)UCA
->image
+ UCA
->image
->UCAConsts
);
/* The upper-case names below are local variables, not macros. */
1097 uint32_t UCOL_RESET_TOP_VALUE
= consts
->UCA_LAST_NON_VARIABLE
[0], /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1], */
1098 UCOL_NEXT_TOP_VALUE
= consts
->UCA_FIRST_IMPLICIT
[0], UCOL_NEXT_TOP_CONT
= consts
->UCA_FIRST_IMPLICIT
[1];
1100 baseCE
=baseContCE
=nextCE
=nextContCE
=currCE
=currContCE
=lastCE
=lastContCE
= UCOL_NOT_FOUND
;
1104 rules
= ucol_getRules(coll
, &ruleLen
);
1106 src
.invUCA
= ucol_initInverseUCA(status
);
/* One-time initialisation of the indirect-boundaries table, guarded by the
 * file-level flag indirectBoundariesSet. */
1108 if(indirectBoundariesSet
== FALSE
) {
1109 /* UCOL_RESET_TOP_VALUE */
1110 setIndirectBoundaries(0, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1111 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1112 setIndirectBoundaries(1, consts
->UCA_FIRST_PRIMARY_IGNORABLE
, 0);
1113 /* UCOL_LAST_PRIMARY_IGNORABLE */
1114 setIndirectBoundaries(2, consts
->UCA_LAST_PRIMARY_IGNORABLE
, 0);
1115 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1116 setIndirectBoundaries(3, consts
->UCA_FIRST_SECONDARY_IGNORABLE
, 0);
1117 /* UCOL_LAST_SECONDARY_IGNORABLE */
1118 setIndirectBoundaries(4, consts
->UCA_LAST_SECONDARY_IGNORABLE
, 0);
1119 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1120 setIndirectBoundaries(5, consts
->UCA_FIRST_TERTIARY_IGNORABLE
, 0);
1121 /* UCOL_LAST_TERTIARY_IGNORABLE */
1122 setIndirectBoundaries(6, consts
->UCA_LAST_TERTIARY_IGNORABLE
, 0);
1123 /* UCOL_FIRST_VARIABLE */
1124 setIndirectBoundaries(7, consts
->UCA_FIRST_VARIABLE
, 0);
1125 /* UCOL_LAST_VARIABLE */
1126 setIndirectBoundaries(8, consts
->UCA_LAST_VARIABLE
, 0);
1127 /* UCOL_FIRST_NON_VARIABLE */
1128 setIndirectBoundaries(9, consts
->UCA_FIRST_NON_VARIABLE
, 0);
1129 /* UCOL_LAST_NON_VARIABLE */
1130 setIndirectBoundaries(10, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1131 /* UCOL_FIRST_IMPLICIT */
1132 setIndirectBoundaries(11, consts
->UCA_FIRST_IMPLICIT
, 0);
1133 /* UCOL_LAST_IMPLICIT */
1134 setIndirectBoundaries(12, consts
->UCA_LAST_IMPLICIT
, consts
->UCA_FIRST_TRAILING
);
1135 /* UCOL_FIRST_TRAILING */
1136 setIndirectBoundaries(13, consts
->UCA_FIRST_TRAILING
, 0);
1137 /* UCOL_LAST_TRAILING */
1138 setIndirectBoundaries(14, consts
->UCA_LAST_TRAILING
, 0);
/* The last slot gets an explicit limit built from UCA_PRIMARY_SPECIAL_MIN
 * shifted into the top byte. */
1139 ucolIndirectBoundaries
[14].limitCE
= (consts
->UCA_PRIMARY_SPECIAL_MIN
<<24);
1140 indirectBoundariesSet
= TRUE
;
1144 if(U_SUCCESS(*status
) && ruleLen
> 0) {
/* NOTE(review): the malloc result is used by uprv_memcpy without a NULL
 * check, and no matching free is visible in this listing - confirm. */
1145 rulesCopy
= (UChar
*)malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
1146 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
1147 src
.current
= src
.source
= rulesCopy
;
1148 src
.end
= rulesCopy
+ruleLen
;
1149 src
.extraCurrent
= src
.end
;
1150 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
1152 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
/* Snapshot the fields of the token that was just parsed. */
1153 strength
= src
.parsedToken
.strength
;
1154 chOffset
= src
.parsedToken
.charsOffset
;
1155 chLen
= src
.parsedToken
.charsLen
;
1156 exOffset
= src
.parsedToken
.extensionOffset
;
1157 exLen
= src
.parsedToken
.extensionLen
;
1158 prefixOffset
= src
.parsedToken
.prefixOffset
;
1159 prefixLen
= src
.parsedToken
.prefixLen
;
1160 specs
= src
.parsedToken
.flags
;
1162 startOfRules
= FALSE
;
1163 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
1164 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
/* Iterate over the CEs of this token's characters. */
1166 uprv_init_collIterate(coll
, rulesCopy
+chOffset
, chLen
, &c
);
1168 currCE
= ucol_getNextCE(coll
, &c
, status
);
1169 if(currCE
== 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy
+chOffset
))) {
1170 log_verbose("Thai prevowel detected. Will pick next CE\n");
1171 currCE
= ucol_getNextCE(coll
, &c
, status
);
1174 currContCE
= ucol_getNextCE(coll
, &c
, status
);
1175 if(!isContinuation(currContCE
)) {
1179 /* we need to repack CEs here */
1181 if(strength
== UCOL_TOK_RESET
) {
1182 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
1184 int32_t index
= src
.parsedToken
.indirectIndex
;
1186 nextCE
= baseCE
= currCE
= ucolIndirectBoundaries
[index
].startCE
;
1187 nextContCE
= baseContCE
= currContCE
= ucolIndirectBoundaries
[index
].startContCE
;
1189 nextCE
= baseCE
= currCE
;
1190 nextContCE
= baseContCE
= currContCE
;
1192 maxStrength
= UCOL_IDENTICAL
;
/* maxStrength tracks the numerically smallest strength value seen since the
 * last reset. */
1194 if(strength
< maxStrength
) {
1195 maxStrength
= strength
;
1196 if(baseCE
== UCOL_RESET_TOP_VALUE
) {
1197 log_verbose("Resetting to [top]\n");
1198 nextCE
= UCOL_NEXT_TOP_VALUE
;
1199 nextContCE
= UCOL_NEXT_TOP_CONT
;
1201 result
= ucol_inv_getNextCE(&src
, baseCE
& 0xFFFFFF3F, baseContCE
, &nextCE
, &nextContCE
, maxStrength
);
1204 if(ucol_isTailored(coll
, *(rulesCopy
+oldOffset
), status
)) {
1205 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy
+oldOffset
));
1208 log_err("couldn't find the CE\n");
/* Clear bits 7..6 of the lead CE and bit 6 of the continuation CE before
 * comparing. */
1214 currCE
&= 0xFFFFFF3F;
1215 currContCE
&= 0xFFFFFFBF;
1217 if(maxStrength
== UCOL_IDENTICAL
) {
1218 if(baseCE
!= currCE
|| baseContCE
!= currContCE
) {
1219 log_err("current CE (initial strength UCOL_EQUAL)\n");
1222 if(strength
== UCOL_IDENTICAL
) {
1223 if(lastCE
!= currCE
|| lastContCE
!= currContCE
) {
1224 log_err("current CE (initial strength UCOL_EQUAL)\n");
1227 if(compareCEs(currCE
, currContCE
, nextCE
, nextContCE
) > 0) {
1228 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1229 log_err("current CE is not less than base CE\n");
1232 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) < 0) {
1233 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1234 log_err("sequence of generated CEs is broken\n");
1238 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) > 0) {
1239 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1240 log_err("sequence of generated CEs is broken\n");
/* Remember this token's offset and masked CE pair for the next iteration. */
1248 oldOffset
= chOffset
;
1249 lastCE
= currCE
& 0xFFFFFF3F;
1250 lastContCE
= currContCE
& 0xFFFFFFBF;
1258 /* these locales are now picked from index RB */
1259 static const char* localesToTest
[] = {
1260 "ar", "bg", "ca", "cs", "da",
1261 "el", "en_BE", "en_US_POSIX",
1262 "es", "et", "fi", "fr", "hi",
1263 "hr", "hu", "is", "iw", "ja",
1264 "ko", "lt", "lv", "mk", "mt",
1265 "nb", "nn", "nn_NO", "pl", "ro",
1266 "ru", "sh", "sk", "sl", "sq",
1267 "sr", "sv", "th", "tr", "uk",
1272 static const char* rulesToTest
[] = {
1274 "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
1276 /* Cui Mins rules */
1277 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
1278 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1279 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
1280 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1281 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
1282 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
1283 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
/* Runs testAgainstUCA for every available locale that has collation
 * elements.
 *
 * A reference collator is opened with the empty locale name and temporarily
 * switched to quaternary strength; its previous strength is restored at the
 * end. For each locale from uloc_getAvailable the display name is fetched
 * and narrowed to char for logging, a collator is opened, and on success it
 * is compared against the reference. Open failures are reported with
 * log_err. A verbose message is logged for "ja"; the statement that follows
 * it is missing from this listing (presumably the locale is skipped).
 * NOTE(review): closing of `coll` and `UCA` is not visible in this listing -
 * confirm both are released. */
1287 static void TestCollations(void) {
1288 int32_t noOfLoc
= uloc_countAvailable();
1289 int32_t i
= 0, j
= 0;
1291 UErrorCode status
= U_ZERO_ERROR
;
1297 const char *locName
= NULL
;
1298 UCollator
*coll
= NULL
;
1299 UCollator
*UCA
= ucol_open("", &status
);
1300 UColAttributeValue oldStrength
= ucol_getAttribute(UCA
, UCOL_STRENGTH
, &status
);
1301 ucol_setAttribute(UCA
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
1303 for(i
= 0; i
<noOfLoc
; i
++) {
/* Each locale starts with a clean error code. */
1304 status
= U_ZERO_ERROR
;
1305 locName
= uloc_getAvailable(i
);
1306 if(uprv_strcmp("ja", locName
) == 0) {
1307 log_verbose("Don't know how to test prefixes\n");
1310 if(hasCollationElements(locName
)) {
1311 nameSize
= uloc_getDisplayName(locName
, NULL
, name
, 256, &status
);
/* Narrow the UChar display name to char one unit at a time; units above
 * 0xFF are truncated by the cast. */
1312 for(j
= 0; j
<nameSize
; j
++) {
1313 cName
[j
] = (char)name
[j
];
1315 cName
[nameSize
] = 0;
1316 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1317 coll
= ucol_open(locName
, &status
);
1318 if(U_SUCCESS(status
)) {
1319 testAgainstUCA(coll
, UCA
, "UCA", FALSE
, &status
);
1322 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName
, u_errorName(status
));
1323 status
= U_ZERO_ERROR
;
/* Put the reference collator back to the strength it had on entry. */
1327 ucol_setAttribute(UCA
, UCOL_STRENGTH
, oldStrength
, &status
);
/*
 * RamsRulesTest: runs testCollator() and testCEs() over two groups of
 * collators:
 *   1. every available locale that hasCollationElements(), with
 *      UCOL_CASE_FIRST set to UCOL_OFF;
 *   2. every entry of rulesToTest, unescaped and compiled with
 *      ucol_openRules() (normalization off, UCOL_TERTIARY).
 * "ja" and "de__PHONEBOOK" are called out with a verbose note; they are
 * presumably skipped, but the skipping statement is not visible in this
 * excerpt.  A locale whose image has jamoSpecial set is reported via log_err.
 */
1331 static void RamsRulesTest(void) {
1332 UErrorCode status
= U_ZERO_ERROR
;
1334 UCollator
*coll
= NULL
;
1337 int32_t noOfLoc
= uloc_countAvailable();
1338 const char *locName
= NULL
;
1340 log_verbose("RamsRulesTest\n");
/* pass 1: collators of the installed locales */
1342 for(i
= 0; i
<noOfLoc
; i
++) {
1343 status
= U_ZERO_ERROR
;
1344 locName
= uloc_getAvailable(i
);
1345 if(hasCollationElements(locName
)) {
1346 if (uprv_strcmp("ja", locName
)==0) {
1347 log_verbose("Don't know how to test Japanese because of prefixes\n");
1350 if (uprv_strcmp("de__PHONEBOOK", locName
)==0) {
1351 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1354 log_verbose("Testing locale %s\n", locName
);
1355 coll
= ucol_open(locName
, &status
);
1356 if(U_SUCCESS(status
)) {
1357 if(coll
->image
->jamoSpecial
== TRUE
) {
1358 log_err("%s has special JAMOs\n", locName
);
1360 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_OFF
, &status
);
1361 testCollator(coll
, &status
);
1362 testCEs(coll
, &status
);
/* pass 2: collators built from the hand-written rules in rulesToTest */
1368 for(i
= 0; i
<sizeof(rulesToTest
)/sizeof(rulesToTest
[0]); i
++) {
1369 log_verbose("Testing rule: %s\n", rulesToTest
[i
]);
1370 ruleLen
= u_unescape(rulesToTest
[i
], rule
, 2048);
1371 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1372 if(U_SUCCESS(status
)) {
1373 testCollator(coll
, &status
);
1374 testCEs(coll
, &status
);
/*
 * IsTailoredTest: builds a collator from the rule "&Z < A, B, C;c < d" and
 * queries ucol_isTailored() one code unit at a time: every character of
 * "ABCcd" must be reported as tailored and every character of "ZabD" as not
 * tailored.  Mismatches are reported through log_err with the index and the
 * code unit.
 */
1381 static void IsTailoredTest(void) {
1382 UErrorCode status
= U_ZERO_ERROR
;
1384 UCollator
*coll
= NULL
;
1386 UChar tailored
[2048];
1387 UChar notTailored
[2048];
1388 uint32_t ruleLen
, tailoredLen
, notTailoredLen
;
1390 log_verbose("IsTailoredTest\n");
1392 u_uastrcpy(rule
, "&Z < A, B, C;c < d");
1393 ruleLen
= u_strlen(rule
);
1395 u_uastrcpy(tailored
, "ABCcd");
1396 tailoredLen
= u_strlen(tailored
);
1398 u_uastrcpy(notTailored
, "ZabD");
1399 notTailoredLen
= u_strlen(notTailored
);
1401 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1402 if(U_SUCCESS(status
)) {
/* characters that appear on the tailored side of the rule */
1403 for(i
= 0; i
<tailoredLen
; i
++) {
1404 if(!ucol_isTailored(coll
, tailored
[i
], &status
)) {
1405 log_err("%i: %04X should be tailored - it is reported as not\n", i
, tailored
[i
]);
/* characters expected to be reported as untouched by the rule */
1408 for(i
= 0; i
<notTailoredLen
; i
++) {
1409 if(ucol_isTailored(coll
, notTailored
[i
], &status
)) {
1410 log_err("%i: %04X should not be tailored - it is reported as it is\n", i
, notTailored
[i
]);
/* Escaped test strings for TestChMove, which expects every entry to compare
   UCOL_LESS against every later entry under the "cs" collator. */
1418 const static char chTest
[][20] = {
1421 "ca", "cb", "cx", "cy", "CZ",
1422 "c\\u030C", "C\\u030C",
1425 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
1426 "ch", "cH", "Ch", "CH",
1427 "cha", "charly", "che", "chh", "chch", "chr",
1430 "r\\u030C", "R\\u030C",
1433 "s\\u030C", "S\\u030C",
1435 "z\\u030C", "Z\\u030C"
/*
 * TestChMove: opens the "cs" collator and, for every pair i < j of chTest,
 * unescapes both strings and expects doTest() to find chTest[i] UCOL_LESS
 * than chTest[j].  If the collator cannot be opened, "Can't open collator"
 * is logged as an error.
 */
1438 static void TestChMove(void) {
1439 UChar t1
[256] = {0};
1440 UChar t2
[256] = {0};
1442 uint32_t i
= 0, j
= 0;
1444 UErrorCode status
= U_ZERO_ERROR
;
1446 UCollator
*coll
= ucol_open("cs", &status
);
1448 if(U_SUCCESS(status
)) {
1449 size
= sizeof(chTest
)/sizeof(chTest
[0]);
/* compare each entry with every entry that follows it */
1450 for(i
= 0; i
< size
-1; i
++) {
1451 for(j
= i
+1; j
< size
; j
++) {
1452 u_unescape(chTest
[i
], t1
, 256);
1453 u_unescape(chTest
[j
], t2
, 256);
1454 doTest(coll
, t1
, t2
, UCOL_LESS
);
1459 log_err("Can't open collator");
/* Strings compared pairwise (expecting UCOL_LESS) by the impTest loop that
   follows the table-driven part of TestImplicitTailoring.
   NOTE(review): the entries themselves are not visible in this excerpt. */
1467 const static char impTest
[][20] = {
/*
 * TestImplicitTailoring: table-driven test.  Each entry of tests pairs a rule
 * string with a list of strings and its length; all three are passed to
 * genericRulesStarter().
 * The code after that loop builds a collator from
 * "&\\u4e00 < a <<< A < b <<< B" and expects UCOL_LESS for every pair i < j
 * of impTest.
 * NOTE(review): that trailing part redeclares i and status and looks like a
 * retained older version of the test - confirm whether it is still compiled.
 */
1477 static void TestImplicitTailoring(void) {
1480 const char *data
[50];
1483 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1484 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1485 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1486 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1491 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
1492 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
1496 UChar t1[256] = {0};
1497 UChar t2[256] = {0};
1499 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1501 uint32_t i = 0, j = 0;
1503 uint32_t ruleLen = 0;
1504 UErrorCode status = U_ZERO_ERROR;
1505 UCollator *coll = NULL;
1506 ruleLen = u_unescape(rule, t1, 256);
1508 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1510 if(U_SUCCESS(status)) {
1511 size = sizeof(impTest)/sizeof(impTest[0]);
1512 for(i = 0; i < size-1; i++) {
1513 for(j = i+1; j < size; j++) {
1514 u_unescape(impTest[i], t1, 256);
1515 u_unescape(impTest[j], t2, 256);
1516 doTest(coll, t1, t2, UCOL_LESS);
1521 log_err("Can't open collator");
/*
 * TestFCDProblem: compares "\\u0430\\u0306\\u0325" with "\\u04D1\\u0325" using
 * the root ("") collator and expects UCOL_EQUAL twice: once with
 * UCOL_NORMALIZATION_MODE set to UCOL_OFF and once with it set to UCOL_ON.
 * NOTE(review): status is not checked after ucol_open() in the visible lines.
 */
1527 static void TestFCDProblem(void) {
1528 UChar t1
[256] = {0};
1529 UChar t2
[256] = {0};
1531 const char *s1
= "\\u0430\\u0306\\u0325";
1532 const char *s2
= "\\u04D1\\u0325";
1534 UErrorCode status
= U_ZERO_ERROR
;
1535 UCollator
*coll
= ucol_open("", &status
);
1536 u_unescape(s1
, t1
, 256);
1537 u_unescape(s2
, t2
, 256);
/* first pass: normalization switched off */
1539 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
1540 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
/* second pass: normalization switched on */
1542 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
1543 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
/* Capacity, in UChars, of the NFC/NFD buffers below and of the comp buffer in
   TestComposeDecompose; it is also the destination capacity passed to
   unorm_normalize() there. */
1548 #define NORM_BUFFER_TEST_LEN 32
/* Normalized forms stored per test case by TestComposeDecompose.
   NOTE(review): the enclosing type declaration is not visible here. */
1551 UChar NFC
[NORM_BUFFER_TEST_LEN
];
1552 UChar NFD
[NORM_BUFFER_TEST_LEN
];
/*
 * TestComposeDecompose: for u in 0 .. 0x2FFFF, appends u to comp, normalizes
 * comp to NFC and NFD into the current tester slot t[noCases], and keeps the
 * slot (allocating a fresh zeroed one) when the comparison below selects it.
 * Every kept NFC/NFD pair must then compare equal with ucol_equal():
 *   - first under the root ("") collator,
 *   - then under the collator of every available locale that
 *     hasCollationElements(), with strength set to UCOL_IDENTICAL.
 * Failures are logged with the code point and re-run through doTest().
 * The last loop walks t[0] .. t[noCases] inclusive; t[noCases] is the slot
 * that was allocated but not yet filled.
 * NOTE(review): the statements that reset len, advance noCases and release
 * the slots are not visible in this excerpt.
 */
1555 static void TestComposeDecompose(void) {
1557 int32_t i
= 0, j
= 0;
1559 UErrorCode status
= U_ZERO_ERROR
;
1561 const char *locName
= NULL
;
1566 uint32_t noCases
= 0;
1567 UCollator
*coll
= NULL
;
1569 UChar comp
[NORM_BUFFER_TEST_LEN
];
1571 UCollationElements
*iter
;
1573 noOfLoc
= uloc_countAvailable();
/* one pointer per code point in the scanned range; slots are allocated lazily */
1575 t
= malloc(0x30000 * sizeof(tester
*));
1576 t
[0] = (tester
*)malloc(sizeof(tester
));
1577 log_verbose("Testing UCA extensively\n");
1578 coll
= ucol_open("", &status
);
1579 if(status
== U_FILE_ACCESS_ERROR
) {
1580 log_data_err("Is your data around?\n");
1582 } else if(U_FAILURE(status
)) {
1583 log_err("Error opening collator\n");
1588 for(u
= 0; u
< 0x30000; u
++) {
1590 UTF_APPEND_CHAR_UNSAFE(comp
, len
, u
);
1591 nfcSize
= unorm_normalize(comp
, len
, UNORM_NFC
, 0, t
[noCases
]->NFC
, NORM_BUFFER_TEST_LEN
, &status
);
1592 nfdSize
= unorm_normalize(comp
, len
, UNORM_NFD
, 0, t
[noCases
]->NFD
, NORM_BUFFER_TEST_LEN
, &status
);
/* selected when NFC and NFD differ, or when NFD differs from the input */
1594 if(nfcSize
!= nfdSize
|| (uprv_memcmp(t
[noCases
]->NFC
, t
[noCases
]->NFD
, nfcSize
* sizeof(UChar
)) != 0)
1595 || (len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0))) {
/* when the input itself is not NFD, the stored NFC slot is overwritten with the raw input */
1597 if(len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0)) {
1598 u_strncpy(t
[noCases
]->NFC
, comp
, len
);
1599 t
[noCases
]->NFC
[len
] = 0;
1602 t
[noCases
] = (tester
*)malloc(sizeof(tester
));
1603 uprv_memset(t
[noCases
], 0, sizeof(tester
));
/* check all recorded cases against the root collator */
1607 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1608 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1609 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t
[u
]->u
);
1610 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1614 for(u = 0; u < 0x30000; u++) {
1616 log_verbose("%08X ", u);
1618 uprv_memset(t[noCases], 0, sizeof(tester));
1621 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1623 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1624 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1625 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1626 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1632 log_verbose("Testing locales, number of cases = %i\n", noCases
);
/* repeat the equality check with the collator of every tailored locale */
1633 for(i
= 0; i
<noOfLoc
; i
++) {
1634 status
= U_ZERO_ERROR
;
1635 locName
= uloc_getAvailable(i
);
1636 if(hasCollationElements(locName
)) {
1639 int32_t nameSize
= uloc_getDisplayName(locName
, NULL
, name
, sizeof(cName
), &status
);
1641 for(j
= 0; j
<nameSize
; j
++) {
1642 cName
[j
] = (char)name
[j
];
1644 cName
[nameSize
] = 0;
1645 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1647 coll
= ucol_open(locName
, &status
);
1648 ucol_setStrength(coll
, UCOL_IDENTICAL
);
/* the iterator is opened here on t[u] with whatever value u has at this
   point; its text is replaced via ucol_setText() inside the loop below */
1649 iter
= ucol_openElements(coll
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1651 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1652 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1653 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t
[u
]->u
, cName
);
1654 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1655 log_verbose("Testing NFC\n");
1656 ucol_setText(iter
, t
[u
]->NFC
, u_strlen(t
[u
]->NFC
), &status
);
1658 log_verbose("Testing NFD\n");
1659 ucol_setText(iter
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1663 ucol_closeElements(iter
);
/* inclusive bound: also visits the spare slot t[noCases] */
1667 for(u
= 0; u
<= (UChar32
)noCases
; u
++) {
/*
 * TestEmptyRule: calls ucol_openRules() with a zero-length rule string
 * (normalization off, UCOL_TERTIARY strength).
 * NOTE(review): what is done with coll afterwards is not visible in this
 * excerpt.
 */
1673 static void TestEmptyRule(void) {
1674 UErrorCode status
= U_ZERO_ERROR
;
1675 UChar rulez
[] = { 0 };
1676 UCollator
*coll
= ucol_openRules(rulez
, 0, UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
/*
 * TestUCARules: asks the root ("") collator for its full rules with
 * ucol_getRulesEx(UCOL_FULL_RULES), first with a capacity of 256 to obtain
 * the length, then into a heap buffer of ruleLen+1 UChars, and tries to
 * build a collator from those rules with ucol_openRules().  A collator that
 * opens successfully is closed again; failure only produces a verbose
 * message.
 * NOTE(review): the declaration of rules and the condition guarding the
 * malloc() are not visible in this excerpt.
 */
1681 static void TestUCARules(void) {
1682 UErrorCode status
= U_ZERO_ERROR
;
1685 uint32_t ruleLen
= 0;
1686 UCollator
*UCAfromRules
= NULL
;
1687 UCollator
*coll
= ucol_open("", &status
);
1688 if(status
== U_FILE_ACCESS_ERROR
) {
1689 log_data_err("Is your data around?\n");
1691 } else if(U_FAILURE(status
)) {
1692 log_err("Error opening collator\n");
/* first call: learn how long the full rule string is */
1695 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, 256);
1697 log_verbose("TestUCARules\n");
/* second call: fetch the rules into a buffer sized from that length */
1699 rules
= (UChar
*)malloc((ruleLen
+1)*sizeof(UChar
));
1700 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, ruleLen
);
1702 log_verbose("Rules length is %d\n", ruleLen
);
1703 UCAfromRules
= ucol_openRules(rules
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1704 if(U_SUCCESS(status
)) {
1705 ucol_close(UCAfromRules
);
1707 log_verbose("Unable to create a collator from UCARules!\n");
1710 u_unescape(blah, b, 256);
1711 ucol_getSortKey(coll, b, 1, res, 256);
1720 /* Pinyin tonal order */
1722 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1723 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1724 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1725 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1726 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1727 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1728 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1731 However, in testing we got the following order:
1732 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1733 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1734 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1736 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1737 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1738 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1740 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
/*
 * TestBefore: passes a rule string made of five "[before 1]" resets (on a, e,
 * i, o, u) plus a plain reset on u, together with the data list and its
 * element count, to genericRulesStarter().
 */
1743 static void TestBefore(void) {
1744 const static char *data
[] = {
1745 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
1746 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
1747 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
1748 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
1749 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
1750 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
1752 genericRulesStarter(
1753 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
1754 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
1755 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
1756 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
1757 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
1758 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
1759 data
, sizeof(data
)/sizeof(data
[0]));
1763 /* superseded by TestBeforePinyin */
/* TestJ784: runs genericLocaleStarter() for locale "zh" over the data list
   below, passing its element count. */
1764 static void TestJ784(void) {
1765 const static char *data
[] = {
1766 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
1767 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
1768 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
1769 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
1770 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
1772 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
1774 genericLocaleStarter("zh", data
, sizeof(data
)/sizeof(data
[0]));
1779 /* superseded by the changes to the lv locale */
/* TestJ831: runs genericLocaleStarter() for locale "lv" over its data list.
   NOTE(review): the data entries are not visible in this excerpt. */
1780 static void TestJ831(void) {
1781 const static char *data
[] = {
1787 genericLocaleStarter("lv", data
, sizeof(data
)/sizeof(data
[0]));
/* TestJ815: runs the same data list twice - through genericLocaleStarter()
   for locale "fr", and through genericRulesStarter() with the rule
   "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E".
   NOTE(review): the data entries are not visible in this excerpt. */
1791 static void TestJ815(void) {
1792 const static char *data
[] = {
1808 genericLocaleStarter("fr", data
, sizeof(data
)/sizeof(data
[0]));
1809 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data
, sizeof(data
)/sizeof(data
[0]));
1814 "& a < b < c < d& r < c", "& a < b < d& r < c",
1815 "& a < b < c < d& c < m", "& a < b < c < m < d",
1816 "& a < b < c < d& a < m", "& a < m < b < c < d",
1817 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1818 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1819 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1820 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1821 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1822 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
/*
 * TestRedundantRules: table-driven test.  Each entry of tests holds a rule
 * string, the rule string it is expected to be equivalent to, and a list of
 * test strings with its length.  For every entry the function:
 *   1. compiles .rules into credundant and .expectedRules into cresulting
 *      with ucol_openRules();
 *   2. compares the two with testAgainstUCA(cresulting, credundant,
 *      "expected", TRUE, ...);
 *   3. closes both collators;
 *   4. runs genericRulesStarter() on .rules with .testdata / .testdatalen.
 * NOTE(review): several entries are introduced by a "conflicts with ..."
 * comment; whether they are compiled out cannot be seen in this excerpt.
 */
1824 static void TestRedundantRules(void) {
1829 const char *expectedRules
;
1830 const char *testdata
[8];
1831 uint32_t testdatalen
;
1833 /* this test conflicts with positioning of CODAN placeholder */
1835 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1839 /* this test conflicts with the [before x] syntax tightening */
1841 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1845 /* this test conflicts with the [before x] syntax tightening */
1847 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1848 "& a <<< x < b <<< c << d <<< e",
1849 {"a", "x", "b", "c", "d", "e"}, 6
1852 "& a < b < c < d& [before 1] c < m",
1853 "& a < b < m < c < d",
1854 {"a", "b", "m", "c", "d"}, 5
1857 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1858 "& a < b <<< c << d <<< x <<< e",
1859 {"a", "b", "c", "d", "x", "e"}, 6
1861 /* this test conflicts with the [before x] syntax tightening */
1863 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1864 "& a < b <<< c <<< x << d <<< e",
1865 {"a", "b", "c", "x", "d", "e"},, 6
1868 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1869 "& a < b <<< c << d <<< e <<< f < x < g",
1870 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1873 "& a <<< b << c < d& a < m",
1874 "& a <<< b << c < m < d",
1875 {"a", "b", "c", "m", "d"}, 5
1878 "&a<b<<b\\u0301 &z<b",
1880 {"a", "b\\u0301", "z", "b"}, 4
1893 "& a < b < c < d& r < c",
1894 "& a < b < d& r < c",
1898 "& a < b < c < d& r < c",
1899 "& a < b < d& r < c",
1903 "& a < b < c < d& c < m",
1904 "& a < b < c < m < d",
1905 {"a", "b", "c", "m", "d"}, 5
1908 "& a < b < c < d& a < m",
1909 "& a < m < b < c < d",
1910 {"a", "m", "b", "c", "d"}, 5
1915 UCollator
*credundant
= NULL
;
1916 UCollator
*cresulting
= NULL
;
1917 UErrorCode status
= U_ZERO_ERROR
;
1918 UChar rlz
[2048] = { 0 };
1921 for(i
= 0; i
<sizeof(tests
)/sizeof(tests
[0]); i
++) {
1922 log_verbose("testing rule %s, expected to be %s\n", tests
[i
].rules
, tests
[i
].expectedRules
);
/* rlz is reused: first for the redundant rule, then for the expected one */
1923 rlen
= u_unescape(tests
[i
].rules
, rlz
, 2048);
1925 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
1926 if(status
== U_FILE_ACCESS_ERROR
) {
1927 log_data_err("Is your data around?\n");
1929 } else if(U_FAILURE(status
)) {
1930 log_err("Error opening collator\n");
1934 rlen
= u_unescape(tests
[i
].expectedRules
, rlz
, 2048);
1935 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
1937 testAgainstUCA(cresulting
, credundant
, "expected", TRUE
, &status
);
1939 ucol_close(credundant
);
1940 ucol_close(cresulting
);
1942 log_verbose("testing using data\n");
1944 genericRulesStarter(tests
[i
].rules
, tests
[i
].testdata
, tests
[i
].testdatalen
);
/*
 * TestExpansionSyntax: for each index i of rules, compiles rules[i] into
 * credundant and expectedRules[i] into cresulting with ucol_openRules(),
 * compares the two with testAgainstUCA(cresulting, credundant, "expected",
 * FALSE, ...), closes both, and then runs genericRulesStarter() on rules[i]
 * with testdata[i] / testdatalen[i].
 * The four tables (rules, expectedRules, testdata, testdatalen) are parallel
 * arrays indexed by the same i.
 */
1949 static void TestExpansionSyntax(void) {
1952 const static char *rules
[] = {
1953 "&AE <<< a << b <<< c &d <<< f",
1954 "&AE <<< a <<< b << c << d < e < f <<< g",
1955 "&AE <<< B <<< C / D <<< F"
1958 const static char *expectedRules
[] = {
1959 "&A <<< a / E << b / E <<< c /E &d <<< f",
1960 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
1961 "&A <<< B / E <<< C / ED <<< F / E"
1964 const static char *testdata
[][8] = {
1965 {"AE", "a", "b", "c"},
1966 {"AE", "a", "b", "c", "d", "e", "f", "g"},
1967 {"AE", "B", "C"} /* / ED <<< F / E"},*/
1970 const static uint32_t testdatalen
[] = {
1978 UCollator
*credundant
= NULL
;
1979 UCollator
*cresulting
= NULL
;
1980 UErrorCode status
= U_ZERO_ERROR
;
1981 UChar rlz
[2048] = { 0 };
1984 for(i
= 0; i
<sizeof(rules
)/sizeof(rules
[0]); i
++) {
1985 log_verbose("testing rule %s, expected to be %s\n", rules
[i
], expectedRules
[i
]);
/* rlz is reused: first for the short-hand rule, then for the expanded one */
1986 rlen
= u_unescape(rules
[i
], rlz
, 2048);
1988 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
1989 if(status
== U_FILE_ACCESS_ERROR
) {
1990 log_data_err("Is your data around?\n");
1992 } else if(U_FAILURE(status
)) {
1993 log_err("Error opening collator\n");
1996 rlen
= u_unescape(expectedRules
[i
], rlz
, 2048);
1997 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
1999 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2000 /* as a hard error test, but only in information mode */
2001 testAgainstUCA(cresulting
, credundant
, "expected", FALSE
, &status
);
2003 ucol_close(credundant
);
2004 ucol_close(cresulting
);
2006 log_verbose("testing using data\n");
2008 genericRulesStarter(rules
[i
], testdata
[i
], testdatalen
[i
]);
/*
 * TestCase: checks how UCOL_CASE_FIRST and UCOL_CASE_LEVEL affect comparison.
 *   Part 1: with the "en_US" collator at UCOL_TERTIARY strength, for each of
 *           the four attribute pairs in caseTestAttributes, every pair i < j
 *           of testCase is compared and the result must equal
 *           caseTestResults[k][3*i+j-1].
 *   Part 2: the same comparisons with a collator built from gRules; an
 *           element iterator is additionally opened and closed on both
 *           strings of each pair.
 *   Part 3: the lowerFirst / upperFirst lists are run through
 *           genericRulesStarter() with "[casefirst ...]" and
 *           "[caselevel on]" rule prefixes.
 * Index note: 3*i+j-1 only takes the values 0, 1, 2, 4, 5 and 8 for
 * 0 <= i < j <= 3, which is why the other columns of caseTestResults hold 0.
 */
2012 static void TestCase(void)
2014 const static UChar gRules
[MAX_TOKEN_LEN
] =
2015 /*" & 0 < 1,\u2460<a,A"*/
2016 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2018 const static UChar testCase
[][MAX_TOKEN_LEN
] =
2020 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2021 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2022 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2023 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
/* one row per entry of caseTestAttributes; column = 3*i+j-1 */
2026 const static UCollationResult caseTestResults
[][9] =
2028 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, 0, UCOL_LESS
, UCOL_LESS
, 0, 0, UCOL_LESS
},
2029 { UCOL_GREATER
, UCOL_LESS
, UCOL_LESS
, 0, UCOL_LESS
, UCOL_LESS
, 0, 0, UCOL_GREATER
},
2030 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, 0, UCOL_GREATER
, UCOL_LESS
, 0, 0, UCOL_LESS
},
2031 { UCOL_GREATER
, UCOL_LESS
, UCOL_GREATER
, 0, UCOL_LESS
, UCOL_LESS
, 0, 0, UCOL_GREATER
}
/* { value for UCOL_CASE_FIRST, value for UCOL_CASE_LEVEL } */
2035 const static UColAttributeValue caseTestAttributes
[][2] =
2037 { UCOL_LOWER_FIRST
, UCOL_OFF
},
2038 { UCOL_UPPER_FIRST
, UCOL_OFF
},
2039 { UCOL_LOWER_FIRST
, UCOL_ON
},
2040 { UCOL_UPPER_FIRST
, UCOL_ON
}
2044 UErrorCode status
= U_ZERO_ERROR
;
2045 UCollationElements
*iter
;
2046 UCollator
*myCollation
;
2047 myCollation
= ucol_open("en_US", &status
);
2049 if(U_FAILURE(status
)){
2050 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2053 log_verbose("Testing different case settings\n");
2054 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2056 for(k
= 0; k
<4; k
++) {
2057 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2058 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2059 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes
[k
][0], caseTestAttributes
[k
][1]);
2060 for (i
= 0; i
< 3 ; i
++) {
2061 for(j
= i
+1; j
<4; j
++) {
2062 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2066 ucol_close(myCollation
);
/* part 2: same checks with a collator built from gRules */
2068 myCollation
= ucol_openRules(gRules
, u_strlen(gRules
), UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
2069 if(U_FAILURE(status
)){
2070 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2073 log_verbose("Testing different case settings with custom rules\n");
2074 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2076 for(k
= 0; k
<4; k
++) {
2077 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2078 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2079 for (i
= 0; i
< 3 ; i
++) {
2080 for(j
= i
+1; j
<4; j
++) {
2081 log_verbose("k:%d, i:%d, j:%d\n", k
, i
, j
);
2082 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2083 iter
=ucol_openElements(myCollation
, testCase
[i
], u_strlen(testCase
[i
]), &status
);
2085 ucol_closeElements(iter
);
2086 iter
=ucol_openElements(myCollation
, testCase
[j
], u_strlen(testCase
[j
]), &status
);
2088 ucol_closeElements(iter
);
2092 ucol_close(myCollation
);
2094 const static char *lowerFirst
[] = {
2110 const static char *upperFirst
[] = {
/* part 3: mixed-case data through rule-level casefirst/caselevel options */
2125 log_verbose("mixed case test\n");
2126 log_verbose("lower first, case level off\n");
2127 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2128 log_verbose("upper first, case level off\n");
2129 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
2130 log_verbose("lower first, case level on\n");
2131 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2132 log_verbose("upper first, case level on\n");
2133 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
/*
 * TestIncrementalNormalize: exercises collation of strings that are not in
 * normalized form, using an "en_US" collator with UCOL_NORMALIZATION_MODE
 * switched on.  The numbered sections below cover:
 *   1. long runs of combining marks taken from ccMix, filled into strA front
 *      to back and into strB back to front, expected UCOL_EQUAL at tertiary
 *      and identical strength;
 *   2. a non-normal sequence that reaches the end of the string;
 *   3. a non-normal sequence followed by a surrogate pair;
 *   4. embedded nulls with explicit lengths versus -1 (null-terminated), for
 *      both ucol_strcoll() and ucol_getSortKey();
 *   5. the same with nulls inside non-normal strings;
 *   6. a null as the base of a combining sequence.
 * NOTE(review): the conditions guarding the "Error 3..8" messages and the
 * declarations of the sort-key buffers are not visible in this excerpt.
 */
2138 static void TestIncrementalNormalize(void) {
2140 /*UChar baseA =0x61;*/
2142 /* UChar baseB = 0x42;*/
2143 UChar ccMix
[] = {0x316, 0x321, 0x300};
2144 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2146 0x316 is combining grave accent below, cc=220
2147 0x321 is combining palatalized hook below, cc=202
2148 0x300 is combining grave accent, cc=230
2151 /*int maxSLen = 2000;*/
2152 int maxSLen
= 64000;
2157 UErrorCode status
= U_ZERO_ERROR
;
2158 UCollationResult result
;
2160 int32_t myQ
= QUICK
;
2167 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2168 /* most buffers along the way.*/
2172 strA
= malloc((maxSLen
+1) * sizeof(UChar
));
2173 strB
= malloc((maxSLen
+1) * sizeof(UChar
));
2175 coll
= ucol_open("en_US", &status
);
2176 if(status
== U_FILE_ACCESS_ERROR
) {
2177 log_data_err("Is your data around?\n");
2179 } else if(U_FAILURE(status
)) {
2180 log_err("Error opening collator\n");
2183 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2185 /*for (sLen = 257; sLen<maxSLen; sLen++) {*/
2186 /*for (sLen = 4; sLen<maxSLen; sLen++) {*/
2187 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2188 for (sLen
= 500; sLen
<501; sLen
++) {
2189 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2192 for (i
=1; i
<=sLen
-1; i
++) {
2193 strA
[i
] = ccMix
[i
% 3];
2194 strB
[sLen
-i
] = ccMix
[i
% 3];
2199 ucol_setStrength(coll
, UCOL_TERTIARY
); /* Do test with default strength, which runs*/
2200 doTest(coll
, strA
, strB
, UCOL_EQUAL
); /* optimized functions in the impl*/
2201 ucol_setStrength(coll
, UCOL_IDENTICAL
); /* Do again with the slow, general impl.*/
2202 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2211 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2212 /* of the string. Checks a couple of edge cases.*/
2215 UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0};
2216 UChar strB
[] = {0x41, 0xc0, 0x316, 0};
2217 ucol_setStrength(coll
, UCOL_TERTIARY
);
2218 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2221 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2225 * test below used a code point from Desseret, which sorts differently
2228 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2229 UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2230 UChar strB
[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2231 ucol_setStrength(coll
, UCOL_TERTIARY
);
2232 doTest(coll
, strA
, strB
, UCOL_GREATER
);
2235 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2238 UChar strA
[] = {0x41, 0x00, 0x42, 0x00};
2239 UChar strB
[] = {0x41, 0x00, 0x00, 0x00};
2246 /* there used to be -3 here. Hmmmm.... */
2247 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2248 result
= ucol_strcoll(coll
, strA
, 3, strB
, 3);
2249 if (result
!= UCOL_GREATER
) {
2250 log_err("ERROR 1 in test 4\n");
2252 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2253 if (result
!= UCOL_EQUAL
) {
2254 log_err("ERROR 2 in test 4\n");
/* sort keys for the explicit-length and the null-terminated view of each string */
2257 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2258 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2259 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2260 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2262 r
= strcmp(sortKeyA
, sortKeyAz
);
2264 log_err("Error 3 in test 4\n");
2266 r
= strcmp(sortKeyA
, sortKeyB
);
2268 log_err("Error 4 in test 4\n");
2270 r
= strcmp(sortKeyAz
, sortKeyBz
);
2272 log_err("Error 5 in test 4\n");
/* repeat the sort-key checks at identical strength */
2275 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2276 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2277 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2278 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2279 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2281 r
= strcmp(sortKeyA
, sortKeyAz
);
2283 log_err("Error 6 in test 4\n");
2285 r
= strcmp(sortKeyA
, sortKeyB
);
2287 log_err("Error 7 in test 4\n");
2289 r
= strcmp(sortKeyAz
, sortKeyBz
);
2291 log_err("Error 8 in test 4\n");
2293 ucol_setStrength(coll
, UCOL_TERTIARY
);
2297 /* Test 5: Null characters in non-normal source strings.*/
2300 UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2301 UChar strB
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2308 result
= ucol_strcoll(coll
, strA
, 6, strB
, 6);
2309 if (result
!= UCOL_GREATER
) {
2310 log_err("ERROR 1 in test 5\n");
2312 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2313 if (result
!= UCOL_EQUAL
) {
2314 log_err("ERROR 2 in test 5\n");
2317 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2318 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2319 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2320 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2322 r
= strcmp(sortKeyA
, sortKeyAz
);
2324 log_err("Error 3 in test 5\n");
2326 r
= strcmp(sortKeyA
, sortKeyB
);
2328 log_err("Error 4 in test 5\n");
2330 r
= strcmp(sortKeyAz
, sortKeyBz
);
2332 log_err("Error 5 in test 5\n");
2335 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2336 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2337 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2338 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2339 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2341 r
= strcmp(sortKeyA
, sortKeyAz
);
2343 log_err("Error 6 in test 5\n");
2345 r
= strcmp(sortKeyA
, sortKeyB
);
2347 log_err("Error 7 in test 5\n");
2349 r
= strcmp(sortKeyAz
, sortKeyBz
);
2351 log_err("Error 8 in test 5\n");
2353 ucol_setStrength(coll
, UCOL_TERTIARY
);
2357 /* Test 6: Null character as base of a non-normal combining sequence.*/
2360 UChar strA
[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2361 UChar strB
[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2363 result
= ucol_strcoll(coll
, strA
, 5, strB
, 5);
2364 if (result
!= UCOL_LESS
) {
2365 log_err("Error 1 in test 6\n");
2367 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2368 if (result
!= UCOL_EQUAL
) {
2369 log_err("Error 2 in test 6\n");
/*
 * TestGetCaseBit: for every entry of results, unescapes the caseBitData
 * string at the same index, calls ucol_uprv_getCaseBits() on it with the
 * root ("") collator, and logs an error when the returned value differs from
 * the expected one.  caseBitData and results are parallel arrays.
 */
2379 static void TestGetCaseBit(void) {
2380 static const char *caseBitData
[] = {
2381 "a", "A", "ch", "Ch", "CH",
2382 "\\uFF9E", "\\u0009"
2385 static const uint8_t results
[] = {
2386 UCOL_LOWER_CASE
, UCOL_UPPER_CASE
, UCOL_LOWER_CASE
, UCOL_MIXED_CASE
, UCOL_UPPER_CASE
,
2387 UCOL_UPPER_CASE
, UCOL_LOWER_CASE
2390 uint32_t i
, blen
= 0;
2392 UErrorCode status
= U_ZERO_ERROR
;
2393 UCollator
*UCA
= ucol_open("", &status
);
2396 for(i
= 0; i
<sizeof(results
)/sizeof(results
[0]); i
++) {
2397 blen
= u_unescape(caseBitData
[i
], b
, 256);
2398 res
= ucol_uprv_getCaseBits(UCA
, b
, blen
, &status
);
2399 if(results
[i
] != res
) {
2400 log_err("Expected case = %02X, got %02X for %04X\n", results
[i
], res
, b
[0]);
/*
 * TestHangulTailoring: checks the ordering of koreanData three ways:
 *   1. with a collator built from the rules string below (a chain of <<<
 *      relations starting at \\uac00), via genericOrderingTest();
 *   2. with the same collator after jamoSpecial in its image has been forced
 *      to TRUE, via genericOrderingTest() again;
 *   3. with the "ko__LOTUS" locale, via genericLocaleStarter().
 * NOTE(review): writing jamoSpecial goes through a cast of coll->image to
 * (UCATableHeader *); the original comment flags this as a test-only hack.
 */
2406 static void TestHangulTailoring(void) {
2407 static const char *koreanData
[] = {
2408 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2409 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2410 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2411 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2412 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2413 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2417 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2418 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2419 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2420 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2421 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2422 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2425 UErrorCode status
= U_ZERO_ERROR
;
2426 UChar rlz
[2048] = { 0 };
2427 uint32_t rlen
= u_unescape(rules
, rlz
, 2048);
2429 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
2430 if(status
== U_FILE_ACCESS_ERROR
) {
2431 log_data_err("Is your data around?\n");
2433 } else if(U_FAILURE(status
)) {
2434 log_err("Error opening collator\n");
2438 log_verbose("Using start of korean rules\n");
2440 if(U_SUCCESS(status
)) {
2441 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2443 log_err("Unable to open collator with rules %s\n", rules
);
2446 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2447 ((UCATableHeader
*)coll
->image
)->jamoSpecial
= TRUE
; /* don't try this at home */
2448 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2452 log_verbose("Using ko__LOTUS locale\n");
2453 genericLocaleStarter("ko__LOTUS", koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
/* Checks that compressed secondary and tertiary sort-key bytes stay out of
 * the opposite compression range.
 * secstr and tertstr are filled with U+0020 while count < 149 and then get a
 * distinguishing final unit; each is passed to ucol_getSortKey() with length
 * 150.  The bytes after a 0x01 level separator are then walked: in the
 * "top down" pass a byte below (top - top count) is reported, in the
 * "bottom up" pass a byte above (bottom + bottom count) is reported. */
2456 static void TestCompressOverlap(void) {
2459 UErrorCode status
= U_ZERO_ERROR
;
2466 coll
= ucol_open("", &status
);
2468 if (U_FAILURE(status
)) {
2469 log_err("Collator can't be created\n");
2472 while (count
< 149) {
2473 secstr
[count
] = 0x0020; /* [06, 05, 05] */
2474 tertstr
[count
] = 0x0020;
2478 /* top down compression ----------------------------------- */
2479 secstr
[count
] = 0x0332; /* [, 87, 05] */
2480 tertstr
[count
] = 0x3000; /* [06, 05, 07] */
2482 /* no compression secstr should have 150 secondary bytes, tertstr should
2483 have 150 tertiary bytes.
2484 with correct overlapping compression, secstr should have 4 secondary
2485 bytes, tertstr should have > 2 tertiary bytes */
2486 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
2487 tempptr
= uprv_strchr(result
, 1) + 1;
2488 while (*(tempptr
+ 1) != 1) {
2489 /* the last secondary collation element is not checked since it is not
2490 part of the compression */
2491 if (*tempptr
< UCOL_COMMON_TOP2
- UCOL_TOP_COUNT2
) {
2492 log_err("Secondary compression overlapped\n");
2497 /* tertiary top/bottom/common for en_US is similar to the secondary
2498 top/bottom/common */
2499 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
2500 tempptr
= uprv_strrchr(result
, 1) + 1;
2501 while (*(tempptr
+ 1) != 0) {
2502 /* the last secondary collation element is not checked since it is not
2503 part of the compression */
2504 if (*tempptr
< coll
->tertiaryTop
- coll
->tertiaryTopCount
) {
2505 log_err("Tertiary compression overlapped\n");
2510 /* bottom up compression ------------------------------------- */
2513 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
2514 tempptr
= uprv_strchr(result
, 1) + 1;
2515 while (*(tempptr
+ 1) != 1) {
2516 /* the last secondary collation element is not checked since it is not
2517 part of the compression */
2518 if (*tempptr
> UCOL_COMMON_BOT2
+ UCOL_BOT_COUNT2
) {
2519 log_err("Secondary compression overlapped\n");
2524 /* tertiary top/bottom/common for en_US is similar to the secondary
2525 top/bottom/common */
2526 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
2527 tempptr
= uprv_strrchr(result
, 1) + 1;
2528 while (*(tempptr
+ 1) != 0) {
2529 /* the last secondary collation element is not checked since it is not
2530 part of the compression */
2531 if (*tempptr
> coll
->tertiaryBottom
+ coll
->tertiaryBottomCount
) {
2532 log_err("Tertiary compression overlapped\n");
/* Runs the first three entries of the shared `test` ordering data against
 * the "root" locale and against six rule strings that tailor U+0410
 * (alone, with U+04D0, and with U+0410 U+0301), each anchored either on
 * U+0410 itself or after Z. */
2540 static void TestCyrillicTailoring(void) {
2541 static const char *test
[] = {
2547 /* Russian overrides contractions, so this test is not valid anymore */
2548 /*genericLocaleStarter("ru", test, 3);*/
2550 genericLocaleStarter("root", test
, 3);
2551 genericRulesStarter("&\\u0410 = \\u0410", test
, 3);
2552 genericRulesStarter("&Z < \\u0410", test
, 3);
2553 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test
, 3);
2554 genericRulesStarter("&Z < \\u0410 < \\u04d0", test
, 3);
2555 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test
, 3);
2556 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test
, 3);
/* Runs two three-entry data sets (testNoCont, testNoCont2) through
 * genericRulesStarter() under the rule
 * "[suppressContractions [U+0400-U+047F]]". */
2559 static void TestSuppressContractions(void) {
2561 static const char *testNoCont2
[] = {
2566 static const char *testNoCont
[] = {
2569 "\\uFF21\\u0410\\u0302"
2572 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont
, 3);
2573 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2
, 3);
/* Contraction tests in three parts:
 *  1. for every rule in testrules, the collation elements of the two-unit
 *     string testdata[i] (iter1) are compared with the elements produced by
 *     a second iterator (iter2) until iter2 returns UCOL_NULLORDER, and
 *     iter1 must then be exhausted too;
 *  2. with the rule "& a < b < c < ch < d & c = ch / h", ucol_strcoll() must
 *     order testdata2[0] < testdata2[1] < testdata2[2];
 *  3. testrules3 is consumed in pairs (i, i + 1); the collators built from
 *     both rules of a pair must yield the same collation elements for 'B'.
 * Fix: the creation-failure message in part 3 now prints testrules3[i];
 * it used to index testrules, which is a different and shorter array. */
2576 static void TestContraction(void) {
2577 const static char *testrules
[] = {
2579 "&A = A\\u0306/\\u0306",
2582 const static UChar testdata
[][2] = {
2583 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2584 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2585 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2587 const static UChar testdata2
[][2] = {
2588 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2589 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2590 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2592 const static char *testrules3
[] = {
2593 "&z < xyz &xyzw << B",
2594 "&z < xyz &xyz << B / w",
2595 "&z < ch &achm << B",
2596 "&z < ch &a << B / chm",
2597 "&\\ud800\\udc00w << B",
2598 "&\\ud800\\udc00 << B / w",
2599 "&a\\ud800\\udc00m << B",
2600 "&a << B / \\ud800\\udc00m",
2603 UErrorCode status
= U_ZERO_ERROR
;
2605 UChar rule
[256] = {0};
2609 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2610 UCollationElements
*iter1
;
2612 log_verbose("Rule %s for testing\n", testrules
[i
]);
2613 rlen
= u_unescape(testrules
[i
], rule
, 32);
2614 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2615 if (U_FAILURE(status
)) {
2616 log_err("Collator creation failed %s\n", testrules
[i
]);
2619 iter1
= ucol_openElements(coll
, testdata
[i
], 2, &status
);
2620 if (U_FAILURE(status
)) {
2621 log_err("Collation iterator creation failed\n");
2625 UCollationElements
*iter2
= ucol_openElements(coll
,
2629 if (U_FAILURE(status
)) {
2630 log_err("Collation iterator creation failed\n");
2633 ce
= ucol_next(iter2
, &status
);
2634 while (ce
!= UCOL_NULLORDER
) {
2635 if ((uint32_t)ucol_next(iter1
, &status
) != ce
) {
2636 log_err("Collation elements in contraction split does not match\n");
2639 ce
= ucol_next(iter2
, &status
);
2642 ucol_closeElements(iter2
);
2644 if (ucol_next(iter1
, &status
) != UCOL_NULLORDER
) {
2645 log_err("Collation elements not exhausted\n");
2648 ucol_closeElements(iter1
);
2652 rlen
= u_unescape("& a < b < c < ch < d & c = ch / h", rule
, 256);
2653 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2654 if (ucol_strcoll(coll
, testdata2
[0], 2, testdata2
[1], 2) != UCOL_LESS
) {
2655 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2656 testdata2
[0][0], testdata2
[0][1], testdata2
[1][0],
2660 if (ucol_strcoll(coll
, testdata2
[1], 2, testdata2
[2], 2) != UCOL_LESS
) {
2661 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2662 testdata2
[1][0], testdata2
[1][1], testdata2
[2][0],
2668 for (i
= 0; i
< sizeof(testrules3
) / sizeof(testrules3
[0]); i
+= 2) {
2671 UCollationElements
*iter1
,
2673 UChar ch
= 0x0042 /* 'B' */;
2675 rlen
= u_unescape(testrules3
[i
], rule
, 32);
2676 coll1
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2677 rlen
= u_unescape(testrules3
[i
+ 1], rule
, 32);
2678 coll2
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2679 if (U_FAILURE(status
)) {
/* report the rule of the array this loop actually iterates */
2680 log_err("Collator creation failed %s\n", testrules3
[i
]);
2683 iter1
= ucol_openElements(coll1
, &ch
, 1, &status
);
2684 iter2
= ucol_openElements(coll2
, &ch
, 1, &status
);
2685 if (U_FAILURE(status
)) {
2686 log_err("Collation iterator creation failed\n");
2689 ce
= ucol_next(iter1
, &status
);
2690 if (U_FAILURE(status
)) {
2691 log_err("Retrieving ces failed\n");
2694 while (ce
!= UCOL_NULLORDER
) {
2695 if (ce
!= (uint32_t)ucol_next(iter2
, &status
)) {
2696 log_err("CEs does not match\n");
2699 ce
= ucol_next(iter1
, &status
);
2700 if (U_FAILURE(status
)) {
2701 log_err("Retrieving ces failed\n");
2705 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
2706 log_err("CEs not exhausted\n");
2709 ucol_closeElements(iter1
);
2710 ucol_closeElements(iter2
);
/* For every rule in testrules, opens a collator with ucol_openRules()
 * (UCOL_ON, UCOL_TERTIARY) and uses doTest() to require
 * testdata[j] < testdata[j + 1] for j = 0..4, i.e. the six two-letter
 * strings in testdata must already be listed in ascending order. */
2716 static void TestExpansion(void) {
2717 const static char *testrules
[] = {
2718 "&J << K / B & K << M",
2721 const static UChar testdata
[][3] = {
2722 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2723 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2724 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2725 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2726 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2727 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2730 UErrorCode status
= U_ZERO_ERROR
;
2732 UChar rule
[256] = {0};
2736 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2738 log_verbose("Rule %s for testing\n", testrules
[i
]);
2739 rlen
= u_unescape(testrules
[i
], rule
, 32);
2740 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2741 if (U_FAILURE(status
)) {
2742 log_err("Collator creation failed %s\n", testrules
[i
]);
2746 for (j
= 0; j
< 5; j
++) {
2747 doTest(coll
, testdata
[j
], testdata
[j
+ 1], UCOL_LESS
);
2754 /* this test tests the current limitations of the engine */
2755 /* it always fail, so it is disabled by default */
2756 static void TestLimitations(void) {
2757 /* recursive expansions */
2759 static const char *rule
= "&a=b/c&d=c/e";
2760 static const char *tlimit01
[] = {"add","b","adf"};
2761 static const char *tlimit02
[] = {"aa","b","af"};
2762 log_verbose("recursive expansions\n");
2763 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2764 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2766 /* contractions spanning expansions */
2768 static const char *rule
= "&a<<<c/e&g<<<eh";
2769 static const char *tlimit01
[] = {"ad","c","af","f","ch","h"};
2770 static const char *tlimit02
[] = {"ad","c","ch","af","f","h"};
2771 log_verbose("contractions spanning expansions\n");
2772 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2773 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2775 /* normalization: nulls in contractions */
2777 static const char *rule
= "&a<<<\\u0000\\u0302";
2778 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2779 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
2780 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2781 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2782 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2784 log_verbose("NULL in contractions\n");
2785 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2786 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2787 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2788 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2791 /* normalization: contractions spanning normalization */
2793 static const char *rule
= "&a<<<\\u0000\\u0302";
2794 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2795 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
2796 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2797 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2798 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2800 log_verbose("contractions spanning normalization\n");
2801 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2802 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2803 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2804 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2809 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2810 static const char *rule
= "&\\u2010<x<[variable top]=z";
2811 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
2812 static const char *tlimit01
[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
2813 static const char *tlimit02
[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
2814 static const char *tlimit03
[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
2815 static const UColAttribute att
[] = { UCOL_ALTERNATE_HANDLING
, UCOL_STRENGTH
};
2816 static const UColAttributeValue valOn
[] = { UCOL_SHIFTED
, UCOL_QUATERNARY
};
2817 static const UColAttributeValue valOff
[] = { UCOL_NON_IGNORABLE
, UCOL_TERTIARY
};
2819 log_verbose("variable top\n");
2820 genericRulesStarterWithOptions(rule
, tlimit03
, sizeof(tlimit03
)/sizeof(tlimit03
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2821 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2822 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2823 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
2824 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
2829 static const char *rule
= "&c<ch<<<cH<<<Ch<<<CH";
2830 static const char *tlimit01
[] = {"c","CH","Ch","cH","ch"};
2831 static const char *tlimit02
[] = {"c","CH","cH","Ch","ch"};
2832 static const UColAttribute att
[] = { UCOL_CASE_FIRST
};
2833 static const UColAttributeValue valOn
[] = { UCOL_UPPER_FIRST
};
2834 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2835 log_verbose("case level\n");
2836 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2837 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2838 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2839 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2845 static void TestBocsuCoverage(void) {
2846 UErrorCode status
= U_ZERO_ERROR
;
2847 const char *testString
= "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2848 UChar test
[256] = {0};
2849 uint32_t tlen
= u_unescape(testString
, test
, 32);
2850 uint8_t key
[256] = {0};
2853 UCollator
*coll
= ucol_open("", &status
);
2854 if(U_SUCCESS(status
)) {
2855 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
2857 klen
= ucol_getSortKey(coll
, test
, tlen
, key
, 256);
2861 log_data_err("Couldn't open UCA\n");
/* Exercises ucol_setVariableTop / ucol_getVariableTop /
 * ucol_restoreVariableTop on the collator opened for the "" locale.
 *  - "Slide variable top over UCARules": the full rules are fetched with
 *    ucol_getRulesEx() and tokenised with ucol_tok_parseNextToken(); at each
 *    primary-strength token the variable top is set to the previous token's
 *    characters, the upper 16 bits read back must match the value returned
 *    by the setter, and ucol_strcoll() is run on strings built from the old
 *    and current token.
 *  - every entry of coll->image->contractionUCACombos is tried as a
 *    variable top (length -1 when the third unit is 0, otherwise 3);
 *  - setting the variable top to the leftover `first` buffer is expected to
 *    fail;
 *  - the original variable top is restored and verified;
 *  - all three functions are called with status preset to
 *    U_INTERNAL_PROGRAM_ERROR (also with a NULL collator) and must leave
 *    that status untouched. */
2865 static void TestVariableTopSetting(void) {
2866 UErrorCode status
= U_ZERO_ERROR
;
2867 const UChar
*current
= NULL
;
2868 uint32_t varTopOriginal
= 0, varTop1
, varTop2
;
2869 UCollator
*coll
= ucol_open("", &status
);
2870 if(U_SUCCESS(status
)) {
2872 uint32_t strength
= 0;
2874 uint32_t chOffset
= 0;
2876 uint32_t exOffset
= 0;
2878 uint32_t oldChOffset
= 0;
2879 uint32_t oldChLen
= 0;
2880 uint32_t oldExOffset
= 0;
2881 uint32_t oldExLen
= 0;
2882 uint32_t prefixOffset
= 0;
2883 uint32_t prefixLen
= 0;
2885 UBool startOfRules
= TRUE
;
2886 UColTokenParser src
;
2889 UChar
*rulesCopy
= NULL
;
2892 UCollationResult result
;
2894 UChar first
[256] = { 0 };
2895 UChar second
[256] = { 0 };
2896 UParseError parseError
;
2897 int32_t myQ
= QUICK
;
2905 /* this test will fail when normalization is turned on */
2906 /* therefore we always turn off exhaustive mode for it */
2907 if(1) { /* QUICK > 0*/
2908 log_verbose("Slide variable top over UCARules\n");
2909 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, 0);
2910 rulesCopy
= (UChar
*)malloc((rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
2911 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
);
2913 if(U_SUCCESS(status
) && rulesLen
> 0) {
2914 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
2915 src
.current
= src
.source
= rulesCopy
;
2916 src
.end
= rulesCopy
+rulesLen
;
2917 src
.extraCurrent
= src
.end
;
2918 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
2920 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,&status
)) != NULL
) {
2921 strength
= src
.parsedToken
.strength
;
2922 chOffset
= src
.parsedToken
.charsOffset
;
2923 chLen
= src
.parsedToken
.charsLen
;
2924 exOffset
= src
.parsedToken
.extensionOffset
;
2925 exLen
= src
.parsedToken
.extensionLen
;
2926 prefixOffset
= src
.parsedToken
.prefixOffset
;
2927 prefixLen
= src
.parsedToken
.prefixLen
;
2928 specs
= src
.parsedToken
.flags
;
2930 startOfRules
= FALSE
;
2932 log_verbose("%04X %d ", *(rulesCopy
+chOffset
), chLen
);
2934 if(strength
== UCOL_PRIMARY
) {
2935 status
= U_ZERO_ERROR
;
2936 varTopOriginal
= ucol_getVariableTop(coll
, &status
);
2937 varTop1
= ucol_setVariableTop(coll
, rulesCopy
+oldChOffset
, oldChLen
, &status
);
2938 if(U_FAILURE(status
)) {
2942 uint32_t CE
= UCOL_NO_MORE_CES
;
2944 /* before we start screaming, let's see if there is a problem with the rules */
2946 uprv_init_collIterate(coll
, rulesCopy
+oldChOffset
, oldChLen
, &s
);
2948 CE
= ucol_getNextCE(coll
, &s
, &status
);
2950 for(i
= 0; i
< oldChLen
; i
++) {
2951 j
= sprintf(buf
, "%04X ", *(rulesCopy
+oldChOffset
+i
));
2954 if(status
== U_PRIMARY_TOO_LONG_ERROR
) {
2955 log_verbose("= Expected failure for %s =", buffer
);
2957 if(s
.pos
== s
.endp
) {
2958 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
2959 oldChOffset
, u_errorName(status
), buffer
);
2961 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
2966 varTop2
= ucol_getVariableTop(coll
, &status
);
2967 if((varTop1
& 0xFFFF0000) != (varTop2
& 0xFFFF0000)) {
2968 log_err("cannot retrieve set varTop value!\n");
2972 if((varTop1
& 0xFFFF0000) > 0 && oldExLen
== 0) {
2974 u_strncpy(first
, rulesCopy
+oldChOffset
, oldChLen
);
2975 u_strncpy(first
+oldChLen
, rulesCopy
+chOffset
, chLen
);
2976 u_strncpy(first
+oldChLen
+chLen
, rulesCopy
+oldChOffset
, oldChLen
);
2977 first
[2*oldChLen
+chLen
] = 0;
2980 u_strncpy(second
, rulesCopy
+chOffset
, chLen
);
2982 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
2983 u_strncpy(second
, rulesCopy
+oldExOffset
, oldExLen
);
2984 u_strncpy(second
+oldChLen
, rulesCopy
+chOffset
, chLen
);
2985 u_strncpy(second
+oldChLen
+chLen
, rulesCopy
+oldExOffset
, oldExLen
);
2986 second
[2*oldExLen
+chLen
] = 0;
2988 result
= ucol_strcoll(coll
, first
, -1, second
, -1);
2989 if(result
== UCOL_EQUAL
) {
2990 doTest(coll
, first
, second
, UCOL_EQUAL
);
2992 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy
+oldChOffset
), *(rulesCopy
+chOffset
));
2996 if(strength
!= UCOL_TOK_RESET
) {
2997 oldChOffset
= chOffset
;
2999 oldExOffset
= exOffset
;
3003 status
= U_ZERO_ERROR
;
3006 log_err("Unexpected failure getting rules %s\n", u_errorName(status
));
3009 if (U_FAILURE(status
)) {
3010 log_err("Error parsing rules %s\n", u_errorName(status
));
3013 status
= U_ZERO_ERROR
;
3018 log_verbose("Testing setting variable top to contractions\n");
3020 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3021 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3022 UChar
*conts
= (UChar
*)((uint8_t *)coll
->image
+ coll
->image
->contractionUCACombos
);
3023 while(*conts
!= 0) {
3024 if(*(conts
+2) == 0) {
3025 varTop1
= ucol_setVariableTop(coll
, conts
, -1, &status
);
3027 varTop1
= ucol_setVariableTop(coll
, conts
, 3, &status
);
3029 if(U_FAILURE(status
)) {
3030 log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3031 *conts
, *(conts
+1), *(conts
+2));
3032 status
= U_ZERO_ERROR
;
3037 status
= U_ZERO_ERROR
;
3043 ucol_setVariableTop(coll
, first
, -1, &status
);
3045 if(U_SUCCESS(status
)) {
3046 log_err("Invalid contraction succeded in setting variable top!\n");
3051 log_verbose("Test restoring variable top\n");
3053 status
= U_ZERO_ERROR
;
3054 ucol_restoreVariableTop(coll
, varTopOriginal
, &status
);
3055 if(varTopOriginal
!= ucol_getVariableTop(coll
, &status
)) {
3056 log_err("Couldn't restore old variable top\n");
3059 log_verbose("Testing calling with error set\n");
3061 status
= U_INTERNAL_PROGRAM_ERROR
;
3062 varTop1
= ucol_setVariableTop(coll
, first
, 1, &status
);
3063 varTop2
= ucol_getVariableTop(coll
, &status
);
3064 ucol_restoreVariableTop(coll
, varTop2
, &status
);
3065 varTop1
= ucol_setVariableTop(NULL
, first
, 1, &status
);
3066 varTop2
= ucol_getVariableTop(NULL
, &status
);
3067 ucol_restoreVariableTop(NULL
, varTop2
, &status
);
3068 if(status
!= U_INTERNAL_PROGRAM_ERROR
) {
3069 log_err("Bad reaction to passed error!\n");
3074 log_data_err("Couldn't open UCA collator\n");
/* Opens the "en_US" collator and passes the list of noncharacter code
 * points (U+FFFE/U+FFFF and the ..FFFE/..FFFF pair of every supplementary
 * plane) to genericOrderingTestWithResult() with UCOL_EQUAL as the expected
 * result.
 * NOTE(review): the element count is hard-coded as 35 - keep it in sync
 * with the number of entries in test[]. */
3079 static void TestNonChars(void) {
3080 static const char *test
[] = {
3082 "\\uFFFE", "\\uFFFF",
3083 "\\U0001FFFE", "\\U0001FFFF",
3084 "\\U0002FFFE", "\\U0002FFFF",
3085 "\\U0003FFFE", "\\U0003FFFF",
3086 "\\U0004FFFE", "\\U0004FFFF",
3087 "\\U0005FFFE", "\\U0005FFFF",
3088 "\\U0006FFFE", "\\U0006FFFF",
3089 "\\U0007FFFE", "\\U0007FFFF",
3090 "\\U0008FFFE", "\\U0008FFFF",
3091 "\\U0009FFFE", "\\U0009FFFF",
3092 "\\U000AFFFE", "\\U000AFFFF",
3093 "\\U000BFFFE", "\\U000BFFFF",
3094 "\\U000CFFFE", "\\U000CFFFF",
3095 "\\U000DFFFE", "\\U000DFFFF",
3096 "\\U000EFFFE", "\\U000EFFFF",
3097 "\\U000FFFFE", "\\U000FFFFF",
3098 "\\U0010FFFE", "\\U0010FFFF"
3100 UErrorCode status
= U_ZERO_ERROR
;
3101 UCollator
*coll
= ucol_open("en_US", &status
);
3103 log_verbose("Test non characters\n");
3105 if(U_SUCCESS(status
)) {
3106 genericOrderingTestWithResult(coll
, test
, 35, UCOL_EQUAL
);
3108 log_err("Unable to open collator\n");
/**
 * Stress test for long runs of identical characters.
 *
 * For every length j in [20, 500) four strings are built, each made of
 * j-1 'a' characters followed by 'a', 'b', 'c' and 'd' respectively, and
 * the four are handed in that order to genericLocaleStarter() for "en_US".
 *
 * The strings live in fixed static buffers rather than malloc()ed memory:
 * the sizes are compile-time constants, so there is no allocation that can
 * fail and nothing to release.  Each string is explicitly NUL-terminated
 * before it is passed on.
 */
static void TestExtremeCompression(void) {
  static char storage[4][2048];
  const char *test[4];
  int32_t j = 0, i = 0;

  for(i = 0; i<4; i++) {
    test[i] = storage[i];
  }

  for(j = 20; j < 500; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(storage[i], 'a', (j-1)*sizeof(char));
      storage[i][j-1] = (char)('a'+i);
      storage[i][j] = 0;
    }
    genericLocaleStarter("en_US", test, 4);
  }
}
/* Alternative version of the extreme-compression test covering lengths
 * 10..2047: four heap buffers of 2048 chars are filled with runs of 'a'
 * and passed to genericLocaleStarter() for "en_US".
 * NOTE(review): this is a second definition with the same name as the
 * function above, so it is presumably compiled out - confirm.
 * Fix: ucol_open() takes a pointer to the error code; the call passed
 * `status` by value and now passes `&status`, like every other ucol_open()
 * call in this file. */
3138 static void TestExtremeCompression(void) {
3139 static char *test
[4];
3140 int32_t j
= 0, i
= 0;
3141 UErrorCode status
= U_ZERO_ERROR
;
3142 UCollator
*coll
= ucol_open("en_US", &status
);
3143 for(i
= 0; i
<4; i
++) {
3144 test
[i
] = (char *)malloc(2048*sizeof(char));
3146 for(j
= 10; j
< 2048; j
++) {
3147 for(i
= 0; i
<4; i
++) {
3148 uprv_memset(test
[i
], 'a', (j
-2)*sizeof(char));
3149 test
[i
][j
-1] = (char)('a'+i
);
3153 genericLocaleStarter("en_US", (const char **)test
, 4);
3155 for(j
= 10; j
< 2048; j
++) {
3156 for(i
= 0; i
<1; i
++) {
3157 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
3161 for(i
= 0; i
<4; i
++) {
/* Runs 14 entries of `test` through genericRulesStarter() under a rule
 * that tailors supplementary characters (written as surrogate pairs),
 * alone and followed by ASCII letters, after "z". */
3167 static void TestSurrogates(void) {
3168 static const char *test
[] = {
3169 "z","\\ud900\\udc25", "\\ud805\\udc50",
3170 "\\ud800\\udc00y", "\\ud800\\udc00r",
3171 "\\ud800\\udc00f", "\\ud800\\udc00",
3172 "\\ud800\\udc00c", "\\ud800\\udc00b",
3173 "\\ud800\\udc00fa", "\\ud800\\udc00fb",
3178 static const char *rule
=
3179 "&z < \\ud900\\udc25 < \\ud805\\udc50"
3180 "< \\ud800\\udc00y < \\ud800\\udc00r"
3181 "< \\ud800\\udc00f << \\ud800\\udc00"
3182 "< \\ud800\\udc00fa << \\ud800\\udc00fb"
3183 "< \\ud800\\udc00a < c < b" ;
3185 genericRulesStarter(rule
, test
, 14);
/* Table-driven: every entry of `tests` supplies a rule string, an array of
 * strings (`data`, capacity 50) and the number of strings to use (`len`);
 * each entry is passed to genericRulesStarter(). */
3188 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3189 static void TestPrefix(void) {
3194 const char *data
[50];
3204 "&z<<<\\ud900\\udc25|a",
3205 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3209 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3210 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
/* Runs the test1 and test2 orderings against the "ja" locale twice: once
 * with strength set to UCOL_QUATERNARY, and once with UCOL_QUATERNARY plus
 * UCOL_ALTERNATE_HANDLING = UCOL_SHIFTED.  test3 is declared but its only
 * use is commented out. */
3214 /* This test uses data supplied by Masashiko Maedera to test the implementation */
3215 /* JIS X 4061 collation order implementation */
3216 static void TestNewJapanese(void) {
3218 static const char *test1
[] = {
3219 "\\u30b7\\u30e3\\u30fc\\u30ec",
3220 "\\u30b7\\u30e3\\u30a4",
3221 "\\u30b7\\u30e4\\u30a3",
3222 "\\u30b7\\u30e3\\u30ec",
3223 "\\u3061\\u3087\\u3053",
3224 "\\u3061\\u3088\\u3053",
3225 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3226 "\\u3066\\u30fc\\u305f",
3227 "\\u30c6\\u30fc\\u30bf",
3228 "\\u30c6\\u30a7\\u30bf",
3229 "\\u3066\\u3048\\u305f",
3230 "\\u3067\\u30fc\\u305f",
3231 "\\u30c7\\u30fc\\u30bf",
3232 "\\u30c7\\u30a7\\u30bf",
3233 "\\u3067\\u3048\\u305f",
3234 "\\u3066\\u30fc\\u305f\\u30fc",
3235 "\\u30c6\\u30fc\\u30bf\\u30a1",
3236 "\\u30c6\\u30a7\\u30bf\\u30fc",
3237 "\\u3066\\u3047\\u305f\\u3041",
3238 "\\u3066\\u3048\\u305f\\u30fc",
3239 "\\u3067\\u30fc\\u305f\\u30fc",
3240 "\\u30c7\\u30fc\\u30bf\\u30a1",
3241 "\\u3067\\u30a7\\u305f\\u30a1",
3242 "\\u30c7\\u3047\\u30bf\\u3041",
3243 "\\u30c7\\u30a8\\u30bf\\u30a2",
3245 "\\u3073\\u3085\\u3042",
3246 "\\u3074\\u3085\\u3042",
3247 "\\u3073\\u3085\\u3042\\u30fc",
3248 "\\u30d3\\u30e5\\u30a2\\u30fc",
3249 "\\u3074\\u3085\\u3042\\u30fc",
3250 "\\u30d4\\u30e5\\u30a2\\u30fc",
3251 "\\u30d2\\u30e5\\u30a6",
3252 "\\u30d2\\u30e6\\u30a6",
3253 "\\u30d4\\u30e5\\u30a6\\u30a2",
3254 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3255 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3256 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3257 "\\u3072\\u3085\\u3093",
3258 "\\u3074\\u3085\\u3093",
3259 "\\u3075\\u30fc\\u308a",
3260 "\\u30d5\\u30fc\\u30ea",
3261 "\\u3075\\u3045\\u308a",
3262 "\\u3075\\u30a5\\u308a",
3263 "\\u3075\\u30a5\\u30ea",
3264 "\\u30d5\\u30a6\\u30ea",
3265 "\\u3076\\u30fc\\u308a",
3266 "\\u30d6\\u30fc\\u30ea",
3267 "\\u3076\\u3045\\u308a",
3268 "\\u30d6\\u30a5\\u308a",
3269 "\\u3077\\u3046\\u308a",
3270 "\\u30d7\\u30a6\\u30ea",
3271 "\\u3075\\u30fc\\u308a\\u30fc",
3272 "\\u30d5\\u30a5\\u30ea\\u30fc",
3273 "\\u3075\\u30a5\\u308a\\u30a3",
3274 "\\u30d5\\u3045\\u308a\\u3043",
3275 "\\u30d5\\u30a6\\u30ea\\u30fc",
3276 "\\u3075\\u3046\\u308a\\u3043",
3277 "\\u30d6\\u30a6\\u30ea\\u30a4",
3278 "\\u3077\\u30fc\\u308a\\u30fc",
3279 "\\u3077\\u30a5\\u308a\\u30a4",
3280 "\\u3077\\u3046\\u308a\\u30fc",
3281 "\\u30d7\\u30a6\\u30ea\\u30a4",
3297 static const char *test2
[] = {
3298 "\\u306f\\u309d", /* H\\u309d */
3299 "\\u30cf\\u30fd", /* K\\u30fd */
3300 "\\u306f\\u306f", /* HH */
3301 "\\u306f\\u30cf", /* HK */
3302 "\\u30cf\\u30cf", /* KK */
3303 "\\u306f\\u309e", /* H\\u309e */
3304 "\\u30cf\\u30fe", /* K\\u30fe */
3305 "\\u306f\\u3070", /* HH\\u309b */
3306 "\\u30cf\\u30d0", /* KK\\u309b */
3307 "\\u306f\\u3071", /* HH\\u309c */
3308 "\\u30cf\\u3071", /* KH\\u309c */
3309 "\\u30cf\\u30d1", /* KK\\u309c */
3310 "\\u3070\\u309d", /* H\\u309b\\u309d */
3311 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3312 "\\u3070\\u306f", /* H\\u309bH */
3313 "\\u30d0\\u30cf", /* K\\u309bK */
3314 "\\u3070\\u309e", /* H\\u309b\\u309e */
3315 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3316 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3317 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3318 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3319 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3320 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3321 "\\u3071\\u309d", /* H\\u309c\\u309d */
3322 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3323 "\\u3071\\u306f", /* H\\u309cH */
3324 "\\u30d1\\u30cf", /* K\\u309cK */
3325 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3326 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3327 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3328 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3329 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3332 static const char *test3[] = {
3360 "\\u30b7\\u30e3\\u30fc\\u30ec",
3363 static const UColAttribute att
[] = { UCOL_STRENGTH
};
3364 static const UColAttributeValue val
[] = { UCOL_QUATERNARY
};
3366 static const UColAttribute attShifted
[] = { UCOL_STRENGTH
, UCOL_ALTERNATE_HANDLING
};
3367 static const UColAttributeValue valShifted
[] = { UCOL_QUATERNARY
, UCOL_SHIFTED
};
3369 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), att
, val
, 1);
3370 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), att
, val
, 1);
3371 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3372 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), attShifted
, valShifted
, 2);
3373 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), attShifted
, valShifted
, 2);
/* Under a rule that makes two supplementary characters (written as the
 * surrogate pairs D9B0 DC70 and D9B0 DC71) equal, every entry of `test`
 * is expected to compare UCOL_EQUAL via genericRulesStarterWithResult(). */
3376 static void TestStrCollIdenticalPrefix(void) {
3377 const char* rule
= "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3378 const char* test
[] = {
3382 genericRulesStarterWithResult(rule
, test
, sizeof(test
)/sizeof(test
[0]), UCOL_EQUAL
);
/* Table-driven: each entry of `tests` holds a rule, a list of strings and
 * a count; all listed strings must compare UCOL_EQUAL under that rule
 * (checked through genericRulesStarterWithResult()). */
3384 /* Contractions should have all their canonically equivalent */
3385 /* strings included */
3386 static void TestContractionClosure(void) {
3389 const char *data
[50];
3392 { "&b=\\u00e4\\u00e4",
3393 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3395 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3400 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3401 genericRulesStarterWithResult(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
, UCOL_EQUAL
);
/* Two parts:
 *  1. table-driven ordering checks for rules that combine "[before 3]"
 *     with other tailorings, run through genericRulesStarter();
 *  2. rule1 and rule2 (the same tailorings in opposite order, with a
 *     prefix rule) are run against `test`, after which a collator built
 *     from rule1 is used to print, in verbose mode, every collation
 *     element of each test string. */
3405 /* This test also fails */
3406 static void TestBeforePrefixFailure(void) {
3409 const char *data
[50];
3413 "&[before 3]\\uff41 <<< x",
3414 {"x", "\\uff41"}, 2 },
3415 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3416 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3417 "&[before 3]\\u30a7<<<\\u30a9",
3418 {"\\u30a9", "\\u30a7"}, 2 },
3419 { "&[before 3]\\u30a7<<<\\u30a9"
3420 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3421 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3422 {"\\u30a9", "\\u30a7"}, 2 },
3427 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3428 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3433 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3434 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3435 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3437 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3438 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3439 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3440 const char* test
[] = {
3441 "\\u30c6\\u30fc\\u30bf",
3442 "\\u30c6\\u30a7\\u30bf",
3444 genericRulesStarter(rule1
, test
, sizeof(test
)/sizeof(test
[0]));
3445 genericRulesStarter(rule2
, test
, sizeof(test
)/sizeof(test
[0]));
3446 /* this piece of code should be in some sort of verbose mode */
3447 /* it gets the collation elements for elements and prints them */
3448 /* This is useful when trying to see whether the problem is */
3450 UErrorCode status
= U_ZERO_ERROR
;
3452 UCollationElements
*it
= NULL
;
3455 uint32_t uStringLen
;
3456 UCollator
*coll
= NULL
;
3458 uStringLen
= u_unescape(rule1
, string
, 256);
3460 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3462 /*coll = ucol_open("ja_JP_JIS", &status);*/
3463 it
= ucol_openElements(coll
, string
, 0, &status
);
3465 for(i
= 0; i
< sizeof(test
)/sizeof(test
[0]); i
++) {
3466 log_verbose("%s\n", test
[i
]);
3467 uStringLen
= u_unescape(test
[i
], string
, 256);
3468 ucol_setText(it
, string
, uStringLen
, &status
);
3470 while((CE
=ucol_next(it
, &status
)) != UCOL_NULLORDER
) {
3471 log_verbose("%08X\n", CE
);
3477 ucol_closeElements(it
);
/* Unescapes rule1 (a tailoring with two prefix rules made equal to each
 * other) and opens a collator from it with ucol_openRules(). */
3483 static void TestPrefixCompose(void) {
3485 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3487 const char* test[] = {
3488 "\\u30c6\\u30fc\\u30bf",
3489 "\\u30c6\\u30a7\\u30bf",
3493 UErrorCode status
= U_ZERO_ERROR
;
3495 /*UCollationElements *it = NULL;*/
3498 uint32_t uStringLen
;
3499 UCollator
*coll
= NULL
;
3501 uStringLen
= u_unescape(rule1
, string
, 256);
3503 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3511 [last variable] last variable value
3512 [last primary ignorable] largest CE for primary ignorable
3513 [last secondary ignorable] largest CE for secondary ignorable
3514 [last tertiary ignorable] largest CE for tertiary ignorable
3515 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
/* Table-driven test of the bracketed rule anchors ("[first ...]",
 * "[last ...]") and of "[before N]" applied to them: each entry supplies a
 * rule, the expected ascending order of strings and how many of them to
 * check, and is run through genericRulesStarter(). */
3518 static void TestRuleOptions(void) {
3519 /* values here are hardcoded and are correct for the current UCA
3520 * when the UCA changes, one might be forced to change these
3521 * values. (\\u02d0, \\U00010FFFC etc...)
3525 const char *data
[50];
3528 /* - all befores here amount to zero */
3529 { "&[before 3][first tertiary ignorable]<<<a",
3530 { "\\u0000", "a"}, 2
3531 }, /* you cannot go before first tertiary ignorable */
3533 { "&[before 3][last tertiary ignorable]<<<a",
3534 { "\\u0000", "a"}, 2
3535 }, /* you cannot go before last tertiary ignorable */
3537 { "&[before 3][first secondary ignorable]<<<a",
3538 { "\\u0000", "a"}, 2
3539 }, /* you cannot go before first secondary ignorable */
3541 { "&[before 3][last secondary ignorable]<<<a",
3542 { "\\u0000", "a"}, 2
3543 }, /* you cannot go before first secondary ignorable */
3545 /* 'normal' befores */
3547 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3548 { "c", "b", "\\u0332", "a" }, 4
3551 /* we don't have a code point that corresponds to
3552 * the last primary ignorable
3554 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3555 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3558 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3559 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3562 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3563 { "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
3566 { "&[first regular]<a"
3567 "&[before 1][first regular]<b",
3568 { "b", "\\u02d0", "a", "\\u02d1"}, 4
3571 { "&[before 1][last regular]<b"
3572 "&[last regular]<a",
3573 { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
3576 { "&[before 1][first implicit]<b"
3577 "&[first implicit]<a",
3578 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3581 { "&[before 1][last implicit]<b"
3582 "&[last implicit]<a",
3583 { "b", "\\U0010FFFD", "a" }, 3
3586 { "&[last variable]<z"
3587 "&[last primary ignorable]<x"
3588 "&[last secondary ignorable]<<y"
3589 "&[last tertiary ignorable]<<<w"
3591 {"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
3598 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3599 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3604 static void TestOptimize(void) {
3605 /* this is not really a test - just trying out
3606 * whether copying of UCA contents will fail
3607 * Cannot really test, since the functionality
3612 const char *data
[50];
3615 /* - all befores here amount to zero */
3616 { "[optimize [\\uAC00-\\uD7FF]]",
3621 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3622 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3627 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3628 weiv ucol_strcollIter?
3629 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3630 weiv these are the input strings?
3631 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3632 weiv will check - could be a problem with utf-8 iterator
3633 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3635 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3636 weiv that doesn't sound right
3637 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3638 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3639 cycheng@ca.ibm.c... yes
3640 weiv and then do the comparison
3641 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3642 weiv utf-16 strings look like a little endian ones in the example you sent me
3643 weiv It could be a bug - let me try to test it out
3644 cycheng@ca.ibm.c... ok
3645 cycheng@ca.ibm.c... we can wait till the conf. call
3646 cycheng@ca.ibm.c... next week
3647 weiv that would be great
3649 weiv I might be wrong
3650 weiv let me play with it some more
3651 cycheng@ca.ibm.c... ok
3652 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3653 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3654 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3656 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3658 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3661 static void Alexis(void) {
3662 UErrorCode status
= U_ZERO_ERROR
;
3663 UCollator
*coll
= ucol_open("", &status
);
3666 const char utf16be
[2][4] = {
3667 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
3668 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
3671 const char utf8
[2][4] = {
3672 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
3673 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
3676 UCharIterator iterU161
, iterU162
;
3677 UCharIterator iterU81
, iterU82
;
3679 UCollationResult resU16
, resU8
;
3681 uiter_setUTF16BE(&iterU161
, utf16be
[0], 4);
3682 uiter_setUTF16BE(&iterU162
, utf16be
[1], 4);
3684 uiter_setUTF8(&iterU81
, utf8
[0], 4);
3685 uiter_setUTF8(&iterU82
, utf8
[1], 4);
3687 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3689 resU16
= ucol_strcollIter(coll
, &iterU161
, &iterU162
, &status
);
3690 resU8
= ucol_strcollIter(coll
, &iterU81
, &iterU82
, &status
);
3693 if(resU16
!= resU8
) {
3694 log_err("different results\n");
3701 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3702 static void Alexis2(void) {
3703 UErrorCode status
= U_ZERO_ERROR
;
3704 UChar U16Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3705 char U16BESource
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16BETarget
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3706 char U8Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U8Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3707 int32_t U16LenS
= 0, U16LenT
= 0, U16BELenS
= 0, U16BELenT
= 0, U8LenS
= 0, U8LenT
= 0;
3709 UConverter
*conv
= NULL
;
3711 UCharIterator U16BEItS
, U16BEItT
;
3712 UCharIterator U8ItS
, U8ItT
;
3714 UCollationResult resU16
, resU16BE
, resU8
;
3716 const char* pairs
[][2] = {
3717 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3718 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3719 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3720 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3721 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3722 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3723 { "\\u0020", "\\u0020\\u0000"}
3725 5F20 (my result here)
3727 5F20 (your result here)
3733 UCollator
*coll
= ucol_open("", &status
);
3734 if(status
== U_FILE_ACCESS_ERROR
) {
3735 log_data_err("Is your data around?\n");
3737 } else if(U_FAILURE(status
)) {
3738 log_err("Error opening collator\n");
3741 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3742 conv
= ucnv_open("UTF16BE", &status
);
3743 for(i
= 0; i
< sizeof(pairs
)/sizeof(pairs
[0]); i
++) {
3744 U16LenS
= u_unescape(pairs
[i
][0], U16Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3745 U16LenT
= u_unescape(pairs
[i
][1], U16Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3747 resU16
= ucol_strcoll(coll
, U16Source
, U16LenS
, U16Target
, U16LenT
);
3749 log_verbose("Result of strcoll is %i\n", resU16
);
3751 U16BELenS
= ucnv_fromUChars(conv
, U16BESource
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Source
, U16LenS
, &status
);
3752 U16BELenT
= ucnv_fromUChars(conv
, U16BETarget
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Target
, U16LenT
, &status
);
3754 /* use the original sizes, as the result from converter is in bytes */
3755 uiter_setUTF16BE(&U16BEItS
, U16BESource
, U16LenS
);
3756 uiter_setUTF16BE(&U16BEItT
, U16BETarget
, U16LenT
);
3758 resU16BE
= ucol_strcollIter(coll
, &U16BEItS
, &U16BEItT
, &status
);
3760 log_verbose("Result of U16BE is %i\n", resU16BE
);
3762 if(resU16
!= resU16BE
) {
3763 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
3766 u_strToUTF8(U8Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenS
, U16Source
, U16LenS
, &status
);
3767 u_strToUTF8(U8Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenT
, U16Target
, U16LenT
, &status
);
3769 uiter_setUTF8(&U8ItS
, U8Source
, U8LenS
);
3770 uiter_setUTF8(&U8ItT
, U8Target
, U8LenT
);
3772 resU8
= ucol_strcollIter(coll
, &U8ItS
, &U8ItT
, &status
);
3774 if(resU16
!= resU8
) {
3775 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
3784 static void TestHebrewUCA(void) {
3785 UErrorCode status
= U_ZERO_ERROR
;
3786 const char *first
[] = {
3787 "d790d6b8d79cd795d6bcd7a9",
3788 "d790d79cd79ed7a7d799d799d7a1",
3789 "d790d6b4d79ed795d6bcd7a9",
3792 char utf8String
[3][256];
3793 UChar utf16String
[3][256];
3795 int32_t i
= 0, j
= 0;
3796 int32_t sizeUTF8
[3];
3797 int32_t sizeUTF16
[3];
3799 UCollator
*coll
= ucol_open("", &status
);
3800 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3802 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0]); i
++) {
3803 sizeUTF8
[i
] = u_parseUTF8(first
[i
], -1, utf8String
[i
], 256, &status
);
3804 u_strFromUTF8(utf16String
[i
], 256, &sizeUTF16
[i
], utf8String
[i
], sizeUTF8
[i
], &status
);
3805 log_verbose("%i: ");
3806 for(j
= 0; j
< sizeUTF16
[i
]; j
++) {
3807 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3808 log_verbose("%04X", utf16String
[i
][j
]);
3812 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0])-1; i
++) {
3813 for(j
= i
+ 1; j
< sizeof(first
)/sizeof(first
[0]); j
++) {
3814 doTest(coll
, utf16String
[i
], utf16String
[j
], UCOL_LESS
);
3822 static void TestPartialSortKeyTermination(void) {
3823 const char* cases
[] = {
3824 "\\u1234\\u1234\\udc00",
3825 "\\udc00\\ud800\\ud800"
3828 int32_t i
= sizeof(UCollator
);
3830 UErrorCode status
= U_ZERO_ERROR
;
3832 UCollator
*coll
= ucol_open("", &status
);
3836 UChar currCase
[256];
3838 int32_t pKeyLen
= 0;
3842 for(i
= 0; i
< sizeof(cases
)/sizeof(cases
[0]); i
++) {
3843 uint32_t state
[2] = {0, 0};
3844 length
= u_unescape(cases
[i
], currCase
, 256);
3845 uiter_setString(&iter
, currCase
, length
);
3846 pKeyLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, key
, 256, &status
);
3848 log_verbose("Done\n");
3854 static void TestSettings(void) {
3855 const char* cases
[] = {
3860 const char* locales
[] = {
3865 UErrorCode status
= U_ZERO_ERROR
;
3867 int32_t i
= 0, j
= 0;
3869 UChar source
[256], target
[256];
3870 int32_t sLen
= 0, tLen
= 0;
3872 UCollator
*collateObject
= NULL
;
3873 for(i
= 0; i
< sizeof(locales
)/sizeof(locales
[0]); i
++) {
3874 collateObject
= ucol_open(locales
[i
], &status
);
3875 ucol_setStrength(collateObject
, UCOL_PRIMARY
);
3876 ucol_setAttribute(collateObject
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
3877 for(j
= 1; j
< sizeof(cases
)/sizeof(cases
[0]); j
++) {
3878 sLen
= u_unescape(cases
[j
-1], source
, 256);
3880 tLen
= u_unescape(cases
[j
], target
, 256);
3882 doTest(collateObject
, source
, target
, UCOL_EQUAL
);
3884 ucol_close(collateObject
);
3888 static int32_t TestEqualsForCollator(const char* locName
, UCollator
*source
, UCollator
*target
) {
3889 UErrorCode status
= U_ZERO_ERROR
;
3890 int32_t errorNo
= 0;
3891 /*const UChar *sourceRules = NULL;*/
3892 /*int32_t sourceRulesLen = 0;*/
3893 UColAttributeValue french
= UCOL_OFF
;
3894 int32_t cloneSize
= 0;
3896 if(!ucol_equals(source
, target
)) {
3897 log_err("Same collators, different address not equal\n");
3901 if(uprv_strcmp(ucol_getLocale(source
, ULOC_REQUESTED_LOCALE
, &status
), ucol_getLocale(source
, ULOC_ACTUAL_LOCALE
, &status
)) == 0) {
3902 /* currently, safeClone is implemented through getRules/openRules
3903 * so it is the same as the test below - I will comment that test out.
3906 target
= ucol_safeClone(source
, NULL
, &cloneSize
, &status
);
3907 if(U_FAILURE(status
)) {
3908 log_err("Error creating clone\n");
3912 if(!ucol_equals(source
, target
)) {
3913 log_err("Collator different from it's clone\n");
3916 french
= ucol_getAttribute(source
, UCOL_FRENCH_COLLATION
, &status
);
3917 if(french
== UCOL_ON
) {
3918 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
3920 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
3922 if(U_FAILURE(status
)) {
3923 log_err("Error setting attributes\n");
3927 if(ucol_equals(source
, target
)) {
3928 log_err("Collators same even when options changed\n");
3932 /* commented out since safeClone uses exactly the same technique */
3934 sourceRules = ucol_getRules(source, &sourceRulesLen);
3935 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3936 if(U_FAILURE(status)) {
3937 log_err("Error instantiating target from rules\n");
3941 if(!ucol_equals(source, target)) {
3942 log_err("Collator different from collator that was created from the same rules\n");
3952 static void TestEquals(void) {
3953 /* ucol_equals is not currently a public API. There is a chance that it will become
3954 * something like this, but currently it is only used by RuleBasedCollator::operator==
3956 /* test whether the two collators instantiated from the same locale are equal */
3957 UErrorCode status
= U_ZERO_ERROR
;
3958 UParseError parseError
;
3959 int32_t noOfLoc
= uloc_countAvailable();
3960 const char *locName
= NULL
;
3961 UCollator
*source
= NULL
, *target
= NULL
;
3964 const char* rules
[] = {
3965 "&l < lj <<< Lj <<< LJ",
3966 "&n < nj <<< Nj <<< NJ",
3971 const char* badRules[] = {
3973 "&n < nj <<< nJ <<< NJ",
3975 "&AE <<< \\u00c4 <<< x"
3979 UChar sourceRules
[1024], targetRules
[1024];
3980 int32_t sourceRulesSize
= 0, targetRulesSize
= 0;
3981 int32_t rulesSize
= sizeof(rules
)/sizeof(rules
[0]);
3983 for(i
= 0; i
< rulesSize
; i
++) {
3984 sourceRulesSize
+= u_unescape(rules
[i
], sourceRules
+sourceRulesSize
, 1024 - sourceRulesSize
);
3985 targetRulesSize
+= u_unescape(rules
[rulesSize
-i
-1], targetRules
+targetRulesSize
, 1024 - targetRulesSize
);
3988 source
= ucol_openRules(sourceRules
, sourceRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
3989 if(status
== U_FILE_ACCESS_ERROR
) {
3990 log_data_err("Is your data around?\n");
3992 } else if(U_FAILURE(status
)) {
3993 log_err("Error opening collator\n");
3996 target
= ucol_openRules(targetRules
, targetRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
3997 if(!ucol_equals(source
, target
)) {
3998 log_err("Equivalent collators not equal!\n");
4003 source
= ucol_open("root", &status
);
4004 target
= ucol_open("root", &status
);
4005 log_verbose("Testing root\n");
4006 if(!ucol_equals(source
, source
)) {
4007 log_err("Same collator not equal\n");
4009 if(TestEqualsForCollator(locName
, source
, target
)) {
4010 log_err("Errors for root\n", locName
);
4014 for(i
= 0; i
<noOfLoc
; i
++) {
4015 status
= U_ZERO_ERROR
;
4016 locName
= uloc_getAvailable(i
);
4017 /*if(hasCollationElements(locName)) {*/
4018 log_verbose("Testing equality for locale %s\n", locName
);
4019 source
= ucol_open(locName
, &status
);
4020 target
= ucol_open(locName
, &status
);
4021 if(TestEqualsForCollator(locName
, source
, target
)) {
4022 log_err("Errors for locale %s\n", locName
);
4029 static void TestJ2726(void) {
4030 UChar a
[2] = { 0x61, 0x00 }; /*"a"*/
4031 UChar aSpace
[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4032 UChar spaceA
[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4033 UErrorCode status
= U_ZERO_ERROR
;
4034 UCollator
*coll
= ucol_open("en", &status
);
4035 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
4036 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4037 doTest(coll
, a
, aSpace
, UCOL_EQUAL
);
4038 doTest(coll
, aSpace
, a
, UCOL_EQUAL
);
4039 doTest(coll
, a
, spaceA
, UCOL_EQUAL
);
4040 doTest(coll
, spaceA
, a
, UCOL_EQUAL
);
4041 doTest(coll
, spaceA
, aSpace
, UCOL_EQUAL
);
4042 doTest(coll
, aSpace
, spaceA
, UCOL_EQUAL
);
4046 static void NullRule(void) {
4048 UErrorCode status
= U_ZERO_ERROR
;
4049 UCollator
*coll
= ucol_openRules(r
, 1, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4050 if(U_SUCCESS(status
)) {
4051 log_err("This should have been an error!\n");
4054 status
= U_ZERO_ERROR
;
4056 coll
= ucol_openRules(r
, 0, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4057 if(U_FAILURE(status
)) {
4058 log_err("Empty rules should have produced a valid collator\n");
4065 * Test for CollationElementIterator previous and next for the whole set of
4066 * unicode characters with normalization on.
4068 static void TestNumericCollation(void)
4070 UErrorCode status
= U_ZERO_ERROR
;
4072 const static char *basicTestStrings
[]={
4085 const static char *preZeroTestStrings
[]={
4093 "avery000000010000",
4096 const static char *thirtyTwoBitNumericStrings
[]={
4103 const static char *supplementaryDigits
[] = {
4104 "\\uD835\\uDFCE", /* 0 */
4105 "\\uD835\\uDFCF", /* 1 */
4106 "\\uD835\\uDFD0", /* 2 */
4107 "\\uD835\\uDFD1", /* 3 */
4108 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4109 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4110 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4111 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4112 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4113 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4116 const static char *foreignDigits
[] = {
4131 const static char *evenZeroes
[] = {
4138 UColAttribute att
= UCOL_NUMERIC_COLLATION
;
4139 UColAttributeValue val
= UCOL_ON
;
4141 /* Open our collator. */
4142 UCollator
* coll
= ucol_open("root", &status
);
4143 if (U_FAILURE(status
)){
4144 log_err("ERROR: in using ucol_open()\n %s\n",
4145 myErrorName(status
));
4148 genericLocaleStarterWithOptions("root", basicTestStrings
, sizeof(basicTestStrings
)/sizeof(basicTestStrings
[0]), &att
, &val
, 1);
4149 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings
, sizeof(thirtyTwoBitNumericStrings
)/sizeof(thirtyTwoBitNumericStrings
[0]), &att
, &val
, 1);
4150 genericLocaleStarterWithOptions("en_US", foreignDigits
, sizeof(foreignDigits
)/sizeof(foreignDigits
[0]), &att
, &val
, 1);
4151 genericLocaleStarterWithOptions("root", supplementaryDigits
, sizeof(supplementaryDigits
)/sizeof(supplementaryDigits
[0]), &att
, &val
, 1);
4152 genericLocaleStarterWithOptions("root", evenZeroes
, sizeof(evenZeroes
)/sizeof(evenZeroes
[0]), &att
, &val
, 1);
4154 /* Setting up our collator to do digits. */
4155 ucol_setAttribute(coll
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
4156 if (U_FAILURE(status
)){
4157 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4158 myErrorName(status
));
4163 Testing that prepended zeroes still yield the correct collation behavior.
4164 We expect that every element in our strings array will be equal.
4166 genericOrderingTestWithResult(coll
, preZeroTestStrings
, sizeof(preZeroTestStrings
)/sizeof(preZeroTestStrings
[0]), UCOL_EQUAL
);
4171 static void TestTibetanConformance(void)
4173 const char* test
[] = {
4174 "\\u0FB2\\u0591\\u0F71\\u0061",
4175 "\\u0FB2\\u0F71\\u0061"
4178 UErrorCode status
= U_ZERO_ERROR
;
4179 UCollator
*coll
= ucol_open("", &status
);
4183 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4184 if (U_SUCCESS(status
)) {
4185 u_unescape(test
[0], source
, 100);
4186 u_unescape(test
[1], target
, 100);
4187 doTest(coll
, source
, target
, UCOL_EQUAL
);
4188 result
= ucol_strcoll(coll
, source
, -1, target
, -1);
4189 log_verbose("result %d\n", result
);
4190 if (UCOL_EQUAL
!= result
) {
4191 log_err("Tibetan comparison error\n");
4196 genericLocaleStarterWithResult("", test
, 2, UCOL_EQUAL
);
4199 static void TestPinyinProblem(void) {
4200 static const char *test
[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
4201 genericLocaleStarter("zh__PINYIN", test
, sizeof(test
)/sizeof(test
[0]));
/* Largest raw value fed to the implicit-CE generator by the tests below. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks used when inspecting implicit CEs. They carry no trailing
 * semicolon, so they can be used inside larger expressions as well as at
 * the end of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4210 static void showImplicit(UChar32 i
) {
4211 if (i
>= 0 && i
<= TST_UCOL_MAX_INPUT
) {
4212 log_verbose("%08X\t%08X\n", i
, uprv_uca_getImplicitFromRaw(i
));
4216 static void TestImplicitGeneration(void) {
4217 UErrorCode status
= U_ZERO_ERROR
;
4220 UChar32 i
= 0, j
= 0;
4221 UChar32 roundtrip
= 0;
4222 UChar32 lastBottom
= 0;
4223 UChar32 currentBottom
= 0;
4224 UChar32 lastTop
= 0;
4225 UChar32 currentTop
= 0;
4227 UCollator
*coll
= ucol_open("root", &status
);
4228 if(U_FAILURE(status
)) {
4229 log_err("Couldn't open UCA\n");
4233 uprv_uca_getRawFromImplicit(0xE20303E7);
4235 for (i
= 0; i
<= TST_UCOL_MAX_INPUT
; ++i
) {
4236 current
= uprv_uca_getImplicitFromRaw(i
) & fourBytes
;
4238 /* check that it round-trips AND that all intervening ones are illegal*/
4239 roundtrip
= uprv_uca_getRawFromImplicit(current
);
4240 if (roundtrip
!= i
) {
4241 log_err("No roundtrip %08X\n", i
);
4244 for (j
= last
+ 1; j
< current
; ++j
) {
4245 roundtrip
= uprv_uca_getRawFromImplicit(j
);
4246 /* raise an error if it *doesn't* find an error*/
4247 if (roundtrip
!= -1) {
4248 log_err("Fails to recognize illegal %08X\n", j
);
4252 /* now do other consistency checks*/
4253 lastBottom
= last
& bottomByte
;
4254 currentBottom
= current
& bottomByte
;
4255 lastTop
= last
& topByte
;
4256 currentTop
= current
& topByte
;
4258 /* print out some values for spot-checking*/
4259 if (lastTop
!= currentTop
|| i
== 0x10000 || i
== 0x110000) {
4269 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i
)) != i
) {
4270 log_err("No raw <-> code point roundtrip for 0x%08X\n", i
);
4273 showImplicit(TST_UCOL_MAX_INPUT
-2);
4274 showImplicit(TST_UCOL_MAX_INPUT
-1);
4275 showImplicit(TST_UCOL_MAX_INPUT
);
4280 * Iterate through the given iterator, checking to see that all the strings
4281 * in the expected array are present.
4282 * @param expected array of strings we expect to see, or NULL
4283 * @param expectedCount number of elements of expected, or 0
4285 static int32_t checkUEnumeration(const char* msg
,
4287 const char** expected
,
4288 int32_t expectedCount
) {
4289 UErrorCode ec
= U_ZERO_ERROR
;
4290 int32_t i
= 0, n
, j
, bit
;
4291 int32_t seenMask
= 0;
4293 U_ASSERT(expectedCount
>= 0 && expectedCount
< 31); /* [sic] 31 not 32 */
4294 n
= uenum_count(iter
, &ec
);
4295 if (!assertSuccess("count", &ec
)) return -1;
4296 log_verbose("%s = [", msg
);
4298 const char* s
= uenum_next(iter
, NULL
, &ec
);
4299 if (!assertSuccess("snext", &ec
) || s
== NULL
) break;
4300 if (i
!= 0) log_verbose(",");
4301 log_verbose("%s", s
);
4302 /* check expected list */
4303 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
4304 if ((seenMask
&bit
) == 0 &&
4305 uprv_strcmp(s
, expected
[j
]) == 0) {
4311 log_verbose("] (%d)\n", i
);
4312 assertTrue("count verified", i
==n
);
4313 /* did we see all expected strings? */
4314 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
4315 if ((seenMask
&bit
)!=0) {
4316 log_verbose("Ok: \"%s\" seen\n", expected
[j
]);
4318 log_err("FAIL: \"%s\" not seen\n", expected
[j
]);
4325 * Test new API added for separate collation tree.
4327 static void TestSeparateTrees(void) {
4328 UErrorCode ec
= U_ZERO_ERROR
;
4329 UEnumeration
*e
= NULL
;
4334 static const char* AVAIL
[] = { "en", "de" };
4336 static const char* KW
[] = { "collation" };
4338 static const char* KWVAL
[] = { "phonebook", "stroke" };
4340 #if !UCONFIG_NO_SERVICE
4341 e
= ucol_openAvailableLocales(&ec
);
4342 assertSuccess("ucol_openAvailableLocales", &ec
);
4343 assertTrue("ucol_openAvailableLocales!=0", e
!=0);
4344 n
= checkUEnumeration("ucol_openAvailableLocales", e
, AVAIL
, LEN(AVAIL
));
4345 /* Don't need to check n because we check list */
4349 e
= ucol_getKeywords(&ec
);
4350 assertSuccess("ucol_getKeywords", &ec
);
4351 assertTrue("ucol_getKeywords!=0", e
!=0);
4352 n
= checkUEnumeration("ucol_getKeywords", e
, KW
, LEN(KW
));
4353 /* Don't need to check n because we check list */
4356 e
= ucol_getKeywordValues(KW
[0], &ec
);
4357 assertSuccess("ucol_getKeywordValues", &ec
);
4358 assertTrue("ucol_getKeywordValues!=0", e
!=0);
4359 n
= checkUEnumeration("ucol_getKeywordValues", e
, KWVAL
, LEN(KWVAL
));
4360 /* Don't need to check n because we check list */
4363 /* Try setting a warning before calling ucol_getKeywordValues */
4364 ec
= U_USING_FALLBACK_WARNING
;
4365 e
= ucol_getKeywordValues(KW
[0], &ec
);
4366 assertSuccess("ucol_getKeywordValues [with warning code set]", &ec
);
4367 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e
!=0);
4368 n
= checkUEnumeration("ucol_getKeywordValues [with warning code set]", e
, KWVAL
, LEN(KWVAL
));
4369 /* Don't need to check n because we check list */
4373 U_DRAFT int32_t U_EXPORT2
4374 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4375 const char* locale, UBool* isAvailable,
4376 UErrorCode* status);
4379 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "fr",
4381 assertSuccess("getFunctionalEquivalent", &ec
);
4382 assertEquals("getFunctionalEquivalent(fr)", "fr", loc
);
4383 assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
4384 isAvailable
== TRUE
);
4386 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "fr_FR",
4388 assertSuccess("getFunctionalEquivalent", &ec
);
4389 assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc
);
4390 assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
4391 isAvailable
== TRUE
);
4394 /* supercedes TestJ784 */
4395 static void TestBeforePinyin(void) {
4396 const static char rules
[] = {
4397 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
4398 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
4399 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
4400 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
4401 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
4402 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
4405 const static char *test
[] = {
4416 const static char *test2
[] = {
4449 genericRulesStarter(rules
, test
, sizeof(test
)/sizeof(test
[0]));
4450 genericLocaleStarter("zh", test
, sizeof(test
)/sizeof(test
[0]));
4451 genericRulesStarter(rules
, test2
, sizeof(test2
)/sizeof(test2
[0]));
4452 genericLocaleStarter("zh", test2
, sizeof(test2
)/sizeof(test2
[0]));
4455 static void TestBeforeTightening(void) {
4458 UErrorCode expectedStatus
;
4460 { "&[before 1]a<x", U_ZERO_ERROR
},
4461 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR
},
4462 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR
},
4463 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR
},
4464 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR
},
4465 { "&[before 2]a<<x",U_ZERO_ERROR
},
4466 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR
},
4467 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR
},
4468 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR
},
4469 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR
},
4470 { "&[before 3]a<<<x",U_ZERO_ERROR
},
4471 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR
},
4472 { "&[before I]a = x",U_INVALID_FORMAT_ERROR
}
4477 UErrorCode status
= U_ZERO_ERROR
;
4478 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
4481 UCollator
*coll
= NULL
;
4484 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
4485 rlen
= u_unescape(tests
[i
].rules
, rlz
, RULE_BUFFER_LEN
);
4486 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
4487 if(status
!= tests
[i
].expectedStatus
) {
4488 log_err("Opening a collator with rules %s returned error code %s, expected %s\n",
4489 tests
[i
].rules
, u_errorName(status
), u_errorName(tests
[i
].expectedStatus
));
4492 status
= U_ZERO_ERROR
;
4499 &[before
1] a
< x
<<< X
<< q
<<< Q
< z
4500 assert: m
<<< M
< x
<<< X
<< q
<<< Q
< z
< a
< n
4503 &[before
2] a
<< x
<<< X
<< q
<<< Q
< z
4504 assert: m
<<< M
< x
<<< X
<< q
<<< Q
<< a
< z
< n
4507 &[before
3] a
<<< x
<<< X
<< q
<<< Q
< z
4508 assert: m
<<< M
< x
<<< X
<<< a
<< q
<<< Q
< z
< n
4512 &[before
1] a
< x
<<< X
<< q
<<< Q
< z
4513 assert: x
<<< X
<< q
<<< Q
< z
< m
<<< M
<< a
< n
4516 &[before
2] a
<< x
<<< X
<< q
<<< Q
< z
4517 assert: m
<<< M
<< x
<<< X
<< q
<<< Q
<< a
< z
< n
4520 &[before
3] a
<<< x
<<< X
<< q
<<< Q
< z
4521 assert: m
<<< M
<< x
<<< X
<<< a
<< q
<<< Q
< z
< n
4525 &[before
1] a
< x
<<< X
<< q
<<< Q
< z
4526 assert: x
<<< X
<< q
<<< Q
< z
< n
< m
<<< a
<<< M
4529 &[before
2] a
<< x
<<< X
<< q
<<< Q
< z
4530 assert: x
<<< X
<< q
<<< Q
<< m
<<< a
<<< M
< z
< n
4533 &[before
3] a
<<< x
<<< X
<< q
<<< Q
< z
4534 assert: m
<<< x
<<< X
<<< a
<<< M
<< q
<<< Q
< z
< n
4537 &[before
1] s
< x
<<< X
<< q
<<< Q
< z
4538 assert: r
<<< R
< x
<<< X
<< q
<<< Q
< z
< s
< n
4540 &[before
2] s
<< x
<<< X
<< q
<<< Q
< z
4541 assert: r
<<< R
< x
<<< X
<< q
<<< Q
<< s
< z
< n
4543 &[before
3] s
<<< x
<<< X
<< q
<<< Q
< z
4544 assert: r
<<< R
< x
<<< X
<<< s
<< q
<<< Q
< z
< n
4547 &[before
1] \u24DC < x
<<< X
<< q
<<< Q
< z
4548 assert: x
<<< X
<< q
<<< Q
< z
< n
< m
<<< \u24DC <<< M
4550 &[before
2] \u24DC << x
<<< X
<< q
<<< Q
< z
4551 assert: x
<<< X
<< q
<<< Q
<< m
<<< \u24DC <<< M
< z
< n
4553 &[before
3] \u24DC <<< x
<<< X
<< q
<<< Q
< z
4554 assert: m
<<< x
<<< X
<<< \u24DC <<< M
<< q
<<< Q
< z
< n
4559 /* requires features not yet supported */
4560 static void TestMoreBefore(void) {
4563 const char* order
[20];
4566 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4567 { "m","M","x","X","q","Q","z","a","n" }, 9},
4568 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4569 { "m","M","x","X","q","Q","a","z","n" }, 9},
4570 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4571 { "m","M","x","X","a","q","Q","z","n" }, 9},
4572 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4573 { "x","X","q","Q","z","m","M","a","n" }, 9},
4574 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4575 { "m","M","x","X","q","Q","a","z","n" }, 9},
4576 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4577 { "m","M","x","X","a","q","Q","z","n" }, 9},
4578 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4579 { "x","X","q","Q","z","n","m","a","M" }, 9},
4580 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4581 { "x","X","q","Q","m","a","M","z","n" }, 9},
4582 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4583 { "m","x","X","a","M","q","Q","z","n" }, 9},
4584 { "&[before 1] s < x <<< X << q <<< Q < z",
4585 { "r","R","x","X","q","Q","z","s","n" }, 9},
4586 { "&[before 2] s << x <<< X << q <<< Q < z",
4587 { "r","R","x","X","q","Q","s","z","n" }, 9},
4588 { "&[before 3] s <<< x <<< X << q <<< Q < z",
4589 { "r","R","x","X","s","q","Q","z","n" }, 9},
4590 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4591 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4592 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4593 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4594 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4595 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4600 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
4601 genericRulesStarter(tests
[i
].rules
, tests
[i
].order
, tests
[i
].size
);
4606 static void TestTailorNULL( void ) {
4607 const static char* rule
= "&a <<< '\\u0000'";
4608 UErrorCode status
= U_ZERO_ERROR
;
4609 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
4611 UChar a
= 1, null
= 0;
4612 UCollationResult res
= UCOL_EQUAL
;
4614 UCollator
*coll
= NULL
;
4617 rlen
= u_unescape(rule
, rlz
, RULE_BUFFER_LEN
);
4618 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
4620 if(U_FAILURE(status
)) {
4621 log_err("Could not open default collator!\n");
4623 res
= ucol_strcoll(coll
, &a
, 1, &null
, 1);
4625 if(res
!= UCOL_LESS
) {
4626 log_err("NULL was not tailored properly!\n");
4634 TestThaiSortKey(void)
4636 UChar yamakan
= 0x0E4E;
4637 UErrorCode status
= U_ZERO_ERROR
;
4640 /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
4641 /* since it stays in the same relative position. This should be addressed in CLDR */
4642 /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
4643 /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
4644 /* UCA 5.0 moves Yammakan */
4645 uint8_t expectedKey
[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 };
4646 UCollator
*coll
= ucol_open("th", &status
);
4647 if(U_FAILURE(status
)) {
4648 log_err("Could not open a collator, exiting (%s)\n", u_errorName(status
));
4652 keyLen
= ucol_getSortKey(coll
, &yamakan
, 1, key
, 256);
4653 if(strcmp((char *)key
, (char *)expectedKey
)) {
4654 log_err("Yammakan key is different from ICU 34!\n");
4661 TestUpperFirstQuaternary(void)
4663 const char* tests
[] = { "B", "b", "Bb", "bB" };
4664 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_FIRST
};
4665 UColAttributeValue attVals
[] = { UCOL_QUATERNARY
, UCOL_UPPER_FIRST
};
4666 genericLocaleStarterWithOptions("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
/*
 * NOTE(review): the header of the function that owns this body is not
 * visible in this view, so its name and signature are not documented here.
 *
 * The body makes three calls into the generic test drivers, all of which
 * switch UCOL_CASE_LEVEL on:
 *   1. "root" locale, `tests`, strength primary + case level, expected
 *      result UCOL_EQUAL;
 *   2. "root" locale, `tests2`, the same two attributes, no explicit
 *      expected result;
 *   3. custom `rule`, `tests2`, case level only, expected result UCOL_EQUAL.
 */
4672 const char* tests
[] = { "\\u00e2T", "aT" };
/* att[i] is paired with attVals[i]: strength = primary, case level = on. */
4673 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_LEVEL
};
4674 UColAttributeValue attVals
[] = { UCOL_PRIMARY
, UCOL_ON
};
4675 const char* tests2
[] = { "a", "A" };
/* Rule text that equates both 'A' and 'a' with [first tertiary ignorable]. */
4676 const char* rule
= "&[first tertiary ignorable]=A=a";
/* att2[i] is paired with attVals2[i]: case level = on, nothing else. */
4677 UColAttribute att2
[] = { UCOL_CASE_LEVEL
};
4678 UColAttributeValue attVals2
[] = { UCOL_ON
};
4679 /* Test whether we correctly ignore primary ignorables on case level when */
4680 /* we have only primary & case level */
4681 genericLocaleStarterWithOptionsAndResult("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]), UCOL_EQUAL
);
4682 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4683 /* and case level */
4684 genericLocaleStarterWithOptions("root", tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
4685 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4686 genericRulesStarterWithOptionsAndResult(rule
, tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att2
, attVals2
, sizeof(att2
)/sizeof(att2
[0]), UCOL_EQUAL
);
/*
 * NOTE(review): the header of the function that owns this body is not
 * visible in this view, and neither is the declaration of `ustr`.
 *
 * The body checks whether ucol_getSortKey() writes the last byte of the
 * key. It first asks for the required key length with a NULL buffer, then
 * twice stores a 0xAA sentinel at index sortkey_length-1, regenerates the
 * key into `sortkey`, and logs an error if the sentinel is still there.
 */
4692 static const char *test
= "this is a test string";
4694 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
4695 unsigned char sortkey
[256];
4696 int32_t sortkey_length
;
4697 UErrorCode status
= U_ZERO_ERROR
;
4698 static UCollator
*coll
= NULL
;
4699 coll
= ucol_open("root", &status
);
4700 if(U_FAILURE(status
)) {
4701 log_err("Couldn't open UCA\n");
/* Strength is set to primary twice: once through the dedicated setter and
 * once through the generic attribute API. */
4704 ucol_setStrength(coll
, UCOL_PRIMARY
);
4705 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4706 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4707 if (U_FAILURE(status
)) {
4708 log_err("Failed setting atributes\n");
/* Preflight: NULL buffer with capacity 0; only the returned length is used. */
4711 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, NULL
, 0);
/* NOTE(review): this early return happens after `coll` was opened and no
 * ucol_close() is visible on this path in this view - confirm it is not
 * leaked. */
4712 if (sortkey_length
> 256) return;
/* NOTE(review): if the preflight returned 0, the index below would be -1;
 * there is no visible guard for that case. */
4714 /* we mark the position where the null byte should be written in advance */
4715 sortkey
[sortkey_length
-1] = 0xAA;
4717 /* we set the buffer size one byte higher than needed */
4718 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
/* NOTE(review): the final (capacity) argument of the call above is not
 * visible in this view. */
4721 /* no error occurs (for me) */
4722 if (sortkey
[sortkey_length
-1] == 0xAA) {
4723 log_err("Hit bug at first try\n");
4726 /* we mark the position where the null byte should be written again */
4727 sortkey
[sortkey_length
-1] = 0xAA;
4729 /* this time we set the buffer size to the exact amount needed */
4730 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
/* NOTE(review): the final (capacity) argument of the call above is not
 * visible in this view. */
4733 /* now the trailing null byte is not written */
4734 if (sortkey
[sortkey_length
-1] == 0xAA) {
4735 log_err("Hit bug at second try\n");
4741 /* Regression test for Thai partial sort key problem */
/*
 * NOTE(review): the header of the function that owns this body is not
 * visible in this view.
 *
 * The two escaped test strings are identical except for their
 * next-to-last code unit (\u0e47 in the first, \u0e48 in the second).
 * They are handed to the generic locale test driver for the "th" locale.
 */
4745 const static char *test
[] = {
4746 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
4747 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
4750 genericLocaleStarter("th", test
, sizeof(test
)/sizeof(test
[0]));
4755 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
4757 void addMiscCollTest(TestNode
** root
)
4759 TEST(TestRuleOptions
);
4760 TEST(TestBeforePrefixFailure
);
4761 TEST(TestContractionClosure
);
4762 TEST(TestPrefixCompose
);
4763 TEST(TestStrCollIdenticalPrefix
);
4765 TEST(TestNewJapanese
);
4766 /*TEST(TestLimitations);*/
4768 TEST(TestExtremeCompression
);
4769 TEST(TestSurrogates
);
4770 TEST(TestVariableTopSetting
);
4771 TEST(TestBocsuCoverage
);
4772 TEST(TestCyrillicTailoring
);
4774 TEST(IncompleteCntTest
);
4775 TEST(BlackBirdTest
);
4777 TEST(BillFairmanTest
);
4778 TEST(RamsRulesTest
);
4779 TEST(IsTailoredTest
);
4780 TEST(TestCollations
);
4782 TEST(TestImplicitTailoring
);
4783 TEST(TestFCDProblem
);
4784 TEST(TestEmptyRule
);
4785 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
4787 /*TEST(TestJ831);*/ /* we changed lv locale */
4789 TEST(TestRedundantRules
);
4790 TEST(TestExpansionSyntax
);
4791 TEST(TestHangulTailoring
);
4793 TEST(TestIncrementalNormalize
);
4794 TEST(TestComposeDecompose
);
4795 TEST(TestCompressOverlap
);
4796 TEST(TestContraction
);
4797 TEST(TestExpansion
);
4798 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
4799 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
4801 TEST(TestSuppressContractions
);
4803 TEST(TestHebrewUCA
);
4804 TEST(TestPartialSortKeyTermination
);
4809 TEST(TestNumericCollation
);
4810 TEST(TestTibetanConformance
);
4811 TEST(TestPinyinProblem
);
4812 TEST(TestImplicitGeneration
);
4813 TEST(TestSeparateTrees
);
4814 TEST(TestBeforePinyin
);
4815 TEST(TestBeforeTightening
);
4816 /*TEST(TestMoreBefore);*/
4817 TEST(TestTailorNULL
);
4818 TEST(TestThaiSortKey
);
4819 TEST(TestUpperFirstQuaternary
);
4825 #endif /* #if !UCONFIG_NO_COLLATION */