2 /********************************************************************
4 * Copyright (c) 2001-2013, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7 /*******************************************************************************
11 *******************************************************************************/
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_COLLATION
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
29 #include "unicode/ustring.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
45 #define LEN(a) (sizeof(a)/sizeof(a[0]))
47 #define MAX_TOKEN_LEN 16
/*
 * tst_strcoll: function type for a pluggable string-comparison callback.
 *
 *   collator - opaque handle; each implementation casts it to whatever
 *              comparison engine it wraps
 *   object   - extra integer argument handed through to the callback
 *   source   - first string, sLen UChars long
 *   target   - second string, tLen UChars long
 *
 * Returns the ordering of source relative to target as a UCollationResult.
 */
49 typedef UCollationResult
tst_strcoll(void *collator
, const int object
,
50 const UChar
*source
, const int sLen
,
51 const UChar
*target
, const int tLen
);
55 const static char cnt1
[][10] = {
70 const static char cnt2
[][10] = {
82 static void IncompleteCntTest(void)
84 UErrorCode status
= U_ZERO_ERROR
;
89 UCollator
*coll
= NULL
;
90 uint32_t i
= 0, j
= 0;
93 u_uastrcpy(temp
, " & Z < ABC < Q < B");
95 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
97 if(U_SUCCESS(status
)) {
98 size
= sizeof(cnt1
)/sizeof(cnt1
[0]);
99 for(i
= 0; i
< size
-1; i
++) {
100 for(j
= i
+1; j
< size
; j
++) {
101 UCollationElements
*iter
;
102 u_uastrcpy(t1
, cnt1
[i
]);
103 u_uastrcpy(t2
, cnt1
[j
]);
104 doTest(coll
, t1
, t2
, UCOL_LESS
);
105 /* synwee : added collation element iterator test */
106 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
107 if (U_FAILURE(status
)) {
108 log_err("Creation of iterator failed\n");
112 ucol_closeElements(iter
);
120 u_uastrcpy(temp
, " & Z < DAVIS < MARK <DAV");
121 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
123 if(U_SUCCESS(status
)) {
124 size
= sizeof(cnt2
)/sizeof(cnt2
[0]);
125 for(i
= 0; i
< size
-1; i
++) {
126 for(j
= i
+1; j
< size
; j
++) {
127 UCollationElements
*iter
;
128 u_uastrcpy(t1
, cnt2
[i
]);
129 u_uastrcpy(t2
, cnt2
[j
]);
130 doTest(coll
, t1
, t2
, UCOL_LESS
);
132 /* synwee : added collation element iterator test */
133 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
134 if (U_FAILURE(status
)) {
135 log_err("Creation of iterator failed\n");
139 ucol_closeElements(iter
);
149 const static char shifted
[][20] = {
161 const static UCollationResult shiftedTert
[] = {
173 const static char nonignorable
[][20] = {
185 static void BlackBirdTest(void) {
186 UErrorCode status
= U_ZERO_ERROR
;
190 uint32_t i
= 0, j
= 0;
192 UCollator
*coll
= ucol_open("en_US", &status
);
194 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
195 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
197 if(U_SUCCESS(status
)) {
198 size
= sizeof(nonignorable
)/sizeof(nonignorable
[0]);
199 for(i
= 0; i
< size
-1; i
++) {
200 for(j
= i
+1; j
< size
; j
++) {
201 u_uastrcpy(t1
, nonignorable
[i
]);
202 u_uastrcpy(t2
, nonignorable
[j
]);
203 doTest(coll
, t1
, t2
, UCOL_LESS
);
208 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
209 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
211 if(U_SUCCESS(status
)) {
212 size
= sizeof(shifted
)/sizeof(shifted
[0]);
213 for(i
= 0; i
< size
-1; i
++) {
214 for(j
= i
+1; j
< size
; j
++) {
215 u_uastrcpy(t1
, shifted
[i
]);
216 u_uastrcpy(t2
, shifted
[j
]);
217 doTest(coll
, t1
, t2
, UCOL_LESS
);
222 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
223 if(U_SUCCESS(status
)) {
224 size
= sizeof(shifted
)/sizeof(shifted
[0]);
225 for(i
= 1; i
< size
; i
++) {
226 u_uastrcpy(t1
, shifted
[i
-1]);
227 u_uastrcpy(t2
, shifted
[i
]);
228 doTest(coll
, t1
, t2
, shiftedTert
[i
]);
235 const static UChar testSourceCases
[][MAX_TOKEN_LEN
] = {
236 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238 {0x0041/*'A'*/, 0x0300, 0x0000},
239 {0x00C0, 0x0301, 0x0000},
240 /* this would work with forced normalization */
241 {0x00C0, 0x0316, 0x0000}
244 const static UChar testTargetCases
[][MAX_TOKEN_LEN
] = {
245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
248 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249 /* this would work with forced normalization */
250 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
253 const static UCollationResult results
[] = {
/*
 * FunkyATest: opens an "en_US" collator, turns normalization on, sets
 * tertiary strength, and checks pairs from testSourceCases/testTargetCases
 * against the expected orderings in the results table.
 */
261 static void FunkyATest(void)
265 UErrorCode status
= U_ZERO_ERROR
;
266 UCollator
*myCollation
;
267 myCollation
= ucol_open("en_US", &status
);
/* Report a collator that could not be opened (the remainder of this
   branch is not visible in this excerpt). */
268 if(U_FAILURE(status
)){
269 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
272 log_verbose("Testing some A letters, for some reason\n");
/* The test strings are sequences of 'A'/U+00C0 with combining marks in
   different orders, so normalization is enabled before comparing. */
273 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
274 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
/* Only indices 0..3 are exercised by this loop. */
275 for (i
= 0; i
< 4 ; i
++)
277 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
279 ucol_close(myCollation
);
282 UColAttributeValue caseFirst
[] = {
289 UColAttributeValue alternateHandling
[] = {
294 UColAttributeValue caseLevel
[] = {
299 UColAttributeValue strengths
[] = {
308 static const char * strengthsC
[] = {
316 static const char * caseFirstC
[] = {
323 static const char * alternateHandlingC
[] = {
324 "UCOL_NON_IGNORABLE",
328 static const char * caseLevelC
[] = {
333 /* not used currently - does not test only prints */
/*
 * PrintMarkDavis: diagnostic dump, not an assertion-based test.
 *
 * Opens an "en_US" collator, prints the code units of the string
 * "Mark Davis" to stderr, then walks every combination of the caseFirst,
 * alternateHandling, caseLevel and strengths tables, setting the matching
 * collator attribute at each level and printing the resulting sort key.
 */
334 static void PrintMarkDavis(void)
336 UErrorCode status
= U_ZERO_ERROR
;
/* Fixed 256-byte sort key buffer; the same size is passed to
   ucol_getSortKey below. */
338 uint8_t sortkey
[256];
339 UCollator
*coll
= ucol_open("en_US", &status
);
340 uint32_t h
,i
,j
,k
, sortkeysize
;
345 log_verbose("PrintMarkDavis");
347 u_uastrcpy(m
, "Mark Davis");
/* Dump the test string as \uXXXX escapes. */
353 for(i
= 0; i
<sizem
; i
++) {
354 fprintf(stderr
, "\\u%04X ", m
[i
]);
356 fprintf(stderr
, "\n");
/* Outermost level: case-first setting, indexed by h. */
358 for(h
= 0; h
<sizeof(caseFirst
)/sizeof(caseFirst
[0]); h
++) {
/* NOTE(review): the value is read from caseFirst[i] although this loop's
   index is h and the label printed on the next statement uses
   caseFirstC[h]. At this point i still holds whatever the previous loop
   over i left in it, so this looks like it should be caseFirst[h] and may
   read past the end of caseFirst - confirm and fix. */
359 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, caseFirst
[i
], &status
);
360 fprintf(stderr
, "caseFirst: %s\n", caseFirstC
[h
]);
/* Second level: alternate handling, indexed by i (i is reused here). */
362 for(i
= 0; i
<sizeof(alternateHandling
)/sizeof(alternateHandling
[0]); i
++) {
363 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, alternateHandling
[i
], &status
);
364 fprintf(stderr
, " AltHandling: %s\n", alternateHandlingC
[i
]);
/* Third level: case level, indexed by j. */
366 for(j
= 0; j
<sizeof(caseLevel
)/sizeof(caseLevel
[0]); j
++) {
367 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, caseLevel
[j
], &status
);
368 fprintf(stderr
, " caseLevel: %s\n", caseLevelC
[j
]);
/* Innermost level: strength, indexed by k; the sort key is generated and
   printed once per full attribute combination. */
370 for(k
= 0; k
<sizeof(strengths
)/sizeof(strengths
[0]); k
++) {
371 ucol_setAttribute(coll
, UCOL_STRENGTH
, strengths
[k
], &status
);
372 sortkeysize
= ucol_getSortKey(coll
, m
, sizem
, sortkey
, 256);
373 fprintf(stderr
, " strength: %s\n Sortkey: ", strengthsC
[k
]);
374 fprintf(stderr
, "%s\n", ucol_sortKeyToString(coll
, sortkey
, buffer
, &len
));
/*
 * BillFairmanTest: asks the resource-bundle API which locale actually
 * supplies the "collations" data for a deliberately non-existent French
 * locale ID, and logs an error unless the answer is "fr".
 */
385 static void BillFairmanTest(void) {
387 ** check for actual locale via ICU resource bundles
389 ** lp points to the original locale ("fr_FR_....")
392 UResourceBundle
*lr
,*cr
;
393 UErrorCode lec
= U_ZERO_ERROR
;
394 const char *lp
= "fr_FR_you_ll_never_find_this_locale";
396 log_verbose("BillFairmanTest\n");
/* Open the bundle for the bogus locale ID using the default package
   (NULL). */
398 lr
= ures_open(NULL
,lp
,&lec
);
/* Fetch the "collations" table from that bundle. */
400 cr
= ures_getByKey(lr
,"collations",0,&lec
);
/* lp is reused: from here on it holds the actual locale reported for the
   collation data, not the requested ID. */
402 lp
= ures_getLocaleByType(cr
, ULOC_ACTUAL_LOCALE
, &lec
);
/* lec accumulates the status of all three calls above; the locale name is
   only inspected when all of them succeeded. */
404 if (U_SUCCESS(lec
)) {
405 if(strcmp(lp
, "fr") != 0) {
406 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp
);
416 static void testPrimary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
417 UChar source
[256] = { '\0'};
418 UChar target
[256] = { '\0'};
422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
425 /*log_verbose("Testing primary\n");*/
427 doTest(col
, p
, q
, UCOL_LESS
);
429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
431 if(result!=UCOL_LESS){
432 aescstrdup(p,utfSource,256);
433 aescstrdup(q,utfTarget,256);
434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
438 u_strcpy(source
+1,p
);
440 u_strcpy(target
+1,q
);
441 doTest(col
, source
, target
, UCOL_LESS
);
443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
447 static void testSecondary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
448 UChar source
[256] = { '\0'};
449 UChar target
[256] = { '\0'};
451 /*log_verbose("Testing secondary\n");*/
453 doTest(col
, p
, q
, UCOL_LESS
);
455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
458 u_strcpy(source
+1,p
);
460 u_strcpy(target
+1,q
);
462 doTest(col
, source
, target
, UCOL_LESS
);
464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
469 source
[u_strlen(p
)] = 0x62;
470 source
[u_strlen(p
)+1] = 0;
474 target
[u_strlen(q
)] = 0x61;
475 target
[u_strlen(q
)+1] = 0;
477 doTest(col
, source
, target
, UCOL_GREATER
);
480 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
484 static void testTertiary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
485 UChar source
[256] = { '\0'};
486 UChar target
[256] = { '\0'};
488 /*log_verbose("Testing tertiary\n");*/
490 doTest(col
, p
, q
, UCOL_LESS
);
492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
495 u_strcpy(source
+1,p
);
497 u_strcpy(target
+1,q
);
499 doTest(col
, source
, target
, UCOL_LESS
);
501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
505 source
[u_strlen(p
)] = 0xE0;
506 source
[u_strlen(p
)+1] = 0;
509 target
[u_strlen(q
)] = 0x61;
510 target
[u_strlen(q
)+1] = 0;
512 doTest(col
, source
, target
, UCOL_GREATER
);
515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
519 static void testEquality(UCollator
* col
, const UChar
* p
,const UChar
* q
){
521 UChar source[256] = { '\0'};
522 UChar target[256] = { '\0'};
525 doTest(col
, p
, q
, UCOL_EQUAL
);
527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
531 static void testCollator(UCollator
*coll
, UErrorCode
*status
) {
532 const UChar
*rules
= NULL
, *current
= NULL
;
534 uint32_t strength
= 0;
535 uint32_t chOffset
= 0; uint32_t chLen
= 0;
536 uint32_t exOffset
= 0; uint32_t exLen
= 0;
537 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
538 uint32_t firstEx
= 0;
539 /* uint32_t rExpsLen = 0; */
540 uint32_t firstLen
= 0;
541 UBool varT
= FALSE
; UBool top_
= TRUE
;
543 UBool startOfRules
= TRUE
;
544 UBool lastReset
= FALSE
;
545 UBool before
= FALSE
;
546 uint32_t beforeStrength
= 0;
554 UChar
*rulesCopy
= NULL
;
555 UParseError parseError
;
557 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
561 rules
= ucol_getRules(coll
, &ruleLen
);
562 if(U_SUCCESS(*status
) && ruleLen
> 0) {
563 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
564 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
565 src
.current
= src
.source
= rulesCopy
;
566 src
.end
= rulesCopy
+ruleLen
;
567 src
.extraCurrent
= src
.end
;
568 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
569 *first
= *second
= 0;
571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
572 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
,&parseError
, status
)) != NULL
) {
574 strength
= src
.parsedToken
.strength
;
575 chOffset
= src
.parsedToken
.charsOffset
;
576 chLen
= src
.parsedToken
.charsLen
;
577 exOffset
= src
.parsedToken
.extensionOffset
;
578 exLen
= src
.parsedToken
.extensionLen
;
579 prefixOffset
= src
.parsedToken
.prefixOffset
;
580 prefixLen
= src
.parsedToken
.prefixLen
;
581 specs
= src
.parsedToken
.flags
;
583 startOfRules
= FALSE
;
584 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
585 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
586 if(top_
) { /* if reset is on top, the sequence is broken. We should have an empty string */
589 u_strncpy(second
,src
.source
+chOffset
, chLen
);
592 if(exLen
> 0 && firstEx
== 0) {
593 u_strncat(first
, src
.source
+exOffset
, exLen
);
594 first
[firstLen
+exLen
] = 0;
597 if(lastReset
== TRUE
&& prefixLen
!= 0) {
598 u_strncpy(first
+prefixLen
, first
, firstLen
);
599 u_strncpy(first
, src
.source
+prefixOffset
, prefixLen
);
600 first
[firstLen
+prefixLen
] = 0;
601 firstLen
= firstLen
+prefixLen
;
604 if(before
== TRUE
) { /* swap first and second */
605 u_strcpy(tempB
, first
);
606 u_strcpy(first
, second
);
607 u_strcpy(second
, tempB
);
616 if(beforeStrength
< strength
) {
617 strength
= beforeStrength
;
625 testEquality(coll
,first
,second
);
628 testPrimary(coll
,first
,second
);
631 testSecondary(coll
,first
,second
);
634 testTertiary(coll
,first
,second
);
638 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
640 beforeStrength
= (specs
& UCOL_TOK_BEFORE
)-1;
647 if(before
== TRUE
&& strength
!= UCOL_TOK_RESET
) { /* first and second were swapped */
652 u_strcpy(first
, second
);
655 uprv_free(src
.source
);
656 uprv_free(src
.reorderCodes
);
/*
 * ucaTest: tst_strcoll-compatible adapter around ucol_strcoll.
 *
 * collator is treated as a UCollator* and the comparison of
 * source (sLen units) against target (tLen units) is delegated to it.
 * The object argument is not used by the visible body.
 */
660 static UCollationResult
ucaTest(void *collator
, const int object
, const UChar
*source
, const int sLen
, const UChar
*target
, const int tLen
) {
661 UCollator
*UCA
= (UCollator
*)collator
;
662 return ucol_strcoll(UCA
, source
, sLen
, target
, tLen
);
666 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
667 #if U_PLATFORM_HAS_WIN32_API
668 LCID lcid = (LCID)collator;
669 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
/*
 * swampEarlier: re-runs the comparison callback func on copies of s and t
 * that have been shifted right by one code unit, i.e. with one extra unit
 * in front of each string.
 *
 * Both copies live in 256-unit, zero-initialized local buffers and the
 * callback is given lengths sLen+1 and tLen+1.
 * NOTE(review): the statements that store the leading unit at index 0 of
 * each buffer are not visible in this excerpt - confirm against the full
 * file.
 */
676 static UCollationResult
swampEarlier(tst_strcoll
* func
, void *collator
, int opts
,
678 const UChar
*s
, const uint32_t sLen
,
679 const UChar
*t
, const uint32_t tLen
) {
680 UChar source
[256] = {0};
681 UChar target
[256] = {0};
/* Copy starts at index 1, leaving slot 0 free for the prefix unit.
   No length check is visible here, so s and t must fit in the remaining
   255 units including their terminator. */
684 u_strcpy(source
+1, s
);
686 u_strcpy(target
+1, t
);
688 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
/*
 * swampLater: counterpart of swampEarlier; invokes the comparison callback
 * func on two 256-unit local buffers with lengths sLen+1 and tLen+1.
 *
 * NOTE(review): the statements that fill the buffers from s and t are not
 * visible in this excerpt; presumably the extra unit is appended after the
 * original text rather than placed in front of it - confirm against the
 * full file.
 */
691 static UCollationResult
swampLater(tst_strcoll
* func
, void *collator
, int opts
,
693 const UChar
*s
, const uint32_t sLen
,
694 const UChar
*t
, const uint32_t tLen
) {
695 UChar source
[256] = {0};
696 UChar target
[256] = {0};
703 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
706 static uint32_t probeStrength(tst_strcoll
* func
, void *collator
, int opts
,
707 const UChar
*s
, const uint32_t sLen
,
708 const UChar
*t
, const uint32_t tLen
,
709 UCollationResult result
) {
710 /*UChar fPrimary = 0x6d;*/
711 /*UChar sPrimary = 0x6e;*/
712 UChar fSecondary
= 0x310d;
713 UChar sSecondary
= 0x31a3;
714 UChar fTertiary
= 0x310f;
715 UChar sTertiary
= 0x31b7;
717 UCollationResult oposite
;
718 if(result
== UCOL_EQUAL
) {
719 return UCOL_IDENTICAL
;
720 } else if(result
== UCOL_GREATER
) {
723 oposite
= UCOL_GREATER
;
726 if(swampEarlier(func
, collator
, opts
, sSecondary
, fSecondary
, s
, sLen
, t
, tLen
) == result
) {
728 } else if((swampEarlier(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == result
) &&
729 (swampEarlier(func
, collator
, opts
, 0x310f, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
730 return UCOL_SECONDARY
;
731 } else if((swampLater(func
, collator
, opts
, sTertiary
, fTertiary
, s
, sLen
, t
, tLen
) == result
) &&
732 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
733 return UCOL_TERTIARY
;
734 } else if((swampLater(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == oposite
) &&
735 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == oposite
)) {
736 return UCOL_QUATERNARY
;
738 return UCOL_IDENTICAL
;
/*
 * getRelationSymbol: formats a relation marker for the pair
 * (res, strength) into the caller-supplied buffer.
 *
 * Three cases are distinguished: equal result (or the sentinel strength
 * 0xdeadbeef), UCOL_GREATER, and everything else. The two non-equal cases
 * each loop strength+1 times and then terminate the string at
 * buffer[strength+1], so buffer must hold at least strength+2 chars.
 * NOTE(review): the characters written inside the loops and the returned
 * pointer are not visible in this excerpt.
 */
742 static char *getRelationSymbol(UCollationResult res
, uint32_t strength
, char *buffer
) {
745 if(res
== UCOL_EQUAL
|| strength
== 0xdeadbeef) {
749 } else if(res
== UCOL_GREATER
) {
750 for(i
= 0; i
<strength
+1; i
++) {
753 buffer
[strength
+1] = '\0';
755 for(i
= 0; i
<strength
+1; i
++) {
758 buffer
[strength
+1] = '\0';
766 static void logFailure (const char *platform
, const char *test
,
767 const UChar
*source
, const uint32_t sLen
,
768 const UChar
*target
, const uint32_t tLen
,
769 UCollationResult realRes
, uint32_t realStrength
,
770 UCollationResult expRes
, uint32_t expStrength
, UBool error
) {
774 char sEsc
[256], s
[256], tEsc
[256], t
[256], b
[256], output
[512], relation
[256];
775 static int32_t maxOutputLength
= 0;
776 int32_t outputLength
;
778 *sEsc
= *tEsc
= *s
= *t
= 0;
780 log_err("Difference between expected and generated order. Run test with -v for more info\n");
781 } else if(getTestOption(VERBOSITY_OPTION
) == 0) {
784 for(i
= 0; i
<sLen
; i
++) {
785 sprintf(b
, "%04X", source
[i
]);
790 if(source
[i
] < 0x80) {
791 sprintf(b
, "(%c)", source
[i
]);
795 for(i
= 0; i
<tLen
; i
++) {
796 sprintf(b
, "%04X", target
[i
]);
801 if(target
[i
] < 0x80) {
802 sprintf(b
, "(%c)", target
[i
]);
807 strcpy(output, "[[ ");
808 strcat(output, sEsc);
809 strcat(output, getRelationSymbol(expRes, expStrength, relation));
810 strcat(output, tEsc);
812 strcat(output, " : ");
814 strcat(output, sEsc);
815 strcat(output, getRelationSymbol(realRes, realStrength, relation));
816 strcat(output, tEsc);
817 strcat(output, " ]] ");
819 log_verbose("%s", output);
823 strcpy(output
, "DIFF: ");
826 strcat(output
, " : ");
829 strcat(output
, test
);
830 strcat(output
, ": ");
832 strcat(output
, sEsc
);
833 strcat(output
, getRelationSymbol(expRes
, expStrength
, relation
));
834 strcat(output
, tEsc
);
838 strcat(output
, platform
);
839 strcat(output
, ": ");
841 strcat(output
, sEsc
);
842 strcat(output
, getRelationSymbol(realRes
, realStrength
, relation
));
843 strcat(output
, tEsc
);
845 outputLength
= (int32_t)strlen(output
);
846 if(outputLength
> maxOutputLength
) {
847 maxOutputLength
= outputLength
;
848 U_ASSERT(outputLength
< sizeof(output
));
851 log_verbose("%s\n", output
);
856 static void printOutRules(const UChar *rules) {
857 uint32_t len = u_strlen(rules);
862 fprintf(stdout, "Rules:");
864 for(i = 0; i<len; i++) {
865 if(rules[i]<0x7f && rules[i]>=0x20) {
866 toPrint = (char)rules[i];
869 fprintf(stdout, "\n&");
870 } else if(toPrint == ';') {
871 fprintf(stdout, "<<");
873 } else if(toPrint == ',') {
874 fprintf(stdout, "<<<");
877 fprintf(stdout, "%c", toPrint);
880 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
881 fprintf(stdout, "\\u%04X", rules[i]);
885 fprintf(stdout, "\n");
/*
 * testSwitch: checks one tailored pair (first, second) against a
 * comparison callback.
 *
 *   func/collator/opts - comparison callback and the arguments handed to it
 *   strength           - strength at which first is expected to sort
 *                        before second (UCOL_IDENTICAL means "equal")
 *   first, second      - NUL-terminated strings to compare
 *   msg                - passed to logFailure as its platform label
 *   error              - forwarded unchanged to logFailure
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers add the result to a difference counter, so it presumably
 * signals whether a mismatch was logged - confirm against the full file.
 */
895 static uint32_t testSwitch(tst_strcoll
* func
, void *collator
, int opts
, uint32_t strength
, const UChar
*first
, const UChar
*second
, const char* msg
, UBool error
) {
897 UCollationResult realResult
;
898 uint32_t realStrength
;
900 uint32_t sLen
= u_strlen(first
);
901 uint32_t tLen
= u_strlen(second
);
/* Actual ordering, then the strength level at which that ordering is
   decided according to probeStrength. */
903 realResult
= func(collator
, opts
, first
, sLen
, second
, tLen
);
904 realStrength
= probeStrength(func
, collator
, opts
, first
, sLen
, second
, tLen
, realResult
);
/* Expected "identical": any result other than UCOL_EQUAL is a failure. */
906 if(strength
== UCOL_IDENTICAL
&& realResult
!= UCOL_EQUAL
) {
907 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_EQUAL
, strength
, error
);
/* Otherwise first must sort before second, and at exactly the expected
   strength. */
909 } else if(realResult
!= UCOL_LESS
|| realStrength
!= strength
) {
910 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_LESS
, strength
, error
);
917 static void testAgainstUCA(UCollator
*coll
, UCollator
*UCA
, const char *refName
, UBool error
, UErrorCode
*status
) {
918 const UChar
*rules
= NULL
, *current
= NULL
;
920 uint32_t strength
= 0;
921 uint32_t chOffset
= 0; uint32_t chLen
= 0;
922 uint32_t exOffset
= 0; uint32_t exLen
= 0;
923 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
924 /* uint32_t rExpsLen = 0; */
925 uint32_t firstLen
= 0, secondLen
= 0;
926 UBool varT
= FALSE
; UBool top_
= TRUE
;
928 UBool startOfRules
= TRUE
;
934 UChar
*rulesCopy
= NULL
;
936 uint32_t UCAdiff
= 0;
937 uint32_t Windiff
= 1;
938 UParseError parseError
;
940 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
943 rules
= ucol_getRules(coll
, &ruleLen
);
945 /*printOutRules(rules);*/
947 if(U_SUCCESS(*status
) && ruleLen
> 0) {
948 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
949 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
950 src
.current
= src
.source
= rulesCopy
;
951 src
.end
= rulesCopy
+ruleLen
;
952 src
.extraCurrent
= src
.end
;
953 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
954 *first
= *second
= 0;
956 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
957 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
958 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
959 strength
= src
.parsedToken
.strength
;
960 chOffset
= src
.parsedToken
.charsOffset
;
961 chLen
= src
.parsedToken
.charsLen
;
962 exOffset
= src
.parsedToken
.extensionOffset
;
963 exLen
= src
.parsedToken
.extensionLen
;
964 prefixOffset
= src
.parsedToken
.prefixOffset
;
965 prefixLen
= src
.parsedToken
.prefixLen
;
966 specs
= src
.parsedToken
.flags
;
968 startOfRules
= FALSE
;
969 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
970 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
972 u_strncpy(second
,src
.source
+chOffset
, chLen
);
977 u_strncat(first
, src
.source
+exOffset
, exLen
);
978 first
[firstLen
+exLen
] = 0;
982 if(strength
!= UCOL_TOK_RESET
) {
983 if((*first
<0x3400 || *first
>=0xa000) && (*second
<0x3400 || *second
>=0xa000)) {
984 UCAdiff
+= testSwitch(&ucaTest
, (void *)UCA
, 0, strength
, first
, second
, refName
, error
);
985 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
991 u_strcpy(first
, second
);
994 if(UCAdiff
!= 0 && Windiff
!= 0) {
998 log_verbose("No immediate difference with %s!\n", refName
);
1001 log_verbose("No immediate difference with Win32!\n");
1003 uprv_free(src
.source
);
1004 uprv_free(src
.reorderCodes
);
1009 * Takes two CEs (lead and continuation) and
1010 * compares them as CEs should be compared:
1011 * primary vs. primary, secondary vs. secondary
1012 * tertiary vs. tertiary
1014 static int32_t compareCEs(uint32_t s1
, uint32_t s2
,
1015 uint32_t t1
, uint32_t t2
) {
/* s1/s2 are the lead and continuation CE of one element, t1/t2 those of
   the other. s and t are scratch keys rebuilt for each weight level below.
   NOTE(review): the comparisons and return statements between the packing
   steps are not visible in this excerpt. */
1016 uint32_t s
= 0, t
= 0;
/* Fast path: both CE pairs are bit-for-bit the same. */
1017 if(s1
== t1
&& s2
== t2
) {
/* Level 1 key: upper 16 bits of the lead CE stay in the upper half, upper
   16 bits of the continuation CE move to the lower half. */
1020 s
= (s1
& 0xFFFF0000)|((s2
& 0xFFFF0000)>>16);
1021 t
= (t1
& 0xFFFF0000)|((t2
& 0xFFFF0000)>>16);
/* Level 2 key: bits 8..15 of the lead CE stay in place, bits 8..15 of the
   continuation CE are shifted down to bits 0..7 (>> binds tighter than |). */
1027 s
= (s1
& 0x0000FF00) | (s2
& 0x0000FF00)>>8;
1028 t
= (t1
& 0x0000FF00) | (t2
& 0x0000FF00)>>8;
/* Level 3 key: low byte of the lead CE moved up to bits 8..15, low byte of
   the continuation CE in bits 0..7. */
1034 s
= (s1
& 0x000000FF)<<8 | (s2
& 0x000000FF);
1035 t
= (t1
& 0x000000FF)<<8 | (t2
& 0x000000FF);
1047 uint32_t startContCE
;
1049 uint32_t limitContCE
;
1050 } indirectBoundaries
;
1052 /* these values are used for finding CE values for indirect positioning. */
1053 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1054 /* values. It only works for resets and you cannot tailor indirect names */
1055 /* An indirect name can define either an anchor point or a range. An */
1056 /* anchor point behaves in exactly the same way as a code point in reset */
1057 /* would, except that it cannot be tailored. A range (we currently only */
1058 /* know of the [top] range) will explicitly set the upper bound for */
1059 /* generated CEs, thus allowing for better control over how many CEs can */
1060 /* be squeezed between in the range without performance penalty. */
1061 /* In that respect, we use [top] for tailoring of locales that use CJK */
1062 /* characters. Other indirect values are currently a pure convenience, */
1063 /* they can be used to assure that the CEs will be always positioned in */
1064 /* the same place relative to a point with known properties (e.g. first */
1065 /* primary ignorable). */
1066 static indirectBoundaries ucolIndirectBoundaries
[15];
1067 static UBool indirectBoundariesSet
= FALSE
;
/*
 * setIndirectBoundaries: fills entry indexR of ucolIndirectBoundaries.
 *
 *   indexR - slot in ucolIndirectBoundaries to write
 *   start  - two-element array: lead CE and continuation CE of the start
 *   end    - two-element array with the limit CEs; callers also pass 0
 *
 * The limit fields are written either from end[0]/end[1] or as 0.
 * NOTE(review): the condition selecting between those two cases is not
 * visible in this excerpt; presumably it is a null check on end.
 */
1068 static void setIndirectBoundaries(uint32_t indexR
, uint32_t *start
, uint32_t *end
) {
1069 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1070 /* to initialize here. */
1071 ucolIndirectBoundaries
[indexR
].startCE
= start
[0];
1072 ucolIndirectBoundaries
[indexR
].startContCE
= start
[1];
/* Limit taken from the caller-supplied end pair. */
1074 ucolIndirectBoundaries
[indexR
].limitCE
= end
[0];
1075 ucolIndirectBoundaries
[indexR
].limitContCE
= end
[1];
/* No limit: both limit fields are cleared. */
1077 ucolIndirectBoundaries
[indexR
].limitCE
= 0;
1078 ucolIndirectBoundaries
[indexR
].limitContCE
= 0;
1082 static void testCEs(UCollator
*coll
, UErrorCode
*status
) {
1083 const UChar
*rules
= NULL
, *current
= NULL
;
1084 int32_t ruleLen
= 0;
1086 uint32_t strength
= 0;
1087 uint32_t maxStrength
= UCOL_IDENTICAL
;
1088 uint32_t baseCE
, baseContCE
, nextCE
, nextContCE
, currCE
, currContCE
;
1090 uint32_t lastContCE
;
1093 uint32_t chOffset
= 0; uint32_t chLen
= 0;
1094 uint32_t exOffset
= 0; uint32_t exLen
= 0;
1095 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
1096 uint32_t oldOffset
= 0;
1098 /* uint32_t rExpsLen = 0; */
1099 /* uint32_t firstLen = 0; */
1101 UBool varT
= FALSE
; UBool top_
= TRUE
;
1102 UBool startOfRules
= TRUE
;
1103 UBool before
= FALSE
;
1104 UColTokenParser src
;
1106 UParseError parseError
;
1107 UChar
*rulesCopy
= NULL
;
1108 collIterate
*c
= uprv_new_collIterate(status
);
1109 UCAConstants
*consts
= NULL
;
1110 uint32_t UCOL_RESET_TOP_VALUE
, /*UCOL_RESET_TOP_CONT, */
1111 UCOL_NEXT_TOP_VALUE
, UCOL_NEXT_TOP_CONT
;
1113 UCollator
*UCA
= ucol_open("root", status
);
1115 if (U_FAILURE(*status
)) {
1116 log_err("Could not open root collator %s\n", u_errorName(*status
));
1117 uprv_delete_collIterate(c
);
1121 colLoc
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, status
);
1122 if (U_FAILURE(*status
)) {
1123 log_err("Could not get collator name: %s\n", u_errorName(*status
));
1125 uprv_delete_collIterate(c
);
1129 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
1131 consts
= (UCAConstants
*)((uint8_t *)UCA
->image
+ UCA
->image
->UCAConsts
);
1132 UCOL_RESET_TOP_VALUE
= consts
->UCA_LAST_NON_VARIABLE
[0];
1133 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1134 UCOL_NEXT_TOP_VALUE
= consts
->UCA_FIRST_IMPLICIT
[0];
1135 UCOL_NEXT_TOP_CONT
= consts
->UCA_FIRST_IMPLICIT
[1];
1137 baseCE
=baseContCE
=nextCE
=nextContCE
=currCE
=currContCE
=lastCE
=lastContCE
= UCOL_NOT_FOUND
;
1141 rules
= ucol_getRules(coll
, &ruleLen
);
1143 src
.invUCA
= ucol_initInverseUCA(status
);
1145 if(indirectBoundariesSet
== FALSE
) {
1146 /* UCOL_RESET_TOP_VALUE */
1147 setIndirectBoundaries(0, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1148 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1149 setIndirectBoundaries(1, consts
->UCA_FIRST_PRIMARY_IGNORABLE
, 0);
1150 /* UCOL_LAST_PRIMARY_IGNORABLE */
1151 setIndirectBoundaries(2, consts
->UCA_LAST_PRIMARY_IGNORABLE
, 0);
1152 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1153 setIndirectBoundaries(3, consts
->UCA_FIRST_SECONDARY_IGNORABLE
, 0);
1154 /* UCOL_LAST_SECONDARY_IGNORABLE */
1155 setIndirectBoundaries(4, consts
->UCA_LAST_SECONDARY_IGNORABLE
, 0);
1156 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1157 setIndirectBoundaries(5, consts
->UCA_FIRST_TERTIARY_IGNORABLE
, 0);
1158 /* UCOL_LAST_TERTIARY_IGNORABLE */
1159 setIndirectBoundaries(6, consts
->UCA_LAST_TERTIARY_IGNORABLE
, 0);
1160 /* UCOL_FIRST_VARIABLE */
1161 setIndirectBoundaries(7, consts
->UCA_FIRST_VARIABLE
, 0);
1162 /* UCOL_LAST_VARIABLE */
1163 setIndirectBoundaries(8, consts
->UCA_LAST_VARIABLE
, 0);
1164 /* UCOL_FIRST_NON_VARIABLE */
1165 setIndirectBoundaries(9, consts
->UCA_FIRST_NON_VARIABLE
, 0);
1166 /* UCOL_LAST_NON_VARIABLE */
1167 setIndirectBoundaries(10, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1168 /* UCOL_FIRST_IMPLICIT */
1169 setIndirectBoundaries(11, consts
->UCA_FIRST_IMPLICIT
, 0);
1170 /* UCOL_LAST_IMPLICIT */
1171 setIndirectBoundaries(12, consts
->UCA_LAST_IMPLICIT
, consts
->UCA_FIRST_TRAILING
);
1172 /* UCOL_FIRST_TRAILING */
1173 setIndirectBoundaries(13, consts
->UCA_FIRST_TRAILING
, 0);
1174 /* UCOL_LAST_TRAILING */
1175 setIndirectBoundaries(14, consts
->UCA_LAST_TRAILING
, 0);
1176 ucolIndirectBoundaries
[14].limitCE
= (consts
->UCA_PRIMARY_SPECIAL_MIN
<<24);
1177 indirectBoundariesSet
= TRUE
;
1181 if(U_SUCCESS(*status
) && ruleLen
> 0) {
1182 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
1183 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
1184 src
.current
= src
.source
= rulesCopy
;
1185 src
.end
= rulesCopy
+ruleLen
;
1186 src
.extraCurrent
= src
.end
;
1187 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
1189 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1190 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1191 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
1192 strength
= src
.parsedToken
.strength
;
1193 chOffset
= src
.parsedToken
.charsOffset
;
1194 chLen
= src
.parsedToken
.charsLen
;
1195 exOffset
= src
.parsedToken
.extensionOffset
;
1196 exLen
= src
.parsedToken
.extensionLen
;
1197 prefixOffset
= src
.parsedToken
.prefixOffset
;
1198 prefixLen
= src
.parsedToken
.prefixLen
;
1199 specs
= src
.parsedToken
.flags
;
1201 startOfRules
= FALSE
;
1202 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
1203 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
1205 uprv_init_collIterate(coll
, src
.source
+chOffset
, chLen
, c
, status
);
1207 currCE
= ucol_getNextCE(coll
, c
, status
);
1208 if(currCE
== 0 && UCOL_ISTHAIPREVOWEL(*(src
.source
+chOffset
))) {
1209 log_verbose("Thai prevowel detected. Will pick next CE\n");
1210 currCE
= ucol_getNextCE(coll
, c
, status
);
1213 currContCE
= ucol_getNextCE(coll
, c
, status
);
1214 if(!isContinuation(currContCE
)) {
1218 /* we need to repack CEs here */
1220 if(strength
== UCOL_TOK_RESET
) {
1221 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
1223 int32_t tokenIndex
= src
.parsedToken
.indirectIndex
;
1225 nextCE
= baseCE
= currCE
= ucolIndirectBoundaries
[tokenIndex
].startCE
;
1226 nextContCE
= baseContCE
= currContCE
= ucolIndirectBoundaries
[tokenIndex
].startContCE
;
1228 nextCE
= baseCE
= currCE
;
1229 nextContCE
= baseContCE
= currContCE
;
1231 maxStrength
= UCOL_IDENTICAL
;
1233 if(strength
< maxStrength
) {
1234 maxStrength
= strength
;
1235 if(baseCE
== UCOL_RESET_TOP_VALUE
) {
1236 log_verbose("Resetting to [top]\n");
1237 nextCE
= UCOL_NEXT_TOP_VALUE
;
1238 nextContCE
= UCOL_NEXT_TOP_CONT
;
1240 result
= ucol_inv_getNextCE(&src
, baseCE
& 0xFFFFFF3F, baseContCE
, &nextCE
, &nextContCE
, maxStrength
);
1243 if(ucol_isTailored(coll
, *(src
.source
+oldOffset
), status
)) {
1244 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src
.source
+oldOffset
));
1247 log_err("%s: couldn't find the CE\n", colLoc
);
1253 currCE
&= 0xFFFFFF3F;
1254 currContCE
&= 0xFFFFFFBF;
1256 if(maxStrength
== UCOL_IDENTICAL
) {
1257 if(baseCE
!= currCE
|| baseContCE
!= currContCE
) {
1258 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc
);
1261 if(strength
== UCOL_IDENTICAL
) {
1262 if(lastCE
!= currCE
|| lastContCE
!= currContCE
) {
1263 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc
);
1266 if(compareCEs(currCE
, currContCE
, nextCE
, nextContCE
) > 0) {
1267 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1268 log_err("%s: current CE is not less than base CE\n", colLoc
);
1271 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) < 0) {
1272 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1273 log_err("%s: sequence of generated CEs is broken\n", colLoc
);
1277 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) > 0) {
1278 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1279 log_err("%s: sequence of generated CEs is broken\n", colLoc
);
1287 oldOffset
= chOffset
;
1288 lastCE
= currCE
& 0xFFFFFF3F;
1289 lastContCE
= currContCE
& 0xFFFFFFBF;
1291 uprv_free(src
.source
);
1292 uprv_free(src
.reorderCodes
);
1295 uprv_delete_collIterate(c
);
/*
 * Fixed table of locale identifiers.
 * NOTE(review): no reader of this table is visible in this part of the file,
 * and the comment below says the locales are now taken from the index
 * resource bundle - confirm whether the table is still used.
 */
1299 /* these locales are now picked from index RB */
1300 static const char* localesToTest
[] = {
1301 "ar", "bg", "ca", "cs", "da",
1302 "el", "en_BE", "en_US_POSIX",
1303 "es", "et", "fi", "fr", "hi",
1304 "hr", "hu", "is", "iw", "ja",
1305 "ko", "lt", "lv", "mk", "mt",
1306 "nb", "nn", "nn_NO", "pl", "ro",
1307 "ru", "sh", "sk", "sl", "sq",
1308 "sr", "sv", "th", "tr", "uk",
/*
 * Tailoring rule strings consumed by RamsRulesTest: each entry is decoded
 * with u_unescape() (hence the doubled backslashes) and turned into a
 * collator with ucol_openRules().
 * Every "[top]" rule carries a commented-out alternative spelling at the end
 * of its line.
 */
1313 static const char* rulesToTest
[] = {
1315 "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
1317 /* Cui Mins rules */
1318 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
1319 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1320 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
1321 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1322 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
1323 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
1324 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
/*
 * TestCollations: walks every locale returned by uloc_getAvailable() and, for
 * those where hasCollationElements() is true, opens the locale's collator and
 * passes it to testAgainstUCA() together with the root collator
 * (ucol_open("")).
 *
 * The root collator's strength is read into oldStrength, switched to
 * UCOL_QUATERNARY for the run and written back at the end.
 *
 * NOTE(review): the "ja" branch only shows a log call here; presumably that
 * locale is then skipped - confirm against the full file.
 * NOTE(review): nameSize is the value returned by uloc_getDisplayName() and
 * is used directly as the terminator index into cName; the declarations of
 * name/cName are not visible here, so confirm cName is large enough.
 */
1328 static void TestCollations(void) {
1329 int32_t noOfLoc
= uloc_countAvailable();
1330 int32_t i
= 0, j
= 0;
1332 UErrorCode status
= U_ZERO_ERROR
;
1338 const char *locName
= NULL
;
1339 UCollator
*coll
= NULL
;
1340 UCollator
*UCA
= ucol_open("", &status
);
/* The attribute is read before the status of ucol_open() is examined. */
1341 UColAttributeValue oldStrength
= ucol_getAttribute(UCA
, UCOL_STRENGTH
, &status
);
1342 if (U_FAILURE(status
)) {
1343 log_err_status(status
, "Could not open UCA collator %s\n", u_errorName(status
));
1346 ucol_setAttribute(UCA
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
1348 for(i
= 0; i
<noOfLoc
; i
++) {
/* Each locale starts from a clean status. */
1349 status
= U_ZERO_ERROR
;
1350 locName
= uloc_getAvailable(i
);
1351 if(uprv_strcmp("ja", locName
) == 0) {
1352 log_verbose("Don't know how to test prefixes\n");
1355 if(hasCollationElements(locName
)) {
1356 nameSize
= uloc_getDisplayName(locName
, NULL
, name
, 256, &status
);
/* Narrow the UChar display name to char, one unit at a time, for logging only. */
1357 for(j
= 0; j
<nameSize
; j
++) {
1358 cName
[j
] = (char)name
[j
];
1360 cName
[nameSize
] = 0;
1361 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1362 coll
= ucol_open(locName
, &status
);
1363 if(U_SUCCESS(status
)) {
1364 testAgainstUCA(coll
, UCA
, "UCA", FALSE
, &status
);
1367 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName
, u_errorName(status
));
1368 status
= U_ZERO_ERROR
;
/* Put the root collator's strength back to what it was on entry. */
1372 ucol_setAttribute(UCA
, UCOL_STRENGTH
, oldStrength
, &status
);
/*
 * RamsRulesTest: runs testCollator() and testCEs() over two sets of
 * collators.
 *
 * 1. Locale pass - every available locale with collation elements. "ja",
 *    "de__PHONEBOOK" and the long list of locales tied to ticket #6040 only
 *    produce a verbose log message in the visible code (presumably they are
 *    skipped - the statements after the log calls are not visible here).
 *    For the remaining locales the collator is opened; testing only happens
 *    when the open status is neither U_USING_DEFAULT_WARNING nor
 *    U_USING_FALLBACK_WARNING. UCOL_CASE_FIRST is set to UCOL_OFF before the
 *    two test helpers run.
 * 2. Rule pass - every rulesToTest entry is unescaped into `rule`
 *    (capacity 2048) and opened with ucol_openRules(UCOL_OFF, UCOL_TERTIARY).
 *
 * The whole test reacts to a default locale of "km"/"km_KH" up front; see
 * the inline comment referencing trac#6040.
 */
1376 static void RamsRulesTest(void) {
1377 UErrorCode status
= U_ZERO_ERROR
;
1379 UCollator
*coll
= NULL
;
1382 int32_t noOfLoc
= uloc_countAvailable();
1383 const char *locName
= NULL
;
1385 log_verbose("RamsRulesTest\n");
1387 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1388 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1392 for(i
= 0; i
<noOfLoc
; i
++) {
1393 locName
= uloc_getAvailable(i
);
1394 if(hasCollationElements(locName
)) {
1395 if (uprv_strcmp("ja", locName
)==0) {
1396 log_verbose("Don't know how to test Japanese because of prefixes\n");
1399 if (uprv_strcmp("de__PHONEBOOK", locName
)==0) {
1400 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
/* Locales that are known not to work with this test yet (ticket #6040). */
1403 if (uprv_strcmp("bn", locName
)==0 ||
1404 uprv_strcmp("bs", locName
)==0 || /* Add due to import per cldrbug 5647 */
1405 uprv_strcmp("bs_Cyrl", locName
)==0 || /* Add due to import per cldrbug 5647 */
1406 uprv_strcmp("en_US_POSIX", locName
)==0 ||
1407 uprv_strcmp("fa_AF", locName
)==0 || /* Add due to import per cldrbug 5647 */
1408 uprv_strcmp("he", locName
)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1409 uprv_strcmp("he_IL", locName
)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1410 uprv_strcmp("km", locName
)==0 ||
1411 uprv_strcmp("km_KH", locName
)==0 ||
1412 uprv_strcmp("my", locName
)==0 ||
1413 uprv_strcmp("si", locName
)==0 ||
1414 uprv_strcmp("si_LK", locName
)==0 ||
1415 uprv_strcmp("sr_Latn", locName
)==0 || /* Add due to import per cldrbug 5647 */
1416 uprv_strcmp("th", locName
)==0 ||
1417 uprv_strcmp("th_TH", locName
)==0 ||
1418 uprv_strcmp("zh", locName
)==0 ||
1419 uprv_strcmp("zh_Hant", locName
)==0
1421 log_verbose("Don't know how to test %s. "
1422 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName
);
1425 log_verbose("Testing locale %s\n", locName
);
1426 status
= U_ZERO_ERROR
;
1427 coll
= ucol_open(locName
, &status
);
1428 if(U_SUCCESS(status
)) {
/* Only test collators whose open status is not a default/fallback warning. */
1429 if((status
!= U_USING_DEFAULT_WARNING
) && (status
!= U_USING_FALLBACK_WARNING
)) {
/* Reads the collator's internal image directly; special Jamo handling is reported as an error. */
1430 if(coll
->image
->jamoSpecial
== TRUE
) {
1431 log_err("%s has special JAMOs\n", locName
);
1433 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_OFF
, &status
);
1434 testCollator(coll
, &status
);
1435 testCEs(coll
, &status
);
1437 log_verbose("Skipping %s: %s\n", locName
, u_errorName(status
));
1441 log_err("Could not open %s: %s\n", locName
, u_errorName(status
));
/* Second pass: collators built from the literal rule strings. */
1446 for(i
= 0; i
<sizeof(rulesToTest
)/sizeof(rulesToTest
[0]); i
++) {
1447 log_verbose("Testing rule: %s\n", rulesToTest
[i
]);
1448 ruleLen
= u_unescape(rulesToTest
[i
], rule
, 2048);
1449 status
= U_ZERO_ERROR
;
1450 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1451 if(U_SUCCESS(status
)) {
1452 testCollator(coll
, &status
);
1453 testCEs(coll
, &status
);
1456 log_err_status(status
, "Could not test rule: %s: '%s'\n", u_errorName(status
), rulesToTest
[i
]);
/*
 * IsTailoredTest: checks ucol_isTailored() against a small hand-written
 * tailoring and against the "ja" locale.
 *
 * With the rule "&Z < A, B, C;c < d" every code unit of "ABCcd" must be
 * reported as tailored and every code unit of "ZabD" must not.
 * Afterwards the "ja" collator is opened and U+4E9C is expected to be
 * tailored.
 *
 * NOTE(review): in the visible lines the status of ucol_open("ja") is not
 * checked before `coll` is passed to ucol_isTailored(); confirm against the
 * full file.
 */
1462 static void IsTailoredTest(void) {
1463 UErrorCode status
= U_ZERO_ERROR
;
1465 UCollator
*coll
= NULL
;
1467 UChar tailored
[2048];
1468 UChar notTailored
[2048];
1469 uint32_t ruleLen
, tailoredLen
, notTailoredLen
;
1471 log_verbose("IsTailoredTest\n");
1473 u_uastrcpy(rule
, "&Z < A, B, C;c < d");
1474 ruleLen
= u_strlen(rule
);
1476 u_uastrcpy(tailored
, "ABCcd");
1477 tailoredLen
= u_strlen(tailored
);
1479 u_uastrcpy(notTailored
, "ZabD");
1480 notTailoredLen
= u_strlen(notTailored
);
1482 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1483 if(U_SUCCESS(status
)) {
/* Every character that appears on the right-hand side of the rule must be tailored. */
1484 for(i
= 0; i
<tailoredLen
; i
++) {
1485 if(!ucol_isTailored(coll
, tailored
[i
], &status
)) {
1486 log_err("%i: %04X should be tailored - it is reported as not\n", i
, tailored
[i
]);
/* The reset anchor and characters absent from the rule must not be tailored. */
1489 for(i
= 0; i
<notTailoredLen
; i
++) {
1490 if(ucol_isTailored(coll
, notTailored
[i
], &status
)) {
1491 log_err("%i: %04X should not be tailored - it is reported as it is\n", i
, notTailored
[i
]);
1497 log_err_status(status
, "Can't tailor rules\n");
1500 status
= U_ZERO_ERROR
;
1501 coll
= ucol_open("ja", &status
);
1502 if(!ucol_isTailored(coll
, 0x4E9C, &status
)) {
1503 log_err_status(status
, "0x4E9C should be tailored - it is reported as not\n");
/*
 * Test strings for TestChMove. That test asserts chTest[i] < chTest[j] under
 * the "cs" collator for every pair i < j, so the entries have to stay in
 * ascending order. Entries are decoded with u_unescape(), which is why the
 * combining caron is written as an escaped \\u030C.
 */
1509 const static char chTest
[][20] = {
1512 "ca", "cb", "cx", "cy", "CZ",
1513 "c\\u030C", "C\\u030C",
1516 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
1517 "ch", "cH", "Ch", "CH",
1518 "cha", "charly", "che", "chh", "chch", "chr",
1521 "r\\u030C", "R\\u030C",
1524 "s\\u030C", "S\\u030C",
1526 "z\\u030C", "Z\\u030C"
/*
 * TestChMove: opens the "cs" collator and checks that the chTest table is
 * strictly ascending: for every pair i < j the two entries are unescaped
 * into t1/t2 (capacity 256 UChars each) and doTest() is called expecting
 * UCOL_LESS.
 * If the collator cannot be opened, the failure is reported through
 * log_data_err().
 */
1529 static void TestChMove(void) {
1530 UChar t1
[256] = {0};
1531 UChar t2
[256] = {0};
1533 uint32_t i
= 0, j
= 0;
1535 UErrorCode status
= U_ZERO_ERROR
;
1537 UCollator
*coll
= ucol_open("cs", &status
);
1539 if(U_SUCCESS(status
)) {
1540 size
= sizeof(chTest
)/sizeof(chTest
[0]);
/* All unordered pairs (i, j) with i < j. */
1541 for(i
= 0; i
< size
-1; i
++) {
1542 for(j
= i
+1; j
< size
; j
++) {
1543 u_unescape(chTest
[i
], t1
, 256);
1544 u_unescape(chTest
[j
], t2
, 256);
1545 doTest(coll
, t1
, t2
, UCOL_LESS
);
1550 log_data_err("Can't open collator");
/*
 * Table of up-to-19-character test strings; the only reference visible in
 * this part of the file is the pairwise impTest loop further down in
 * TestImplicitTailoring.
 */
1558 const static char impTest
[][20] = {
/*
 * TestImplicitTailoring: table-driven test of tailorings that reset on CJK
 * ideographs (U+4E00 and neighbours). Each table entry holds a rule string,
 * up to 10 test strings in the expected ascending order, and the number of
 * strings actually used; every entry is passed to genericRulesStarter().
 *
 * NOTE(review): the lines after the genericRulesStarter() loop (declaring
 * t1/t2, opening a collator from "&\\u4e00 < a <<< A < b <<< B" and looping
 * over impTest) look like an older pairwise implementation that has been
 * disabled; its comment delimiters are not visible here - confirm.
 */
1568 static void TestImplicitTailoring(void) {
1569 static const struct {
1571 const char *data
[10];
1574 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1575 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1576 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1577 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1582 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
1583 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
1587 UChar t1[256] = {0};
1588 UChar t2[256] = {0};
1590 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1592 uint32_t i = 0, j = 0;
1594 uint32_t ruleLen = 0;
1595 UErrorCode status = U_ZERO_ERROR;
1596 UCollator *coll = NULL;
1597 ruleLen = u_unescape(rule, t1, 256);
1599 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1601 if(U_SUCCESS(status)) {
1602 size = sizeof(impTest)/sizeof(impTest[0]);
1603 for(i = 0; i < size-1; i++) {
1604 for(j = i+1; j < size; j++) {
1605 u_unescape(impTest[i], t1, 256);
1606 u_unescape(impTest[j], t2, 256);
1607 doTest(coll, t1, t2, UCOL_LESS);
1612 log_err("Can't open collator");
/*
 * TestFCDProblem: compares the decomposed sequence U+0430 U+0306 U+0325 with
 * U+04D1 U+0325 using the root collator and expects UCOL_EQUAL both with
 * UCOL_NORMALIZATION_MODE off and with it on.
 *
 * NOTE(review): in the visible lines the status returned by ucol_open() is
 * not checked before `coll` is used - confirm against the full file.
 */
1618 static void TestFCDProblem(void) {
1619 UChar t1
[256] = {0};
1620 UChar t2
[256] = {0};
1622 const char *s1
= "\\u0430\\u0306\\u0325";
1623 const char *s2
= "\\u04D1\\u0325";
1625 UErrorCode status
= U_ZERO_ERROR
;
1626 UCollator
*coll
= ucol_open("", &status
);
1627 u_unescape(s1
, t1
, 256);
1628 u_unescape(s2
, t2
, 256);
/* First comparison: normalization switched off. */
1630 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
1631 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
/* Second comparison: same strings with normalization switched on. */
1633 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
1634 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
/*
 * NORM_BUFFER_TEST_LEN is the capacity (in UChars) passed to
 * unorm_normalize() by TestComposeDecompose; NFC and NFD are the two
 * per-test-case output buffers that function reaches through t[i]->NFC and
 * t[i]->NFD.
 */
1640 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1641 We're only using NFC/NFD in this test.
1643 #define NORM_BUFFER_TEST_LEN 18
1646 UChar NFC
[NORM_BUFFER_TEST_LEN
];
1647 UChar NFD
[NORM_BUFFER_TEST_LEN
];
/*
 * TestComposeDecompose: checks that the NFC and NFD forms of a character
 * collate as equal, first with the root collator and then with the collator
 * of every available locale that has collation elements.
 *
 * Phase 1 - collect cases. Iterates the USet built from UNICODESET_STR
 *   ("[[:NFD_Inert=false:][:NFC_Inert=false:]]"), writes each code point into
 *   `comp` with U16_APPEND_UNSAFE and normalizes it to NFC and NFD into the
 *   current record t[noCases]. When the two forms differ from each other, or
 *   the NFD form differs from the input, a fresh zeroed record is allocated
 *   into t[noCases]; if NFD differs from the input, the NFC buffer is first
 *   overwritten with the input itself.
 * Phase 2 - root collator. For each collected record, ucol_equal() must hold
 *   for NFC vs NFD; otherwise an error is logged and doTest() is run.
 * Phase 3 - locales. Same comparison with each locale collator at
 *   UCOL_IDENTICAL strength; on failure the element iterator is pointed at
 *   the NFC and then the NFD text.
 *
 * NOTE(review): the statement that advances noCases is not visible here, and
 * `t` is sized for charsToTestSize pointers - confirm t[noCases] stays in
 * bounds when every character yields a case.
 * NOTE(review): ucol_openElements() in the locale loop indexes t[u] with the
 * value `u` left over from the preceding loop - confirm this is intended.
 * NOTE(review): the loop over charsToTestSize that logs "%08X " appears to be
 * disabled code whose comment delimiters are not visible here.
 */
1650 static void TestComposeDecompose(void) {
1651 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1652 static const UChar UNICODESET_STR
[] = {
1653 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1654 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1655 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1658 int32_t i
= 0, j
= 0;
1660 UErrorCode status
= U_ZERO_ERROR
;
1661 const char *locName
= NULL
;
1665 uint32_t noCases
= 0;
1666 UCollator
*coll
= NULL
;
1668 UChar comp
[NORM_BUFFER_TEST_LEN
];
1670 UCollationElements
*iter
;
1671 USet
*charsToTest
= uset_openPattern(UNICODESET_STR
, -1, &status
);
1672 int32_t charsToTestSize
;
1674 noOfLoc
= uloc_countAvailable();
1676 coll
= ucol_open("", &status
);
1677 if (U_FAILURE(status
)) {
1678 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status
));
1681 charsToTestSize
= uset_size(charsToTest
);
1682 if (charsToTestSize
<= 0) {
1683 log_err("Set was zero. Missing data?\n");
/* One pointer slot per candidate character; the first record is allocated up front. */
1686 t
= (tester
**)malloc(charsToTestSize
* sizeof(tester
*));
1687 t
[0] = (tester
*)malloc(sizeof(tester
));
1688 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize
);
1690 for(u
= 0; u
< charsToTestSize
; u
++) {
1691 UChar32 ch
= uset_charAt(charsToTest
, u
);
1693 U16_APPEND_UNSAFE(comp
, len
, ch
);
1694 nfcSize
= unorm_normalize(comp
, len
, UNORM_NFC
, 0, t
[noCases
]->NFC
, NORM_BUFFER_TEST_LEN
, &status
);
1695 nfdSize
= unorm_normalize(comp
, len
, UNORM_NFD
, 0, t
[noCases
]->NFD
, NORM_BUFFER_TEST_LEN
, &status
);
/* Keep the record if NFC != NFD, or if NFD differs from the raw input. */
1697 if(nfcSize
!= nfdSize
|| (uprv_memcmp(t
[noCases
]->NFC
, t
[noCases
]->NFD
, nfcSize
* sizeof(UChar
)) != 0)
1698 || (len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0))) {
/* When NFD differs from the input, compare the raw input (stored in NFC) against NFD. */
1700 if(len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0)) {
1701 u_strncpy(t
[noCases
]->NFC
, comp
, len
);
1702 t
[noCases
]->NFC
[len
] = 0;
1705 t
[noCases
] = (tester
*)malloc(sizeof(tester
));
1706 uprv_memset(t
[noCases
], 0, sizeof(tester
));
1709 log_verbose("Testing %d/%d of possible test cases\n", noCases
, charsToTestSize
);
1710 uset_close(charsToTest
);
/* Root collator pass over the collected cases. */
1713 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1714 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1715 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t
[u
]->u
);
1716 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1720 for(u = 0; u < charsToTestSize; u++) {
1722 log_verbose("%08X ", u);
1724 uprv_memset(t[noCases], 0, sizeof(tester));
1727 U16_APPEND_UNSAFE(comp, len, u);
1729 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1730 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1731 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1732 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1738 log_verbose("Testing locales, number of cases = %i\n", noCases
);
/* Locale pass: repeat the NFC/NFD comparison with each tailored collator. */
1739 for(i
= 0; i
<noOfLoc
; i
++) {
1740 status
= U_ZERO_ERROR
;
1741 locName
= uloc_getAvailable(i
);
1742 if(hasCollationElements(locName
)) {
1745 int32_t nameSize
= uloc_getDisplayName(locName
, NULL
, name
, sizeof(cName
), &status
);
1747 for(j
= 0; j
<nameSize
; j
++) {
1748 cName
[j
] = (char)name
[j
];
1750 cName
[nameSize
] = 0;
1751 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1753 coll
= ucol_open(locName
, &status
);
1754 ucol_setStrength(coll
, UCOL_IDENTICAL
);
1755 iter
= ucol_openElements(coll
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1757 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1758 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1759 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t
[u
]->u
, cName
);
1760 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1761 log_verbose("Testing NFC\n");
1762 ucol_setText(iter
, t
[u
]->NFC
, u_strlen(t
[u
]->NFC
), &status
);
1764 log_verbose("Testing NFD\n");
1765 ucol_setText(iter
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1769 ucol_closeElements(iter
);
/* Final pass over indices 0..noCases inclusive; the loop body is not visible here. */
1773 for(u
= 0; u
<= (UChar32
)noCases
; u
++) {
/*
 * TestEmptyRule: opens a collator from a zero-length rule string
 * (ucol_openRules with length 0, UCOL_OFF, UCOL_TERTIARY). What is done with
 * the resulting collator is not visible in this part of the file.
 */
1779 static void TestEmptyRule(void) {
1780 UErrorCode status
= U_ZERO_ERROR
;
1781 UChar rulez
[] = { 0 };
1782 UCollator
*coll
= ucol_openRules(rulez
, 0, UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
/*
 * TestUCARules: fetches the full rule string of the root collator and tries
 * to build a second collator from it.
 *
 * The rules are requested twice with ucol_getRulesEx(UCOL_FULL_RULES): once
 * with a capacity of 256 to learn the length, then again into a heap buffer
 * of ruleLen+1 UChars. If ucol_openRules() succeeds on that text the new
 * collator is closed again; a failure is only reported at verbose level.
 *
 * NOTE(review): the initial value of `rules` used by the first
 * ucol_getRulesEx() call is not visible here - confirm it points at a
 * buffer of at least 256 UChars.
 * NOTE(review): the result of malloc() is not checked in the visible lines.
 * NOTE(review): the trailing u_unescape()/ucol_getSortKey() lines look like
 * disabled code whose comment delimiters are not visible here.
 */
1787 static void TestUCARules(void) {
1788 UErrorCode status
= U_ZERO_ERROR
;
1791 uint32_t ruleLen
= 0;
1792 UCollator
*UCAfromRules
= NULL
;
1793 UCollator
*coll
= ucol_open("", &status
);
/* Missing data is reported separately from other open failures. */
1794 if(status
== U_FILE_ACCESS_ERROR
) {
1795 log_data_err("Is your data around?\n");
1797 } else if(U_FAILURE(status
)) {
1798 log_err("Error opening collator\n");
/* First call: obtain the rule length. */
1801 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, 256);
1803 log_verbose("TestUCARules\n");
/* Second call: fetch the rules into a buffer sized from the first call. */
1805 rules
= (UChar
*)malloc((ruleLen
+1)*sizeof(UChar
));
1806 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, ruleLen
);
1808 log_verbose("Rules length is %d\n", ruleLen
);
1809 UCAfromRules
= ucol_openRules(rules
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1810 if(U_SUCCESS(status
)) {
1811 ucol_close(UCAfromRules
);
1813 log_verbose("Unable to create a collator from UCARules!\n");
1816 u_unescape(blah, b, 256);
1817 ucol_getSortKey(coll, b, 1, res, 256);
1826 /* Pinyin tonal order */
1828 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1829 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1830 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1831 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1832 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1833 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1834 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1837 However, in testing we got the following order:
1838 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1839 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1840 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1842 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1843 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1844 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1846 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
/*
 * TestBefore: exercises "[before 1]" resets. The rule string places four
 * accented forms before each of a, e, i, o, u, and five u-diaeresis forms
 * after u. `data` is passed to genericRulesStarter() together with those
 * rules and lists the strings in the order matching the rules: each group
 * of four accented vowels followed by its upper-case base letter, then the
 * five forms tailored after u.
 */
1849 static void TestBefore(void) {
1850 const static char *data
[] = {
1851 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
1852 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
1853 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
1854 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
1855 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
1856 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
1858 genericRulesStarter(
1859 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
1860 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
1861 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
1862 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
1863 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
1864 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
1865 data
, sizeof(data
)/sizeof(data
[0]));
/*
 * TestJ784: passes a list of vowels with tone marks to
 * genericLocaleStarter() for the "zh" locale. Each of the first five rows
 * starts with the plain upper-case vowel followed by its four accented
 * forms; the last row holds the u-diaeresis forms.
 */
1869 /* superseded by TestBeforePinyin */
1870 static void TestJ784(void) {
1871 const static char *data
[] = {
1872 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
1873 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
1874 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
1875 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
1876 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
1878 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
1880 genericLocaleStarter("zh", data
, sizeof(data
)/sizeof(data
[0]));
/*
 * TestJ831: passes its `data` table to genericLocaleStarter() for the "lv"
 * locale. The table contents are not visible in this part of the file.
 */
1885 /* superseded by the changes to the lv locale */
1886 static void TestJ831(void) {
1887 const static char *data
[] = {
1893 genericLocaleStarter("lv", data
, sizeof(data
)/sizeof(data
[0]));
/*
 * TestJ815: runs the same `data` table twice - once through
 * genericLocaleStarter() with the "fr" locale and once through
 * genericRulesStarter() with the hand-written rule
 * "[backwards 2]&A<<\u00e6/e<<<\u00c6/E". The table contents are not
 * visible in this part of the file.
 */
1897 static void TestJ815(void) {
1898 const static char *data
[] = {
1914 genericLocaleStarter("fr", data
, sizeof(data
)/sizeof(data
[0]));
1915 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data
, sizeof(data
)/sizeof(data
[0]));
1920 "& a < b < c < d& r < c", "& a < b < d& r < c",
1921 "& a < b < c < d& c < m", "& a < b < c < m < d",
1922 "& a < b < c < d& a < m", "& a < m < b < c < d",
1923 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1924 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1925 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1926 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1927 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1928 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
/*
 * TestRedundantRules: table-driven check that a rule string containing a
 * redundant or overriding reset behaves like its simplified equivalent.
 *
 * Each table entry carries the rules under test, the expected equivalent
 * rules, up to 8 test strings in expected order and the number of strings
 * used. For every entry the function
 *   1. unescapes `rules` into rlz (capacity 2048) and opens `credundant`,
 *   2. unescapes `expectedRules` into the same buffer and opens `cresulting`,
 *   3. compares the two with testAgainstUCA(..., "expected", TRUE, ...),
 *   4. closes both collators, and
 *   5. runs genericRulesStarter() with the entry's rules and test data.
 *
 * NOTE(review): several entries are preceded by "this test conflicts with
 * ..." comments and one of them contains two consecutive commas
 * (`{"a", "b", "c", "x", "d", "e"},, 6`), which only compiles if that entry
 * is commented out; the comment delimiters are not visible here - confirm
 * which entries are live.
 * NOTE(review): in the visible lines the status of the second
 * ucol_openRules() call is not checked before testAgainstUCA() is called.
 */
1930 static void TestRedundantRules(void) {
1933 static const struct {
1935 const char *expectedRules
;
1936 const char *testdata
[8];
1937 uint32_t testdatalen
;
1939 /* this test conflicts with positioning of CODAN placeholder */
1941 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1945 /* this test conflicts with the [before x] syntax tightening */
1947 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1951 /* this test conflicts with the [before x] syntax tightening */
1953 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1954 "& a <<< x < b <<< c << d <<< e",
1955 {"a", "x", "b", "c", "d", "e"}, 6
1958 "& a < b < c < d& [before 1] c < m",
1959 "& a < b < m < c < d",
1960 {"a", "b", "m", "c", "d"}, 5
1963 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1964 "& a < b <<< c << d <<< x <<< e",
1965 {"a", "b", "c", "d", "x", "e"}, 6
1967 /* this test conflicts with the [before x] syntax tightening */
1969 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1970 "& a < b <<< c <<< x << d <<< e",
1971 {"a", "b", "c", "x", "d", "e"},, 6
1974 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1975 "& a < b <<< c << d <<< e <<< f < x < g",
1976 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1979 "& a <<< b << c < d& a < m",
1980 "& a <<< b << c < m < d",
1981 {"a", "b", "c", "m", "d"}, 5
1984 "&a<b<<b\\u0301 &z<b",
1986 {"a", "b\\u0301", "z", "b"}, 4
1999 "& a < b < c < d& r < c",
2000 "& a < b < d& r < c",
2004 "& a < b < c < d& r < c",
2005 "& a < b < d& r < c",
2009 "& a < b < c < d& c < m",
2010 "& a < b < c < m < d",
2011 {"a", "b", "c", "m", "d"}, 5
2014 "& a < b < c < d& a < m",
2015 "& a < m < b < c < d",
2016 {"a", "m", "b", "c", "d"}, 5
2021 UCollator
*credundant
= NULL
;
2022 UCollator
*cresulting
= NULL
;
2023 UErrorCode status
= U_ZERO_ERROR
;
2024 UChar rlz
[2048] = { 0 };
2027 for(i
= 0; i
<sizeof(tests
)/sizeof(tests
[0]); i
++) {
2028 log_verbose("testing rule %s, expected to be %s\n", tests
[i
].rules
, tests
[i
].expectedRules
);
2029 rlen
= u_unescape(tests
[i
].rules
, rlz
, 2048);
2031 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2032 if(status
== U_FILE_ACCESS_ERROR
) {
2033 log_data_err("Is your data around?\n");
2035 } else if(U_FAILURE(status
)) {
2036 log_err("Error opening collator\n");
/* rlz is reused for the expected rules once credundant has been built. */
2040 rlen
= u_unescape(tests
[i
].expectedRules
, rlz
, 2048);
2041 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2043 testAgainstUCA(cresulting
, credundant
, "expected", TRUE
, &status
);
2045 ucol_close(credundant
);
2046 ucol_close(cresulting
);
2048 log_verbose("testing using data\n");
2050 genericRulesStarter(tests
[i
].rules
, tests
[i
].testdata
, tests
[i
].testdatalen
);
/*
 * TestExpansionSyntax: checks that a reset on a multi-character string
 * (e.g. "&AE <<< a") yields the same collator as the explicit expansion
 * form (e.g. "&A <<< a / E").
 *
 * rules[i] and expectedRules[i] are parallel arrays; testdata[i] and
 * testdatalen[i] give the ordered strings for the same index. For every
 * index the function opens a collator from each rule string, compares them
 * with testAgainstUCA() in non-error mode (see the inline comment), closes
 * both and then runs genericRulesStarter() on rules[i] with the test data.
 *
 * NOTE(review): in the visible lines the status of the second
 * ucol_openRules() call is not checked before testAgainstUCA() is called.
 */
2055 static void TestExpansionSyntax(void) {
2058 const static char *rules
[] = {
2059 "&AE <<< a << b <<< c &d <<< f",
2060 "&AE <<< a <<< b << c << d < e < f <<< g",
2061 "&AE <<< B <<< C / D <<< F"
2064 const static char *expectedRules
[] = {
2065 "&A <<< a / E << b / E <<< c /E &d <<< f",
2066 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2067 "&A <<< B / E <<< C / ED <<< F / E"
2070 const static char *testdata
[][8] = {
2071 {"AE", "a", "b", "c"},
2072 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2073 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2076 const static uint32_t testdatalen
[] = {
2084 UCollator
*credundant
= NULL
;
2085 UCollator
*cresulting
= NULL
;
2086 UErrorCode status
= U_ZERO_ERROR
;
2087 UChar rlz
[2048] = { 0 };
2090 for(i
= 0; i
<sizeof(rules
)/sizeof(rules
[0]); i
++) {
2091 log_verbose("testing rule %s, expected to be %s\n", rules
[i
], expectedRules
[i
]);
2092 rlen
= u_unescape(rules
[i
], rlz
, 2048);
2094 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2095 if(status
== U_FILE_ACCESS_ERROR
) {
2096 log_data_err("Is your data around?\n");
2098 } else if(U_FAILURE(status
)) {
2099 log_err("Error opening collator\n");
/* rlz is reused for the expected rules once credundant has been built. */
2102 rlen
= u_unescape(expectedRules
[i
], rlz
, 2048);
2103 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2105 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2106 /* as a hard error test, but only in information mode */
2107 testAgainstUCA(cresulting
, credundant
, "expected", FALSE
, &status
);
2109 ucol_close(credundant
);
2110 ucol_close(cresulting
);
2112 log_verbose("testing using data\n");
2114 genericRulesStarter(rules
[i
], testdata
[i
], testdatalen
[i
]);
/*
 * TestCase: exercises the UCOL_CASE_FIRST and UCOL_CASE_LEVEL attributes.
 *
 * Part 1 - "en_US" collator at tertiary strength. For each of the four
 *   attribute combinations in caseTestAttributes (row k = {case first,
 *   case level}) every pair testCase[i], testCase[j] with i < j is compared
 *   with doTest(); the expected result is caseTestResults[k][3*i+j-1].
 * Part 2 - the same comparisons with a collator built from gRules, and in
 *   addition a collation element iterator is opened and closed over each
 *   string of the pair (what is done with the iterator in between is not
 *   visible here).
 * Part 3 - mixed-case contraction test: the lowerFirst/upperFirst tables are
 *   run through genericRulesStarter() with "&H<ch<<<Ch<<<CH" under the four
 *   combinations of [casefirst lower|upper] and [caselevel on].
 *
 * NOTE(review): the index 3*i+j-1 reaches columns 0..7 for i in 0..2 and
 * j in i+1..3, and is not unique: (i=0,j=3) and (i=1,j=2) both select
 * column 4. Confirm that this is intended.
 * NOTE(review): the comment above gRules spells the third rule character as
 * \u2461 while the array holds 0x2460 - confirm which is meant.
 */
2118 static void TestCase(void)
2120 const static UChar gRules
[MAX_TOKEN_LEN
] =
2121 /*" & 0 < 1,\u2461<a,A"*/
2122 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2124 const static UChar testCase
[][MAX_TOKEN_LEN
] =
2126 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2127 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2128 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2129 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
/* One row per caseTestAttributes entry; columns are addressed as 3*i+j-1. */
2132 const static UCollationResult caseTestResults
[][9] =
2134 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
2135 { UCOL_GREATER
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
},
2136 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_GREATER
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
2137 { UCOL_GREATER
, UCOL_LESS
, UCOL_GREATER
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
}
/* {UCOL_CASE_FIRST value, UCOL_CASE_LEVEL value} for each run. */
2140 const static UColAttributeValue caseTestAttributes
[][2] =
2142 { UCOL_LOWER_FIRST
, UCOL_OFF
},
2143 { UCOL_UPPER_FIRST
, UCOL_OFF
},
2144 { UCOL_LOWER_FIRST
, UCOL_ON
},
2145 { UCOL_UPPER_FIRST
, UCOL_ON
}
2148 UErrorCode status
= U_ZERO_ERROR
;
2149 UCollationElements
*iter
;
2150 UCollator
*myCollation
;
2151 myCollation
= ucol_open("en_US", &status
);
2153 if(U_FAILURE(status
)){
2154 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2157 log_verbose("Testing different case settings\n");
2158 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2160 for(k
= 0; k
<4; k
++) {
2161 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2162 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2163 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes
[k
][0], caseTestAttributes
[k
][1]);
2164 for (i
= 0; i
< 3 ; i
++) {
2165 for(j
= i
+1; j
<4; j
++) {
2166 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2170 ucol_close(myCollation
);
/* Same matrix again, this time with a collator built from gRules. */
2172 myCollation
= ucol_openRules(gRules
, u_strlen(gRules
), UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
2173 if(U_FAILURE(status
)){
2174 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2177 log_verbose("Testing different case settings with custom rules\n");
2178 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2180 for(k
= 0; k
<4; k
++) {
2181 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2182 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2183 for (i
= 0; i
< 3 ; i
++) {
2184 for(j
= i
+1; j
<4; j
++) {
2185 log_verbose("k:%d, i:%d, j:%d\n", k
, i
, j
);
2186 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2187 iter
=ucol_openElements(myCollation
, testCase
[i
], u_strlen(testCase
[i
]), &status
);
2189 ucol_closeElements(iter
);
2190 iter
=ucol_openElements(myCollation
, testCase
[j
], u_strlen(testCase
[j
]), &status
);
2192 ucol_closeElements(iter
);
2196 ucol_close(myCollation
);
2198 const static char *lowerFirst
[] = {
2214 const static char *upperFirst
[] = {
2229 log_verbose("mixed case test\n");
2230 log_verbose("lower first, case level off\n");
2231 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2232 log_verbose("upper first, case level off\n");
2233 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
2234 log_verbose("lower first, case level on\n");
2235 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2236 log_verbose("upper first, case level on\n");
2237 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
2242 static void TestIncrementalNormalize(void) {
2244 /*UChar baseA =0x61;*/
2246 /* UChar baseB = 0x42;*/
2247 static const UChar ccMix
[] = {0x316, 0x321, 0x300};
2248 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2250 0x316 is combining grave accent below, cc=220
2251 0x321 is combining palatalized hook below, cc=202
2252 0x300 is combining grave accent, cc=230
2255 #define MAXSLEN 2000
2256 /*int maxSLen = 64000;*/
2261 UErrorCode status
= U_ZERO_ERROR
;
2262 UCollationResult result
;
2264 int32_t myQ
= getTestOption(QUICK_OPTION
);
2266 if(getTestOption(QUICK_OPTION
) < 0) {
2267 setTestOption(QUICK_OPTION
, 1);
2271 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2272 /* most buffers along the way.*/
2273 UChar strA
[MAXSLEN
+1];
2274 UChar strB
[MAXSLEN
+1];
2276 coll
= ucol_open("en_US", &status
);
2277 if(status
== U_FILE_ACCESS_ERROR
) {
2278 log_data_err("Is your data around?\n");
2280 } else if(U_FAILURE(status
)) {
2281 log_err("Error opening collator\n");
2284 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2286 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2287 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2288 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2289 for (sLen
= 500; sLen
<501; sLen
++) {
2290 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2293 for (i
=1; i
<=sLen
-1; i
++) {
2294 strA
[i
] = ccMix
[i
% 3];
2295 strB
[sLen
-i
] = ccMix
[i
% 3];
2300 ucol_setStrength(coll
, UCOL_TERTIARY
); /* Do test with default strength, which runs*/
2301 doTest(coll
, strA
, strB
, UCOL_EQUAL
); /* optimized functions in the impl*/
2302 ucol_setStrength(coll
, UCOL_IDENTICAL
); /* Do again with the slow, general impl.*/
2303 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2307 setTestOption(QUICK_OPTION
, myQ
);
2310 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2311 /* of the string. Checks a couple of edge cases.*/
2314 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0};
2315 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0};
2316 ucol_setStrength(coll
, UCOL_TERTIARY
);
2317 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2320 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2324 * test below used a code point from Desseret, which sorts differently
2327 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2328 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2329 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2330 ucol_setStrength(coll
, UCOL_TERTIARY
);
2331 doTest(coll
, strA
, strB
, UCOL_GREATER
);
2334 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2337 static const UChar strA
[] = {0x41, 0x00, 0x42, 0x00};
2338 static const UChar strB
[] = {0x41, 0x00, 0x00, 0x00};
2345 /* there used to be -3 here. Hmmmm.... */
2346 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2347 result
= ucol_strcoll(coll
, strA
, 3, strB
, 3);
2348 if (result
!= UCOL_GREATER
) {
2349 log_err("ERROR 1 in test 4\n");
2351 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2352 if (result
!= UCOL_EQUAL
) {
2353 log_err("ERROR 2 in test 4\n");
2356 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2357 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2358 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2359 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2361 r
= strcmp(sortKeyA
, sortKeyAz
);
2363 log_err("Error 3 in test 4\n");
2365 r
= strcmp(sortKeyA
, sortKeyB
);
2367 log_err("Error 4 in test 4\n");
2369 r
= strcmp(sortKeyAz
, sortKeyBz
);
2371 log_err("Error 5 in test 4\n");
2374 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2375 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2376 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2377 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2378 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2380 r
= strcmp(sortKeyA
, sortKeyAz
);
2382 log_err("Error 6 in test 4\n");
2384 r
= strcmp(sortKeyA
, sortKeyB
);
2386 log_err("Error 7 in test 4\n");
2388 r
= strcmp(sortKeyAz
, sortKeyBz
);
2390 log_err("Error 8 in test 4\n");
2392 ucol_setStrength(coll
, UCOL_TERTIARY
);
2396 /* Test 5: Null characters in non-normal source strings.*/
2399 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2400 static const UChar strB
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2407 result
= ucol_strcoll(coll
, strA
, 6, strB
, 6);
2408 if (result
!= UCOL_GREATER
) {
2409 log_err("ERROR 1 in test 5\n");
2411 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2412 if (result
!= UCOL_EQUAL
) {
2413 log_err("ERROR 2 in test 5\n");
2416 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2417 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2418 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2419 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2421 r
= strcmp(sortKeyA
, sortKeyAz
);
2423 log_err("Error 3 in test 5\n");
2425 r
= strcmp(sortKeyA
, sortKeyB
);
2427 log_err("Error 4 in test 5\n");
2429 r
= strcmp(sortKeyAz
, sortKeyBz
);
2431 log_err("Error 5 in test 5\n");
2434 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2435 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2436 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2437 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2438 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2440 r
= strcmp(sortKeyA
, sortKeyAz
);
2442 log_err("Error 6 in test 5\n");
2444 r
= strcmp(sortKeyA
, sortKeyB
);
2446 log_err("Error 7 in test 5\n");
2448 r
= strcmp(sortKeyAz
, sortKeyBz
);
2450 log_err("Error 8 in test 5\n");
2452 ucol_setStrength(coll
, UCOL_TERTIARY
);
2456 /* Test 6: Null character as base of a non-normal combining sequence.*/
2459 static const UChar strA
[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2460 static const UChar strB
[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2462 result
= ucol_strcoll(coll
, strA
, 5, strB
, 5);
2463 if (result
!= UCOL_LESS
) {
2464 log_err("Error 1 in test 6\n");
2466 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2467 if (result
!= UCOL_EQUAL
) {
2468 log_err("Error 2 in test 6\n");
/*
 * TestGetCaseBit: unescapes every string in caseBitData and checks that
 * ucol_uprv_getCaseBits() on the collator opened with the "" locale returns
 * the value stored at the same index of results[]. A mismatch is reported
 * with log_err together with the first code unit of the input.
 */
2478 static void TestGetCaseBit(void) {
/* Escaped input strings; entry i is compared against results[i]. */
2479 static const char *caseBitData
[] = {
2480 "a", "A", "ch", "Ch", "CH",
2481 "\\uFF9E", "\\u0009"
/* Expected case value for each caseBitData entry, in the same order. */
2484 static const uint8_t results
[] = {
2485 UCOL_LOWER_CASE
, UCOL_UPPER_CASE
, UCOL_LOWER_CASE
, UCOL_MIXED_CASE
, UCOL_UPPER_CASE
,
2486 UCOL_UPPER_CASE
, UCOL_LOWER_CASE
2489 uint32_t i
, blen
= 0;
2491 UErrorCode status
= U_ZERO_ERROR
;
2492 UCollator
*UCA
= ucol_open("", &status
);
/* The loop bound is the element count of results[]. */
2495 for(i
= 0; i
<sizeof(results
)/sizeof(results
[0]); i
++) {
/* Convert the escaped text into UChars (capacity 256) and remember its length. */
2496 blen
= u_unescape(caseBitData
[i
], b
, 256);
2497 res
= ucol_uprv_getCaseBits(UCA
, b
, blen
, &status
);
2498 if(results
[i
] != res
) {
2499 log_err("Expected case = %02X, got %02X for %04X\n", results
[i
], res
, b
[0]);
/*
 * TestHangulTailoring: checks the ordering of the strings in koreanData
 * three ways: with a collator built from the tailoring rules below, with the
 * same collator after jamoSpecial has been forced to TRUE in its image, and
 * through genericLocaleStarter() with the "ko__LOTUS" locale.
 */
2505 static void TestHangulTailoring(void) {
/* Escaped test strings handed to the generic ordering helpers. */
2506 static const char *koreanData
[] = {
2507 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2508 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2509 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2510 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2511 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2512 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
/* Tailoring text: a reset on \uac00 followed by a chain of <<< relations. */
2516 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2517 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2518 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2519 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2520 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2521 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2524 UErrorCode status
= U_ZERO_ERROR
;
2525 UChar rlz
[2048] = { 0 };
/* Unescape the rule text into rlz (capacity 2048 UChars). */
2526 uint32_t rlen
= u_unescape(rules
, rlz
, 2048);
2528 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
/* A file access error is logged as a data problem; any other failure as a test error. */
2529 if(status
== U_FILE_ACCESS_ERROR
) {
2530 log_data_err("Is your data around?\n");
2532 } else if(U_FAILURE(status
)) {
2533 log_err("Error opening collator\n");
2537 log_verbose("Using start of korean rules\n");
2539 if(U_SUCCESS(status
)) {
2540 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2542 log_err("Unable to open collator with rules %s\n", rules
);
2545 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
/* Writes directly into the collator image through a cast before re-running the ordering test. */
2546 ((UCATableHeader
*)coll
->image
)->jamoSpecial
= TRUE
; /* don't try this at home */
2547 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2551 log_verbose("Using ko__LOTUS locale\n");
2552 genericLocaleStarter("ko__LOTUS", koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
/*
 * TestCompressOverlap: builds two 150-unit strings (secstr and tertstr),
 * generates their sort keys with the "" locale collator and scans the key
 * bytes to make sure the compressed secondary / tertiary weights stay on the
 * expected side of the top and bottom compression boundaries. Every
 * violation is reported with log_err.
 */
2555 static void TestCompressOverlap(void) {
2558 UErrorCode status
= U_ZERO_ERROR
;
2565 coll
= ucol_open("", &status
);
2567 if (U_FAILURE(status
)) {
2568 log_err_status(status
, "Collator can't be created -> %s\n", u_errorName(status
));
/* Fill the leading positions (while count < 149) of both strings with U+0020. */
2571 while (count
< 149) {
2572 secstr
[count
] = 0x0020; /* [06, 05, 05] */
2573 tertstr
[count
] = 0x0020;
2577 /* top down compression ----------------------------------- */
2578 secstr
[count
] = 0x0332; /* [, 87, 05] */
2579 tertstr
[count
] = 0x3000; /* [06, 05, 07] */
2581 /* no compression secstr should have 150 secondary bytes, tertstr should
2582 have 150 tertiary bytes.
2583 with correct overlapping compression, secstr should have 4 secondary
2584 bytes, tertstr should have > 2 tertiary bytes */
2585 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
/* Start just after the first 0x01 byte in the key and walk until the next byte is 0x01. */
2586 tempptr
= uprv_strchr(result
, 1) + 1;
2587 while (*(tempptr
+ 1) != 1) {
2588 /* the last secondary collation element is not checked since it is not
2589 part of the compression */
2590 if (*tempptr
< UCOL_COMMON_TOP2
- UCOL_TOP_COUNT2
) {
2591 log_err("Secondary compression overlapped\n");
2596 /* tertiary top/bottom/common for en_US is similar to the secondary
2597 top/bottom/common */
2598 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
/* Start just after the last 0x01 byte in the key and walk until the next byte is the terminating 0. */
2599 tempptr
= uprv_strrchr(result
, 1) + 1;
2600 while (*(tempptr
+ 1) != 0) {
2601 /* the last secondary collation element is not checked since it is not
2602 part of the compression */
2603 if (*tempptr
< coll
->tertiaryTop
- coll
->tertiaryTopCount
) {
2604 log_err("Tertiary compression overlapped\n");
2609 /* bottom up compression ------------------------------------- */
/* Same two scans as above, this time checked against the bottom boundaries. */
2612 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
2613 tempptr
= uprv_strchr(result
, 1) + 1;
2614 while (*(tempptr
+ 1) != 1) {
2615 /* the last secondary collation element is not checked since it is not
2616 part of the compression */
2617 if (*tempptr
> UCOL_COMMON_BOT2
+ UCOL_BOT_COUNT2
) {
2618 log_err("Secondary compression overlapped\n");
2623 /* tertiary top/bottom/common for en_US is similar to the secondary
2624 top/bottom/common */
2625 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
2626 tempptr
= uprv_strrchr(result
, 1) + 1;
2627 while (*(tempptr
+ 1) != 0) {
2628 /* the last secondary collation element is not checked since it is not
2629 part of the compression */
2630 if (*tempptr
> coll
->tertiaryBottom
+ coll
->tertiaryBottomCount
) {
2631 log_err("Tertiary compression overlapped\n");
/*
 * TestCyrillicTailoring: runs the same three test strings through the "root"
 * locale and through six small rule sets that reset on or insert U+0410,
 * optionally followed by U+04D0 or U+0410 U+0301.
 */
2639 static void TestCyrillicTailoring(void) {
2640 static const char *test
[] = {
2646 /* Russian overrides contractions, so this test is not valid anymore */
2647 /*genericLocaleStarter("ru", test, 3);*/
/* Every call below passes 3 as the number of strings taken from test[]. */
2649 genericLocaleStarter("root", test
, 3);
2650 genericRulesStarter("&\\u0410 = \\u0410", test
, 3);
2651 genericRulesStarter("&Z < \\u0410", test
, 3);
2652 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test
, 3);
2653 genericRulesStarter("&Z < \\u0410 < \\u04d0", test
, 3);
2654 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test
, 3);
2655 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test
, 3);
/*
 * TestSuppressContractions: runs two string lists through a collator built
 * from the rule "[suppressContractions [\u0400-\u047f]]", three strings from
 * each list.
 */
2658 static void TestSuppressContractions(void) {
2660 static const char *testNoCont2
[] = {
2665 static const char *testNoCont
[] = {
2668 "\\uFF21\\u0410\\u0302"
/* Same rule for both lists; only the data differs. */
2671 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont
, 3);
2672 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2
, 3);
/*
 * TestContraction: exercises contraction handling in three stages.
 *  1. For every rule in testrules, the collation elements produced for the
 *     two-unit string testdata[i] are compared with those produced by
 *     iterators opened on a single unit at a time.
 *  2. A collator built from "& a < b < c < ch < d & c = ch / h" must order
 *     the testdata2 pairs as UCOL_LESS.
 *  3. The rules in testrules3 are taken in pairs (i, i + 1); both collators
 *     of a pair must yield the same collation elements for 'B'.
 * Failures are reported through log_err / log_err_status.
 */
2675 static void TestContraction(void) {
2676 const static char *testrules
[] = {
2678 "&A = A\\u0306/\\u0306",
/* Two-unit strings, indexed in step with testrules. */
2681 const static UChar testdata
[][2] = {
2682 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2683 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2684 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
/* Pairs compared with ucol_strcoll in stage 2. */
2686 const static UChar testdata2
[][2] = {
2687 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2688 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2689 {0x0063 /* 'c' */, 0x006C /* 'l' */}
/* Rules consumed two at a time in stage 3. */
2691 const static char *testrules3
[] = {
2692 "&z < xyz &xyzw << B",
2693 "&z < xyz &xyz << B / w",
2694 "&z < ch &achm << B",
2695 "&z < ch &a << B / chm",
2696 "&\\ud800\\udc00w << B",
2697 "&\\ud800\\udc00 << B / w",
2698 "&a\\ud800\\udc00m << B",
2699 "&a << B / \\ud800\\udc00m",
2702 UErrorCode status
= U_ZERO_ERROR
;
2704 UChar rule
[256] = {0};
/* Stage 1: one collator per entry of testrules. */
2708 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2709 UCollationElements
*iter1
;
2711 log_verbose("Rule %s for testing\n", testrules
[i
]);
2712 rlen
= u_unescape(testrules
[i
], rule
, 32);
2713 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2714 if (U_FAILURE(status
)) {
2715 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
/* iter1 walks the whole two-unit string. */
2718 iter1
= ucol_openElements(coll
, testdata
[i
], 2, &status
);
2719 if (U_FAILURE(status
)) {
2720 log_err("Collation iterator creation failed\n");
2724 UCollationElements
*iter2
= ucol_openElements(coll
,
2728 if (U_FAILURE(status
)) {
2729 log_err("Collation iterator creation failed\n");
/* Every element from iter2 must match the next element from iter1. */
2732 ce
= ucol_next(iter2
, &status
);
2733 while (ce
!= UCOL_NULLORDER
) {
2734 if ((uint32_t)ucol_next(iter1
, &status
) != ce
) {
2735 log_err("Collation elements in contraction split does not match\n");
2738 ce
= ucol_next(iter2
, &status
);
2741 ucol_closeElements(iter2
);
/* iter1 must have nothing left once the split iterators are consumed. */
2743 if (ucol_next(iter1
, &status
) != UCOL_NULLORDER
) {
2744 log_err("Collation elements not exhausted\n");
2747 ucol_closeElements(iter1
);
/* Stage 2: ordering of cg < ch < cl under a rule set that defines ch. */
2751 rlen
= u_unescape("& a < b < c < ch < d & c = ch / h", rule
, 256);
2752 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2753 if (ucol_strcoll(coll
, testdata2
[0], 2, testdata2
[1], 2) != UCOL_LESS
) {
2754 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2755 testdata2
[0][0], testdata2
[0][1], testdata2
[1][0],
2759 if (ucol_strcoll(coll
, testdata2
[1], 2, testdata2
[2], 2) != UCOL_LESS
) {
2760 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2761 testdata2
[1][0], testdata2
[1][1], testdata2
[2][0],
/* Stage 3: rules i and i + 1 of testrules3 must be equivalent for 'B'. */
2767 for (i
= 0; i
< sizeof(testrules3
) / sizeof(testrules3
[0]); i
+= 2) {
2770 UCollationElements
*iter1
,
2772 UChar ch
= 0x0042 /* 'B' */;
2774 rlen
= u_unescape(testrules3
[i
], rule
, 32);
2775 coll1
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2776 rlen
= u_unescape(testrules3
[i
+ 1], rule
, 32);
2777 coll2
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2778 if (U_FAILURE(status
)) {
/* Report the rule from testrules3, the array this loop iterates over;
   indexing testrules here printed an unrelated rule. */
2779 log_err("Collator creation failed %s\n", testrules3
[i
]);
2782 iter1
= ucol_openElements(coll1
, &ch
, 1, &status
);
2783 iter2
= ucol_openElements(coll2
, &ch
, 1, &status
);
2784 if (U_FAILURE(status
)) {
2785 log_err("Collation iterator creation failed\n");
2788 ce
= ucol_next(iter1
, &status
);
2789 if (U_FAILURE(status
)) {
2790 log_err("Retrieving ces failed\n");
/* Walk both iterators in lock step. */
2793 while (ce
!= UCOL_NULLORDER
) {
2794 if (ce
!= (uint32_t)ucol_next(iter2
, &status
)) {
2795 log_err("CEs does not match\n");
2798 ce
= ucol_next(iter1
, &status
);
2799 if (U_FAILURE(status
)) {
2800 log_err("Retrieving ces failed\n");
2804 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
2805 log_err("CEs not exhausted\n");
2808 ucol_closeElements(iter1
);
2809 ucol_closeElements(iter2
);
/*
 * TestExpansion: for each rule in testrules, opens a collator with
 * normalization on and tertiary strength and checks with doTest() that
 * consecutive entries of testdata compare as UCOL_LESS.
 */
2815 static void TestExpansion(void) {
2816 const static char *testrules
[] = {
2817 "&J << K / B & K << M",
/* NUL-terminated three-unit strings, listed in the expected order. */
2820 const static UChar testdata
[][3] = {
2821 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2822 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2823 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2824 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2825 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2826 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2829 UErrorCode status
= U_ZERO_ERROR
;
2831 UChar rule
[256] = {0};
2835 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2837 log_verbose("Rule %s for testing\n", testrules
[i
]);
2838 rlen
= u_unescape(testrules
[i
], rule
, 32);
2839 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2840 if (U_FAILURE(status
)) {
2841 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
/* j runs 0..4 so that j + 1 stays within the six entries of testdata. */
2845 for (j
= 0; j
< 5; j
++) {
2846 doTest(coll
, testdata
[j
], testdata
[j
+ 1], UCOL_LESS
);
2853 /* this test tests the current limitations of the engine */
2854 /* it always fails, so it is disabled by default */
/*
 * TestLimitations: a collection of independent scenarios, each with its own
 * rule string and string lists, run through genericRulesStarter() or
 * genericRulesStarterWithOptions(). The scenarios cover recursive
 * expansions, contractions spanning expansions, NUL inside contractions,
 * contractions spanning normalization, variable top and case-first handling.
 */
2855 static void TestLimitations(void) {
2856 /* recursive expansions */
2858 static const char *rule
= "&a=b/c&d=c/e";
2859 static const char *tlimit01
[] = {"add","b","adf"};
2860 static const char *tlimit02
[] = {"aa","b","af"};
2861 log_verbose("recursive expansions\n");
2862 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2863 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2865 /* contractions spanning expansions */
2867 static const char *rule
= "&a<<<c/e&g<<<eh";
2868 static const char *tlimit01
[] = {"ad","c","af","f","ch","h"};
2869 static const char *tlimit02
[] = {"ad","c","ch","af","f","h"};
2870 log_verbose("contractions spanning expansions\n");
2871 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2872 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2874 /* normalization: nulls in contractions */
2876 static const char *rule
= "&a<<<\\u0000\\u0302";
2877 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2878 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
/* One attribute (decomposition mode), exercised once ON and once OFF. */
2879 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2880 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2881 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2883 log_verbose("NULL in contractions\n");
2884 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2885 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2886 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2887 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2890 /* normalization: contractions spanning normalization */
/* NOTE(review): the rule and both string lists below are identical to the
   "nulls in contractions" scenario above; confirm whether distinct data was intended. */
2892 static const char *rule
= "&a<<<\\u0000\\u0302";
2893 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2894 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
2895 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2896 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2897 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2899 log_verbose("contractions spanning normalization\n");
2900 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2901 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2902 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2903 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2908 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2909 static const char *rule
= "&\\u2010<x<[variable top]=z";
2910 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
2911 static const char *tlimit01
[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
2912 static const char *tlimit02
[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
2913 static const char *tlimit03
[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
/* Two attributes set together: alternate handling and strength. */
2914 static const UColAttribute att
[] = { UCOL_ALTERNATE_HANDLING
, UCOL_STRENGTH
};
2915 static const UColAttributeValue valOn
[] = { UCOL_SHIFTED
, UCOL_QUATERNARY
};
2916 static const UColAttributeValue valOff
[] = { UCOL_NON_IGNORABLE
, UCOL_TERTIARY
};
2918 log_verbose("variable top\n");
2919 genericRulesStarterWithOptions(rule
, tlimit03
, sizeof(tlimit03
)/sizeof(tlimit03
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2920 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2921 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2922 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
2923 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
/* Case scenario: the only attribute set is UCOL_CASE_FIRST = UCOL_UPPER_FIRST. */
2928 static const char *rule
= "&c<ch<<<cH<<<Ch<<<CH";
2929 static const char *tlimit01
[] = {"c","CH","Ch","cH","ch"};
2930 static const char *tlimit02
[] = {"c","CH","cH","Ch","ch"};
2931 static const UColAttribute att
[] = { UCOL_CASE_FIRST
};
2932 static const UColAttributeValue valOn
[] = { UCOL_UPPER_FIRST
};
2933 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2934 log_verbose("case level\n");
2935 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2936 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2937 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2938 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
/*
 * TestBocsuCoverage: opens the "" locale collator, switches it to identical
 * strength and generates a sort key for a string mixing BMP and
 * supplementary code points. If the collator cannot be opened the problem is
 * logged with log_data_err.
 */
2944 static void TestBocsuCoverage(void) {
2945 UErrorCode status
= U_ZERO_ERROR
;
2946 const char *testString
= "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2947 UChar test
[256] = {0};
/* The unescape capacity passed here is 32 even though test holds 256 UChars. */
2948 uint32_t tlen
= u_unescape(testString
, test
, 32);
2949 uint8_t key
[256] = {0};
2952 UCollator
*coll
= ucol_open("", &status
);
2953 if(U_SUCCESS(status
)) {
2954 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
/* The key is written into key[] with a capacity of 256 bytes. */
2956 klen
= ucol_getSortKey(coll
, test
, tlen
, key
, 256);
2960 log_data_err("Couldn't open UCA\n");
/*
 * TestVariableTopSetting: exercises ucol_setVariableTop(),
 * ucol_getVariableTop() and ucol_restoreVariableTop() on the "" locale
 * collator.
 *  - Parses the full rule string token by token and, at every primary
 *    difference, sets the variable top to the previously parsed token.
 *  - Tries every entry of the contraction table stored in the collator image
 *    as a variable top.
 *  - Checks that a string stored in first is rejected, that the original
 *    variable top can be restored, and that the three calls leave an
 *    incoming error status untouched.
 * The QUICK test option is forced to 1 during the rule walk and restored
 * afterwards.
 */
2964 static void TestVariableTopSetting(void) {
2965 UErrorCode status
= U_ZERO_ERROR
;
2966 const UChar
*current
= NULL
;
2967 uint32_t varTopOriginal
= 0, varTop1
, varTop2
;
2968 UCollator
*coll
= ucol_open("", &status
);
2969 if(U_SUCCESS(status
)) {
/* Fields of the current token, copied out of src.parsedToken inside the loop. */
2971 uint32_t strength
= 0;
2973 uint32_t chOffset
= 0;
2975 uint32_t exOffset
= 0;
/* Fields remembered from the previous non-reset token. */
2977 uint32_t oldChOffset
= 0;
2978 uint32_t oldChLen
= 0;
2979 uint32_t oldExOffset
= 0;
2980 uint32_t oldExLen
= 0;
2981 uint32_t prefixOffset
= 0;
2982 uint32_t prefixLen
= 0;
2984 UBool startOfRules
= TRUE
;
2985 UColTokenParser src
;
2988 UChar
*rulesCopy
= NULL
;
2991 UCollationResult result
;
2993 UChar first
[256] = { 0 };
2994 UChar second
[256] = { 0 };
2995 UParseError parseError
;
/* Remember the caller's QUICK setting so it can be put back later. */
2996 int32_t myQ
= getTestOption(QUICK_OPTION
);
2998 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
3002 if(getTestOption(QUICK_OPTION
) <= 0) {
3003 setTestOption(QUICK_OPTION
, 1);
3006 /* this test will fail when normalization is turned on */
3007 /* therefore we always turn off exhaustive mode for it */
3009 log_verbose("Slide variable top over UCARules\n");
/* First call with capacity 0 only asks for the length; the buffer is then
   allocated with UCOL_TOK_EXTRA_RULE_SPACE_SIZE extra UChars and filled. */
3010 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, 0);
3011 rulesCopy
= (UChar
*)uprv_malloc((rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
3012 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
);
3014 if(U_SUCCESS(status
) && rulesLen
> 0) {
3015 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
/* Point the token parser at the copied rules; the extra space follows the rule text. */
3016 src
.current
= src
.source
= rulesCopy
;
3017 src
.end
= rulesCopy
+rulesLen
;
3018 src
.extraCurrent
= src
.end
;
3019 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
3021 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3022 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
/* Loop until the parser returns NULL. */
3023 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,&status
)) != NULL
) {
3024 strength
= src
.parsedToken
.strength
;
3025 chOffset
= src
.parsedToken
.charsOffset
;
3026 chLen
= src
.parsedToken
.charsLen
;
3027 exOffset
= src
.parsedToken
.extensionOffset
;
3028 exLen
= src
.parsedToken
.extensionLen
;
3029 prefixOffset
= src
.parsedToken
.prefixOffset
;
3030 prefixLen
= src
.parsedToken
.prefixLen
;
3031 specs
= src
.parsedToken
.flags
;
3033 startOfRules
= FALSE
;
3035 log_verbose("%04X %d ", *(src
.source
+chOffset
), chLen
);
/* Only primary differences trigger a variable-top attempt, using the previous token's characters. */
3037 if(strength
== UCOL_PRIMARY
) {
3038 status
= U_ZERO_ERROR
;
3039 varTopOriginal
= ucol_getVariableTop(coll
, &status
);
3040 varTop1
= ucol_setVariableTop(coll
, src
.source
+oldChOffset
, oldChLen
, &status
);
3041 if(U_FAILURE(status
)) {
3045 uint32_t CE
= UCOL_NO_MORE_CES
;
3047 /* before we start screaming, let's see if there is a problem with the rules */
3048 UErrorCode collIterateStatus
= U_ZERO_ERROR
;
3049 collIterate
*s
= uprv_new_collIterate(&collIterateStatus
);
3050 uprv_init_collIterate(coll
, src
.source
+oldChOffset
, oldChLen
, s
, &collIterateStatus
);
3052 CE
= ucol_getNextCE(coll
, s
, &status
);
/* Format the offending code units as hex text for the log messages below. */
3054 for(i
= 0; i
< oldChLen
; i
++) {
3055 j
= sprintf(buf
, "%04X ", *(src
.source
+oldChOffset
+i
));
3058 if(status
== U_PRIMARY_TOO_LONG_ERROR
) {
3059 log_verbose("= Expected failure for %s =", buffer
);
/* The failure is only logged as an error when the iterator reports it is at its end. */
3061 if(uprv_collIterateAtEnd(s
)) {
3062 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3063 oldChOffset
, u_errorName(status
), buffer
);
3065 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3069 uprv_delete_collIterate(s
);
/* The value read back must agree with the value returned by the setter in its upper 16 bits. */
3071 varTop2
= ucol_getVariableTop(coll
, &status
);
3072 if((varTop1
& 0xFFFF0000) != (varTop2
& 0xFFFF0000)) {
3073 log_err("cannot retrieve set varTop value!\n");
3077 if((varTop1
& 0xFFFF0000) > 0 && oldExLen
== 0) {
/* first = previous token + current token + previous token, NUL-terminated. */
3079 u_strncpy(first
, src
.source
+oldChOffset
, oldChLen
);
3080 u_strncpy(first
+oldChLen
, src
.source
+chOffset
, chLen
);
3081 u_strncpy(first
+oldChLen
+chLen
, src
.source
+oldChOffset
, oldChLen
);
3082 first
[2*oldChLen
+chLen
] = 0;
3085 u_strncpy(second
, src
.source
+chOffset
, chLen
);
3087 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
/* NOTE(review): the copies below advance by oldChLen while the lengths copied and the
   terminator index use oldExLen; confirm whether oldExLen was intended for the offsets. */
3088 u_strncpy(second
, src
.source
+oldExOffset
, oldExLen
);
3089 u_strncpy(second
+oldChLen
, src
.source
+chOffset
, chLen
);
3090 u_strncpy(second
+oldChLen
+chLen
, src
.source
+oldExOffset
, oldExLen
);
3091 second
[2*oldExLen
+chLen
] = 0;
/* An equal strcoll result is re-checked through doTest; anything else is only logged verbosely. */
3093 result
= ucol_strcoll(coll
, first
, -1, second
, -1);
3094 if(result
== UCOL_EQUAL
) {
3095 doTest(coll
, first
, second
, UCOL_EQUAL
);
3097 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src
.source
+oldChOffset
), *(src
.source
+chOffset
));
/* Reset tokens do not become the "previous" token. */
3101 if(strength
!= UCOL_TOK_RESET
) {
3102 oldChOffset
= chOffset
;
3104 oldExOffset
= exOffset
;
3108 status
= U_ZERO_ERROR
;
3111 log_err("Unexpected failure getting rules %s\n", u_errorName(status
));
3114 if (U_FAILURE(status
)) {
3115 log_err("Error parsing rules %s\n", u_errorName(status
));
3118 status
= U_ZERO_ERROR
;
/* Put the QUICK option back to the value saved in myQ. */
3121 setTestOption(QUICK_OPTION
, myQ
);
3123 log_verbose("Testing setting variable top to contractions\n");
/* conts points into the collator image at the contractionUCACombos byte offset;
   each record is contractionUCACombosWidth UChars wide. */
3125 UChar
*conts
= (UChar
*)((uint8_t *)coll
->image
+ coll
->image
->contractionUCACombos
);
3126 int32_t maxUCAContractionLength
= coll
->image
->contractionUCACombosWidth
;
3127 while(*conts
!= 0) {
3129 * A continuation is NUL-terminated and NUL-padded
3130 * except if it has the maximum length.
/* Trim trailing NUL padding to find the real length of this record. */
3132 int32_t contractionLength
= maxUCAContractionLength
;
3133 while(contractionLength
> 0 && conts
[contractionLength
- 1] == 0) {
3134 --contractionLength
;
3136 if(*(conts
+1)==0) { /* pre-context */
3137 varTop1
= ucol_setVariableTop(coll
, conts
, 1, &status
);
3139 varTop1
= ucol_setVariableTop(coll
, conts
, contractionLength
, &status
);
3141 if(U_FAILURE(status
)) {
3142 if(status
== U_PRIMARY_TOO_LONG_ERROR
) {
3143 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3144 * therefore it is not an error when it complains about them. */
3145 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3146 *conts
, *(conts
+1), *(conts
+2));
3148 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3149 *conts
, *(conts
+1), *(conts
+2), u_errorName(status
));
3151 status
= U_ZERO_ERROR
;
/* Advance to the next fixed-width record. */
3153 conts
+=maxUCAContractionLength
;
3156 status
= U_ZERO_ERROR
;
/* This call is expected to fail; success is reported as an error. */
3162 ucol_setVariableTop(coll
, first
, -1, &status
);
3164 if(U_SUCCESS(status
)) {
3165 log_err("Invalid contraction succeded in setting variable top!\n");
3170 log_verbose("Test restoring variable top\n");
3172 status
= U_ZERO_ERROR
;
3173 ucol_restoreVariableTop(coll
, varTopOriginal
, &status
);
3174 if(varTopOriginal
!= ucol_getVariableTop(coll
, &status
)) {
3175 log_err("Couldn't restore old variable top\n");
3178 log_verbose("Testing calling with error set\n");
/* With an error already in status, every call (including those given a NULL
   collator) must leave status unchanged. */
3180 status
= U_INTERNAL_PROGRAM_ERROR
;
3181 varTop1
= ucol_setVariableTop(coll
, first
, 1, &status
);
3182 varTop2
= ucol_getVariableTop(coll
, &status
);
3183 ucol_restoreVariableTop(coll
, varTop2
, &status
);
3184 varTop1
= ucol_setVariableTop(NULL
, first
, 1, &status
);
3185 varTop2
= ucol_getVariableTop(NULL
, &status
);
3186 ucol_restoreVariableTop(NULL
, varTop2
, &status
);
3187 if(status
!= U_INTERNAL_PROGRAM_ERROR
) {
3188 log_err("Bad reaction to passed error!\n");
/* Freed through src.source rather than rulesCopy because the parser may have reallocated the buffer. */
3190 uprv_free(src
.source
);
3193 log_data_err("Couldn't open UCA collator\n");
/*
 * TestNonChars: opens the "en_US" collator and checks with
 * genericOrderingTestWithResult() that the strings in test[] are in strictly
 * ascending order (UCOL_LESS). A failure to open the collator is reported
 * with log_err_status.
 */
3198 static void TestNonChars(void) {
3199 static const char *test
[] = {
3200 "\\u0000", /* ignorable */
3201 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
3202 "\\uFDD0", "\\uFDEF",
3203 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3204 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3205 "\\U0003FFFE", "\\U0003FFFF",
3206 "\\U0004FFFE", "\\U0004FFFF",
3207 "\\U0005FFFE", "\\U0005FFFF",
3208 "\\U0006FFFE", "\\U0006FFFF",
3209 "\\U0007FFFE", "\\U0007FFFF",
3210 "\\U0008FFFE", "\\U0008FFFF",
3211 "\\U0009FFFE", "\\U0009FFFF",
3212 "\\U000AFFFE", "\\U000AFFFF",
3213 "\\U000BFFFE", "\\U000BFFFF",
3214 "\\U000CFFFE", "\\U000CFFFF",
3215 "\\U000DFFFE", "\\U000DFFFF",
3216 "\\U000EFFFE", "\\U000EFFFF",
3217 "\\U000FFFFE", "\\U000FFFFF",
3218 "\\U0010FFFE", "\\U0010FFFF",
3219 "\\uFFFF" /* special character with maximum primary weight */
3221 UErrorCode status
= U_ZERO_ERROR
;
3222 UCollator
*coll
= ucol_open("en_US", &status
);
3224 log_verbose("Test non characters\n");
3226 if(U_SUCCESS(status
)) {
/* NOTE(review): the initializer above lists 37 strings but a count of 35 is passed here;
   confirm whether leaving out the last two entries is intentional. */
3227 genericOrderingTestWithResult(coll
, test
, 35, UCOL_LESS
);
3229 log_err_status(status
, "Unable to open collator\n");
/*
 * TestExtremeCompression: allocates four 2048-byte buffers, then for every
 * length j from 20 to 499 fills each buffer with j - 1 copies of 'a'
 * followed by a distinguishing last character ('a' + i), and passes the four
 * strings to genericLocaleStarter() with the "en_US" locale.
 */
3235 static void TestExtremeCompression(void) {
3236 static char *test
[4];
3237 int32_t j
= 0, i
= 0;
/* One heap buffer per test string. */
3239 for(i
= 0; i
<4; i
++) {
3240 test
[i
] = (char *)malloc(2048*sizeof(char));
3243 for(j
= 20; j
< 500; j
++) {
3244 for(i
= 0; i
<4; i
++) {
/* Common prefix of 'a', then a final character that differs per buffer. */
3245 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
3246 test
[i
][j
-1] = (char)('a'+i
);
3249 genericLocaleStarter("en_US", (const char **)test
, 4);
/* Final pass over the four buffers; presumably releases them - TODO confirm against the full file. */
3253 for(i
= 0; i
<4; i
++) {
/*
 * TestExtremeCompression (alternative definition): allocates four 2048-byte
 * buffers and, for lengths j from 10 to 2047, fills them with a run of 'a'
 * and a distinguishing character at index j - 1 before handing the four
 * strings to genericLocaleStarter() with the "en_US" locale. A second loop
 * refills only the first buffer.
 * NOTE(review): a function with the same name is defined just above, so this
 * copy is presumably excluded by conditional compilation - confirm.
 */
3259 static void TestExtremeCompression(void) {
3260 static char *test
[4];
3261 int32_t j
= 0, i
= 0;
3262 UErrorCode status
= U_ZERO_ERROR
;
/* ucol_open takes the status by address, as in every other call in this file;
   passing the value itself was a type error. */
3263 UCollator
*coll
= ucol_open("en_US", &status
);
/* One heap buffer per test string. */
3264 for(i
= 0; i
<4; i
++) {
3265 test
[i
] = (char *)malloc(2048*sizeof(char));
3267 for(j
= 10; j
< 2048; j
++) {
3268 for(i
= 0; i
<4; i
++) {
/* j - 2 bytes of 'a', then the per-buffer character at index j - 1. */
3269 uprv_memset(test
[i
], 'a', (j
-2)*sizeof(char));
3270 test
[i
][j
-1] = (char)('a'+i
);
3274 genericLocaleStarter("en_US", (const char **)test
, 4);
/* Second pass: only buffer 0 is touched (i < 1). */
3276 for(j
= 10; j
< 2048; j
++) {
3277 for(i
= 0; i
<1; i
++) {
3278 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
/* Final pass over the four buffers; presumably releases them - TODO confirm against the full file. */
3282 for(i
= 0; i
<4; i
++) {
/*
 * TestSurrogates: runs a list of strings containing surrogate pairs through
 * genericRulesStarter() with a tailoring that places supplementary code
 * points, and contractions starting with them, after 'z'.
 */
3288 static void TestSurrogates(void) {
3289 static const char *test
[] = {
3290 "z","\\ud900\\udc25", "\\ud805\\udc50",
3291 "\\ud800\\udc00y", "\\ud800\\udc00r",
3292 "\\ud800\\udc00f", "\\ud800\\udc00",
3293 "\\ud800\\udc00c", "\\ud800\\udc00b",
3294 "\\ud800\\udc00fa", "\\ud800\\udc00fb",
/* The rule text is assembled from adjacent string literals. */
3299 static const char *rule
=
3300 "&z < \\ud900\\udc25 < \\ud805\\udc50"
3301 "< \\ud800\\udc00y < \\ud800\\udc00r"
3302 "< \\ud800\\udc00f << \\ud800\\udc00"
3303 "< \\ud800\\udc00fa << \\ud800\\udc00fb"
3304 "< \\ud800\\udc00a < c < b" ;
/* 14 is the number of strings taken from test[]. */
3306 genericRulesStarter(rule
, test
, 14);
3309 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
/*
 * TestPrefix: walks a table of (rules, data, len) records and passes each
 * record to genericRulesStarter().
 */
3310 static void TestPrefix(void) {
/* Each record can hold up to 50 data strings. */
3313 static const struct {
3315 const char *data
[50];
/* NOTE(review): this record lists five strings but gives a count of 4; confirm
   whether the last string is meant to be left out. */
3325 "&z<<<\\ud900\\udc25|a",
3326 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
/* One genericRulesStarter() call per table record. */
3330 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3331 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3335 /* This test uses data supplied by Masashiko Maedera to test the implementation */
3336 /* JIS X 4061 collation order implementation */
/* Checks "ja" collation on the test1 and test2 string lists by handing them
 * to genericLocaleStarterWithOptions twice: first with strength set to
 * quaternary only (att/val, 1 attribute), then with quaternary strength plus
 * shifted alternate handling (attShifted/valShifted, 2 attributes).
 * test3 is declared, but its only use below is commented out. */
3337 static void TestNewJapanese(void) {
3339 static const char * const test1
[] = {
3340 "\\u30b7\\u30e3\\u30fc\\u30ec",
3341 "\\u30b7\\u30e3\\u30a4",
3342 "\\u30b7\\u30e4\\u30a3",
3343 "\\u30b7\\u30e3\\u30ec",
3344 "\\u3061\\u3087\\u3053",
3345 "\\u3061\\u3088\\u3053",
3346 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3347 "\\u3066\\u30fc\\u305f",
3348 "\\u30c6\\u30fc\\u30bf",
3349 "\\u30c6\\u30a7\\u30bf",
3350 "\\u3066\\u3048\\u305f",
3351 "\\u3067\\u30fc\\u305f",
3352 "\\u30c7\\u30fc\\u30bf",
3353 "\\u30c7\\u30a7\\u30bf",
3354 "\\u3067\\u3048\\u305f",
3355 "\\u3066\\u30fc\\u305f\\u30fc",
3356 "\\u30c6\\u30fc\\u30bf\\u30a1",
3357 "\\u30c6\\u30a7\\u30bf\\u30fc",
3358 "\\u3066\\u3047\\u305f\\u3041",
3359 "\\u3066\\u3048\\u305f\\u30fc",
3360 "\\u3067\\u30fc\\u305f\\u30fc",
3361 "\\u30c7\\u30fc\\u30bf\\u30a1",
3362 "\\u3067\\u30a7\\u305f\\u30a1",
3363 "\\u30c7\\u3047\\u30bf\\u3041",
3364 "\\u30c7\\u30a8\\u30bf\\u30a2",
3366 "\\u3073\\u3085\\u3042",
3367 "\\u3074\\u3085\\u3042",
3368 "\\u3073\\u3085\\u3042\\u30fc",
3369 "\\u30d3\\u30e5\\u30a2\\u30fc",
3370 "\\u3074\\u3085\\u3042\\u30fc",
3371 "\\u30d4\\u30e5\\u30a2\\u30fc",
3372 "\\u30d2\\u30e5\\u30a6",
3373 "\\u30d2\\u30e6\\u30a6",
3374 "\\u30d4\\u30e5\\u30a6\\u30a2",
3375 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3376 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3377 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3378 "\\u3072\\u3085\\u3093",
3379 "\\u3074\\u3085\\u3093",
3380 "\\u3075\\u30fc\\u308a",
3381 "\\u30d5\\u30fc\\u30ea",
3382 "\\u3075\\u3045\\u308a",
3383 "\\u3075\\u30a5\\u308a",
3384 "\\u3075\\u30a5\\u30ea",
3385 "\\u30d5\\u30a6\\u30ea",
3386 "\\u3076\\u30fc\\u308a",
3387 "\\u30d6\\u30fc\\u30ea",
3388 "\\u3076\\u3045\\u308a",
3389 "\\u30d6\\u30a5\\u308a",
3390 "\\u3077\\u3046\\u308a",
3391 "\\u30d7\\u30a6\\u30ea",
3392 "\\u3075\\u30fc\\u308a\\u30fc",
3393 "\\u30d5\\u30a5\\u30ea\\u30fc",
3394 "\\u3075\\u30a5\\u308a\\u30a3",
3395 "\\u30d5\\u3045\\u308a\\u3043",
3396 "\\u30d5\\u30a6\\u30ea\\u30fc",
3397 "\\u3075\\u3046\\u308a\\u3043",
3398 "\\u30d6\\u30a6\\u30ea\\u30a4",
3399 "\\u3077\\u30fc\\u308a\\u30fc",
3400 "\\u3077\\u30a5\\u308a\\u30a4",
3401 "\\u3077\\u3046\\u308a\\u30fc",
3402 "\\u30d7\\u30a6\\u30ea\\u30a4",
3418 static const char *test2
[] = {
3419 "\\u306f\\u309d", /* H\\u309d */
3420 "\\u30cf\\u30fd", /* K\\u30fd */
3421 "\\u306f\\u306f", /* HH */
3422 "\\u306f\\u30cf", /* HK */
3423 "\\u30cf\\u30cf", /* KK */
3424 "\\u306f\\u309e", /* H\\u309e */
3425 "\\u30cf\\u30fe", /* K\\u30fe */
3426 "\\u306f\\u3070", /* HH\\u309b */
3427 "\\u30cf\\u30d0", /* KK\\u309b */
3428 "\\u306f\\u3071", /* HH\\u309c */
3429 "\\u30cf\\u3071", /* KH\\u309c */
3430 "\\u30cf\\u30d1", /* KK\\u309c */
3431 "\\u3070\\u309d", /* H\\u309b\\u309d */
3432 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3433 "\\u3070\\u306f", /* H\\u309bH */
3434 "\\u30d0\\u30cf", /* K\\u309bK */
3435 "\\u3070\\u309e", /* H\\u309b\\u309e */
3436 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3437 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3438 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3439 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3440 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3441 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3442 "\\u3071\\u309d", /* H\\u309c\\u309d */
3443 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3444 "\\u3071\\u306f", /* H\\u309cH */
3445 "\\u30d1\\u30cf", /* K\\u309cK */
3446 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3447 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3448 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3449 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3450 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3453 static const char *test3[] = {
3481 "\\u30b7\\u30e3\\u30fc\\u30ec",
3484 static const UColAttribute att
[] = { UCOL_STRENGTH
};
3485 static const UColAttributeValue val
[] = { UCOL_QUATERNARY
};
3487 static const UColAttribute attShifted
[] = { UCOL_STRENGTH
, UCOL_ALTERNATE_HANDLING
};
3488 static const UColAttributeValue valShifted
[] = { UCOL_QUATERNARY
, UCOL_SHIFTED
};
3490 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), att
, val
, 1);
3491 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), att
, val
, 1);
3492 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3493 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), attShifted
, valShifted
, 2);
3494 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), attShifted
, valShifted
, 2);
/* Builds a rule that makes two supplementary code points (written as
 * surrogate pairs) equal, and passes the test strings to
 * genericRulesStarterWithResult with UCOL_EQUAL as the expected result. */
3497 static void TestStrCollIdenticalPrefix(void) {
3498 const char* rule
= "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3499 const char* test
[] = {
3503 genericRulesStarterWithResult(rule
, test
, sizeof(test
)/sizeof(test
[0]), UCOL_EQUAL
);
3505 /* Contractions should have all their canonically equivalent */
3506 /* strings included */
/* For every entry of tests[] (a rule string, up to 10 data strings and their
 * count), calls genericRulesStarterWithResult with UCOL_EQUAL as the
 * expected result. */
3507 static void TestContractionClosure(void) {
3508 static const struct {
3510 const char *data
[10];
3513 { "&b=\\u00e4\\u00e4",
3514 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3516 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3521 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3522 genericRulesStarterWithResult(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
, UCOL_EQUAL
);
3526 /* This test also fails */
/* Runs genericRulesStarter over tailorings that combine "[before 3]" resets
 * with equality and prefix ("|") rules, first for every entry of tests[] and
 * then for rule1/rule2 with the test[] strings. Afterwards it opens a
 * collator from rule1 and, for each test string, logs the collation elements
 * returned by ucol_next in verbose mode. */
3527 static void TestBeforePrefixFailure(void) {
3528 static const struct {
3530 const char *data
[10];
3534 "&[before 3]\\uff41 <<< x",
3535 {"x", "\\uff41"}, 2 },
3536 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3537 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3538 "&[before 3]\\u30a7<<<\\u30a9",
3539 {"\\u30a9", "\\u30a7"}, 2 },
3540 { "&[before 3]\\u30a7<<<\\u30a9"
3541 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3542 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3543 {"\\u30a9", "\\u30a7"}, 2 },
3548 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3549 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3554 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3555 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3556 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3558 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3559 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3560 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3561 const char* test
[] = {
3562 "\\u30c6\\u30fc\\u30bf",
3563 "\\u30c6\\u30a7\\u30bf",
3565 genericRulesStarter(rule1
, test
, sizeof(test
)/sizeof(test
[0]));
3566 genericRulesStarter(rule2
, test
, sizeof(test
)/sizeof(test
[0]));
3567 /* this piece of code should be in some sort of verbose mode */
3568 /* it gets the collation elements for elements and prints them */
3569 /* This is useful when trying to see whether the problem is */
3571 UErrorCode status
= U_ZERO_ERROR
;
3573 UCollationElements
*it
= NULL
;
3576 uint32_t uStringLen
;
3577 UCollator
*coll
= NULL
;
3579 uStringLen
= u_unescape(rule1
, string
, 256);
3581 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3583 /*coll = ucol_open("ja_JP_JIS", &status);*/
3584 it
= ucol_openElements(coll
, string
, 0, &status
);
3586 for(i
= 0; i
< sizeof(test
)/sizeof(test
[0]); i
++) {
3587 log_verbose("%s\n", test
[i
]);
3588 uStringLen
= u_unescape(test
[i
], string
, 256);
3589 ucol_setText(it
, string
, uStringLen
, &status
);
3591 while((CE
=ucol_next(it
, &status
)) != UCOL_NULLORDER
) {
3592 log_verbose("%08X\n", CE
);
3598 ucol_closeElements(it
);
/* Unescapes rule1, a tailoring that uses prefix ("|") rules, into a UChar
 * buffer and opens a collator from it with ucol_openRules. */
3604 static void TestPrefixCompose(void) {
3606 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3608 const char* test[] = {
3609 "\\u30c6\\u30fc\\u30bf",
3610 "\\u30c6\\u30a7\\u30bf",
3614 UErrorCode status
= U_ZERO_ERROR
;
3616 /*UCollationElements *it = NULL;*/
3619 uint32_t uStringLen
;
3620 UCollator
*coll
= NULL
;
3622 uStringLen
= u_unescape(rule1
, string
, 256);
3624 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3632 [last variable] last variable value
3633 [last primary ignorable] largest CE for primary ignorable
3634 [last secondary ignorable] largest CE for secondary ignorable
3635 [last tertiary ignorable] largest CE for tertiary ignorable
3636 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
/* Runs genericRulesStarter over tailorings that reset at the special
 * positions ([first|last tertiary/secondary/primary ignorable],
 * [first|last variable], [first|last regular], [first|last implicit]),
 * optionally combined with "[before N]". Each tests[] entry holds the rule
 * string, its data strings and their count. */
3639 static void TestRuleOptions(void) {
3640 /* values here are hardcoded and are correct for the current UCA
3641 * when the UCA changes, one might be forced to change these
3646 * These strings contain the last character before [variable top]
3647 * and the first and second characters (by primary weights) after it.
3648 * See FractionalUCA.txt. For example:
3649 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3650 [variable top = 0C FE]
3651 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3653 00B4; [0D 0C, 05, 05]
3655 * Note: Starting with UCA 6.0, the [variable top] collation element
3656 * is not the weight of any character or string,
3657 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3659 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3660 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
3661 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3664 * This string has to match the character that has the [last regular] weight
3665 * which changes with each UCA version.
3666 * See the bottom of FractionalUCA.txt which says something like
3667 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3669 * Note: Starting with UCA 6.0, the [last regular] collation element
3670 * is not the weight of any character or string,
3671 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3673 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3675 static const struct {
3677 const char *data
[10];
3680 /* - all befores here amount to zero */
3681 { "&[before 3][first tertiary ignorable]<<<a",
3682 { "\\u0000", "a"}, 2
3683 }, /* you cannot go before first tertiary ignorable */
3685 { "&[before 3][last tertiary ignorable]<<<a",
3686 { "\\u0000", "a"}, 2
3687 }, /* you cannot go before last tertiary ignorable */
3689 { "&[before 3][first secondary ignorable]<<<a",
3690 { "\\u0000", "a"}, 2
3691 }, /* you cannot go before first secondary ignorable */
3693 { "&[before 3][last secondary ignorable]<<<a",
3694 { "\\u0000", "a"}, 2
3695 }, /* you cannot go before last secondary ignorable */
3697 /* 'normal' befores */
3699 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3700 { "c", "b", "\\u0332", "a" }, 4
3703 /* we don't have a code point that corresponds to
3704 * the last primary ignorable
3706 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3707 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3710 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3711 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3714 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3715 { LAST_VARIABLE_CHAR_STRING
, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING
}, 5
3718 { "&[first regular]<a"
3719 "&[before 1][first regular]<b",
3720 { "b", FIRST_REGULAR_CHAR_STRING
, "a", SECOND_REGULAR_CHAR_STRING
}, 4
3723 { "&[before 1][last regular]<b"
3724 "&[last regular]<a",
3725 { LAST_REGULAR_CHAR_STRING
, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3728 { "&[before 1][first implicit]<b"
3729 "&[first implicit]<a",
3730 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3733 { "&[before 1][last implicit]<b"
3734 "&[last implicit]<a",
3735 { "b", "\\U0010FFFD", "a" }, 3
3738 { "&[last variable]<z"
3739 "&[last primary ignorable]<x"
3740 "&[last secondary ignorable]<<y"
3741 "&[last tertiary ignorable]<<<w"
3743 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING
, "z", "u"}, 7
3749 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3750 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
/* Passes each tests[] entry (rules containing an "[optimize ...]" option,
 * plus its data strings and count) to genericRulesStarter. */
3755 static void TestOptimize(void) {
3756 /* this is not really a test - just trying out
3757 * whether copying of UCA contents will fail
3758 * Cannot really test, since the functionality
3761 static const struct {
3763 const char *data
[10];
3766 /* - all befores here amount to zero */
3767 { "[optimize [\\uAC00-\\uD7FF]]",
3772 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3773 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3778 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3779 weiv ucol_strcollIter?
3780 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3781 weiv these are the input strings?
3782 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3783 weiv will check - could be a problem with utf-8 iterator
3784 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3786 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3787 weiv that doesn't sound right
3788 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3789 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3790 cycheng@ca.ibm.c... yes
3791 weiv and then do the comparison
3792 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3793 weiv utf-16 strings look like a little endian ones in the example you sent me
3794 weiv It could be a bug - let me try to test it out
3795 cycheng@ca.ibm.c... ok
3796 cycheng@ca.ibm.c... we can wait till the conf. call
3797 cycheng@ca.ibm.c... next week
3798 weiv that would be great
3800 weiv I might be wrong
3801 weiv let me play with it some more
3802 cycheng@ca.ibm.c... ok
3803 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3804 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3805 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3807 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3809 cycheng@ca.ibm.c... the 4 strings we sent are just samples
/* Compares the same two strings (U+D800 U+0021 and U+FFFC U+0062) once as
 * UTF-16BE byte arrays and once as UTF-8 byte arrays. Both pairs are wrapped
 * in UCharIterators (each set up with a length of 4 bytes) and compared with
 * ucol_strcollIter on the "" collator with normalization switched on; an
 * error is logged if the two results differ. */
3812 static void Alexis(void) {
3813 UErrorCode status
= U_ZERO_ERROR
;
3814 UCollator
*coll
= ucol_open("", &status
);
3817 const char utf16be
[2][4] = {
3818 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
3819 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
3822 const char utf8
[2][4] = {
3823 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
3824 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
3827 UCharIterator iterU161
, iterU162
;
3828 UCharIterator iterU81
, iterU82
;
3830 UCollationResult resU16
, resU8
;
3832 uiter_setUTF16BE(&iterU161
, utf16be
[0], 4);
3833 uiter_setUTF16BE(&iterU162
, utf16be
[1], 4);
3835 uiter_setUTF8(&iterU81
, utf8
[0], 4);
3836 uiter_setUTF8(&iterU82
, utf8
[1], 4);
3838 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3840 resU16
= ucol_strcollIter(coll
, &iterU161
, &iterU162
, &status
);
3841 resU8
= ucol_strcollIter(coll
, &iterU81
, &iterU82
, &status
);
3844 if(resU16
!= resU8
) {
3845 log_err("different results\n");
3852 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
/* For each string pair in pairs[], compares the two strings three ways on
 * the "" collator with normalization on: ucol_strcoll on the UTF-16 text,
 * ucol_strcollIter over UTF-16BE byte iterators, and ucol_strcollIter over
 * UTF-8 byte iterators. Differences against the ucol_strcoll result are only
 * reported through log_verbose.
 * Fix: the UTF-16BE iterators are now given the byte lengths returned by
 * ucnv_fromUChars, because uiter_setUTF16BE expects a length in bytes (as in
 * Alexis() above, which passes 4 for two UChars). */
3853 static void Alexis2(void) {
3854 UErrorCode status
= U_ZERO_ERROR
;
3855 UChar U16Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3856 char U16BESource
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16BETarget
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3857 char U8Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U8Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3858 int32_t U16LenS
= 0, U16LenT
= 0, U16BELenS
= 0, U16BELenT
= 0, U8LenS
= 0, U8LenT
= 0;
3860 UConverter
*conv
= NULL
;
3862 UCharIterator U16BEItS
, U16BEItT
;
3863 UCharIterator U8ItS
, U8ItT
;
3865 UCollationResult resU16
, resU16BE
, resU8
;
3867 static const char* const pairs
[][2] = {
3868 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3869 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3870 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3871 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3872 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3873 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3874 { "\\u0020", "\\u0020\\u0000"}
3876 5F20 (my result here)
3878 5F20 (your result here)
3884 UCollator
*coll
= ucol_open("", &status
);
3885 if(status
== U_FILE_ACCESS_ERROR
) {
3886 log_data_err("Is your data around?\n");
3888 } else if(U_FAILURE(status
)) {
3889 log_err("Error opening collator\n");
3892 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3893 conv
= ucnv_open("UTF16BE", &status
);
3894 for(i
= 0; i
< sizeof(pairs
)/sizeof(pairs
[0]); i
++) {
3895 U16LenS
= u_unescape(pairs
[i
][0], U16Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3896 U16LenT
= u_unescape(pairs
[i
][1], U16Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3898 resU16
= ucol_strcoll(coll
, U16Source
, U16LenS
, U16Target
, U16LenT
);
3900 log_verbose("Result of strcoll is %i\n", resU16
);
3902 U16BELenS
= ucnv_fromUChars(conv
, U16BESource
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Source
, U16LenS
, &status
);
3903 U16BELenT
= ucnv_fromUChars(conv
, U16BETarget
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Target
, U16LenT
, &status
);
3905 /* uiter_setUTF16BE() takes the length in bytes, so use the converter's byte counts */
3906 uiter_setUTF16BE(&U16BEItS
, U16BESource
, U16BELenS
);
3907 uiter_setUTF16BE(&U16BEItT
, U16BETarget
, U16BELenT
);
3909 resU16BE
= ucol_strcollIter(coll
, &U16BEItS
, &U16BEItT
, &status
);
3911 log_verbose("Result of U16BE is %i\n", resU16BE
);
3913 if(resU16
!= resU16BE
) {
3914 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
3917 u_strToUTF8(U8Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenS
, U16Source
, U16LenS
, &status
);
3918 u_strToUTF8(U8Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenT
, U16Target
, U16LenT
, &status
);
3920 uiter_setUTF8(&U8ItS
, U8Source
, U8LenS
);
3921 uiter_setUTF8(&U8ItT
, U8Target
, U8LenT
);
3923 resU8
= ucol_strcollIter(coll
, &U8ItS
, &U8ItT
, &status
);
3925 if(resU16
!= resU8
) {
3926 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
/* Parses the hex-encoded UTF-8 strings in first[] with u_parseUTF8, converts
 * them to UTF-16 with u_strFromUTF8, dumps the UTF-16 code units in verbose
 * mode, and then checks with doTest that every string compares UCOL_LESS
 * against each later string on the "" collator.
 * Fix: the "%i: " verbose message now receives the index it formats; the
 * original call supplied no argument for the conversion specifier. */
3935 static void TestHebrewUCA(void) {
3936 UErrorCode status
= U_ZERO_ERROR
;
3937 static const char *first
[] = {
3938 "d790d6b8d79cd795d6bcd7a9",
3939 "d790d79cd79ed7a7d799d799d7a1",
3940 "d790d6b4d79ed795d6bcd7a9",
3943 char utf8String
[3][256];
3944 UChar utf16String
[3][256];
3946 int32_t i
= 0, j
= 0;
3947 int32_t sizeUTF8
[3];
3948 int32_t sizeUTF16
[3];
3950 UCollator
*coll
= ucol_open("", &status
);
3951 if (U_FAILURE(status
)) {
3952 log_err_status(status
, "Could not open UCA collation %s\n", u_errorName(status
));
3955 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3957 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0]); i
++) {
3958 sizeUTF8
[i
] = u_parseUTF8(first
[i
], -1, utf8String
[i
], 256, &status
);
3959 u_strFromUTF8(utf16String
[i
], 256, &sizeUTF16
[i
], utf8String
[i
], sizeUTF8
[i
], &status
);
3960 log_verbose("%i: ", i
);
3961 for(j
= 0; j
< sizeUTF16
[i
]; j
++) {
3962 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3963 log_verbose("%04X", utf16String
[i
][j
]);
3967 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0])-1; i
++) {
3968 for(j
= i
+ 1; j
< sizeof(first
)/sizeof(first
[0]); j
++) {
3969 doTest(coll
, utf16String
[i
], utf16String
[j
], UCOL_LESS
);
/* For each string in cases[] (strings containing unpaired surrogate code
 * units), sets up a UCharIterator with uiter_setString and requests a sort
 * key part of up to 256 bytes with ucol_nextSortKeyPart, starting from a
 * zeroed state. The function then logs "Done" in verbose mode.
 * Note: i is initialised to sizeof(UCollator) and then reused as the loop
 * index. */
3977 static void TestPartialSortKeyTermination(void) {
3978 static const char* cases
[] = {
3979 "\\u1234\\u1234\\udc00",
3980 "\\udc00\\ud800\\ud800"
3983 int32_t i
= sizeof(UCollator
);
3985 UErrorCode status
= U_ZERO_ERROR
;
3987 UCollator
*coll
= ucol_open("", &status
);
3991 UChar currCase
[256];
3993 int32_t pKeyLen
= 0;
3997 for(i
= 0; i
< sizeof(cases
)/sizeof(cases
[0]); i
++) {
3998 uint32_t state
[2] = {0, 0};
3999 length
= u_unescape(cases
[i
], currCase
, 256);
4000 uiter_setString(&iter
, currCase
, length
);
4001 pKeyLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, key
, 256, &status
);
4003 log_verbose("Done\n");
/* For every locale in locales[], opens a collator, sets primary strength
 * with the case level switched off, and checks with doTest that each pair of
 * adjacent strings in cases[] compares UCOL_EQUAL. The collator is closed
 * with ucol_close afterwards. */
4009 static void TestSettings(void) {
4010 static const char* cases
[] = {
4015 static const char* locales
[] = {
4020 UErrorCode status
= U_ZERO_ERROR
;
4022 int32_t i
= 0, j
= 0;
4024 UChar source
[256], target
[256];
4025 int32_t sLen
= 0, tLen
= 0;
4027 UCollator
*collateObject
= NULL
;
4028 for(i
= 0; i
< sizeof(locales
)/sizeof(locales
[0]); i
++) {
4029 collateObject
= ucol_open(locales
[i
], &status
);
4030 ucol_setStrength(collateObject
, UCOL_PRIMARY
);
4031 ucol_setAttribute(collateObject
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
4032 for(j
= 1; j
< sizeof(cases
)/sizeof(cases
[0]); j
++) {
4033 sLen
= u_unescape(cases
[j
-1], source
, 256);
4035 tLen
= u_unescape(cases
[j
], target
, 256);
4037 doTest(collateObject
, source
, target
, UCOL_EQUAL
);
4039 ucol_close(collateObject
);
/* Helper for TestEquals. Checks that source and target compare equal with
 * ucol_equals. After comparing the requested and actual locale names of
 * source, it clones source with ucol_safeClone, checks that the clone equals
 * source, flips UCOL_FRENCH_COLLATION on the clone and checks that the two
 * collators are then no longer equal. Problems are reported through log_err.
 * Returns an int32_t; TestEquals treats a nonzero return as failure. */
4043 static int32_t TestEqualsForCollator(const char* locName
, UCollator
*source
, UCollator
*target
) {
4044 UErrorCode status
= U_ZERO_ERROR
;
4045 int32_t errorNo
= 0;
4046 /*const UChar *sourceRules = NULL;*/
4047 /*int32_t sourceRulesLen = 0;*/
4048 UColAttributeValue french
= UCOL_OFF
;
4049 int32_t cloneSize
= 0;
4051 if(!ucol_equals(source
, target
)) {
4052 log_err("Same collators, different address not equal\n");
4056 if(uprv_strcmp(ucol_getLocaleByType(source
, ULOC_REQUESTED_LOCALE
, &status
), ucol_getLocaleByType(source
, ULOC_ACTUAL_LOCALE
, &status
)) == 0) {
4057 /* currently, safeClone is implemented through getRules/openRules
4058 * so it is the same as the test below - I will comment that test out.
4061 target
= ucol_safeClone(source
, NULL
, &cloneSize
, &status
);
4062 if(U_FAILURE(status
)) {
4063 log_err("Error creating clone\n");
4067 if(!ucol_equals(source
, target
)) {
4068 log_err("Collator different from it's clone\n");
4071 french
= ucol_getAttribute(source
, UCOL_FRENCH_COLLATION
, &status
);
4072 if(french
== UCOL_ON
) {
4073 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
4075 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
4077 if(U_FAILURE(status
)) {
4078 log_err("Error setting attributes\n");
4082 if(ucol_equals(source
, target
)) {
4083 log_err("Collators same even when options changed\n");
4087 /* commented out since safeClone uses exactly the same technique */
4089 sourceRules = ucol_getRules(source, &sourceRulesLen);
4090 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4091 if(U_FAILURE(status)) {
4092 log_err("Error instantiating target from rules\n");
4096 if(!ucol_equals(source, target)) {
4097 log_err("Collator different from collator that was created from the same rules\n");
/* Exercises ucol_equals in three ways:
 *  - two collators built from the same rules concatenated in opposite order
 *    (rules[0..n-1] versus rules[n-1..0]) must be equal;
 *  - two "root" collators go through TestEqualsForCollator;
 *  - for every locale from uloc_getAvailable, two collators opened for that
 *    locale go through TestEqualsForCollator.
 * Fix: the "Errors for root" message has no conversion specifier, so the
 * surplus locName argument (still NULL at that point) is no longer passed. */
4107 static void TestEquals(void) {
4108 /* ucol_equals is not currently a public API. There is a chance that it will become
4109 * something like this, but currently it is only used by RuleBasedCollator::operator==
4111 /* test whether the two collators instantiated from the same locale are equal */
4112 UErrorCode status
= U_ZERO_ERROR
;
4113 UParseError parseError
;
4114 int32_t noOfLoc
= uloc_countAvailable();
4115 const char *locName
= NULL
;
4116 UCollator
*source
= NULL
, *target
= NULL
;
4119 const char* rules
[] = {
4120 "&l < lj <<< Lj <<< LJ",
4121 "&n < nj <<< Nj <<< NJ",
4126 const char* badRules[] = {
4128 "&n < nj <<< nJ <<< NJ",
4130 "&AE <<< \\u00c4 <<< x"
4134 UChar sourceRules
[1024], targetRules
[1024];
4135 int32_t sourceRulesSize
= 0, targetRulesSize
= 0;
4136 int32_t rulesSize
= sizeof(rules
)/sizeof(rules
[0]);
4138 for(i
= 0; i
< rulesSize
; i
++) {
4139 sourceRulesSize
+= u_unescape(rules
[i
], sourceRules
+sourceRulesSize
, 1024 - sourceRulesSize
);
4140 targetRulesSize
+= u_unescape(rules
[rulesSize
-i
-1], targetRules
+targetRulesSize
, 1024 - targetRulesSize
);
4143 source
= ucol_openRules(sourceRules
, sourceRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
4144 if(status
== U_FILE_ACCESS_ERROR
) {
4145 log_data_err("Is your data around?\n");
4147 } else if(U_FAILURE(status
)) {
4148 log_err("Error opening collator\n");
4151 target
= ucol_openRules(targetRules
, targetRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
4152 if(!ucol_equals(source
, target
)) {
4153 log_err("Equivalent collators not equal!\n");
4158 source
= ucol_open("root", &status
);
4159 target
= ucol_open("root", &status
);
4160 log_verbose("Testing root\n");
4161 if(!ucol_equals(source
, source
)) {
4162 log_err("Same collator not equal\n");
4164 if(TestEqualsForCollator(locName
, source
, target
)) {
4165 log_err("Errors for root\n");
4169 for(i
= 0; i
<noOfLoc
; i
++) {
4170 status
= U_ZERO_ERROR
;
4171 locName
= uloc_getAvailable(i
);
4172 /*if(hasCollationElements(locName)) {*/
4173 log_verbose("Testing equality for locale %s\n", locName
);
4174 source
= ucol_open(locName
, &status
);
4175 target
= ucol_open(locName
, &status
);
4176 if (U_FAILURE(status
)) {
4177 log_err("Error opening collator for locale %s %s\n", locName
, u_errorName(status
));
4180 if(TestEqualsForCollator(locName
, source
, target
)) {
4181 log_err("Errors for locale %s\n", locName
);
/* Opens an "en" collator, sets alternate handling to shifted and strength to
 * primary, and checks with doTest that "a", "a " and " a" all compare
 * UCOL_EQUAL to each other, in both argument orders. */
4188 static void TestJ2726(void) {
4189 UChar a
[2] = { 0x61, 0x00 }; /*"a"*/
4190 UChar aSpace
[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4191 UChar spaceA
[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4192 UErrorCode status
= U_ZERO_ERROR
;
4193 UCollator
*coll
= ucol_open("en", &status
);
4194 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
4195 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4196 doTest(coll
, a
, aSpace
, UCOL_EQUAL
);
4197 doTest(coll
, aSpace
, a
, UCOL_EQUAL
);
4198 doTest(coll
, a
, spaceA
, UCOL_EQUAL
);
4199 doTest(coll
, spaceA
, a
, UCOL_EQUAL
);
4200 doTest(coll
, spaceA
, aSpace
, UCOL_EQUAL
);
4201 doTest(coll
, aSpace
, spaceA
, UCOL_EQUAL
);
/* Calls ucol_openRules on the buffer r twice: with a rule length of 1, which
 * is expected to fail (an error is logged on success), and, after resetting
 * status, with a rule length of 0, which is expected to yield a valid
 * collator (an error is logged on failure). */
4205 static void NullRule(void) {
4207 UErrorCode status
= U_ZERO_ERROR
;
4208 UCollator
*coll
= ucol_openRules(r
, 1, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4209 if(U_SUCCESS(status
)) {
4210 log_err("This should have been an error!\n");
4213 status
= U_ZERO_ERROR
;
4215 coll
= ucol_openRules(r
, 0, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4216 if(U_FAILURE(status
)) {
4217 log_err_status(status
, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status
));
4224 * Test for numeric collation (UCOL_NUMERIC_COLLATION set to UCOL_ON):
4225 * runs the digit-string lists below through the generic ordering helpers.
/* Runs several string lists through genericLocaleStarterWithOptions with the
 * single attribute UCOL_NUMERIC_COLLATION set to UCOL_ON (locale "root", or
 * "en_US" for foreignDigits). It then switches numeric collation on for a
 * separately opened "root" collator and uses genericOrderingTestWithResult
 * to check that all preZeroTestStrings compare UCOL_EQUAL. */
4227 static void TestNumericCollation(void)
4229 UErrorCode status
= U_ZERO_ERROR
;
4231 const static char *basicTestStrings
[]={
4244 const static char *preZeroTestStrings
[]={
4252 "avery000000010000",
4255 const static char *thirtyTwoBitNumericStrings
[]={
4262 const static char *longNumericStrings
[]={
4263 /* Some of these sort out of the order that would be expected if digits-as-numbers handled arbitrarily-long digit strings.
4264 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4265 are treated as multiple collation elements. */
4266 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4267 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4268 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4269 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4270 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4271 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4272 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4273 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4274 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4275 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4278 const static char *supplementaryDigits
[] = {
4279 "\\uD835\\uDFCE", /* 0 */
4280 "\\uD835\\uDFCF", /* 1 */
4281 "\\uD835\\uDFD0", /* 2 */
4282 "\\uD835\\uDFD1", /* 3 */
4283 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4284 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4285 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4286 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4287 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4288 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4291 const static char *foreignDigits
[] = {
4306 const static char *evenZeroes
[] = {
4313 UColAttribute att
= UCOL_NUMERIC_COLLATION
;
4314 UColAttributeValue val
= UCOL_ON
;
4316 /* Open our collator. */
4317 UCollator
* coll
= ucol_open("root", &status
);
4318 if (U_FAILURE(status
)){
4319 log_err_status(status
, "ERROR: in using ucol_open() -> %s\n",
4320 myErrorName(status
));
4323 genericLocaleStarterWithOptions("root", basicTestStrings
, sizeof(basicTestStrings
)/sizeof(basicTestStrings
[0]), &att
, &val
, 1);
4324 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings
, sizeof(thirtyTwoBitNumericStrings
)/sizeof(thirtyTwoBitNumericStrings
[0]), &att
, &val
, 1);
4325 genericLocaleStarterWithOptions("root", longNumericStrings
, sizeof(longNumericStrings
)/sizeof(longNumericStrings
[0]), &att
, &val
, 1);
4326 genericLocaleStarterWithOptions("en_US", foreignDigits
, sizeof(foreignDigits
)/sizeof(foreignDigits
[0]), &att
, &val
, 1);
4327 genericLocaleStarterWithOptions("root", supplementaryDigits
, sizeof(supplementaryDigits
)/sizeof(supplementaryDigits
[0]), &att
, &val
, 1);
4328 genericLocaleStarterWithOptions("root", evenZeroes
, sizeof(evenZeroes
)/sizeof(evenZeroes
[0]), &att
, &val
, 1);
4330 /* Setting up our collator to do digits. */
4331 ucol_setAttribute(coll
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
4332 if (U_FAILURE(status
)){
4333 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4334 myErrorName(status
));
4339 Testing that prepended zeroes still yield the correct collation behavior.
4340 We expect that every element in our strings array will be equal.
4342 genericOrderingTestWithResult(coll
, preZeroTestStrings
, sizeof(preZeroTestStrings
)/sizeof(preZeroTestStrings
[0]), UCOL_EQUAL
);
/* With normalization switched on for the "" collator, checks that the two
 * test strings (which differ only by an extra U+0591 in the first) compare
 * UCOL_EQUAL: once through doTest, once through ucol_strcoll directly, and
 * finally through genericLocaleStarterWithResult. */
4347 static void TestTibetanConformance(void)
4349 const char* test
[] = {
4350 "\\u0FB2\\u0591\\u0F71\\u0061",
4351 "\\u0FB2\\u0F71\\u0061"
4354 UErrorCode status
= U_ZERO_ERROR
;
4355 UCollator
*coll
= ucol_open("", &status
);
4359 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4360 if (U_SUCCESS(status
)) {
4361 u_unescape(test
[0], source
, 100);
4362 u_unescape(test
[1], target
, 100);
4363 doTest(coll
, source
, target
, UCOL_EQUAL
);
4364 result
= ucol_strcoll(coll
, source
, -1, target
, -1);
4365 log_verbose("result %d\n", result
);
4366 if (UCOL_EQUAL
!= result
) {
4367 log_err("Tibetan comparison error\n");
4372 genericLocaleStarterWithResult("", test
, 2, UCOL_EQUAL
);
/* Passes the two test strings to genericLocaleStarter for the locale
 * "zh__PINYIN". */
4375 static void TestPinyinProblem(void) {
4376 static const char *test
[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
4377 genericLocaleStarter("zh__PINYIN", test
, sizeof(test
)/sizeof(test
[0]));
/* Largest raw value exercised by showImplicit() and TestImplicitGeneration(). */
#define TST_UCOL_MAX_INPUT 0x220001
/* Bit masks applied to implicit weights in TestImplicitGeneration().
 * The macro bodies carry no trailing semicolon so that they can be used
 * inside larger expressions; each statement supplies its own terminator. */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
/* Logs i and uprv_uca_getImplicitFromRaw(i) in verbose mode, but only when
 * i lies within [0, TST_UCOL_MAX_INPUT]. */
4386 static void showImplicit(UChar32 i
) {
4387 if (i
>= 0 && i
<= TST_UCOL_MAX_INPUT
) {
4388 log_verbose("%08X\t%08X\n", i
, uprv_uca_getImplicitFromRaw(i
));
/* Consistency checks for implicit weight generation. For every raw value i
 * in [0, TST_UCOL_MAX_INPUT] it checks that uprv_uca_getImplicitFromRaw(i)
 * maps back to i through uprv_uca_getRawFromImplicit, and that every value
 * strictly between the previous and the current implicit weight is rejected
 * (uprv_uca_getRawFromImplicit returns -1). It also checks that
 * uprv_uca_getRawFromCodePoint / uprv_uca_getCodePointFromRaw round-trip,
 * and finally prints the last three inputs through showImplicit. */
4392 static void TestImplicitGeneration(void) {
4393 UErrorCode status
= U_ZERO_ERROR
;
4396 UChar32 i
= 0, j
= 0;
4397 UChar32 roundtrip
= 0;
4398 UChar32 lastBottom
= 0;
4399 UChar32 currentBottom
= 0;
4400 UChar32 lastTop
= 0;
4401 UChar32 currentTop
= 0;
4403 UCollator
*coll
= ucol_open("root", &status
);
4404 if(U_FAILURE(status
)) {
4405 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
4409 uprv_uca_getRawFromImplicit(0xE20303E7);
4411 for (i
= 0; i
<= TST_UCOL_MAX_INPUT
; ++i
) {
4412 current
= uprv_uca_getImplicitFromRaw(i
) & fourBytes
;
4414 /* check that it round-trips AND that all intervening ones are illegal*/
4415 roundtrip
= uprv_uca_getRawFromImplicit(current
);
4416 if (roundtrip
!= i
) {
4417 log_err("No roundtrip %08X\n", i
);
4420 for (j
= last
+ 1; j
< current
; ++j
) {
4421 roundtrip
= uprv_uca_getRawFromImplicit(j
);
4422 /* raise an error if it *doesn't* find an error*/
4423 if (roundtrip
!= -1) {
4424 log_err("Fails to recognize illegal %08X\n", j
);
4428 /* now do other consistency checks*/
4429 lastBottom
= last
& bottomByte
;
4430 currentBottom
= current
& bottomByte
;
4431 lastTop
= last
& topByte
;
4432 currentTop
= current
& topByte
;
4434 /* print out some values for spot-checking*/
4435 if (lastTop
!= currentTop
|| i
== 0x10000 || i
== 0x110000) {
4445 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i
)) != i
) {
4446 log_err("No raw <-> code point roundtrip for 0x%08X\n", i
);
4449 showImplicit(TST_UCOL_MAX_INPUT
-2);
4450 showImplicit(TST_UCOL_MAX_INPUT
-1);
4451 showImplicit(TST_UCOL_MAX_INPUT
);
4456 * Iterate through the given iterator, checking to see that all the strings
4457 * in the expected array are present.
4458 * @param expected array of strings we expect to see, or NULL
4459 * @param expectedCount number of elements of expected, or 0
/*
 * Walks an enumeration with uenum_next(), logging every string it yields, and
 * compares the strings against a list of expected values.
 *
 * msg            label printed in front of the logged list
 * expected       strings that should show up in the enumeration
 * expectedCount  number of entries in expected; each entry is tracked through
 *                one bit of seenMask, hence the assertion that the count is
 *                below 31
 *
 * Returns -1 when uenum_count() reports a failure.
 * The walk stops at the first failure or NULL string. Afterwards the function
 * asserts that the number of strings walked equals the value uenum_count()
 * returned, and logs an error for each expected entry whose bit is not set.
 */
4461 static int32_t checkUEnumeration(const char* msg
,
4463 const char** expected
,
4464 int32_t expectedCount
) {
4465 UErrorCode ec
= U_ZERO_ERROR
;
4466 int32_t i
= 0, n
, j
, bit
;
4467 int32_t seenMask
= 0;
4469 U_ASSERT(expectedCount
>= 0 && expectedCount
< 31); /* [sic] 31 not 32 */
4470 n
= uenum_count(iter
, &ec
);
4471 if (!assertSuccess("count", &ec
)) return -1;
4472 log_verbose("%s = [", msg
);
4474 const char* s
= uenum_next(iter
, NULL
, &ec
);
4475 if (!assertSuccess("snext", &ec
) || s
== NULL
) break;
/* Comma-separate every entry after the first one. */
4476 if (i
!= 0) log_verbose(",");
4477 log_verbose("%s", s
);
4478 /* check expected list */
4479 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
/* Only entries whose bit is still clear are compared against s. */
4480 if ((seenMask
&bit
) == 0 &&
4481 uprv_strcmp(s
, expected
[j
]) == 0) {
4487 log_verbose("] (%d)\n", i
);
4488 assertTrue("count verified", i
==n
);
4489 /* did we see all expected strings? */
4490 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
4491 if ((seenMask
&bit
)!=0) {
4492 log_verbose("Ok: \"%s\" seen\n", expected
[j
]);
4494 log_err("FAIL: \"%s\" not seen\n", expected
[j
]);
4501 * Test new API added for separate collation tree.
/*
 * Exercises four collation service entry points:
 *   - ucol_openAvailableLocales() (only when UCONFIG_NO_SERVICE is off),
 *     checked against AVAIL;
 *   - ucol_getKeywords(), checked against KW;
 *   - ucol_getKeywordValues() for KW[0], checked against KWVAL, once with a
 *     clean error code and once with U_USING_FALLBACK_WARNING preset;
 *   - ucol_getFunctionalEquivalent() for "de" and "de_DE", both expected to
 *     yield "de" with isAvailable equal to TRUE.
 * Each enumeration is verified through checkUEnumeration(); the count it
 * returns is stored in n but not examined.
 */
4503 static void TestSeparateTrees(void) {
4504 UErrorCode ec
= U_ZERO_ERROR
;
4505 UEnumeration
*e
= NULL
;
/* Strings that each enumeration below is expected to contain. */
4510 static const char* AVAIL
[] = { "en", "de" };
4512 static const char* KW
[] = { "collation" };
4514 static const char* KWVAL
[] = { "phonebook", "stroke" };
4516 #if !UCONFIG_NO_SERVICE
4517 e
= ucol_openAvailableLocales(&ec
);
4519 assertSuccess("ucol_openAvailableLocales", &ec
);
4520 assertTrue("ucol_openAvailableLocales!=0", e
!=0);
4521 n
= checkUEnumeration("ucol_openAvailableLocales", e
, AVAIL
, LEN(AVAIL
));
4522 /* Don't need to check n because we check list */
4525 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec
));
4529 e
= ucol_getKeywords(&ec
);
4531 assertSuccess("ucol_getKeywords", &ec
);
4532 assertTrue("ucol_getKeywords!=0", e
!=0);
4533 n
= checkUEnumeration("ucol_getKeywords", e
, KW
, LEN(KW
));
4534 /* Don't need to check n because we check list */
4537 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec
));
4540 e
= ucol_getKeywordValues(KW
[0], &ec
);
4542 assertSuccess("ucol_getKeywordValues", &ec
);
4543 assertTrue("ucol_getKeywordValues!=0", e
!=0);
4544 n
= checkUEnumeration("ucol_getKeywordValues", e
, KWVAL
, LEN(KWVAL
));
4545 /* Don't need to check n because we check list */
4548 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec
));
4551 /* Try setting a warning before calling ucol_getKeywordValues */
4552 ec
= U_USING_FALLBACK_WARNING
;
4553 e
= ucol_getKeywordValues(KW
[0], &ec
);
4554 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec
)) {
4555 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e
!=0);
4556 n
= checkUEnumeration("ucol_getKeywordValues [with warning code set]", e
, KWVAL
, LEN(KWVAL
));
4557 /* Don't need to check n because we check list */
4562 U_DRAFT int32_t U_EXPORT2
4563 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4564 const char* locale, UBool* isAvailable,
4565 UErrorCode* status);
4568 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de",
4570 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
4571 assertEquals("getFunctionalEquivalent(de)", "de", loc
);
4572 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4573 isAvailable
== TRUE
);
/* "de_DE" is expected to resolve to the same functional equivalent, "de". */
4576 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de_DE",
4578 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
4579 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc
);
4580 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4581 isAvailable
== TRUE
);
4585 /* supersedes TestJ784 */
/*
 * Checks the ordering of two string lists (test and test2) twice each:
 * once against a collator built from the "&[before 2]..." rules below via
 * genericRulesStarter(), and once against the "zh" locale via
 * genericLocaleStarter().
 */
4586 static void TestBeforePinyin(void) {
/* One "&[before 2]" reset per base vowel (A, e, i, o, u), followed by a plain "&U" reset. */
4587 const static char rules
[] = {
4588 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
4589 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
4590 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
4591 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
4592 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
4593 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
4596 const static char *test
[] = {
4607 const static char *test2
[] = {
/* Each list is run against the explicit rules first and the "zh" locale second. */
4640 genericRulesStarter(rules
, test
, sizeof(test
)/sizeof(test
[0]));
4641 genericLocaleStarter("zh", test
, sizeof(test
)/sizeof(test
[0]));
4642 genericRulesStarter(rules
, test2
, sizeof(test2
)/sizeof(test2
[0]));
4643 genericLocaleStarter("zh", test2
, sizeof(test2
)/sizeof(test2
[0]));
/*
 * Table-driven check of the status ucol_openRules() reports for
 * "&[before N]" rules. Each table entry pairs a rule string with the
 * UErrorCode expected from opening it; a mismatch is logged as an error.
 * In the table, a [before N] reset is expected to succeed only when the
 * relation that follows has the matching strength (1 with "<", 2 with "<<",
 * 3 with "<<<"); every other combination, and the "[before I]" entry, is
 * expected to give U_INVALID_FORMAT_ERROR.
 */
4646 static void TestBeforeTightening(void) {
4647 static const struct {
4649 UErrorCode expectedStatus
;
4651 { "&[before 1]a<x", U_ZERO_ERROR
},
4652 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR
},
4653 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR
},
4654 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR
},
4655 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR
},
4656 { "&[before 2]a<<x",U_ZERO_ERROR
},
4657 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR
},
4658 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR
},
4659 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR
},
4660 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR
},
4661 { "&[before 3]a<<<x",U_ZERO_ERROR
},
4662 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR
},
4663 { "&[before I]a = x",U_INVALID_FORMAT_ERROR
}
4668 UErrorCode status
= U_ZERO_ERROR
;
4669 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
4672 UCollator
*coll
= NULL
;
4675 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
4676 rlen
= u_unescape(tests
[i
].rules
, rlz
, RULE_BUFFER_LEN
);
4677 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
4678 if(status
!= tests
[i
].expectedStatus
) {
4679 log_err_status(status
, "Opening a collator with rules %s returned error code %s, expected %s\n",
4680 tests
[i
].rules
, u_errorName(status
), u_errorName(tests
[i
].expectedStatus
));
/* Clear the status so a failure from one entry does not leak into the next. */
4683 status
= U_ZERO_ERROR
;
4690 &[before 1] a < x <<< X << q <<< Q < z
4691 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4694 &[before 2] a << x <<< X << q <<< Q < z
4695 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4698 &[before 3] a <<< x <<< X << q <<< Q < z
4699 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4703 &[before 1] a < x <<< X << q <<< Q < z
4704 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4707 &[before 2] a << x <<< X << q <<< Q < z
4708 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4711 &[before 3] a <<< x <<< X << q <<< Q < z
4712 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4716 &[before 1] a < x <<< X << q <<< Q < z
4717 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4720 &[before 2] a << x <<< X << q <<< Q < z
4721 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4724 &[before 3] a <<< x <<< X << q <<< Q < z
4725 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4728 &[before 1] s < x <<< X << q <<< Q < z
4729 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4731 &[before 2] s << x <<< X << q <<< Q < z
4732 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4734 &[before 3] s <<< x <<< X << q <<< Q < z
4735 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4738 &[before 1] \u24DC < x <<< X << q <<< Q < z
4739 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4741 &[before 2] \u24DC << x <<< X << q <<< Q < z
4742 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4744 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4745 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4750 /* requires features not yet supported */
/*
 * Table-driven ordering check for "[before N]" resets.
 * Every entry holds a rule string, the strings in the order they are expected
 * to sort under that rule (at most 16 slots, 9 used per entry), and the number
 * of strings. Each entry is passed unchanged to genericRulesStarter().
 */
4751 static void TestMoreBefore(void) {
4752 static const struct {
4754 const char* order
[16];
4757 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4758 { "m","M","x","X","q","Q","z","a","n" }, 9},
4759 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4760 { "m","M","x","X","q","Q","a","z","n" }, 9},
4761 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4762 { "m","M","x","X","a","q","Q","z","n" }, 9},
4763 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4764 { "x","X","q","Q","z","m","M","a","n" }, 9},
4765 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4766 { "m","M","x","X","q","Q","a","z","n" }, 9},
4767 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4768 { "m","M","x","X","a","q","Q","z","n" }, 9},
4769 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4770 { "x","X","q","Q","z","n","m","a","M" }, 9},
4771 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4772 { "x","X","q","Q","m","a","M","z","n" }, 9},
4773 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4774 { "m","x","X","a","M","q","Q","z","n" }, 9},
4775 { "&[before 1] s < x <<< X << q <<< Q < z",
4776 { "r","R","x","X","q","Q","z","s","n" }, 9},
4777 { "&[before 2] s << x <<< X << q <<< Q < z",
4778 { "r","R","x","X","q","Q","s","z","n" }, 9},
4779 { "&[before 3] s <<< x <<< X << q <<< Q < z",
4780 { "r","R","x","X","s","q","Q","z","n" }, 9},
4781 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4782 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4783 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4784 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4785 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4786 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4791 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
4792 genericRulesStarter(tests
[i
].rules
, tests
[i
].order
, tests
[i
].size
);
/*
 * Builds a collator from a rule that tailors the NUL character after 'a'
 * at tertiary strength, then compares two one-unit strings with
 * ucol_strcoll() and expects UCOL_LESS.
 * NOTE(review): the local named a holds code unit 1, not the letter 'a',
 * so the comparison is between U+0001 and U+0000 - confirm this is intended.
 */
4797 static void TestTailorNULL( void ) {
4798 const static char* rule
= "&a <<< '\\u0000'";
4799 UErrorCode status
= U_ZERO_ERROR
;
4800 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
4802 UChar a
= 1, null
= 0;
4803 UCollationResult res
= UCOL_EQUAL
;
4805 UCollator
*coll
= NULL
;
4808 rlen
= u_unescape(rule
, rlz
, RULE_BUFFER_LEN
);
4809 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
4811 if(U_FAILURE(status
)) {
4812 log_err_status(status
, "Could not open default collator! -> %s\n", u_errorName(status
));
/* Explicit lengths of 1 are passed because the second string is a lone NUL. */
4814 res
= ucol_strcoll(coll
, &a
, 1, &null
, 1);
4816 if(res
!= UCOL_LESS
) {
4817 log_err("NULL was not tailored properly!\n");
/*
 * Runs the generic locale ordering check on "B", "b", "Bb", "bB" for the
 * "root" locale with two attributes applied: UCOL_STRENGTH set to
 * UCOL_QUATERNARY and UCOL_CASE_FIRST set to UCOL_UPPER_FIRST.
 */
4825 TestUpperFirstQuaternary(void)
4827 const char* tests
[] = { "B", "b", "Bb", "bB" };
/* att[k] is set to attVals[k]; the two arrays are parallel. */
4828 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_FIRST
};
4829 UColAttributeValue attVals
[] = { UCOL_QUATERNARY
, UCOL_UPPER_FIRST
};
4830 genericLocaleStarterWithOptions("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
4836 const char* tests
[] = { "\\u00e2T", "aT" };
4837 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_LEVEL
};
4838 UColAttributeValue attVals
[] = { UCOL_PRIMARY
, UCOL_ON
};
4839 const char* tests2
[] = { "a", "A" };
4840 const char* rule
= "&[first tertiary ignorable]=A=a";
4841 UColAttribute att2
[] = { UCOL_CASE_LEVEL
};
4842 UColAttributeValue attVals2
[] = { UCOL_ON
};
4843 /* Test whether we correctly ignore primary ignorables on case level when */
4844 /* we have only primary & case level */
4845 genericLocaleStarterWithOptionsAndResult("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]), UCOL_EQUAL
);
4846 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4847 /* and case level */
4848 genericLocaleStarterWithOptions("root", tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
4849 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4850 genericRulesStarterWithOptionsAndResult(rule
, tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att2
, attVals2
, sizeof(att2
)/sizeof(att2
[0]), UCOL_EQUAL
);
4856 static const char *test
= "this is a test string";
4858 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
4859 unsigned char sortkey
[256];
4860 int32_t sortkey_length
;
4861 UErrorCode status
= U_ZERO_ERROR
;
4862 static UCollator
*coll
= NULL
;
4863 coll
= ucol_open("root", &status
);
4864 if(U_FAILURE(status
)) {
4865 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
4868 ucol_setStrength(coll
, UCOL_PRIMARY
);
4869 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4870 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4871 if (U_FAILURE(status
)) {
4872 log_err("Failed setting atributes\n");
4875 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, NULL
, 0);
4876 if (sortkey_length
> 256) return;
4878 /* we mark the position where the null byte should be written in advance */
4879 sortkey
[sortkey_length
-1] = 0xAA;
4881 /* we set the buffer size one byte higher than needed */
4882 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
4885 /* no error occurs (for me) */
4886 if (sortkey
[sortkey_length
-1] == 0xAA) {
4887 log_err("Hit bug at first try\n");
4890 /* we mark the position where the null byte should be written again */
4891 sortkey
[sortkey_length
-1] = 0xAA;
4893 /* this time we set the buffer size to the exact amount needed */
4894 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
4897 /* now the trailing null byte is not written */
4898 if (sortkey
[sortkey_length
-1] == 0xAA) {
4899 log_err("Hit bug at second try\n");
4905 /* Regression test for Thai partial sort key problem */
4909 const static char *test
[] = {
4910 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
4911 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
4914 genericLocaleStarter("th", test
, sizeof(test
)/sizeof(test
[0]));
4920 const static char *test
[] = { "a", "y" };
4921 const char* rules
= "&Ny << Y &[first secondary ignorable] <<< a";
4922 genericRulesStarter(rules
, test
, sizeof(test
)/sizeof(test
[0]));
4928 UErrorCode status
= U_ZERO_ERROR
;
4930 UCollator
*coll
=NULL
;
4931 uint8_t resColl
[100], expColl
[100];
4932 int32_t rLen
, tLen
, ruleLen
, sLen
, kLen
;
4933 UChar rule
[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
4934 UChar rule2
[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4935 UChar rule3
[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4936 static const UChar tData
[][20]={
4938 {0x0041, 0x0323, 0x0302, 0},
4939 {0x1EA0, 0x0302, 0},
4940 {0x00C2, 0x0323, 0},
4941 {0x1ED8, 0}, /* O with dot and circumflex */
4942 {0x1ECC, 0x0302, 0},
4944 {0x1EA1, 0x0306, 0},
4946 static const UChar tailorData
[][20]={
4947 {0x1FA2, 0}, /* Omega with 3 combining marks */
4948 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4949 {0x1FF3, 0x0313, 0x0300, 0},
4950 {0x1F60, 0x0300, 0x0345, 0},
4951 {0x1F62, 0x0345, 0},
4952 {0x1FA0, 0x0300, 0},
4954 static const UChar tailorData2
[][20]={
4955 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4956 {0x0073, 0x0323, 0x030C, 0},
4957 {0x0073, 0x030C, 0x0323, 0},
4959 static const UChar tailorData3
[][20]={
4960 {0x007a, 0}, /* z */
4961 {0x0061, 0x0065, 0}, /* a + e */
4962 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4963 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4964 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4965 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4966 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4967 {0x00EA, 0}, /* e with circumflex */
4970 /* Test Vietnamese sort. */
4971 coll
= ucol_open("vi", &status
);
4972 if(U_FAILURE(status
)) {
4973 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
4976 log_verbose("\n\nVI collation:");
4977 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[2], u_strlen(tData
[2])) ) {
4978 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4980 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[3], u_strlen(tData
[3])) ) {
4981 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4983 if ( !ucol_equal(coll
, tData
[5], u_strlen(tData
[5]), tData
[4], u_strlen(tData
[4])) ) {
4984 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4986 if ( !ucol_equal(coll
, tData
[7], u_strlen(tData
[7]), tData
[6], u_strlen(tData
[6])) ) {
4987 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4990 for (j
=0; j
<8; j
++) {
4991 tLen
= u_strlen(tData
[j
]);
4992 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
4993 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
4994 for(i
= 0; i
<rLen
; i
++) {
4995 log_verbose(" %02X", resColl
[i
]);
5001 /* Test Romanian sort. */
5002 coll
= ucol_open("ro", &status
);
5003 log_verbose("\n\nRO collation:");
5004 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[1], u_strlen(tData
[1])) ) {
5005 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5007 if ( !ucol_equal(coll
, tData
[4], u_strlen(tData
[4]), tData
[5], u_strlen(tData
[5])) ) {
5008 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5010 if ( !ucol_equal(coll
, tData
[6], u_strlen(tData
[6]), tData
[7], u_strlen(tData
[7])) ) {
5011 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5014 for (j
=4; j
<8; j
++) {
5015 tLen
= u_strlen(tData
[j
]);
5016 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
5017 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
5018 for(i
= 0; i
<rLen
; i
++) {
5019 log_verbose(" %02X", resColl
[i
]);
5024 /* Test the precomposed Greek character with 3 combining marks. */
5025 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5026 ruleLen
= u_strlen(rule
);
5027 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5028 if (U_FAILURE(status
)) {
5029 log_err("ucol_openRules failed with %s\n", u_errorName(status
));
5032 sLen
= u_strlen(tailorData
[0]);
5033 for (j
=1; j
<6; j
++) {
5034 tLen
= u_strlen(tailorData
[j
]);
5035 if ( !ucol_equal(coll
, tailorData
[0], sLen
, tailorData
[j
], tLen
)) {
5036 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j
, tailorData
[j
]);
5039 /* Test getSortKey. */
5040 tLen
= u_strlen(tailorData
[0]);
5041 kLen
=ucol_getSortKey(coll
, tailorData
[0], tLen
, expColl
, 100);
5042 for (j
=0; j
<6; j
++) {
5043 tLen
= u_strlen(tailorData
[j
]);
5044 rLen
= ucol_getSortKey(coll
, tailorData
[j
], tLen
, resColl
, 100);
5045 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
5046 log_err("\n Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5047 for(i
= 0; i
<rLen
; i
++) {
5048 log_err(" %02X", resColl
[i
]);
5054 log_verbose("\n\nTailoring test for s with caron:");
5055 ruleLen
= u_strlen(rule2
);
5056 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5057 tLen
= u_strlen(tailorData2
[0]);
5058 kLen
=ucol_getSortKey(coll
, tailorData2
[0], tLen
, expColl
, 100);
5059 for (j
=1; j
<3; j
++) {
5060 tLen
= u_strlen(tailorData2
[j
]);
5061 rLen
= ucol_getSortKey(coll
, tailorData2
[j
], tLen
, resColl
, 100);
5062 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
5063 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5064 for(i
= 0; i
<rLen
; i
++) {
5065 log_err(" %02X", resColl
[i
]);
5071 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5072 ruleLen
= u_strlen(rule3
);
5073 coll
= ucol_openRules(rule3
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5074 tLen
= u_strlen(tailorData3
[3]);
5075 kLen
=ucol_getSortKey(coll
, tailorData3
[3], tLen
, expColl
, 100);
5076 for (j
=4; j
<6; j
++) {
5077 tLen
= u_strlen(tailorData3
[j
]);
5078 rLen
= ucol_getSortKey(coll
, tailorData3
[j
], tLen
, resColl
, 100);
5080 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
5081 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5082 for(i
= 0; i
<rLen
; i
++) {
5083 log_err(" %02X", resColl
[i
]);
5087 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5088 for(i
= 0; i
<rLen
; i
++) {
5089 log_verbose(" %02X", resColl
[i
]);
/*
 * Checks sort keys produced by two tailorings that reset on the
 * first/last primary-ignorable and first/last secondary-ignorable positions.
 * For each tailoring a collator is opened from the UChar rule string and the
 * sort keys of the test strings are compared byte-for-byte (length and
 * content) against the expected key arrays declared below; a mismatch is
 * logged together with a hex dump of the key that was produced.
 */
5096 TestTailor6179(void)
5098 UErrorCode status
= U_ZERO_ERROR
;
5100 UCollator
*coll
=NULL
;
5101 uint8_t resColl
[100];
5102 int32_t rLen
, tLen
, ruleLen
;
5103 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5104 static const UChar rule1
[]={
5105 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5106 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5107 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5108 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5109 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5110 static const UChar rule2
[]={
5111 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5112 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5113 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5114 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5115 0x3C,0x3C,0x20,0x62,0};
5117 static const UChar tData1
[][4]={
5122 static const UChar tData2
[][4]={
5129 * These values from FractionalUCA.txt will change,
5130 * and need to be updated here.
5132 static const uint8_t firstPrimaryIgnCE
[]={1, 0x88, 1, 5, 0};
5133 static const uint8_t lastPrimaryIgnCE
[]={1, 0xE3, 1, 5, 0};
5134 static const uint8_t firstSecondaryIgnCE
[]={1, 1, 0xbf, 0x04, 0};
5135 static const uint8_t lastSecondaryIgnCE
[]={1, 1, 0xbf, 0x04, 0};
5137 /* Test [Last Primary ignorable] */
5139 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
5140 ruleLen
= u_strlen(rule1
);
5141 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5142 if (U_FAILURE(status
)) {
5143 log_err_status(status
, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status
));
5146 tLen
= u_strlen(tData1
[0]);
5147 rLen
= ucol_getSortKey(coll
, tData1
[0], tLen
, resColl
, 100);
5148 if (rLen
!= LEN(lastPrimaryIgnCE
) || uprv_memcmp(resColl
, lastPrimaryIgnCE
, rLen
) != 0) {
5149 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1
[0], rLen
);
5150 for(i
= 0; i
<rLen
; i
++) {
5151 log_err(" %02X", resColl
[i
]);
5155 tLen
= u_strlen(tData1
[1]);
5156 rLen
= ucol_getSortKey(coll
, tData1
[1], tLen
, resColl
, 100);
5157 if (rLen
!= LEN(firstPrimaryIgnCE
) || uprv_memcmp(resColl
, firstPrimaryIgnCE
, rLen
) != 0) {
5158 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1
[1], rLen
);
5159 for(i
= 0; i
<rLen
; i
++) {
5160 log_err(" %02X", resColl
[i
]);
5167 /* Test [Last Secondary ignorable] */
5168 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
/*
 * The length must be taken from rule2, the string actually passed to
 * ucol_openRules() below. Measuring rule1 here (60 units instead of 65)
 * cut the trailing "<<< b" off the second tailoring.
 */
5169 ruleLen
= u_strlen(rule2
);
5170 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5171 if (U_FAILURE(status
)) {
5172 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status
));
5175 tLen
= u_strlen(tData2
[0]);
5176 rLen
= ucol_getSortKey(coll
, tData2
[0], tLen
, resColl
, 100);
5177 if (rLen
!= LEN(lastSecondaryIgnCE
) || uprv_memcmp(resColl
, lastSecondaryIgnCE
, rLen
) != 0) {
5178 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2
[0], rLen
);
5179 for(i
= 0; i
<rLen
; i
++) {
5180 log_err(" %02X", resColl
[i
]);
5184 if(isICUVersionAtLeast(52, 0, 1)) { /* TODO: debug & fix, see ticket #8982 */
5185 tLen
= u_strlen(tData2
[1]);
5186 rLen
= ucol_getSortKey(coll
, tData2
[1], tLen
, resColl
, 100);
5187 if (rLen
!= LEN(firstSecondaryIgnCE
) || uprv_memcmp(resColl
, firstSecondaryIgnCE
, rLen
) != 0) {
5188 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2
[1], rLen
);
5189 for(i
= 0; i
<rLen
; i
++) {
5190 log_err(" %02X", resColl
[i
]);
/*
 * Checks that the sort keys of the tData1 strings come out in non-decreasing
 * order under four collators: the "en" locale, the "ja" locale, a collator
 * built from rule1 and a collator built from rule2.
 * For every string the sort key is fetched with ucol_getSortKey() and compared
 * with the previous string's key using strcmp(); a key that compares lower
 * than its predecessor is logged as an error. Under rule2 the string at
 * index 3 is the exception: there an error is logged when its key compares
 * higher than the previous one.
 * Each key is copied into prevColl (rLen + 1 bytes) for the next iteration.
 */
5199 TestUCAPrecontext(void)
5201 UErrorCode status
= U_ZERO_ERROR
;
5203 UCollator
*coll
=NULL
;
5204 uint8_t resColl
[100], prevColl
[100];
5205 int32_t rLen
, tLen
, ruleLen
;
5206 UChar rule1
[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5207 UChar rule2
[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5208 /* & l middle-dot << a a is an expansion. */
5210 UChar tData1
[][20]={
5211 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5212 { 0x387, 0}, /* standalone middle dot(0x387) */
5215 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
5216 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5217 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5218 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5219 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5220 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5221 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
/* Pass 1: "en" locale. */
5224 log_verbose("\n\nEN collation:");
5225 coll
= ucol_open("en", &status
);
5226 if (U_FAILURE(status
)) {
5227 log_err_status(status
, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status
));
5230 for (j
=0; j
<11; j
++) {
5231 tLen
= u_strlen(tData1
[j
]);
5232 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5233 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5234 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5237 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5238 for(i
= 0; i
<rLen
; i
++) {
5239 log_verbose(" %02X", resColl
[i
]);
5241 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/* Pass 2: "ja" locale. */
5246 log_verbose("\n\nJA collation:");
5247 coll
= ucol_open("ja", &status
);
5248 if (U_FAILURE(status
)) {
5249 log_err("Tailoring test: &z <<a|- failed!");
5252 for (j
=0; j
<11; j
++) {
5253 tLen
= u_strlen(tData1
[j
]);
5254 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5255 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5256 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5259 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5260 for(i
= 0; i
<rLen
; i
++) {
5261 log_verbose(" %02X", resColl
[i
]);
5263 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/* Pass 3: collator built from rule1. */
5268 log_verbose("\n\nTailoring test: & middle dot < a ");
5269 ruleLen
= u_strlen(rule1
);
5270 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5271 if (U_FAILURE(status
)) {
5272 log_err("Tailoring test: & middle dot < a failed!");
5275 for (j
=0; j
<11; j
++) {
5276 tLen
= u_strlen(tData1
[j
]);
5277 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5278 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5279 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5282 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5283 for(i
= 0; i
<rLen
; i
++) {
5284 log_verbose(" %02X", resColl
[i
]);
5286 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/* Pass 4: collator built from rule2; index 3 is checked in the opposite direction. */
5291 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5292 ruleLen
= u_strlen(rule2
);
5293 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5294 if (U_FAILURE(status
)) {
5295 log_err("Tailoring test: & l middle-dot << a failed!");
5298 for (j
=0; j
<11; j
++) {
5299 tLen
= u_strlen(tData1
[j
]);
5300 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5301 if ((j
>0) && (j
!=3) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5302 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5305 if ((j
==3)&&(strcmp((char *)resColl
, (char *)prevColl
)>0)) {
5306 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5309 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5310 for(i
= 0; i
<rLen
; i
++) {
5311 log_verbose(" %02X", resColl
[i
]);
5313 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/*
 * Calls ucol_getSortKey() with a destination buffer of a single byte and
 * checks the length it returns.
 * The input is the one-character string U+4E00 and the collator is the "root"
 * collator at primary strength with normalization switched on. The returned
 * length is expected to be 4 even though the buffer holds only one byte;
 * any other value is logged as an error.
 */
5319 TestOutOfBuffer5468(void)
5321 static const char *test
= "\\u4e00";
5323 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
/* Deliberately too small for the key being requested. */
5324 unsigned char shortKeyBuf
[1];
5325 int32_t sortkey_length
;
5326 UErrorCode status
= U_ZERO_ERROR
;
5327 static UCollator
*coll
= NULL
;
5329 coll
= ucol_open("root", &status
);
5330 if(U_FAILURE(status
)) {
5331 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
/* Primary strength is set twice: through ucol_setStrength() and through the attribute API. */
5334 ucol_setStrength(coll
, UCOL_PRIMARY
);
5335 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
5336 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
5337 if (U_FAILURE(status
)) {
5338 log_err("Failed setting atributes\n");
5342 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, shortKeyBuf
, sizeof(shortKeyBuf
));
5343 if (sortkey_length
!= 4) {
5344 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length
);
5346 log_verbose("length of sortKey is %d", sortkey_length
);
/* Number of prefixes of data[] that are tested, and the size of each key buffer. */
5350 #define TSKC_DATA_SIZE 5
5351 #define TSKC_BUF_SIZE 50
/*
 * Compares full sort keys with partial sort keys for growing prefixes of
 * data[] (lengths 1 through TSKC_DATA_SIZE), using a collator opened from the
 * short string "LEN_S4".
 * For prefix i both buffers are zeroed, the full key is written with
 * ucol_getSortKey() and the partial key with ucol_nextSortKeyPart() over a
 * UCharIterator. Each prefix is then compared with every earlier prefix: a
 * flag is computed per key kind over all TSKC_BUF_SIZE bytes, and an error is
 * logged when the full-key flag and the partial-key flag disagree.
 * NOTE(review): despite their names, fullMatch / partMatch remain TRUE only
 * while every byte position DIFFERS between the two buffers (the comparison
 * uses !=) - confirm whether == was intended.
 */
5353 TestSortKeyConsistency(void)
5355 UErrorCode icuRC
= U_ZERO_ERROR
;
5357 UChar data
[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5359 uint8_t bufFull
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
5360 uint8_t bufPart
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
5363 ucol
= ucol_openFromShortString("LEN_S4", FALSE
, NULL
, &icuRC
);
5364 if (U_FAILURE(icuRC
))
5366 log_err_status(icuRC
, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC
));
5370 for (i
= 0; i
< TSKC_DATA_SIZE
; i
++)
5372 UCharIterator uiter
;
5373 uint32_t state
[2] = { 0, 0 };
/* Prefix length for this round: the first i+1 code units of data[]. */
5374 int32_t dataLen
= i
+1;
5375 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
5376 bufFull
[i
][j
] = bufPart
[i
][j
] = 0;
5379 ucol_getSortKey(ucol
, data
, dataLen
, bufFull
[i
], TSKC_BUF_SIZE
);
5381 /* Partial sort key */
5382 uiter_setString(&uiter
, data
, dataLen
);
5383 ucol_nextSortKeyPart(ucol
, &uiter
, state
, bufPart
[i
], TSKC_BUF_SIZE
, &icuRC
);
5384 if (U_FAILURE(icuRC
))
5386 log_err("ucol_nextSortKeyPart failed\n");
/* Compare this prefix against every shorter prefix already processed. */
5391 for (i2
=0; i2
<i
; i2
++)
5393 UBool fullMatch
= TRUE
;
5394 UBool partMatch
= TRUE
;
5395 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
5397 fullMatch
= fullMatch
&& (bufFull
[i
][j
] != bufFull
[i2
][j
]);
5398 partMatch
= partMatch
&& (bufPart
[i
][j
] != bufPart
[i2
][j
]);
5400 if (fullMatch
!= partMatch
) {
5401 log_err(fullMatch
? "full key was consistent, but partial key changed\n"
5402 : "partial key was consistent, but full key changed\n");
5409 /*=============================================*/
/*
 * Requests a partial sort key for a two-unit string through a collator opened
 * from the short string "LHR_AN_CX_EX_FX_HX_NX_S3".
 * The text is 0x0044 followed by 0xD81A, a lead surrogate with no trail unit
 * after it. ucol_nextSortKeyPart() is given a 32-byte buffer, and an error is
 * logged when the length it returns equals the full buffer size.
 */
5414 static void TestCroatianSortKey(void) {
5415 const char* collString
= "LHR_AN_CX_EX_FX_HX_NX_S3";
5416 UErrorCode status
= U_ZERO_ERROR
;
5420 static const UChar text
[] = { 0x0044, 0xD81A };
5422 size_t length
= sizeof(text
)/sizeof(*text
);
5424 uint8_t textSortKey
[32];
/* Capacity handed to ucol_nextSortKeyPart(); matches the size of textSortKey. */
5425 size_t lenSortKey
= 32;
5426 size_t actualSortKeyLen
;
5427 uint32_t uStateInfo
[2] = { 0, 0 };
5429 ucol
= ucol_openFromShortString(collString
, FALSE
, NULL
, &status
);
5430 if (U_FAILURE(status
)) {
5431 log_err_status(status
, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status
));
5435 uiter_setString(&iter
, text
, length
);
5437 actualSortKeyLen
= ucol_nextSortKeyPart(
5438 ucol
, &iter
, (uint32_t*)uStateInfo
,
5439 textSortKey
, lenSortKey
, &status
/* A result that fills the buffer completely is treated as the failure case. */
5442 if (actualSortKeyLen
== lenSortKey
) {
5443 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5450 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5451 * they are both Hiragana and Katakana
5453 #define SORTKEYLEN 50
5454 static void TestHiragana(void) {
5455 UErrorCode status
= U_ZERO_ERROR
;
5457 UCollationResult strcollresult
;
5458 UChar data1
[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5459 UChar data2
[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5460 int32_t data1Len
= sizeof(data1
)/sizeof(*data1
);
5461 int32_t data2Len
= sizeof(data2
)/sizeof(*data2
);
5463 uint8_t sortKey1
[SORTKEYLEN
];
5464 uint8_t sortKey2
[SORTKEYLEN
];
5466 UCharIterator uiter1
;
5467 UCharIterator uiter2
;
5468 uint32_t state1
[2] = { 0, 0 };
5469 uint32_t state2
[2] = { 0, 0 };
5473 ucol
= ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE
, NULL
,
5475 if (U_FAILURE(status
)) {
5476 log_err_status(status
, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status
));
5480 /* Start of full sort keys */
5481 /* Full sort key1 */
5482 keySize1
= ucol_getSortKey(ucol
, data1
, data1Len
, sortKey1
, SORTKEYLEN
);
5483 /* Full sort key2 */
5484 keySize2
= ucol_getSortKey(ucol
, data2
, data2Len
, sortKey2
, SORTKEYLEN
);
5485 if (keySize1
== keySize2
) {
5486 for (i
= 0; i
< keySize1
; i
++) {
5487 if (sortKey1
[i
] != sortKey2
[i
]) {
5488 log_err("Full sort keys are different. Should be equal.");
5492 log_err("Full sort keys sizes doesn't match: %d %d", keySize1
, keySize2
);
5494 /* End of full sort keys */
5496 /* Start of partial sort keys */
5497 /* Partial sort key1 */
5498 uiter_setString(&uiter1
, data1
, data1Len
);
5499 keySize1
= ucol_nextSortKeyPart(ucol
, &uiter1
, state1
, sortKey1
, SORTKEYLEN
, &status
);
5500 /* Partial sort key2 */
5501 uiter_setString(&uiter2
, data2
, data2Len
);
5502 keySize2
= ucol_nextSortKeyPart(ucol
, &uiter2
, state2
, sortKey2
, SORTKEYLEN
, &status
);
5503 if (U_SUCCESS(status
) && keySize1
== keySize2
) {
5504 for (j
= 0; j
< keySize1
; j
++) {
5505 if (sortKey1
[j
] != sortKey2
[j
]) {
5506 log_err("Partial sort keys are different. Should be equal");
5510 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status
), keySize1
, keySize2
);
5512 /* End of partial sort keys */
5514 /* Start of strcoll */
5515 /* Use ucol_strcoll() to determine ordering */
5516 strcollresult
= ucol_strcoll(ucol
, data1
, data1Len
, data2
, data2Len
);
5517 if (strcollresult
!= UCOL_EQUAL
) {
5518 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5524 /* Convenient struct for running collation tests: one expected comparison between two strings */
5526 const UChar source
[MAX_TOKEN_LEN
]; /* String on left; fixed capacity of MAX_TOKEN_LEN UTF-16 code units */
5527 const UChar target
[MAX_TOKEN_LEN
]; /* String on right; same fixed capacity as source */
5528 UCollationResult result
; /* Expected outcome of comparing source with target: -1, 0 or +1, depending on collation */
5532 * Utility function to test one collation test case.
5533 * @param testcases Array of test cases.
5534 * @param n_testcases Size of the array testcases.
5535 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
5536 * @param n_rules Size of the array str_rules.
5538 static void doTestOneTestCase(const OneTestCase testcases
[],
5540 const char* str_rules
[],
5543 int rule_no
, testcase_no
;
5546 UErrorCode status
= U_ZERO_ERROR
;
5547 UParseError parse_error
;
5548 UCollator
*myCollation
;
5550 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
5552 length
= u_unescape(str_rules
[rule_no
], rule
, 500);
5554 log_err("ERROR: The rule cannot be unescaped: %s\n");
5557 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
5558 if(U_FAILURE(status
)){
5559 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5562 log_verbose("Testing the <<* syntax\n");
5563 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
5564 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5565 for (testcase_no
= 0; testcase_no
< n_testcases
; ++testcase_no
) {
5567 testcases
[testcase_no
].source
,
5568 testcases
[testcase_no
].target
,
5569 testcases
[testcase_no
].result
5572 ucol_close(myCollation
);
/* Expected comparison results passed to doTestOneTestCase() by the same-strength list/range tests below. */
/* Each trailing comment spells out the two strings encoded by the hex values on that row. */
5576 const static OneTestCase rangeTestcases
[] = {
5577 { {0x0061}, {0x0062}, UCOL_LESS
}, /* "a" < "b" */
5578 { {0x0062}, {0x0063}, UCOL_LESS
}, /* "b" < "c" */
5579 { {0x0061}, {0x0063}, UCOL_LESS
}, /* "a" < "c" */
5581 { {0x0062}, {0x006b}, UCOL_LESS
}, /* "b" << "k" */
5582 { {0x006b}, {0x006c}, UCOL_LESS
}, /* "k" << "l" */
5583 { {0x0062}, {0x006c}, UCOL_LESS
}, /* "b" << "l" */
5584 { {0x0061}, {0x006c}, UCOL_LESS
}, /* "a" < "l" */
5585 { {0x0061}, {0x006d}, UCOL_LESS
}, /* "a" < "m" */
5587 { {0x0079}, {0x006d}, UCOL_LESS
}, /* "y" < "m" */
5588 { {0x0079}, {0x0067}, UCOL_LESS
}, /* "y" < "g" */
5589 { {0x0061}, {0x0068}, UCOL_LESS
}, /* "a" < "h" */
5590 { {0x0061}, {0x0065}, UCOL_LESS
}, /* "a" < "e" */
5592 { {0x0061}, {0x0031}, UCOL_EQUAL
}, /* "a" = "1" */
5593 { {0x0061}, {0x0032}, UCOL_EQUAL
}, /* "a" = "2" */
5594 { {0x0061}, {0x0033}, UCOL_EQUAL
}, /* "a" = "3" */
5595 { {0x0061}, {0x0066}, UCOL_LESS
}, /* "a" < "f" */
5596 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS
}, /* "la" < "kb" */
5597 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL
}, /* "aaa" = "123" */
5598 { {0x0062}, {0x007a}, UCOL_LESS
}, /* "b" < "z" */
5599 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS
}, /* "azb" < "2ym" */
/* Number of entries in rangeTestcases. */
5602 static int nRangeTestcases
= LEN(rangeTestcases
);
/* Expected results for the supplemental-character tests; supplementary code points are written as surrogate pairs. */
5604 const static OneTestCase rangeTestcasesSupplemental
[] = {
5605 { {0xfffe}, {0xffff}, UCOL_LESS
}, /* U+FFFE < U+FFFF */
5606 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS
}, /* U+FFFF < U+10000 */
5607 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+10000 < U+10001 */
5608 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+FFFE < U+10001 */
5609 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10001 < U+10002 */
5610 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10001 < U+10002 (same pair as the previous row) */
5611 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+FFFE < U+10002 */
/* Number of entries in rangeTestcasesSupplemental. */
5614 static int nRangeTestcasesSupplemental
= LEN(rangeTestcasesSupplemental
);
/* Expected results used by TestSameStrengthListQwerty and TestSameStrengthListQuotedQwerty. */
/* Every row expects UCOL_LESS; the comments keep the relation symbols used in the rule strings. */
5616 const static OneTestCase rangeTestcasesQwerty
[] = {
5617 { {0x0071}, {0x0077}, UCOL_LESS
}, /* "q" < "w" */
5618 { {0x0077}, {0x0065}, UCOL_LESS
}, /* "w" < "e" */
5620 { {0x0079}, {0x0075}, UCOL_LESS
}, /* "y" < "u" */
5621 { {0x0071}, {0x0075}, UCOL_LESS
}, /* "q" << "u" */
5623 { {0x0074}, {0x0069}, UCOL_LESS
}, /* "t" << "i" */
5624 { {0x006f}, {0x0070}, UCOL_LESS
}, /* "o" << "p" */
5626 { {0x0079}, {0x0065}, UCOL_LESS
}, /* "y" < "e" */
5627 { {0x0069}, {0x0075}, UCOL_LESS
}, /* "i" < "u" */
5629 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5630 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS
}, /* "quest" < "were" */
5631 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5632 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS
}, /* "quack" < "quest" */
/* Number of entries in rangeTestcasesQwerty. */
5635 static int nRangeTestcasesQwerty
= LEN(rangeTestcasesQwerty
);
5637 static void TestSameStrengthList(void)
5639 const char* strRules
[] = {
5641 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
5644 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5646 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5649 static void TestSameStrengthListQuoted(void)
5651 const char* strRules
[] = {
5652 /* Lists with quoted characters */
5653 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5654 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5656 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5657 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5659 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
5660 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5662 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5665 static void TestSameStrengthListSupplemental(void)
5667 const char* strRules
[] = {
5668 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5669 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5670 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5671 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5673 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, LEN(strRules
));
5676 static void TestSameStrengthListQwerty(void)
5678 const char* strRules
[] = {
5679 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5680 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5681 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5682 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5683 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5685 /* Quoted characters also will work if two quoted characters are not consecutive. */
5686 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5688 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5689 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5692 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, LEN(strRules
));
5695 static void TestSameStrengthListQuotedQwerty(void)
5697 const char* strRules
[] = {
5698 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5699 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5700 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
5702 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5703 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5705 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, LEN(strRules
));
5708 static void TestSameStrengthListRanges(void)
5710 const char* strRules
[] = {
5711 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5713 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5716 static void TestSameStrengthListSupplementalRanges(void)
5718 const char* strRules
[] = {
5719 "&\\ufffe<*\\uffff-\\U00010002",
5721 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, LEN(strRules
));
/* Checks that rule-syntax characters (; + , - & *) can be tailored when they are quoted in the rule string. */
5724 static void TestSpecialCharacters(void)
/* Rule strings to try; every entry is run against the same expectations below. */
5726 const char* strRules
[] = {
5728 "&';'<'+'<','<'-'<'&'<'*'",
/* Expected orderings, one adjacent pair per row. */
/* NOTE(review): the visible rows stop at '&'; the final "& < *" step of the rule has no row here - confirm whether that is intentional. */
5737 const static OneTestCase specialCharacterStrings
[] = {
5738 { {0x003b}, {0x002b}, UCOL_LESS
}, /* ; < + */
5739 { {0x002b}, {0x002c}, UCOL_LESS
}, /* + < , */
5740 { {0x002c}, {0x002d}, UCOL_LESS
}, /* , < - */
5741 { {0x002d}, {0x0026}, UCOL_LESS
}, /* - < & */
/* Build a collator per rule string and verify every expectation against it. */
5743 doTestOneTestCase(specialCharacterStrings
, LEN(specialCharacterStrings
), strRules
, LEN(strRules
));
5746 static void TestPrivateUseCharacters(void)
5748 const char* strRules
[] = {
5750 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5751 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5754 const static OneTestCase privateUseCharacterStrings
[] = {
5755 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5756 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5757 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5758 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5759 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5760 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5762 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
5765 static void TestPrivateUseCharactersInList(void)
5767 const char* strRules
[] = {
5769 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5770 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5771 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5774 const static OneTestCase privateUseCharacterStrings
[] = {
5775 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5776 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5777 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5778 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5779 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5780 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5782 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
5785 static void TestPrivateUseCharactersInRange(void)
5787 const char* strRules
[] = {
5789 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5790 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5791 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5794 const static OneTestCase privateUseCharacterStrings
[] = {
5795 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5796 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5797 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5798 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5799 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5800 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5802 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
/* Negative test: each rule string below is malformed, so building a collator from it is expected to fail. */
5805 static void TestInvalidListsAndRanges(void)
/* One malformed rule per category; the category comments describe what is wrong with each. */
5807 const char* invalidRules
[] = {
5808 /* Range not in starred expression */
5809 "&\\ufffe<\\uffff-\\U00010002",
5811 /* Range without start */
5814 /* Range without end */
5817 /* More than one hyphen */
5820 /* Range in the wrong order */
5826 UErrorCode status
= U_ZERO_ERROR
;
5827 UParseError parse_error
;
5828 int n_rules
= LEN(invalidRules
);
5831 UCollator
*myCollation
;
/* Try every malformed rule in turn. */
5833 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
/* Convert the \u / \U escapes in the C string into UTF-16 (at most 500 units). */
5835 length
= u_unescape(invalidRules
[rule_no
], rule
, 500);
/* NOTE(review): the format string has a %s but no argument is passed - this is undefined behaviour; */
/* it presumably should pass invalidRules[rule_no]. */
5837 log_err("ERROR: The rule cannot be unescaped: %s\n");
5840 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
/* A successful open means the malformed rule was wrongly accepted. */
5841 if(!U_FAILURE(status
)){
5842 log_err("ERROR: Could not cause a failure as expected: \n");
/* Clear the (expected) failure so the next rule starts from a clean status. */
5844 status
= U_ZERO_ERROR
;
5849 * This test ensures that characters placed before a character in a different script have the same lead byte
5850 * in their collation key before and after script reordering.
5852 static void TestBeforeRuleWithScriptReordering(void)
5855 UErrorCode status
= U_ZERO_ERROR
;
5856 UCollator
*myCollation
;
5857 char srules
[500] = "&[before 1]\\u03b1 < \\u0e01";
5859 uint32_t rulesLength
= 0;
5860 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
5861 UCollationResult collResult
;
5863 uint8_t baseKey
[256];
5864 uint32_t baseKeyLength
;
5865 uint8_t beforeKey
[256];
5866 uint32_t beforeKeyLength
;
5868 UChar base
[] = { 0x03b1 }; /* base */
5869 int32_t baseLen
= sizeof(base
)/sizeof(*base
);
5871 UChar before
[] = { 0x0e01 }; /* ko kai */
5872 int32_t beforeLen
= sizeof(before
)/sizeof(*before
);
5874 /*UChar *data[] = { before, base };
5875 genericRulesStarter(srules, data, 2);*/
5877 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5880 /* build collator */
5881 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5883 rulesLength
= u_unescape(srules
, rules
, LEN(rules
));
5884 myCollation
= ucol_openRules(rules
, rulesLength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5885 if(U_FAILURE(status
)) {
5886 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5890 /* check collation results - before rule applied but not script reordering */
5891 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
5892 if (collResult
!= UCOL_GREATER
) {
5893 log_err("Collation result not correct before script reordering = %d\n", collResult
);
5896 /* check the lead byte of the collation keys before script reordering */
5897 baseKeyLength
= ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
5898 beforeKeyLength
= ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
5899 if (baseKey
[0] != beforeKey
[0]) {
5900 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
5903 /* reorder the scripts */
5904 ucol_setReorderCodes(myCollation
, reorderCodes
, 1, &status
);
5905 if(U_FAILURE(status
)) {
5906 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
5910 /* check collation results - before rule applied and after script reordering */
5911 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
5912 if (collResult
!= UCOL_GREATER
) {
5913 log_err("Collation result not correct after script reordering = %d\n", collResult
);
5916 /* check the lead byte of the collation keys after script reordering */
5917 ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
5918 ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
5919 if (baseKey
[0] != beforeKey
[0]) {
5920 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
5923 ucol_close(myCollation
);
5927 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5929 static void TestNonLeadBytesDuringCollationReordering(void)
5931 UErrorCode status
= U_ZERO_ERROR
;
5932 UCollator
*myCollation
;
5933 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
5935 uint8_t baseKey
[256];
5936 uint32_t baseKeyLength
;
5937 uint8_t reorderKey
[256];
5938 uint32_t reorderKeyLength
;
5940 UChar testString
[] = { 0x03b1, 0x03b2, 0x03b3 };
5945 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5947 /* build collator tertiary */
5948 myCollation
= ucol_open("", &status
);
5949 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5950 if(U_FAILURE(status
)) {
5951 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5954 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), baseKey
, 256);
5956 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
5957 if(U_FAILURE(status
)) {
5958 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5961 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), reorderKey
, 256);
5963 if (baseKeyLength
!= reorderKeyLength
) {
5964 log_err("Key lengths not the same during reordering.\n");
5968 for (i
= 1; i
< baseKeyLength
; i
++) {
5969 if (baseKey
[i
] != reorderKey
[i
]) {
5970 log_err("Collation key bytes not the same at position %d.\n", i
);
5974 ucol_close(myCollation
);
5976 /* build collator quaternary */
5977 myCollation
= ucol_open("", &status
);
5978 ucol_setStrength(myCollation
, UCOL_QUATERNARY
);
5979 if(U_FAILURE(status
)) {
5980 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5983 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), baseKey
, 256);
5985 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
5986 if(U_FAILURE(status
)) {
5987 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5990 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), reorderKey
, 256);
5992 if (baseKeyLength
!= reorderKeyLength
) {
5993 log_err("Key lengths not the same during reordering.\n");
5997 for (i
= 1; i
< baseKeyLength
; i
++) {
5998 if (baseKey
[i
] != reorderKey
[i
]) {
5999 log_err("Collation key bytes not the same at position %d.\n", i
);
6003 ucol_close(myCollation
);
6007 * Test reordering API.
6009 static void TestReorderingAPI(void)
6011 UErrorCode status
= U_ZERO_ERROR
;
6012 UCollator
*myCollation
;
6013 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6014 int32_t duplicateReorderCodes
[] = {USCRIPT_CUNEIFORM
, USCRIPT_GREEK
, UCOL_REORDER_CODE_CURRENCY
, USCRIPT_EGYPTIAN_HIEROGLYPHS
};
6015 int32_t reorderCodesStartingWithDefault
[] = {UCOL_REORDER_CODE_DEFAULT
, USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6016 UCollationResult collResult
;
6017 int32_t retrievedReorderCodesLength
;
6018 int32_t retrievedReorderCodes
[10];
6019 UChar greekString
[] = { 0x03b1 };
6020 UChar punctuationString
[] = { 0x203e };
6023 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6025 /* build collator tertiary */
6026 myCollation
= ucol_open("", &status
);
6027 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
6028 if(U_FAILURE(status
)) {
6029 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6033 /* set the reorderding */
6034 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6035 if (U_FAILURE(status
)) {
6036 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
6040 /* get the reordering */
6041 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6042 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
6043 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
6046 status
= U_ZERO_ERROR
;
6047 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6048 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6051 /* now let's really get it */
6052 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6053 if (U_FAILURE(status
)) {
6054 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6057 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6058 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6061 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6062 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
6063 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6067 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6068 if (collResult
!= UCOL_LESS
) {
6069 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
6073 /* clear the reordering */
6074 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
6075 if (U_FAILURE(status
)) {
6076 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
6080 /* get the reordering again */
6081 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6082 if (retrievedReorderCodesLength
!= 0) {
6083 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
6087 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6088 if (collResult
!= UCOL_GREATER
) {
6089 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
6093 /* test for error condition on duplicate reorder codes */
6094 ucol_setReorderCodes(myCollation
, duplicateReorderCodes
, LEN(duplicateReorderCodes
), &status
);
6095 if (!U_FAILURE(status
)) {
6096 log_err_status(status
, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6100 status
= U_ZERO_ERROR
;
6101 /* test for reorder codes after a reset code */
6102 ucol_setReorderCodes(myCollation
, reorderCodesStartingWithDefault
, LEN(reorderCodesStartingWithDefault
), &status
);
6103 if (!U_FAILURE(status
)) {
6104 log_err_status(status
, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6108 ucol_close(myCollation
);
6112 * Test reordering API.
6114 static void TestReorderingAPIWithRuleCreatedCollator(void)
6116 UErrorCode status
= U_ZERO_ERROR
;
6117 UCollator
*myCollation
;
6119 int32_t rulesReorderCodes
[2] = {USCRIPT_HAN
, USCRIPT_GREEK
};
6120 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6121 UCollationResult collResult
;
6122 int32_t retrievedReorderCodesLength
;
6123 int32_t retrievedReorderCodes
[10];
6124 UChar greekString
[] = { 0x03b1 };
6125 UChar punctuationString
[] = { 0x203e };
6126 UChar hanString
[] = { 0x65E5, 0x672C };
6129 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6131 /* build collator from rules */
6132 u_uastrcpy(rules
, "[reorder Hani Grek]");
6133 myCollation
= ucol_openRules(rules
, u_strlen(rules
), UCOL_DEFAULT
, UCOL_TERTIARY
, NULL
, &status
);
6134 if(U_FAILURE(status
)) {
6135 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6139 /* get the reordering */
6140 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6141 if (U_FAILURE(status
)) {
6142 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6145 if (retrievedReorderCodesLength
!= LEN(rulesReorderCodes
)) {
6146 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(rulesReorderCodes
));
6149 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6150 if (retrievedReorderCodes
[loopIndex
] != rulesReorderCodes
[loopIndex
]) {
6151 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6155 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), hanString
, LEN(hanString
));
6156 if (collResult
!= UCOL_GREATER
) {
6157 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
6162 /* set the reorderding */
6163 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6164 if (U_FAILURE(status
)) {
6165 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
6169 /* get the reordering */
6170 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6171 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
6172 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
6175 status
= U_ZERO_ERROR
;
6176 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6177 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6180 /* now let's really get it */
6181 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6182 if (U_FAILURE(status
)) {
6183 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6186 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6187 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6190 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6191 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
6192 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6196 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6197 if (collResult
!= UCOL_LESS
) {
6198 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
6202 /* clear the reordering */
6203 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
6204 if (U_FAILURE(status
)) {
6205 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
6209 /* get the reordering again */
6210 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6211 if (retrievedReorderCodesLength
!= 0) {
6212 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
6216 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6217 if (collResult
!= UCOL_GREATER
) {
6218 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
6222 ucol_close(myCollation
);
/*
 * qsort() comparator that orders int32_t codes in ascending order.
 * Returns a negative value, zero, or a positive value when *a is less than,
 * equal to, or greater than *b.  Plain comparisons are used instead of a
 * subtraction so that operands far apart cannot overflow.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
    const int32_t left = *(const int32_t *)a;
    const int32_t right = *(const int32_t *)b;

    return (left > right) - (left < right);
}
/* Checks that ucol_getEquivalentReorderCodes() returns the same expected list of script codes */
/* for both USCRIPT_GOTHIC and USCRIPT_SHAVIAN. */
6230 static void TestEquivalentReorderingScripts(void) {
6231 UErrorCode status
= U_ZERO_ERROR
;
6232 int32_t equivalentScripts
[50];
6233 int32_t equivalentScriptsLength
;
/* Expected script codes; sorted below before being compared with the API output. */
6235 int32_t equivalentScriptsResult
[] = {
6249 USCRIPT_OLD_SOUTH_ARABIAN
,
6251 USCRIPT_IMPERIAL_ARAMAIC
,
6252 USCRIPT_INSCRIPTIONAL_PARTHIAN
,
6253 USCRIPT_INSCRIPTIONAL_PAHLAVI
,
6255 USCRIPT_OLD_PERSIAN
,
6257 USCRIPT_EGYPTIAN_HIEROGLYPHS
,
6258 USCRIPT_PHONETIC_POLLARD
,
6259 USCRIPT_SORA_SOMPENG
,
6260 USCRIPT_MEROITIC_CURSIVE
,
6261 USCRIPT_MEROITIC_HIEROGLYPHS
/* Sort the expectation numerically so it can be compared index by index. */
/* NOTE(review): this assumes the API returns its codes in ascending numeric order - confirm. */
6264 qsort(equivalentScriptsResult
, LEN(equivalentScriptsResult
), sizeof(int32_t), compareUScriptCodes
);
6266 /* UScript.GOTHIC */
6267 equivalentScriptsLength
= ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC
, equivalentScripts
, LEN(equivalentScripts
), &status
);
6268 if (U_FAILURE(status
)) {
6269 log_err_status(status
, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
/* NOTE(review): the four fprintf lines below look like leftover debug output; confirm whether they are meant to be disabled. */
6273 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6274 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6275 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6276 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
/* NOTE(review): LEN() yields a size_t but is printed with %d below; a cast to int32_t would make the format match. */
6279 if (equivalentScriptsLength
!= LEN(equivalentScriptsResult
)) {
6280 log_err_status(status
, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult
), equivalentScriptsLength
);
/* Element-wise comparison against the sorted expectation. */
6283 for (loopIndex
= 0; loopIndex
< equivalentScriptsLength
; loopIndex
++) {
6284 if (equivalentScriptsResult
[loopIndex
] != equivalentScripts
[loopIndex
]) {
6285 log_err_status(status
, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult
[loopIndex
], equivalentScripts
[loopIndex
]);
6290 /* UScript.SHAVIAN */
/* Same checks again, this time starting from USCRIPT_SHAVIAN; the same expected list applies. */
6291 equivalentScriptsLength
= ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN
, equivalentScripts
, LEN(equivalentScripts
), &status
);
6292 if (U_FAILURE(status
)) {
6293 log_err_status(status
, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
6296 if (equivalentScriptsLength
!= LEN(equivalentScriptsResult
)) {
6297 log_err_status(status
, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult
), equivalentScriptsLength
);
6300 for (loopIndex
= 0; loopIndex
< equivalentScriptsLength
; loopIndex
++) {
6301 if (equivalentScriptsResult
[loopIndex
] != equivalentScripts
[loopIndex
]) {
6302 log_err_status(status
, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult
[loopIndex
], equivalentScripts
[loopIndex
]);
6308 static void TestReorderingAcrossCloning(void)
6310 UErrorCode status
= U_ZERO_ERROR
;
6311 UCollator
*myCollation
;
6312 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6313 UCollator
*clonedCollation
;
6315 int32_t retrievedReorderCodesLength
;
6316 int32_t retrievedReorderCodes
[10];
6319 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6321 /* build collator tertiary */
6322 myCollation
= ucol_open("", &status
);
6323 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
6324 if(U_FAILURE(status
)) {
6325 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6329 /* set the reorderding */
6330 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6331 if (U_FAILURE(status
)) {
6332 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
6336 /* clone the collator */
6337 clonedCollation
= ucol_safeClone(myCollation
, NULL
, &bufferSize
, &status
);
6338 if (U_FAILURE(status
)) {
6339 log_err_status(status
, "ERROR: cloning collator: %s\n", myErrorName(status
));
6343 /* get the reordering */
6344 retrievedReorderCodesLength
= ucol_getReorderCodes(clonedCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6345 if (U_FAILURE(status
)) {
6346 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6349 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6350 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6353 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6354 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
6355 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6360 /*uprv_free(buffer);*/
6361 ucol_close(myCollation
);
6362 ucol_close(clonedCollation
);
6366 * Utility function to test one collation reordering test case set.
6367 * @param testcases Array of test cases.
6368 * @param n_testcases Size of the array testcases.
6369 * @param reorderTokens Array of reordering codes.
6370 * @param reorderTokensLen Size of the array reorderTokens.
6372 static void doTestOneReorderingAPITestCase(const OneTestCase testCases
[], uint32_t testCasesLen
, const int32_t reorderTokens
[], int32_t reorderTokensLen
)
6374 uint32_t testCaseNum
;
6375 UErrorCode status
= U_ZERO_ERROR
;
6376 UCollator
*myCollation
;
6378 myCollation
= ucol_open("", &status
);
6379 if (U_FAILURE(status
)) {
6380 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6383 ucol_setReorderCodes(myCollation
, reorderTokens
, reorderTokensLen
, &status
);
6384 if(U_FAILURE(status
)) {
6385 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
6389 for (testCaseNum
= 0; testCaseNum
< testCasesLen
; ++testCaseNum
) {
6391 testCases
[testCaseNum
].source
,
6392 testCases
[testCaseNum
].target
,
6393 testCases
[testCaseNum
].result
6396 ucol_close(myCollation
);
/*
 * Runs one table of expected comparison results twice: once through collators
 * built from the rule strings in strRules (doTestOneTestCase) and once through
 * the reordering API with the codes in apiRules (doTestOneReorderingAPITestCase).
 * NOTE(review): the contents of strRules and apiRules are not visible here;
 * presumably they move Greek to the front -- confirm against the full file.
 */
6399 static void TestGreekFirstReorder(void)
6401 const char* strRules
[] = {
6405 const int32_t apiRules
[] = {
/*
 * Expected results; presumably each entry is {source, target, expected result}.
 * U+0391/U+03B1 are Greek alpha, U+0041 is 'A', U+0060 is '`' and U+E2DC is a
 * private use code point. Despite its name, the table is not limited to
 * private use characters.
 */
6409 const static OneTestCase privateUseCharacterStrings
[] = {
6410 { {0x0391}, {0x0391}, UCOL_EQUAL
},
6411 { {0x0041}, {0x0391}, UCOL_GREATER
},
6412 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER
},
6413 { {0x0060}, {0x0391}, UCOL_LESS
},
6414 { {0x0391}, {0xe2dc}, UCOL_LESS
},
6415 { {0x0391}, {0x0060}, UCOL_GREATER
},
6418 /* Test rules creation */
6419 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6421 /* Test collation reordering API */
6422 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6425 static void TestGreekLastReorder(void)
6427 const char* strRules
[] = {
6428 "[reorder Zzzz Grek]"
6431 const int32_t apiRules
[] = {
6432 USCRIPT_UNKNOWN
, USCRIPT_GREEK
6435 const static OneTestCase privateUseCharacterStrings
[] = {
6436 { {0x0391}, {0x0391}, UCOL_EQUAL
},
6437 { {0x0041}, {0x0391}, UCOL_LESS
},
6438 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS
},
6439 { {0x0060}, {0x0391}, UCOL_LESS
},
6440 { {0x0391}, {0xe2dc}, UCOL_GREATER
},
6443 /* Test rules creation */
6444 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6446 /* Test collation reordering API */
6447 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6450 static void TestNonScriptReorder(void)
6452 const char* strRules
[] = {
6453 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6456 const int32_t apiRules
[] = {
6457 USCRIPT_GREEK
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
,
6458 UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SPACE
, USCRIPT_UNKNOWN
,
6459 UCOL_REORDER_CODE_CURRENCY
6462 const static OneTestCase privateUseCharacterStrings
[] = {
6463 { {0x0391}, {0x0041}, UCOL_LESS
},
6464 { {0x0041}, {0x0391}, UCOL_GREATER
},
6465 { {0x0060}, {0x0041}, UCOL_LESS
},
6466 { {0x0060}, {0x0391}, UCOL_GREATER
},
6467 { {0x0024}, {0x0041}, UCOL_GREATER
},
6470 /* Test rules creation */
6471 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6473 /* Test collation reordering API */
6474 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
/*
 * Runs one table of expected comparison results for Han characters twice:
 * through collators built from strRules (doTestOneTestCase) and through the
 * reordering API with apiRules (doTestOneReorderingAPITestCase).
 * NOTE(review): the contents of strRules and apiRules are not visible here;
 * presumably they reorder Han to the front -- confirm against the full file.
 */
6477 static void TestHaniReorder(void)
6479 const char* strRules
[] = {
6482 const int32_t apiRules
[] = {
/*
 * Expected results; presumably each entry is {source, target, expected result}.
 * U+4E00 is a BMP ideograph, U+FA27 a compatibility ideograph, and the
 * surrogate pairs D86D DF40 / D869 DF00 encode the supplementary code points
 * U+2B740 / U+2A700. U+0041 is 'A' and U+0060 is '`'.
 */
6486 const static OneTestCase privateUseCharacterStrings
[] = {
6487 { {0x4e00}, {0x0041}, UCOL_LESS
},
6488 { {0x4e00}, {0x0060}, UCOL_GREATER
},
6489 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
6490 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
6491 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
6492 { {0xfa27}, {0x0041}, UCOL_LESS
},
6493 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
6496 /* Test rules creation */
6497 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6499 /* Test collation reordering API */
6500 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6503 static void TestHaniReorderWithOtherRules(void)
6505 const char* strRules
[] = {
6506 "[reorder Hani] &b<a"
6508 /*const int32_t apiRules[] = {
6512 const static OneTestCase privateUseCharacterStrings
[] = {
6513 { {0x4e00}, {0x0041}, UCOL_LESS
},
6514 { {0x4e00}, {0x0060}, UCOL_GREATER
},
6515 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
6516 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
6517 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
6518 { {0xfa27}, {0x0041}, UCOL_LESS
},
6519 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
6520 { {0x0062}, {0x0061}, UCOL_LESS
},
6523 /* Test rules creation */
6524 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6527 static void TestMultipleReorder(void)
6529 const char* strRules
[] = {
6530 "[reorder Grek Zzzz DIGIT Latn Hani]"
6533 const int32_t apiRules
[] = {
6534 USCRIPT_GREEK
, USCRIPT_UNKNOWN
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
, USCRIPT_HAN
6537 const static OneTestCase collationTestCases
[] = {
6538 { {0x0391}, {0x0041}, UCOL_LESS
},
6539 { {0x0031}, {0x0041}, UCOL_LESS
},
6540 { {0x0041}, {0x4e00}, UCOL_LESS
},
6543 /* Test rules creation */
6544 doTestOneTestCase(collationTestCases
, LEN(collationTestCases
), strRules
, LEN(strRules
));
6546 /* Test collation reordering API */
6547 doTestOneReorderingAPITestCase(collationTestCases
, LEN(collationTestCases
), apiRules
, LEN(apiRules
));
6551 * Test that covers issue reported in ticket 8814
6553 static void TestReorderWithNumericCollation(void)
6555 UErrorCode status
= U_ZERO_ERROR
;
6556 UCollator
*myCollation
;
6557 UCollator
*myReorderCollation
;
6558 int32_t reorderCodes
[] = {UCOL_REORDER_CODE_SPACE
, UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_GREEK
,USCRIPT_LATIN
, USCRIPT_HEBREW
, UCOL_REORDER_CODE_OTHERS
};
6559 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6560 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6561 UChar fortyS
[] = { 0x0053 };
6562 UChar fortyThreeP
[] = { 0x0050 };
6563 uint8_t fortyS_sortKey
[128];
6564 int32_t fortyS_sortKey_Length
;
6565 uint8_t fortyThreeP_sortKey
[128];
6566 int32_t fortyThreeP_sortKey_Length
;
6567 uint8_t fortyS_sortKey_reorder
[128];
6568 int32_t fortyS_sortKey_reorder_Length
;
6569 uint8_t fortyThreeP_sortKey_reorder
[128];
6570 int32_t fortyThreeP_sortKey_reorder_Length
;
6571 UCollationResult collResult
;
6572 UCollationResult collResultReorder
;
6574 log_verbose("Testing reordering with and without numeric collation\n");
6576 /* build collator tertiary with numeric */
6577 myCollation
= ucol_open("", &status
);
6579 ucol_setStrength(myCollation, UCOL_TERTIARY);
6581 ucol_setAttribute(myCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
6582 if(U_FAILURE(status
)) {
6583 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6587 /* build collator tertiary with numeric and reordering */
6588 myReorderCollation
= ucol_open("", &status
);
6590 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6592 ucol_setAttribute(myReorderCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
6593 ucol_setReorderCodes(myReorderCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6594 if(U_FAILURE(status
)) {
6595 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6599 fortyS_sortKey_Length
= ucol_getSortKey(myCollation
, fortyS
, LEN(fortyS
), fortyS_sortKey
, 128);
6600 fortyThreeP_sortKey_Length
= ucol_getSortKey(myCollation
, fortyThreeP
, LEN(fortyThreeP
), fortyThreeP_sortKey
, 128);
6601 fortyS_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyS
, LEN(fortyS
), fortyS_sortKey_reorder
, 128);
6602 fortyThreeP_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyThreeP
, LEN(fortyThreeP
), fortyThreeP_sortKey_reorder
, 128);
6604 if (fortyS_sortKey_Length
< 0 || fortyThreeP_sortKey_Length
< 0 || fortyS_sortKey_reorder_Length
< 0 || fortyThreeP_sortKey_reorder_Length
< 0) {
6605 log_err_status(status
, "ERROR: couldn't generate sort keys\n");
6608 collResult
= ucol_strcoll(myCollation
, fortyS
, LEN(fortyS
), fortyThreeP
, LEN(fortyThreeP
));
6609 collResultReorder
= ucol_strcoll(myReorderCollation
, fortyS
, LEN(fortyS
), fortyThreeP
, LEN(fortyThreeP
));
6611 fprintf(stderr, "\tcollResult = %x\n", collResult);
6612 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6613 fprintf(stderr, "\nfortyS\n");
6614 for (i = 0; i < fortyS_sortKey_Length; i++) {
6615 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6617 fprintf(stderr, "\nfortyThreeP\n");
6618 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6619 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6622 if (collResult
!= collResultReorder
) {
6623 log_err_status(status
, "ERROR: collation results should have been the same.\n");
6627 ucol_close(myCollation
);
6628 ucol_close(myReorderCollation
);
/*
 * Compares two zero-terminated byte strings (such as sort keys) byte by byte.
 *
 * @param a First zero-terminated byte string.
 * @param b Second zero-terminated byte string.
 * @return 0 if both are identical up to and including the terminating zero,
 *         -1 if the first differing byte of a is smaller than that of b,
 *         1 otherwise.
 *
 * Both inputs must be zero-terminated: no length limit is applied, so the
 * terminator is the only thing that stops the scan for equal inputs.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator of both strings without a difference. */
            return 0;
        }
        ++a;
        ++b;
    }
    return (*a < *b) ? -1 : 1;
}
6641 static void TestImportRulesDeWithPhonebook(void)
6643 const char* normalRules
[] = {
6644 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6645 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6646 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6648 const OneTestCase normalTests
[] = {
6649 { {0x00e6}, {0x00c6}, UCOL_LESS
},
6650 { {0x00fc}, {0x00dc}, UCOL_GREATER
},
6653 const char* importRules
[] = {
6654 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6655 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6656 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6658 const OneTestCase importTests
[] = {
6659 { {0x00e6}, {0x00c6}, UCOL_LESS
},
6660 { {0x00fc}, {0x00dc}, UCOL_LESS
},
6663 doTestOneTestCase(normalTests
, LEN(normalTests
), normalRules
, LEN(normalRules
));
6664 doTestOneTestCase(importTests
, LEN(importTests
), importRules
, LEN(importRules
));
6668 static void TestImportRulesFiWithEor(void)
6671 const char* defaultRules
[] = {
6672 "&a<b", /* Dummy rule. */
6675 const OneTestCase defaultTests
[] = {
6676 { {0x0110}, {0x00F0}, UCOL_LESS
},
6677 { {0x00a3}, {0x00a5}, UCOL_LESS
},
6678 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
6681 /* European Ordering rules: ignore currency characters. */
6682 const char* eorRules
[] = {
6683 "[import root-u-co-eor]",
6686 const OneTestCase eorTests
[] = {
6687 { {0x0110}, {0x00F0}, UCOL_LESS
},
6688 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
6689 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
6692 const char* fiStdRules
[] = {
6693 "[import fi-u-co-standard]",
6696 const OneTestCase fiStdTests
[] = {
6697 { {0x0110}, {0x00F0}, UCOL_GREATER
},
6698 { {0x00a3}, {0x00a5}, UCOL_LESS
},
6699 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
6702 /* Both European Ordering Rules and Fi Standard Rules. */
6703 const char* eorFiStdRules
[] = {
6704 "[import root-u-co-eor][import fi-u-co-standard]",
6707 /* This is essentially same as the one before once fi.txt is updated with import. */
6708 const char* fiEorRules
[] = {
6709 "[import fi-u-co-eor]",
6712 const OneTestCase fiEorTests
[] = {
6713 { {0x0110}, {0x00F0}, UCOL_GREATER
},
6714 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
6715 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
6718 doTestOneTestCase(defaultTests
, LEN(defaultTests
), defaultRules
, LEN(defaultRules
));
6719 doTestOneTestCase(eorTests
, LEN(eorTests
), eorRules
, LEN(eorRules
));
6720 doTestOneTestCase(fiStdTests
, LEN(fiStdTests
), fiStdRules
, LEN(fiStdRules
));
6721 doTestOneTestCase(fiEorTests
, LEN(fiEorTests
), eorFiStdRules
, LEN(eorFiStdRules
));
6723 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
6726 "[import root-u-co-eor][import fi-u-co-standard]"
6731 /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
6738 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
6739 * the resource files are built with -includeUnihanColl option.
6740 * TODO: Uncomment this function and make it work when unihan rules are built by default.
/*
 * Compares U+3402 with U+4E1E twice: with a dummy rule the pair is expected
 * to compare GREATER, with the imported unihan rules it is expected to compare
 * LESS.
 * NOTE(review): per the note above, this function is presumably disabled in
 * the full file -- confirm before relying on it.
 */
6742 static void TestImportRulesCJKWithUnihan(void)
/* Baseline rule set that does not touch the characters under test. */
6745 const char* defaultRules
[] = {
6746 "&a<b", /* Dummy rule. */
6749 const OneTestCase defaultTests
[] = {
6750 { {0x3402}, {0x4e1e}, UCOL_GREATER
},
6753 /* Unihan rules, imported from the Korean (ko) tailoring. */
6754 const char* unihanRules
[] = {
6755 "[import ko-u-co-unihan]",
6758 const OneTestCase unihanTests
[] = {
6759 { {0x3402}, {0x4e1e}, UCOL_LESS
},
6762 doTestOneTestCase(defaultTests
, LEN(defaultTests
), defaultRules
, LEN(defaultRules
));
6763 doTestOneTestCase(unihanTests
, LEN(unihanTests
), unihanRules
, LEN(unihanRules
));
6768 static void TestImport(void)
6772 UCollator
* viescoll
;
6773 UCollator
* importviescoll
;
6775 UErrorCode status
= U_ZERO_ERROR
;
6777 int32_t viruleslength
;
6779 int32_t esruleslength
;
6781 int32_t viesruleslength
;
6782 char srules
[500] = "[import vi][import es]";
6784 uint32_t length
= 0;
6797 USet
* importTailoredSet
;
6800 vicoll
= ucol_open("vi", &status
);
6801 if(U_FAILURE(status
)){
6802 log_err_status(status
, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status
));
6806 virules
= (UChar
*) ucol_getRules(vicoll
, &viruleslength
);
6807 escoll
= ucol_open("es", &status
);
6808 esrules
= (UChar
*) ucol_getRules(escoll
, &esruleslength
);
6809 viesrules
= (UChar
*)uprv_malloc((viruleslength
+esruleslength
+1)*sizeof(UChar
*));
6811 u_strcat(viesrules
, virules
);
6812 u_strcat(viesrules
, esrules
);
6813 viesruleslength
= viruleslength
+ esruleslength
;
6814 viescoll
= ucol_openRules(viesrules
, viesruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6816 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6817 length
= u_unescape(srules
, rules
, 500);
6818 importviescoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6819 if(U_FAILURE(status
)){
6820 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6824 tailoredSet
= ucol_getTailoredSet(viescoll
, &status
);
6825 importTailoredSet
= ucol_getTailoredSet(importviescoll
, &status
);
6827 if(!uset_equals(tailoredSet
, importTailoredSet
)){
6828 log_err("Tailored sets not equal");
6831 uset_close(importTailoredSet
);
6833 itemCount
= uset_getItemCount(tailoredSet
);
6835 for( i
= 0; i
< itemCount
; i
++){
6836 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
6838 for (; start
<= end
; start
++){
6840 U16_APPEND(str
, k
, 500, start
, b
);
6841 ucol_getSortKey(viescoll
, str
, 1, sk1
, 500);
6842 ucol_getSortKey(importviescoll
, str
, 1, sk2
, 500);
6843 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6844 log_err("Sort key for %s not equal\n", str
);
6849 ucol_getSortKey(viescoll
, str
, strLength
, sk1
, 500);
6850 ucol_getSortKey(importviescoll
, str
, strLength
, sk2
, 500);
6851 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6852 log_err("ZZSort key for %s not equal\n", str
);
6859 uset_close(tailoredSet
);
6861 uprv_free(viesrules
);
6865 ucol_close(viescoll
);
6866 ucol_close(importviescoll
);
6869 static void TestImportWithType(void)
6873 UCollator
* videcoll
;
6874 UCollator
* importvidecoll
;
6876 UErrorCode status
= U_ZERO_ERROR
;
6877 const UChar
* virules
;
6878 int32_t viruleslength
;
6879 const UChar
* derules
;
6880 int32_t deruleslength
;
6882 int32_t videruleslength
;
6883 const char srules
[500] = "[import vi][import de-u-co-phonebk]";
6885 uint32_t length
= 0;
6897 USet
* importTailoredSet
;
6899 vicoll
= ucol_open("vi", &status
);
6900 if(U_FAILURE(status
)){
6901 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6904 virules
= ucol_getRules(vicoll
, &viruleslength
);
6905 /* decoll = ucol_open("de@collation=phonebook", &status); */
6906 decoll
= ucol_open("de-u-co-phonebk", &status
);
6907 if(U_FAILURE(status
)){
6908 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6913 derules
= ucol_getRules(decoll
, &deruleslength
);
6914 viderules
= (UChar
*)uprv_malloc((viruleslength
+deruleslength
+1)*sizeof(UChar
*));
6916 u_strcat(viderules
, virules
);
6917 u_strcat(viderules
, derules
);
6918 videruleslength
= viruleslength
+ deruleslength
;
6919 videcoll
= ucol_openRules(viderules
, videruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6921 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6922 length
= u_unescape(srules
, rules
, 500);
6923 importvidecoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6924 if(U_FAILURE(status
)){
6925 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6929 tailoredSet
= ucol_getTailoredSet(videcoll
, &status
);
6930 importTailoredSet
= ucol_getTailoredSet(importvidecoll
, &status
);
6932 if(!uset_equals(tailoredSet
, importTailoredSet
)){
6933 log_err("Tailored sets not equal");
6936 uset_close(importTailoredSet
);
6938 itemCount
= uset_getItemCount(tailoredSet
);
6940 for( i
= 0; i
< itemCount
; i
++){
6941 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
6943 for (; start
<= end
; start
++){
6945 U16_APPEND_UNSAFE(str
, k
, start
);
6946 ucol_getSortKey(videcoll
, str
, 1, sk1
, 500);
6947 ucol_getSortKey(importvidecoll
, str
, 1, sk2
, 500);
6948 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6949 log_err("Sort key for %s not equal\n", str
);
6954 ucol_getSortKey(videcoll
, str
, strLength
, sk1
, 500);
6955 ucol_getSortKey(importvidecoll
, str
, strLength
, sk2
, 500);
6956 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6957 log_err("Sort key for %s not equal\n", str
);
6964 uset_close(tailoredSet
);
6966 uprv_free(viderules
);
6968 ucol_close(videcoll
);
6969 ucol_close(importvidecoll
);
6974 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
6975 static const UChar longUpperStr1
[]= { /* 155 chars */
6976 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
6977 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
6978 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
6979 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
6980 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
6981 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
6982 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
6983 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
6984 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
6985 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
6988 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
6989 static const UChar longUpperStr2
[]= { /* 125 chars, > 128 collation elements */
6990 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6991 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6992 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6993 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6994 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
6997 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
6998 static const UChar longUpperStr3
[]= { /* 324 chars */
6999 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7000 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7001 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7002 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7003 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7004 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7005 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7006 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7007 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7008 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7009 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7010 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
/* Element count of a true array (not a pointer); same computation as the file's LEN() macro. */
7013 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
/*
 * Fields of LongUpperStrItem: a pointer to a test string and its length in
 * UChars. NOTE(review): the typedef/struct lines themselves are not visible
 * in this listing.
 */
7016 const UChar
* longUpperStrPtr
;
7017 int32_t longUpperStrLen
;
/*
 * TestCaseLevelBufferOverflow walks this table until it finds an entry whose
 * longUpperStrPtr is NULL and requires each string's sort key to compare lower
 * than the previous one, i.e. the strings must be listed in descending
 * collation order. NOTE(review): the terminating NULL entry is not visible in
 * this listing -- confirm it is present.
 */
7020 /* String pointers must be in reverse collation order of the corresponding strings */
7021 static const LongUpperStrItem longUpperStrItems
[] = {
7022 { longUpperStr1
, MY_ARRAY_LEN(longUpperStr1
) },
7023 { longUpperStr2
, MY_ARRAY_LEN(longUpperStr2
) },
7024 { longUpperStr3
, MY_ARRAY_LEN(longUpperStr3
) },
/* Capacity in bytes of the sort key buffers used by TestCaseLevelBufferOverflow. */
7028 enum { kCollKeyLenMax
= 800 }; /* longest expected is 749, but may change with collation changes */
7030 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7031 static void TestCaseLevelBufferOverflow(void)
7033 UErrorCode status
= U_ZERO_ERROR
;
7034 UCollator
* ucol
= ucol_open("root", &status
);
7035 if ( U_SUCCESS(status
) ) {
7036 ucol_setAttribute(ucol
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
7037 if ( U_SUCCESS(status
) ) {
7038 const LongUpperStrItem
* itemPtr
;
7039 uint8_t sortKeyA
[kCollKeyLenMax
], sortKeyB
[kCollKeyLenMax
];
7040 for ( itemPtr
= longUpperStrItems
; itemPtr
->longUpperStrPtr
!= NULL
; itemPtr
++ ) {
7042 if (itemPtr
> longUpperStrItems
) {
7043 uprv_strcpy((char *)sortKeyB
, (char *)sortKeyA
);
7045 sortKeyLen
= ucol_getSortKey(ucol
, itemPtr
->longUpperStrPtr
, itemPtr
->longUpperStrLen
, sortKeyA
, kCollKeyLenMax
);
7046 if (sortKeyLen
<= 0 || sortKeyLen
> kCollKeyLenMax
) {
7047 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen
);
7050 if ( itemPtr
> longUpperStrItems
) {
7051 int compareResult
= uprv_strcmp((char *)sortKeyA
, (char *)sortKeyB
);
7052 if (compareResult
>= 0) {
7053 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult
);
7058 log_err_status(status
, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status
));
7062 log_err_status(status
, "ERROR in ucol_open for root: %s\n", myErrorName(status
));
/*
 * Registers test function x through addTest() under the path
 * "tscoll/cmsccoll/<function name>"; the stringized name comes from "# x".
 * Expects a variable named root to be in scope at the point of use.
 */
7067 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
/*
 * Adds this file's collation tests to the test tree passed in as root.
 * Entries that are commented out below are deliberately not registered; the
 * trailing comments give the reason where one is recorded.
 */
7069 void addMiscCollTest(TestNode
** root
)
7071 TEST(TestRuleOptions
);
7072 TEST(TestBeforePrefixFailure
);
7073 TEST(TestContractionClosure
);
7074 TEST(TestPrefixCompose
);
7075 TEST(TestStrCollIdenticalPrefix
);
7077 TEST(TestNewJapanese
);
7078 /*TEST(TestLimitations);*/
7080 TEST(TestExtremeCompression
);
7081 TEST(TestSurrogates
);
7082 TEST(TestVariableTopSetting
);
7083 TEST(TestBocsuCoverage
);
7084 TEST(TestCyrillicTailoring
);
7086 TEST(IncompleteCntTest
);
7087 TEST(BlackBirdTest
);
7089 TEST(BillFairmanTest
);
7090 TEST(RamsRulesTest
);
7091 TEST(IsTailoredTest
);
7092 TEST(TestCollations
);
7094 TEST(TestImplicitTailoring
);
7095 TEST(TestFCDProblem
);
7096 TEST(TestEmptyRule
);
7097 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
7099 /*TEST(TestJ831);*/ /* we changed lv locale */
7101 TEST(TestRedundantRules
);
7102 TEST(TestExpansionSyntax
);
7103 TEST(TestHangulTailoring
);
7105 TEST(TestIncrementalNormalize
);
7106 TEST(TestComposeDecompose
);
7107 TEST(TestCompressOverlap
);
7108 TEST(TestContraction
);
7109 TEST(TestExpansion
);
7110 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
7111 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
7113 TEST(TestSuppressContractions
);
7115 TEST(TestHebrewUCA
);
7116 TEST(TestPartialSortKeyTermination
);
7121 TEST(TestNumericCollation
);
7122 TEST(TestTibetanConformance
);
7123 TEST(TestPinyinProblem
);
7124 TEST(TestImplicitGeneration
);
7125 TEST(TestSeparateTrees
);
7126 TEST(TestBeforePinyin
);
7127 TEST(TestBeforeTightening
);
7128 /*TEST(TestMoreBefore);*/
7129 TEST(TestTailorNULL
);
7130 TEST(TestUpperFirstQuaternary
);
7136 TEST(TestSortKeyConsistency
);
7137 TEST(TestVI5913
); /* VI, RO tailored rules */
7138 TEST(TestCroatianSortKey
);
7139 TEST(TestTailor6179
);
7140 TEST(TestUCAPrecontext
);
7141 TEST(TestOutOfBuffer5468
);
7142 TEST(TestSameStrengthList
);
7144 TEST(TestSameStrengthListQuoted
);
7145 TEST(TestSameStrengthListSupplemental
);
7146 TEST(TestSameStrengthListQwerty
);
7147 TEST(TestSameStrengthListQuotedQwerty
);
7148 TEST(TestSameStrengthListRanges
);
7149 TEST(TestSameStrengthListSupplementalRanges
);
7150 TEST(TestSpecialCharacters
);
7151 TEST(TestPrivateUseCharacters
);
7152 TEST(TestPrivateUseCharactersInList
);
7153 TEST(TestPrivateUseCharactersInRange
);
7154 TEST(TestInvalidListsAndRanges
);
7155 TEST(TestImportRulesDeWithPhonebook
);
7156 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
7157 /* TEST(TestImportRulesCJKWithUnihan); */
7159 TEST(TestImportWithType
);
7161 TEST(TestBeforeRuleWithScriptReordering
);
7162 TEST(TestNonLeadBytesDuringCollationReordering
);
7163 TEST(TestReorderingAPI
);
7164 TEST(TestReorderingAPIWithRuleCreatedCollator
);
7165 TEST(TestEquivalentReorderingScripts
);
7166 TEST(TestGreekFirstReorder
);
7167 TEST(TestGreekLastReorder
);
7168 TEST(TestNonScriptReorder
);
7169 TEST(TestHaniReorder
);
7170 TEST(TestHaniReorderWithOtherRules
);
7171 TEST(TestMultipleReorder
);
7172 TEST(TestReorderingAcrossCloning
);
7173 TEST(TestReorderWithNumericCollation
);
7175 TEST(TestCaseLevelBufferOverflow
);
7178 #endif /* #if !UCONFIG_NO_COLLATION */