1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
12 *******************************************************************************/
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_COLLATION
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
30 #include "unicode/ustring.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
45 #define MAX_TOKEN_LEN 16
/*
 * Function type for a pluggable string-comparison routine.
 * It receives an opaque collator pointer, an int `object`, and two UChar
 * strings (source and target), each with an explicit length, and returns a
 * UCollationResult.
 * NOTE(review): the meaning of `object` is not visible here - confirm with
 * the implementations of this type.
 */
47 typedef UCollationResult
tst_strcoll(void *collator
, const int object
,
48 const UChar
*source
, const int sLen
,
49 const UChar
*target
, const int tLen
);
53 const static char cnt1
[][10] = {
68 const static char cnt2
[][10] = {
80 static void IncompleteCntTest(void)
82 UErrorCode status
= U_ZERO_ERROR
;
87 UCollator
*coll
= NULL
;
88 uint32_t i
= 0, j
= 0;
91 u_uastrcpy(temp
, " & Z < ABC < Q < B");
93 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
95 if(U_SUCCESS(status
)) {
96 size
= UPRV_LENGTHOF(cnt1
);
97 for(i
= 0; i
< size
-1; i
++) {
98 for(j
= i
+1; j
< size
; j
++) {
99 UCollationElements
*iter
;
100 u_uastrcpy(t1
, cnt1
[i
]);
101 u_uastrcpy(t2
, cnt1
[j
]);
102 doTest(coll
, t1
, t2
, UCOL_LESS
);
103 /* synwee : added collation element iterator test */
104 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
105 if (U_FAILURE(status
)) {
106 log_err("Creation of iterator failed\n");
110 ucol_closeElements(iter
);
118 u_uastrcpy(temp
, " & Z < DAVIS < MARK <DAV");
119 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
121 if(U_SUCCESS(status
)) {
122 size
= UPRV_LENGTHOF(cnt2
);
123 for(i
= 0; i
< size
-1; i
++) {
124 for(j
= i
+1; j
< size
; j
++) {
125 UCollationElements
*iter
;
126 u_uastrcpy(t1
, cnt2
[i
]);
127 u_uastrcpy(t2
, cnt2
[j
]);
128 doTest(coll
, t1
, t2
, UCOL_LESS
);
130 /* synwee : added collation element iterator test */
131 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
132 if (U_FAILURE(status
)) {
133 log_err("Creation of iterator failed\n");
137 ucol_closeElements(iter
);
147 const static char shifted
[][20] = {
159 const static UCollationResult shiftedTert
[] = {
171 const static char nonignorable
[][20] = {
183 static void BlackBirdTest(void) {
184 UErrorCode status
= U_ZERO_ERROR
;
188 uint32_t i
= 0, j
= 0;
190 UCollator
*coll
= ucol_open("en_US", &status
);
192 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
193 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
195 if(U_SUCCESS(status
)) {
196 size
= UPRV_LENGTHOF(nonignorable
);
197 for(i
= 0; i
< size
-1; i
++) {
198 for(j
= i
+1; j
< size
; j
++) {
199 u_uastrcpy(t1
, nonignorable
[i
]);
200 u_uastrcpy(t2
, nonignorable
[j
]);
201 doTest(coll
, t1
, t2
, UCOL_LESS
);
206 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
207 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
209 if(U_SUCCESS(status
)) {
210 size
= UPRV_LENGTHOF(shifted
);
211 for(i
= 0; i
< size
-1; i
++) {
212 for(j
= i
+1; j
< size
; j
++) {
213 u_uastrcpy(t1
, shifted
[i
]);
214 u_uastrcpy(t2
, shifted
[j
]);
215 doTest(coll
, t1
, t2
, UCOL_LESS
);
220 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
221 if(U_SUCCESS(status
)) {
222 size
= UPRV_LENGTHOF(shifted
);
223 for(i
= 1; i
< size
; i
++) {
224 u_uastrcpy(t1
, shifted
[i
-1]);
225 u_uastrcpy(t2
, shifted
[i
]);
226 doTest(coll
, t1
, t2
, shiftedTert
[i
]);
233 const static UChar testSourceCases
[][MAX_TOKEN_LEN
] = {
234 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236 {0x0041/*'A'*/, 0x0300, 0x0000},
237 {0x00C0, 0x0301, 0x0000},
238 /* this would work with forced normalization */
239 {0x00C0, 0x0316, 0x0000}
242 const static UChar testTargetCases
[][MAX_TOKEN_LEN
] = {
243 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
246 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247 /* this would work with forced normalization */
248 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
251 const static UCollationResult results
[] = {
259 static void FunkyATest(void)
263 UErrorCode status
= U_ZERO_ERROR
;
264 UCollator
*myCollation
;
265 myCollation
= ucol_open("en_US", &status
);
266 if(U_FAILURE(status
)){
267 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
270 log_verbose("Testing some A letters, for some reason\n");
271 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
272 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
273 for (i
= 0; i
< 4 ; i
++)
275 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
277 ucol_close(myCollation
);
280 UColAttributeValue caseFirst
[] = {
287 UColAttributeValue alternateHandling
[] = {
292 UColAttributeValue caseLevel
[] = {
297 UColAttributeValue strengths
[] = {
306 static const char * strengthsC
[] = {
314 static const char * caseFirstC
[] = {
321 static const char * alternateHandlingC
[] = {
322 "UCOL_NON_IGNORABLE",
326 static const char * caseLevelC
[] = {
331 /* Not used currently. Diagnostic only: it prints to stderr and checks nothing. */
/*
 * Dumps the sort key of the string in `m` (seeded from "Mark Davis") for
 * every combination of the caseFirst, alternateHandling, caseLevel and
 * strengths tables, using a collator opened for "en_US".
 * For each combination the attribute is set on the collator, its label from
 * the matching *C name table is printed, and at the innermost level the sort
 * key is computed and printed through ucol_sortKeyToString.
 * NOTE(review): m, sizem, buffer and len are declared on lines not visible
 * in this view.
 */
332 static void PrintMarkDavis(void)
334 UErrorCode status
= U_ZERO_ERROR
;
336 uint8_t sortkey
[256];
337 UCollator
*coll
= ucol_open("en_US", &status
);
338 uint32_t h
,i
,j
,k
, sortkeysize
;
343 log_verbose("PrintMarkDavis");
345 u_uastrcpy(m
, "Mark Davis");
/* Print every UTF-16 code unit of the test string as \uXXXX. */
351 for(i
= 0; i
<sizem
; i
++) {
352 fprintf(stderr
, "\\u%04X ", m
[i
]);
354 fprintf(stderr
, "\n");
/*
 * Outer loop: one pass per case-first setting.
 * The setting must be indexed with h, this loop's own counter. The previous
 * code used i, which is left over from the dump loop above, so the attribute
 * applied did not match the caseFirstC[h] label printed next and the read
 * could fall outside the caseFirst table.
 */
356 for(h
= 0; h
<UPRV_LENGTHOF(caseFirst
); h
++) {
357 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, caseFirst
[h
], &status
);
358 fprintf(stderr
, "caseFirst: %s\n", caseFirstC
[h
]);
/* Second level: alternate-handling settings (i is reused as the counter). */
360 for(i
= 0; i
<UPRV_LENGTHOF(alternateHandling
); i
++) {
361 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, alternateHandling
[i
], &status
);
362 fprintf(stderr
, " AltHandling: %s\n", alternateHandlingC
[i
]);
/* Third level: case-level settings. */
364 for(j
= 0; j
<UPRV_LENGTHOF(caseLevel
); j
++) {
365 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, caseLevel
[j
], &status
);
366 fprintf(stderr
, " caseLevel: %s\n", caseLevelC
[j
]);
/* Innermost level: strength; the sort key is generated and printed here. */
368 for(k
= 0; k
<UPRV_LENGTHOF(strengths
); k
++) {
369 ucol_setAttribute(coll
, UCOL_STRENGTH
, strengths
[k
], &status
);
370 sortkeysize
= ucol_getSortKey(coll
, m
, sizem
, sortkey
, 256);
371 fprintf(stderr
, " strength: %s\n Sortkey: ", strengthsC
[k
]);
372 fprintf(stderr
, "%s\n", ucol_sortKeyToString(coll
, sortkey
, buffer
, &len
));
383 static void BillFairmanTest(void) {
385 ** check for actual locale via ICU resource bundles
387 ** lp points to the original locale ("fr_FR_....")
390 UResourceBundle
*lr
,*cr
;
391 UErrorCode lec
= U_ZERO_ERROR
;
392 const char *lp
= "fr_FR_you_ll_never_find_this_locale";
394 log_verbose("BillFairmanTest\n");
396 lr
= ures_open(NULL
,lp
,&lec
);
398 cr
= ures_getByKey(lr
,"collations",0,&lec
);
400 lp
= ures_getLocaleByType(cr
, ULOC_ACTUAL_LOCALE
, &lec
);
402 if (U_SUCCESS(lec
)) {
403 if(strcmp(lp
, "fr") != 0) {
404 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp
);
414 const static char chTest
[][20] = {
417 "ca", "cb", "cx", "cy", "CZ",
418 "c\\u030C", "C\\u030C",
421 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
422 "ch", "cH", "Ch", "CH",
423 "cha", "charly", "che", "chh", "chch", "chr",
426 "r\\u030C", "R\\u030C",
429 "s\\u030C", "S\\u030C",
431 "z\\u030C", "Z\\u030C"
434 static void TestChMove(void) {
438 uint32_t i
= 0, j
= 0;
440 UErrorCode status
= U_ZERO_ERROR
;
442 UCollator
*coll
= ucol_open("cs", &status
);
444 if(U_SUCCESS(status
)) {
445 size
= UPRV_LENGTHOF(chTest
);
446 for(i
= 0; i
< size
-1; i
++) {
447 for(j
= i
+1; j
< size
; j
++) {
448 u_unescape(chTest
[i
], t1
, 256);
449 u_unescape(chTest
[j
], t2
, 256);
450 doTest(coll
, t1
, t2
, UCOL_LESS
);
455 log_data_err("Can't open collator");
464 const static char impTest[][20] = {
475 static void TestImplicitTailoring(void) {
476 static const struct {
478 const char *data
[10];
482 /* Tailor b and c before U+4E00. */
483 "&[before 1]\\u4e00 < b < c "
484 /* Now, before U+4E00 is c; put d and e after that. */
485 "&[before 1]\\u4e00 < d < e",
486 { "b", "c", "d", "e", "\\u4e00"}, 5 },
487 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
494 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
495 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
502 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
504 uint32_t i = 0, j = 0;
506 uint32_t ruleLen = 0;
507 UErrorCode status = U_ZERO_ERROR;
508 UCollator *coll = NULL;
509 ruleLen = u_unescape(rule, t1, 256);
511 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
513 if(U_SUCCESS(status)) {
514 size = UPRV_LENGTHOF(impTest);
515 for(i = 0; i < size-1; i++) {
516 for(j = i+1; j < size; j++) {
517 u_unescape(impTest[i], t1, 256);
518 u_unescape(impTest[j], t2, 256);
519 doTest(coll, t1, t2, UCOL_LESS);
524 log_err("Can't open collator");
/*
 * Compares the strings U+0430 U+0306 U+0325 (s1) and U+04D1 U+0325 (s2)
 * with a collator opened for the "" locale and expects UCOL_EQUAL twice:
 * first with UCOL_NORMALIZATION_MODE set to UCOL_OFF, then with UCOL_ON.
 * NOTE(review): t1 and t2 (the unescaped UChar buffers) are declared on
 * lines not visible in this view; the capacity passed to u_unescape is 256.
 */
530 static void TestFCDProblem(void) {
/* Escaped source text; u_unescape below expands the \uXXXX sequences. */
534 const char *s1
= "\\u0430\\u0306\\u0325";
535 const char *s2
= "\\u04D1\\u0325";
537 UErrorCode status
= U_ZERO_ERROR
;
538 UCollator
*coll
= ucol_open("", &status
);
539 u_unescape(s1
, t1
, 256);
540 u_unescape(s2
, t2
, 256);
/* Pass 1: normalization off. */
542 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
543 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
/* Pass 2: normalization on; the expected result is unchanged. */
545 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
546 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
552 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
553 We're only using NFC/NFD in this test.
555 #define NORM_BUFFER_TEST_LEN 18
558 UChar NFC
[NORM_BUFFER_TEST_LEN
];
559 UChar NFD
[NORM_BUFFER_TEST_LEN
];
562 static void TestComposeDecompose(void) {
563 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564 static const UChar UNICODESET_STR
[] = {
565 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
570 int32_t i
= 0, j
= 0;
572 UErrorCode status
= U_ZERO_ERROR
;
573 const char *locName
= NULL
;
577 uint32_t noCases
= 0;
578 UCollator
*coll
= NULL
;
580 UChar comp
[NORM_BUFFER_TEST_LEN
];
582 UCollationElements
*iter
;
583 USet
*charsToTest
= uset_openPattern(UNICODESET_STR
, -1, &status
);
584 int32_t charsToTestSize
;
586 noOfLoc
= uloc_countAvailable();
588 coll
= ucol_open("", &status
);
589 if (U_FAILURE(status
)) {
590 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status
));
591 uset_close(charsToTest
);
594 charsToTestSize
= uset_size(charsToTest
);
595 if (charsToTestSize
<= 0) {
596 log_err("Set was zero. Missing data?\n");
597 uset_close(charsToTest
);
600 t
= (tester
**)malloc(charsToTestSize
* sizeof(tester
*));
601 t
[0] = (tester
*)malloc(sizeof(tester
));
602 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize
);
604 for(u
= 0; u
< charsToTestSize
; u
++) {
605 UChar32 ch
= uset_charAt(charsToTest
, u
);
607 U16_APPEND_UNSAFE(comp
, len
, ch
);
608 nfcSize
= unorm_normalize(comp
, len
, UNORM_NFC
, 0, t
[noCases
]->NFC
, NORM_BUFFER_TEST_LEN
, &status
);
609 nfdSize
= unorm_normalize(comp
, len
, UNORM_NFD
, 0, t
[noCases
]->NFD
, NORM_BUFFER_TEST_LEN
, &status
);
611 if(nfcSize
!= nfdSize
|| (uprv_memcmp(t
[noCases
]->NFC
, t
[noCases
]->NFD
, nfcSize
* sizeof(UChar
)) != 0)
612 || (len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0))) {
614 if(len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0)) {
615 u_strncpy(t
[noCases
]->NFC
, comp
, len
);
616 t
[noCases
]->NFC
[len
] = 0;
619 t
[noCases
] = (tester
*)malloc(sizeof(tester
));
620 uprv_memset(t
[noCases
], 0, sizeof(tester
));
623 log_verbose("Testing %d/%d of possible test cases\n", noCases
, charsToTestSize
);
624 uset_close(charsToTest
);
627 for(u
=0; u
<(UChar32
)noCases
; u
++) {
628 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
629 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t
[u
]->u
);
630 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
634 for(u = 0; u < charsToTestSize; u++) {
636 log_verbose("%08X ", u);
638 uprv_memset(t[noCases], 0, sizeof(tester));
641 U16_APPEND_UNSAFE(comp, len, u);
643 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
644 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
645 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
646 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
652 log_verbose("Testing locales, number of cases = %i\n", noCases
);
653 for(i
= 0; i
<noOfLoc
; i
++) {
654 status
= U_ZERO_ERROR
;
655 locName
= uloc_getAvailable(i
);
656 if(hasCollationElements(locName
)) {
659 int32_t nameSize
= uloc_getDisplayName(locName
, NULL
, name
, sizeof(cName
), &status
);
661 for(j
= 0; j
<nameSize
; j
++) {
662 cName
[j
] = (char)name
[j
];
665 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
667 coll
= ucol_open(locName
, &status
);
668 ucol_setStrength(coll
, UCOL_IDENTICAL
);
669 iter
= ucol_openElements(coll
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
671 for(u
=0; u
<(UChar32
)noCases
; u
++) {
672 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
673 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t
[u
]->u
, cName
);
674 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
675 log_verbose("Testing NFC\n");
676 ucol_setText(iter
, t
[u
]->NFC
, u_strlen(t
[u
]->NFC
), &status
);
678 log_verbose("Testing NFD\n");
679 ucol_setText(iter
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
683 ucol_closeElements(iter
);
687 for(u
= 0; u
<= (UChar32
)noCases
; u
++) {
/*
 * Opens a collator with ucol_openRules from an empty rule string: rulez
 * holds only the terminating 0 and the length passed is 0. Normalization is
 * UCOL_OFF, strength is UCOL_TERTIARY, and no parse-error struct is supplied
 * (NULL).
 * NOTE(review): the remainder of the function (presumably closing the
 * collator) is not visible in this view.
 */
693 static void TestEmptyRule(void) {
694 UErrorCode status
= U_ZERO_ERROR
;
695 UChar rulez
[] = { 0 };
696 UCollator
*coll
= ucol_openRules(rulez
, 0, UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
701 static void TestUCARules(void) {
702 UErrorCode status
= U_ZERO_ERROR
;
705 uint32_t ruleLen
= 0;
706 UCollator
*UCAfromRules
= NULL
;
707 UCollator
*coll
= ucol_open("", &status
);
708 if(status
== U_FILE_ACCESS_ERROR
) {
709 log_data_err("Is your data around?\n");
711 } else if(U_FAILURE(status
)) {
712 log_err("Error opening collator\n");
715 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, 256);
717 log_verbose("TestUCARules\n");
719 rules
= (UChar
*)malloc((ruleLen
+1)*sizeof(UChar
));
720 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, ruleLen
);
722 log_verbose("Rules length is %d\n", ruleLen
);
723 UCAfromRules
= ucol_openRules(rules
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
724 if(U_SUCCESS(status
)) {
725 ucol_close(UCAfromRules
);
727 log_verbose("Unable to create a collator from UCARules!\n");
730 u_unescape(blah, b, 256);
731 ucol_getSortKey(coll, b, 1, res, 256);
740 /* Pinyin tonal order */
742 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
743 (w/macron)< (w/acute)< (w/caron)< (w/grave)
744 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
745 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
746 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
747 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
748 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
751 However, in testing we got the following order:
752 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
753 (w/acute)< (w/grave)< (w/caron)< (w/macron)
754 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
756 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
757 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
758 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
760 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
763 static void TestBefore(void) {
764 const static char *data
[] = {
765 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
766 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
767 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
768 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
769 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
770 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
773 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
774 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
775 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
776 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
777 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
778 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
779 data
, UPRV_LENGTHOF(data
));
783 /* superseded by TestBeforePinyin */
784 static void TestJ784(void) {
785 const static char *data
[] = {
786 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
787 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
788 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
789 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
790 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
792 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
794 genericLocaleStarter("zh", data
, UPRV_LENGTHOF(data
));
798 static void TestUpperCaseFirst(void) {
799 const static char *data
[] = {
805 genericLocaleStarter("da", data
, UPRV_LENGTHOF(data
));
808 static void TestJ815(void) {
809 const static char *data
[] = {
825 genericLocaleStarter("fr", data
, UPRV_LENGTHOF(data
));
826 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data
, UPRV_LENGTHOF(data
));
830 static void TestCase(void)
832 const static UChar gRules
[MAX_TOKEN_LEN
] =
833 /*" & 0 < 1,\u2460<a,A"*/
834 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
836 const static UChar testCase
[][MAX_TOKEN_LEN
] =
838 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
839 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
840 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
841 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
844 const static UCollationResult caseTestResults
[][9] =
846 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
847 { UCOL_GREATER
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
},
848 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_GREATER
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
849 { UCOL_GREATER
, UCOL_LESS
, UCOL_GREATER
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
}
852 const static UColAttributeValue caseTestAttributes
[][2] =
854 { UCOL_LOWER_FIRST
, UCOL_OFF
},
855 { UCOL_UPPER_FIRST
, UCOL_OFF
},
856 { UCOL_LOWER_FIRST
, UCOL_ON
},
857 { UCOL_UPPER_FIRST
, UCOL_ON
}
860 UErrorCode status
= U_ZERO_ERROR
;
861 UCollationElements
*iter
;
862 UCollator
*myCollation
;
863 myCollation
= ucol_open("en_US", &status
);
865 if(U_FAILURE(status
)){
866 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
869 log_verbose("Testing different case settings\n");
870 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
872 for(k
= 0; k
<4; k
++) {
873 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
874 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
875 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes
[k
][0], caseTestAttributes
[k
][1]);
876 for (i
= 0; i
< 3 ; i
++) {
877 for(j
= i
+1; j
<4; j
++) {
878 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
882 ucol_close(myCollation
);
884 myCollation
= ucol_openRules(gRules
, u_strlen(gRules
), UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
885 if(U_FAILURE(status
)){
886 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
889 log_verbose("Testing different case settings with custom rules\n");
890 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
892 for(k
= 0; k
<4; k
++) {
893 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
894 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
895 for (i
= 0; i
< 3 ; i
++) {
896 for(j
= i
+1; j
<4; j
++) {
897 log_verbose("k:%d, i:%d, j:%d\n", k
, i
, j
);
898 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
899 iter
=ucol_openElements(myCollation
, testCase
[i
], u_strlen(testCase
[i
]), &status
);
901 ucol_closeElements(iter
);
902 iter
=ucol_openElements(myCollation
, testCase
[j
], u_strlen(testCase
[j
]), &status
);
904 ucol_closeElements(iter
);
908 ucol_close(myCollation
);
910 const static char *lowerFirst
[] = {
926 const static char *upperFirst
[] = {
941 log_verbose("mixed case test\n");
942 log_verbose("lower first, case level off\n");
943 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst
, UPRV_LENGTHOF(lowerFirst
));
944 log_verbose("upper first, case level off\n");
945 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst
, UPRV_LENGTHOF(upperFirst
));
946 log_verbose("lower first, case level on\n");
947 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst
, UPRV_LENGTHOF(lowerFirst
));
948 log_verbose("upper first, case level on\n");
949 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst
, UPRV_LENGTHOF(upperFirst
));
954 static void TestIncrementalNormalize(void) {
956 /*UChar baseA =0x61;*/
958 /* UChar baseB = 0x42;*/
959 static const UChar ccMix
[] = {0x316, 0x321, 0x300};
960 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
962 0x316 is combining grave accent below, cc=220
963 0x321 is combining palatalized hook below, cc=202
964 0x300 is combining grave accent, cc=230
968 /*int maxSLen = 64000;*/
973 UErrorCode status
= U_ZERO_ERROR
;
974 UCollationResult result
;
976 int32_t myQ
= getTestOption(QUICK_OPTION
);
978 if(getTestOption(QUICK_OPTION
) < 0) {
979 setTestOption(QUICK_OPTION
, 1);
983 /* Test 1. Run very long unnormalized strings, to force overflow of*/
984 /* most buffers along the way.*/
985 UChar strA
[MAXSLEN
+1];
986 UChar strB
[MAXSLEN
+1];
988 coll
= ucol_open("en_US", &status
);
989 if(status
== U_FILE_ACCESS_ERROR
) {
990 log_data_err("Is your data around?\n");
992 } else if(U_FAILURE(status
)) {
993 log_err("Error opening collator\n");
996 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
998 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
999 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1000 /*for (sLen = 1000; sLen<1001; sLen++) {*/
1001 for (sLen
= 500; sLen
<501; sLen
++) {
1002 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1005 for (i
=1; i
<=sLen
-1; i
++) {
1006 strA
[i
] = ccMix
[i
% 3];
1007 strB
[sLen
-i
] = ccMix
[i
% 3];
1012 ucol_setStrength(coll
, UCOL_TERTIARY
); /* Do test with default strength, which runs*/
1013 doTest(coll
, strA
, strB
, UCOL_EQUAL
); /* optimized functions in the impl*/
1014 ucol_setStrength(coll
, UCOL_IDENTICAL
); /* Do again with the slow, general impl.*/
1015 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
1019 setTestOption(QUICK_OPTION
, myQ
);
1022 /* Test 2: Non-normal sequence in a string that extends to the last character*/
1023 /* of the string. Checks a couple of edge cases.*/
1026 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0};
1027 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0};
1028 ucol_setStrength(coll
, UCOL_TERTIARY
);
1029 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
1032 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
1036 * test below used a code point from Deseret, which sorts differently
1039 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1040 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1041 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1042 ucol_setStrength(coll
, UCOL_TERTIARY
);
1043 doTest(coll
, strA
, strB
, UCOL_GREATER
);
1046 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
1049 static const UChar strA
[] = {0x41, 0x00, 0x42, 0x00};
1050 static const UChar strB
[] = {0x41, 0x00, 0x00, 0x00};
1057 /* there used to be -3 here. Hmmmm.... */
1058 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1059 result
= ucol_strcoll(coll
, strA
, 3, strB
, 3);
1060 if (result
!= UCOL_GREATER
) {
1061 log_err("ERROR 1 in test 4\n");
1063 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
1064 if (result
!= UCOL_EQUAL
) {
1065 log_err("ERROR 2 in test 4\n");
1068 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1069 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1070 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1071 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1073 r
= strcmp(sortKeyA
, sortKeyAz
);
1075 log_err("Error 3 in test 4\n");
1077 r
= strcmp(sortKeyA
, sortKeyB
);
1079 log_err("Error 4 in test 4\n");
1081 r
= strcmp(sortKeyAz
, sortKeyBz
);
1083 log_err("Error 5 in test 4\n");
1086 ucol_setStrength(coll
, UCOL_IDENTICAL
);
1087 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1088 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1089 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1090 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1092 r
= strcmp(sortKeyA
, sortKeyAz
);
1094 log_err("Error 6 in test 4\n");
1096 r
= strcmp(sortKeyA
, sortKeyB
);
1098 log_err("Error 7 in test 4\n");
1100 r
= strcmp(sortKeyAz
, sortKeyBz
);
1102 log_err("Error 8 in test 4\n");
1104 ucol_setStrength(coll
, UCOL_TERTIARY
);
1108 /* Test 5: Null characters in non-normal source strings.*/
1111 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1112 static const UChar strB
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1119 result
= ucol_strcoll(coll
, strA
, 6, strB
, 6);
1120 if (result
!= UCOL_GREATER
) {
1121 log_err("ERROR 1 in test 5\n");
1123 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
1124 if (result
!= UCOL_EQUAL
) {
1125 log_err("ERROR 2 in test 5\n");
1128 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1129 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1130 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1131 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1133 r
= strcmp(sortKeyA
, sortKeyAz
);
1135 log_err("Error 3 in test 5\n");
1137 r
= strcmp(sortKeyA
, sortKeyB
);
1139 log_err("Error 4 in test 5\n");
1141 r
= strcmp(sortKeyAz
, sortKeyBz
);
1143 log_err("Error 5 in test 5\n");
1146 ucol_setStrength(coll
, UCOL_IDENTICAL
);
1147 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1148 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1149 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1150 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1152 r
= strcmp(sortKeyA
, sortKeyAz
);
1154 log_err("Error 6 in test 5\n");
1156 r
= strcmp(sortKeyA
, sortKeyB
);
1158 log_err("Error 7 in test 5\n");
1160 r
= strcmp(sortKeyAz
, sortKeyBz
);
1162 log_err("Error 8 in test 5\n");
1164 ucol_setStrength(coll
, UCOL_TERTIARY
);
1168 /* Test 6: Null character as base of a non-normal combining sequence.*/
1171 static const UChar strA
[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1172 static const UChar strB
[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1174 result
= ucol_strcoll(coll
, strA
, 5, strB
, 5);
1175 if (result
!= UCOL_LESS
) {
1176 log_err("Error 1 in test 6\n");
1178 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
1179 if (result
!= UCOL_EQUAL
) {
1180 log_err("Error 2 in test 6\n");
/*
 * Table-driven check of ucol_uprv_getCaseBits on a collator opened for the
 * "" locale. For each index i of `results`, caseBitData[i] is unescaped into
 * b and passed with its length to ucol_uprv_getCaseBits; a mismatch with
 * results[i] is reported through log_err together with the first code unit
 * of the input.
 * NOTE(review): b and res are declared on lines not visible in this view.
 */
1190 static void TestGetCaseBit(void) {
/* Inputs, written with \uXXXX escapes that u_unescape expands below. */
1191 static const char *caseBitData
[] = {
1192 "a", "A", "ch", "Ch", "CH",
1193 "\\uFF9E", "\\u0009"
/* Expected case value for the input at the same index in caseBitData. */
1196 static const uint8_t results
[] = {
1197 UCOL_LOWER_CASE
, UCOL_UPPER_CASE
, UCOL_LOWER_CASE
, UCOL_MIXED_CASE
, UCOL_UPPER_CASE
,
1198 UCOL_UPPER_CASE
, UCOL_LOWER_CASE
1201 uint32_t i
, blen
= 0;
1203 UErrorCode status
= U_ZERO_ERROR
;
1204 UCollator
*UCA
= ucol_open("", &status
);
/* The loop bound comes from `results`, which drives the table walk. */
1207 for(i
= 0; i
<UPRV_LENGTHOF(results
); i
++) {
1208 blen
= u_unescape(caseBitData
[i
], b
, 256);
1209 res
= ucol_uprv_getCaseBits(UCA
, b
, blen
, &status
);
1210 if(results
[i
] != res
) {
1211 log_err("Expected case = %02X, got %02X for %04X\n", results
[i
], res
, b
[0]);
1217 static void TestHangulTailoring(void) {
1218 static const char *koreanData
[] = {
1219 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1220 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1221 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1222 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1223 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1224 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1228 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1229 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1230 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1231 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1232 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1233 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1236 UErrorCode status
= U_ZERO_ERROR
;
1237 UChar rlz
[2048] = { 0 };
1238 uint32_t rlen
= u_unescape(rules
, rlz
, 2048);
1240 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
1241 if(status
== U_FILE_ACCESS_ERROR
) {
1242 log_data_err("Is your data around?\n");
1244 } else if(U_FAILURE(status
)) {
1245 log_err("Error opening collator\n");
1249 log_verbose("Using start of korean rules\n");
1251 if(U_SUCCESS(status
)) {
1252 genericOrderingTest(coll
, koreanData
, UPRV_LENGTHOF(koreanData
));
1254 log_err("Unable to open collator with rules %s\n", rules
);
1259 log_verbose("Using ko__LOTUS locale\n");
1260 genericLocaleStarter("ko__LOTUS", koreanData
, UPRV_LENGTHOF(koreanData
));
1264 * The secondary/tertiary compression middle byte
1265 * as used by the current implementation.
1266 * Subject to change as the sort key compression changes.
1267 * See class CollationKeys.
1270 SEC_COMMON_MIDDLE
= 0x25, /* range 05..45 */
1271 TER_ONLY_COMMON_MIDDLE
= 0x65 /* range 05..C5 */
/*
 * TestCompressOverlap: inspects sort keys produced by the root collator ("")
 * for two 150-unit strings and logs an error when a compressed weight byte
 * lies on the wrong side of the common-middle constant.
 *
 * secstr and tertstr are filled with U+0020; secstr then gets U+0332 and
 * tertstr gets U+3000 at index count. ucol_getSortKey() is called with a
 * length of 150 for each string, and the resulting key bytes are scanned:
 *   - "top down": a byte below SEC_COMMON_MIDDLE / TER_ONLY_COMMON_MIDDLE
 *     is reported;
 *   - "bottom up": a byte above the same constant is reported.
 * If the collator cannot be opened the failure is logged through
 * log_err_status().
 */
1274 static void TestCompressOverlap(void) {
1277 UErrorCode status
= U_ZERO_ERROR
;
1279 uint8_t result
[500];
1284 coll
= ucol_open("", &status
);
1286 if (U_FAILURE(status
)) {
1287 log_err_status(status
, "Collator can't be created -> %s\n", u_errorName(status
));
/* Both strings get U+0020 at every index visited while count is below 149. */
1290 while (count
< 149) {
1291 secstr
[count
] = 0x0020; /* [06, 05, 05] */
1292 tertstr
[count
] = 0x0020;
1296 /* top down compression ----------------------------------- */
1297 secstr
[count
] = 0x0332; /* [, 87, 05] */
1298 tertstr
[count
] = 0x3000; /* [06, 05, 07] */
1300 /* no compression secstr should have 150 secondary bytes, tertstr should
1301 have 150 tertiary bytes.
1302 with correct compression, secstr should have 6 secondary
1303 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1304 resultlen
= ucol_getSortKey(coll
, secstr
, 150, result
, UPRV_LENGTHOF(result
));
1305 (void)resultlen
; /* Suppress set but not used warning. */
/* Scan starts one byte past the first 0x01 in the key (presumably the level
   separator -- confirm) and stops once the byte after the cursor is 0x01. */
1306 tempptr
= (uint8_t *)uprv_strchr((char *)result
, 1) + 1;
1307 while (*(tempptr
+ 1) != 1) {
1308 /* the last secondary collation element is not checked since it is not
1309 part of the compression */
1310 if (*tempptr
< SEC_COMMON_MIDDLE
) {
1311 log_err("Secondary top down compression overlapped\n");
1316 /* tertiary top/bottom/common for en_US is similar to the secondary
1317 top/bottom/common */
1318 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, result
, UPRV_LENGTHOF(result
));
/* For the tertiary scan the start is one byte past the LAST 0x01 in the key,
   and the loop stops once the byte after the cursor is 0. */
1319 tempptr
= (uint8_t *)uprv_strrchr((char *)result
, 1) + 1;
1320 while (*(tempptr
+ 1) != 0) {
1321 /* the last secondary collation element is not checked since it is not
1322 part of the compression */
1323 if (*tempptr
< TER_ONLY_COMMON_MIDDLE
) {
1324 log_err("Tertiary top down compression overlapped\n");
1329 /* bottom up compression ------------------------------------- */
1332 resultlen
= ucol_getSortKey(coll
, secstr
, 150, result
, UPRV_LENGTHOF(result
));
1333 tempptr
= (uint8_t *)uprv_strchr((char *)result
, 1) + 1;
1334 while (*(tempptr
+ 1) != 1) {
1335 /* the last secondary collation element is not checked since it is not
1336 part of the compression */
1337 if (*tempptr
> SEC_COMMON_MIDDLE
) {
1338 log_err("Secondary bottom up compression overlapped\n");
1343 /* tertiary top/bottom/common for en_US is similar to the secondary
1344 top/bottom/common */
1345 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, result
, UPRV_LENGTHOF(result
));
1346 tempptr
= (uint8_t *)uprv_strrchr((char *)result
, 1) + 1;
1347 while (*(tempptr
+ 1) != 0) {
1348 /* the last secondary collation element is not checked since it is not
1349 part of the compression */
1350 if (*tempptr
> TER_ONLY_COMMON_MIDDLE
) {
1351 log_err("Tertiary bottom up compression overlapped\n");
/*
 * TestCyrillicTailoring: passes the first three strings of test[] to
 * genericRulesStarter() under two tailorings that place U+04D0 after U+0410
 * ("&\u0410 = \u0410 < \u04d0" and "&Z < \u0410 < \u04d0").
 * The remaining calls are commented out; the existing comments attribute
 * this to UCA 8.0 dropping most Cyrillic contractions.
 */
1359 static void TestCyrillicTailoring(void) {
1360 static const char *test
[] = {
1366 /* Russian overrides contractions, so this test is not valid anymore */
1367 /*genericLocaleStarter("ru", test, 3);*/
1369 // Most of the following are commented out because UCA 8.0
1370 // drops most of the Cyrillic contractions from the default order.
1371 // See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
1373 // genericLocaleStarter("root", test, 3);
1374 // genericRulesStarter("&\\u0410 = \\u0410", test, 3);
1375 // genericRulesStarter("&Z < \\u0410", test, 3);
1376 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test
, 3);
1377 genericRulesStarter("&Z < \\u0410 < \\u04d0", test
, 3);
1378 // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
1379 // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
/*
 * TestSuppressContractions: runs genericRulesStarter() with the rule
 * "[suppressContractions [\u0400-\u047f]]" over two three-entry string
 * lists, testNoCont and testNoCont2.
 */
1382 static void TestSuppressContractions(void) {
1384 static const char *testNoCont2
[] = {
1389 static const char *testNoCont
[] = {
1392 "\\uFF21\\u0410\\u0302"
1395 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont
, 3);
1396 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2
, 3);
/*
 * TestContraction: checks collation elements and ordering for tailorings that
 * combine contractions with expansions.
 *
 * Part 1: for every entry of testrules a collator is built with
 * ucol_openRules() (UCOL_ON, UCOL_TERTIARY). The two-unit string testdata[i]
 * is iterated as iter1, and every collation element returned by a second
 * iterator iter2 must equal the next element from iter1; afterwards iter1
 * must be exhausted (UCOL_NULLORDER).
 *
 * Part 2: with the rule "& a < b < c < ch < d & c = ch / h", ucol_strcoll()
 * must report testdata2[0] < testdata2[1] and testdata2[1] < testdata2[2].
 *
 * Part 3 (compiled out with #if 0): consecutive pairs of testrules3 are built
 * into two collators and the collation elements of 'B' are compared.
 *
 * Failures are reported through log_err() / log_err_status().
 */
1399 static void TestContraction(void) {
1400 const static char *testrules
[] = {
1402 "&A = A\\u0306/\\u0306",
1405 const static UChar testdata
[][2] = {
1406 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1407 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1408 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1410 const static UChar testdata2
[][2] = {
1411 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1412 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1413 {0x0063 /* 'c' */, 0x006C /* 'l' */}
1417 * These pairs of rule strings are not guaranteed to yield the very same mappings.
1418 * In fact, LDML 24 recommends an improved way of creating mappings
1419 * which always yields different mappings for such pairs. See
1420 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1422 const static char *testrules3
[] = {
1423 "&z < xyz &xyzw << B",
1424 "&z < xyz &xyz << B / w",
1425 "&z < ch &achm << B",
1426 "&z < ch &a << B / chm",
1427 "&\\ud800\\udc00w << B",
1428 "&\\ud800\\udc00 << B / w",
1429 "&a\\ud800\\udc00m << B",
1430 "&a << B / \\ud800\\udc00m",
1434 UErrorCode status
= U_ZERO_ERROR
;
1436 UChar rule
[256] = {0};
1440 for (i
= 0; i
< UPRV_LENGTHOF(testrules
); i
++) {
1441 UCollationElements
*iter1
;
1443 log_verbose("Rule %s for testing\n", testrules
[i
]);
1444 rlen
= u_unescape(testrules
[i
], rule
, 32);
1445 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1446 if (U_FAILURE(status
)) {
1447 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
1450 iter1
= ucol_openElements(coll
, testdata
[i
], 2, &status
);
1451 if (U_FAILURE(status
)) {
1452 log_err("Collation iterator creation failed\n");
1456 UCollationElements
*iter2
= ucol_openElements(coll
,
1460 if (U_FAILURE(status
)) {
1461 log_err("Collation iterator creation failed\n");
/* Every element produced by iter2 must match the next element of iter1. */
1464 ce
= ucol_next(iter2
, &status
);
1465 while (ce
!= UCOL_NULLORDER
) {
1466 if (ucol_next(iter1
, &status
) != ce
) {
1467 log_err("Collation elements in contraction split does not match\n");
1470 ce
= ucol_next(iter2
, &status
);
1473 ucol_closeElements(iter2
);
1475 if (ucol_next(iter1
, &status
) != UCOL_NULLORDER
) {
1476 log_err("Collation elements not exhausted\n");
1479 ucol_closeElements(iter1
);
1483 rlen
= u_unescape("& a < b < c < ch < d & c = ch / h", rule
, 256);
1484 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1485 if (ucol_strcoll(coll
, testdata2
[0], 2, testdata2
[1], 2) != UCOL_LESS
) {
1486 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1487 testdata2
[0][0], testdata2
[0][1], testdata2
[1][0],
1491 if (ucol_strcoll(coll
, testdata2
[1], 2, testdata2
[2], 2) != UCOL_LESS
) {
1492 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1493 testdata2
[1][0], testdata2
[1][1], testdata2
[2][0],
1498 #if 0 /* see above */
1499 for (i
= 0; i
< UPRV_LENGTHOF(testrules3
); i
+= 2) {
1500 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i
, testrules3
[i
], testrules3
[i
+ 1]);
1503 UCollationElements
*iter1
,
1505 UChar ch
= 0x0042 /* 'B' */;
1507 rlen
= u_unescape(testrules3
[i
], rule
, 32);
1508 coll1
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1509 rlen
= u_unescape(testrules3
[i
+ 1], rule
, 32);
1510 coll2
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1511 if (U_FAILURE(status
)) {
/* Report the rule from testrules3, the array this loop iterates over; i can
   exceed the bounds of the shorter testrules array. */
1512 log_err("Collator creation failed %s\n", testrules3
[i
]);
1515 iter1
= ucol_openElements(coll1
, &ch
, 1, &status
);
1516 iter2
= ucol_openElements(coll2
, &ch
, 1, &status
);
1517 if (U_FAILURE(status
)) {
1518 log_err("Collation iterator creation failed\n");
1521 ce
= ucol_next(iter1
, &status
);
1522 if (U_FAILURE(status
)) {
1523 log_err("Retrieving ces failed\n");
1526 while (ce
!= UCOL_NULLORDER
) {
1527 uint32_t ce2
= (uint32_t)ucol_next(iter2
, &status
);
1529 log_verbose("CEs match: %08x\n", ce
);
1531 log_err("CEs do not match: %08x vs. %08x\n", ce
, ce2
);
1534 ce
= ucol_next(iter1
, &status
);
1535 if (U_FAILURE(status
)) {
1536 log_err("Retrieving ces failed\n");
1540 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
1541 log_err("CEs not exhausted\n");
1544 ucol_closeElements(iter1
);
1545 ucol_closeElements(iter2
);
/*
 * TestExpansion: for each rule string in testrules, builds a collator with
 * ucol_openRules() (UCOL_ON, UCOL_TERTIARY) and uses doTest() to require
 * testdata[j] < testdata[j + 1] (UCOL_LESS) for j = 0..4, i.e. that the six
 * two-letter strings in testdata are in strictly ascending order.
 * A collator that cannot be created is reported through log_err_status().
 */
1552 static void TestExpansion(void) {
1553 const static char *testrules
[] = {
1556 * This seems to have tested that M was not mapped to an expansion.
1557 * I believe the old builder just did that because it computed the extension CEs
1558 * at the very end, which was a bug.
1559 * Among other problems, it violated the core tailoring principle
1560 * by making an earlier rule depend on a later one.
1561 * And, of course, if M did not get an expansion, then it was primary different from K,
1562 * unlike what the rule &K<<M says.
1564 "&J << K / B & K << M",
1568 const static UChar testdata
[][3] = {
1569 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1570 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1571 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1572 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1573 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1574 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1577 UErrorCode status
= U_ZERO_ERROR
;
1579 UChar rule
[256] = {0};
1583 for (i
= 0; i
< UPRV_LENGTHOF(testrules
); i
++) {
1585 log_verbose("Rule %s for testing\n", testrules
[i
]);
1586 rlen
= u_unescape(testrules
[i
], rule
, 32);
1587 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1588 if (U_FAILURE(status
)) {
1589 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
/* Adjacent pairs only: the bound 5 is one less than the six rows of testdata. */
1593 for (j
= 0; j
< 5; j
++) {
1594 doTest(coll
, testdata
[j
], testdata
[j
+ 1], UCOL_LESS
);
1601 /* this test tests the current limitations of the engine */
1602 /* it always fails, so it is disabled by default */
/*
 * TestLimitations: a collection of rule/data combinations, grouped into
 * sections that each declare their own rule and expected orderings and hand
 * them to genericRulesStarter() or genericRulesStarterWithOptions():
 *   - recursive expansions
 *   - contractions spanning expansions
 *   - NULL in contractions (decomposition mode on and off)
 *   - contractions spanning normalization (decomposition mode on and off)
 *   - variable top (shifted/quaternary and non-ignorable/tertiary)
 *   - case level (upper first)
 */
1603 static void TestLimitations(void) {
1604 /* recursive expansions */
1606 static const char *rule
= "&a=b/c&d=c/e";
1607 static const char *tlimit01
[] = {"add","b","adf"};
1608 static const char *tlimit02
[] = {"aa","b","af"};
1609 log_verbose("recursive expansions\n");
1610 genericRulesStarter(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
));
1611 genericRulesStarter(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
));
1613 /* contractions spanning expansions */
1615 static const char *rule
= "&a<<<c/e&g<<<eh";
1616 static const char *tlimit01
[] = {"ad","c","af","f","ch","h"};
1617 static const char *tlimit02
[] = {"ad","c","ch","af","f","h"};
1618 log_verbose("contractions spanning expansions\n");
1619 genericRulesStarter(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
));
1620 genericRulesStarter(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
));
1622 /* normalization: nulls in contractions */
1624 static const char *rule
= "&a<<<\\u0000\\u0302";
1625 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
1626 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
1627 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
1628 static const UColAttributeValue valOn
[] = { UCOL_ON
};
1629 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
1631 log_verbose("NULL in contractions\n");
1632 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
1633 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
1634 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
1635 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
1638 /* normalization: contractions spanning normalization */
/* NOTE(review): the rule and both data lists in this section are identical to
   the ones in the "nulls in contractions" section -- confirm this is intended. */
1640 static const char *rule
= "&a<<<\\u0000\\u0302";
1641 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
1642 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
1643 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
1644 static const UColAttributeValue valOn
[] = { UCOL_ON
};
1645 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
1647 log_verbose("contractions spanning normalization\n");
1648 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
1649 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
1650 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
1651 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
1656 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
1657 static const char *rule
= "&\\u2010<x<[variable top]=z";
1658 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
1659 static const char *tlimit01
[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
1660 static const char *tlimit02
[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
1661 static const char *tlimit03
[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
/* att[k] is paired with valOn[k] / valOff[k]: alternate handling and strength. */
1662 static const UColAttribute att
[] = { UCOL_ALTERNATE_HANDLING
, UCOL_STRENGTH
};
1663 static const UColAttributeValue valOn
[] = { UCOL_SHIFTED
, UCOL_QUATERNARY
};
1664 static const UColAttributeValue valOff
[] = { UCOL_NON_IGNORABLE
, UCOL_TERTIARY
};
1666 log_verbose("variable top\n");
1667 genericRulesStarterWithOptions(rule
, tlimit03
, UPRV_LENGTHOF(tlimit03
), att
, valOn
, UPRV_LENGTHOF(att
));
1668 genericRulesStarterWithOptions(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
), att
, valOn
, UPRV_LENGTHOF(att
));
1669 genericRulesStarterWithOptions(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
), att
, valOn
, UPRV_LENGTHOF(att
));
1670 genericRulesStarterWithOptions(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
), att
, valOff
, UPRV_LENGTHOF(att
));
1671 genericRulesStarterWithOptions(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
), att
, valOff
, UPRV_LENGTHOF(att
));
1676 static const char *rule
= "&c<ch<<<cH<<<Ch<<<CH";
1677 static const char *tlimit01
[] = {"c","CH","Ch","cH","ch"};
1678 static const char *tlimit02
[] = {"c","CH","cH","Ch","ch"};
1679 static const UColAttribute att
[] = { UCOL_CASE_FIRST
};
1680 static const UColAttributeValue valOn
[] = { UCOL_UPPER_FIRST
};
1681 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
1682 log_verbose("case level\n");
1683 genericRulesStarterWithOptions(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
), att
, valOn
, UPRV_LENGTHOF(att
));
1684 genericRulesStarterWithOptions(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
), att
, valOn
, UPRV_LENGTHOF(att
));
1685 /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
1686 /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
/*
 * TestBocsuCoverage: unescapes a string mixing BMP and supplementary code
 * points, opens the root collator (""), switches it to UCOL_IDENTICAL
 * strength and requests a sort key for the string. The returned key length is
 * deliberately unused; the call exists to exercise the code path.
 * If the collator cannot be opened, "Couldn't open UCA" is logged as a data
 * error.
 */
1692 static void TestBocsuCoverage(void) {
1693 UErrorCode status
= U_ZERO_ERROR
;
1694 const char *testString
= "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1695 UChar test
[256] = {0};
1696 uint32_t tlen
= u_unescape(testString
, test
, 32);
1697 uint8_t key
[256] = {0};
1700 UCollator
*coll
= ucol_open("", &status
);
1701 if(U_SUCCESS(status
)) {
1702 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
1704 klen
= ucol_getSortKey(coll
, test
, tlen
, key
, 256);
1705 (void)klen
; /* Suppress set but not used warning. */
1709 log_data_err("Couldn't open UCA\n");
/*
 * TestVariableTopSetting: exercises ucol_setVariableTop(),
 * ucol_getVariableTop() and ucol_restoreVariableTop() on the root collator.
 *
 * With UCOL_ALTERNATE_HANDLING set to UCOL_SHIFTED, the variable top is set in
 * turn to space, dot, degree and dollar. After each call the test requires:
 *   - the value returned by the setter equals the value the getter reports;
 *   - each probe character up to and including the new top compares equal to
 *     the empty string (ucol_equal against &nul with length 0), while the
 *     remaining probe characters (through zero) do not;
 *   - the new top character is not greater than or equal to the next probe.
 * It then requires that setting the variable top from the NUL-terminated
 * string in first[] fails, that the original variable top can be restored,
 * and that calls made with status preset to U_INTERNAL_PROGRAM_ERROR (also
 * with a NULL collator) leave that status unchanged.
 * "Couldn't open UCA collator" is logged as a data error when the collator
 * is unavailable.
 */
1713 static void TestVariableTopSetting(void) {
1714 UErrorCode status
= U_ZERO_ERROR
;
1715 uint32_t varTopOriginal
= 0, varTop1
, varTop2
;
1716 UCollator
*coll
= ucol_open("", &status
);
1717 if(U_SUCCESS(status
)) {
/* Probe characters, one per category named in the trailing comments. */
1719 static const UChar nul
= 0;
1720 static const UChar space
= 0x20;
1721 static const UChar dot
= 0x2e; /* punctuation */
1722 static const UChar degree
= 0xb0; /* symbol */
1723 static const UChar dollar
= 0x24; /* currency symbol */
1724 static const UChar zero
= 0x30; /* digit */
1726 varTopOriginal
= ucol_getVariableTop(coll
, &status
);
1727 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal
);
1728 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
1730 varTop1
= ucol_setVariableTop(coll
, &space
, 1, &status
);
1731 varTop2
= ucol_getVariableTop(coll
, &status
);
1732 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1
);
1733 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1734 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1735 ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1736 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1737 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1738 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1739 ucol_greaterOrEqual(coll
, &space
, 1, &dot
, 1)) {
1740 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status
));
1743 varTop1
= ucol_setVariableTop(coll
, &dot
, 1, &status
);
1744 varTop2
= ucol_getVariableTop(coll
, &status
);
1745 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1
);
1746 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1747 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1748 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1749 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1750 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1751 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1752 ucol_greaterOrEqual(coll
, &dot
, 1, &degree
, 1)) {
1753 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status
));
1756 varTop1
= ucol_setVariableTop(coll
, &degree
, 1, &status
);
1757 varTop2
= ucol_getVariableTop(coll
, &status
);
1758 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1
);
1759 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1760 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1761 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1762 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1763 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1764 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1765 ucol_greaterOrEqual(coll
, &degree
, 1, &dollar
, 1)) {
1766 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status
));
1769 varTop1
= ucol_setVariableTop(coll
, &dollar
, 1, &status
);
1770 varTop2
= ucol_getVariableTop(coll
, &status
);
1771 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1
);
1772 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1773 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1774 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1775 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1776 !ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1777 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1778 ucol_greaterOrEqual(coll
, &dollar
, 1, &zero
, 1)) {
1779 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status
));
1782 log_verbose("Testing setting variable top to contractions\n");
1784 UChar first
[4] = { 0 };
/* Length -1 asks the API to treat first[] as NUL-terminated; success here is
   the error case. */
1789 status
= U_ZERO_ERROR
;
1790 ucol_setVariableTop(coll
, first
, -1, &status
);
1792 if(U_SUCCESS(status
)) {
1793 log_err("Invalid contraction succeded in setting variable top!\n");
1798 log_verbose("Test restoring variable top\n");
1800 status
= U_ZERO_ERROR
;
1801 ucol_restoreVariableTop(coll
, varTopOriginal
, &status
);
1802 if(varTopOriginal
!= ucol_getVariableTop(coll
, &status
)) {
1803 log_err("Couldn't restore old variable top\n");
1806 log_verbose("Testing calling with error set\n");
/* Every call below receives an already-failed status and must not change it. */
1808 status
= U_INTERNAL_PROGRAM_ERROR
;
1809 varTop1
= ucol_setVariableTop(coll
, &space
, 1, &status
);
1810 varTop2
= ucol_getVariableTop(coll
, &status
);
1811 ucol_restoreVariableTop(coll
, varTop2
, &status
);
1812 varTop1
= ucol_setVariableTop(NULL
, &dot
, 1, &status
);
1813 varTop2
= ucol_getVariableTop(NULL
, &status
);
1814 ucol_restoreVariableTop(NULL
, varTop2
, &status
);
1815 if(status
!= U_INTERNAL_PROGRAM_ERROR
) {
1816 log_err("Bad reaction to passed error!\n");
1820 log_data_err("Couldn't open UCA collator\n");
/*
 * TestMaxVariable: exercises ucol_setMaxVariable() / ucol_getMaxVariable() on
 * the root collator with UCOL_ALTERNATE_HANDLING set to UCOL_SHIFTED.
 *
 * The maximum variable group is set in turn to UCOL_REORDER_CODE_SPACE,
 * _PUNCTUATION, _SYMBOL and _CURRENCY. After each call the test requires:
 *   - the getter reports the group that was just set;
 *   - each probe character (space, dot, degree, dollar) up to and including
 *     the one belonging to that group compares equal to the empty string,
 *     while the later probes (through zero) do not;
 *   - the probe for the new group is not greater than or equal to the next
 *     probe.
 * It then restores the original group, and finally requires that a call made
 * with status preset to U_INTERNAL_PROGRAM_ERROR changes neither the status
 * nor the current group.
 * "Couldn't open root collator" is logged as a data error when the collator
 * is unavailable.
 */
1824 static void TestMaxVariable(void) {
1825 UErrorCode status
= U_ZERO_ERROR
;
1826 UColReorderCode oldMax
, max
;
/* Probe characters, one per category named in the trailing comments. */
1829 static const UChar nul
= 0;
1830 static const UChar space
= 0x20;
1831 static const UChar dot
= 0x2e; /* punctuation */
1832 static const UChar degree
= 0xb0; /* symbol */
1833 static const UChar dollar
= 0x24; /* currency symbol */
1834 static const UChar zero
= 0x30; /* digit */
1836 coll
= ucol_open("", &status
);
1837 if(U_FAILURE(status
)) {
1838 log_data_err("Couldn't open root collator\n");
1842 oldMax
= ucol_getMaxVariable(coll
);
1843 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax
);
1844 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
1846 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_SPACE
, &status
);
1847 max
= ucol_getMaxVariable(coll
);
1848 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max
);
1849 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_SPACE
||
1850 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1851 ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1852 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1853 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1854 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1855 ucol_greaterOrEqual(coll
, &space
, 1, &dot
, 1)) {
1856 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status
));
1859 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_PUNCTUATION
, &status
);
1860 max
= ucol_getMaxVariable(coll
);
1861 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max
);
1862 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_PUNCTUATION
||
1863 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1864 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1865 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1866 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1867 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1868 ucol_greaterOrEqual(coll
, &dot
, 1, &degree
, 1)) {
1869 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status
));
1872 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_SYMBOL
, &status
);
1873 max
= ucol_getMaxVariable(coll
);
1874 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max
);
1875 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_SYMBOL
||
1876 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1877 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1878 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1879 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1880 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1881 ucol_greaterOrEqual(coll
, &degree
, 1, &dollar
, 1)) {
1882 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status
));
1885 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_CURRENCY
, &status
);
1886 max
= ucol_getMaxVariable(coll
);
1887 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max
);
1888 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_CURRENCY
||
1889 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1890 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1891 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1892 !ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1893 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1894 ucol_greaterOrEqual(coll
, &dollar
, 1, &zero
, 1)) {
1895 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status
));
1898 log_verbose("Test restoring maxVariable\n");
1899 status
= U_ZERO_ERROR
;
1900 ucol_setMaxVariable(coll
, oldMax
, &status
);
1901 if(oldMax
!= ucol_getMaxVariable(coll
)) {
1902 log_err("Couldn't restore old maxVariable\n");
1905 log_verbose("Testing calling with error set\n");
/* With a pre-set failure code the setter must be a no-op: the group stays at
   oldMax and the status is left untouched. */
1906 status
= U_INTERNAL_PROGRAM_ERROR
;
1907 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_SPACE
, &status
);
1908 max
= ucol_getMaxVariable(coll
);
1909 if(max
!= oldMax
|| status
!= U_INTERNAL_PROGRAM_ERROR
) {
1910 log_err("Bad reaction to passed error!\n");
/*
 * TestNonChars: opens the "en_US" collator and passes the list of strings in
 * test[] to genericOrderingTestWithResult() with UCOL_LESS as the expected
 * result. The list starts with U+0000 and U+FFFE, continues with
 * noncharacters (U+FDD0, U+FDEF and the last two code points of planes 1..16)
 * and ends with U+FFFF.
 * A collator that cannot be opened is reported through log_err_status().
 */
1915 static void TestNonChars(void) {
1916 static const char *test
[] = {
1917 "\\u0000", /* ignorable */
1918 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
1919 "\\uFDD0", "\\uFDEF",
1920 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
1921 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
1922 "\\U0003FFFE", "\\U0003FFFF",
1923 "\\U0004FFFE", "\\U0004FFFF",
1924 "\\U0005FFFE", "\\U0005FFFF",
1925 "\\U0006FFFE", "\\U0006FFFF",
1926 "\\U0007FFFE", "\\U0007FFFF",
1927 "\\U0008FFFE", "\\U0008FFFF",
1928 "\\U0009FFFE", "\\U0009FFFF",
1929 "\\U000AFFFE", "\\U000AFFFF",
1930 "\\U000BFFFE", "\\U000BFFFF",
1931 "\\U000CFFFE", "\\U000CFFFF",
1932 "\\U000DFFFE", "\\U000DFFFF",
1933 "\\U000EFFFE", "\\U000EFFFF",
1934 "\\U000FFFFE", "\\U000FFFFF",
1935 "\\U0010FFFE", "\\U0010FFFF",
1936 "\\uFFFF" /* special character with maximum primary weight */
1938 UErrorCode status
= U_ZERO_ERROR
;
1939 UCollator
*coll
= ucol_open("en_US", &status
);
1941 log_verbose("Test non characters\n");
1943 if(U_SUCCESS(status
)) {
/* NOTE(review): the initializer as visible here lists 37 strings, but the
   count passed is 35, so the last two entries ("\U0010FFFF" and "\uFFFF")
   would not be compared -- confirm whether UPRV_LENGTHOF(test) was intended. */
1944 genericOrderingTestWithResult(coll
, test
, 35, UCOL_LESS
);
1946 log_err_status(status
, "Unable to open collator\n");
/*
 * TestExtremeCompression: stress test with long runs of identical letters.
 * Four 2048-byte buffers are allocated with malloc(). For every j from 20 to
 * 499 each buffer i gets 'a' in its first j-1 bytes and the letter 'a'+i at
 * index j-1, after which the four strings are handed to
 * genericLocaleStarter("en_US", ...).
 * NOTE(review): the malloc() results are not checked in the visible code.
 */
1952 static void TestExtremeCompression(void) {
1953 static char *test
[4];
1954 int32_t j
= 0, i
= 0;
1956 for(i
= 0; i
<4; i
++) {
1957 test
[i
] = (char *)malloc(2048*sizeof(char));
1960 for(j
= 20; j
< 500; j
++) {
1961 for(i
= 0; i
<4; i
++) {
/* The four strings share a prefix of j-1 'a's and differ only at index j-1. */
1962 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
1963 test
[i
][j
-1] = (char)('a'+i
);
1966 genericLocaleStarter("en_US", (const char **)test
, 4);
1970 for(i
= 0; i
<4; i
++) {
/*
 * TestExtremeCompression (alternate version): opens the "en_US" collator,
 * allocates four 2048-byte buffers, and for every j from 10 to 2047 writes
 * 'a' into the first j-2 bytes of each buffer and the letter 'a'+i at index
 * j-1. The four strings are handed to genericLocaleStarter("en_US", ...).
 * A second pass over the same j range refills only buffer 0 with j-1 'a's.
 * NOTE(review): another definition with the same name appears earlier in this
 * file; presumably this one is excluded by the preprocessor -- confirm.
 */
1976 static void TestExtremeCompression(void) {
1977 static char *test
[4];
1978 int32_t j
= 0, i
= 0;
1979 UErrorCode status
= U_ZERO_ERROR
;
/* ucol_open() reports failure through a UErrorCode pointer, so the address of
   status is passed. */
1980 UCollator
*coll
= ucol_open("en_US", &status
);
1981 for(i
= 0; i
<4; i
++) {
1982 test
[i
] = (char *)malloc(2048*sizeof(char));
1984 for(j
= 10; j
< 2048; j
++) {
1985 for(i
= 0; i
<4; i
++) {
1986 uprv_memset(test
[i
], 'a', (j
-2)*sizeof(char));
1987 test
[i
][j
-1] = (char)('a'+i
);
1991 genericLocaleStarter("en_US", (const char **)test
, 4);
1993 for(j
= 10; j
< 2048; j
++) {
1994 for(i
= 0; i
<1; i
++) {
1995 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
1999 for(i
= 0; i
<4; i
++) {
/*
 * TestSurrogates: tailors a sequence of supplementary code points (written as
 * escaped surrogate pairs) and strings that start with U+10000 after 'z', then
 * passes the first 14 entries of test[] to genericRulesStarter() with that
 * rule string.
 */
2005 static void TestSurrogates(void) {
2006 static const char *test
[] = {
2007 "z","\\ud900\\udc25", "\\ud805\\udc50",
2008 "\\ud800\\udc00y", "\\ud800\\udc00r",
2009 "\\ud800\\udc00f", "\\ud800\\udc00",
2010 "\\ud800\\udc00c", "\\ud800\\udc00b",
2011 "\\ud800\\udc00fa", "\\ud800\\udc00fb",
2016 static const char *rule
=
2017 "&z < \\ud900\\udc25 < \\ud805\\udc50"
2018 "< \\ud800\\udc00y < \\ud800\\udc00r"
2019 "< \\ud800\\udc00f << \\ud800\\udc00"
2020 "< \\ud800\\udc00fa << \\ud800\\udc00fb"
2021 "< \\ud800\\udc00a < c < b" ;
2023 genericRulesStarter(rule
, test
, 14);
2026 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
/*
 * TestPrefix: table-driven test. Each entry of tests[] holds a rule string
 * (rules), a list of strings (data, up to 50 entries) and a count (len);
 * every entry is passed to genericRulesStarter().
 * The rules use the "prefix|character" syntax, as in
 * "&z<<<\ud900\udc25|a".
 */
2027 static void TestPrefix(void) {
2030 static const struct {
2032 const char *data
[50];
2042 "&z<<<\\ud900\\udc25|a",
2043 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2047 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2048 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2052 /* This test uses data supplied by Masashiko Maedera to test the implementation */
2053 /* JIS X 4061 collation order implementation */
/*
 * TestNewJapanese: runs the "ja" collator over two lists of escaped kana
 * strings, test1 and test2.
 * Each list is passed to genericLocaleStarterWithOptions() twice:
 *   - with strength UCOL_QUATERNARY only (att / val, one attribute);
 *   - with strength UCOL_QUATERNARY plus alternate handling UCOL_SHIFTED
 *     (attShifted / valShifted, two attributes).
 * The call that would use test3 is commented out.
 */
2054 static void TestNewJapanese(void) {
2056 static const char * const test1
[] = {
2057 "\\u30b7\\u30e3\\u30fc\\u30ec",
2058 "\\u30b7\\u30e3\\u30a4",
2059 "\\u30b7\\u30e4\\u30a3",
2060 "\\u30b7\\u30e3\\u30ec",
2061 "\\u3061\\u3087\\u3053",
2062 "\\u3061\\u3088\\u3053",
2063 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2064 "\\u3066\\u30fc\\u305f",
2065 "\\u30c6\\u30fc\\u30bf",
2066 "\\u30c6\\u30a7\\u30bf",
2067 "\\u3066\\u3048\\u305f",
2068 "\\u3067\\u30fc\\u305f",
2069 "\\u30c7\\u30fc\\u30bf",
2070 "\\u30c7\\u30a7\\u30bf",
2071 "\\u3067\\u3048\\u305f",
2072 "\\u3066\\u30fc\\u305f\\u30fc",
2073 "\\u30c6\\u30fc\\u30bf\\u30a1",
2074 "\\u30c6\\u30a7\\u30bf\\u30fc",
2075 "\\u3066\\u3047\\u305f\\u3041",
2076 "\\u3066\\u3048\\u305f\\u30fc",
2077 "\\u3067\\u30fc\\u305f\\u30fc",
2078 "\\u30c7\\u30fc\\u30bf\\u30a1",
2079 "\\u3067\\u30a7\\u305f\\u30a1",
2080 "\\u30c7\\u3047\\u30bf\\u3041",
2081 "\\u30c7\\u30a8\\u30bf\\u30a2",
2083 "\\u3073\\u3085\\u3042",
2084 "\\u3074\\u3085\\u3042",
2085 "\\u3073\\u3085\\u3042\\u30fc",
2086 "\\u30d3\\u30e5\\u30a2\\u30fc",
2087 "\\u3074\\u3085\\u3042\\u30fc",
2088 "\\u30d4\\u30e5\\u30a2\\u30fc",
2089 "\\u30d2\\u30e5\\u30a6",
2090 "\\u30d2\\u30e6\\u30a6",
2091 "\\u30d4\\u30e5\\u30a6\\u30a2",
2092 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2093 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2094 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2095 "\\u3072\\u3085\\u3093",
2096 "\\u3074\\u3085\\u3093",
2097 "\\u3075\\u30fc\\u308a",
2098 "\\u30d5\\u30fc\\u30ea",
2099 "\\u3075\\u3045\\u308a",
2100 "\\u3075\\u30a5\\u308a",
2101 "\\u3075\\u30a5\\u30ea",
2102 "\\u30d5\\u30a6\\u30ea",
2103 "\\u3076\\u30fc\\u308a",
2104 "\\u30d6\\u30fc\\u30ea",
2105 "\\u3076\\u3045\\u308a",
2106 "\\u30d6\\u30a5\\u308a",
2107 "\\u3077\\u3046\\u308a",
2108 "\\u30d7\\u30a6\\u30ea",
2109 "\\u3075\\u30fc\\u308a\\u30fc",
2110 "\\u30d5\\u30a5\\u30ea\\u30fc",
2111 "\\u3075\\u30a5\\u308a\\u30a3",
2112 "\\u30d5\\u3045\\u308a\\u3043",
2113 "\\u30d5\\u30a6\\u30ea\\u30fc",
2114 "\\u3075\\u3046\\u308a\\u3043",
2115 "\\u30d6\\u30a6\\u30ea\\u30a4",
2116 "\\u3077\\u30fc\\u308a\\u30fc",
2117 "\\u3077\\u30a5\\u308a\\u30a4",
2118 "\\u3077\\u3046\\u308a\\u30fc",
2119 "\\u30d7\\u30a6\\u30ea\\u30a4",
2135 static const char *test2
[] = {
2136 "\\u306f\\u309d", /* H\\u309d */
2137 "\\u30cf\\u30fd", /* K\\u30fd */
2138 "\\u306f\\u306f", /* HH */
2139 "\\u306f\\u30cf", /* HK */
2140 "\\u30cf\\u30cf", /* KK */
2141 "\\u306f\\u309e", /* H\\u309e */
2142 "\\u30cf\\u30fe", /* K\\u30fe */
2143 "\\u306f\\u3070", /* HH\\u309b */
2144 "\\u30cf\\u30d0", /* KK\\u309b */
2145 "\\u306f\\u3071", /* HH\\u309c */
2146 "\\u30cf\\u3071", /* KH\\u309c */
2147 "\\u30cf\\u30d1", /* KK\\u309c */
2148 "\\u3070\\u309d", /* H\\u309b\\u309d */
2149 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2150 "\\u3070\\u306f", /* H\\u309bH */
2151 "\\u30d0\\u30cf", /* K\\u309bK */
2152 "\\u3070\\u309e", /* H\\u309b\\u309e */
2153 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2154 "\\u3070\\u3070", /* H\\u309bH\\u309b */
2155 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2156 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2157 "\\u3070\\u3071", /* H\\u309bH\\u309c */
2158 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2159 "\\u3071\\u309d", /* H\\u309c\\u309d */
2160 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2161 "\\u3071\\u306f", /* H\\u309cH */
2162 "\\u30d1\\u30cf", /* K\\u309cK */
2163 "\\u3071\\u3070", /* H\\u309cH\\u309b */
2164 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2165 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2166 "\\u3071\\u3071", /* H\\u309cH\\u309c */
2167 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2170 static const char *test3[] = {
2198 "\\u30b7\\u30e3\\u30fc\\u30ec",
2201 static const UColAttribute att
[] = { UCOL_STRENGTH
};
2202 static const UColAttributeValue val
[] = { UCOL_QUATERNARY
};
/* attShifted[k] is paired with valShifted[k]. */
2204 static const UColAttribute attShifted
[] = { UCOL_STRENGTH
, UCOL_ALTERNATE_HANDLING
};
2205 static const UColAttributeValue valShifted
[] = { UCOL_QUATERNARY
, UCOL_SHIFTED
};
2207 genericLocaleStarterWithOptions("ja", test1
, UPRV_LENGTHOF(test1
), att
, val
, 1);
2208 genericLocaleStarterWithOptions("ja", test2
, UPRV_LENGTHOF(test2
), att
, val
, 1);
2209 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2210 genericLocaleStarterWithOptions("ja", test1
, UPRV_LENGTHOF(test1
), attShifted
, valShifted
, 2);
2211 genericLocaleStarterWithOptions("ja", test2
, UPRV_LENGTHOF(test2
), attShifted
, valShifted
, 2);
/*
 * TestStrCollIdenticalPrefix: with a rule that makes two supplementary code
 * points equal ("&\ud9b0\udc70=\ud9b0\udc71"), passes the strings in test[]
 * to genericRulesStarterWithResult() expecting UCOL_EQUAL.
 */
2214 static void TestStrCollIdenticalPrefix(void) {
2215 const char* rule
= "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2216 const char* test
[] = {
2220 genericRulesStarterWithResult(rule
, test
, UPRV_LENGTHOF(test
), UCOL_EQUAL
);
2222 /* Contractions should have all their canonically equivalent */
2223 /* strings included */
/*
 * TestContractionClosure: table-driven test. Each entry of tests[] holds a
 * rule string (rules), a list of strings (data, up to 10 entries) and a count
 * (len); every entry is passed to genericRulesStarterWithResult() expecting
 * UCOL_EQUAL. The data lists spell the same text in precomposed and
 * decomposed forms (for example "\u00e4\u00e4" and "a\u0308a\u0308").
 */
2224 static void TestContractionClosure(void) {
2225 static const struct {
2227 const char *data
[10];
2230 { "&b=\\u00e4\\u00e4",
2231 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2233 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2238 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2239 genericRulesStarterWithResult(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
, UCOL_EQUAL
);
2243 /* This test also fails */
/*
 * TestBeforePrefixFailure: table-driven test of "[before 3]" tailorings.
 * Each entry of tests[] holds a rule string (rules), a list of strings (data,
 * up to 10 entries) and a count (len); every entry is passed to
 * genericRulesStarter().
 * The remainder of the function runs two further rule strings (rule1, rule2)
 * through genericRulesStarter() and then logs, via log_verbose(), every
 * collation element that a collator built from rule1 returns for each string
 * in test[].
 */
2244 static void TestBeforePrefixFailure(void) {
2245 static const struct {
2247 const char *data
[10];
2251 "&[before 3]\\uff41 <<< x",
2252 {"x", "\\uff41"}, 2 },
2253 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2254 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2255 "&[before 3]\\u30a7<<<\\u30a9",
2256 {"\\u30a9", "\\u30a7"}, 2 },
2257 { "&[before 3]\\u30a7<<<\\u30a9"
2258 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2259 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2260 {"\\u30a9", "\\u30a7"}, 2 },
2265 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2266 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2271 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2272 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2273 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2275 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2276 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2277 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2278 const char* test
[] = {
2279 "\\u30c6\\u30fc\\u30bf",
2280 "\\u30c6\\u30a7\\u30bf",
2282 genericRulesStarter(rule1
, test
, UPRV_LENGTHOF(test
));
2283 genericRulesStarter(rule2
, test
, UPRV_LENGTHOF(test
));
2284 /* this piece of code should be in some sort of verbose mode */
2285 /* it gets the collation elements for elements and prints them */
2286 /* This is useful when trying to see whether the problem is */
2288 UErrorCode status
= U_ZERO_ERROR
;
2290 UCollationElements
*it
= NULL
;
2293 uint32_t uStringLen
;
2294 UCollator
*coll
= NULL
;
2296 uStringLen
= u_unescape(rule1
, string
, 256);
2298 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2300 /*coll = ucol_open("ja_JP_JIS", &status);*/
/* The iterator is opened over zero units of text; the actual text is supplied
   per test string through ucol_setText() in the loop below. */
2301 it
= ucol_openElements(coll
, string
, 0, &status
);
2303 for(i
= 0; i
< UPRV_LENGTHOF(test
); i
++) {
2304 log_verbose("%s\n", test
[i
]);
2305 uStringLen
= u_unescape(test
[i
], string
, 256);
2306 ucol_setText(it
, string
, uStringLen
, &status
);
2308 while((CE
=ucol_next(it
, &status
)) != UCOL_NULLORDER
) {
2309 log_verbose("%08X\n", CE
);
2315 ucol_closeElements(it
);
2321 static void TestPrefixCompose(void) {
2323 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2325 const char* test[] = {
2326 "\\u30c6\\u30fc\\u30bf",
2327 "\\u30c6\\u30a7\\u30bf",
2331 UErrorCode status
= U_ZERO_ERROR
;
2333 /*UCollationElements *it = NULL;*/
2336 uint32_t uStringLen
;
2337 UCollator
*coll
= NULL
;
2339 uStringLen
= u_unescape(rule1
, string
, 256);
2341 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2349 [last variable] last variable value
2350 [last primary ignorable] largest CE for primary ignorable
2351 [last secondary ignorable] largest CE for secondary ignorable
2352 [last tertiary ignorable] largest CE for tertiary ignorable
2353 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2356 static void TestRuleOptions(void) {
2357 /* values here are hardcoded and are correct for the current UCA
2358 * when the UCA changes, one might be forced to change these
2363 * These strings contain the last character before [variable top]
2364 * and the first and second characters (by primary weights) after it.
2365 * See FractionalUCA.txt. For example:
2366 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2367 [variable top = 0C FE]
2368 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2370 00B4; [0D 0C, 05, 05]
2372 * Note: Starting with UCA 6.0, the [variable top] collation element
2373 * is not the weight of any character or string,
2374 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2376 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2377 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
2378 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2381 * This string has to match the character that has the [last regular] weight
2382 * which changes with each UCA version.
2383 * See the bottom of FractionalUCA.txt which says something like
2384 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2386 * Note: Starting with UCA 6.0, the [last regular] collation element
2387 * is not the weight of any character or string,
2388 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2390 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2392 static const struct {
2394 const char *data
[10];
2398 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2399 /* - all befores here amount to zero */
2400 { "&[before 3][first tertiary ignorable]<<<a",
2401 { "\\u0000", "a"}, 2
2402 }, /* you cannot go before first tertiary ignorable */
2404 { "&[before 3][last tertiary ignorable]<<<a",
2405 { "\\u0000", "a"}, 2
2406 }, /* you cannot go before last tertiary ignorable */
2409 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2410 * and it *is* possible to "go before" that.
2412 { "&[before 3][first secondary ignorable]<<<a",
2413 { "\\u0000", "a"}, 2
2416 { "&[before 3][last secondary ignorable]<<<a",
2417 { "\\u0000", "a"}, 2
2420 /* 'normal' befores */
2423 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2424 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2425 * because there is no tailoring space before that boundary.
2426 * Made the tests work by tailoring to a space instead.
2428 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
2429 { "c", "b", "\\u0332", "a" }, 4
2432 /* we don't have a code point that corresponds to
2433 * the last primary ignorable
2435 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
2436 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2439 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2440 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
2443 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2444 { LAST_VARIABLE_CHAR_STRING
, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING
}, 5
2447 { "&[first regular]<a"
2448 "&[before 1][first regular]<b",
2449 { "b", FIRST_REGULAR_CHAR_STRING
, "a", SECOND_REGULAR_CHAR_STRING
}, 4
2452 { "&[before 1][last regular]<b"
2453 "&[last regular]<a",
2454 { LAST_REGULAR_CHAR_STRING
, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2457 { "&[before 1][first implicit]<b"
2458 "&[first implicit]<a",
2459 { "b", "\\u4e00", "a", "\\u4e01"}, 4
2461 #if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2462 { "&[before 1][last implicit]<b"
2463 "&[last implicit]<a",
2464 { "b", "\\U0010FFFD", "a" }, 3
2467 { "&[last variable]<z"
2468 "&' '<x" /* was &[last primary ignorable]<x, see above */
2469 "&[last secondary ignorable]<<y"
2470 "&[last tertiary ignorable]<<<w"
2472 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING
, "z", "u"}, 7
2478 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2479 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2484 static void TestOptimize(void) {
2485 /* this is not really a test - just trying out
2486 * whether copying of UCA contents will fail
2487 * Cannot really test, since the functionality
2490 static const struct {
2492 const char *data
[10];
2495 /* - all befores here amount to zero */
2496 { "[optimize [\\uAC00-\\uD7FF]]",
2501 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2502 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2507 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2508 weiv ucol_strcollIter?
2509 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2510 weiv these are the input strings?
2511 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2512 weiv will check - could be a problem with utf-8 iterator
2513 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2515 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2516 weiv that doesn't sound right
2517 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2518 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
2519 cycheng@ca.ibm.c... yes
2520 weiv and then do the comparison
2521 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2522 weiv utf-16 strings look like a little endian ones in the example you sent me
2523 weiv It could be a bug - let me try to test it out
2524 cycheng@ca.ibm.c... ok
2525 cycheng@ca.ibm.c... we can wait till the conf. call
2526 cycheng@ca.ibm.c... next week
2527 weiv that would be great
2529 weiv I might be wrong
2530 weiv let me play with it some more
2531 cycheng@ca.ibm.c... ok
2532 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
2533 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2534 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2536 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2538 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2541 static void Alexis(void) {
2542 UErrorCode status
= U_ZERO_ERROR
;
2543 UCollator
*coll
= ucol_open("", &status
);
2546 const char utf16be
[2][4] = {
2547 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
2548 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
2551 const char utf8
[2][4] = {
2552 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
2553 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
2556 UCharIterator iterU161
, iterU162
;
2557 UCharIterator iterU81
, iterU82
;
2559 UCollationResult resU16
, resU8
;
2561 uiter_setUTF16BE(&iterU161
, utf16be
[0], 4);
2562 uiter_setUTF16BE(&iterU162
, utf16be
[1], 4);
2564 uiter_setUTF8(&iterU81
, utf8
[0], 4);
2565 uiter_setUTF8(&iterU82
, utf8
[1], 4);
2567 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2569 resU16
= ucol_strcollIter(coll
, &iterU161
, &iterU162
, &status
);
2570 resU8
= ucol_strcollIter(coll
, &iterU81
, &iterU82
, &status
);
2573 if(resU16
!= resU8
) {
2574 log_err("different results\n");
2581 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2582 static void Alexis2(void) {
2583 UErrorCode status
= U_ZERO_ERROR
;
2584 UChar U16Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
2585 char U16BESource
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16BETarget
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
2586 char U8Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U8Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
2587 int32_t U16LenS
= 0, U16LenT
= 0, U16BELenS
= 0, U16BELenT
= 0, U8LenS
= 0, U8LenT
= 0;
2589 UConverter
*conv
= NULL
;
2591 UCharIterator U16BEItS
, U16BEItT
;
2592 UCharIterator U8ItS
, U8ItT
;
2594 UCollationResult resU16
, resU16BE
, resU8
;
2596 static const char* const pairs
[][2] = {
2597 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2598 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2599 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2600 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2601 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2602 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2603 { "\\u0020", "\\u0020\\u0000"}
2605 5F20 (my result here)
2607 5F20 (your result here)
2613 UCollator
*coll
= ucol_open("", &status
);
2614 if(status
== U_FILE_ACCESS_ERROR
) {
2615 log_data_err("Is your data around?\n");
2617 } else if(U_FAILURE(status
)) {
2618 log_err("Error opening collator\n");
2621 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2622 conv
= ucnv_open("UTF16BE", &status
);
2623 for(i
= 0; i
< UPRV_LENGTHOF(pairs
); i
++) {
2624 U16LenS
= u_unescape(pairs
[i
][0], U16Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
2625 U16LenT
= u_unescape(pairs
[i
][1], U16Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
2627 resU16
= ucol_strcoll(coll
, U16Source
, U16LenS
, U16Target
, U16LenT
);
2629 log_verbose("Result of strcoll is %i\n", resU16
);
2631 U16BELenS
= ucnv_fromUChars(conv
, U16BESource
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Source
, U16LenS
, &status
);
2632 U16BELenT
= ucnv_fromUChars(conv
, U16BETarget
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Target
, U16LenT
, &status
);
2633 (void)U16BELenS
; /* Suppress set but not used warnings. */
2636 /* use the original sizes, as the result from converter is in bytes */
2637 uiter_setUTF16BE(&U16BEItS
, U16BESource
, U16LenS
);
2638 uiter_setUTF16BE(&U16BEItT
, U16BETarget
, U16LenT
);
2640 resU16BE
= ucol_strcollIter(coll
, &U16BEItS
, &U16BEItT
, &status
);
2642 log_verbose("Result of U16BE is %i\n", resU16BE
);
2644 if(resU16
!= resU16BE
) {
2645 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
2648 u_strToUTF8(U8Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenS
, U16Source
, U16LenS
, &status
);
2649 u_strToUTF8(U8Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenT
, U16Target
, U16LenT
, &status
);
2651 uiter_setUTF8(&U8ItS
, U8Source
, U8LenS
);
2652 uiter_setUTF8(&U8ItT
, U8Target
, U8LenT
);
2654 resU8
= ucol_strcollIter(coll
, &U8ItS
, &U8ItT
, &status
);
2656 if(resU16
!= resU8
) {
2657 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
2666 static void TestHebrewUCA(void) {
2667 UErrorCode status
= U_ZERO_ERROR
;
2668 static const char *first
[] = {
2669 "d790d6b8d79cd795d6bcd7a9",
2670 "d790d79cd79ed7a7d799d799d7a1",
2671 "d790d6b4d79ed795d6bcd7a9",
2674 char utf8String
[3][256];
2675 UChar utf16String
[3][256];
2677 int32_t i
= 0, j
= 0;
2678 int32_t sizeUTF8
[3];
2679 int32_t sizeUTF16
[3];
2681 UCollator
*coll
= ucol_open("", &status
);
2682 if (U_FAILURE(status
)) {
2683 log_err_status(status
, "Could not open UCA collation %s\n", u_errorName(status
));
2686 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2688 for(i
= 0; i
< UPRV_LENGTHOF(first
); i
++) {
2689 sizeUTF8
[i
] = u_parseUTF8(first
[i
], -1, utf8String
[i
], 256, &status
);
2690 u_strFromUTF8(utf16String
[i
], 256, &sizeUTF16
[i
], utf8String
[i
], sizeUTF8
[i
], &status
);
2691 log_verbose("%i: ");
2692 for(j
= 0; j
< sizeUTF16
[i
]; j
++) {
2693 /*log_verbose("\\u%04X", utf16String[i][j]);*/
2694 log_verbose("%04X", utf16String
[i
][j
]);
2698 for(i
= 0; i
< UPRV_LENGTHOF(first
)-1; i
++) {
2699 for(j
= i
+ 1; j
< UPRV_LENGTHOF(first
); j
++) {
2700 doTest(coll
, utf16String
[i
], utf16String
[j
], UCOL_LESS
);
2708 static void TestPartialSortKeyTermination(void) {
2709 static const char* cases
[] = {
2710 "\\u1234\\u1234\\udc00",
2711 "\\udc00\\ud800\\ud800"
2716 UErrorCode status
= U_ZERO_ERROR
;
2718 UCollator
*coll
= ucol_open("", &status
);
2722 UChar currCase
[256];
2724 int32_t pKeyLen
= 0;
2728 for(i
= 0; i
< UPRV_LENGTHOF(cases
); i
++) {
2729 uint32_t state
[2] = {0, 0};
2730 length
= u_unescape(cases
[i
], currCase
, 256);
2731 uiter_setString(&iter
, currCase
, length
);
2732 pKeyLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, key
, 256, &status
);
2733 (void)pKeyLen
; /* Suppress set but not used warning. */
2735 log_verbose("Done\n");
2741 static void TestSettings(void) {
2742 static const char* cases
[] = {
2747 static const char* locales
[] = {
2752 UErrorCode status
= U_ZERO_ERROR
;
2754 int32_t i
= 0, j
= 0;
2756 UChar source
[256], target
[256];
2757 int32_t sLen
= 0, tLen
= 0;
2759 UCollator
*collateObject
= NULL
;
2760 for(i
= 0; i
< UPRV_LENGTHOF(locales
); i
++) {
2761 collateObject
= ucol_open(locales
[i
], &status
);
2762 ucol_setStrength(collateObject
, UCOL_PRIMARY
);
2763 ucol_setAttribute(collateObject
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
2764 for(j
= 1; j
< UPRV_LENGTHOF(cases
); j
++) {
2765 sLen
= u_unescape(cases
[j
-1], source
, 256);
2767 tLen
= u_unescape(cases
[j
], target
, 256);
2769 doTest(collateObject
, source
, target
, UCOL_EQUAL
);
2771 ucol_close(collateObject
);
2775 static int32_t TestEqualsForCollator(const char* locName
, UCollator
*source
, UCollator
*target
) {
2776 UErrorCode status
= U_ZERO_ERROR
;
2777 int32_t errorNo
= 0;
2778 const UChar
*sourceRules
= NULL
;
2779 int32_t sourceRulesLen
= 0;
2780 UParseError parseError
;
2781 UColAttributeValue french
= UCOL_OFF
;
2783 if(!ucol_equals(source
, target
)) {
2784 log_err("Same collators, different address not equal\n");
2788 if(uprv_strcmp(locName
, ucol_getLocaleByType(source
, ULOC_ACTUAL_LOCALE
, &status
)) == 0) {
2789 target
= ucol_safeClone(source
, NULL
, NULL
, &status
);
2790 if(U_FAILURE(status
)) {
2791 log_err("Error creating clone\n");
2795 if(!ucol_equals(source
, target
)) {
2796 log_err("Collator different from it's clone\n");
2799 french
= ucol_getAttribute(source
, UCOL_FRENCH_COLLATION
, &status
);
2800 if(french
== UCOL_ON
) {
2801 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
2803 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
2805 if(U_FAILURE(status
)) {
2806 log_err("Error setting attributes\n");
2810 if(ucol_equals(source
, target
)) {
2811 log_err("Collators same even when options changed\n");
2816 sourceRules
= ucol_getRules(source
, &sourceRulesLen
);
2817 target
= ucol_openRules(sourceRules
, sourceRulesLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
2818 if(U_FAILURE(status
)) {
2819 log_err("Error instantiating target from rules - %s\n", u_errorName(status
));
2823 /* Note: The tailoring rule string is an optional data item. */
2824 if(!ucol_equals(source
, target
) && sourceRulesLen
!= 0) {
2825 log_err("Collator different from collator that was created from the same rules\n");
2834 static void TestEquals(void) {
2835 /* ucol_equals is not currently a public API. There is a chance that it will become
2836 * something like this.
2838 /* test whether the two collators instantiated from the same locale are equal */
2839 UErrorCode status
= U_ZERO_ERROR
;
2840 UParseError parseError
;
2841 int32_t noOfLoc
= uloc_countAvailable();
2842 const char *locName
= NULL
;
2843 UCollator
*source
= NULL
, *target
= NULL
;
2846 const char* rules
[] = {
2847 "&l < lj <<< Lj <<< LJ",
2848 "&n < nj <<< Nj <<< NJ",
2853 const char* badRules[] = {
2855 "&n < nj <<< nJ <<< NJ",
2857 "&AE <<< \\u00c4 <<< x"
2861 UChar sourceRules
[1024], targetRules
[1024];
2862 int32_t sourceRulesSize
= 0, targetRulesSize
= 0;
2863 int32_t rulesSize
= UPRV_LENGTHOF(rules
);
2865 for(i
= 0; i
< rulesSize
; i
++) {
2866 sourceRulesSize
+= u_unescape(rules
[i
], sourceRules
+sourceRulesSize
, 1024 - sourceRulesSize
);
2867 targetRulesSize
+= u_unescape(rules
[rulesSize
-i
-1], targetRules
+targetRulesSize
, 1024 - targetRulesSize
);
2870 source
= ucol_openRules(sourceRules
, sourceRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
2871 if(status
== U_FILE_ACCESS_ERROR
) {
2872 log_data_err("Is your data around?\n");
2874 } else if(U_FAILURE(status
)) {
2875 log_err("Error opening collator\n");
2878 target
= ucol_openRules(targetRules
, targetRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
2879 if(!ucol_equals(source
, target
)) {
2880 log_err("Equivalent collators not equal!\n");
2885 source
= ucol_open("root", &status
);
2886 target
= ucol_open("root", &status
);
2887 log_verbose("Testing root\n");
2888 if(!ucol_equals(source
, source
)) {
2889 log_err("Same collator not equal\n");
2891 if(TestEqualsForCollator("root", source
, target
)) {
2892 log_err("Errors for root\n");
2896 for(i
= 0; i
<noOfLoc
; i
++) {
2897 status
= U_ZERO_ERROR
;
2898 locName
= uloc_getAvailable(i
);
2899 /*if(hasCollationElements(locName)) {*/
2900 log_verbose("Testing equality for locale %s\n", locName
);
2901 source
= ucol_open(locName
, &status
);
2902 target
= ucol_open(locName
, &status
);
2903 if (U_FAILURE(status
)) {
2904 log_err("Error opening collator for locale %s %s\n", locName
, u_errorName(status
));
2907 if(TestEqualsForCollator(locName
, source
, target
)) {
2908 log_err("Errors for locale %s\n", locName
);
2915 static void TestJ2726(void) {
2916 UChar a
[2] = { 0x61, 0x00 }; /*"a"*/
2917 UChar aSpace
[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2918 UChar spaceA
[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2919 UErrorCode status
= U_ZERO_ERROR
;
2920 UCollator
*coll
= ucol_open("en", &status
);
2921 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
2922 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
2923 doTest(coll
, a
, aSpace
, UCOL_EQUAL
);
2924 doTest(coll
, aSpace
, a
, UCOL_EQUAL
);
2925 doTest(coll
, a
, spaceA
, UCOL_EQUAL
);
2926 doTest(coll
, spaceA
, a
, UCOL_EQUAL
);
2927 doTest(coll
, spaceA
, aSpace
, UCOL_EQUAL
);
2928 doTest(coll
, aSpace
, spaceA
, UCOL_EQUAL
);
2932 static void NullRule(void) {
2934 UErrorCode status
= U_ZERO_ERROR
;
2935 UCollator
*coll
= ucol_openRules(r
, 1, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2936 if(U_SUCCESS(status
)) {
2937 log_err("This should have been an error!\n");
2940 status
= U_ZERO_ERROR
;
2942 coll
= ucol_openRules(r
, 0, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2943 if(U_FAILURE(status
)) {
2944 log_err_status(status
, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status
));
2951 * Test for CollationElementIterator previous and next for the whole set of
2952 * unicode characters with normalization on.
2954 static void TestNumericCollation(void)
2956 UErrorCode status
= U_ZERO_ERROR
;
2958 const static char *basicTestStrings
[]={
2971 const static char *preZeroTestStrings
[]={
2979 "avery000000010000",
2982 const static char *thirtyTwoBitNumericStrings
[]={
2989 const static char *longNumericStrings
[]={
2990 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2991 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2992 are treated as multiple collation elements. */
2993 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2994 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2995 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2996 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2997 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2998 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2999 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3000 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3001 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3002 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3005 const static char *supplementaryDigits
[] = {
3006 "\\uD835\\uDFCE", /* 0 */
3007 "\\uD835\\uDFCF", /* 1 */
3008 "\\uD835\\uDFD0", /* 2 */
3009 "\\uD835\\uDFD1", /* 3 */
3010 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3011 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3012 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3013 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3014 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3015 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3018 const static char *foreignDigits
[] = {
3033 const static char *evenZeroes
[] = {
3040 UColAttribute att
= UCOL_NUMERIC_COLLATION
;
3041 UColAttributeValue val
= UCOL_ON
;
3043 /* Open our collator. */
3044 UCollator
* coll
= ucol_open("root", &status
);
3045 if (U_FAILURE(status
)){
3046 log_err_status(status
, "ERROR: in using ucol_open() -> %s\n",
3047 myErrorName(status
));
3050 genericLocaleStarterWithOptions("root", basicTestStrings
, UPRV_LENGTHOF(basicTestStrings
), &att
, &val
, 1);
3051 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings
, UPRV_LENGTHOF(thirtyTwoBitNumericStrings
), &att
, &val
, 1);
3052 genericLocaleStarterWithOptions("root", longNumericStrings
, UPRV_LENGTHOF(longNumericStrings
), &att
, &val
, 1);
3053 genericLocaleStarterWithOptions("en_US", foreignDigits
, UPRV_LENGTHOF(foreignDigits
), &att
, &val
, 1);
3054 genericLocaleStarterWithOptions("root", supplementaryDigits
, UPRV_LENGTHOF(supplementaryDigits
), &att
, &val
, 1);
3055 genericLocaleStarterWithOptions("root", evenZeroes
, UPRV_LENGTHOF(evenZeroes
), &att
, &val
, 1);
3057 /* Setting up our collator to do digits. */
3058 ucol_setAttribute(coll
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
3059 if (U_FAILURE(status
)){
3060 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3061 myErrorName(status
));
3066 Testing that prepended zeroes still yield the correct collation behavior.
3067 We expect that every element in our strings array will be equal.
3069 genericOrderingTestWithResult(coll
, preZeroTestStrings
, UPRV_LENGTHOF(preZeroTestStrings
), UCOL_EQUAL
);
3074 static void TestTibetanConformance(void)
3076 const char* test
[] = {
3077 "\\u0FB2\\u0591\\u0F71\\u0061",
3078 "\\u0FB2\\u0F71\\u0061"
3081 UErrorCode status
= U_ZERO_ERROR
;
3082 UCollator
*coll
= ucol_open("", &status
);
3086 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3087 if (U_SUCCESS(status
)) {
3088 u_unescape(test
[0], source
, 100);
3089 u_unescape(test
[1], target
, 100);
3090 doTest(coll
, source
, target
, UCOL_EQUAL
);
3091 result
= ucol_strcoll(coll
, source
, -1, target
, -1);
3092 log_verbose("result %d\n", result
);
3093 if (UCOL_EQUAL
!= result
) {
3094 log_err("Tibetan comparison error\n");
3099 genericLocaleStarterWithResult("", test
, 2, UCOL_EQUAL
);
/*
 * Hands two escaped Han strings to genericLocaleStarter() for the
 * "zh__PINYIN" locale (presumably an ascending-order check; see that
 * helper for the exact expectation).
 */
static void TestPinyinProblem(void) {
    static const char *orderedStrings[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", orderedStrings, UPRV_LENGTHOF(orderedStrings));
}
3108 * Iterate through the given iterator, checking to see that all the strings
3109 * in the expected array are present.
3110 * @param expected array of strings we expect to see, or NULL
3111 * @param expectedCount number of elements of expected, or 0
3113 static int32_t checkUEnumeration(const char* msg
,
3115 const char** expected
,
3116 int32_t expectedCount
) {
3117 UErrorCode ec
= U_ZERO_ERROR
;
3118 int32_t i
= 0, n
, j
, bit
;
3119 int32_t seenMask
= 0;
3121 U_ASSERT(expectedCount
>= 0 && expectedCount
< 31); /* [sic] 31 not 32 */
3122 n
= uenum_count(iter
, &ec
);
3123 if (!assertSuccess("count", &ec
)) return -1;
3124 log_verbose("%s = [", msg
);
3126 const char* s
= uenum_next(iter
, NULL
, &ec
);
3127 if (!assertSuccess("snext", &ec
) || s
== NULL
) break;
3128 if (i
!= 0) log_verbose(",");
3129 log_verbose("%s", s
);
3130 /* check expected list */
3131 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
3132 if ((seenMask
&bit
) == 0 &&
3133 uprv_strcmp(s
, expected
[j
]) == 0) {
3139 log_verbose("] (%d)\n", i
);
3140 assertTrue("count verified", i
==n
);
3141 /* did we see all expected strings? */
3142 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
3143 if ((seenMask
&bit
)!=0) {
3144 log_verbose("Ok: \"%s\" seen\n", expected
[j
]);
3146 log_err("FAIL: \"%s\" not seen\n", expected
[j
]);
3153 * Test new API added for separate collation tree.
3155 static void TestSeparateTrees(void) {
3156 UErrorCode ec
= U_ZERO_ERROR
;
3157 UEnumeration
*e
= NULL
;
3162 static const char* AVAIL
[] = { "en", "de" };
3164 static const char* KW
[] = { "collation" };
3166 static const char* KWVAL
[] = { "phonebook", "stroke" };
3168 #if !UCONFIG_NO_SERVICE
3169 e
= ucol_openAvailableLocales(&ec
);
3171 assertSuccess("ucol_openAvailableLocales", &ec
);
3172 assertTrue("ucol_openAvailableLocales!=0", e
!=0);
3173 n
= checkUEnumeration("ucol_openAvailableLocales", e
, AVAIL
, UPRV_LENGTHOF(AVAIL
));
3174 (void)n
; /* Suppress set but not used warnings. */
3175 /* Don't need to check n because we check list */
3178 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec
));
3182 e
= ucol_getKeywords(&ec
);
3184 assertSuccess("ucol_getKeywords", &ec
);
3185 assertTrue("ucol_getKeywords!=0", e
!=0);
3186 n
= checkUEnumeration("ucol_getKeywords", e
, KW
, UPRV_LENGTHOF(KW
));
3187 /* Don't need to check n because we check list */
3190 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec
));
3193 e
= ucol_getKeywordValues(KW
[0], &ec
);
3195 assertSuccess("ucol_getKeywordValues", &ec
);
3196 assertTrue("ucol_getKeywordValues!=0", e
!=0);
3197 n
= checkUEnumeration("ucol_getKeywordValues", e
, KWVAL
, UPRV_LENGTHOF(KWVAL
));
3198 /* Don't need to check n because we check list */
3201 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec
));
3204 /* Try setting a warning before calling ucol_getKeywordValues */
3205 ec
= U_USING_FALLBACK_WARNING
;
3206 e
= ucol_getKeywordValues(KW
[0], &ec
);
3207 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec
)) {
3208 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e
!=0);
3209 n
= checkUEnumeration("ucol_getKeywordValues [with warning code set]", e
, KWVAL
, UPRV_LENGTHOF(KWVAL
));
3210 /* Don't need to check n because we check list */
3215 U_DRAFT int32_t U_EXPORT2
3216 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3217 const char* locale, UBool* isAvailable,
3218 UErrorCode* status);
3221 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de",
3223 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
3224 assertEquals("getFunctionalEquivalent(de)", "root", loc
);
3225 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3226 isAvailable
== TRUE
);
3229 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de_DE",
3231 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
3232 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc
);
3233 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3234 isAvailable
== FALSE
);
3238 /* supersedes TestJ784 */
/*
 * TestBeforePinyin: hands three lists of escaped strings to the generic
 * collation starters. `test` and `test2` are run both against the
 * "[before 2]" tailoring held in `rules` and against the "zh" locale;
 * `test3` is run against the "zh" locale only.
 * NOTE(review): genericRulesStarter/genericLocaleStarter are defined
 * elsewhere; presumably they check that each list is already in collation
 * order -- confirm against their definitions.
 */
3239 static void TestBeforePinyin(void) {
3240 const static char rules
[] = {
3241 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
3242 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
3243 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
3244 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
3245 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
3246 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
3249 const static char *test
[] = {
3260 const static char *test2
[] = {
/* Han-character cases; this list is only passed to the "zh" locale starter below. */
3293 const static char *test3
[] = { // rdar://53741390
3294 "\\u85CF", // 藏 cáng
3295 "\\u92BA", // 銺 zàng
3296 "\\u85CF\\u6587", // 藏文 zàngwén
3297 "\\u85CF\\u8BED", // 藏语 zàngyǔ
3298 "\\u81D3", // 臓 zàng
/* Each list is run with the custom rules first, then with the "zh" locale. */
3301 genericRulesStarter(rules
, test
, UPRV_LENGTHOF(test
));
3302 genericLocaleStarter("zh", test
, UPRV_LENGTHOF(test
));
3303 genericRulesStarter(rules
, test2
, UPRV_LENGTHOF(test2
));
3304 genericLocaleStarter("zh", test2
, UPRV_LENGTHOF(test2
));
3305 genericLocaleStarter("zh", test3
, UPRV_LENGTHOF(test3
));
/*
 * TestBeforeTightening: table-driven check of which "[before N]" / relation
 * combinations ucol_openRules accepts. Each table row pairs a rule string
 * with the UErrorCode the open call is expected to leave in `status`.
 * In the table, "[before 1]" is paired with "<", "[before 2]" with "<<" and
 * "[before 3]" with "<<<" for U_ZERO_ERROR; every other combination, and the
 * malformed "[before I]", expects U_INVALID_FORMAT_ERROR.
 */
3308 static void TestBeforeTightening(void) {
3309 static const struct {
3311 UErrorCode expectedStatus
;
3313 { "&[before 1]a<x", U_ZERO_ERROR
},
3314 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR
},
3315 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR
},
3316 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR
},
3317 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR
},
3318 { "&[before 2]a<<x",U_ZERO_ERROR
},
3319 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR
},
3320 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR
},
3321 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR
},
3322 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR
},
3323 { "&[before 3]a<<<x",U_ZERO_ERROR
},
3324 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR
},
3325 { "&[before I]a = x",U_INVALID_FORMAT_ERROR
}
3330 UErrorCode status
= U_ZERO_ERROR
;
3331 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
3334 UCollator
*coll
= NULL
;
/* For every row: unescape the rule into rlz, try to open a collator, compare the resulting status. */
3337 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
3338 rlen
= u_unescape(tests
[i
].rules
, rlz
, RULE_BUFFER_LEN
);
3339 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
/* Exact status comparison: a different error code than expected is also reported. */
3340 if(status
!= tests
[i
].expectedStatus
) {
3341 log_err_status(status
, "Opening a collator with rules %s returned error code %s, expected %s\n",
3342 tests
[i
].rules
, u_errorName(status
), u_errorName(tests
[i
].expectedStatus
));
/* Reset so an expected failure in one row does not leak into the next open call. */
3345 status
= U_ZERO_ERROR
;
3352 &[before 1] a < x <<< X << q <<< Q < z
3353 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3356 &[before 2] a << x <<< X << q <<< Q < z
3357 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3360 &[before 3] a <<< x <<< X << q <<< Q < z
3361 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3365 &[before 1] a < x <<< X << q <<< Q < z
3366 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3369 &[before 2] a << x <<< X << q <<< Q < z
3370 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3373 &[before 3] a <<< x <<< X << q <<< Q < z
3374 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3378 &[before 1] a < x <<< X << q <<< Q < z
3379 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3382 &[before 2] a << x <<< X << q <<< Q < z
3383 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
3386 &[before 3] a <<< x <<< X << q <<< Q < z
3387 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
3390 &[before 1] s < x <<< X << q <<< Q < z
3391 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3393 &[before 2] s << x <<< X << q <<< Q < z
3394 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3396 &[before 3] s <<< x <<< X << q <<< Q < z
3397 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3400 &[before 1] \u24DC < x <<< X << q <<< Q < z
3401 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3403 &[before 2] \u24DC << x <<< X << q <<< Q < z
3404 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3406 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3407 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
3412 /* requires features not yet supported */
/*
 * TestMoreBefore: table-driven "[before N]" ordering test. Each row holds a
 * rule string, a list of up to 16 strings (`order`) and the number of entries
 * used (9 in every row here); the loop passes each row to genericRulesStarter.
 * NOTE(review): genericRulesStarter is defined elsewhere; presumably it
 * asserts the strings collate in the listed order under the rule -- confirm.
 */
3413 static void TestMoreBefore(void) {
3414 static const struct {
3416 const char* order
[16];
3419 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
3420 { "m","M","x","X","q","Q","z","a","n" }, 9},
3421 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
3422 { "m","M","x","X","q","Q","a","z","n" }, 9},
3423 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
3424 { "m","M","x","X","a","q","Q","z","n" }, 9},
3425 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
3426 { "x","X","q","Q","z","m","M","a","n" }, 9},
3427 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
3428 { "m","M","x","X","q","Q","a","z","n" }, 9},
3429 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
3430 { "m","M","x","X","a","q","Q","z","n" }, 9},
3431 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
3432 { "x","X","q","Q","z","n","m","a","M" }, 9},
3433 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
3434 { "x","X","q","Q","m","a","M","z","n" }, 9},
3435 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
3436 { "m","x","X","a","M","q","Q","z","n" }, 9},
3437 { "&[before 1] s < x <<< X << q <<< Q < z",
3438 { "r","R","x","X","q","Q","z","s","n" }, 9},
3439 { "&[before 2] s << x <<< X << q <<< Q < z",
3440 { "r","R","x","X","q","Q","s","z","n" }, 9},
3441 { "&[before 3] s <<< x <<< X << q <<< Q < z",
3442 { "r","R","x","X","s","q","Q","z","n" }, 9},
3443 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
3444 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
3445 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
3446 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
3447 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
3448 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
/* Run every row: rule, expected-order list, and its entry count. */
3453 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
3454 genericRulesStarter(tests
[i
].rules
, tests
[i
].order
, tests
[i
].size
);
/*
 * TestTailorNULL: builds a collator from the single rule "&a <<< '\u0000'"
 * and compares two one-unit strings with ucol_strcoll, expecting UCOL_LESS.
 * NOTE(review): the left operand `a` is initialised to the code unit 1
 * (U+0001), not to the letter 'a' (0x61) that the rule tailors against --
 * confirm this is intended.
 */
3459 static void TestTailorNULL( void ) {
3460 const static char* rule
= "&a <<< '\\u0000'";
3461 UErrorCode status
= U_ZERO_ERROR
;
3462 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
3464 UChar a
= 1, null
= 0;
3465 UCollationResult res
= UCOL_EQUAL
;
3467 UCollator
*coll
= NULL
;
/* Unescape the rule text into UChars, then open a rule-based collator with default attributes. */
3470 rlen
= u_unescape(rule
, rlz
, RULE_BUFFER_LEN
);
3471 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
3473 if(U_FAILURE(status
)) {
3474 log_err_status(status
, "Could not open default collator! -> %s\n", u_errorName(status
));
/* Explicit lengths of 1 are required: the right-hand string is the single unit U+0000. */
3476 res
= ucol_strcoll(coll
, &a
, 1, &null
, 1);
3478 if(res
!= UCOL_LESS
) {
3479 log_err("NULL was not tailored properly!\n");
/*
 * TestUpperFirstQuaternary: runs the list "B", "b", "Bb", "bB" through
 * genericLocaleStarterWithOptions for the "root" locale with two attributes
 * set: UCOL_STRENGTH = UCOL_QUATERNARY and UCOL_CASE_FIRST = UCOL_UPPER_FIRST.
 * `att` and `attVals` are parallel arrays (attribute i takes value i).
 */
3487 TestUpperFirstQuaternary(void)
3489 const char* tests
[] = { "B", "b", "Bb", "bB" };
3490 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_FIRST
};
3491 UColAttributeValue attVals
[] = { UCOL_QUATERNARY
, UCOL_UPPER_FIRST
};
3492 genericLocaleStarterWithOptions("root", tests
, UPRV_LENGTHOF(tests
), att
, attVals
, UPRV_LENGTHOF(att
));
3498 const char* tests
[] = { "\\u00e2T", "aT" };
3499 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_LEVEL
};
3500 UColAttributeValue attVals
[] = { UCOL_PRIMARY
, UCOL_ON
};
3501 const char* tests2
[] = { "a", "A" };
3502 const char* rule
= "&[first tertiary ignorable]=A=a";
3503 UColAttribute att2
[] = { UCOL_CASE_LEVEL
};
3504 UColAttributeValue attVals2
[] = { UCOL_ON
};
3505 /* Test whether we correctly ignore primary ignorables on case level when */
3506 /* we have only primary & case level */
3507 genericLocaleStarterWithOptionsAndResult("root", tests
, UPRV_LENGTHOF(tests
), att
, attVals
, UPRV_LENGTHOF(att
), UCOL_EQUAL
);
3508 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3509 /* and case level */
3510 genericLocaleStarterWithOptions("root", tests2
, UPRV_LENGTHOF(tests2
), att
, attVals
, UPRV_LENGTHOF(att
));
3511 /* Test whether completely ignorable letters have case level info (they shouldn't) */
3512 genericRulesStarterWithOptionsAndResult(rule
, tests2
, UPRV_LENGTHOF(tests2
), att2
, attVals2
, UPRV_LENGTHOF(att2
), UCOL_EQUAL
);
3518 static const char *test
= "this is a test string";
3520 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
3521 unsigned char sortkey
[256];
3522 int32_t sortkey_length
;
3523 UErrorCode status
= U_ZERO_ERROR
;
3524 static UCollator
*coll
= NULL
;
3525 coll
= ucol_open("root", &status
);
3526 if(U_FAILURE(status
)) {
3527 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
3530 ucol_setStrength(coll
, UCOL_PRIMARY
);
3531 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
3532 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3533 if (U_FAILURE(status
)) {
3534 log_err("Failed setting atributes\n");
3537 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, NULL
, 0);
3538 if (sortkey_length
> 256) return;
3540 /* we mark the position where the null byte should be written in advance */
3541 sortkey
[sortkey_length
-1] = 0xAA;
3543 /* we set the buffer size one byte higher than needed */
3544 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
3547 /* no error occurs (for me) */
3548 if (sortkey
[sortkey_length
-1] == 0xAA) {
3549 log_err("Hit bug at first try\n");
3552 /* we mark the position where the null byte should be written again */
3553 sortkey
[sortkey_length
-1] = 0xAA;
3555 /* this time we set the buffer size to the exact amount needed */
3556 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
3559 /* now the trailing null byte is not written */
3560 if (sortkey
[sortkey_length
-1] == 0xAA) {
3561 log_err("Hit bug at second try\n");
3567 /* Regression test for Thai partial sort key problem */
3571 const static char *test
[] = {
3572 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3573 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3576 genericLocaleStarter("th", test
, UPRV_LENGTHOF(test
));
3582 const static char *test
[] = { "a", "y" };
3583 const char* rules
= "&Ny << Y &[first secondary ignorable] <<< a";
3584 genericRulesStarter(rules
, test
, UPRV_LENGTHOF(test
));
3590 UErrorCode status
= U_ZERO_ERROR
;
3592 UCollator
*coll
=NULL
;
3593 uint8_t resColl
[100], expColl
[100];
3594 int32_t rLen
, tLen
, ruleLen
, sLen
, kLen
;
3595 UChar rule
[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
3596 UChar rule2
[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
3598 * Note: Just tailoring &z<ae^ does not work as expected:
3599 * The UCA spec requires for discontiguous contractions that they
3600 * extend an *existing match* by one combining mark at a time.
3601 * Therefore, ae must be a contraction so that the builder finds
3602 * discontiguous contractions for ae^, for example with an intervening underdot.
3603 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3606 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3607 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3609 static const UChar tData
[][20]={
3611 {0x0041, 0x0323, 0x0302, 0},
3612 {0x1EA0, 0x0302, 0},
3613 {0x00C2, 0x0323, 0},
3614 {0x1ED8, 0}, /* O with dot and circumflex */
3615 {0x1ECC, 0x0302, 0},
3617 {0x1EA1, 0x0306, 0},
3619 static const UChar tailorData
[][20]={
3620 {0x1FA2, 0}, /* Omega with 3 combining marks */
3621 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3622 {0x1FF3, 0x0313, 0x0300, 0},
3623 {0x1F60, 0x0300, 0x0345, 0},
3624 {0x1F62, 0x0345, 0},
3625 {0x1FA0, 0x0300, 0},
3627 static const UChar tailorData2
[][20]={
3628 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
3629 {0x0073, 0x0323, 0x030C, 0},
3630 {0x0073, 0x030C, 0x0323, 0},
3632 static const UChar tailorData3
[][20]={
3633 {0x007a, 0}, /* z */
3634 {0x0061, 0x0065, 0}, /* a + e */
3635 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3636 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
3637 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3638 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
3639 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
3640 {0x00EA, 0}, /* e with circumflex */
3643 /* Test Vietnamese sort. */
3644 coll
= ucol_open("vi", &status
);
3645 if(U_FAILURE(status
)) {
3646 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
3649 log_verbose("\n\nVI collation:");
3650 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[2], u_strlen(tData
[2])) ) {
3651 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3653 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[3], u_strlen(tData
[3])) ) {
3654 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3656 if ( !ucol_equal(coll
, tData
[5], u_strlen(tData
[5]), tData
[4], u_strlen(tData
[4])) ) {
3657 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3659 if ( !ucol_equal(coll
, tData
[7], u_strlen(tData
[7]), tData
[6], u_strlen(tData
[6])) ) {
3660 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3663 for (j
=0; j
<8; j
++) {
3664 tLen
= u_strlen(tData
[j
]);
3665 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
3666 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
3667 for(i
= 0; i
<rLen
; i
++) {
3668 log_verbose(" %02X", resColl
[i
]);
3674 /* Test Romanian sort. */
3675 coll
= ucol_open("ro", &status
);
3676 log_verbose("\n\nRO collation:");
3677 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[1], u_strlen(tData
[1])) ) {
3678 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3680 if ( !ucol_equal(coll
, tData
[4], u_strlen(tData
[4]), tData
[5], u_strlen(tData
[5])) ) {
3681 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3683 if ( !ucol_equal(coll
, tData
[6], u_strlen(tData
[6]), tData
[7], u_strlen(tData
[7])) ) {
3684 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3687 for (j
=4; j
<8; j
++) {
3688 tLen
= u_strlen(tData
[j
]);
3689 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
3690 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
3691 for(i
= 0; i
<rLen
; i
++) {
3692 log_verbose(" %02X", resColl
[i
]);
3697 /* Test the precomposed Greek character with 3 combining marks. */
3698 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3699 ruleLen
= u_strlen(rule
);
3700 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3701 if (U_FAILURE(status
)) {
3702 log_err("ucol_openRules failed with %s\n", u_errorName(status
));
3705 sLen
= u_strlen(tailorData
[0]);
3706 for (j
=1; j
<6; j
++) {
3707 tLen
= u_strlen(tailorData
[j
]);
3708 if ( !ucol_equal(coll
, tailorData
[0], sLen
, tailorData
[j
], tLen
)) {
3709 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j
, tailorData
[j
]);
3712 /* Test getSortKey. */
3713 tLen
= u_strlen(tailorData
[0]);
3714 kLen
=ucol_getSortKey(coll
, tailorData
[0], tLen
, expColl
, 100);
3715 for (j
=0; j
<6; j
++) {
3716 tLen
= u_strlen(tailorData
[j
]);
3717 rLen
= ucol_getSortKey(coll
, tailorData
[j
], tLen
, resColl
, 100);
3718 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
3719 log_err("\n Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
3720 for(i
= 0; i
<rLen
; i
++) {
3721 log_err(" %02X", resColl
[i
]);
3727 log_verbose("\n\nTailoring test for s with caron:");
3728 ruleLen
= u_strlen(rule2
);
3729 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3730 tLen
= u_strlen(tailorData2
[0]);
3731 kLen
=ucol_getSortKey(coll
, tailorData2
[0], tLen
, expColl
, 100);
3732 for (j
=1; j
<3; j
++) {
3733 tLen
= u_strlen(tailorData2
[j
]);
3734 rLen
= ucol_getSortKey(coll
, tailorData2
[j
], tLen
, resColl
, 100);
3735 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
3736 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
3737 for(i
= 0; i
<rLen
; i
++) {
3738 log_err(" %02X", resColl
[i
]);
3744 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3745 ruleLen
= u_strlen(rule3
);
3746 coll
= ucol_openRules(rule3
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3747 tLen
= u_strlen(tailorData3
[3]);
3748 kLen
=ucol_getSortKey(coll
, tailorData3
[3], tLen
, expColl
, 100);
3749 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3
[3], tLen
), tLen
);
3750 for(i
= 0; i
<kLen
; i
++) {
3751 log_verbose(" %02X", expColl
[i
]);
3753 for (j
=4; j
<6; j
++) {
3754 tLen
= u_strlen(tailorData3
[j
]);
3755 rLen
= ucol_getSortKey(coll
, tailorData3
[j
], tLen
, resColl
, 100);
3757 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
3758 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, aescstrdup(tailorData3
[j
], tLen
), tLen
);
3759 for(i
= 0; i
<rLen
; i
++) {
3760 log_err(" %02X", resColl
[i
]);
3764 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j
, aescstrdup(tailorData3
[j
], tLen
), tLen
);
3765 for(i
= 0; i
<rLen
; i
++) {
3766 log_verbose(" %02X", resColl
[i
]);
/*
 * TestTailor6179: opens two rule-based collators and compares the sort keys
 * of short test strings byte-for-byte with hard-coded expected keys.
 *   rule1: &[last primary ignorable]<< a &[first primary ignorable]<<b
 *          tData1[0] is checked against lastPrimaryIgnCE,
 *          tData1[1] against firstPrimaryIgnCE.
 *   rule2: &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b
 *          tData2[0] is checked against lastSecondaryIgnCE,
 *          tData2[1] against firstSecondaryIgnCE.
 * On a mismatch (length or bytes) the actual key is dumped with log_err.
 * NOTE(review): the log_err calls pass UChar arrays (tData1[..], tData2[..])
 * for a %s conversion -- confirm the logger tolerates that.
 */
3773 TestTailor6179(void)
3775 UErrorCode status
= U_ZERO_ERROR
;
3777 UCollator
*coll
=NULL
;
3778 uint8_t resColl
[100];
3779 int32_t rLen
, tLen
, ruleLen
;
3780 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
3781 static const UChar rule1
[]={
3782 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3783 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3784 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3785 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3786 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3787 static const UChar rule2
[]={
3788 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3789 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3790 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3791 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3792 0x3C,0x3C,0x20,0x62,0};
3794 static const UChar tData1
[][4]={
3799 static const UChar tData2
[][4]={
3806 * These values from FractionalUCA.txt will change,
3807 * and need to be updated here.
3808 * TODO: Make this not check for particular sort keys.
3809 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
/* Expected sort-key bytes, including the terminating 0 (so UPRV_LENGTHOF is the full key length). */
3811 static const uint8_t firstPrimaryIgnCE
[]={1, 0x83, 1, 5, 0};
3812 static const uint8_t lastPrimaryIgnCE
[]={1, 0xFC, 1, 5, 0};
3813 static const uint8_t firstSecondaryIgnCE
[]={1, 1, 0xfe, 0};
3814 static const uint8_t lastSecondaryIgnCE
[]={1, 1, 0xff, 0};
3816 UParseError parseError
;
3818 /* Test [Last Primary ignorable] */
3820 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
3821 ruleLen
= u_strlen(rule1
);
3822 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3823 if (U_FAILURE(status
)) {
3824 log_err_status(status
, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status
));
/* tData1[0] must produce exactly the lastPrimaryIgnCE key. */
3827 tLen
= u_strlen(tData1
[0]);
3828 rLen
= ucol_getSortKey(coll
, tData1
[0], tLen
, resColl
, 100);
3829 if (rLen
!= UPRV_LENGTHOF(lastPrimaryIgnCE
) || uprv_memcmp(resColl
, lastPrimaryIgnCE
, rLen
) != 0) {
3830 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1
[0], rLen
);
3831 for(i
= 0; i
<rLen
; i
++) {
3832 log_err(" %02X", resColl
[i
]);
/* tData1[1] must produce exactly the firstPrimaryIgnCE key. */
3836 tLen
= u_strlen(tData1
[1]);
3837 rLen
= ucol_getSortKey(coll
, tData1
[1], tLen
, resColl
, 100);
3838 if (rLen
!= UPRV_LENGTHOF(firstPrimaryIgnCE
) || uprv_memcmp(resColl
, firstPrimaryIgnCE
, rLen
) != 0) {
3839 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1
[1], rLen
);
3840 for(i
= 0; i
<rLen
; i
++) {
3841 log_err(" %02X", resColl
[i
]);
3848 /* Test [Last Secondary ignorable] */
3849 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
3850 ruleLen
= u_strlen(rule2
);
/* Unlike the rule1 open above, this call collects parse-error context for the failure message. */
3851 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, &parseError
, &status
);
3852 if (U_FAILURE(status
)) {
3853 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status
));
3854 log_info(" offset=%d \"%s\" | \"%s\"\n",
3855 parseError
.offset
, aescstrdup(parseError
.preContext
, -1), aescstrdup(parseError
.postContext
, -1));
/* tData2[0] must produce exactly the lastSecondaryIgnCE key. */
3858 tLen
= u_strlen(tData2
[0]);
3859 rLen
= ucol_getSortKey(coll
, tData2
[0], tLen
, resColl
, 100);
3860 if (rLen
!= UPRV_LENGTHOF(lastSecondaryIgnCE
) || uprv_memcmp(resColl
, lastSecondaryIgnCE
, rLen
) != 0) {
3861 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2
[0], rLen
);
3862 for(i
= 0; i
<rLen
; i
++) {
3863 log_err(" %02X", resColl
[i
]);
/* tData2[1] must produce exactly the firstSecondaryIgnCE key. */
3867 tLen
= u_strlen(tData2
[1]);
3868 rLen
= ucol_getSortKey(coll
, tData2
[1], tLen
, resColl
, 100);
3869 if (rLen
!= UPRV_LENGTHOF(firstSecondaryIgnCE
) || uprv_memcmp(resColl
, firstSecondaryIgnCE
, rLen
) != 0) {
3870 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2
[1], rLen
);
3871 for(i
= 0; i
<rLen
; i
++) {
3872 log_err(" %02X", resColl
[i
]);
/*
 * TestUCAPrecontext: computes sort keys for tData1[0..10] under four
 * collators in turn -- locale "en", locale "ja", rule1 ("& middle-dot < a")
 * and rule2 ("& l middle-dot << a") -- and compares each key with the key of
 * the previous entry using strcmp on the key bytes.
 * For the first three collators an error is logged when a key compares less
 * than its predecessor. For rule2 the same holds except at index 3, where an
 * error is logged if the key compares greater than its predecessor.
 * prevColl is refreshed after every entry; the (j>0) guards keep the first
 * entry of each pass from being compared with a key left by the prior pass.
 */
3880 TestUCAPrecontext(void)
3882 UErrorCode status
= U_ZERO_ERROR
;
3884 UCollator
*coll
=NULL
;
3885 uint8_t resColl
[100], prevColl
[100];
3886 int32_t rLen
, tLen
, ruleLen
;
3887 UChar rule1
[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3888 UChar rule2
[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3889 /* & l middle-dot << a a is an expansion. */
3891 UChar tData1
[][20]={
3892 { 0xb7, 0}, /* standalone middle dot(0xb7) */
3893 { 0x387, 0}, /* standalone middle dot(0x387) */
3896 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
3897 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
3898 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3899 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
3900 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3901 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
3902 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
/* Pass 1: "en" locale collator. */
3905 log_verbose("\n\nEN collation:");
3906 coll
= ucol_open("en", &status
);
3907 if (U_FAILURE(status
)) {
3908 log_err_status(status
, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status
));
3911 for (j
=0; j
<11; j
++) {
3912 tLen
= u_strlen(tData1
[j
]);
3913 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
3914 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3915 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3918 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3919 for(i
= 0; i
<rLen
; i
++) {
3920 log_verbose(" %02X", resColl
[i
]);
/* Remember this key (rLen+1 bytes are copied) for the comparison on the next entry. */
3922 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/* Pass 2: "ja" locale collator, same monotonic check. */
3927 log_verbose("\n\nJA collation:");
3928 coll
= ucol_open("ja", &status
);
3929 if (U_FAILURE(status
)) {
3930 log_err("Tailoring test: &z <<a|- failed!");
3933 for (j
=0; j
<11; j
++) {
3934 tLen
= u_strlen(tData1
[j
]);
3935 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
3936 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3937 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3940 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3941 for(i
= 0; i
<rLen
; i
++) {
3942 log_verbose(" %02X", resColl
[i
]);
3944 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/* Pass 3: collator built from rule1, same monotonic check. */
3949 log_verbose("\n\nTailoring test: & middle dot < a ");
3950 ruleLen
= u_strlen(rule1
);
3951 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3952 if (U_FAILURE(status
)) {
3953 log_err("Tailoring test: & middle dot < a failed!");
3956 for (j
=0; j
<11; j
++) {
3957 tLen
= u_strlen(tData1
[j
]);
3958 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
3959 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3960 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3963 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3964 for(i
= 0; i
<rLen
; i
++) {
3965 log_verbose(" %02X", resColl
[i
]);
3967 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/* Pass 4: collator built from rule2; index 3 is the one entry expected NOT to sort after its predecessor. */
3972 log_verbose("\n\nTailoring test: & l middle-dot << a ");
3973 ruleLen
= u_strlen(rule2
);
3974 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3975 if (U_FAILURE(status
)) {
3976 log_err("Tailoring test: & l middle-dot << a failed!");
3979 for (j
=0; j
<11; j
++) {
3980 tLen
= u_strlen(tData1
[j
]);
3981 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
3982 if ((j
>0) && (j
!=3) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3983 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3986 if ((j
==3)&&(strcmp((char *)resColl
, (char *)prevColl
)>0)) {
3987 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3990 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3991 for(i
= 0; i
<rLen
; i
++) {
3992 log_verbose(" %02X", resColl
[i
]);
3994 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/*
 * TestOutOfBuffer5468: asks the "root" collator (primary strength,
 * normalization on) for the sort key of the single character U+4E00 while
 * supplying only a 1-byte output buffer, and checks that the returned length
 * is 4. The returned value is the only thing inspected; the buffer contents
 * are not examined.
 */
4000 TestOutOfBuffer5468(void)
4002 static const char *test
= "\\u4e00";
4004 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
/* Deliberately too small for the key. */
4005 unsigned char shortKeyBuf
[1];
4006 int32_t sortkey_length
;
4007 UErrorCode status
= U_ZERO_ERROR
;
4008 static UCollator
*coll
= NULL
;
4010 coll
= ucol_open("root", &status
);
4011 if(U_FAILURE(status
)) {
4012 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
/* Strength is set twice: once through the convenience setter, once through the attribute API. */
4015 ucol_setStrength(coll
, UCOL_PRIMARY
);
4016 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4017 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4018 if (U_FAILURE(status
)) {
4019 log_err("Failed setting atributes\n");
4023 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, shortKeyBuf
, sizeof(shortKeyBuf
));
4024 if (sortkey_length
!= 4) {
4025 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length
);
4027 log_verbose("length of sortKey is %d", sortkey_length
);
4031 #define TSKC_DATA_SIZE 5
4032 #define TSKC_BUF_SIZE 50
/*
 * TestSortKeyConsistency: for every prefix length 1..TSKC_DATA_SIZE of
 * `data`, produces a full sort key (ucol_getSortKey) and a partial sort key
 * (ucol_nextSortKeyPart) with a collator opened from the short string
 * "LEN_S4", each into a zero-filled TSKC_BUF_SIZE row. It then compares the
 * row for prefix i with the rows of shorter prefixes i2 and logs an error
 * when the full-key and partial-key comparisons disagree.
 * NOTE(review): fullMatch/partMatch start TRUE and are AND-ed with a
 * per-byte `!=`, so each flag stays TRUE only if EVERY byte of the two rows
 * differs. Because the rows are zero-padded, that looks unlikely to ever
 * hold -- confirm whether `==` was intended.
 */
4034 TestSortKeyConsistency(void)
4036 UErrorCode icuRC
= U_ZERO_ERROR
;
4038 UChar data
[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
/* One row per prefix length: bufFull holds full keys, bufPart holds partial keys. */
4040 uint8_t bufFull
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
4041 uint8_t bufPart
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
4044 ucol
= ucol_openFromShortString("LEN_S4", FALSE
, NULL
, &icuRC
);
4045 if (U_FAILURE(icuRC
))
4047 log_err_status(icuRC
, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC
));
4051 for (i
= 0; i
< TSKC_DATA_SIZE
; i
++)
4053 UCharIterator uiter
;
4054 uint32_t state
[2] = { 0, 0 };
/* Iteration i uses the first i+1 code units of data. */
4055 int32_t dataLen
= i
+1;
/* Zero both rows so bytes past the key end compare equal between rows. */
4056 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
4057 bufFull
[i
][j
] = bufPart
[i
][j
] = 0;
4060 ucol_getSortKey(ucol
, data
, dataLen
, bufFull
[i
], TSKC_BUF_SIZE
);
4062 /* Partial sort key */
4063 uiter_setString(&uiter
, data
, dataLen
);
4064 ucol_nextSortKeyPart(ucol
, &uiter
, state
, bufPart
[i
], TSKC_BUF_SIZE
, &icuRC
);
4065 if (U_FAILURE(icuRC
))
4067 log_err("ucol_nextSortKeyPart failed\n");
/* Compare the current row with every earlier (shorter-prefix) row. */
4072 for (i2
=0; i2
<i
; i2
++)
4074 UBool fullMatch
= TRUE
;
4075 UBool partMatch
= TRUE
;
4076 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
4078 fullMatch
= fullMatch
&& (bufFull
[i
][j
] != bufFull
[i2
][j
]);
4079 partMatch
= partMatch
&& (bufPart
[i
][j
] != bufPart
[i2
][j
]);
/* The two key flavours must agree on the comparison outcome. */
4081 if (fullMatch
!= partMatch
) {
4082 log_err(fullMatch
? "full key was consistent, but partial key changed\n"
4083 : "partial key was consistent, but full key changed\n");
4090 /*=============================================*/
/*
 * TestCroatianSortKey: opens a collator from the short string
 * "LHR_AN_CX_EX_FX_HX_NX_S3" and requests one partial sort key for a
 * two-unit text whose second unit, 0xD81A, is a lead surrogate with no
 * trail surrogate after it. An error is logged if the returned length
 * equals the 32-byte buffer capacity.
 */
4095 static void TestCroatianSortKey(void) {
4096 const char* collString
= "LHR_AN_CX_EX_FX_HX_NX_S3";
4097 UErrorCode status
= U_ZERO_ERROR
;
/* 'D' followed by an unpaired lead surrogate; the array has no terminator, so the length is passed explicitly. */
4101 static const UChar text
[] = { 0x0044, 0xD81A };
4103 size_t length
= UPRV_LENGTHOF(text
);
4105 uint8_t textSortKey
[32];
4106 size_t lenSortKey
= 32;
4107 size_t actualSortKeyLen
;
4108 uint32_t uStateInfo
[2] = { 0, 0 };
4110 ucol
= ucol_openFromShortString(collString
, FALSE
, NULL
, &status
);
4111 if (U_FAILURE(status
)) {
4112 log_err_status(status
, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status
));
4116 uiter_setString(&iter
, text
, (int32_t)length
);
4118 actualSortKeyLen
= ucol_nextSortKeyPart(
4119 ucol
, &iter
, (uint32_t*)uStateInfo
,
4120 textSortKey
, (int32_t)lenSortKey
, &status
/* A result that fills the whole buffer is treated as the failure symptom. */
4123 if (actualSortKeyLen
== lenSortKey
) {
4124 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4131 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4132 * they are both Hiragana and Katakana
4134 #define SORTKEYLEN 50
/*
 * TestHiragana: checks that two spellings of the same text collate as equal
 * under the collator opened from "LJA_AN_CX_EX_FX_HO_NX_S4".
 *   data1: U+3058 U+30B8 (precomposed forms)
 *   data2: U+3057 U+3099 U+30B7 U+3099 (base letters + combining mark U+3099)
 * Three comparisons are made: full sort keys (ucol_getSortKey), partial sort
 * keys (ucol_nextSortKeyPart) and a direct ucol_strcoll, each expected to
 * report equality.
 */
4135 static void TestHiragana(void) {
4136 UErrorCode status
= U_ZERO_ERROR
;
4138 UCollationResult strcollresult
;
4139 UChar data1
[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4140 UChar data2
[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
/* Neither array is NUL-terminated, so explicit lengths are used throughout. */
4141 int32_t data1Len
= UPRV_LENGTHOF(data1
);
4142 int32_t data2Len
= UPRV_LENGTHOF(data2
);
4144 uint8_t sortKey1
[SORTKEYLEN
];
4145 uint8_t sortKey2
[SORTKEYLEN
];
4147 UCharIterator uiter1
;
4148 UCharIterator uiter2
;
4149 uint32_t state1
[2] = { 0, 0 };
4150 uint32_t state2
[2] = { 0, 0 };
4154 ucol
= ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE
, NULL
,
4156 if (U_FAILURE(status
)) {
4157 log_err_status(status
, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status
));
4161 /* Start of full sort keys */
4162 /* Full sort key1 */
4163 keySize1
= ucol_getSortKey(ucol
, data1
, data1Len
, sortKey1
, SORTKEYLEN
);
4164 /* Full sort key2 */
4165 keySize2
= ucol_getSortKey(ucol
, data2
, data2Len
, sortKey2
, SORTKEYLEN
);
/* Keys must have the same length and then the same bytes. */
4166 if (keySize1
== keySize2
) {
4167 for (i
= 0; i
< keySize1
; i
++) {
4168 if (sortKey1
[i
] != sortKey2
[i
]) {
4169 log_err("Full sort keys are different. Should be equal.");
4173 log_err("Full sort keys sizes doesn't match: %d %d", keySize1
, keySize2
);
4175 /* End of full sort keys */
4177 /* Start of partial sort keys */
4178 /* Partial sort key1 */
4179 uiter_setString(&uiter1
, data1
, data1Len
);
4180 keySize1
= ucol_nextSortKeyPart(ucol
, &uiter1
, state1
, sortKey1
, SORTKEYLEN
, &status
);
4181 /* Partial sort key2 */
4182 uiter_setString(&uiter2
, data2
, data2Len
);
4183 keySize2
= ucol_nextSortKeyPart(ucol
, &uiter2
, state2
, sortKey2
, SORTKEYLEN
, &status
);
/* Same length-then-bytes comparison, additionally gated on a successful status. */
4184 if (U_SUCCESS(status
) && keySize1
== keySize2
) {
4185 for (j
= 0; j
< keySize1
; j
++) {
4186 if (sortKey1
[j
] != sortKey2
[j
]) {
4187 log_err("Partial sort keys are different. Should be equal");
4191 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status
), keySize1
, keySize2
);
4193 /* End of partial sort keys */
4195 /* Start of strcoll */
4196 /* Use ucol_strcoll() to determine ordering */
4197 strcollresult
= ucol_strcoll(ucol
, data1
, data1Len
, data2
, data2Len
);
4198 if (strcollresult
!= UCOL_EQUAL
) {
4199 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4205 /* Convenient struct for running collation tests */
4207 const UChar source
[MAX_TOKEN_LEN
]; /* String on left */
4208 const UChar target
[MAX_TOKEN_LEN
]; /* String on right */
4209 UCollationResult result
; /* -1, 0 or +1, depending on collation */
4213 * Utility function to test one collation test case.
4214 * @param testcases Array of test cases.
4215 * @param n_testcases Size of the array testcases.
4216 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
4217 * @param n_rules Size of the array str_rules.
/*
 * doTestOneTestCase: for each rule string in str_rules, unescapes it, opens
 * a rule-based collator (normalization on, tertiary strength) and runs every
 * entry of testcases against it (source, target, expected result).
 * The collator opened for a rule is closed before the next rule is tried.
 * If opening fails, the status and the parse-error context are logged.
 */
4219 static void doTestOneTestCase(const OneTestCase testcases
[],
4221 const char* str_rules
[],
4224 int rule_no
, testcase_no
;
4227 UErrorCode status
= U_ZERO_ERROR
;
4228 UParseError parse_error
;
4229 UCollator
*myCollation
;
4231 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
/* Convert the escaped rule text into at most 500 UChars. */
4233 length
= u_unescape(str_rules
[rule_no
], rule
, 500);
/* The %s conversion needs its argument: report the rule text that failed to unescape. */
4235 log_err("ERROR: The rule cannot be unescaped: %s\n", str_rules[rule_no]);
4238 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
4239 if(U_FAILURE(status
)){
4240 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
4241 log_info(" offset=%d \"%s\" | \"%s\"\n",
4243 aescstrdup(parse_error
.preContext
, -1),
4244 aescstrdup(parse_error
.postContext
, -1));
4247 log_verbose("Testing the <<* syntax\n");
/* Attributes are set again explicitly even though ucol_openRules was given the same values. */
4248 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4249 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
4250 for (testcase_no
= 0; testcase_no
< n_testcases
; ++testcase_no
) {
4252 testcases
[testcase_no
].source
,
4253 testcases
[testcase_no
].target
,
4254 testcases
[testcase_no
].result
4257 ucol_close(myCollation
);
4261 const static OneTestCase rangeTestcases
[] = {
4262 { {0x0061}, {0x0062}, UCOL_LESS
}, /* "a" < "b" */
4263 { {0x0062}, {0x0063}, UCOL_LESS
}, /* "b" < "c" */
4264 { {0x0061}, {0x0063}, UCOL_LESS
}, /* "a" < "c" */
4266 { {0x0062}, {0x006b}, UCOL_LESS
}, /* "b" << "k" */
4267 { {0x006b}, {0x006c}, UCOL_LESS
}, /* "k" << "l" */
4268 { {0x0062}, {0x006c}, UCOL_LESS
}, /* "b" << "l" */
4269 { {0x0061}, {0x006c}, UCOL_LESS
}, /* "a" < "l" */
4270 { {0x0061}, {0x006d}, UCOL_LESS
}, /* "a" < "m" */
4272 { {0x0079}, {0x006d}, UCOL_LESS
}, /* "y" < "f" */
4273 { {0x0079}, {0x0067}, UCOL_LESS
}, /* "y" < "g" */
4274 { {0x0061}, {0x0068}, UCOL_LESS
}, /* "y" < "h" */
4275 { {0x0061}, {0x0065}, UCOL_LESS
}, /* "g" < "e" */
4277 { {0x0061}, {0x0031}, UCOL_EQUAL
}, /* "a" = "1" */
4278 { {0x0061}, {0x0032}, UCOL_EQUAL
}, /* "a" = "2" */
4279 { {0x0061}, {0x0033}, UCOL_EQUAL
}, /* "a" = "3" */
4280 { {0x0061}, {0x0066}, UCOL_LESS
}, /* "a" < "f" */
4281 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS
}, /* "la" < "123" */
4282 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL
}, /* "aaa" = "123" */
4283 { {0x0062}, {0x007a}, UCOL_LESS
}, /* "b" < "z" */
4284 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS
}, /* "azm" = "2yc" */
4287 static int nRangeTestcases
= UPRV_LENGTHOF(rangeTestcases
);
4289 const static OneTestCase rangeTestcasesSupplemental
[] = {
4290 { {0x4e00}, {0xfffb}, UCOL_LESS
}, /* U+4E00 < U+FFFB */
4291 { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS
}, /* U+FFFB < U+10000 */
4292 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+10000 < U+10001 */
4293 { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+4E00 < U+10001 */
4294 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
4295 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
4296 { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+4E00 < U+10001 */
4299 static int nRangeTestcasesSupplemental
= UPRV_LENGTHOF(rangeTestcasesSupplemental
);
4301 const static OneTestCase rangeTestcasesQwerty
[] = {
4302 { {0x0071}, {0x0077}, UCOL_LESS
}, /* "q" < "w" */
4303 { {0x0077}, {0x0065}, UCOL_LESS
}, /* "w" < "e" */
4305 { {0x0079}, {0x0075}, UCOL_LESS
}, /* "y" < "u" */
4306 { {0x0071}, {0x0075}, UCOL_LESS
}, /* "q" << "u" */
4308 { {0x0074}, {0x0069}, UCOL_LESS
}, /* "t" << "i" */
4309 { {0x006f}, {0x0070}, UCOL_LESS
}, /* "o" << "p" */
4311 { {0x0079}, {0x0065}, UCOL_LESS
}, /* "y" < "e" */
4312 { {0x0069}, {0x0075}, UCOL_LESS
}, /* "i" < "u" */
4314 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4315 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS
}, /* "quest" < "were" */
4316 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4317 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS
}, /* "quack" < "quest" */
4320 static int nRangeTestcasesQwerty
= UPRV_LENGTHOF(rangeTestcasesQwerty
);
4322 static void TestSameStrengthList(void)
4324 const char* strRules
[] = {
4326 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
4329 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4331 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, UPRV_LENGTHOF(strRules
));
4334 static void TestSameStrengthListQuoted(void)
4336 const char* strRules
[] = {
4337 /* Lists with quoted characters */
4338 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4339 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4341 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4342 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4344 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
4345 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4347 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, UPRV_LENGTHOF(strRules
));
4350 static void TestSameStrengthListSupplemental(void)
4352 const char* strRules
[] = {
4353 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4354 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4355 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4356 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4358 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, UPRV_LENGTHOF(strRules
));
4361 static void TestSameStrengthListQwerty(void)
4363 const char* strRules
[] = {
4364 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4365 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4366 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4367 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4368 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4370 /* Quoted characters also will work if two quoted characters are not consecutive. */
4371 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4373 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4374 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4377 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, UPRV_LENGTHOF(strRules
));
4380 static void TestSameStrengthListQuotedQwerty(void)
4382 const char* strRules
[] = {
4383 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4384 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4385 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
4387 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4388 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4390 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, UPRV_LENGTHOF(strRules
));
4393 static void TestSameStrengthListRanges(void)
4395 const char* strRules
[] = {
4396 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4398 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, UPRV_LENGTHOF(strRules
));
4401 static void TestSameStrengthListSupplementalRanges(void)
4403 const char* strRules
[] = {
4404 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4405 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4407 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, UPRV_LENGTHOF(strRules
));
/*
 * Passes the specialCharacterStrings table and the strRules array to
 * doTestOneTestCase().  The visible rule places the apostrophe-quoted
 * characters ; + , - & * in that order; the table expects
 * ';' < '+' < ',' < '-' < '&'.
 */
4410 static void TestSpecialCharacters(void)
4412 const char* strRules
[] = {
4414 "&';'<'+'<','<'-'<'&'<'*'",
/* Expected results: each row is { source, target, expected comparison }. */
4423 const static OneTestCase specialCharacterStrings
[] = {
4424 { {0x003b}, {0x002b}, UCOL_LESS
}, /* ; < + */
4425 { {0x002b}, {0x002c}, UCOL_LESS
}, /* + < , */
4426 { {0x002c}, {0x002d}, UCOL_LESS
}, /* , < - */
4427 { {0x002d}, {0x0026}, UCOL_LESS
}, /* - < & */
4429 doTestOneTestCase(specialCharacterStrings
, UPRV_LENGTHOF(specialCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
4432 static void TestPrivateUseCharacters(void)
4434 const char* strRules
[] = {
4436 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4437 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4440 const static OneTestCase privateUseCharacterStrings
[] = {
4441 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
4442 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
4443 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
4444 { {0xe2da}, {0xe2db}, UCOL_LESS
},
4445 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
4446 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
4448 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
4451 static void TestPrivateUseCharactersInList(void)
4453 const char* strRules
[] = {
4455 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4456 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4457 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4460 const static OneTestCase privateUseCharacterStrings
[] = {
4461 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
4462 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
4463 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
4464 { {0xe2da}, {0xe2db}, UCOL_LESS
},
4465 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
4466 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
4468 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
4471 static void TestPrivateUseCharactersInRange(void)
4473 const char* strRules
[] = {
4475 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4476 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4477 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4480 const static OneTestCase privateUseCharacterStrings
[] = {
4481 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
4482 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
4483 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
4484 { {0xe2da}, {0xe2db}, UCOL_LESS
},
4485 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
4486 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
4488 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
/*
 * Negative test: every entry of invalidRules is unescaped and handed to
 * ucol_openRules(); an error is logged whenever the call does NOT fail.
 * status is reset to U_ZERO_ERROR after each rule so the next one starts clean.
 */
4491 static void TestInvalidListsAndRanges(void)
4493 const char* invalidRules
[] = {
4494 /* Range not in starred expression */
4495 "&\\ufffe<\\uffff-\\U00010002",
4497 /* Range without start */
4500 /* Range without end */
4503 /* More than one hyphen */
4506 /* Range in the wrong order */
4512 UErrorCode status
= U_ZERO_ERROR
;
4513 UParseError parse_error
;
4514 int n_rules
= UPRV_LENGTHOF(invalidRules
);
4517 UCollator
*myCollation
;
4519 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
4521 length
= u_unescape(invalidRules
[rule_no
], rule
, 500);
/* NOTE(review): the format string below contains %s but the call supplies no
 * matching argument; it presumably should receive invalidRules[rule_no]. */
4523 log_err("ERROR: The rule cannot be unescaped: %s\n");
/* NOTE(review): no ucol_close() call is visible for the collator returned
 * here; if a rule unexpectedly parses, the collator looks leaked -- confirm. */
4526 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
4527 (void)myCollation
; /* Suppress set but not used warning. */
/* Success is the error case here: each rule is expected to be rejected. */
4528 if(!U_FAILURE(status
)){
4529 log_err("ERROR: Could not cause a failure as expected: \n");
/* Clear the (expected) failure before trying the next rule. */
4531 status
= U_ZERO_ERROR
;
4536 * This test ensures that characters placed before a character in a different script have the same lead byte
4537 * in their collation key before and after script reordering.
4539 static void TestBeforeRuleWithScriptReordering(void)
4542 UErrorCode status
= U_ZERO_ERROR
;
4543 UCollator
*myCollation
;
4544 char srules
[500] = "&[before 1]\\u03b1 < \\u0e01";
4546 uint32_t rulesLength
= 0;
4547 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
4548 UCollationResult collResult
;
4550 uint8_t baseKey
[256];
4551 uint32_t baseKeyLength
;
4552 uint8_t beforeKey
[256];
4553 uint32_t beforeKeyLength
;
4555 UChar base
[] = { 0x03b1 }; /* base */
4556 int32_t baseLen
= UPRV_LENGTHOF(base
);
4558 UChar before
[] = { 0x0e01 }; /* ko kai */
4559 int32_t beforeLen
= UPRV_LENGTHOF(before
);
4561 /*UChar *data[] = { before, base };
4562 genericRulesStarter(srules, data, 2);*/
4564 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4566 (void)beforeKeyLength
; /* Suppress set but not used warnings. */
4567 (void)baseKeyLength
;
4569 /* build collator */
4570 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4572 rulesLength
= u_unescape(srules
, rules
, UPRV_LENGTHOF(rules
));
4573 myCollation
= ucol_openRules(rules
, rulesLength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
4574 if(U_FAILURE(status
)) {
4575 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
4579 /* check collation results - before rule applied but not script reordering */
4580 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
4581 if (collResult
!= UCOL_GREATER
) {
4582 log_err("Collation result not correct before script reordering = %d\n", collResult
);
4585 /* check the lead byte of the collation keys before script reordering */
4586 baseKeyLength
= ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
4587 beforeKeyLength
= ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
4588 if (baseKey
[0] != beforeKey
[0]) {
4589 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
4592 /* reorder the scripts */
4593 ucol_setReorderCodes(myCollation
, reorderCodes
, 1, &status
);
4594 if(U_FAILURE(status
)) {
4595 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
4599 /* check collation results - before rule applied and after script reordering */
4600 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
4601 if (collResult
!= UCOL_GREATER
) {
4602 log_err("Collation result not correct after script reordering = %d\n", collResult
);
4605 /* check the lead byte of the collation keys after script reordering */
4606 ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
4607 ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
4608 if (baseKey
[0] != beforeKey
[0]) {
4609 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
4612 ucol_close(myCollation
);
4616 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4618 static void TestNonLeadBytesDuringCollationReordering(void)
4620 UErrorCode status
= U_ZERO_ERROR
;
4621 UCollator
*myCollation
;
4622 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
4624 uint8_t baseKey
[256];
4625 uint32_t baseKeyLength
;
4626 uint8_t reorderKey
[256];
4627 uint32_t reorderKeyLength
;
4629 UChar testString
[] = { 0x03b1, 0x03b2, 0x03b3 };
4634 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4636 /* build collator tertiary */
4637 myCollation
= ucol_open("", &status
);
4638 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
4639 if(U_FAILURE(status
)) {
4640 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4643 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), baseKey
, 256);
4645 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4646 if(U_FAILURE(status
)) {
4647 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4650 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), reorderKey
, 256);
4652 if (baseKeyLength
!= reorderKeyLength
) {
4653 log_err("Key lengths not the same during reordering.\n");
4657 for (i
= 1; i
< baseKeyLength
; i
++) {
4658 if (baseKey
[i
] != reorderKey
[i
]) {
4659 log_err("Collation key bytes not the same at position %d.\n", i
);
4663 ucol_close(myCollation
);
4665 /* build collator quaternary */
4666 myCollation
= ucol_open("", &status
);
4667 ucol_setStrength(myCollation
, UCOL_QUATERNARY
);
4668 if(U_FAILURE(status
)) {
4669 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4672 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), baseKey
, 256);
4674 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4675 if(U_FAILURE(status
)) {
4676 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4679 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), reorderKey
, 256);
4681 if (baseKeyLength
!= reorderKeyLength
) {
4682 log_err("Key lengths not the same during reordering.\n");
4686 for (i
= 1; i
< baseKeyLength
; i
++) {
4687 if (baseKey
[i
] != reorderKey
[i
]) {
4688 log_err("Collation key bytes not the same at position %d.\n", i
);
4692 ucol_close(myCollation
);
4696 * Test reordering API.
4698 static void TestReorderingAPI(void)
4700 UErrorCode status
= U_ZERO_ERROR
;
4701 UCollator
*myCollation
;
4702 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
4703 int32_t duplicateReorderCodes
[] = {USCRIPT_HIRAGANA
, USCRIPT_GREEK
, UCOL_REORDER_CODE_CURRENCY
, USCRIPT_KATAKANA
};
4704 int32_t reorderCodesStartingWithDefault
[] = {UCOL_REORDER_CODE_DEFAULT
, USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
4705 int32_t reorderCodeNone
= UCOL_REORDER_CODE_NONE
;
4706 UCollationResult collResult
;
4707 int32_t retrievedReorderCodesLength
;
4708 int32_t retrievedReorderCodes
[10];
4709 UChar greekString
[] = { 0x03b1 };
4710 UChar punctuationString
[] = { 0x203e };
4713 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4715 /* build collator tertiary */
4716 myCollation
= ucol_open("", &status
);
4717 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
4718 if(U_FAILURE(status
)) {
4719 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4723 /* set the reorderding */
4724 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4725 if (U_FAILURE(status
)) {
4726 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4730 /* get the reordering */
4731 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4732 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
4733 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
4736 status
= U_ZERO_ERROR
;
4737 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4738 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4741 /* now let's really get it */
4742 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4743 if (U_FAILURE(status
)) {
4744 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4747 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4748 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4751 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4752 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
4753 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4757 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4758 if (collResult
!= UCOL_LESS
) {
4759 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
4763 /* clear the reordering */
4764 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
4765 if (U_FAILURE(status
)) {
4766 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
4770 /* get the reordering again */
4771 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4772 if (retrievedReorderCodesLength
!= 0) {
4773 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
4777 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4778 if (collResult
!= UCOL_GREATER
) {
4779 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
4783 /* clear the reordering using [NONE] */
4784 ucol_setReorderCodes(myCollation
, &reorderCodeNone
, 1, &status
);
4785 if (U_FAILURE(status
)) {
4786 log_err_status(status
, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status
));
4790 /* get the reordering again */
4791 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4792 if (retrievedReorderCodesLength
!= 0) {
4793 log_err_status(status
,
4794 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4795 retrievedReorderCodesLength
);
4799 /* test for error condition on duplicate reorder codes */
4800 ucol_setReorderCodes(myCollation
, duplicateReorderCodes
, UPRV_LENGTHOF(duplicateReorderCodes
), &status
);
4801 if (!U_FAILURE(status
)) {
4802 log_err_status(status
, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4806 status
= U_ZERO_ERROR
;
4807 /* test for reorder codes after a reset code */
4808 ucol_setReorderCodes(myCollation
, reorderCodesStartingWithDefault
, UPRV_LENGTHOF(reorderCodesStartingWithDefault
), &status
);
4809 if (!U_FAILURE(status
)) {
4810 log_err_status(status
, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4814 ucol_close(myCollation
);
4818 * Test reordering API.
4820 static void TestReorderingAPIWithRuleCreatedCollator(void)
4822 UErrorCode status
= U_ZERO_ERROR
;
4823 UCollator
*myCollation
;
4825 static const int32_t rulesReorderCodes
[2] = {USCRIPT_HAN
, USCRIPT_GREEK
};
4826 static const int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
4827 static const int32_t onlyDefault
[1] = {UCOL_REORDER_CODE_DEFAULT
};
4828 UCollationResult collResult
;
4829 int32_t retrievedReorderCodesLength
;
4830 int32_t retrievedReorderCodes
[10];
4831 static const UChar greekString
[] = { 0x03b1 };
4832 static const UChar punctuationString
[] = { 0x203e };
4833 static const UChar hanString
[] = { 0x65E5, 0x672C };
4836 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4838 /* build collator from rules */
4839 u_uastrcpy(rules
, "[reorder Hani Grek]");
4840 myCollation
= ucol_openRules(rules
, u_strlen(rules
), UCOL_DEFAULT
, UCOL_TERTIARY
, NULL
, &status
);
4841 if(U_FAILURE(status
)) {
4842 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4846 /* get the reordering */
4847 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4848 if (U_FAILURE(status
)) {
4849 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4852 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(rulesReorderCodes
)) {
4853 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(rulesReorderCodes
));
4856 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4857 if (retrievedReorderCodes
[loopIndex
] != rulesReorderCodes
[loopIndex
]) {
4858 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4862 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), hanString
, UPRV_LENGTHOF(hanString
));
4863 if (collResult
!= UCOL_GREATER
) {
4864 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
4868 /* set the reordering */
4869 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4870 if (U_FAILURE(status
)) {
4871 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4875 /* get the reordering */
4876 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4877 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
4878 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
4881 status
= U_ZERO_ERROR
;
4882 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4883 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4886 /* now let's really get it */
4887 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4888 if (U_FAILURE(status
)) {
4889 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4892 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4893 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4896 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4897 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
4898 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4902 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4903 if (collResult
!= UCOL_LESS
) {
4904 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
4908 /* clear the reordering */
4909 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
4910 if (U_FAILURE(status
)) {
4911 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
4915 /* get the reordering again */
4916 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4917 if (retrievedReorderCodesLength
!= 0) {
4918 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
4922 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4923 if (collResult
!= UCOL_GREATER
) {
4924 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
4928 /* reset the reordering */
4929 ucol_setReorderCodes(myCollation
, onlyDefault
, 1, &status
);
4930 if (U_FAILURE(status
)) {
4931 log_err_status(status
, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status
));
4934 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4935 if (U_FAILURE(status
)) {
4936 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4939 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(rulesReorderCodes
)) {
4940 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(rulesReorderCodes
));
4943 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4944 if (retrievedReorderCodes
[loopIndex
] != rulesReorderCodes
[loopIndex
]) {
4945 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4950 ucol_close(myCollation
);
4953 static UBool
containsExpectedScript(const int32_t scripts
[], int32_t length
, int32_t expectedScript
) {
4955 for (i
= 0; i
< length
; ++i
) {
4956 if (expectedScript
== scripts
[i
]) { return TRUE
; }
4961 static void TestEquivalentReorderingScripts(void) {
4962 // Beginning with ICU 55, collation reordering moves single scripts
4963 // rather than groups of scripts,
4964 // except where scripts share a range and sort primary-equal.
4965 UErrorCode status
= U_ZERO_ERROR
;
4966 int32_t equivalentScripts
[100];
4970 /* These scripts are expected to be equivalent. */
4971 static const int32_t expectedScripts
[] = {
4974 USCRIPT_KATAKANA_OR_HIRAGANA
4977 equivalentScripts
[0] = 0;
4978 length
= ucol_getEquivalentReorderCodes(
4979 USCRIPT_GOTHIC
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
4980 if (U_FAILURE(status
)) {
4981 log_err_status(status
, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
4984 if (length
!= 1 || equivalentScripts
[0] != USCRIPT_GOTHIC
) {
4985 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4986 "length expected 1, was = %d; expected [%d] was [%d]\n",
4987 length
, USCRIPT_GOTHIC
, equivalentScripts
[0]);
4990 length
= ucol_getEquivalentReorderCodes(
4991 USCRIPT_HIRAGANA
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
4992 if (U_FAILURE(status
)) {
4993 log_err_status(status
, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
4996 if (length
!= UPRV_LENGTHOF(expectedScripts
)) {
4997 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4998 "expected %d, was = %d\n",
4999 UPRV_LENGTHOF(expectedScripts
), length
);
5002 for (i
= 0; i
< length
; ++i
) {
5003 int32_t script
= equivalentScripts
[i
];
5004 if (script
<= prevScript
) {
5005 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i
);
5007 prevScript
= script
;
5009 for (i
= 0; i
< UPRV_LENGTHOF(expectedScripts
); i
++) {
5010 if (!containsExpectedScript(equivalentScripts
, length
, expectedScripts
[i
])) {
5011 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5012 expectedScripts
[i
]);
5016 length
= ucol_getEquivalentReorderCodes(
5017 USCRIPT_KATAKANA
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5018 if (U_FAILURE(status
)) {
5019 log_err_status(status
, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
5022 if (length
!= UPRV_LENGTHOF(expectedScripts
)) {
5023 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5024 "expected %d, was = %d\n",
5025 UPRV_LENGTHOF(expectedScripts
), length
);
5027 for (i
= 0; i
< UPRV_LENGTHOF(expectedScripts
); i
++) {
5028 if (!containsExpectedScript(equivalentScripts
, length
, expectedScripts
[i
])) {
5029 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5030 expectedScripts
[i
]);
5034 length
= ucol_getEquivalentReorderCodes(
5035 USCRIPT_KATAKANA_OR_HIRAGANA
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5036 if (U_FAILURE(status
) || length
!= UPRV_LENGTHOF(expectedScripts
)) {
5037 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5038 "expected %d, was = %d\n",
5039 UPRV_LENGTHOF(expectedScripts
), length
);
5042 length
= ucol_getEquivalentReorderCodes(
5043 USCRIPT_HAN
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5044 if (U_FAILURE(status
) || length
!= 3) {
5045 log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5046 "expected 3, was = %d\n", length
);
5048 length
= ucol_getEquivalentReorderCodes(
5049 USCRIPT_SIMPLIFIED_HAN
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5050 if (U_FAILURE(status
) || length
!= 3) {
5051 log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5052 "expected 3, was = %d\n", length
);
5054 length
= ucol_getEquivalentReorderCodes(
5055 USCRIPT_TRADITIONAL_HAN
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5056 if (U_FAILURE(status
) || length
!= 3) {
5057 log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5058 "expected 3, was = %d\n", length
);
5061 length
= ucol_getEquivalentReorderCodes(
5062 USCRIPT_MEROITIC_CURSIVE
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5063 if (U_FAILURE(status
) || length
!= 2) {
5064 log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5065 "expected 2, was = %d\n", length
);
5067 length
= ucol_getEquivalentReorderCodes(
5068 USCRIPT_MEROITIC_HIEROGLYPHS
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5069 if (U_FAILURE(status
) || length
!= 2) {
5070 log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5071 "expected 2, was = %d\n", length
);
/*
 * Checks that reorder codes set on a collator are visible on its clone.
 * The root collator is opened, {Greek, Han, punctuation} is installed with
 * ucol_setReorderCodes(), the collator is cloned with ucol_safeClone(), and
 * the codes read back from the clone are compared (length and each entry)
 * against the codes that were set.
 */
5075 static void TestReorderingAcrossCloning(void)
5077 UErrorCode status
= U_ZERO_ERROR
;
5078 UCollator
*myCollation
;
5079 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
5080 UCollator
*clonedCollation
;
5081 int32_t retrievedReorderCodesLength
;
5082 int32_t retrievedReorderCodes
[10];
/* NOTE(review): this message talks about non-lead bytes in sort keys, but no
   sort key is generated in this function; it looks copied from another test. */
5085 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5087 /* build collator tertiary */
5088 myCollation
= ucol_open("", &status
);
5089 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5090 if(U_FAILURE(status
)) {
5091 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5095 /* set the reordering */
5096 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
5097 if (U_FAILURE(status
)) {
5098 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5102 /* clone the collator */
5103 clonedCollation
= ucol_safeClone(myCollation
, NULL
, NULL
, &status
);
5104 if (U_FAILURE(status
)) {
5105 log_err_status(status
, "ERROR: cloning collator: %s\n", myErrorName(status
));
5109 /* get the reordering */
5110 retrievedReorderCodesLength
= ucol_getReorderCodes(clonedCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
5111 if (U_FAILURE(status
)) {
5112 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
/* The clone must report exactly as many codes as were set on the original. */
5115 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
5116 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
/* ...and each code must match position by position. */
5119 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
5120 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
5121 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
5126 /*uprv_free(buffer);*/
5127 ucol_close(myCollation
);
5128 ucol_close(clonedCollation
);
5132 * Utility function to test one collation reordering test case set.
5133 * @param testCases Array of test cases.
5134 * @param testCasesLen Size of the array testCases.
5135 * @param reorderTokens Array of reordering codes.
5136 * @param reorderTokensLen Size of the array reorderTokens.
/*
 * Opens the root collator, installs reorderTokens (reorderTokensLen entries)
 * with ucol_setReorderCodes(), then walks testCases[0..testCasesLen) using
 * each entry's source, target and expected result. The collator is closed
 * at the end.
 */
5138 static void doTestOneReorderingAPITestCase(const OneTestCase testCases
[], uint32_t testCasesLen
, const int32_t reorderTokens
[], int32_t reorderTokensLen
)
5140 uint32_t testCaseNum
;
5141 UErrorCode status
= U_ZERO_ERROR
;
5142 UCollator
*myCollation
;
/* Start from the root collator (empty locale ID). */
5144 myCollation
= ucol_open("", &status
);
5145 if (U_FAILURE(status
)) {
5146 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
/* Apply the caller-supplied reordering through the API (not through rules). */
5149 ucol_setReorderCodes(myCollation
, reorderTokens
, reorderTokensLen
, &status
);
5150 if(U_FAILURE(status
)) {
5151 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
/* Check every test case against the reordered collator. */
5155 for (testCaseNum
= 0; testCaseNum
< testCasesLen
; ++testCaseNum
) {
5157 testCases
[testCaseNum
].source
,
5158 testCases
[testCaseNum
].target
,
5159 testCases
[testCaseNum
].result
5162 ucol_close(myCollation
);
/*
 * Runs one table of comparisons twice: through doTestOneTestCase() with the
 * rule strings in strRules, and through doTestOneReorderingAPITestCase() with
 * the reorder codes in apiRules. The expected results put Greek ahead of
 * Latin: U+0041 (A) sorts after U+0391 (Alpha), while U+0060 still sorts
 * before U+0391 and U+0391 sorts before the private-use U+E2DC.
 */
5165 static void TestGreekFirstReorder(void)
5167 const char* strRules
[] = {
5171 const int32_t apiRules
[] = {
/* Each entry is { source, target, expected result of comparing source with target }. */
5175 const static OneTestCase privateUseCharacterStrings
[] = {
5176 { {0x0391}, {0x0391}, UCOL_EQUAL
},
5177 { {0x0041}, {0x0391}, UCOL_GREATER
},
5178 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER
},
5179 { {0x0060}, {0x0391}, UCOL_LESS
},
5180 { {0x0391}, {0xe2dc}, UCOL_LESS
},
5181 { {0x0391}, {0x0060}, UCOL_GREATER
},
5184 /* Test rules creation */
5185 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5187 /* Test collation reordering API */
5188 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
/*
 * Same two-way check as the other reorder tests (rule string and API), here
 * with the reordering "[reorder Zzzz Grek]" / {USCRIPT_UNKNOWN, USCRIPT_GREEK}.
 * The expected results put Greek late: U+0041 (A) sorts before U+0391 (Alpha),
 * and U+0391 sorts after the private-use U+E2DC.
 */
5191 static void TestGreekLastReorder(void)
5193 const char* strRules
[] = {
5194 "[reorder Zzzz Grek]"
5197 const int32_t apiRules
[] = {
5198 USCRIPT_UNKNOWN
, USCRIPT_GREEK
/* Each entry is { source, target, expected result of comparing source with target }. */
5201 const static OneTestCase privateUseCharacterStrings
[] = {
5202 { {0x0391}, {0x0391}, UCOL_EQUAL
},
5203 { {0x0041}, {0x0391}, UCOL_LESS
},
5204 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS
},
5205 { {0x0060}, {0x0391}, UCOL_LESS
},
5206 { {0x0391}, {0xe2dc}, UCOL_GREATER
},
5209 /* Test rules creation */
5210 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5212 /* Test collation reordering API */
5213 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
/*
 * Reordering that mixes script codes with the non-script groups (symbol,
 * digit, punctuation, space, currency). The rule string and the apiRules
 * array list the same eight groups in the same order. The rule string spells
 * the group names in mixed case ("DIGIT", "space", "cURRENCy"); presumably
 * this is meant to exercise case-insensitive name matching -- confirm.
 */
5216 static void TestNonScriptReorder(void)
5218 const char* strRules
[] = {
5219 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5222 const int32_t apiRules
[] = {
5223 USCRIPT_GREEK
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
,
5224 UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SPACE
, USCRIPT_UNKNOWN
,
5225 UCOL_REORDER_CODE_CURRENCY
/* Expected: Alpha < A, U+0060 < A, U+0060 > Alpha, and '$' (U+0024) > A. */
5228 const static OneTestCase privateUseCharacterStrings
[] = {
5229 { {0x0391}, {0x0041}, UCOL_LESS
},
5230 { {0x0041}, {0x0391}, UCOL_GREATER
},
5231 { {0x0060}, {0x0041}, UCOL_LESS
},
5232 { {0x0060}, {0x0391}, UCOL_GREATER
},
5233 { {0x0024}, {0x0041}, UCOL_GREATER
},
5236 /* Test rules creation */
5237 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5239 /* Test collation reordering API */
5240 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
/*
 * Reorder test for Han characters, run through both the rule-string path and
 * the reordering API. The expected results put Han ahead of Latin but after
 * U+0060: U+4E00, U+FA27 and the supplementary code points written as
 * surrogate pairs (D86D DF40 = U+2B740, D869 DF00 = U+2A700) all sort before
 * U+0041, while U+4E00 and U+2B740 sort after U+0060.
 */
5243 static void TestHaniReorder(void)
5245 const char* strRules
[] = {
5248 const int32_t apiRules
[] = {
/* Each entry is { source, target, expected result of comparing source with target }. */
5252 const static OneTestCase privateUseCharacterStrings
[] = {
5253 { {0x4e00}, {0x0041}, UCOL_LESS
},
5254 { {0x4e00}, {0x0060}, UCOL_GREATER
},
5255 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
5256 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
5257 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
5258 { {0xfa27}, {0x0041}, UCOL_LESS
},
5259 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
5262 /* Test rules creation */
5263 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5265 /* Test collation reordering API */
5266 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
/*
 * Han reordering combined with an ordinary tailoring in one rule string
 * ("[reorder Hani] &b<a"). The Han expectations are the same as in
 * TestHaniReorder, plus one extra case for the tailoring: U+0062 (b) must
 * sort before U+0061 (a). Only the rule-string path is exercised; the
 * apiRules array is commented out in this function.
 */
5269 static void TestHaniReorderWithOtherRules(void)
5271 const char* strRules
[] = {
5272 "[reorder Hani] &b<a"
5274 /*const int32_t apiRules[] = {
5278 const static OneTestCase privateUseCharacterStrings
[] = {
5279 { {0x4e00}, {0x0041}, UCOL_LESS
},
5280 { {0x4e00}, {0x0060}, UCOL_GREATER
},
5281 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
5282 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
5283 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
5284 { {0xfa27}, {0x0041}, UCOL_LESS
},
5285 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
5286 { {0x0062}, {0x0061}, UCOL_LESS
},
5289 /* Test rules creation */
5290 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
/*
 * Reordering with several groups at once: Greek, unknown (Zzzz), digits,
 * Latin, Han -- given both as a rule string and as the equivalent apiRules
 * array. Expected: Alpha < A, digit '1' < A, and A < U+4E00.
 */
5293 static void TestMultipleReorder(void)
5295 const char* strRules
[] = {
5296 "[reorder Grek Zzzz DIGIT Latn Hani]"
5299 const int32_t apiRules
[] = {
5300 USCRIPT_GREEK
, USCRIPT_UNKNOWN
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
, USCRIPT_HAN
/* Each entry is { source, target, expected result of comparing source with target }. */
5303 const static OneTestCase collationTestCases
[] = {
5304 { {0x0391}, {0x0041}, UCOL_LESS
},
5305 { {0x0031}, {0x0041}, UCOL_LESS
},
5306 { {0x0041}, {0x4e00}, UCOL_LESS
},
5309 /* Test rules creation */
5310 doTestOneTestCase(collationTestCases
, UPRV_LENGTHOF(collationTestCases
), strRules
, UPRV_LENGTHOF(strRules
));
5312 /* Test collation reordering API */
5313 doTestOneReorderingAPITestCase(collationTestCases
, UPRV_LENGTHOF(collationTestCases
), apiRules
, UPRV_LENGTHOF(apiRules
));
5317 * Test that covers issue reported in ticket 8814
/*
 * Compares two root collators that both have numeric collation switched on;
 * the second one additionally has an explicit list of reorder codes. Sort
 * keys are generated for the strings "S" and "P" with both collators (only
 * their lengths are validated), and ucol_strcoll() on the two strings must
 * give the same result with and without the reordering.
 */
5319 static void TestReorderWithNumericCollation(void)
5321 UErrorCode status
= U_ZERO_ERROR
;
5322 UCollator
*myCollation
;
5323 UCollator
*myReorderCollation
;
5324 int32_t reorderCodes
[] = {UCOL_REORDER_CODE_SPACE
, UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_GREEK
,USCRIPT_LATIN
, USCRIPT_HEBREW
, UCOL_REORDER_CODE_OTHERS
};
5325 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5326 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
/* Despite their names, the active test strings are the single letters
   U+0053 (S) and U+0050 (P); the digit-prefixed variants are commented out. */
5327 UChar fortyS
[] = { 0x0053 };
5328 UChar fortyThreeP
[] = { 0x0050 };
5329 uint8_t fortyS_sortKey
[128];
5330 int32_t fortyS_sortKey_Length
;
5331 uint8_t fortyThreeP_sortKey
[128];
5332 int32_t fortyThreeP_sortKey_Length
;
5333 uint8_t fortyS_sortKey_reorder
[128];
5334 int32_t fortyS_sortKey_reorder_Length
;
5335 uint8_t fortyThreeP_sortKey_reorder
[128];
5336 int32_t fortyThreeP_sortKey_reorder_Length
;
5337 UCollationResult collResult
;
5338 UCollationResult collResultReorder
;
5340 log_verbose("Testing reordering with and without numeric collation\n");
5342 /* build collator tertiary with numeric */
5343 myCollation
= ucol_open("", &status
);
5345 ucol_setStrength(myCollation, UCOL_TERTIARY);
5347 ucol_setAttribute(myCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
5348 if(U_FAILURE(status
)) {
5349 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5353 /* build collator tertiary with numeric and reordering */
5354 myReorderCollation
= ucol_open("", &status
);
5356 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5358 ucol_setAttribute(myReorderCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
5359 ucol_setReorderCodes(myReorderCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
5360 if(U_FAILURE(status
)) {
5361 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
/* Generate all four sort keys: two strings x two collators. */
5365 fortyS_sortKey_Length
= ucol_getSortKey(myCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyS_sortKey
, 128);
5366 fortyThreeP_sortKey_Length
= ucol_getSortKey(myCollation
, fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
), fortyThreeP_sortKey
, 128);
5367 fortyS_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyS_sortKey_reorder
, 128);
5368 fortyThreeP_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
), fortyThreeP_sortKey_reorder
, 128);
5370 if (fortyS_sortKey_Length
< 0 || fortyThreeP_sortKey_Length
< 0 || fortyS_sortKey_reorder_Length
< 0 || fortyThreeP_sortKey_reorder_Length
< 0) {
5371 log_err_status(status
, "ERROR: couldn't generate sort keys\n");
/* The actual assertion is on the string comparison results below. */
5374 collResult
= ucol_strcoll(myCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
));
5375 collResultReorder
= ucol_strcoll(myReorderCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
));
5377 fprintf(stderr, "\tcollResult = %x\n", collResult);
5378 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5379 fprintf(stderr, "\nfortyS\n");
5380 for (i = 0; i < fortyS_sortKey_Length; i++) {
5381 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5383 fprintf(stderr, "\nfortyThreeP\n");
5384 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5385 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5388 if (collResult
!= collResultReorder
) {
5389 log_err_status(status
, "ERROR: collation results should have been the same.\n");
5393 ucol_close(myCollation
);
5394 ucol_close(myReorderCollation
);
/*
 * Compares two byte sequences element by element. The loop advances both
 * pointers for as long as the current bytes are equal; once they differ the
 * result is -1 if *a is smaller and 1 otherwise.
 * NOTE(review): the loop body is not visible in this excerpt; presumably it
 * returns 0 when both sequences reach a terminating zero byte -- confirm.
 */
5397 static int compare_uint8_t_arrays(const uint8_t* a
, const uint8_t* b
)
5399 for (; *a
== *b
; ++a
, ++b
) {
5404 return (*a
< *b
? -1 : 1);
/*
 * Checks the effect of appending "[import de-u-co-phonebk]" to a tailoring.
 * Three rule strings that tailor ae/AE/U-umlaut/u-umlaut are tested as they
 * are (normalRules) and with the import appended (importRules). With the
 * import, the expected order of U+00FC vs U+00DC flips from GREATER to LESS,
 * while U+00E6 < U+00C6 holds in both cases.
 */
5407 static void TestImportRulesDeWithPhonebook(void)
5409 const char* normalRules
[] = {
5410 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5411 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5412 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5414 const OneTestCase normalTests
[] = {
5415 { {0x00e6}, {0x00c6}, UCOL_LESS
},
5416 { {0x00fc}, {0x00dc}, UCOL_GREATER
},
/* Same tailorings, each followed by the German phonebook import. */
5419 const char* importRules
[] = {
5420 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5421 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5422 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5424 const OneTestCase importTests
[] = {
5425 { {0x00e6}, {0x00c6}, UCOL_LESS
},
5426 { {0x00fc}, {0x00dc}, UCOL_LESS
},
5429 doTestOneTestCase(normalTests
, UPRV_LENGTHOF(normalTests
), normalRules
, UPRV_LENGTHOF(normalRules
));
5430 doTestOneTestCase(importTests
, UPRV_LENGTHOF(importTests
), importRules
, UPRV_LENGTHOF(importRules
));
/*
 * Exercises [import ...] with the European Ordering Rules (eor) and the
 * Finnish standard tailoring, separately and combined. Each rule set is
 * paired with a table of expected results for three comparisons:
 * U+0110 vs U+00F0, U+00A3 vs U+00A5, and "a" vs "a" followed by U+00A3.
 * The combined rule set (eorFiStdRules) is checked against fiEorTests; the
 * equivalent check with fiEorRules is disabled pending ICU ticket #8962.
 */
5434 static void TestImportRulesFiWithEor(void)
5437 const char* defaultRules
[] = {
5438 "&a<b", /* Dummy rule. */
5441 const OneTestCase defaultTests
[] = {
5442 { {0x0110}, {0x00F0}, UCOL_LESS
},
5443 { {0x00a3}, {0x00a5}, UCOL_LESS
},
5444 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
5447 /* European Ordering rules: ignore currency characters. */
5448 const char* eorRules
[] = {
5449 "[import root-u-co-eor]",
5452 const OneTestCase eorTests
[] = {
5453 { {0x0110}, {0x00F0}, UCOL_LESS
},
5454 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
5455 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
/* Finnish standard tailoring on its own: U+0110 is expected after U+00F0. */
5458 const char* fiStdRules
[] = {
5459 "[import fi-u-co-standard]",
5462 const OneTestCase fiStdTests
[] = {
5463 { {0x0110}, {0x00F0}, UCOL_GREATER
},
5464 { {0x00a3}, {0x00a5}, UCOL_LESS
},
5465 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
5468 /* Both European Ordering Rules and Fi Standard Rules. */
5469 const char* eorFiStdRules
[] = {
5470 "[import root-u-co-eor][import fi-u-co-standard]",
5473 /* This is essentially same as the one before once fi.txt is updated with import. */
5474 const char* fiEorRules
[] = {
5475 "[import fi-u-co-eor]",
5478 const OneTestCase fiEorTests
[] = {
5479 { {0x0110}, {0x00F0}, UCOL_GREATER
},
5480 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
5481 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
5484 doTestOneTestCase(defaultTests
, UPRV_LENGTHOF(defaultTests
), defaultRules
, UPRV_LENGTHOF(defaultRules
));
5485 doTestOneTestCase(eorTests
, UPRV_LENGTHOF(eorTests
), eorRules
, UPRV_LENGTHOF(eorRules
));
5486 doTestOneTestCase(fiStdTests
, UPRV_LENGTHOF(fiStdTests
), fiStdRules
, UPRV_LENGTHOF(fiStdRules
));
/* The combined expectations are checked with the two explicit imports. */
5487 doTestOneTestCase(fiEorTests
, UPRV_LENGTHOF(fiEorTests
), eorFiStdRules
, UPRV_LENGTHOF(eorFiStdRules
));
5489 log_knownIssue("8962", NULL
);
5490 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
5493 "[import root-u-co-eor][import fi-u-co-standard]"
5498 /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */
5505 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
5506 * the resource files are built with -includeUnihanColl option.
5507 * TODO: Uncomment this function and make it work when unihan rules are built by default.
/*
 * Compares the order of U+3402 and U+4E1E under a dummy tailoring and under
 * "[import ko-u-co-unihan]": the expected result flips from GREATER (default)
 * to LESS (with the unihan import).
 */
5509 static void TestImportRulesCJKWithUnihan(void)
5512 const char* defaultRules
[] = {
5513 "&a<b", /* Dummy rule. */
5516 const OneTestCase defaultTests
[] = {
5517 { {0x3402}, {0x4e1e}, UCOL_GREATER
},
5520 /* Unihan ordering, imported from the ko tailoring. */
5521 const char* unihanRules
[] = {
5522 "[import ko-u-co-unihan]",
5525 const OneTestCase unihanTests
[] = {
5526 { {0x3402}, {0x4e1e}, UCOL_LESS
},
5529 doTestOneTestCase(defaultTests
, UPRV_LENGTHOF(defaultTests
), defaultRules
, UPRV_LENGTHOF(defaultRules
));
5530 doTestOneTestCase(unihanTests
, UPRV_LENGTHOF(unihanTests
), unihanRules
, UPRV_LENGTHOF(unihanRules
));
/*
 * Checks that a collator built from "[import vi][import es]" behaves like a
 * collator built from the vi and es rule strings concatenated by hand.
 * Both collators are opened with ucol_openRules(); their tailored sets must
 * be equal, and for every item of the tailored set (code point ranges and
 * strings) the two collators must produce the same sort key.
 */
5535 static void TestImport(void)
5539 UCollator
* viescoll
;
5540 UCollator
* importviescoll
;
5542 UErrorCode status
= U_ZERO_ERROR
;
5544 int32_t viruleslength
;
5546 int32_t esruleslength
;
5548 int32_t viesruleslength
;
5549 char srules
[500] = "[import vi][import es]";
5551 uint32_t length
= 0;
5564 USet
* importTailoredSet
;
5567 vicoll
= ucol_open("vi", &status
);
5568 if(U_FAILURE(status
)){
5569 log_err_status(status
, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status
));
5573 virules
= (UChar
*) ucol_getRules(vicoll
, &viruleslength
);
5574 if(viruleslength
== 0) {
5575 log_data_err("missing vi tailoring rule string\n");
5579 escoll
= ucol_open("es", &status
);
5580 esrules
= (UChar
*) ucol_getRules(escoll
, &esruleslength
);
/* Room for both rule strings plus a terminating NUL, counted in UChar
   units (the element type of the buffer), not in pointer-sized units. */
5581 viesrules
= (UChar
*)uprv_malloc((viruleslength
+esruleslength
+1)*sizeof(UChar
));
5583 u_strcat(viesrules
, virules
);
5584 u_strcat(viesrules
, esrules
);
5585 viesruleslength
= viruleslength
+ esruleslength
;
/* Reference collator: built from the hand-concatenated rules. */
5586 viescoll
= ucol_openRules(viesrules
, viesruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5588 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
/* Collator under test: built from the two [import] statements. */
5589 length
= u_unescape(srules
, rules
, 500);
5590 importviescoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5591 if(U_FAILURE(status
)){
5592 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5596 tailoredSet
= ucol_getTailoredSet(viescoll
, &status
);
5597 importTailoredSet
= ucol_getTailoredSet(importviescoll
, &status
);
5599 if(!uset_equals(tailoredSet
, importTailoredSet
)){
5600 log_err("Tailored sets not equal");
5603 uset_close(importTailoredSet
);
/* Walk the reference tailored set item by item and compare sort keys. */
5605 itemCount
= uset_getItemCount(tailoredSet
);
5607 for( i
= 0; i
< itemCount
; i
++){
5608 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
/* Range item: test each code point in [start, end] on its own. */
5610 for (; start
<= end
; start
++){
5612 U16_APPEND(str
, k
, 500, start
, b
);
5613 (void)b
; /* Suppress set but not used warning. */
5614 ucol_getSortKey(viescoll
, str
, 1, sk1
, 500);
5615 ucol_getSortKey(importviescoll
, str
, 1, sk2
, 500);
5616 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5617 log_err("Sort key for %s not equal\n", str
);
/* String item: compare sort keys over the full strLength. */
5622 ucol_getSortKey(viescoll
, str
, strLength
, sk1
, 500);
5623 ucol_getSortKey(importviescoll
, str
, strLength
, sk2
, 500);
5624 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5625 log_err("ZZSort key for %s not equal\n", str
);
5632 uset_close(tailoredSet
);
5634 uprv_free(viesrules
);
5638 ucol_close(viescoll
);
5639 ucol_close(importviescoll
);
/*
 * Same check as TestImport, but with an import that carries a collation
 * type: a collator built from "[import vi][import de-u-co-phonebk]" must
 * match one built from the vi rules concatenated with the rules of the
 * "de-u-co-phonebk" collator. Tailored sets and per-item sort keys are
 * compared.
 */
5642 static void TestImportWithType(void)
5646 UCollator
* videcoll
;
5647 UCollator
* importvidecoll
;
5649 UErrorCode status
= U_ZERO_ERROR
;
5650 const UChar
* virules
;
5651 int32_t viruleslength
;
5652 const UChar
* derules
;
5653 int32_t deruleslength
;
5655 int32_t videruleslength
;
5656 const char srules
[500] = "[import vi][import de-u-co-phonebk]";
5658 uint32_t length
= 0;
5670 USet
* importTailoredSet
;
5672 vicoll
= ucol_open("vi", &status
);
5673 if(U_FAILURE(status
)){
5674 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5677 virules
= ucol_getRules(vicoll
, &viruleslength
);
5678 if(viruleslength
== 0) {
5679 log_data_err("missing vi tailoring rule string\n");
5683 /* decoll = ucol_open("de@collation=phonebook", &status); */
5684 decoll
= ucol_open("de-u-co-phonebk", &status
);
5685 if(U_FAILURE(status
)){
5686 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5691 derules
= ucol_getRules(decoll
, &deruleslength
);
/* Room for both rule strings plus a terminating NUL, counted in UChar
   units (the element type of the buffer), not in pointer-sized units. */
5692 viderules
= (UChar
*)uprv_malloc((viruleslength
+deruleslength
+1)*sizeof(UChar
));
5694 u_strcat(viderules
, virules
);
5695 u_strcat(viderules
, derules
);
5696 videruleslength
= viruleslength
+ deruleslength
;
/* Reference collator: built from the hand-concatenated rules. */
5697 videcoll
= ucol_openRules(viderules
, videruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5699 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
/* Collator under test: built from the two [import] statements. */
5700 length
= u_unescape(srules
, rules
, 500);
5701 importvidecoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5702 if(U_FAILURE(status
)){
5703 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5707 tailoredSet
= ucol_getTailoredSet(videcoll
, &status
);
5708 importTailoredSet
= ucol_getTailoredSet(importvidecoll
, &status
);
5710 if(!uset_equals(tailoredSet
, importTailoredSet
)){
5711 log_err("Tailored sets not equal");
5714 uset_close(importTailoredSet
);
/* Walk the reference tailored set item by item and compare sort keys. */
5716 itemCount
= uset_getItemCount(tailoredSet
);
5718 for( i
= 0; i
< itemCount
; i
++){
5719 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
/* Range item: test each code point in [start, end] on its own. */
5721 for (; start
<= end
; start
++){
5723 U16_APPEND_UNSAFE(str
, k
, start
);
5724 ucol_getSortKey(videcoll
, str
, 1, sk1
, 500);
5725 ucol_getSortKey(importvidecoll
, str
, 1, sk2
, 500);
5726 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5727 log_err("Sort key for %s not equal\n", str
);
/* String item: compare sort keys over the full strLength. */
5732 ucol_getSortKey(videcoll
, str
, strLength
, sk1
, 500);
5733 ucol_getSortKey(importvidecoll
, str
, strLength
, sk2
, 500);
5734 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5735 log_err("Sort key for %s not equal\n", str
);
5742 uset_close(tailoredSet
);
5744 uprv_free(viderules
);
5746 ucol_close(videcoll
);
5747 ucol_close(importvidecoll
);
5752 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5753 static const UChar longUpperStr1
[]= { /* 155 chars */
5754 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5755 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5756 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5757 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5758 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5759 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5760 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5761 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5762 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5763 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5766 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5767 static const UChar longUpperStr2
[]= { /* 125 chars, > 128 collation elements */
5768 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5769 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5770 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5771 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5772 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5775 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5776 static const UChar longUpperStr3
[]= { /* 324 chars */
5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5780 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5781 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5782 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5783 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5784 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5785 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5786 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5787 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5788 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5792 const UChar
* longUpperStrPtr
;
5793 int32_t longUpperStrLen
;
5796 /* String pointers must be in reverse collation order of the corresponding strings */
/* Each entry pairs a test string with its length in UChars.
   NOTE(review): TestCaseLevelBufferOverflow iterates this table until it
   finds a NULL longUpperStrPtr, so a terminating entry is expected after
   the three items below; it is not visible in this excerpt -- confirm. */
5797 static const LongUpperStrItem longUpperStrItems
[] = {
5798 { longUpperStr1
, UPRV_LENGTHOF(longUpperStr1
) },
5799 { longUpperStr2
, UPRV_LENGTHOF(longUpperStr2
) },
5800 { longUpperStr3
, UPRV_LENGTHOF(longUpperStr3
) },
5804 enum { kCollKeyLenMax
= 850 }; /* may change with collation changes */
5806 /* Test for the fix of #8445; without the fix, could have crash due to stack or heap corruption */
/*
 * Opens the root collator, turns the case level on, and generates a sort key
 * for every entry of longUpperStrItems into a kCollKeyLenMax-byte buffer.
 * Each key length must be in 1..kCollKeyLenMax, and from the second entry on
 * the new key must compare (as a C string) lower than the previous one.
 */
5807 static void TestCaseLevelBufferOverflow(void)
5809 UErrorCode status
= U_ZERO_ERROR
;
5810 UCollator
* ucol
= ucol_open("root", &status
);
5811 if ( U_SUCCESS(status
) ) {
5812 ucol_setAttribute(ucol
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
5813 if ( U_SUCCESS(status
) ) {
5814 const LongUpperStrItem
* itemPtr
;
5815 uint8_t sortKeyA
[kCollKeyLenMax
], sortKeyB
[kCollKeyLenMax
];
/* The table is walked until an entry with a NULL string pointer. */
5816 for ( itemPtr
= longUpperStrItems
; itemPtr
->longUpperStrPtr
!= NULL
; itemPtr
++ ) {
/* Keep the previous key in sortKeyB before sortKeyA is overwritten. */
5818 if (itemPtr
> longUpperStrItems
) {
5819 uprv_strcpy((char *)sortKeyB
, (char *)sortKeyA
);
5821 sortKeyLen
= ucol_getSortKey(ucol
, itemPtr
->longUpperStrPtr
, itemPtr
->longUpperStrLen
, sortKeyA
, kCollKeyLenMax
);
5822 if (sortKeyLen
<= 0 || sortKeyLen
> kCollKeyLenMax
) {
5823 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen
);
/* Current key (A) must sort strictly before the previous key (B). */
5826 if ( itemPtr
> longUpperStrItems
) {
5827 int compareResult
= uprv_strcmp((char *)sortKeyA
, (char *)sortKeyB
);
5828 if (compareResult
>= 0) {
5829 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult
);
5834 log_err_status(status
, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status
));
5838 log_err_status(status
, "ERROR in ucol_open for root: %s\n", myErrorName(status
));
5842 /* Test for #10595 */
5843 static const UChar testJapaneseName
[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5844 #define KEY_PART_SIZE 16
/*
 * Opens the "ja" collator at identical strength and pulls the sort key of
 * the first 5 UChars of testJapaneseName in KEY_PART_SIZE-byte pieces with
 * ucol_nextSortKeyPart(). The loop runs for as long as a call fills the
 * whole buffer; any failure status reported by a call is logged as an error.
 */
5846 static void TestNextSortKeyPartJaIdentical(void)
5848 UErrorCode status
= U_ZERO_ERROR
;
5850 uint8_t keyPart
[KEY_PART_SIZE
];
5852 uint32_t state
[2] = {0, 0};
5855 coll
= ucol_open("ja", &status
);
5856 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
5857 if (U_FAILURE(status
)) {
5858 log_err_status(status
, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status
));
/* Iterate over the 5 characters only, leaving out the trailing 0. */
5862 uiter_setString(&iter
, testJapaneseName
, 5);
/* Seed the length so the loop body runs at least once. */
5863 keyPartLen
= KEY_PART_SIZE
;
5864 while (keyPartLen
== KEY_PART_SIZE
) {
5865 keyPartLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, keyPart
, KEY_PART_SIZE
, &status
);
5866 if (U_FAILURE(status
)) {
5867 log_err_status(status
, "ERROR: in iterating next sort key part: %s\n", myErrorName(status
));
/* Registers test function x under the path "tscoll/cmsccoll/<name of x>". */
5875 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
/*
 * Adds the tests of this file to the test tree rooted at *root, one TEST()
 * registration per test function. Entries wrapped in comments are disabled.
 */
5877 void addMiscCollTest(TestNode
** root
)
5879 TEST(TestRuleOptions
);
5880 TEST(TestBeforePrefixFailure
);
5881 TEST(TestContractionClosure
);
5882 TEST(TestPrefixCompose
);
5883 TEST(TestStrCollIdenticalPrefix
);
5885 TEST(TestNewJapanese
);
5886 /*TEST(TestLimitations);*/
5888 TEST(TestExtremeCompression
);
5889 TEST(TestSurrogates
);
5890 TEST(TestVariableTopSetting
);
5891 TEST(TestMaxVariable
);
5892 TEST(TestBocsuCoverage
);
5893 TEST(TestCyrillicTailoring
);
5895 TEST(IncompleteCntTest
);
5896 TEST(BlackBirdTest
);
5898 TEST(BillFairmanTest
);
5900 TEST(TestImplicitTailoring
);
5901 TEST(TestFCDProblem
);
5902 TEST(TestEmptyRule
);
5903 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5905 TEST(TestUpperCaseFirst
);
5907 TEST(TestHangulTailoring
);
5909 TEST(TestIncrementalNormalize
);
5910 TEST(TestComposeDecompose
);
5911 TEST(TestCompressOverlap
);
5912 TEST(TestContraction
);
5913 TEST(TestExpansion
);
5914 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5915 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5917 TEST(TestSuppressContractions
);
5919 TEST(TestHebrewUCA
);
5920 TEST(TestPartialSortKeyTermination
);
5925 TEST(TestNumericCollation
);
5926 TEST(TestTibetanConformance
);
5927 TEST(TestPinyinProblem
);
5928 TEST(TestSeparateTrees
);
5929 TEST(TestBeforePinyin
);
5930 TEST(TestBeforeTightening
);
5931 /*TEST(TestMoreBefore);*/
5932 TEST(TestTailorNULL
);
5933 TEST(TestUpperFirstQuaternary
);
5939 TEST(TestSortKeyConsistency
);
5940 TEST(TestVI5913
); /* VI, RO tailored rules */
5941 TEST(TestCroatianSortKey
);
5942 TEST(TestTailor6179
);
5943 TEST(TestUCAPrecontext
);
5944 TEST(TestOutOfBuffer5468
);
5945 TEST(TestSameStrengthList
);
5947 TEST(TestSameStrengthListQuoted
);
5948 TEST(TestSameStrengthListSupplemental
);
5949 TEST(TestSameStrengthListQwerty
);
5950 TEST(TestSameStrengthListQuotedQwerty
);
5951 TEST(TestSameStrengthListRanges
);
5952 TEST(TestSameStrengthListSupplementalRanges
);
5953 TEST(TestSpecialCharacters
);
5954 TEST(TestPrivateUseCharacters
);
5955 TEST(TestPrivateUseCharactersInList
);
5956 TEST(TestPrivateUseCharactersInRange
);
5957 TEST(TestInvalidListsAndRanges
);
5958 TEST(TestImportRulesDeWithPhonebook
);
5959 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5960 /* TEST(TestImportRulesCJKWithUnihan); */
5962 TEST(TestImportWithType
);
5964 TEST(TestBeforeRuleWithScriptReordering
);
5965 TEST(TestNonLeadBytesDuringCollationReordering
);
5966 TEST(TestReorderingAPI
);
5967 TEST(TestReorderingAPIWithRuleCreatedCollator
);
5968 TEST(TestEquivalentReorderingScripts
);
5969 TEST(TestGreekFirstReorder
);
5970 TEST(TestGreekLastReorder
);
5971 TEST(TestNonScriptReorder
);
5972 TEST(TestHaniReorder
);
5973 TEST(TestHaniReorderWithOtherRules
);
5974 TEST(TestMultipleReorder
);
5975 TEST(TestReorderingAcrossCloning
);
5976 TEST(TestReorderWithNumericCollation
);
5978 TEST(TestCaseLevelBufferOverflow
);
5979 TEST(TestNextSortKeyPartJaIdentical
);
5982 #endif /* #if !UCONFIG_NO_COLLATION */