1 /********************************************************************
3 * Copyright (c) 2001-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
10 *******************************************************************************/
12 * These are the tests specific to ICU 1.8 and above, that I didn't know where
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_COLLATION
22 #include "unicode/ucol.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/uloc.h"
28 #include "unicode/ustring.h"
34 #include "unicode/parseerr.h"
35 #include "unicode/ucnv.h"
36 #include "unicode/ures.h"
37 #include "unicode/uscript.h"
38 #include "unicode/utf16.h"
43 #define MAX_TOKEN_LEN 16
45 typedef UCollationResult
tst_strcoll(void *collator
, const int object
,
46 const UChar
*source
, const int sLen
,
47 const UChar
*target
, const int tLen
);
51 const static char cnt1
[][10] = {
66 const static char cnt2
[][10] = {
78 static void IncompleteCntTest(void)
80 UErrorCode status
= U_ZERO_ERROR
;
85 UCollator
*coll
= NULL
;
86 uint32_t i
= 0, j
= 0;
89 u_uastrcpy(temp
, " & Z < ABC < Q < B");
91 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
93 if(U_SUCCESS(status
)) {
94 size
= UPRV_LENGTHOF(cnt1
);
95 for(i
= 0; i
< size
-1; i
++) {
96 for(j
= i
+1; j
< size
; j
++) {
97 UCollationElements
*iter
;
98 u_uastrcpy(t1
, cnt1
[i
]);
99 u_uastrcpy(t2
, cnt1
[j
]);
100 doTest(coll
, t1
, t2
, UCOL_LESS
);
101 /* synwee : added collation element iterator test */
102 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
103 if (U_FAILURE(status
)) {
104 log_err("Creation of iterator failed\n");
108 ucol_closeElements(iter
);
116 u_uastrcpy(temp
, " & Z < DAVIS < MARK <DAV");
117 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
119 if(U_SUCCESS(status
)) {
120 size
= UPRV_LENGTHOF(cnt2
);
121 for(i
= 0; i
< size
-1; i
++) {
122 for(j
= i
+1; j
< size
; j
++) {
123 UCollationElements
*iter
;
124 u_uastrcpy(t1
, cnt2
[i
]);
125 u_uastrcpy(t2
, cnt2
[j
]);
126 doTest(coll
, t1
, t2
, UCOL_LESS
);
128 /* synwee : added collation element iterator test */
129 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
130 if (U_FAILURE(status
)) {
131 log_err("Creation of iterator failed\n");
135 ucol_closeElements(iter
);
145 const static char shifted
[][20] = {
157 const static UCollationResult shiftedTert
[] = {
169 const static char nonignorable
[][20] = {
181 static void BlackBirdTest(void) {
182 UErrorCode status
= U_ZERO_ERROR
;
186 uint32_t i
= 0, j
= 0;
188 UCollator
*coll
= ucol_open("en_US", &status
);
190 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
191 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
193 if(U_SUCCESS(status
)) {
194 size
= UPRV_LENGTHOF(nonignorable
);
195 for(i
= 0; i
< size
-1; i
++) {
196 for(j
= i
+1; j
< size
; j
++) {
197 u_uastrcpy(t1
, nonignorable
[i
]);
198 u_uastrcpy(t2
, nonignorable
[j
]);
199 doTest(coll
, t1
, t2
, UCOL_LESS
);
204 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
205 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
207 if(U_SUCCESS(status
)) {
208 size
= UPRV_LENGTHOF(shifted
);
209 for(i
= 0; i
< size
-1; i
++) {
210 for(j
= i
+1; j
< size
; j
++) {
211 u_uastrcpy(t1
, shifted
[i
]);
212 u_uastrcpy(t2
, shifted
[j
]);
213 doTest(coll
, t1
, t2
, UCOL_LESS
);
218 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
219 if(U_SUCCESS(status
)) {
220 size
= UPRV_LENGTHOF(shifted
);
221 for(i
= 1; i
< size
; i
++) {
222 u_uastrcpy(t1
, shifted
[i
-1]);
223 u_uastrcpy(t2
, shifted
[i
]);
224 doTest(coll
, t1
, t2
, shiftedTert
[i
]);
231 const static UChar testSourceCases
[][MAX_TOKEN_LEN
] = {
232 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
233 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
234 {0x0041/*'A'*/, 0x0300, 0x0000},
235 {0x00C0, 0x0301, 0x0000},
236 /* this would work with forced normalization */
237 {0x00C0, 0x0316, 0x0000}
240 const static UChar testTargetCases
[][MAX_TOKEN_LEN
] = {
241 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
242 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
244 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245 /* this would work with forced normalization */
246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249 const static UCollationResult results
[] = {
257 static void FunkyATest(void)
261 UErrorCode status
= U_ZERO_ERROR
;
262 UCollator
*myCollation
;
263 myCollation
= ucol_open("en_US", &status
);
264 if(U_FAILURE(status
)){
265 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
268 log_verbose("Testing some A letters, for some reason\n");
269 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
270 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
271 for (i
= 0; i
< 4 ; i
++)
273 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
275 ucol_close(myCollation
);
278 UColAttributeValue caseFirst
[] = {
285 UColAttributeValue alternateHandling
[] = {
290 UColAttributeValue caseLevel
[] = {
295 UColAttributeValue strengths
[] = {
304 static const char * strengthsC
[] = {
312 static const char * caseFirstC
[] = {
319 static const char * alternateHandlingC
[] = {
320 "UCOL_NON_IGNORABLE",
324 static const char * caseLevelC
[] = {
329 /* not used currently - does not test, only prints */
330 static void PrintMarkDavis(void)
332 UErrorCode status
= U_ZERO_ERROR
;
334 uint8_t sortkey
[256];
335 UCollator
*coll
= ucol_open("en_US", &status
);
336 uint32_t h
,i
,j
,k
, sortkeysize
;
341 log_verbose("PrintMarkDavis");
343 u_uastrcpy(m
, "Mark Davis");
349 for(i
= 0; i
<sizem
; i
++) {
350 fprintf(stderr
, "\\u%04X ", m
[i
]);
352 fprintf(stderr
, "\n");
354 for(h
= 0; h
<UPRV_LENGTHOF(caseFirst
); h
++) {
355 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status); /* index with h, this loop's counter; i is left over from the preceding print loop */
356 fprintf(stderr
, "caseFirst: %s\n", caseFirstC
[h
]);
358 for(i
= 0; i
<UPRV_LENGTHOF(alternateHandling
); i
++) {
359 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, alternateHandling
[i
], &status
);
360 fprintf(stderr
, " AltHandling: %s\n", alternateHandlingC
[i
]);
362 for(j
= 0; j
<UPRV_LENGTHOF(caseLevel
); j
++) {
363 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, caseLevel
[j
], &status
);
364 fprintf(stderr
, " caseLevel: %s\n", caseLevelC
[j
]);
366 for(k
= 0; k
<UPRV_LENGTHOF(strengths
); k
++) {
367 ucol_setAttribute(coll
, UCOL_STRENGTH
, strengths
[k
], &status
);
368 sortkeysize
= ucol_getSortKey(coll
, m
, sizem
, sortkey
, 256);
369 fprintf(stderr
, " strength: %s\n Sortkey: ", strengthsC
[k
]);
370 fprintf(stderr
, "%s\n", ucol_sortKeyToString(coll
, sortkey
, buffer
, &len
));
381 static void BillFairmanTest(void) {
383 ** check for actual locale via ICU resource bundles
385 ** lp points to the original locale ("fr_FR_....")
388 UResourceBundle
*lr
,*cr
;
389 UErrorCode lec
= U_ZERO_ERROR
;
390 const char *lp
= "fr_FR_you_ll_never_find_this_locale";
392 log_verbose("BillFairmanTest\n");
394 lr
= ures_open(NULL
,lp
,&lec
);
396 cr
= ures_getByKey(lr
,"collations",0,&lec
);
398 lp
= ures_getLocaleByType(cr
, ULOC_ACTUAL_LOCALE
, &lec
);
400 if (U_SUCCESS(lec
)) {
401 if(strcmp(lp
, "fr") != 0) {
402 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp
);
412 const static char chTest
[][20] = {
415 "ca", "cb", "cx", "cy", "CZ",
416 "c\\u030C", "C\\u030C",
419 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
420 "ch", "cH", "Ch", "CH",
421 "cha", "charly", "che", "chh", "chch", "chr",
424 "r\\u030C", "R\\u030C",
427 "s\\u030C", "S\\u030C",
429 "z\\u030C", "Z\\u030C"
432 static void TestChMove(void) {
436 uint32_t i
= 0, j
= 0;
438 UErrorCode status
= U_ZERO_ERROR
;
440 UCollator
*coll
= ucol_open("cs", &status
);
442 if(U_SUCCESS(status
)) {
443 size
= UPRV_LENGTHOF(chTest
);
444 for(i
= 0; i
< size
-1; i
++) {
445 for(j
= i
+1; j
< size
; j
++) {
446 u_unescape(chTest
[i
], t1
, 256);
447 u_unescape(chTest
[j
], t2
, 256);
448 doTest(coll
, t1
, t2
, UCOL_LESS
);
453 log_data_err("Can't open collator");
462 const static char impTest[][20] = {
473 static void TestImplicitTailoring(void) {
474 static const struct {
476 const char *data
[10];
480 /* Tailor b and c before U+4E00. */
481 "&[before 1]\\u4e00 < b < c "
482 /* Now, before U+4E00 is c; put d and e after that. */
483 "&[before 1]\\u4e00 < d < e",
484 { "b", "c", "d", "e", "\\u4e00"}, 5 },
485 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
486 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
487 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
492 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
493 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
500 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
502 uint32_t i = 0, j = 0;
504 uint32_t ruleLen = 0;
505 UErrorCode status = U_ZERO_ERROR;
506 UCollator *coll = NULL;
507 ruleLen = u_unescape(rule, t1, 256);
509 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
511 if(U_SUCCESS(status)) {
512 size = UPRV_LENGTHOF(impTest);
513 for(i = 0; i < size-1; i++) {
514 for(j = i+1; j < size; j++) {
515 u_unescape(impTest[i], t1, 256);
516 u_unescape(impTest[j], t2, 256);
517 doTest(coll, t1, t2, UCOL_LESS);
522 log_err("Can't open collator");
528 static void TestFCDProblem(void) {
532 const char *s1
= "\\u0430\\u0306\\u0325";
533 const char *s2
= "\\u04D1\\u0325";
535 UErrorCode status
= U_ZERO_ERROR
;
536 UCollator
*coll
= ucol_open("", &status
);
537 u_unescape(s1
, t1
, 256);
538 u_unescape(s2
, t2
, 256);
540 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
541 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
543 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
544 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
550 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
551 We're only using NFC/NFD in this test.
553 #define NORM_BUFFER_TEST_LEN 18
556 UChar NFC
[NORM_BUFFER_TEST_LEN
];
557 UChar NFD
[NORM_BUFFER_TEST_LEN
];
560 static void TestComposeDecompose(void) {
561 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
562 static const UChar UNICODESET_STR
[] = {
563 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
564 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
565 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568 int32_t i
= 0, j
= 0;
570 UErrorCode status
= U_ZERO_ERROR
;
571 const char *locName
= NULL
;
575 uint32_t noCases
= 0;
576 UCollator
*coll
= NULL
;
578 UChar comp
[NORM_BUFFER_TEST_LEN
];
580 UCollationElements
*iter
;
581 USet
*charsToTest
= uset_openPattern(UNICODESET_STR
, -1, &status
);
582 int32_t charsToTestSize
;
584 noOfLoc
= uloc_countAvailable();
586 coll
= ucol_open("", &status
);
587 if (U_FAILURE(status
)) {
588 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status
));
591 charsToTestSize
= uset_size(charsToTest
);
592 if (charsToTestSize
<= 0) {
593 log_err("Set was zero. Missing data?\n");
596 t
= (tester
**)malloc(charsToTestSize
* sizeof(tester
*));
597 t
[0] = (tester
*)malloc(sizeof(tester
));
598 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize
);
600 for(u
= 0; u
< charsToTestSize
; u
++) {
601 UChar32 ch
= uset_charAt(charsToTest
, u
);
603 U16_APPEND_UNSAFE(comp
, len
, ch
);
604 nfcSize
= unorm_normalize(comp
, len
, UNORM_NFC
, 0, t
[noCases
]->NFC
, NORM_BUFFER_TEST_LEN
, &status
);
605 nfdSize
= unorm_normalize(comp
, len
, UNORM_NFD
, 0, t
[noCases
]->NFD
, NORM_BUFFER_TEST_LEN
, &status
);
607 if(nfcSize
!= nfdSize
|| (uprv_memcmp(t
[noCases
]->NFC
, t
[noCases
]->NFD
, nfcSize
* sizeof(UChar
)) != 0)
608 || (len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0))) {
610 if(len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0)) {
611 u_strncpy(t
[noCases
]->NFC
, comp
, len
);
612 t
[noCases
]->NFC
[len
] = 0;
615 t
[noCases
] = (tester
*)malloc(sizeof(tester
));
616 uprv_memset(t
[noCases
], 0, sizeof(tester
));
619 log_verbose("Testing %d/%d of possible test cases\n", noCases
, charsToTestSize
);
620 uset_close(charsToTest
);
623 for(u
=0; u
<(UChar32
)noCases
; u
++) {
624 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
625 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t
[u
]->u
);
626 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
630 for(u = 0; u < charsToTestSize; u++) {
632 log_verbose("%08X ", u);
634 uprv_memset(t[noCases], 0, sizeof(tester));
637 U16_APPEND_UNSAFE(comp, len, u);
639 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
640 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
641 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
642 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
648 log_verbose("Testing locales, number of cases = %i\n", noCases
);
649 for(i
= 0; i
<noOfLoc
; i
++) {
650 status
= U_ZERO_ERROR
;
651 locName
= uloc_getAvailable(i
);
652 if(hasCollationElements(locName
)) {
655 int32_t nameSize
= uloc_getDisplayName(locName
, NULL
, name
, sizeof(cName
), &status
);
657 for(j
= 0; j
<nameSize
; j
++) {
658 cName
[j
] = (char)name
[j
];
661 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
663 coll
= ucol_open(locName
, &status
);
664 ucol_setStrength(coll
, UCOL_IDENTICAL
);
665 iter
= ucol_openElements(coll
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
667 for(u
=0; u
<(UChar32
)noCases
; u
++) {
668 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
669 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t
[u
]->u
, cName
);
670 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
671 log_verbose("Testing NFC\n");
672 ucol_setText(iter
, t
[u
]->NFC
, u_strlen(t
[u
]->NFC
), &status
);
674 log_verbose("Testing NFD\n");
675 ucol_setText(iter
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
679 ucol_closeElements(iter
);
683 for(u
= 0; u
<= (UChar32
)noCases
; u
++) {
689 static void TestEmptyRule(void) {
690 UErrorCode status
= U_ZERO_ERROR
;
691 UChar rulez
[] = { 0 };
692 UCollator
*coll
= ucol_openRules(rulez
, 0, UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
697 static void TestUCARules(void) {
698 UErrorCode status
= U_ZERO_ERROR
;
701 uint32_t ruleLen
= 0;
702 UCollator
*UCAfromRules
= NULL
;
703 UCollator
*coll
= ucol_open("", &status
);
704 if(status
== U_FILE_ACCESS_ERROR
) {
705 log_data_err("Is your data around?\n");
707 } else if(U_FAILURE(status
)) {
708 log_err("Error opening collator\n");
711 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, 256);
713 log_verbose("TestUCARules\n");
715 rules
= (UChar
*)malloc((ruleLen
+1)*sizeof(UChar
));
716 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, ruleLen
);
718 log_verbose("Rules length is %d\n", ruleLen
);
719 UCAfromRules
= ucol_openRules(rules
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
720 if(U_SUCCESS(status
)) {
721 ucol_close(UCAfromRules
);
723 log_verbose("Unable to create a collator from UCARules!\n");
726 u_unescape(blah, b, 256);
727 ucol_getSortKey(coll, b, 1, res, 256);
736 /* Pinyin tonal order */
738 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
739 (w/macron)< (w/acute)< (w/caron)< (w/grave)
740 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
741 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
742 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
743 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
744 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
747 However, in testing we got the following order:
748 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
749 (w/acute)< (w/grave)< (w/caron)< (w/macron)
750 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
752 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
753 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
754 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
756 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
759 static void TestBefore(void) {
760 const static char *data
[] = {
761 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
762 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
763 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
764 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
765 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
766 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
769 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
770 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
771 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
772 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
773 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
774 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
775 data
, UPRV_LENGTHOF(data
));
779 /* superseded by TestBeforePinyin */
780 static void TestJ784(void) {
781 const static char *data
[] = {
782 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
783 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
784 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
785 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
786 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
788 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
790 genericLocaleStarter("zh", data
, UPRV_LENGTHOF(data
));
795 /* superseded by the changes to the lv locale */
796 static void TestJ831(void) {
797 const static char *data
[] = {
803 genericLocaleStarter("lv", data
, UPRV_LENGTHOF(data
));
807 static void TestJ815(void) {
808 const static char *data
[] = {
824 genericLocaleStarter("fr", data
, UPRV_LENGTHOF(data
));
825 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data
, UPRV_LENGTHOF(data
));
829 static void TestCase(void)
831 const static UChar gRules
[MAX_TOKEN_LEN
] =
832 /*" & 0 < 1,\u2460<a,A"*/
833 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
835 const static UChar testCase
[][MAX_TOKEN_LEN
] =
837 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
838 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
839 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
840 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
843 const static UCollationResult caseTestResults
[][9] =
845 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
846 { UCOL_GREATER
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
},
847 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_GREATER
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
848 { UCOL_GREATER
, UCOL_LESS
, UCOL_GREATER
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
}
851 const static UColAttributeValue caseTestAttributes
[][2] =
853 { UCOL_LOWER_FIRST
, UCOL_OFF
},
854 { UCOL_UPPER_FIRST
, UCOL_OFF
},
855 { UCOL_LOWER_FIRST
, UCOL_ON
},
856 { UCOL_UPPER_FIRST
, UCOL_ON
}
859 UErrorCode status
= U_ZERO_ERROR
;
860 UCollationElements
*iter
;
861 UCollator
*myCollation
;
862 myCollation
= ucol_open("en_US", &status
);
864 if(U_FAILURE(status
)){
865 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
868 log_verbose("Testing different case settings\n");
869 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
871 for(k
= 0; k
<4; k
++) {
872 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
873 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
874 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes
[k
][0], caseTestAttributes
[k
][1]);
875 for (i
= 0; i
< 3 ; i
++) {
876 for(j
= i
+1; j
<4; j
++) {
877 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
881 ucol_close(myCollation
);
883 myCollation
= ucol_openRules(gRules
, u_strlen(gRules
), UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
884 if(U_FAILURE(status
)){
885 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
888 log_verbose("Testing different case settings with custom rules\n");
889 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
891 for(k
= 0; k
<4; k
++) {
892 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
893 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
894 for (i
= 0; i
< 3 ; i
++) {
895 for(j
= i
+1; j
<4; j
++) {
896 log_verbose("k:%d, i:%d, j:%d\n", k
, i
, j
);
897 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
898 iter
=ucol_openElements(myCollation
, testCase
[i
], u_strlen(testCase
[i
]), &status
);
900 ucol_closeElements(iter
);
901 iter
=ucol_openElements(myCollation
, testCase
[j
], u_strlen(testCase
[j
]), &status
);
903 ucol_closeElements(iter
);
907 ucol_close(myCollation
);
909 const static char *lowerFirst
[] = {
925 const static char *upperFirst
[] = {
940 log_verbose("mixed case test\n");
941 log_verbose("lower first, case level off\n");
942 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst
, UPRV_LENGTHOF(lowerFirst
));
943 log_verbose("upper first, case level off\n");
944 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst
, UPRV_LENGTHOF(upperFirst
));
945 log_verbose("lower first, case level on\n");
946 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst
, UPRV_LENGTHOF(lowerFirst
));
947 log_verbose("upper first, case level on\n");
948 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst
, UPRV_LENGTHOF(upperFirst
));
953 static void TestIncrementalNormalize(void) {
955 /*UChar baseA =0x61;*/
957 /* UChar baseB = 0x42;*/
958 static const UChar ccMix
[] = {0x316, 0x321, 0x300};
959 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
961 0x316 is combining grave accent below, cc=220
962 0x321 is combining palatalized hook below, cc=202
963 0x300 is combining grave accent, cc=230
967 /*int maxSLen = 64000;*/
972 UErrorCode status
= U_ZERO_ERROR
;
973 UCollationResult result
;
975 int32_t myQ
= getTestOption(QUICK_OPTION
);
977 if(getTestOption(QUICK_OPTION
) < 0) {
978 setTestOption(QUICK_OPTION
, 1);
982 /* Test 1. Run very long unnormalized strings, to force overflow of*/
983 /* most buffers along the way.*/
984 UChar strA
[MAXSLEN
+1];
985 UChar strB
[MAXSLEN
+1];
987 coll
= ucol_open("en_US", &status
);
988 if(status
== U_FILE_ACCESS_ERROR
) {
989 log_data_err("Is your data around?\n");
991 } else if(U_FAILURE(status
)) {
992 log_err("Error opening collator\n");
995 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
997 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
998 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
999 /*for (sLen = 1000; sLen<1001; sLen++) {*/
1000 for (sLen
= 500; sLen
<501; sLen
++) {
1001 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1004 for (i
=1; i
<=sLen
-1; i
++) {
1005 strA
[i
] = ccMix
[i
% 3];
1006 strB
[sLen
-i
] = ccMix
[i
% 3];
1011 ucol_setStrength(coll
, UCOL_TERTIARY
); /* Do test with default strength, which runs*/
1012 doTest(coll
, strA
, strB
, UCOL_EQUAL
); /* optimized functions in the impl*/
1013 ucol_setStrength(coll
, UCOL_IDENTICAL
); /* Do again with the slow, general impl.*/
1014 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
1018 setTestOption(QUICK_OPTION
, myQ
);
1021 /* Test 2: Non-normal sequence in a string that extends to the last character*/
1022 /* of the string. Checks a couple of edge cases.*/
1025 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0};
1026 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0};
1027 ucol_setStrength(coll
, UCOL_TERTIARY
);
1028 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
1031 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
1035 * test below used a code point from Deseret, which sorts differently
1038 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1039 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1040 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1041 ucol_setStrength(coll
, UCOL_TERTIARY
);
1042 doTest(coll
, strA
, strB
, UCOL_GREATER
);
1045 /* Test 4: Embedded nulls do not terminate a string when length is specified.*/
1048 static const UChar strA
[] = {0x41, 0x00, 0x42, 0x00};
1049 static const UChar strB
[] = {0x41, 0x00, 0x00, 0x00};
1056 /* there used to be -3 here. Hmmmm.... */
1057 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1058 result
= ucol_strcoll(coll
, strA
, 3, strB
, 3);
1059 if (result
!= UCOL_GREATER
) {
1060 log_err("ERROR 1 in test 4\n");
1062 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
1063 if (result
!= UCOL_EQUAL
) {
1064 log_err("ERROR 2 in test 4\n");
1067 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1068 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1069 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1070 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1072 r
= strcmp(sortKeyA
, sortKeyAz
);
1074 log_err("Error 3 in test 4\n");
1076 r
= strcmp(sortKeyA
, sortKeyB
);
1078 log_err("Error 4 in test 4\n");
1080 r
= strcmp(sortKeyAz
, sortKeyBz
);
1082 log_err("Error 5 in test 4\n");
1085 ucol_setStrength(coll
, UCOL_IDENTICAL
);
1086 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1087 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1088 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1089 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1091 r
= strcmp(sortKeyA
, sortKeyAz
);
1093 log_err("Error 6 in test 4\n");
1095 r
= strcmp(sortKeyA
, sortKeyB
);
1097 log_err("Error 7 in test 4\n");
1099 r
= strcmp(sortKeyAz
, sortKeyBz
);
1101 log_err("Error 8 in test 4\n");
1103 ucol_setStrength(coll
, UCOL_TERTIARY
);
1107 /* Test 5: Null characters in non-normal source strings.*/
1110 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1111 static const UChar strB
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1118 result
= ucol_strcoll(coll
, strA
, 6, strB
, 6);
1119 if (result
!= UCOL_GREATER
) {
1120 log_err("ERROR 1 in test 5\n");
1122 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
1123 if (result
!= UCOL_EQUAL
) {
1124 log_err("ERROR 2 in test 5\n");
1127 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1128 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1129 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1130 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1132 r
= strcmp(sortKeyA
, sortKeyAz
);
1134 log_err("Error 3 in test 5\n");
1136 r
= strcmp(sortKeyA
, sortKeyB
);
1138 log_err("Error 4 in test 5\n");
1140 r
= strcmp(sortKeyAz
, sortKeyBz
);
1142 log_err("Error 5 in test 5\n");
1145 ucol_setStrength(coll
, UCOL_IDENTICAL
);
1146 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
1147 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
1148 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
1149 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
1151 r
= strcmp(sortKeyA
, sortKeyAz
);
1153 log_err("Error 6 in test 5\n");
1155 r
= strcmp(sortKeyA
, sortKeyB
);
1157 log_err("Error 7 in test 5\n");
1159 r
= strcmp(sortKeyAz
, sortKeyBz
);
1161 log_err("Error 8 in test 5\n");
1163 ucol_setStrength(coll
, UCOL_TERTIARY
);
1167 /* Test 6: Null character as base of a non-normal combining sequence.*/
1170 static const UChar strA
[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1171 static const UChar strB
[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1173 result
= ucol_strcoll(coll
, strA
, 5, strB
, 5);
1174 if (result
!= UCOL_LESS
) {
1175 log_err("Error 1 in test 6\n");
1177 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
1178 if (result
!= UCOL_EQUAL
) {
1179 log_err("Error 2 in test 6\n");
1189 static void TestGetCaseBit(void) {
1190 static const char *caseBitData
[] = {
1191 "a", "A", "ch", "Ch", "CH",
1192 "\\uFF9E", "\\u0009"
1195 static const uint8_t results
[] = {
1196 UCOL_LOWER_CASE
, UCOL_UPPER_CASE
, UCOL_LOWER_CASE
, UCOL_MIXED_CASE
, UCOL_UPPER_CASE
,
1197 UCOL_UPPER_CASE
, UCOL_LOWER_CASE
1200 uint32_t i
, blen
= 0;
1202 UErrorCode status
= U_ZERO_ERROR
;
1203 UCollator
*UCA
= ucol_open("", &status
);
1206 for(i
= 0; i
<UPRV_LENGTHOF(results
); i
++) {
1207 blen
= u_unescape(caseBitData
[i
], b
, 256);
1208 res
= ucol_uprv_getCaseBits(UCA
, b
, blen
, &status
);
1209 if(results
[i
] != res
) {
1210 log_err("Expected case = %02X, got %02X for %04X\n", results
[i
], res
, b
[0]);
1216 static void TestHangulTailoring(void) {
1217 static const char *koreanData
[] = {
1218 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1219 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1220 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1221 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1222 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1223 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1227 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1228 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1229 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1230 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1231 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1232 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1235 UErrorCode status
= U_ZERO_ERROR
;
1236 UChar rlz
[2048] = { 0 };
1237 uint32_t rlen
= u_unescape(rules
, rlz
, 2048);
1239 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
1240 if(status
== U_FILE_ACCESS_ERROR
) {
1241 log_data_err("Is your data around?\n");
1243 } else if(U_FAILURE(status
)) {
1244 log_err("Error opening collator\n");
1248 log_verbose("Using start of korean rules\n");
1250 if(U_SUCCESS(status
)) {
1251 genericOrderingTest(coll
, koreanData
, UPRV_LENGTHOF(koreanData
));
1253 log_err("Unable to open collator with rules %s\n", rules
);
1258 log_verbose("Using ko__LOTUS locale\n");
1259 genericLocaleStarter("ko__LOTUS", koreanData
, UPRV_LENGTHOF(koreanData
));
1263 * The secondary/tertiary compression middle byte
1264 * as used by the current implementation.
1265 * Subject to change as the sort key compression changes.
1266 * See class CollationKeys.
1269 SEC_COMMON_MIDDLE
= 0x25, /* range 05..45 */
1270 TER_ONLY_COMMON_MIDDLE
= 0x65 /* range 05..C5 */
/*
 * Checks that compressed secondary/tertiary sort-key bytes stay on the
 * expected side of the "common middle" byte.
 *
 * secstr and tertstr are filled with U+0020 for indexes below 149 and then
 * given a distinguishing last unit; ucol_getSortKey() is called on 150 units
 * of each and the resulting key is walked byte by byte:
 *   - top-down case:  a byte below SEC_COMMON_MIDDLE / TER_ONLY_COMMON_MIDDLE
 *                     is logged as an overlap;
 *   - bottom-up case: a byte above those values is logged as an overlap.
 * NOTE(review): the declarations of secstr/tertstr/count and the assignments
 * that set up the bottom-up strings are not visible in this listing.
 */
1273 static void TestCompressOverlap(void) {
1276 UErrorCode status
= U_ZERO_ERROR
;
1278 uint8_t result
[500];
/* Empty locale name: opens the root collator. */
1283 coll
= ucol_open("", &status
);
1285 if (U_FAILURE(status
)) {
1286 log_err_status(status
, "Collator can't be created -> %s\n", u_errorName(status
));
1289 while (count
< 149) {
1290 secstr
[count
] = 0x0020; /* [06, 05, 05] */
1291 tertstr
[count
] = 0x0020;
1295 /* top down compression ----------------------------------- */
1296 secstr
[count
] = 0x0332; /* [, 87, 05] */
1297 tertstr
[count
] = 0x3000; /* [06, 05, 07] */
1299 /* no compression secstr should have 150 secondary bytes, tertstr should
1300 have 150 tertiary bytes.
1301 with correct compression, secstr should have 6 secondary
1302 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1303 resultlen
= ucol_getSortKey(coll
, secstr
, 150, result
, UPRV_LENGTHOF(result
));
1304 (void)resultlen
; /* Suppress set but not used warning. */
/* Start scanning one byte past the first 0x01 byte in the key and stop when
 * the byte after the cursor is 0x01 (presumably the next level separator). */
1305 tempptr
= (uint8_t *)uprv_strchr((char *)result
, 1) + 1;
1306 while (*(tempptr
+ 1) != 1) {
1307 /* the last secondary collation element is not checked since it is not
1308 part of the compression */
1309 if (*tempptr
< SEC_COMMON_MIDDLE
) {
1310 log_err("Secondary top down compression overlapped\n");
1315 /* tertiary top/bottom/common for en_US is similar to the secondary
1316 top/bottom/common */
1317 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, result
, UPRV_LENGTHOF(result
));
/* Tertiary scan: start after the LAST 0x01 byte and run until the byte
 * after the cursor is the key's terminating 0. */
1318 tempptr
= (uint8_t *)uprv_strrchr((char *)result
, 1) + 1;
1319 while (*(tempptr
+ 1) != 0) {
1320 /* the last secondary collation element is not checked since it is not
1321 part of the compression */
1322 if (*tempptr
< TER_ONLY_COMMON_MIDDLE
) {
1323 log_err("Tertiary top down compression overlapped\n");
1328 /* bottom up compression ------------------------------------- */
1331 resultlen
= ucol_getSortKey(coll
, secstr
, 150, result
, UPRV_LENGTHOF(result
));
1332 tempptr
= (uint8_t *)uprv_strchr((char *)result
, 1) + 1;
1333 while (*(tempptr
+ 1) != 1) {
1334 /* the last secondary collation element is not checked since it is not
1335 part of the compression */
1336 if (*tempptr
> SEC_COMMON_MIDDLE
) {
1337 log_err("Secondary bottom up compression overlapped\n");
1342 /* tertiary top/bottom/common for en_US is similar to the secondary
1343 top/bottom/common */
1344 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, result
, UPRV_LENGTHOF(result
));
1345 tempptr
= (uint8_t *)uprv_strrchr((char *)result
, 1) + 1;
1346 while (*(tempptr
+ 1) != 0) {
1347 /* the last secondary collation element is not checked since it is not
1348 part of the compression */
1349 if (*tempptr
> TER_ONLY_COMMON_MIDDLE
) {
1350 log_err("Tertiary bottom up compression overlapped\n");
/*
 * Runs the three strings in `test` through genericRulesStarter() with two
 * tailorings that place U+04D0 after U+0410. The remaining variants are left
 * commented out for the reasons given in the comments below.
 */
1358 static void TestCyrillicTailoring(void) {
1359 static const char *test
[] = {
1365 /* Russian overrides contractions, so this test is not valid anymore */
1366 /*genericLocaleStarter("ru", test, 3);*/
1368 // Most of the following are commented out because UCA 8.0
1369 // drops most of the Cyrillic contractions from the default order.
1370 // See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
1372 // genericLocaleStarter("root", test, 3);
1373 // genericRulesStarter("&\\u0410 = \\u0410", test, 3);
1374 // genericRulesStarter("&Z < \\u0410", test, 3);
1375 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test
, 3);
1376 genericRulesStarter("&Z < \\u0410 < \\u04d0", test
, 3);
1377 // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
1378 // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
/*
 * Runs two three-string lists (testNoCont, testNoCont2) through
 * genericRulesStarter() using the single rule
 * "[suppressContractions [U+0400-U+047F]]".
 */
1381 static void TestSuppressContractions(void) {
1383 static const char *testNoCont2
[] = {
1388 static const char *testNoCont
[] = {
1391 "\\uFF21\\u0410\\u0302"
1394 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont
, 3);
1395 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2
, 3);
/*
 * Contraction tests, in three parts:
 *  1. For every entry of testrules, builds a collator (normalization ON,
 *     tertiary strength), opens two collation-element iterators and checks
 *     that they yield the same CE sequence and that iter1 is exhausted when
 *     iter2 is.
 *  2. With the rule "& a < b < c < ch < d & c = ch / h", checks that
 *     testdata2[0] < testdata2[1] < testdata2[2] via ucol_strcoll().
 *  3. (Compiled out with #if 0.) For each consecutive pair of rules in
 *     testrules3, compares the CEs produced for 'B' by the two collators.
 */
1398 static void TestContraction(void) {
1399 const static char *testrules
[] = {
1401 "&A = A\\u0306/\\u0306",
1404 const static UChar testdata
[][2] = {
1405 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1406 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1407 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1409 const static UChar testdata2
[][2] = {
1410 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1411 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1412 {0x0063 /* 'c' */, 0x006C /* 'l' */}
1416 * These pairs of rule strings are not guaranteed to yield the very same mappings.
1417 * In fact, LDML 24 recommends an improved way of creating mappings
1418 * which always yields different mappings for such pairs. See
1419 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1421 const static char *testrules3
[] = {
1422 "&z < xyz &xyzw << B",
1423 "&z < xyz &xyz << B / w",
1424 "&z < ch &achm << B",
1425 "&z < ch &a << B / chm",
1426 "&\\ud800\\udc00w << B",
1427 "&\\ud800\\udc00 << B / w",
1428 "&a\\ud800\\udc00m << B",
1429 "&a << B / \\ud800\\udc00m",
1433 UErrorCode status
= U_ZERO_ERROR
;
1435 UChar rule
[256] = {0};
/* Part 1: one collator per rule; testdata[i] is the matching 2-unit input. */
1439 for (i
= 0; i
< UPRV_LENGTHOF(testrules
); i
++) {
1440 UCollationElements
*iter1
;
1442 log_verbose("Rule %s for testing\n", testrules
[i
]);
1443 rlen
= u_unescape(testrules
[i
], rule
, 32);
1444 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1445 if (U_FAILURE(status
)) {
1446 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
1449 iter1
= ucol_openElements(coll
, testdata
[i
], 2, &status
);
1450 if (U_FAILURE(status
)) {
1451 log_err("Collation iterator creation failed\n");
1455 UCollationElements
*iter2
= ucol_openElements(coll
,
1459 if (U_FAILURE(status
)) {
1460 log_err("Collation iterator creation failed\n");
/* Walk iter2 to the end; iter1 must produce the same CE at every step. */
1463 ce
= ucol_next(iter2
, &status
);
1464 while (ce
!= UCOL_NULLORDER
) {
1465 if ((uint32_t)ucol_next(iter1
, &status
) != ce
) {
1466 log_err("Collation elements in contraction split does not match\n");
1469 ce
= ucol_next(iter2
, &status
);
1472 ucol_closeElements(iter2
);
1474 if (ucol_next(iter1
, &status
) != UCOL_NULLORDER
) {
1475 log_err("Collation elements not exhausted\n");
1478 ucol_closeElements(iter1
);
/* Part 2: ordering of "cg" < "ch" < "cl" under a rule that tailors "ch". */
1482 rlen
= u_unescape("& a < b < c < ch < d & c = ch / h", rule
, 256);
1483 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1484 if (ucol_strcoll(coll
, testdata2
[0], 2, testdata2
[1], 2) != UCOL_LESS
) {
1485 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1486 testdata2
[0][0], testdata2
[0][1], testdata2
[1][0],
1490 if (ucol_strcoll(coll
, testdata2
[1], 2, testdata2
[2], 2) != UCOL_LESS
) {
1491 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1492 testdata2
[1][0], testdata2
[1][1], testdata2
[2][0],
/* Part 3: disabled comparison of rule pairs from testrules3. */
1497 #if 0 /* see above */
1498 for (i
= 0; i
< UPRV_LENGTHOF(testrules3
); i
+= 2) {
1499 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i
, testrules3
[i
], testrules3
[i
+ 1]);
1502 UCollationElements
*iter1
,
1504 UChar ch
= 0x0042 /* 'B' */;
1506 rlen
= u_unescape(testrules3
[i
], rule
, 32);
1507 coll1
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1508 rlen
= u_unescape(testrules3
[i
+ 1], rule
, 32);
1509 coll2
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1510 if (U_FAILURE(status
)) {
/* Report the rule that was actually compiled: this loop iterates testrules3,
 * so index that array rather than testrules, whose length this loop does not
 * bound. */
1511 log_err("Collator creation failed %s\n", testrules3
[i
]);
1514 iter1
= ucol_openElements(coll1
, &ch
, 1, &status
);
1515 iter2
= ucol_openElements(coll2
, &ch
, 1, &status
);
1516 if (U_FAILURE(status
)) {
1517 log_err("Collation iterator creation failed\n");
1520 ce
= ucol_next(iter1
, &status
);
1521 if (U_FAILURE(status
)) {
1522 log_err("Retrieving ces failed\n");
1525 while (ce
!= UCOL_NULLORDER
) {
1526 uint32_t ce2
= (uint32_t)ucol_next(iter2
, &status
);
1528 log_verbose("CEs match: %08x\n", ce
);
1530 log_err("CEs do not match: %08x vs. %08x\n", ce
, ce2
);
1533 ce
= ucol_next(iter1
, &status
);
1534 if (U_FAILURE(status
)) {
1535 log_err("Retrieving ces failed\n");
1539 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
1540 log_err("CEs not exhausted\n");
1543 ucol_closeElements(iter1
);
1544 ucol_closeElements(iter2
);
/*
 * For every rule in testrules, builds a collator (normalization ON, tertiary
 * strength) and uses doTest() to require testdata[j] < testdata[j + 1] for
 * j = 0..4, i.e. that the six two-letter strings are in strictly ascending
 * order under that tailoring.
 */
1551 static void TestExpansion(void) {
1552 const static char *testrules
[] = {
1555 * This seems to have tested that M was not mapped to an expansion.
1556 * I believe the old builder just did that because it computed the extension CEs
1557 * at the very end, which was a bug.
1558 * Among other problems, it violated the core tailoring principle
1559 * by making an earlier rule depend on a later one.
1560 * And, of course, if M did not get an expansion, then it was primary different from K,
1561 * unlike what the rule &K<<M says.
1563 "&J << K / B & K << M",
/* Each row is a NUL-terminated two-letter string; rows are listed in the
 * order the test expects them to sort. */
1567 const static UChar testdata
[][3] = {
1568 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1569 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1570 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1571 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1572 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1573 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1576 UErrorCode status
= U_ZERO_ERROR
;
1578 UChar rule
[256] = {0};
1582 for (i
= 0; i
< UPRV_LENGTHOF(testrules
); i
++) {
1584 log_verbose("Rule %s for testing\n", testrules
[i
]);
1585 rlen
= u_unescape(testrules
[i
], rule
, 32);
1586 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
1587 if (U_FAILURE(status
)) {
1588 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
/* Upper bound 5 = number of testdata rows minus one (adjacent pairs). */
1592 for (j
= 0; j
< 5; j
++) {
1593 doTest(coll
, testdata
[j
], testdata
[j
+ 1], UCOL_LESS
);
1600 /* this test tests the current limitations of the engine */
1601 /* it always fails, so it is disabled by default */
/*
 * Each section below declares its own local `rule` plus expected-order string
 * lists (tlimit01/02/03) and hands them to genericRulesStarter() or
 * genericRulesStarterWithOptions(). Sections, in order: recursive expansions,
 * contractions spanning expansions, NULs in contractions, contractions
 * spanning normalization, variable top, and case level.
 */
1602 static void TestLimitations(void) {
1603 /* recursive expansions */
1605 static const char *rule
= "&a=b/c&d=c/e";
1606 static const char *tlimit01
[] = {"add","b","adf"};
1607 static const char *tlimit02
[] = {"aa","b","af"};
1608 log_verbose("recursive expansions\n");
1609 genericRulesStarter(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
));
1610 genericRulesStarter(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
));
1612 /* contractions spanning expansions */
1614 static const char *rule
= "&a<<<c/e&g<<<eh";
1615 static const char *tlimit01
[] = {"ad","c","af","f","ch","h"};
1616 static const char *tlimit02
[] = {"ad","c","ch","af","f","h"};
1617 log_verbose("contractions spanning expansions\n");
1618 genericRulesStarter(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
));
1619 genericRulesStarter(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
));
1621 /* normalization: nulls in contractions */
1623 static const char *rule
= "&a<<<\\u0000\\u0302";
1624 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
1625 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
1626 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
1627 static const UColAttributeValue valOn
[] = { UCOL_ON
};
1628 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
1630 log_verbose("NULL in contractions\n");
/* Both orderings are tried with decomposition mode ON and then OFF. */
1631 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
1632 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
1633 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
1634 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
1637 /* normalization: contractions spanning normalization */
/* NOTE(review): this section uses exactly the same rule and strings as the
 * "nulls in contractions" section above - confirm that is intended. */
1639 static const char *rule
= "&a<<<\\u0000\\u0302";
1640 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
1641 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
1642 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
1643 static const UColAttributeValue valOn
[] = { UCOL_ON
};
1644 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
1646 log_verbose("contractions spanning normalization\n");
1647 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
1648 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
1649 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
1650 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
1655 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
1656 static const char *rule
= "&\\u2010<x<[variable top]=z";
1657 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
1658 static const char *tlimit01
[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
1659 static const char *tlimit02
[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
1660 static const char *tlimit03
[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
/* att and the val arrays are parallel: valOn = shifted + quaternary,
 * valOff = non-ignorable + tertiary. */
1661 static const UColAttribute att
[] = { UCOL_ALTERNATE_HANDLING
, UCOL_STRENGTH
};
1662 static const UColAttributeValue valOn
[] = { UCOL_SHIFTED
, UCOL_QUATERNARY
};
1663 static const UColAttributeValue valOff
[] = { UCOL_NON_IGNORABLE
, UCOL_TERTIARY
};
1665 log_verbose("variable top\n");
1666 genericRulesStarterWithOptions(rule
, tlimit03
, UPRV_LENGTHOF(tlimit03
), att
, valOn
, UPRV_LENGTHOF(att
));
1667 genericRulesStarterWithOptions(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
), att
, valOn
, UPRV_LENGTHOF(att
));
1668 genericRulesStarterWithOptions(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
), att
, valOn
, UPRV_LENGTHOF(att
));
1669 genericRulesStarterWithOptions(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
), att
, valOff
, UPRV_LENGTHOF(att
));
1670 genericRulesStarterWithOptions(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
), att
, valOff
, UPRV_LENGTHOF(att
));
1675 static const char *rule
= "&c<ch<<<cH<<<Ch<<<CH";
1676 static const char *tlimit01
[] = {"c","CH","Ch","cH","ch"};
1677 static const char *tlimit02
[] = {"c","CH","cH","Ch","ch"};
1678 static const UColAttribute att
[] = { UCOL_CASE_FIRST
};
1679 static const UColAttributeValue valOn
[] = { UCOL_UPPER_FIRST
};
1680 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
1681 log_verbose("case level\n");
1682 genericRulesStarterWithOptions(rule
, tlimit01
, UPRV_LENGTHOF(tlimit01
), att
, valOn
, UPRV_LENGTHOF(att
));
1683 genericRulesStarterWithOptions(rule
, tlimit02
, UPRV_LENGTHOF(tlimit02
), att
, valOn
, UPRV_LENGTHOF(att
));
1684 /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
1685 /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
/*
 * Coverage-only test: unescapes a fixed string mixing BMP and supplementary
 * code points, opens the root collator, switches it to identical strength and
 * requests a sort key. The key length is deliberately discarded; nothing is
 * asserted about the key contents in the visible code.
 */
1691 static void TestBocsuCoverage(void) {
1692 UErrorCode status
= U_ZERO_ERROR
;
1693 const char *testString
= "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1694 UChar test
[256] = {0};
/* Capacity passed is 32 UChars, smaller than the 256-element buffer. */
1695 uint32_t tlen
= u_unescape(testString
, test
, 32);
1696 uint8_t key
[256] = {0};
1699 UCollator
*coll
= ucol_open("", &status
);
1700 if(U_SUCCESS(status
)) {
1701 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
1703 klen
= ucol_getSortKey(coll
, test
, tlen
, key
, 256);
1704 (void)klen
; /* Suppress set but not used warning. */
1708 log_data_err("Couldn't open UCA\n");
/*
 * Exercises ucol_setVariableTop() / ucol_getVariableTop() /
 * ucol_restoreVariableTop() on the root collator with alternate handling set
 * to shifted.
 *
 * For each of space, dot, degree and dollar in turn the test sets the
 * variable top to that character and requires that:
 *   - no error is set and the getter returns the value the setter returned;
 *   - every probe character up to and including the new top compares equal
 *     to the empty string (&nul with length 0), and every later probe does
 *     not;
 *   - the new top sorts strictly before the next probe character.
 * It then checks that an invalid contraction is rejected, that the original
 * variable top can be restored, and that calls made with a failure code
 * already in `status` leave that code untouched.
 */
1712 static void TestVariableTopSetting(void) {
1713 UErrorCode status
= U_ZERO_ERROR
;
1714 uint32_t varTopOriginal
= 0, varTop1
, varTop2
;
1715 UCollator
*coll
= ucol_open("", &status
);
1716 if(U_SUCCESS(status
)) {
/* Probe characters, one per category named in the trailing comments. */
1718 static const UChar nul
= 0;
1719 static const UChar space
= 0x20;
1720 static const UChar dot
= 0x2e; /* punctuation */
1721 static const UChar degree
= 0xb0; /* symbol */
1722 static const UChar dollar
= 0x24; /* currency symbol */
1723 static const UChar zero
= 0x30; /* digit */
1725 varTopOriginal
= ucol_getVariableTop(coll
, &status
);
1726 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal
);
1727 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
1729 varTop1
= ucol_setVariableTop(coll
, &space
, 1, &status
);
1730 varTop2
= ucol_getVariableTop(coll
, &status
);
1731 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1
);
1732 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1733 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1734 ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
/* The degree probe is passed by address like every other probe character. */
1735 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1736 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1737 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1738 ucol_greaterOrEqual(coll
, &space
, 1, &dot
, 1)) {
1739 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status
));
1742 varTop1
= ucol_setVariableTop(coll
, &dot
, 1, &status
);
1743 varTop2
= ucol_getVariableTop(coll
, &status
);
1744 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1
);
1745 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1746 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1747 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1748 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1749 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1750 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1751 ucol_greaterOrEqual(coll
, &dot
, 1, &degree
, 1)) {
1752 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status
));
1755 varTop1
= ucol_setVariableTop(coll
, &degree
, 1, &status
);
1756 varTop2
= ucol_getVariableTop(coll
, &status
);
1757 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1
);
1758 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1759 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1760 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1761 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1762 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1763 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1764 ucol_greaterOrEqual(coll
, &degree
, 1, &dollar
, 1)) {
1765 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status
));
1768 varTop1
= ucol_setVariableTop(coll
, &dollar
, 1, &status
);
1769 varTop2
= ucol_getVariableTop(coll
, &status
);
1770 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1
);
1771 if(U_FAILURE(status
) || varTop1
!= varTop2
||
1772 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1773 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1774 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1775 !ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1776 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1777 ucol_greaterOrEqual(coll
, &dollar
, 1, &zero
, 1)) {
1778 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status
));
1781 log_verbose("Testing setting variable top to contractions\n");
1783 UChar first
[4] = { 0 };
1788 status
= U_ZERO_ERROR
;
/* Length -1: `first` is passed as a NUL-terminated string; success here is
 * treated as a failure of the test. */
1789 ucol_setVariableTop(coll
, first
, -1, &status
);
1791 if(U_SUCCESS(status
)) {
1792 log_err("Invalid contraction succeded in setting variable top!\n");
1797 log_verbose("Test restoring variable top\n");
1799 status
= U_ZERO_ERROR
;
1800 ucol_restoreVariableTop(coll
, varTopOriginal
, &status
);
1801 if(varTopOriginal
!= ucol_getVariableTop(coll
, &status
)) {
1802 log_err("Couldn't restore old variable top\n");
1805 log_verbose("Testing calling with error set\n");
/* With a failure code preset, every call below (including the ones given a
 * NULL collator) is expected to leave `status` exactly as it was. */
1807 status
= U_INTERNAL_PROGRAM_ERROR
;
1808 varTop1
= ucol_setVariableTop(coll
, &space
, 1, &status
);
1809 varTop2
= ucol_getVariableTop(coll
, &status
);
1810 ucol_restoreVariableTop(coll
, varTop2
, &status
);
1811 varTop1
= ucol_setVariableTop(NULL
, &dot
, 1, &status
);
1812 varTop2
= ucol_getVariableTop(NULL
, &status
);
1813 ucol_restoreVariableTop(NULL
, varTop2
, &status
);
1814 if(status
!= U_INTERNAL_PROGRAM_ERROR
) {
1815 log_err("Bad reaction to passed error!\n");
1819 log_data_err("Couldn't open UCA collator\n");
/*
 * Exercises ucol_setMaxVariable() / ucol_getMaxVariable() on the root
 * collator with alternate handling set to shifted.
 *
 * For each of the reorder codes SPACE, PUNCTUATION, SYMBOL and CURRENCY the
 * test sets the max-variable group and requires that:
 *   - no error is set and the getter reports the group just set;
 *   - the probe characters of every group up to and including that one
 *     compare equal to the empty string (&nul with length 0), and the probes
 *     of later groups do not;
 *   - the probe of the selected group sorts strictly before the next probe.
 * Finally it restores the original value and checks that a call made with a
 * failure code already in `status` changes neither the setting nor the code.
 */
1823 static void TestMaxVariable() {
1824 UErrorCode status
= U_ZERO_ERROR
;
1825 UColReorderCode oldMax
, max
;
/* Probe characters, one per category named in the trailing comments. */
1828 static const UChar nul
= 0;
1829 static const UChar space
= 0x20;
1830 static const UChar dot
= 0x2e; /* punctuation */
1831 static const UChar degree
= 0xb0; /* symbol */
1832 static const UChar dollar
= 0x24; /* currency symbol */
1833 static const UChar zero
= 0x30; /* digit */
1835 coll
= ucol_open("", &status
);
1836 if(U_FAILURE(status
)) {
1837 log_data_err("Couldn't open root collator\n");
1841 oldMax
= ucol_getMaxVariable(coll
);
1842 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax
);
1843 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
1845 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_SPACE
, &status
);
1846 max
= ucol_getMaxVariable(coll
);
1847 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max
);
1848 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_SPACE
||
1849 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1850 ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
/* The degree probe is passed by address like every other probe character. */
1851 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1852 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1853 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1854 ucol_greaterOrEqual(coll
, &space
, 1, &dot
, 1)) {
1855 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status
));
1858 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_PUNCTUATION
, &status
);
1859 max
= ucol_getMaxVariable(coll
);
1860 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max
);
1861 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_PUNCTUATION
||
1862 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1863 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1864 ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1865 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1866 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1867 ucol_greaterOrEqual(coll
, &dot
, 1, &degree
, 1)) {
1868 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status
));
1871 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_SYMBOL
, &status
);
1872 max
= ucol_getMaxVariable(coll
);
1873 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max
);
1874 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_SYMBOL
||
1875 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1876 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1877 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1878 ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1879 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1880 ucol_greaterOrEqual(coll
, &degree
, 1, &dollar
, 1)) {
1881 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status
));
1884 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_CURRENCY
, &status
);
1885 max
= ucol_getMaxVariable(coll
);
1886 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max
);
1887 if(U_FAILURE(status
) || max
!= UCOL_REORDER_CODE_CURRENCY
||
1888 !ucol_equal(coll
, &nul
, 0, &space
, 1) ||
1889 !ucol_equal(coll
, &nul
, 0, &dot
, 1) ||
1890 !ucol_equal(coll
, &nul
, 0, &degree
, 1) ||
1891 !ucol_equal(coll
, &nul
, 0, &dollar
, 1) ||
1892 ucol_equal(coll
, &nul
, 0, &zero
, 1) ||
1893 ucol_greaterOrEqual(coll
, &dollar
, 1, &zero
, 1)) {
1894 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status
));
1897 log_verbose("Test restoring maxVariable\n");
1898 status
= U_ZERO_ERROR
;
1899 ucol_setMaxVariable(coll
, oldMax
, &status
);
1900 if(oldMax
!= ucol_getMaxVariable(coll
)) {
1901 log_err("Couldn't restore old maxVariable\n");
1904 log_verbose("Testing calling with error set\n");
/* With a failure code preset, the setter must change neither the collator's
 * max-variable group nor the status code. */
1905 status
= U_INTERNAL_PROGRAM_ERROR
;
1906 ucol_setMaxVariable(coll
, UCOL_REORDER_CODE_SPACE
, &status
);
1907 max
= ucol_getMaxVariable(coll
);
1908 if(max
!= oldMax
|| status
!= U_INTERNAL_PROGRAM_ERROR
) {
1909 log_err("Bad reaction to passed error!\n");
/*
 * Opens the en_US collator and asks genericOrderingTestWithResult() to check
 * that the strings in `test` are in strictly ascending (UCOL_LESS) order.
 * The list starts with U+0000 and U+FFFE, continues with noncharacters from
 * the BMP and from each supplementary plane, and ends with U+FFFF.
 */
1914 static void TestNonChars(void) {
1915 static const char *test
[] = {
1916 "\\u0000", /* ignorable */
1917 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
1918 "\\uFDD0", "\\uFDEF",
1919 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
1920 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
1921 "\\U0003FFFE", "\\U0003FFFF",
1922 "\\U0004FFFE", "\\U0004FFFF",
1923 "\\U0005FFFE", "\\U0005FFFF",
1924 "\\U0006FFFE", "\\U0006FFFF",
1925 "\\U0007FFFE", "\\U0007FFFF",
1926 "\\U0008FFFE", "\\U0008FFFF",
1927 "\\U0009FFFE", "\\U0009FFFF",
1928 "\\U000AFFFE", "\\U000AFFFF",
1929 "\\U000BFFFE", "\\U000BFFFF",
1930 "\\U000CFFFE", "\\U000CFFFF",
1931 "\\U000DFFFE", "\\U000DFFFF",
1932 "\\U000EFFFE", "\\U000EFFFF",
1933 "\\U000FFFFE", "\\U000FFFFF",
1934 "\\U0010FFFE", "\\U0010FFFF",
1935 "\\uFFFF" /* special character with maximum primary weight */
1937 UErrorCode status
= U_ZERO_ERROR
;
1938 UCollator
*coll
= ucol_open("en_US", &status
);
1940 log_verbose("Test non characters\n");
1942 if(U_SUCCESS(status
)) {
/* NOTE(review): the initializer above lists 37 strings but the count passed
 * here is 35, so the last two entries are never compared - confirm whether
 * that is intentional. */
1943 genericOrderingTestWithResult(coll
, test
, 35, UCOL_LESS
);
1945 log_err_status(status
, "Unable to open collator\n");
/*
 * Stress test with long common prefixes: allocates four 2048-byte buffers
 * and, for every length j from 20 to 499, fills each buffer with j-1 'a'
 * characters followed by a distinct letter ('a' + i), then runs the four
 * strings through genericLocaleStarter("en_US", ...).
 * NOTE(review): the malloc() results are used without a NULL check in the
 * visible code; the string termination and the body of the final loop are
 * not visible in this listing.
 */
1951 static void TestExtremeCompression(void) {
1952 static char *test
[4];
1953 int32_t j
= 0, i
= 0;
1955 for(i
= 0; i
<4; i
++) {
1956 test
[i
] = (char *)malloc(2048*sizeof(char));
1959 for(j
= 20; j
< 500; j
++) {
1960 for(i
= 0; i
<4; i
++) {
1961 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
/* The last character is what distinguishes the four strings. */
1962 test
[i
][j
-1] = (char)('a'+i
);
1965 genericLocaleStarter("en_US", (const char **)test
, 4);
1969 for(i
= 0; i
<4; i
++) {
/*
 * Alternative version of the long-common-prefix stress test: allocates four
 * 2048-byte buffers, fills them with runs of 'a' for lengths j from 10 to
 * 2047, puts a distinct letter ('a' + i) at index j-1, and runs the strings
 * through genericLocaleStarter("en_US", ...).
 * NOTE(review): a second function with this same name appears earlier in the
 * file, so one of the two is presumably excluded by a preprocessor guard
 * that is not visible in this listing - confirm which one is live.
 */
1975 static void TestExtremeCompression(void) {
1976 static char *test
[4];
1977 int32_t j
= 0, i
= 0;
1978 UErrorCode status
= U_ZERO_ERROR
;
/* ucol_open() takes a pointer to the error code, as at every other call site
 * in this file. */
1979 UCollator
*coll
= ucol_open("en_US", &status
);
1980 for(i
= 0; i
<4; i
++) {
1981 test
[i
] = (char *)malloc(2048*sizeof(char));
1983 for(j
= 10; j
< 2048; j
++) {
1984 for(i
= 0; i
<4; i
++) {
1985 uprv_memset(test
[i
], 'a', (j
-2)*sizeof(char));
1986 test
[i
][j
-1] = (char)('a'+i
);
1990 genericLocaleStarter("en_US", (const char **)test
, 4);
1992 for(j
= 10; j
< 2048; j
++) {
1993 for(i
= 0; i
<1; i
++) {
1994 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
1998 for(i
= 0; i
<4; i
++) {
/*
 * Checks a tailoring whose rule resets at "z" and then orders several
 * strings that contain surrogate pairs (supplementary code points), alone
 * and followed by BMP letters. The expected order is the order of `test`;
 * genericRulesStarter() is given the first 14 entries.
 * NOTE(review): only part of the `test` initializer is visible in this
 * listing, so the 14 cannot be checked against the array length here.
 */
2004 static void TestSurrogates(void) {
2005 static const char *test
[] = {
2006 "z","\\ud900\\udc25", "\\ud805\\udc50",
2007 "\\ud800\\udc00y", "\\ud800\\udc00r",
2008 "\\ud800\\udc00f", "\\ud800\\udc00",
2009 "\\ud800\\udc00c", "\\ud800\\udc00b",
2010 "\\ud800\\udc00fa", "\\ud800\\udc00fb",
2015 static const char *rule
=
2016 "&z < \\ud900\\udc25 < \\ud805\\udc50"
2017 "< \\ud800\\udc00y < \\ud800\\udc00r"
2018 "< \\ud800\\udc00f << \\ud800\\udc00"
2019 "< \\ud800\\udc00fa << \\ud800\\udc00fb"
2020 "< \\ud800\\udc00a < c < b" ;
2022 genericRulesStarter(rule
, test
, 14);
2025 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
/*
 * Table-driven: each entry of `tests` carries a rule string (.rules), up to
 * 50 expected-order strings (.data) and the number of strings in use (.len);
 * every entry is passed to genericRulesStarter().
 */
2026 static void TestPrefix(void) {
2029 static const struct {
2031 const char *data
[50];
2041 "&z<<<\\ud900\\udc25|a",
2042 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2046 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2047 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2051 /* This test uses data supplied by Masashiko Maedera to test the implementation */
2052 /* JIS X 4061 collation order implementation */
/*
 * Runs the expected-order lists test1 and test2 against the "ja" locale
 * twice: once with strength = quaternary only, and once with strength =
 * quaternary plus alternate handling = shifted. The test3 list is declared
 * but its only use is commented out.
 */
2053 static void TestNewJapanese(void) {
2055 static const char * const test1
[] = {
2056 "\\u30b7\\u30e3\\u30fc\\u30ec",
2057 "\\u30b7\\u30e3\\u30a4",
2058 "\\u30b7\\u30e4\\u30a3",
2059 "\\u30b7\\u30e3\\u30ec",
2060 "\\u3061\\u3087\\u3053",
2061 "\\u3061\\u3088\\u3053",
2062 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2063 "\\u3066\\u30fc\\u305f",
2064 "\\u30c6\\u30fc\\u30bf",
2065 "\\u30c6\\u30a7\\u30bf",
2066 "\\u3066\\u3048\\u305f",
2067 "\\u3067\\u30fc\\u305f",
2068 "\\u30c7\\u30fc\\u30bf",
2069 "\\u30c7\\u30a7\\u30bf",
2070 "\\u3067\\u3048\\u305f",
2071 "\\u3066\\u30fc\\u305f\\u30fc",
2072 "\\u30c6\\u30fc\\u30bf\\u30a1",
2073 "\\u30c6\\u30a7\\u30bf\\u30fc",
2074 "\\u3066\\u3047\\u305f\\u3041",
2075 "\\u3066\\u3048\\u305f\\u30fc",
2076 "\\u3067\\u30fc\\u305f\\u30fc",
2077 "\\u30c7\\u30fc\\u30bf\\u30a1",
2078 "\\u3067\\u30a7\\u305f\\u30a1",
2079 "\\u30c7\\u3047\\u30bf\\u3041",
2080 "\\u30c7\\u30a8\\u30bf\\u30a2",
2082 "\\u3073\\u3085\\u3042",
2083 "\\u3074\\u3085\\u3042",
2084 "\\u3073\\u3085\\u3042\\u30fc",
2085 "\\u30d3\\u30e5\\u30a2\\u30fc",
2086 "\\u3074\\u3085\\u3042\\u30fc",
2087 "\\u30d4\\u30e5\\u30a2\\u30fc",
2088 "\\u30d2\\u30e5\\u30a6",
2089 "\\u30d2\\u30e6\\u30a6",
2090 "\\u30d4\\u30e5\\u30a6\\u30a2",
2091 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2092 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2093 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2094 "\\u3072\\u3085\\u3093",
2095 "\\u3074\\u3085\\u3093",
2096 "\\u3075\\u30fc\\u308a",
2097 "\\u30d5\\u30fc\\u30ea",
2098 "\\u3075\\u3045\\u308a",
2099 "\\u3075\\u30a5\\u308a",
2100 "\\u3075\\u30a5\\u30ea",
2101 "\\u30d5\\u30a6\\u30ea",
2102 "\\u3076\\u30fc\\u308a",
2103 "\\u30d6\\u30fc\\u30ea",
2104 "\\u3076\\u3045\\u308a",
2105 "\\u30d6\\u30a5\\u308a",
2106 "\\u3077\\u3046\\u308a",
2107 "\\u30d7\\u30a6\\u30ea",
2108 "\\u3075\\u30fc\\u308a\\u30fc",
2109 "\\u30d5\\u30a5\\u30ea\\u30fc",
2110 "\\u3075\\u30a5\\u308a\\u30a3",
2111 "\\u30d5\\u3045\\u308a\\u3043",
2112 "\\u30d5\\u30a6\\u30ea\\u30fc",
2113 "\\u3075\\u3046\\u308a\\u3043",
2114 "\\u30d6\\u30a6\\u30ea\\u30a4",
2115 "\\u3077\\u30fc\\u308a\\u30fc",
2116 "\\u3077\\u30a5\\u308a\\u30a4",
2117 "\\u3077\\u3046\\u308a\\u30fc",
2118 "\\u30d7\\u30a6\\u30ea\\u30a4",
/* In the trailing comments of test2, H and K are the author's shorthand for
 * the two base letters used (U+306F and U+30CF respectively). */
2134 static const char *test2
[] = {
2135 "\\u306f\\u309d", /* H\\u309d */
2136 "\\u30cf\\u30fd", /* K\\u30fd */
2137 "\\u306f\\u306f", /* HH */
2138 "\\u306f\\u30cf", /* HK */
2139 "\\u30cf\\u30cf", /* KK */
2140 "\\u306f\\u309e", /* H\\u309e */
2141 "\\u30cf\\u30fe", /* K\\u30fe */
2142 "\\u306f\\u3070", /* HH\\u309b */
2143 "\\u30cf\\u30d0", /* KK\\u309b */
2144 "\\u306f\\u3071", /* HH\\u309c */
2145 "\\u30cf\\u3071", /* KH\\u309c */
2146 "\\u30cf\\u30d1", /* KK\\u309c */
2147 "\\u3070\\u309d", /* H\\u309b\\u309d */
2148 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2149 "\\u3070\\u306f", /* H\\u309bH */
2150 "\\u30d0\\u30cf", /* K\\u309bK */
2151 "\\u3070\\u309e", /* H\\u309b\\u309e */
2152 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2153 "\\u3070\\u3070", /* H\\u309bH\\u309b */
2154 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2155 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2156 "\\u3070\\u3071", /* H\\u309bH\\u309c */
2157 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2158 "\\u3071\\u309d", /* H\\u309c\\u309d */
2159 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2160 "\\u3071\\u306f", /* H\\u309cH */
2161 "\\u30d1\\u30cf", /* K\\u309cK */
2162 "\\u3071\\u3070", /* H\\u309cH\\u309b */
2163 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2164 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2165 "\\u3071\\u3071", /* H\\u309cH\\u309c */
2166 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2169 static const char *test3[] = {
2197 "\\u30b7\\u30e3\\u30fc\\u30ec",
/* Attribute/value arrays are parallel; the last argument of each call below
 * is the number of attribute pairs to apply. */
2200 static const UColAttribute att
[] = { UCOL_STRENGTH
};
2201 static const UColAttributeValue val
[] = { UCOL_QUATERNARY
};
2203 static const UColAttribute attShifted
[] = { UCOL_STRENGTH
, UCOL_ALTERNATE_HANDLING
};
2204 static const UColAttributeValue valShifted
[] = { UCOL_QUATERNARY
, UCOL_SHIFTED
};
2206 genericLocaleStarterWithOptions("ja", test1
, UPRV_LENGTHOF(test1
), att
, val
, 1);
2207 genericLocaleStarterWithOptions("ja", test2
, UPRV_LENGTHOF(test2
), att
, val
, 1);
2208 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2209 genericLocaleStarterWithOptions("ja", test1
, UPRV_LENGTHOF(test1
), attShifted
, valShifted
, 2);
2210 genericLocaleStarterWithOptions("ja", test2
, UPRV_LENGTHOF(test2
), attShifted
, valShifted
, 2);
/*
 * Uses a rule that makes two supplementary code points (written as the
 * surrogate pairs D9B0 DC70 and D9B0 DC71) equal, and asks
 * genericRulesStarterWithResult() to verify that the strings in `test`
 * compare as UCOL_EQUAL.
 */
2213 static void TestStrCollIdenticalPrefix(void) {
2214 const char* rule
= "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2215 const char* test
[] = {
2219 genericRulesStarterWithResult(rule
, test
, UPRV_LENGTHOF(test
), UCOL_EQUAL
);
2221 /* Contractions should have all their canonically equivalent */
2222 /* strings included */
/*
 * Table-driven: each entry of `tests` carries a rule string (.rules), up to
 * 10 strings (.data) and the number in use (.len). Every entry is passed to
 * genericRulesStarterWithResult() with UCOL_EQUAL, i.e. all listed spellings
 * are expected to compare equal under the rule.
 */
2223 static void TestContractionClosure(void) {
2224 static const struct {
2226 const char *data
[10];
2229 { "&b=\\u00e4\\u00e4",
2230 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2232 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2237 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2238 genericRulesStarterWithResult(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
, UCOL_EQUAL
);
2242 /* This test also fails */
/*
 * Table-driven check of "[before 3]" tailorings: each entry of `tests`
 * carries a rule string (.rules), expected-order strings (.data) and a count
 * (.len), and is passed to genericRulesStarter().
 * The remainder of the function repeats the check for two rule strings
 * (rule1, rule2) that combine "[before 3]" with a prefix rule, then opens a
 * collator from rule1 and logs, in verbose mode, the collation elements
 * produced for each test string.
 * NOTE(review): the declarations of rule1/rule2/string/CE and any
 * preprocessor guard around the second part are not visible in this listing.
 */
2243 static void TestBeforePrefixFailure(void) {
2244 static const struct {
2246 const char *data
[10];
2250 "&[before 3]\\uff41 <<< x",
2251 {"x", "\\uff41"}, 2 },
2252 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2253 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2254 "&[before 3]\\u30a7<<<\\u30a9",
2255 {"\\u30a9", "\\u30a7"}, 2 },
2256 { "&[before 3]\\u30a7<<<\\u30a9"
2257 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2258 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2259 {"\\u30a9", "\\u30a7"}, 2 },
2264 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2265 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2270 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2271 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2272 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2274 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2275 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2276 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2277 const char* test
[] = {
2278 "\\u30c6\\u30fc\\u30bf",
2279 "\\u30c6\\u30a7\\u30bf",
2281 genericRulesStarter(rule1
, test
, UPRV_LENGTHOF(test
));
2282 genericRulesStarter(rule2
, test
, UPRV_LENGTHOF(test
));
2283 /* this piece of code should be in some sort of verbose mode */
2284 /* it gets the collation elements for elements and prints them */
2285 /* This is useful when trying to see whether the problem is */
2287 UErrorCode status
= U_ZERO_ERROR
;
2289 UCollationElements
*it
= NULL
;
2292 uint32_t uStringLen
;
2293 UCollator
*coll
= NULL
;
2295 uStringLen
= u_unescape(rule1
, string
, 256);
2297 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2299 /*coll = ucol_open("ja_JP_JIS", &status);*/
/* The iterator is opened on zero units of text; the real text is supplied
 * per test string with ucol_setText() inside the loop. */
2300 it
= ucol_openElements(coll
, string
, 0, &status
);
2302 for(i
= 0; i
< UPRV_LENGTHOF(test
); i
++) {
2303 log_verbose("%s\n", test
[i
]);
2304 uStringLen
= u_unescape(test
[i
], string
, 256);
2305 ucol_setText(it
, string
, uStringLen
, &status
);
2307 while((CE
=ucol_next(it
, &status
)) != UCOL_NULLORDER
) {
2308 log_verbose("%08X\n", CE
);
2314 ucol_closeElements(it
);
2320 static void TestPrefixCompose(void) {
2322 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2324 const char* test[] = {
2325 "\\u30c6\\u30fc\\u30bf",
2326 "\\u30c6\\u30a7\\u30bf",
2330 UErrorCode status
= U_ZERO_ERROR
;
2332 /*UCollationElements *it = NULL;*/
2335 uint32_t uStringLen
;
2336 UCollator
*coll
= NULL
;
2338 uStringLen
= u_unescape(rule1
, string
, 256);
2340 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2348 [last variable] last variable value
2349 [last primary ignorable] largest CE for primary ignorable
2350 [last secondary ignorable] largest CE for secondary ignorable
2351 [last tertiary ignorable] largest CE for tertiary ignorable
2352 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2355 static void TestRuleOptions(void) {
2356 /* values here are hardcoded and are correct for the current UCA
2357 * when the UCA changes, one might be forced to change these
2362 * These strings contain the last character before [variable top]
2363 * and the first and second characters (by primary weights) after it.
2364 * See FractionalUCA.txt. For example:
2365 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2366 [variable top = 0C FE]
2367 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2369 00B4; [0D 0C, 05, 05]
2371 * Note: Starting with UCA 6.0, the [variable top] collation element
2372 * is not the weight of any character or string,
2373 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2375 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2376 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
2377 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2380 * This string has to match the character that has the [last regular] weight
2381 * which changes with each UCA version.
2382 * See the bottom of FractionalUCA.txt which says something like
2383 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2385 * Note: Starting with UCA 6.0, the [last regular] collation element
2386 * is not the weight of any character or string,
2387 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2389 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2391 static const struct {
2393 const char *data
[10];
2397 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2398 /* - all befores here amount to zero */
2399 { "&[before 3][first tertiary ignorable]<<<a",
2400 { "\\u0000", "a"}, 2
2401 }, /* you cannot go before first tertiary ignorable */
2403 { "&[before 3][last tertiary ignorable]<<<a",
2404 { "\\u0000", "a"}, 2
2405 }, /* you cannot go before last tertiary ignorable */
2408 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2409 * and it *is* possible to "go before" that.
2411 { "&[before 3][first secondary ignorable]<<<a",
2412 { "\\u0000", "a"}, 2
2415 { "&[before 3][last secondary ignorable]<<<a",
2416 { "\\u0000", "a"}, 2
2419 /* 'normal' befores */
2422 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2423 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2424 * because there is no tailoring space before that boundary.
2425 * Made the tests work by tailoring to a space instead.
2427 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
2428 { "c", "b", "\\u0332", "a" }, 4
2431 /* we don't have a code point that corresponds to
2432 * the last primary ignorable
2434 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
2435 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2438 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2439 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
2442 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2443 { LAST_VARIABLE_CHAR_STRING
, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING
}, 5
2446 { "&[first regular]<a"
2447 "&[before 1][first regular]<b",
2448 { "b", FIRST_REGULAR_CHAR_STRING
, "a", SECOND_REGULAR_CHAR_STRING
}, 4
2451 { "&[before 1][last regular]<b"
2452 "&[last regular]<a",
2453 { LAST_REGULAR_CHAR_STRING
, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2456 { "&[before 1][first implicit]<b"
2457 "&[first implicit]<a",
2458 { "b", "\\u4e00", "a", "\\u4e01"}, 4
2460 #if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2461 { "&[before 1][last implicit]<b"
2462 "&[last implicit]<a",
2463 { "b", "\\U0010FFFD", "a" }, 3
2466 { "&[last variable]<z"
2467 "&' '<x" /* was &[last primary ignorable]<x, see above */
2468 "&[last secondary ignorable]<<y"
2469 "&[last tertiary ignorable]<<<w"
2471 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING
, "z", "u"}, 7
2477 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2478 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2483 static void TestOptimize(void) {
2484 /* this is not really a test - just trying out
2485 * whether copying of UCA contents will fail
2486 * Cannot really test, since the functionality
2489 static const struct {
2491 const char *data
[10];
2494 /* - all befores here amount to zero */
2495 { "[optimize [\\uAC00-\\uD7FF]]",
2500 for(i
= 0; i
<UPRV_LENGTHOF(tests
); i
++) {
2501 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
2506 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2507 weiv ucol_strcollIter?
2508 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2509 weiv these are the input strings?
2510 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2511 weiv will check - could be a problem with utf-8 iterator
2512 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2514 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2515 weiv that doesn't sound right
2516 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2517 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
2518 cycheng@ca.ibm.c... yes
2519 weiv and then do the comparison
2520 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2521 weiv utf-16 strings look like a little endian ones in the example you sent me
2522 weiv It could be a bug - let me try to test it out
2523 cycheng@ca.ibm.c... ok
2524 cycheng@ca.ibm.c... we can wait till the conf. call
2525 cycheng@ca.ibm.c... next weke
2526 weiv that would be great
2528 weiv I might be wrong
2529 weiv let me play with it some more
2530 cycheng@ca.ibm.c... ok
2531 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
2532 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2533 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2535 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2537 cycheng@ca.ibm.c... the 4 strings we sent are just samples
/*
 * Regression check for the report quoted in the comment above this function:
 * the same comparison is run through ucol_strcollIter twice, once with
 * UTF-16BE iterators over utf16be[] and once with UTF-8 iterators over
 * utf8[], with UCOL_NORMALIZATION_MODE switched on. An error is logged when
 * the two results differ.
 * NOTE(review): the two byte tables are presumably encodings of the same two
 * strings (the first starts with an unpaired high surrogate) - confirm.
 */
2540 static void Alexis(void) {
2541 UErrorCode status
= U_ZERO_ERROR
;
2542 UCollator
*coll
= ucol_open("", &status
);
2545 const char utf16be
[2][4] = {
2546 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
2547 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
2550 const char utf8
[2][4] = {
2551 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
2552 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
2555 UCharIterator iterU161
, iterU162
;
2556 UCharIterator iterU81
, iterU82
;
2558 UCollationResult resU16
, resU8
;
/* each iterator covers all 4 bytes of its table row */
2560 uiter_setUTF16BE(&iterU161
, utf16be
[0], 4);
2561 uiter_setUTF16BE(&iterU162
, utf16be
[1], 4);
2563 uiter_setUTF8(&iterU81
, utf8
[0], 4);
2564 uiter_setUTF8(&iterU82
, utf8
[1], 4);
2566 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2568 resU16
= ucol_strcollIter(coll
, &iterU161
, &iterU162
, &status
);
2569 resU8
= ucol_strcollIter(coll
, &iterU81
, &iterU82
, &status
);
/* the two encodings must yield the same comparison result */
2572 if(resU16
!= resU8
) {
2573 log_err("different results\n");
2580 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2581 static void Alexis2(void) {
2582 UErrorCode status
= U_ZERO_ERROR
;
2583 UChar U16Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
2584 char U16BESource
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16BETarget
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
2585 char U8Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U8Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
2586 int32_t U16LenS
= 0, U16LenT
= 0, U16BELenS
= 0, U16BELenT
= 0, U8LenS
= 0, U8LenT
= 0;
2588 UConverter
*conv
= NULL
;
2590 UCharIterator U16BEItS
, U16BEItT
;
2591 UCharIterator U8ItS
, U8ItT
;
2593 UCollationResult resU16
, resU16BE
, resU8
;
2595 static const char* const pairs
[][2] = {
2596 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2597 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2598 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2599 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2600 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2601 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2602 { "\\u0020", "\\u0020\\u0000"}
2604 5F20 (my result here)
2606 5F20 (your result here)
2612 UCollator
*coll
= ucol_open("", &status
);
2613 if(status
== U_FILE_ACCESS_ERROR
) {
2614 log_data_err("Is your data around?\n");
2616 } else if(U_FAILURE(status
)) {
2617 log_err("Error opening collator\n");
2620 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2621 conv
= ucnv_open("UTF16BE", &status
);
2622 for(i
= 0; i
< UPRV_LENGTHOF(pairs
); i
++) {
2623 U16LenS
= u_unescape(pairs
[i
][0], U16Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
2624 U16LenT
= u_unescape(pairs
[i
][1], U16Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
2626 resU16
= ucol_strcoll(coll
, U16Source
, U16LenS
, U16Target
, U16LenT
);
2628 log_verbose("Result of strcoll is %i\n", resU16
);
2630 U16BELenS
= ucnv_fromUChars(conv
, U16BESource
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Source
, U16LenS
, &status
);
2631 U16BELenT
= ucnv_fromUChars(conv
, U16BETarget
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Target
, U16LenT
, &status
);
2632 (void)U16BELenS
; /* Suppress set but not used warnings. */
2635 /* use the original sizes, as the result from converter is in bytes */
2636 uiter_setUTF16BE(&U16BEItS
, U16BESource
, U16LenS
);
2637 uiter_setUTF16BE(&U16BEItT
, U16BETarget
, U16LenT
);
2639 resU16BE
= ucol_strcollIter(coll
, &U16BEItS
, &U16BEItT
, &status
);
2641 log_verbose("Result of U16BE is %i\n", resU16BE
);
2643 if(resU16
!= resU16BE
) {
2644 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
2647 u_strToUTF8(U8Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenS
, U16Source
, U16LenS
, &status
);
2648 u_strToUTF8(U8Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenT
, U16Target
, U16LenT
, &status
);
2650 uiter_setUTF8(&U8ItS
, U8Source
, U8LenS
);
2651 uiter_setUTF8(&U8ItT
, U8Target
, U8LenT
);
2653 resU8
= ucol_strcollIter(coll
, &U8ItS
, &U8ItT
, &status
);
2655 if(resU16
!= resU8
) {
2656 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
/*
 * Checks the relative order of the Hebrew strings in first[] with a collator
 * opened for locale "".
 * Each entry of first[] is a hex dump of UTF-8 bytes: it is decoded with
 * u_parseUTF8, converted to UTF-16 with u_strFromUTF8, and dumped through
 * log_verbose. Every earlier entry is then compared against every later one
 * with doTest, expecting UCOL_LESS.
 */
2665 static void TestHebrewUCA(void) {
2666 UErrorCode status
= U_ZERO_ERROR
;
2667 static const char *first
[] = {
2668 "d790d6b8d79cd795d6bcd7a9",
2669 "d790d79cd79ed7a7d799d799d7a1",
2670 "d790d6b4d79ed795d6bcd7a9",
2673 char utf8String
[3][256];
2674 UChar utf16String
[3][256];
2676 int32_t i
= 0, j
= 0;
2677 int32_t sizeUTF8
[3];
2678 int32_t sizeUTF16
[3];
2680 UCollator
*coll
= ucol_open("", &status
);
2681 if (U_FAILURE(status
)) {
2682 log_err_status(status
, "Could not open UCA collation %s\n", u_errorName(status
));
2685 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2687 for(i
= 0; i
< UPRV_LENGTHOF(first
); i
++) {
2688 sizeUTF8
[i
] = u_parseUTF8(first
[i
], -1, utf8String
[i
], 256, &status
);
2689 u_strFromUTF8(utf16String
[i
], 256, &sizeUTF16
[i
], utf8String
[i
], sizeUTF8
[i
], &status
);
/* the %i conversion needs its argument: print the index of the current string */
2690 log_verbose("%i: ", i);
2691 for(j
= 0; j
< sizeUTF16
[i
]; j
++) {
2692 /*log_verbose("\\u%04X", utf16String[i][j]);*/
2693 log_verbose("%04X", utf16String
[i
][j
]);
/* pairwise check: every earlier string must sort before every later one */
2697 for(i
= 0; i
< UPRV_LENGTHOF(first
)-1; i
++) {
2698 for(j
= i
+ 1; j
< UPRV_LENGTHOF(first
); j
++) {
2699 doTest(coll
, utf16String
[i
], utf16String
[j
], UCOL_LESS
);
2707 static void TestPartialSortKeyTermination(void) {
2708 static const char* cases
[] = {
2709 "\\u1234\\u1234\\udc00",
2710 "\\udc00\\ud800\\ud800"
2715 UErrorCode status
= U_ZERO_ERROR
;
2717 UCollator
*coll
= ucol_open("", &status
);
2721 UChar currCase
[256];
2723 int32_t pKeyLen
= 0;
2727 for(i
= 0; i
< UPRV_LENGTHOF(cases
); i
++) {
2728 uint32_t state
[2] = {0, 0};
2729 length
= u_unescape(cases
[i
], currCase
, 256);
2730 uiter_setString(&iter
, currCase
, length
);
2731 pKeyLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, key
, 256, &status
);
2732 (void)pKeyLen
; /* Suppress set but not used warning. */
2734 log_verbose("Done\n");
2740 static void TestSettings(void) {
2741 static const char* cases
[] = {
2746 static const char* locales
[] = {
2751 UErrorCode status
= U_ZERO_ERROR
;
2753 int32_t i
= 0, j
= 0;
2755 UChar source
[256], target
[256];
2756 int32_t sLen
= 0, tLen
= 0;
2758 UCollator
*collateObject
= NULL
;
2759 for(i
= 0; i
< UPRV_LENGTHOF(locales
); i
++) {
2760 collateObject
= ucol_open(locales
[i
], &status
);
2761 ucol_setStrength(collateObject
, UCOL_PRIMARY
);
2762 ucol_setAttribute(collateObject
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
2763 for(j
= 1; j
< UPRV_LENGTHOF(cases
); j
++) {
2764 sLen
= u_unescape(cases
[j
-1], source
, 256);
2766 tLen
= u_unescape(cases
[j
], target
, 256);
2768 doTest(collateObject
, source
, target
, UCOL_EQUAL
);
2770 ucol_close(collateObject
);
2774 static int32_t TestEqualsForCollator(const char* locName
, UCollator
*source
, UCollator
*target
) {
2775 UErrorCode status
= U_ZERO_ERROR
;
2776 int32_t errorNo
= 0;
2777 const UChar
*sourceRules
= NULL
;
2778 int32_t sourceRulesLen
= 0;
2779 UParseError parseError
;
2780 UColAttributeValue french
= UCOL_OFF
;
2782 if(!ucol_equals(source
, target
)) {
2783 log_err("Same collators, different address not equal\n");
2787 if(uprv_strcmp(locName
, ucol_getLocaleByType(source
, ULOC_ACTUAL_LOCALE
, &status
)) == 0) {
2788 target
= ucol_safeClone(source
, NULL
, NULL
, &status
);
2789 if(U_FAILURE(status
)) {
2790 log_err("Error creating clone\n");
2794 if(!ucol_equals(source
, target
)) {
2795 log_err("Collator different from it's clone\n");
2798 french
= ucol_getAttribute(source
, UCOL_FRENCH_COLLATION
, &status
);
2799 if(french
== UCOL_ON
) {
2800 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
2802 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
2804 if(U_FAILURE(status
)) {
2805 log_err("Error setting attributes\n");
2809 if(ucol_equals(source
, target
)) {
2810 log_err("Collators same even when options changed\n");
2815 sourceRules
= ucol_getRules(source
, &sourceRulesLen
);
2816 target
= ucol_openRules(sourceRules
, sourceRulesLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
2817 if(U_FAILURE(status
)) {
2818 log_err("Error instantiating target from rules - %s\n", u_errorName(status
));
2822 /* Note: The tailoring rule string is an optional data item. */
2823 if(!ucol_equals(source
, target
) && sourceRulesLen
!= 0) {
2824 log_err("Collator different from collator that was created from the same rules\n");
2833 static void TestEquals(void) {
2834 /* ucol_equals is not currently a public API. There is a chance that it will become
2835 * something like this.
2837 /* test whether the two collators instantiated from the same locale are equal */
2838 UErrorCode status
= U_ZERO_ERROR
;
2839 UParseError parseError
;
2840 int32_t noOfLoc
= uloc_countAvailable();
2841 const char *locName
= NULL
;
2842 UCollator
*source
= NULL
, *target
= NULL
;
2845 const char* rules
[] = {
2846 "&l < lj <<< Lj <<< LJ",
2847 "&n < nj <<< Nj <<< NJ",
2852 const char* badRules[] = {
2854 "&n < nj <<< nJ <<< NJ",
2856 "&AE <<< \\u00c4 <<< x"
2860 UChar sourceRules
[1024], targetRules
[1024];
2861 int32_t sourceRulesSize
= 0, targetRulesSize
= 0;
2862 int32_t rulesSize
= UPRV_LENGTHOF(rules
);
2864 for(i
= 0; i
< rulesSize
; i
++) {
2865 sourceRulesSize
+= u_unescape(rules
[i
], sourceRules
+sourceRulesSize
, 1024 - sourceRulesSize
);
2866 targetRulesSize
+= u_unescape(rules
[rulesSize
-i
-1], targetRules
+targetRulesSize
, 1024 - targetRulesSize
);
2869 source
= ucol_openRules(sourceRules
, sourceRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
2870 if(status
== U_FILE_ACCESS_ERROR
) {
2871 log_data_err("Is your data around?\n");
2873 } else if(U_FAILURE(status
)) {
2874 log_err("Error opening collator\n");
2877 target
= ucol_openRules(targetRules
, targetRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
2878 if(!ucol_equals(source
, target
)) {
2879 log_err("Equivalent collators not equal!\n");
2884 source
= ucol_open("root", &status
);
2885 target
= ucol_open("root", &status
);
2886 log_verbose("Testing root\n");
2887 if(!ucol_equals(source
, source
)) {
2888 log_err("Same collator not equal\n");
2890 if(TestEqualsForCollator("root", source
, target
)) {
2891 log_err("Errors for root\n");
2895 for(i
= 0; i
<noOfLoc
; i
++) {
2896 status
= U_ZERO_ERROR
;
2897 locName
= uloc_getAvailable(i
);
2898 /*if(hasCollationElements(locName)) {*/
2899 log_verbose("Testing equality for locale %s\n", locName
);
2900 source
= ucol_open(locName
, &status
);
2901 target
= ucol_open(locName
, &status
);
2902 if (U_FAILURE(status
)) {
2903 log_err("Error opening collator for locale %s %s\n", locName
, u_errorName(status
));
2906 if(TestEqualsForCollator(locName
, source
, target
)) {
2907 log_err("Errors for locale %s\n", locName
);
/*
 * On an "en" collator with UCOL_ALTERNATE_HANDLING set to UCOL_SHIFTED and
 * UCOL_STRENGTH set to UCOL_PRIMARY, the strings "a", "a " and " a" are
 * expected to compare UCOL_EQUAL in every pairing and in both argument
 * orders (checked through doTest).
 */
2914 static void TestJ2726(void) {
2915 UChar a
[2] = { 0x61, 0x00 }; /*"a"*/
2916 UChar aSpace
[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2917 UChar spaceA
[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2918 UErrorCode status
= U_ZERO_ERROR
;
2919 UCollator
*coll
= ucol_open("en", &status
);
2920 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
2921 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
2922 doTest(coll
, a
, aSpace
, UCOL_EQUAL
);
2923 doTest(coll
, aSpace
, a
, UCOL_EQUAL
);
2924 doTest(coll
, a
, spaceA
, UCOL_EQUAL
);
2925 doTest(coll
, spaceA
, a
, UCOL_EQUAL
);
2926 doTest(coll
, spaceA
, aSpace
, UCOL_EQUAL
);
2927 doTest(coll
, aSpace
, spaceA
, UCOL_EQUAL
);
/*
 * Calls ucol_openRules on the buffer r twice:
 *  - with length 1, where a successful status is logged as an error;
 *  - with length 0 (empty rules), where a failing status is logged as an
 *    error because a valid collator is expected.
 * status is reset to U_ZERO_ERROR between the two calls.
 */
2931 static void NullRule(void) {
2933 UErrorCode status
= U_ZERO_ERROR
;
2934 UCollator
*coll
= ucol_openRules(r
, 1, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2935 if(U_SUCCESS(status
)) {
2936 log_err("This should have been an error!\n");
2939 status
= U_ZERO_ERROR
;
2941 coll
= ucol_openRules(r
, 0, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2942 if(U_FAILURE(status
)) {
2943 log_err_status(status
, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status
));
2950 * Test numeric collation (UCOL_NUMERIC_COLLATION turned on): digit strings
2951 * are expected to order by numeric value, and prepended zeroes to be ignored.
2953 static void TestNumericCollation(void)
2955 UErrorCode status
= U_ZERO_ERROR
;
2957 const static char *basicTestStrings
[]={
2970 const static char *preZeroTestStrings
[]={
2978 "avery000000010000",
2981 const static char *thirtyTwoBitNumericStrings
[]={
2988 const static char *longNumericStrings
[]={
2989 /* Some of these sort out of the order that would be expected if digits-as-numbers handled arbitrarily-long digit strings.
2990 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2991 are treated as multiple collation elements. */
2992 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2993 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2994 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2995 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2996 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2997 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2998 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
2999 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3000 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3001 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3004 const static char *supplementaryDigits
[] = {
3005 "\\uD835\\uDFCE", /* 0 */
3006 "\\uD835\\uDFCF", /* 1 */
3007 "\\uD835\\uDFD0", /* 2 */
3008 "\\uD835\\uDFD1", /* 3 */
3009 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3010 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3011 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3012 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3013 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3014 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3017 const static char *foreignDigits
[] = {
3032 const static char *evenZeroes
[] = {
3039 UColAttribute att
= UCOL_NUMERIC_COLLATION
;
3040 UColAttributeValue val
= UCOL_ON
;
3042 /* Open our collator. */
3043 UCollator
* coll
= ucol_open("root", &status
);
3044 if (U_FAILURE(status
)){
3045 log_err_status(status
, "ERROR: in using ucol_open() -> %s\n",
3046 myErrorName(status
));
3049 genericLocaleStarterWithOptions("root", basicTestStrings
, UPRV_LENGTHOF(basicTestStrings
), &att
, &val
, 1);
3050 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings
, UPRV_LENGTHOF(thirtyTwoBitNumericStrings
), &att
, &val
, 1);
3051 genericLocaleStarterWithOptions("root", longNumericStrings
, UPRV_LENGTHOF(longNumericStrings
), &att
, &val
, 1);
3052 genericLocaleStarterWithOptions("en_US", foreignDigits
, UPRV_LENGTHOF(foreignDigits
), &att
, &val
, 1);
3053 genericLocaleStarterWithOptions("root", supplementaryDigits
, UPRV_LENGTHOF(supplementaryDigits
), &att
, &val
, 1);
3054 genericLocaleStarterWithOptions("root", evenZeroes
, UPRV_LENGTHOF(evenZeroes
), &att
, &val
, 1);
3056 /* Setting up our collator to do digits. */
3057 ucol_setAttribute(coll
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
3058 if (U_FAILURE(status
)){
3059 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3060 myErrorName(status
));
3065 Testing that prepended zeroes still yield the correct collation behavior.
3066 We expect that every element in our strings array will be equal.
3068 genericOrderingTestWithResult(coll
, preZeroTestStrings
, UPRV_LENGTHOF(preZeroTestStrings
), UCOL_EQUAL
);
/*
 * The two strings in test[] are identical except that the first has an extra
 * U+0591 after U+0FB2. With UCOL_NORMALIZATION_MODE on, a collator opened for
 * locale "" is expected to report them as UCOL_EQUAL. This is checked three
 * ways: through doTest, through a direct ucol_strcoll call on the
 * NUL-terminated buffers (length -1), and through
 * genericLocaleStarterWithResult for locale "".
 */
3073 static void TestTibetanConformance(void)
3075 const char* test
[] = {
3076 "\\u0FB2\\u0591\\u0F71\\u0061",
3077 "\\u0FB2\\u0F71\\u0061"
3080 UErrorCode status
= U_ZERO_ERROR
;
3081 UCollator
*coll
= ucol_open("", &status
);
3085 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3086 if (U_SUCCESS(status
)) {
3087 u_unescape(test
[0], source
, 100);
3088 u_unescape(test
[1], target
, 100);
3089 doTest(coll
, source
, target
, UCOL_EQUAL
);
3090 result
= ucol_strcoll(coll
, source
, -1, target
, -1);
3091 log_verbose("result %d\n", result
);
3092 if (UCOL_EQUAL
!= result
) {
3093 log_err("Tibetan comparison error\n");
3098 genericLocaleStarterWithResult("", test
, 2, UCOL_EQUAL
);
/*
 * Hands two three-character Han strings that share the first character
 * U+4E56 to genericLocaleStarter for locale "zh__PINYIN".
 * NOTE(review): presumably genericLocaleStarter expects the strings to sort
 * in the listed order - confirm against its definition.
 */
3101 static void TestPinyinProblem(void) {
3102 static const char *test
[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
3103 genericLocaleStarter("zh__PINYIN", test
, UPRV_LENGTHOF(test
));
3107 * Iterate through the given iterator, checking to see that all the strings
3108 * in the expected array are present.
3109 * @param expected array of strings we expect to see, or NULL
3110 * @param expectedCount number of elements of expected, or 0
3112 static int32_t checkUEnumeration(const char* msg
,
3114 const char** expected
,
3115 int32_t expectedCount
) {
3116 UErrorCode ec
= U_ZERO_ERROR
;
3117 int32_t i
= 0, n
, j
, bit
;
3118 int32_t seenMask
= 0;
3120 U_ASSERT(expectedCount
>= 0 && expectedCount
< 31); /* [sic] 31 not 32 */
3121 n
= uenum_count(iter
, &ec
);
3122 if (!assertSuccess("count", &ec
)) return -1;
3123 log_verbose("%s = [", msg
);
3125 const char* s
= uenum_next(iter
, NULL
, &ec
);
3126 if (!assertSuccess("snext", &ec
) || s
== NULL
) break;
3127 if (i
!= 0) log_verbose(",");
3128 log_verbose("%s", s
);
3129 /* check expected list */
3130 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
3131 if ((seenMask
&bit
) == 0 &&
3132 uprv_strcmp(s
, expected
[j
]) == 0) {
3138 log_verbose("] (%d)\n", i
);
3139 assertTrue("count verified", i
==n
);
3140 /* did we see all expected strings? */
3141 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
3142 if ((seenMask
&bit
)!=0) {
3143 log_verbose("Ok: \"%s\" seen\n", expected
[j
]);
3145 log_err("FAIL: \"%s\" not seen\n", expected
[j
]);
3152 * Test new API added for separate collation tree.
3154 static void TestSeparateTrees(void) {
3155 UErrorCode ec
= U_ZERO_ERROR
;
3156 UEnumeration
*e
= NULL
;
3161 static const char* AVAIL
[] = { "en", "de" };
3163 static const char* KW
[] = { "collation" };
3165 static const char* KWVAL
[] = { "phonebook", "stroke" };
3167 #if !UCONFIG_NO_SERVICE
3168 e
= ucol_openAvailableLocales(&ec
);
3170 assertSuccess("ucol_openAvailableLocales", &ec
);
3171 assertTrue("ucol_openAvailableLocales!=0", e
!=0);
3172 n
= checkUEnumeration("ucol_openAvailableLocales", e
, AVAIL
, UPRV_LENGTHOF(AVAIL
));
3173 (void)n
; /* Suppress set but not used warnings. */
3174 /* Don't need to check n because we check list */
3177 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec
));
3181 e
= ucol_getKeywords(&ec
);
3183 assertSuccess("ucol_getKeywords", &ec
);
3184 assertTrue("ucol_getKeywords!=0", e
!=0);
3185 n
= checkUEnumeration("ucol_getKeywords", e
, KW
, UPRV_LENGTHOF(KW
));
3186 /* Don't need to check n because we check list */
3189 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec
));
3192 e
= ucol_getKeywordValues(KW
[0], &ec
);
3194 assertSuccess("ucol_getKeywordValues", &ec
);
3195 assertTrue("ucol_getKeywordValues!=0", e
!=0);
3196 n
= checkUEnumeration("ucol_getKeywordValues", e
, KWVAL
, UPRV_LENGTHOF(KWVAL
));
3197 /* Don't need to check n because we check list */
3200 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec
));
3203 /* Try setting a warning before calling ucol_getKeywordValues */
3204 ec
= U_USING_FALLBACK_WARNING
;
3205 e
= ucol_getKeywordValues(KW
[0], &ec
);
3206 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec
)) {
3207 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e
!=0);
3208 n
= checkUEnumeration("ucol_getKeywordValues [with warning code set]", e
, KWVAL
, UPRV_LENGTHOF(KWVAL
));
3209 /* Don't need to check n because we check list */
3214 U_DRAFT int32_t U_EXPORT2
3215 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3216 const char* locale, UBool* isAvailable,
3217 UErrorCode* status);
3220 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de",
3222 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
3223 assertEquals("getFunctionalEquivalent(de)", "root", loc
);
3224 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3225 isAvailable
== TRUE
);
3228 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de_DE",
3230 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
3231 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc
);
3232 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3233 isAvailable
== FALSE
);
3237 /* supersedes TestJ784 */
3238 static void TestBeforePinyin(void) {
3239 const static char rules
[] = {
3240 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
3241 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
3242 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
3243 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
3244 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
3245 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
3248 const static char *test
[] = {
3259 const static char *test2
[] = {
3292 genericRulesStarter(rules
, test
, UPRV_LENGTHOF(test
));
3293 genericLocaleStarter("zh", test
, UPRV_LENGTHOF(test
));
3294 genericRulesStarter(rules
, test2
, UPRV_LENGTHOF(test2
));
3295 genericLocaleStarter("zh", test2
, UPRV_LENGTHOF(test2
));
/*
 * Opens a collator from every rule string in tests[] and logs an error when
 * the UErrorCode left by ucol_openRules() differs from the expectedStatus
 * stored next to that rule.
 * In the table only "[before 1]" with "<", "[before 2]" with "<<" and
 * "[before 3]" with "<<<" are paired with U_ZERO_ERROR; every other
 * combination, and "[before I]", is paired with U_INVALID_FORMAT_ERROR.
 */
3298 static void TestBeforeTightening(void) {
3299 static const struct {
3301 UErrorCode expectedStatus
;
3303 { "&[before 1]a<x", U_ZERO_ERROR
},
3304 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR
},
3305 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR
},
3306 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR
},
3307 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR
},
3308 { "&[before 2]a<<x",U_ZERO_ERROR
},
3309 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR
},
3310 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR
},
3311 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR
},
3312 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR
},
3313 { "&[before 3]a<<<x",U_ZERO_ERROR
},
3314 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR
},
3315 { "&[before I]a = x",U_INVALID_FORMAT_ERROR
}
3320 UErrorCode status
= U_ZERO_ERROR
;
3321 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
3324 UCollator
*coll
= NULL
;
3327 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
/* The rule text is stored escaped; convert it to UChars before opening. */
3328 rlen
= u_unescape(tests
[i
].rules
, rlz
, RULE_BUFFER_LEN
);
3329 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
/* Exact status comparison: an unexpected success is reported just like an unexpected failure. */
3330 if(status
!= tests
[i
].expectedStatus
) {
3331 log_err_status(status
, "Opening a collator with rules %s returned error code %s, expected %s\n",
3332 tests
[i
].rules
, u_errorName(status
), u_errorName(tests
[i
].expectedStatus
));
/* Start the next rule with a clean status. */
3335 status
= U_ZERO_ERROR
;
3342 &[before 1] a < x <<< X << q <<< Q < z
3343 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3346 &[before 2] a << x <<< X << q <<< Q < z
3347 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3350 &[before 3] a <<< x <<< X << q <<< Q < z
3351 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3355 &[before 1] a < x <<< X << q <<< Q < z
3356 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3359 &[before 2] a << x <<< X << q <<< Q < z
3360 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3363 &[before 3] a <<< x <<< X << q <<< Q < z
3364 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3368 &[before 1] a < x <<< X << q <<< Q < z
3369 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3372 &[before 2] a << x <<< X << q <<< Q < z
3373 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
3376 &[before 3] a <<< x <<< X << q <<< Q < z
3377 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
3380 &[before 1] s < x <<< X << q <<< Q < z
3381 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3383 &[before 2] s << x <<< X << q <<< Q < z
3384 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3386 &[before 3] s <<< x <<< X << q <<< Q < z
3387 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3390 &[before 1] \u24DC < x <<< X << q <<< Q < z
3391 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3393 &[before 2] \u24DC << x <<< X << q <<< Q < z
3394 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3396 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3397 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
3402 /* requires features not yet supported */
/*
 * Table-driven check: every entry pairs a tailoring that contains a
 * "[before N]" reset with the string order expected under it (nine strings
 * per entry) and hands rules, order and size to genericRulesStarter().
 */
3403 static void TestMoreBefore(void) {
3404 static const struct {
3406 const char* order
[16];
3409 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
3410 { "m","M","x","X","q","Q","z","a","n" }, 9},
3411 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
3412 { "m","M","x","X","q","Q","a","z","n" }, 9},
3413 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
3414 { "m","M","x","X","a","q","Q","z","n" }, 9},
3415 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
3416 { "x","X","q","Q","z","m","M","a","n" }, 9},
3417 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
3418 { "m","M","x","X","q","Q","a","z","n" }, 9},
3419 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
3420 { "m","M","x","X","a","q","Q","z","n" }, 9},
3421 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
3422 { "x","X","q","Q","z","n","m","a","M" }, 9},
3423 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
3424 { "x","X","q","Q","m","a","M","z","n" }, 9},
3425 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
3426 { "m","x","X","a","M","q","Q","z","n" }, 9},
3427 { "&[before 1] s < x <<< X << q <<< Q < z",
3428 { "r","R","x","X","q","Q","z","s","n" }, 9},
3429 { "&[before 2] s << x <<< X << q <<< Q < z",
3430 { "r","R","x","X","q","Q","s","z","n" }, 9},
3431 { "&[before 3] s <<< x <<< X << q <<< Q < z",
3432 { "r","R","x","X","s","q","Q","z","n" }, 9},
3433 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
3434 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
3435 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
3436 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
3437 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
3438 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
/* One genericRulesStarter() run per table entry. */
3443 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
3444 genericRulesStarter(tests
[i
].rules
, tests
[i
].order
, tests
[i
].size
);
/*
 * Opens a collator from the rule "&a <<< '\u0000'" and compares the
 * one-code-unit strings U+0001 (`a`) and U+0000 (`null`) with ucol_strcoll().
 * An error is logged unless the result is UCOL_LESS.
 */
3449 static void TestTailorNULL( void ) {
3450 const static char* rule
= "&a <<< '\\u0000'";
3451 UErrorCode status
= U_ZERO_ERROR
;
3452 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
/* Note: `a` holds code unit 0x0001, not the letter 'a'. */
3454 UChar a
= 1, null
= 0;
3455 UCollationResult res
= UCOL_EQUAL
;
3457 UCollator
*coll
= NULL
;
3460 rlen
= u_unescape(rule
, rlz
, RULE_BUFFER_LEN
);
3461 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
/* NOTE(review): the message says "default collator" although the collator is built from `rule`. */
3463 if(U_FAILURE(status
)) {
3464 log_err_status(status
, "Could not open default collator! -> %s\n", u_errorName(status
));
/* Explicit lengths of 1 are passed so that the U+0000 unit is compared as data. */
3466 res
= ucol_strcoll(coll
, &a
, 1, &null
, 1);
3468 if(res
!= UCOL_LESS
) {
3469 log_err("NULL was not tailored properly!\n");
/*
 * Runs the strings "B", "b", "Bb", "bB" through
 * genericLocaleStarterWithOptions() for the "root" locale with
 * UCOL_STRENGTH = UCOL_QUATERNARY and UCOL_CASE_FIRST = UCOL_UPPER_FIRST.
 */
3477 TestUpperFirstQuaternary(void)
3479 const char* tests
[] = { "B", "b", "Bb", "bB" };
/* att[k] is set to attVals[k]; the two arrays are parallel. */
3480 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_FIRST
};
3481 UColAttributeValue attVals
[] = { UCOL_QUATERNARY
, UCOL_UPPER_FIRST
};
3482 genericLocaleStarterWithOptions("root", tests
, UPRV_LENGTHOF(tests
), att
, attVals
, UPRV_LENGTHOF(att
));
/*
 * Three case-level checks (the function header is outside this excerpt):
 *  1. root, primary strength + case level on: "\u00e2T" and "aT" must
 *     compare UCOL_EQUAL;
 *  2. same attributes: "a", "A" are run through
 *     genericLocaleStarterWithOptions();
 *  3. rule "&[first tertiary ignorable]=A=a" with case level on: "a" and "A"
 *     must compare UCOL_EQUAL.
 */
3488 const char* tests
[] = { "\\u00e2T", "aT" };
3489 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_LEVEL
};
3490 UColAttributeValue attVals
[] = { UCOL_PRIMARY
, UCOL_ON
};
3491 const char* tests2
[] = { "a", "A" };
3492 const char* rule
= "&[first tertiary ignorable]=A=a";
3493 UColAttribute att2
[] = { UCOL_CASE_LEVEL
};
3494 UColAttributeValue attVals2
[] = { UCOL_ON
};
3495 /* Test whether we correctly ignore primary ignorables on case level when */
3496 /* we have only primary & case level */
3497 genericLocaleStarterWithOptionsAndResult("root", tests
, UPRV_LENGTHOF(tests
), att
, attVals
, UPRV_LENGTHOF(att
), UCOL_EQUAL
);
3498 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3499 /* and case level */
3500 genericLocaleStarterWithOptions("root", tests2
, UPRV_LENGTHOF(tests2
), att
, attVals
, UPRV_LENGTHOF(att
));
3501 /* Test whether completely ignorable letters have case level info (they shouldn't) */
3502 genericRulesStarterWithOptionsAndResult(rule
, tests2
, UPRV_LENGTHOF(tests2
), att2
, attVals2
, UPRV_LENGTHOF(att2
), UCOL_EQUAL
);
/*
 * Checks that ucol_getSortKey() overwrites the last byte of the key it
 * reports (the function header is outside this excerpt).
 * The key length is first queried with a NULL buffer; a 0xAA marker is then
 * planted at index length-1 and the key is generated twice. After each call
 * the byte at index length-1 must no longer be 0xAA. The capacity arguments
 * of the two generating calls are on lines not shown here; the existing
 * comments describe them as "one byte higher than needed" and "exact".
 */
3508 static const char *test
= "this is a test string";
3510 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
3511 unsigned char sortkey
[256];
3512 int32_t sortkey_length
;
3513 UErrorCode status
= U_ZERO_ERROR
;
3514 static UCollator
*coll
= NULL
;
3515 coll
= ucol_open("root", &status
);
3516 if(U_FAILURE(status
)) {
3517 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
/* Strength is set twice, once via each API; both request UCOL_PRIMARY. */
3520 ucol_setStrength(coll
, UCOL_PRIMARY
);
3521 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
3522 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3523 if (U_FAILURE(status
)) {
3524 log_err("Failed setting atributes\n");
/* Preflight: NULL buffer with capacity 0 only asks for the required length. */
3527 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, NULL
, 0);
/* NOTE(review): this early return leaves the function without any cleanup visible in this excerpt - confirm coll is released. */
3528 if (sortkey_length
> 256) return;
3530 /* we mark the position where the null byte should be written in advance */
3531 sortkey
[sortkey_length
-1] = 0xAA;
3533 /* we set the buffer size one byte higher than needed */
3534 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
3537 /* no error occurs (for me) */
3538 if (sortkey
[sortkey_length
-1] == 0xAA) {
3539 log_err("Hit bug at first try\n");
3542 /* we mark the position where the null byte should be written again */
3543 sortkey
[sortkey_length
-1] = 0xAA;
3545 /* this time we set the buffer size to the exact amount needed */
3546 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
3549 /* now the trailing null byte is not written */
3550 if (sortkey
[sortkey_length
-1] == 0xAA) {
3551 log_err("Hit bug at second try\n");
3557 /* Regression test for Thai partial sort key problem */
/*
 * Passes two Thai strings to genericLocaleStarter() for the "th" locale.
 * The strings are identical except for the second-to-last code unit
 * (\u0e47 in the first, \u0e48 in the second).
 */
3561 const static char *test
[] = {
3562 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3563 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3566 genericLocaleStarter("th", test
, UPRV_LENGTHOF(test
));
/*
 * Runs the strings "a", "y" through genericRulesStarter() with a rule that
 * tailors Y after the sequence "Ny" and places "a" after
 * [first secondary ignorable].
 */
3572 const static char *test
[] = { "a", "y" };
3573 const char* rules
= "&Ny << Y &[first secondary ignorable] <<< a";
3574 genericRulesStarter(rules
, test
, UPRV_LENGTHOF(test
));
/*
 * Exercises canonically equivalent spellings of precomposed characters
 * (the function header is outside this excerpt):
 *  - with the "vi" and "ro" collators, pairs from tData[] are compared with
 *    ucol_equal() and the sort keys are dumped with log_verbose();
 *  - with three rule-based collators (rule, rule2, rule3), sort keys of the
 *    entries in tailorData[], tailorData2[] and tailorData3[] are compared
 *    byte-for-byte against the key of a reference entry of the same table.
 * UChar strings are converted with aescstrdup() before being formatted with
 * %s, as the rule3 section of this test already did.
 */
3580 UErrorCode status
= U_ZERO_ERROR
;
3582 UCollator
*coll
=NULL
;
3583 uint8_t resColl
[100], expColl
[100];
3584 int32_t rLen
, tLen
, ruleLen
, sLen
, kLen
;
3585 UChar rule
[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
3586 UChar rule2
[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
3588 * Note: Just tailoring &z<ae^ does not work as expected:
3589 * The UCA spec requires for discontiguous contractions that they
3590 * extend an *existing match* by one combining mark at a time.
3591 * Therefore, ae must be a contraction so that the builder finds
3592 * discontiguous contractions for ae^, for example with an intervening underdot.
3593 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3596 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3597 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3599 static const UChar tData
[][20]={
3601 {0x0041, 0x0323, 0x0302, 0},
3602 {0x1EA0, 0x0302, 0},
3603 {0x00C2, 0x0323, 0},
3604 {0x1ED8, 0}, /* O with dot and circumflex */
3605 {0x1ECC, 0x0302, 0},
3607 {0x1EA1, 0x0306, 0},
3609 static const UChar tailorData
[][20]={
3610 {0x1FA2, 0}, /* Omega with 3 combining marks */
3611 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3612 {0x1FF3, 0x0313, 0x0300, 0},
3613 {0x1F60, 0x0300, 0x0345, 0},
3614 {0x1F62, 0x0345, 0},
3615 {0x1FA0, 0x0300, 0},
3617 static const UChar tailorData2
[][20]={
3618 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
3619 {0x0073, 0x0323, 0x030C, 0},
3620 {0x0073, 0x030C, 0x0323, 0},
3622 static const UChar tailorData3
[][20]={
3623 {0x007a, 0}, /* z */
3624 {0x0061, 0x0065, 0}, /* a + e */
3625 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3626 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
3627 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3628 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
3629 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
3630 {0x00EA, 0}, /* e with circumflex */
3633 /* Test Vietnamese sort. */
3634 coll
= ucol_open("vi", &status
);
3635 if(U_FAILURE(status
)) {
3636 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
3639 log_verbose("\n\nVI collation:");
3640 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[2], u_strlen(tData
[2])) ) {
3641 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3643 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[3], u_strlen(tData
[3])) ) {
3644 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3646 if ( !ucol_equal(coll
, tData
[5], u_strlen(tData
[5]), tData
[4], u_strlen(tData
[4])) ) {
3647 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3649 if ( !ucol_equal(coll
, tData
[7], u_strlen(tData
[7]), tData
[6], u_strlen(tData
[6])) ) {
3650 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3653 for (j
=0; j
<8; j
++) {
3654 tLen
= u_strlen(tData
[j
]);
/* tData[j] is a UChar string: convert it before handing it to %s. */
3655 log_verbose("\n Data :%s \tlen: %d key: ", aescstrdup(tData
[j
], tLen
), tLen
);
3656 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
3657 for(i
= 0; i
<rLen
; i
++) {
3658 log_verbose(" %02X", resColl
[i
]);
3664 /* Test Romanian sort. */
3665 coll
= ucol_open("ro", &status
);
3666 log_verbose("\n\nRO collation:");
/* The diagnostics below name the pair that is actually compared (tData[0]/tData[1], then tData[4]/tData[5]). */
3667 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[1], u_strlen(tData
[1])) ) {
3668 log_err("\\u1EAC not equals to \\u0041+\\u0323+\\u0302\n");
3670 if ( !ucol_equal(coll
, tData
[4], u_strlen(tData
[4]), tData
[5], u_strlen(tData
[5])) ) {
3671 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3673 if ( !ucol_equal(coll
, tData
[6], u_strlen(tData
[6]), tData
[7], u_strlen(tData
[7])) ) {
3674 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3677 for (j
=4; j
<8; j
++) {
3678 tLen
= u_strlen(tData
[j
]);
3679 log_verbose("\n Data :%s \tlen: %d key: ", aescstrdup(tData
[j
], tLen
), tLen
);
3680 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
3681 for(i
= 0; i
<rLen
; i
++) {
3682 log_verbose(" %02X", resColl
[i
]);
3687 /* Test the precomposed Greek character with 3 combining marks. */
3688 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3689 ruleLen
= u_strlen(rule
);
3690 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3691 if (U_FAILURE(status
)) {
3692 log_err("ucol_openRules failed with %s\n", u_errorName(status
));
3695 sLen
= u_strlen(tailorData
[0]);
3696 for (j
=1; j
<6; j
++) {
3697 tLen
= u_strlen(tailorData
[j
]);
3698 if ( !ucol_equal(coll
, tailorData
[0], sLen
, tailorData
[j
], tLen
)) {
3699 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j
, aescstrdup(tailorData
[j
], tLen
));
3702 /* Test getSortKey. */
3703 tLen
= u_strlen(tailorData
[0]);
3704 kLen
=ucol_getSortKey(coll
, tailorData
[0], tLen
, expColl
, 100);
3705 for (j
=0; j
<6; j
++) {
3706 tLen
= u_strlen(tailorData
[j
]);
3707 rLen
= ucol_getSortKey(coll
, tailorData
[j
], tLen
, resColl
, 100);
3708 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
3709 log_err("\n Data[%d] :%s \tlen: %d key: ", j
, aescstrdup(tailorData
[j
], tLen
), tLen
);
3710 for(i
= 0; i
<rLen
; i
++) {
3711 log_err(" %02X", resColl
[i
]);
3717 log_verbose("\n\nTailoring test for s with caron:");
3718 ruleLen
= u_strlen(rule2
);
3719 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3720 tLen
= u_strlen(tailorData2
[0]);
3721 kLen
=ucol_getSortKey(coll
, tailorData2
[0], tLen
, expColl
, 100);
3722 for (j
=1; j
<3; j
++) {
3723 tLen
= u_strlen(tailorData2
[j
]);
3724 rLen
= ucol_getSortKey(coll
, tailorData2
[j
], tLen
, resColl
, 100);
3725 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
/* Report the string that was actually keyed: tailorData2[j], not tailorData[j]. */
3726 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, aescstrdup(tailorData2
[j
], tLen
), tLen
);
3727 for(i
= 0; i
<rLen
; i
++) {
3728 log_err(" %02X", resColl
[i
]);
3734 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3735 ruleLen
= u_strlen(rule3
);
3736 coll
= ucol_openRules(rule3
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3737 tLen
= u_strlen(tailorData3
[3]);
3738 kLen
=ucol_getSortKey(coll
, tailorData3
[3], tLen
, expColl
, 100);
3739 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3
[3], tLen
), tLen
);
3740 for(i
= 0; i
<kLen
; i
++) {
3741 log_verbose(" %02X", expColl
[i
]);
3743 for (j
=4; j
<6; j
++) {
3744 tLen
= u_strlen(tailorData3
[j
]);
3745 rLen
= ucol_getSortKey(coll
, tailorData3
[j
], tLen
, resColl
, 100);
3747 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
3748 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, aescstrdup(tailorData3
[j
], tLen
), tLen
);
3749 for(i
= 0; i
<rLen
; i
++) {
3750 log_err(" %02X", resColl
[i
]);
3754 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j
, aescstrdup(tailorData3
[j
], tLen
), tLen
);
3755 for(i
= 0; i
<rLen
; i
++) {
3756 log_verbose(" %02X", resColl
[i
]);
/*
 * Builds two rule-based collators that tailor "a" and "b" next to the
 * [last/first primary ignorable] and [last/first secondary ignorable]
 * positions, then compares the sort keys of tData1[0..1] and tData2[0..1]
 * with the hard-coded byte sequences below.
 * On a mismatch the offending string and its key bytes are logged; the
 * UChar strings are converted with aescstrdup() before being formatted
 * with %s, as the parse-error context in this function already was.
 */
3763 TestTailor6179(void)
3765 UErrorCode status
= U_ZERO_ERROR
;
3767 UCollator
*coll
=NULL
;
3768 uint8_t resColl
[100];
3769 int32_t rLen
, tLen
, ruleLen
;
3770 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
3771 static const UChar rule1
[]={
3772 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3773 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3774 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3775 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3776 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3777 static const UChar rule2
[]={
3778 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3779 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3780 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3781 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3782 0x3C,0x3C,0x20,0x62,0};
3784 static const UChar tData1
[][4]={
3789 static const UChar tData2
[][4]={
3796 * These values from FractionalUCA.txt will change,
3797 * and need to be updated here.
3798 * TODO: Make this not check for particular sort keys.
3799 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3801 static const uint8_t firstPrimaryIgnCE
[]={1, 0x83, 1, 5, 0};
3802 static const uint8_t lastPrimaryIgnCE
[]={1, 0xFC, 1, 5, 0};
3803 static const uint8_t firstSecondaryIgnCE
[]={1, 1, 0xfe, 0};
3804 static const uint8_t lastSecondaryIgnCE
[]={1, 1, 0xff, 0};
3806 UParseError parseError
;
3808 /* Test [Last Primary ignorable] */
3810 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
3811 ruleLen
= u_strlen(rule1
);
3812 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3813 if (U_FAILURE(status
)) {
3814 log_err_status(status
, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status
));
3817 tLen
= u_strlen(tData1
[0]);
3818 rLen
= ucol_getSortKey(coll
, tData1
[0], tLen
, resColl
, 100);
/* The expected arrays include the terminating 0, so their element count is the expected key length. */
3819 if (rLen
!= UPRV_LENGTHOF(lastPrimaryIgnCE
) || uprv_memcmp(resColl
, lastPrimaryIgnCE
, rLen
) != 0) {
3820 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, aescstrdup(tData1
[0], tLen
), rLen
);
3821 for(i
= 0; i
<rLen
; i
++) {
3822 log_err(" %02X", resColl
[i
]);
3826 tLen
= u_strlen(tData1
[1]);
3827 rLen
= ucol_getSortKey(coll
, tData1
[1], tLen
, resColl
, 100);
3828 if (rLen
!= UPRV_LENGTHOF(firstPrimaryIgnCE
) || uprv_memcmp(resColl
, firstPrimaryIgnCE
, rLen
) != 0) {
3829 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, aescstrdup(tData1
[1], tLen
), rLen
);
3830 for(i
= 0; i
<rLen
; i
++) {
3831 log_err(" %02X", resColl
[i
]);
3838 /* Test [Last Secondary ignorable] */
3839 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
3840 ruleLen
= u_strlen(rule2
);
3841 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, &parseError
, &status
);
3842 if (U_FAILURE(status
)) {
3843 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status
));
3844 log_info(" offset=%d \"%s\" | \"%s\"\n",
3845 parseError
.offset
, aescstrdup(parseError
.preContext
, -1), aescstrdup(parseError
.postContext
, -1));
3848 tLen
= u_strlen(tData2
[0]);
3849 rLen
= ucol_getSortKey(coll
, tData2
[0], tLen
, resColl
, 100);
3850 if (rLen
!= UPRV_LENGTHOF(lastSecondaryIgnCE
) || uprv_memcmp(resColl
, lastSecondaryIgnCE
, rLen
) != 0) {
3851 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, aescstrdup(tData2
[0], tLen
), rLen
);
3852 for(i
= 0; i
<rLen
; i
++) {
3853 log_err(" %02X", resColl
[i
]);
3857 tLen
= u_strlen(tData2
[1]);
3858 rLen
= ucol_getSortKey(coll
, tData2
[1], tLen
, resColl
, 100);
3859 if (rLen
!= UPRV_LENGTHOF(firstSecondaryIgnCE
) || uprv_memcmp(resColl
, firstSecondaryIgnCE
, rLen
) != 0) {
3860 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, aescstrdup(tData2
[1], tLen
), rLen
);
3861 for(i
= 0; i
<rLen
; i
++) {
3862 log_err(" %02X", resColl
[i
]);
/*
 * Computes sort keys for the 11 entries of tData1[] under four collators in
 * turn: "en", "ja", the tailoring in rule1 and the tailoring in rule2.
 * Within each pass every key is compared with the previous entry's key using
 * strcmp(); an error is logged when the new key is smaller. In the rule2 pass
 * entry 3 is treated specially: there an error is logged when its key is
 * greater than the previous one.
 * NOTE(review): tData1[j] is a UChar array but is passed for a %s conversion
 * in the log_verbose() calls below; other tests in this file convert with
 * aescstrdup() first.
 */
3870 TestUCAPrecontext(void)
3872 UErrorCode status
= U_ZERO_ERROR
;
3874 UCollator
*coll
=NULL
;
3875 uint8_t resColl
[100], prevColl
[100];
3876 int32_t rLen
, tLen
, ruleLen
;
3877 UChar rule1
[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3878 UChar rule2
[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3879 /* & l middle-dot << a a is an expansion. */
3881 UChar tData1
[][20]={
3882 { 0xb7, 0}, /* standalone middle dot(0xb7) */
3883 { 0x387, 0}, /* standalone middle dot(0x387) */
3886 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
3887 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
3888 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3889 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
3890 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3891 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
3892 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
3895 log_verbose("\n\nEN collation:");
3896 coll
= ucol_open("en", &status
);
3897 if (U_FAILURE(status
)) {
3898 log_err_status(status
, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status
));
3901 for (j
=0; j
<11; j
++) {
3902 tLen
= u_strlen(tData1
[j
]);
3903 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
/* Keys are compared as C strings; entry 0 has no predecessor and is skipped. */
3904 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3905 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3908 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3909 for(i
= 0; i
<rLen
; i
++) {
3910 log_verbose(" %02X", resColl
[i
]);
/* Remember this key for the next iteration; rLen+1 bytes are copied. */
3912 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
3917 log_verbose("\n\nJA collation:");
3918 coll
= ucol_open("ja", &status
);
3919 if (U_FAILURE(status
)) {
3920 log_err("Tailoring test: &z <<a|- failed!");
3923 for (j
=0; j
<11; j
++) {
3924 tLen
= u_strlen(tData1
[j
]);
3925 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
3926 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3927 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3930 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3931 for(i
= 0; i
<rLen
; i
++) {
3932 log_verbose(" %02X", resColl
[i
]);
3934 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
3939 log_verbose("\n\nTailoring test: & middle dot < a ");
3940 ruleLen
= u_strlen(rule1
);
3941 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3942 if (U_FAILURE(status
)) {
3943 log_err("Tailoring test: & middle dot < a failed!");
3946 for (j
=0; j
<11; j
++) {
3947 tLen
= u_strlen(tData1
[j
]);
3948 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
3949 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3950 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3953 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3954 for(i
= 0; i
<rLen
; i
++) {
3955 log_verbose(" %02X", resColl
[i
]);
3957 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
3962 log_verbose("\n\nTailoring test: & l middle-dot << a ");
3963 ruleLen
= u_strlen(rule2
);
3964 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
3965 if (U_FAILURE(status
)) {
3966 log_err("Tailoring test: & l middle-dot << a failed!");
3969 for (j
=0; j
<11; j
++) {
3970 tLen
= u_strlen(tData1
[j
]);
3971 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
/* Under rule2, entry 3 is exempt from the "not smaller" check and is checked for "not greater" instead. */
3972 if ((j
>0) && (j
!=3) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
3973 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3976 if ((j
==3)&&(strcmp((char *)resColl
, (char *)prevColl
)>0)) {
3977 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3980 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
3981 for(i
= 0; i
<rLen
; i
++) {
3982 log_verbose(" %02X", resColl
[i
]);
3984 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
/*
 * Calls ucol_getSortKey() for the single character "\u4e00" with a
 * one-byte output buffer, using the root collator at primary strength with
 * normalization on, and expects the returned length to be 4 even though the
 * buffer cannot hold the key.
 */
3990 TestOutOfBuffer5468(void)
3992 static const char *test
= "\\u4e00";
3994 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
/* Deliberately too small for the key. */
3995 unsigned char shortKeyBuf
[1];
3996 int32_t sortkey_length
;
3997 UErrorCode status
= U_ZERO_ERROR
;
3998 static UCollator
*coll
= NULL
;
4000 coll
= ucol_open("root", &status
);
4001 if(U_FAILURE(status
)) {
4002 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
/* Strength is set twice, once via each API; both request UCOL_PRIMARY. */
4005 ucol_setStrength(coll
, UCOL_PRIMARY
);
4006 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4007 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4008 if (U_FAILURE(status
)) {
4009 log_err("Failed setting atributes\n");
4013 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, shortKeyBuf
, sizeof(shortKeyBuf
));
4014 if (sortkey_length
!= 4) {
4015 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length
);
4017 log_verbose("length of sortKey is %d", sortkey_length
);
/*
 * For every prefix of data[] (lengths 1..TSKC_DATA_SIZE) this test builds a
 * full sort key with ucol_getSortKey() and a partial one with
 * ucol_nextSortKeyPart(), both into zero-filled TSKC_BUF_SIZE-byte buffers.
 * Each prefix is then compared with every shorter prefix: whether the two
 * full keys are byte-for-byte equal must agree with whether the two partial
 * keys are byte-for-byte equal, otherwise an error is logged.
 */
4021 #define TSKC_DATA_SIZE 5
4022 #define TSKC_BUF_SIZE 50
4024 TestSortKeyConsistency(void)
4026 UErrorCode icuRC
= U_ZERO_ERROR
;
4028 UChar data
[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4030 uint8_t bufFull
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
4031 uint8_t bufPart
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
4034 ucol
= ucol_openFromShortString("LEN_S4", FALSE
, NULL
, &icuRC
);
4035 if (U_FAILURE(icuRC
))
4037 log_err_status(icuRC
, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC
));
4041 for (i
= 0; i
< TSKC_DATA_SIZE
; i
++)
4043 UCharIterator uiter
;
4044 uint32_t state
[2] = { 0, 0 };
/* Row i holds the keys for the first i+1 code units of data[]. */
4045 int32_t dataLen
= i
+1;
/* Zero both rows so that bytes past the end of a key compare equal. */
4046 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
4047 bufFull
[i
][j
] = bufPart
[i
][j
] = 0;
4050 ucol_getSortKey(ucol
, data
, dataLen
, bufFull
[i
], TSKC_BUF_SIZE
);
4052 /* Partial sort key */
4053 uiter_setString(&uiter
, data
, dataLen
);
4054 ucol_nextSortKeyPart(ucol
, &uiter
, state
, bufPart
[i
], TSKC_BUF_SIZE
, &icuRC
);
4055 if (U_FAILURE(icuRC
))
4057 log_err("ucol_nextSortKeyPart failed\n");
4062 for (i2
=0; i2
<i
; i2
++)
4064 UBool fullMatch
= TRUE
;
4065 UBool partMatch
= TRUE
;
/*
 * A pair of keys matches only if every buffer byte is equal. The earlier
 * "!=" accumulation made the flags mean "every byte differs", which left
 * the consistency check below practically unable to fire.
 */
4066 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
4068 fullMatch
= fullMatch
&& (bufFull
[i
][j
] == bufFull
[i2
][j
]);
4069 partMatch
= partMatch
&& (bufPart
[i
][j
] == bufPart
[i2
][j
]);
4071 if (fullMatch
!= partMatch
) {
4072 log_err(fullMatch
? "full key was consistent, but partial key changed\n"
4073 : "partial key was consistent, but full key changed\n");
4080 /*=============================================*/
/*
 * Opens a collator from the short string "LHR_AN_CX_EX_FX_HX_NX_S3", points
 * a UCharIterator at the two-unit text {0x0044, 0xD81A} (0xD81A is an
 * unpaired lead surrogate) and calls ucol_nextSortKeyPart() once with a
 * 32-byte buffer. An error is logged if the returned length equals the
 * buffer capacity.
 */
4085 static void TestCroatianSortKey(void) {
4086 const char* collString
= "LHR_AN_CX_EX_FX_HX_NX_S3";
4087 UErrorCode status
= U_ZERO_ERROR
;
4091 static const UChar text
[] = { 0x0044, 0xD81A };
4093 size_t length
= UPRV_LENGTHOF(text
);
4095 uint8_t textSortKey
[32];
4096 size_t lenSortKey
= 32;
4097 size_t actualSortKeyLen
;
4098 uint32_t uStateInfo
[2] = { 0, 0 };
4100 ucol
= ucol_openFromShortString(collString
, FALSE
, NULL
, &status
);
4101 if (U_FAILURE(status
)) {
4102 log_err_status(status
, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status
));
4106 uiter_setString(&iter
, text
, length
);
4108 actualSortKeyLen
= ucol_nextSortKeyPart(
4109 ucol
, &iter
, (uint32_t*)uStateInfo
,
4110 textSortKey
, lenSortKey
, &status
/* A completely filled buffer is treated as the failure condition. */
4113 if (actualSortKeyLen
== lenSortKey
) {
4114 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
/*
 * TestHiragana compares data1 (U+3058, U+30B8) with data2 (U+3057 U+3099,
 * U+30B7 U+3099) under a collator opened from the short string
 * "LJA_AN_CX_EX_FX_HO_NX_S4" in three ways, each of which must report the
 * two strings as equal:
 *   1. full sort keys from ucol_getSortKey() (same size, same bytes);
 *   2. partial sort keys from ucol_nextSortKeyPart();
 *   3. ucol_strcoll(), which must return UCOL_EQUAL.
 */
4121 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4122 * they are both Hiragana and Katakana
4124 #define SORTKEYLEN 50
4125 static void TestHiragana(void) {
4126 UErrorCode status
= U_ZERO_ERROR
;
4128 UCollationResult strcollresult
;
4129 UChar data1
[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4130 UChar data2
[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4131 int32_t data1Len
= UPRV_LENGTHOF(data1
);
4132 int32_t data2Len
= UPRV_LENGTHOF(data2
);
4134 uint8_t sortKey1
[SORTKEYLEN
];
4135 uint8_t sortKey2
[SORTKEYLEN
];
4137 UCharIterator uiter1
;
4138 UCharIterator uiter2
;
4139 uint32_t state1
[2] = { 0, 0 };
4140 uint32_t state2
[2] = { 0, 0 };
4144 ucol
= ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE
, NULL
,
4146 if (U_FAILURE(status
)) {
4147 log_err_status(status
, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status
));
4151 /* Start of full sort keys */
4152 /* Full sort key1 */
4153 keySize1
= ucol_getSortKey(ucol
, data1
, data1Len
, sortKey1
, SORTKEYLEN
);
4154 /* Full sort key2 */
4155 keySize2
= ucol_getSortKey(ucol
, data2
, data2Len
, sortKey2
, SORTKEYLEN
);
/* Equal sizes are required first; only then are the bytes compared one by one. */
4156 if (keySize1
== keySize2
) {
4157 for (i
= 0; i
< keySize1
; i
++) {
4158 if (sortKey1
[i
] != sortKey2
[i
]) {
4159 log_err("Full sort keys are different. Should be equal.");
4163 log_err("Full sort keys sizes doesn't match: %d %d", keySize1
, keySize2
);
4165 /* End of full sort keys */
4167 /* Start of partial sort keys */
4168 /* Partial sort key1 */
4169 uiter_setString(&uiter1
, data1
, data1Len
);
4170 keySize1
= ucol_nextSortKeyPart(ucol
, &uiter1
, state1
, sortKey1
, SORTKEYLEN
, &status
);
4171 /* Partial sort key2 */
4172 uiter_setString(&uiter2
, data2
, data2Len
);
4173 keySize2
= ucol_nextSortKeyPart(ucol
, &uiter2
, state2
, sortKey2
, SORTKEYLEN
, &status
);
/* The sortKey1/sortKey2 buffers are reused; a failed status is reported together with a size mismatch. */
4174 if (U_SUCCESS(status
) && keySize1
== keySize2
) {
4175 for (j
= 0; j
< keySize1
; j
++) {
4176 if (sortKey1
[j
] != sortKey2
[j
]) {
4177 log_err("Partial sort keys are different. Should be equal");
4181 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status
), keySize1
, keySize2
);
4183 /* End of partial sort keys */
4185 /* Start of strcoll */
4186 /* Use ucol_strcoll() to determine ordering */
4187 strcollresult
= ucol_strcoll(ucol
, data1
, data1Len
, data2
, data2Len
);
4188 if (strcollresult
!= UCOL_EQUAL
) {
4189 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4195 /* Convenient struct for running collation tests */
/*
 * One expectation: comparing `source` with `target` must yield `result`.
 * Both strings are fixed-size arrays of MAX_TOKEN_LEN UChars.
 */
4197 const UChar source
[MAX_TOKEN_LEN
]; /* String on left */
4198 const UChar target
[MAX_TOKEN_LEN
]; /* String on right */
4199 UCollationResult result
; /* -1, 0 or +1, depending on collation */
4203 * Utility function to test one collation test case.
4204 * @param testcases Array of test cases.
4205 * @param n_testcases Size of the array testcases.
4206 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
4207 * @param n_rules Size of the array str_rules.
/*
 * For every rule string: unescape it, open a rule-based collator (tertiary
 * strength, normalization on), run doTest() on each test case's
 * source/target/result, then close the collator. A rule that cannot be
 * unescaped or opened is reported through log_err()/log_err_status().
 */
4209 static void doTestOneTestCase(const OneTestCase testcases
[],
4211 const char* str_rules
[],
4214 int rule_no
, testcase_no
;
4217 UErrorCode status
= U_ZERO_ERROR
;
4218 UParseError parse_error
;
4219 UCollator
*myCollation
;
4221 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
4223 length
= u_unescape(str_rules
[rule_no
], rule
, 500);
/* The %s conversion needs its argument: print the rule that failed to unescape. */
4225 log_err("ERROR: The rule cannot be unescaped: %s\n", str_rules
[rule_no
]);
4228 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
4229 if(U_FAILURE(status
)){
4230 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
4231 log_info(" offset=%d \"%s\" | \"%s\"\n",
4233 aescstrdup(parse_error
.preContext
, -1),
4234 aescstrdup(parse_error
.postContext
, -1));
4237 log_verbose("Testing the <<* syntax\n");
4238 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4239 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
4240 for (testcase_no
= 0; testcase_no
< n_testcases
; ++testcase_no
) {
4242 testcases
[testcase_no
].source
,
4243 testcases
[testcase_no
].target
,
4244 testcases
[testcase_no
].result
4247 ucol_close(myCollation
);
4251 const static OneTestCase rangeTestcases
[] = {
4252 { {0x0061}, {0x0062}, UCOL_LESS
}, /* "a" < "b" */
4253 { {0x0062}, {0x0063}, UCOL_LESS
}, /* "b" < "c" */
4254 { {0x0061}, {0x0063}, UCOL_LESS
}, /* "a" < "c" */
4256 { {0x0062}, {0x006b}, UCOL_LESS
}, /* "b" << "k" */
4257 { {0x006b}, {0x006c}, UCOL_LESS
}, /* "k" << "l" */
4258 { {0x0062}, {0x006c}, UCOL_LESS
}, /* "b" << "l" */
4259 { {0x0061}, {0x006c}, UCOL_LESS
}, /* "a" < "l" */
4260 { {0x0061}, {0x006d}, UCOL_LESS
}, /* "a" < "m" */
4262 { {0x0079}, {0x006d}, UCOL_LESS
}, /* "y" < "f" */
4263 { {0x0079}, {0x0067}, UCOL_LESS
}, /* "y" < "g" */
4264 { {0x0061}, {0x0068}, UCOL_LESS
}, /* "y" < "h" */
4265 { {0x0061}, {0x0065}, UCOL_LESS
}, /* "g" < "e" */
4267 { {0x0061}, {0x0031}, UCOL_EQUAL
}, /* "a" = "1" */
4268 { {0x0061}, {0x0032}, UCOL_EQUAL
}, /* "a" = "2" */
4269 { {0x0061}, {0x0033}, UCOL_EQUAL
}, /* "a" = "3" */
4270 { {0x0061}, {0x0066}, UCOL_LESS
}, /* "a" < "f" */
4271 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS
}, /* "la" < "123" */
4272 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL
}, /* "aaa" = "123" */
4273 { {0x0062}, {0x007a}, UCOL_LESS
}, /* "b" < "z" */
4274 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS
}, /* "azm" = "2yc" */
4277 static int nRangeTestcases
= UPRV_LENGTHOF(rangeTestcases
);
4279 const static OneTestCase rangeTestcasesSupplemental
[] = {
4280 { {0x4e00}, {0xfffb}, UCOL_LESS
}, /* U+4E00 < U+FFFB */
4281 { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS
}, /* U+FFFB < U+10000 */
4282 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+10000 < U+10001 */
4283 { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+4E00 < U+10001 */
4284 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
4285 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
4286 { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+4E00 < U+10001 */
4289 static int nRangeTestcasesSupplemental
= UPRV_LENGTHOF(rangeTestcasesSupplemental
);
4291 const static OneTestCase rangeTestcasesQwerty
[] = {
4292 { {0x0071}, {0x0077}, UCOL_LESS
}, /* "q" < "w" */
4293 { {0x0077}, {0x0065}, UCOL_LESS
}, /* "w" < "e" */
4295 { {0x0079}, {0x0075}, UCOL_LESS
}, /* "y" < "u" */
4296 { {0x0071}, {0x0075}, UCOL_LESS
}, /* "q" << "u" */
4298 { {0x0074}, {0x0069}, UCOL_LESS
}, /* "t" << "i" */
4299 { {0x006f}, {0x0070}, UCOL_LESS
}, /* "o" << "p" */
4301 { {0x0079}, {0x0065}, UCOL_LESS
}, /* "y" < "e" */
4302 { {0x0069}, {0x0075}, UCOL_LESS
}, /* "i" < "u" */
4304 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4305 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS
}, /* "quest" < "were" */
4306 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4307 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS
}, /* "quack" < "quest" */
4310 static int nRangeTestcasesQwerty
= UPRV_LENGTHOF(rangeTestcasesQwerty
);
4312 static void TestSameStrengthList(void)
4314 const char* strRules
[] = {
4316 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
4319 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4321 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, UPRV_LENGTHOF(strRules
));
4324 static void TestSameStrengthListQuoted(void)
4326 const char* strRules
[] = {
4327 /* Lists with quoted characters */
4328 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4329 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4331 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4332 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4334 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
4335 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4337 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, UPRV_LENGTHOF(strRules
));
4340 static void TestSameStrengthListSupplemental(void)
4342 const char* strRules
[] = {
4343 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4344 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4345 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4346 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4348 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, UPRV_LENGTHOF(strRules
));
4351 static void TestSameStrengthListQwerty(void)
4353 const char* strRules
[] = {
4354 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4355 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4356 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4357 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4358 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4360 /* Quoted characters also will work if two quoted characters are not consecutive. */
4361 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4363 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4364 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4367 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, UPRV_LENGTHOF(strRules
));
4370 static void TestSameStrengthListQuotedQwerty(void)
4372 const char* strRules
[] = {
4373 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4374 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4375 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
4377 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4378 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4380 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, UPRV_LENGTHOF(strRules
));
4383 static void TestSameStrengthListRanges(void)
4385 const char* strRules
[] = {
4386 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4388 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, UPRV_LENGTHOF(strRules
));
4391 static void TestSameStrengthListSupplementalRanges(void)
4393 const char* strRules
[] = {
4394 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4395 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4397 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, UPRV_LENGTHOF(strRules
));
4400 static void TestSpecialCharacters(void)
4402 const char* strRules
[] = {
4404 "&';'<'+'<','<'-'<'&'<'*'",
4413 const static OneTestCase specialCharacterStrings
[] = {
4414 { {0x003b}, {0x002b}, UCOL_LESS
}, /* ; < + */
4415 { {0x002b}, {0x002c}, UCOL_LESS
}, /* + < , */
4416 { {0x002c}, {0x002d}, UCOL_LESS
}, /* , < - */
4417 { {0x002d}, {0x0026}, UCOL_LESS
}, /* - < & */
4419 doTestOneTestCase(specialCharacterStrings
, UPRV_LENGTHOF(specialCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
4422 static void TestPrivateUseCharacters(void)
4424 const char* strRules
[] = {
4426 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4427 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4430 const static OneTestCase privateUseCharacterStrings
[] = {
4431 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
4432 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
4433 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
4434 { {0xe2da}, {0xe2db}, UCOL_LESS
},
4435 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
4436 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
4438 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
4441 static void TestPrivateUseCharactersInList(void)
4443 const char* strRules
[] = {
4445 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4446 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4447 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4450 const static OneTestCase privateUseCharacterStrings
[] = {
4451 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
4452 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
4453 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
4454 { {0xe2da}, {0xe2db}, UCOL_LESS
},
4455 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
4456 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
4458 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
4461 static void TestPrivateUseCharactersInRange(void)
4463 const char* strRules
[] = {
4465 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4466 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4467 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4470 const static OneTestCase privateUseCharacterStrings
[] = {
4471 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
4472 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
4473 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
4474 { {0xe2da}, {0xe2db}, UCOL_LESS
},
4475 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
4476 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
4478 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
4481 static void TestInvalidListsAndRanges(void)
4483 const char* invalidRules
[] = {
4484 /* Range not in starred expression */
4485 "&\\ufffe<\\uffff-\\U00010002",
4487 /* Range without start */
4490 /* Range without end */
4493 /* More than one hyphen */
4496 /* Range in the wrong order */
4502 UErrorCode status
= U_ZERO_ERROR
;
4503 UParseError parse_error
;
4504 int n_rules
= UPRV_LENGTHOF(invalidRules
);
4507 UCollator
*myCollation
;
4509 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
4511 length
= u_unescape(invalidRules
[rule_no
], rule
, 500);
4513 log_err("ERROR: The rule cannot be unescaped: %s\n");
4516 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
4517 (void)myCollation
; /* Suppress set but not used warning. */
4518 if(!U_FAILURE(status
)){
4519 log_err("ERROR: Could not cause a failure as expected: \n");
4521 status
= U_ZERO_ERROR
;
4526 * This test ensures that characters placed before a character in a different script have the same lead byte
4527 * in their collation key before and after script reordering.
4529 static void TestBeforeRuleWithScriptReordering(void)
4532 UErrorCode status
= U_ZERO_ERROR
;
4533 UCollator
*myCollation
;
4534 char srules
[500] = "&[before 1]\\u03b1 < \\u0e01";
4536 uint32_t rulesLength
= 0;
4537 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
4538 UCollationResult collResult
;
4540 uint8_t baseKey
[256];
4541 uint32_t baseKeyLength
;
4542 uint8_t beforeKey
[256];
4543 uint32_t beforeKeyLength
;
4545 UChar base
[] = { 0x03b1 }; /* base */
4546 int32_t baseLen
= UPRV_LENGTHOF(base
);
4548 UChar before
[] = { 0x0e01 }; /* ko kai */
4549 int32_t beforeLen
= UPRV_LENGTHOF(before
);
4551 /*UChar *data[] = { before, base };
4552 genericRulesStarter(srules, data, 2);*/
4554 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4556 (void)beforeKeyLength
; /* Suppress set but not used warnings. */
4557 (void)baseKeyLength
;
4559 /* build collator */
4560 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4562 rulesLength
= u_unescape(srules
, rules
, UPRV_LENGTHOF(rules
));
4563 myCollation
= ucol_openRules(rules
, rulesLength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
4564 if(U_FAILURE(status
)) {
4565 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
4569 /* check collation results - before rule applied but not script reordering */
4570 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
4571 if (collResult
!= UCOL_GREATER
) {
4572 log_err("Collation result not correct before script reordering = %d\n", collResult
);
4575 /* check the lead byte of the collation keys before script reordering */
4576 baseKeyLength
= ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
4577 beforeKeyLength
= ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
4578 if (baseKey
[0] != beforeKey
[0]) {
4579 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
4582 /* reorder the scripts */
4583 ucol_setReorderCodes(myCollation
, reorderCodes
, 1, &status
);
4584 if(U_FAILURE(status
)) {
4585 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
4589 /* check collation results - before rule applied and after script reordering */
4590 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
4591 if (collResult
!= UCOL_GREATER
) {
4592 log_err("Collation result not correct after script reordering = %d\n", collResult
);
4595 /* check the lead byte of the collation keys after script reordering */
4596 ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
4597 ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
4598 if (baseKey
[0] != beforeKey
[0]) {
4599 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
4602 ucol_close(myCollation
);
4606 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4608 static void TestNonLeadBytesDuringCollationReordering(void)
4610 UErrorCode status
= U_ZERO_ERROR
;
4611 UCollator
*myCollation
;
4612 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
4614 uint8_t baseKey
[256];
4615 uint32_t baseKeyLength
;
4616 uint8_t reorderKey
[256];
4617 uint32_t reorderKeyLength
;
4619 UChar testString
[] = { 0x03b1, 0x03b2, 0x03b3 };
4624 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4626 /* build collator tertiary */
4627 myCollation
= ucol_open("", &status
);
4628 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
4629 if(U_FAILURE(status
)) {
4630 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4633 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), baseKey
, 256);
4635 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4636 if(U_FAILURE(status
)) {
4637 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4640 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), reorderKey
, 256);
4642 if (baseKeyLength
!= reorderKeyLength
) {
4643 log_err("Key lengths not the same during reordering.\n");
4647 for (i
= 1; i
< baseKeyLength
; i
++) {
4648 if (baseKey
[i
] != reorderKey
[i
]) {
4649 log_err("Collation key bytes not the same at position %d.\n", i
);
4653 ucol_close(myCollation
);
4655 /* build collator quaternary */
4656 myCollation
= ucol_open("", &status
);
4657 ucol_setStrength(myCollation
, UCOL_QUATERNARY
);
4658 if(U_FAILURE(status
)) {
4659 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4662 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), baseKey
, 256);
4664 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4665 if(U_FAILURE(status
)) {
4666 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4669 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, UPRV_LENGTHOF(testString
), reorderKey
, 256);
4671 if (baseKeyLength
!= reorderKeyLength
) {
4672 log_err("Key lengths not the same during reordering.\n");
4676 for (i
= 1; i
< baseKeyLength
; i
++) {
4677 if (baseKey
[i
] != reorderKey
[i
]) {
4678 log_err("Collation key bytes not the same at position %d.\n", i
);
4682 ucol_close(myCollation
);
4686 * Test reordering API.
4688 static void TestReorderingAPI(void)
4690 UErrorCode status
= U_ZERO_ERROR
;
4691 UCollator
*myCollation
;
4692 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
4693 int32_t duplicateReorderCodes
[] = {USCRIPT_HIRAGANA
, USCRIPT_GREEK
, UCOL_REORDER_CODE_CURRENCY
, USCRIPT_KATAKANA
};
4694 int32_t reorderCodesStartingWithDefault
[] = {UCOL_REORDER_CODE_DEFAULT
, USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
4695 int32_t reorderCodeNone
= UCOL_REORDER_CODE_NONE
;
4696 UCollationResult collResult
;
4697 int32_t retrievedReorderCodesLength
;
4698 int32_t retrievedReorderCodes
[10];
4699 UChar greekString
[] = { 0x03b1 };
4700 UChar punctuationString
[] = { 0x203e };
4703 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4705 /* build collator tertiary */
4706 myCollation
= ucol_open("", &status
);
4707 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
4708 if(U_FAILURE(status
)) {
4709 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4713 /* set the reorderding */
4714 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4715 if (U_FAILURE(status
)) {
4716 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4720 /* get the reordering */
4721 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4722 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
4723 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
4726 status
= U_ZERO_ERROR
;
4727 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4728 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4731 /* now let's really get it */
4732 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4733 if (U_FAILURE(status
)) {
4734 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4737 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4738 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4741 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4742 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
4743 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4747 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4748 if (collResult
!= UCOL_LESS
) {
4749 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
4753 /* clear the reordering */
4754 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
4755 if (U_FAILURE(status
)) {
4756 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
4760 /* get the reordering again */
4761 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4762 if (retrievedReorderCodesLength
!= 0) {
4763 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
4767 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4768 if (collResult
!= UCOL_GREATER
) {
4769 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
4773 /* clear the reordering using [NONE] */
4774 ucol_setReorderCodes(myCollation
, &reorderCodeNone
, 1, &status
);
4775 if (U_FAILURE(status
)) {
4776 log_err_status(status
, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status
));
4780 /* get the reordering again */
4781 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4782 if (retrievedReorderCodesLength
!= 0) {
4783 log_err_status(status
,
4784 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4785 retrievedReorderCodesLength
);
4789 /* test for error condition on duplicate reorder codes */
4790 ucol_setReorderCodes(myCollation
, duplicateReorderCodes
, UPRV_LENGTHOF(duplicateReorderCodes
), &status
);
4791 if (!U_FAILURE(status
)) {
4792 log_err_status(status
, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4796 status
= U_ZERO_ERROR
;
4797 /* test for reorder codes after a reset code */
4798 ucol_setReorderCodes(myCollation
, reorderCodesStartingWithDefault
, UPRV_LENGTHOF(reorderCodesStartingWithDefault
), &status
);
4799 if (!U_FAILURE(status
)) {
4800 log_err_status(status
, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4804 ucol_close(myCollation
);
4808 * Test reordering API.
4810 static void TestReorderingAPIWithRuleCreatedCollator(void)
4812 UErrorCode status
= U_ZERO_ERROR
;
4813 UCollator
*myCollation
;
4815 static const int32_t rulesReorderCodes
[2] = {USCRIPT_HAN
, USCRIPT_GREEK
};
4816 static const int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
4817 static const int32_t onlyDefault
[1] = {UCOL_REORDER_CODE_DEFAULT
};
4818 UCollationResult collResult
;
4819 int32_t retrievedReorderCodesLength
;
4820 int32_t retrievedReorderCodes
[10];
4821 static const UChar greekString
[] = { 0x03b1 };
4822 static const UChar punctuationString
[] = { 0x203e };
4823 static const UChar hanString
[] = { 0x65E5, 0x672C };
4826 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4828 /* build collator from rules */
4829 u_uastrcpy(rules
, "[reorder Hani Grek]");
4830 myCollation
= ucol_openRules(rules
, u_strlen(rules
), UCOL_DEFAULT
, UCOL_TERTIARY
, NULL
, &status
);
4831 if(U_FAILURE(status
)) {
4832 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
4836 /* get the reordering */
4837 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4838 if (U_FAILURE(status
)) {
4839 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4842 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(rulesReorderCodes
)) {
4843 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(rulesReorderCodes
));
4846 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4847 if (retrievedReorderCodes
[loopIndex
] != rulesReorderCodes
[loopIndex
]) {
4848 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4852 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), hanString
, UPRV_LENGTHOF(hanString
));
4853 if (collResult
!= UCOL_GREATER
) {
4854 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
4858 /* set the reordering */
4859 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
4860 if (U_FAILURE(status
)) {
4861 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
4865 /* get the reordering */
4866 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4867 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
4868 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
4871 status
= U_ZERO_ERROR
;
4872 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4873 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4876 /* now let's really get it */
4877 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4878 if (U_FAILURE(status
)) {
4879 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4882 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
4883 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
4886 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4887 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
4888 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4892 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4893 if (collResult
!= UCOL_LESS
) {
4894 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
4898 /* clear the reordering */
4899 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
4900 if (U_FAILURE(status
)) {
4901 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
4905 /* get the reordering again */
4906 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
4907 if (retrievedReorderCodesLength
!= 0) {
4908 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
4912 collResult
= ucol_strcoll(myCollation
, greekString
, UPRV_LENGTHOF(greekString
), punctuationString
, UPRV_LENGTHOF(punctuationString
));
4913 if (collResult
!= UCOL_GREATER
) {
4914 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
4918 /* reset the reordering */
4919 ucol_setReorderCodes(myCollation
, onlyDefault
, 1, &status
);
4920 if (U_FAILURE(status
)) {
4921 log_err_status(status
, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status
));
4924 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
4925 if (U_FAILURE(status
)) {
4926 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
4929 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(rulesReorderCodes
)) {
4930 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(rulesReorderCodes
));
4933 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
4934 if (retrievedReorderCodes
[loopIndex
] != rulesReorderCodes
[loopIndex
]) {
4935 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
4940 ucol_close(myCollation
);
4943 static UBool
containsExpectedScript(const int32_t scripts
[], int32_t length
, int32_t expectedScript
) {
4945 for (i
= 0; i
< length
; ++i
) {
4946 if (expectedScript
== scripts
[i
]) { return TRUE
; }
4951 static void TestEquivalentReorderingScripts(void) {
4952 // Beginning with ICU 55, collation reordering moves single scripts
4953 // rather than groups of scripts,
4954 // except where scripts share a range and sort primary-equal.
4955 UErrorCode status
= U_ZERO_ERROR
;
4956 int32_t equivalentScripts
[100];
4960 /* These scripts are expected to be equivalent. */
4961 static const int32_t expectedScripts
[] = {
4964 USCRIPT_KATAKANA_OR_HIRAGANA
4967 equivalentScripts
[0] = 0;
4968 length
= ucol_getEquivalentReorderCodes(
4969 USCRIPT_GOTHIC
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
4970 if (U_FAILURE(status
)) {
4971 log_err_status(status
, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
4974 if (length
!= 1 || equivalentScripts
[0] != USCRIPT_GOTHIC
) {
4975 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4976 "length expected 1, was = %d; expected [%d] was [%d]\n",
4977 length
, USCRIPT_GOTHIC
, equivalentScripts
[0]);
4980 length
= ucol_getEquivalentReorderCodes(
4981 USCRIPT_HIRAGANA
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
4982 if (U_FAILURE(status
)) {
4983 log_err_status(status
, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
4986 if (length
!= UPRV_LENGTHOF(expectedScripts
)) {
4987 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4988 "expected %d, was = %d\n",
4989 UPRV_LENGTHOF(expectedScripts
), length
);
4992 for (i
= 0; i
< length
; ++i
) {
4993 int32_t script
= equivalentScripts
[i
];
4994 if (script
<= prevScript
) {
4995 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i
);
4997 prevScript
= script
;
4999 for (i
= 0; i
< UPRV_LENGTHOF(expectedScripts
); i
++) {
5000 if (!containsExpectedScript(equivalentScripts
, length
, expectedScripts
[i
])) {
5001 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5002 expectedScripts
[i
]);
5006 length
= ucol_getEquivalentReorderCodes(
5007 USCRIPT_KATAKANA
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5008 if (U_FAILURE(status
)) {
5009 log_err_status(status
, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
5012 if (length
!= UPRV_LENGTHOF(expectedScripts
)) {
5013 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5014 "expected %d, was = %d\n",
5015 UPRV_LENGTHOF(expectedScripts
), length
);
5017 for (i
= 0; i
< UPRV_LENGTHOF(expectedScripts
); i
++) {
5018 if (!containsExpectedScript(equivalentScripts
, length
, expectedScripts
[i
])) {
5019 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5020 expectedScripts
[i
]);
5024 length
= ucol_getEquivalentReorderCodes(
5025 USCRIPT_KATAKANA_OR_HIRAGANA
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5026 if (U_FAILURE(status
) || length
!= UPRV_LENGTHOF(expectedScripts
)) {
5027 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5028 "expected %d, was = %d\n",
5029 UPRV_LENGTHOF(expectedScripts
), length
);
5032 length
= ucol_getEquivalentReorderCodes(
5033 USCRIPT_HAN
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5034 if (U_FAILURE(status
) || length
!= 3) {
5035 log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5036 "expected 3, was = %d\n", length
);
5038 length
= ucol_getEquivalentReorderCodes(
5039 USCRIPT_SIMPLIFIED_HAN
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5040 if (U_FAILURE(status
) || length
!= 3) {
5041 log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5042 "expected 3, was = %d\n", length
);
5044 length
= ucol_getEquivalentReorderCodes(
5045 USCRIPT_TRADITIONAL_HAN
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5046 if (U_FAILURE(status
) || length
!= 3) {
5047 log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5048 "expected 3, was = %d\n", length
);
5051 length
= ucol_getEquivalentReorderCodes(
5052 USCRIPT_MEROITIC_CURSIVE
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5053 if (U_FAILURE(status
) || length
!= 2) {
5054 log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5055 "expected 2, was = %d\n", length
);
5057 length
= ucol_getEquivalentReorderCodes(
5058 USCRIPT_MEROITIC_HIEROGLYPHS
, equivalentScripts
, UPRV_LENGTHOF(equivalentScripts
), &status
);
5059 if (U_FAILURE(status
) || length
!= 2) {
5060 log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5061 "expected 2, was = %d\n", length
);
5065 static void TestReorderingAcrossCloning(void)
5067 UErrorCode status
= U_ZERO_ERROR
;
5068 UCollator
*myCollation
;
5069 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
5070 UCollator
*clonedCollation
;
5071 int32_t retrievedReorderCodesLength
;
5072 int32_t retrievedReorderCodes
[10];
5075 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5077 /* build collator tertiary */
5078 myCollation
= ucol_open("", &status
);
5079 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5080 if(U_FAILURE(status
)) {
5081 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5085 /* set the reorderding */
5086 ucol_setReorderCodes(myCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
5087 if (U_FAILURE(status
)) {
5088 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5092 /* clone the collator */
5093 clonedCollation
= ucol_safeClone(myCollation
, NULL
, NULL
, &status
);
5094 if (U_FAILURE(status
)) {
5095 log_err_status(status
, "ERROR: cloning collator: %s\n", myErrorName(status
));
5099 /* get the reordering */
5100 retrievedReorderCodesLength
= ucol_getReorderCodes(clonedCollation
, retrievedReorderCodes
, UPRV_LENGTHOF(retrievedReorderCodes
), &status
);
5101 if (U_FAILURE(status
)) {
5102 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
5105 if (retrievedReorderCodesLength
!= UPRV_LENGTHOF(reorderCodes
)) {
5106 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, UPRV_LENGTHOF(reorderCodes
));
5109 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
5110 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
5111 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
5116 /*uprv_free(buffer);*/
5117 ucol_close(myCollation
);
5118 ucol_close(clonedCollation
);
5122 * Utility function to test one collation reordering test case set.
5123 * @param testcases Array of test cases.
5124 * @param n_testcases Size of the array testcases.
5125 * @param reorderTokens Array of reordering codes.
5126 * @param reorderTokensLen Size of the array reorderTokens.
5128 static void doTestOneReorderingAPITestCase(const OneTestCase testCases
[], uint32_t testCasesLen
, const int32_t reorderTokens
[], int32_t reorderTokensLen
)
5130 uint32_t testCaseNum
;
5131 UErrorCode status
= U_ZERO_ERROR
;
5132 UCollator
*myCollation
;
5134 myCollation
= ucol_open("", &status
);
5135 if (U_FAILURE(status
)) {
5136 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5139 ucol_setReorderCodes(myCollation
, reorderTokens
, reorderTokensLen
, &status
);
5140 if(U_FAILURE(status
)) {
5141 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
5145 for (testCaseNum
= 0; testCaseNum
< testCasesLen
; ++testCaseNum
) {
5147 testCases
[testCaseNum
].source
,
5148 testCases
[testCaseNum
].target
,
5149 testCases
[testCaseNum
].result
5152 ucol_close(myCollation
);
5155 static void TestGreekFirstReorder(void)
5157 const char* strRules
[] = {
5161 const int32_t apiRules
[] = {
5165 const static OneTestCase privateUseCharacterStrings
[] = {
5166 { {0x0391}, {0x0391}, UCOL_EQUAL
},
5167 { {0x0041}, {0x0391}, UCOL_GREATER
},
5168 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER
},
5169 { {0x0060}, {0x0391}, UCOL_LESS
},
5170 { {0x0391}, {0xe2dc}, UCOL_LESS
},
5171 { {0x0391}, {0x0060}, UCOL_GREATER
},
5174 /* Test rules creation */
5175 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5177 /* Test collation reordering API */
5178 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
5181 static void TestGreekLastReorder(void)
5183 const char* strRules
[] = {
5184 "[reorder Zzzz Grek]"
5187 const int32_t apiRules
[] = {
5188 USCRIPT_UNKNOWN
, USCRIPT_GREEK
5191 const static OneTestCase privateUseCharacterStrings
[] = {
5192 { {0x0391}, {0x0391}, UCOL_EQUAL
},
5193 { {0x0041}, {0x0391}, UCOL_LESS
},
5194 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS
},
5195 { {0x0060}, {0x0391}, UCOL_LESS
},
5196 { {0x0391}, {0xe2dc}, UCOL_GREATER
},
5199 /* Test rules creation */
5200 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5202 /* Test collation reordering API */
5203 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
5206 static void TestNonScriptReorder(void)
5208 const char* strRules
[] = {
5209 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5212 const int32_t apiRules
[] = {
5213 USCRIPT_GREEK
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
,
5214 UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SPACE
, USCRIPT_UNKNOWN
,
5215 UCOL_REORDER_CODE_CURRENCY
5218 const static OneTestCase privateUseCharacterStrings
[] = {
5219 { {0x0391}, {0x0041}, UCOL_LESS
},
5220 { {0x0041}, {0x0391}, UCOL_GREATER
},
5221 { {0x0060}, {0x0041}, UCOL_LESS
},
5222 { {0x0060}, {0x0391}, UCOL_GREATER
},
5223 { {0x0024}, {0x0041}, UCOL_GREATER
},
5226 /* Test rules creation */
5227 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5229 /* Test collation reordering API */
5230 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
5233 static void TestHaniReorder(void)
5235 const char* strRules
[] = {
5238 const int32_t apiRules
[] = {
5242 const static OneTestCase privateUseCharacterStrings
[] = {
5243 { {0x4e00}, {0x0041}, UCOL_LESS
},
5244 { {0x4e00}, {0x0060}, UCOL_GREATER
},
5245 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
5246 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
5247 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
5248 { {0xfa27}, {0x0041}, UCOL_LESS
},
5249 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
5252 /* Test rules creation */
5253 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5255 /* Test collation reordering API */
5256 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), apiRules
, UPRV_LENGTHOF(apiRules
));
5259 static void TestHaniReorderWithOtherRules(void)
5261 const char* strRules
[] = {
5262 "[reorder Hani] &b<a"
5264 /*const int32_t apiRules[] = {
5268 const static OneTestCase privateUseCharacterStrings
[] = {
5269 { {0x4e00}, {0x0041}, UCOL_LESS
},
5270 { {0x4e00}, {0x0060}, UCOL_GREATER
},
5271 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
5272 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
5273 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
5274 { {0xfa27}, {0x0041}, UCOL_LESS
},
5275 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
5276 { {0x0062}, {0x0061}, UCOL_LESS
},
5279 /* Test rules creation */
5280 doTestOneTestCase(privateUseCharacterStrings
, UPRV_LENGTHOF(privateUseCharacterStrings
), strRules
, UPRV_LENGTHOF(strRules
));
5283 static void TestMultipleReorder(void)
5285 const char* strRules
[] = {
5286 "[reorder Grek Zzzz DIGIT Latn Hani]"
5289 const int32_t apiRules
[] = {
5290 USCRIPT_GREEK
, USCRIPT_UNKNOWN
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
, USCRIPT_HAN
5293 const static OneTestCase collationTestCases
[] = {
5294 { {0x0391}, {0x0041}, UCOL_LESS
},
5295 { {0x0031}, {0x0041}, UCOL_LESS
},
5296 { {0x0041}, {0x4e00}, UCOL_LESS
},
5299 /* Test rules creation */
5300 doTestOneTestCase(collationTestCases
, UPRV_LENGTHOF(collationTestCases
), strRules
, UPRV_LENGTHOF(strRules
));
5302 /* Test collation reordering API */
5303 doTestOneReorderingAPITestCase(collationTestCases
, UPRV_LENGTHOF(collationTestCases
), apiRules
, UPRV_LENGTHOF(apiRules
));
5307 * Test that covers issue reported in ticket 8814
5309 static void TestReorderWithNumericCollation(void)
5311 UErrorCode status
= U_ZERO_ERROR
;
5312 UCollator
*myCollation
;
5313 UCollator
*myReorderCollation
;
5314 int32_t reorderCodes
[] = {UCOL_REORDER_CODE_SPACE
, UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_GREEK
,USCRIPT_LATIN
, USCRIPT_HEBREW
, UCOL_REORDER_CODE_OTHERS
};
5315 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5316 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5317 UChar fortyS
[] = { 0x0053 };
5318 UChar fortyThreeP
[] = { 0x0050 };
5319 uint8_t fortyS_sortKey
[128];
5320 int32_t fortyS_sortKey_Length
;
5321 uint8_t fortyThreeP_sortKey
[128];
5322 int32_t fortyThreeP_sortKey_Length
;
5323 uint8_t fortyS_sortKey_reorder
[128];
5324 int32_t fortyS_sortKey_reorder_Length
;
5325 uint8_t fortyThreeP_sortKey_reorder
[128];
5326 int32_t fortyThreeP_sortKey_reorder_Length
;
5327 UCollationResult collResult
;
5328 UCollationResult collResultReorder
;
5330 log_verbose("Testing reordering with and without numeric collation\n");
5332 /* build collator tertiary with numeric */
5333 myCollation
= ucol_open("", &status
);
5335 ucol_setStrength(myCollation, UCOL_TERTIARY);
5337 ucol_setAttribute(myCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
5338 if(U_FAILURE(status
)) {
5339 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5343 /* build collator tertiary with numeric and reordering */
5344 myReorderCollation
= ucol_open("", &status
);
5346 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5348 ucol_setAttribute(myReorderCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
5349 ucol_setReorderCodes(myReorderCollation
, reorderCodes
, UPRV_LENGTHOF(reorderCodes
), &status
);
5350 if(U_FAILURE(status
)) {
5351 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5355 fortyS_sortKey_Length
= ucol_getSortKey(myCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyS_sortKey
, 128);
5356 fortyThreeP_sortKey_Length
= ucol_getSortKey(myCollation
, fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
), fortyThreeP_sortKey
, 128);
5357 fortyS_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyS_sortKey_reorder
, 128);
5358 fortyThreeP_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
), fortyThreeP_sortKey_reorder
, 128);
5360 if (fortyS_sortKey_Length
< 0 || fortyThreeP_sortKey_Length
< 0 || fortyS_sortKey_reorder_Length
< 0 || fortyThreeP_sortKey_reorder_Length
< 0) {
5361 log_err_status(status
, "ERROR: couldn't generate sort keys\n");
5364 collResult
= ucol_strcoll(myCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
));
5365 collResultReorder
= ucol_strcoll(myReorderCollation
, fortyS
, UPRV_LENGTHOF(fortyS
), fortyThreeP
, UPRV_LENGTHOF(fortyThreeP
));
5367 fprintf(stderr, "\tcollResult = %x\n", collResult);
5368 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5369 fprintf(stderr, "\nfortyS\n");
5370 for (i = 0; i < fortyS_sortKey_Length; i++) {
5371 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5373 fprintf(stderr, "\nfortyThreeP\n");
5374 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5375 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5378 if (collResult
!= collResultReorder
) {
5379 log_err_status(status
, "ERROR: collation results should have been the same.\n");
5383 ucol_close(myCollation
);
5384 ucol_close(myReorderCollation
);
/*
 * Compares two zero-terminated byte sequences, strcmp-style but on
 * unsigned bytes.
 *
 * Returns 0 when both sequences are identical up to and including the
 * terminating zero byte, -1 when the first differing byte of a is smaller
 * than that of b, and 1 otherwise. Both inputs must be zero-terminated.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator on both sides without a difference. */
            return 0;
        }
        ++a;
        ++b;
    }
    if (*a < *b) {
        return -1;
    }
    return 1;
}
5397 static void TestImportRulesDeWithPhonebook(void)
5399 const char* normalRules
[] = {
5400 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5401 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5402 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5404 const OneTestCase normalTests
[] = {
5405 { {0x00e6}, {0x00c6}, UCOL_LESS
},
5406 { {0x00fc}, {0x00dc}, UCOL_GREATER
},
5409 const char* importRules
[] = {
5410 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5411 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5412 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5414 const OneTestCase importTests
[] = {
5415 { {0x00e6}, {0x00c6}, UCOL_LESS
},
5416 { {0x00fc}, {0x00dc}, UCOL_LESS
},
5419 doTestOneTestCase(normalTests
, UPRV_LENGTHOF(normalTests
), normalRules
, UPRV_LENGTHOF(normalRules
));
5420 doTestOneTestCase(importTests
, UPRV_LENGTHOF(importTests
), importRules
, UPRV_LENGTHOF(importRules
));
5424 static void TestImportRulesFiWithEor(void)
5427 const char* defaultRules
[] = {
5428 "&a<b", /* Dummy rule. */
5431 const OneTestCase defaultTests
[] = {
5432 { {0x0110}, {0x00F0}, UCOL_LESS
},
5433 { {0x00a3}, {0x00a5}, UCOL_LESS
},
5434 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
5437 /* European Ordering rules: ignore currency characters. */
5438 const char* eorRules
[] = {
5439 "[import root-u-co-eor]",
5442 const OneTestCase eorTests
[] = {
5443 { {0x0110}, {0x00F0}, UCOL_LESS
},
5444 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
5445 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
5448 const char* fiStdRules
[] = {
5449 "[import fi-u-co-standard]",
5452 const OneTestCase fiStdTests
[] = {
5453 { {0x0110}, {0x00F0}, UCOL_GREATER
},
5454 { {0x00a3}, {0x00a5}, UCOL_LESS
},
5455 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
5458 /* Both European Ordering Rules and Fi Standard Rules. */
5459 const char* eorFiStdRules
[] = {
5460 "[import root-u-co-eor][import fi-u-co-standard]",
5463 /* This is essentially same as the one before once fi.txt is updated with import. */
5464 const char* fiEorRules
[] = {
5465 "[import fi-u-co-eor]",
5468 const OneTestCase fiEorTests
[] = {
5469 { {0x0110}, {0x00F0}, UCOL_GREATER
},
5470 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
5471 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
5474 doTestOneTestCase(defaultTests
, UPRV_LENGTHOF(defaultTests
), defaultRules
, UPRV_LENGTHOF(defaultRules
));
5475 doTestOneTestCase(eorTests
, UPRV_LENGTHOF(eorTests
), eorRules
, UPRV_LENGTHOF(eorRules
));
5476 doTestOneTestCase(fiStdTests
, UPRV_LENGTHOF(fiStdTests
), fiStdRules
, UPRV_LENGTHOF(fiStdRules
));
5477 doTestOneTestCase(fiEorTests
, UPRV_LENGTHOF(fiEorTests
), eorFiStdRules
, UPRV_LENGTHOF(eorFiStdRules
));
5479 log_knownIssue("8962", NULL
);
5480 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
5483 "[import root-u-co-eor][import fi-u-co-standard]"
5488 /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */
5495 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
5496 * the resource files are built with -includeUnihanColl option.
5497 * TODO: Uncomment this function and make it work when unihan rules are built by default.
5499 static void TestImportRulesCJKWithUnihan(void)
5502 const char* defaultRules
[] = {
5503 "&a<b", /* Dummy rule. */
5506 const OneTestCase defaultTests
[] = {
5507 { {0x3402}, {0x4e1e}, UCOL_GREATER
},
5510 /* European Ordering rules: ignore currency characters. */
5511 const char* unihanRules
[] = {
5512 "[import ko-u-co-unihan]",
5515 const OneTestCase unihanTests
[] = {
5516 { {0x3402}, {0x4e1e}, UCOL_LESS
},
5519 doTestOneTestCase(defaultTests
, UPRV_LENGTHOF(defaultTests
), defaultRules
, UPRV_LENGTHOF(defaultRules
));
5520 doTestOneTestCase(unihanTests
, UPRV_LENGTHOF(unihanTests
), unihanRules
, UPRV_LENGTHOF(unihanRules
));
5525 static void TestImport(void)
5529 UCollator
* viescoll
;
5530 UCollator
* importviescoll
;
5532 UErrorCode status
= U_ZERO_ERROR
;
5534 int32_t viruleslength
;
5536 int32_t esruleslength
;
5538 int32_t viesruleslength
;
5539 char srules
[500] = "[import vi][import es]";
5541 uint32_t length
= 0;
5554 USet
* importTailoredSet
;
5557 vicoll
= ucol_open("vi", &status
);
5558 if(U_FAILURE(status
)){
5559 log_err_status(status
, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status
));
5563 virules
= (UChar
*) ucol_getRules(vicoll
, &viruleslength
);
5564 if(viruleslength
== 0) {
5565 log_data_err("missing vi tailoring rule string\n");
5569 escoll
= ucol_open("es", &status
);
5570 esrules
= (UChar
*) ucol_getRules(escoll
, &esruleslength
);
5571 viesrules
= (UChar
*)uprv_malloc((viruleslength
+esruleslength
+1)*sizeof(UChar
*));
5573 u_strcat(viesrules
, virules
);
5574 u_strcat(viesrules
, esrules
);
5575 viesruleslength
= viruleslength
+ esruleslength
;
5576 viescoll
= ucol_openRules(viesrules
, viesruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5578 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5579 length
= u_unescape(srules
, rules
, 500);
5580 importviescoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5581 if(U_FAILURE(status
)){
5582 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5586 tailoredSet
= ucol_getTailoredSet(viescoll
, &status
);
5587 importTailoredSet
= ucol_getTailoredSet(importviescoll
, &status
);
5589 if(!uset_equals(tailoredSet
, importTailoredSet
)){
5590 log_err("Tailored sets not equal");
5593 uset_close(importTailoredSet
);
5595 itemCount
= uset_getItemCount(tailoredSet
);
5597 for( i
= 0; i
< itemCount
; i
++){
5598 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
5600 for (; start
<= end
; start
++){
5602 U16_APPEND(str
, k
, 500, start
, b
);
5603 (void)b
; /* Suppress set but not used warning. */
5604 ucol_getSortKey(viescoll
, str
, 1, sk1
, 500);
5605 ucol_getSortKey(importviescoll
, str
, 1, sk2
, 500);
5606 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5607 log_err("Sort key for %s not equal\n", str
);
5612 ucol_getSortKey(viescoll
, str
, strLength
, sk1
, 500);
5613 ucol_getSortKey(importviescoll
, str
, strLength
, sk2
, 500);
5614 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5615 log_err("ZZSort key for %s not equal\n", str
);
5622 uset_close(tailoredSet
);
5624 uprv_free(viesrules
);
5628 ucol_close(viescoll
);
5629 ucol_close(importviescoll
);
5632 static void TestImportWithType(void)
5636 UCollator
* videcoll
;
5637 UCollator
* importvidecoll
;
5639 UErrorCode status
= U_ZERO_ERROR
;
5640 const UChar
* virules
;
5641 int32_t viruleslength
;
5642 const UChar
* derules
;
5643 int32_t deruleslength
;
5645 int32_t videruleslength
;
5646 const char srules
[500] = "[import vi][import de-u-co-phonebk]";
5648 uint32_t length
= 0;
5660 USet
* importTailoredSet
;
5662 vicoll
= ucol_open("vi", &status
);
5663 if(U_FAILURE(status
)){
5664 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5667 virules
= ucol_getRules(vicoll
, &viruleslength
);
5668 if(viruleslength
== 0) {
5669 log_data_err("missing vi tailoring rule string\n");
5673 /* decoll = ucol_open("de@collation=phonebook", &status); */
5674 decoll
= ucol_open("de-u-co-phonebk", &status
);
5675 if(U_FAILURE(status
)){
5676 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5681 derules
= ucol_getRules(decoll
, &deruleslength
);
5682 viderules
= (UChar
*)uprv_malloc((viruleslength
+deruleslength
+1)*sizeof(UChar
*));
5684 u_strcat(viderules
, virules
);
5685 u_strcat(viderules
, derules
);
5686 videruleslength
= viruleslength
+ deruleslength
;
5687 videcoll
= ucol_openRules(viderules
, videruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5689 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5690 length
= u_unescape(srules
, rules
, 500);
5691 importvidecoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5692 if(U_FAILURE(status
)){
5693 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5697 tailoredSet
= ucol_getTailoredSet(videcoll
, &status
);
5698 importTailoredSet
= ucol_getTailoredSet(importvidecoll
, &status
);
5700 if(!uset_equals(tailoredSet
, importTailoredSet
)){
5701 log_err("Tailored sets not equal");
5704 uset_close(importTailoredSet
);
5706 itemCount
= uset_getItemCount(tailoredSet
);
5708 for( i
= 0; i
< itemCount
; i
++){
5709 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
5711 for (; start
<= end
; start
++){
5713 U16_APPEND_UNSAFE(str
, k
, start
);
5714 ucol_getSortKey(videcoll
, str
, 1, sk1
, 500);
5715 ucol_getSortKey(importvidecoll
, str
, 1, sk2
, 500);
5716 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5717 log_err("Sort key for %s not equal\n", str
);
5722 ucol_getSortKey(videcoll
, str
, strLength
, sk1
, 500);
5723 ucol_getSortKey(importvidecoll
, str
, strLength
, sk2
, 500);
5724 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
5725 log_err("Sort key for %s not equal\n", str
);
5732 uset_close(tailoredSet
);
5734 uprv_free(viderules
);
5736 ucol_close(videcoll
);
5737 ucol_close(importvidecoll
);
5742 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5743 static const UChar longUpperStr1
[]= { /* 155 chars */
5744 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5745 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5746 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5747 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5748 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5749 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5750 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5751 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5752 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5753 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5756 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5757 static const UChar longUpperStr2
[]= { /* 125 chars, > 128 collation elements */
5758 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5759 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5760 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5761 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5762 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5765 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5766 static const UChar longUpperStr3
[]= { /* 324 chars */
5767 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5768 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5769 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5770 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5771 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5772 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5773 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5774 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5775 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5782 const UChar
* longUpperStrPtr
;
5783 int32_t longUpperStrLen
;
5786 /* String pointers must be in reverse collation order of the corresponding strings */
5787 static const LongUpperStrItem longUpperStrItems
[] = {
5788 { longUpperStr1
, UPRV_LENGTHOF(longUpperStr1
) },
5789 { longUpperStr2
, UPRV_LENGTHOF(longUpperStr2
) },
5790 { longUpperStr3
, UPRV_LENGTHOF(longUpperStr3
) },
5794 enum { kCollKeyLenMax
= 850 }; /* may change with collation changes */
5796 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5797 static void TestCaseLevelBufferOverflow(void)
5799 UErrorCode status
= U_ZERO_ERROR
;
5800 UCollator
* ucol
= ucol_open("root", &status
);
5801 if ( U_SUCCESS(status
) ) {
5802 ucol_setAttribute(ucol
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
5803 if ( U_SUCCESS(status
) ) {
5804 const LongUpperStrItem
* itemPtr
;
5805 uint8_t sortKeyA
[kCollKeyLenMax
], sortKeyB
[kCollKeyLenMax
];
5806 for ( itemPtr
= longUpperStrItems
; itemPtr
->longUpperStrPtr
!= NULL
; itemPtr
++ ) {
5808 if (itemPtr
> longUpperStrItems
) {
5809 uprv_strcpy((char *)sortKeyB
, (char *)sortKeyA
);
5811 sortKeyLen
= ucol_getSortKey(ucol
, itemPtr
->longUpperStrPtr
, itemPtr
->longUpperStrLen
, sortKeyA
, kCollKeyLenMax
);
5812 if (sortKeyLen
<= 0 || sortKeyLen
> kCollKeyLenMax
) {
5813 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen
);
5816 if ( itemPtr
> longUpperStrItems
) {
5817 int compareResult
= uprv_strcmp((char *)sortKeyA
, (char *)sortKeyB
);
5818 if (compareResult
>= 0) {
5819 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult
);
5824 log_err_status(status
, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status
));
5828 log_err_status(status
, "ERROR in ucol_open for root: %s\n", myErrorName(status
));
5832 /* Test for #10595 */
5833 static const UChar testJapaneseName
[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5834 #define KEY_PART_SIZE 16
5836 static void TestNextSortKeyPartJaIdentical(void)
5838 UErrorCode status
= U_ZERO_ERROR
;
5840 uint8_t keyPart
[KEY_PART_SIZE
];
5842 uint32_t state
[2] = {0, 0};
5845 coll
= ucol_open("ja", &status
);
5846 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
5847 if (U_FAILURE(status
)) {
5848 log_err_status(status
, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status
));
5852 uiter_setString(&iter
, testJapaneseName
, 5);
5853 keyPartLen
= KEY_PART_SIZE
;
5854 while (keyPartLen
== KEY_PART_SIZE
) {
5855 keyPartLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, keyPart
, KEY_PART_SIZE
, &status
);
5856 if (U_FAILURE(status
)) {
5857 log_err_status(status
, "ERROR: in iterating next sort key part: %s\n", myErrorName(status
));
/*
 * TEST(x) registers the test function x with addTest() on `root`, under the
 * path "tscoll/cmsccoll/" followed by the function's own name (produced by
 * the # stringizing operator).
 */
5865 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
/*
 * Registers the tests named below on the test tree rooted at *root, one
 * TEST() invocation per test. Entries wrapped in comments are not
 * registered; the trailing comment on each says why where one is given.
 */
5867 void addMiscCollTest(TestNode
** root
)
5869 TEST(TestRuleOptions
);
5870 TEST(TestBeforePrefixFailure
);
5871 TEST(TestContractionClosure
);
5872 TEST(TestPrefixCompose
);
5873 TEST(TestStrCollIdenticalPrefix
);
5875 TEST(TestNewJapanese
);
5876 /*TEST(TestLimitations);*/
5878 TEST(TestExtremeCompression
);
5879 TEST(TestSurrogates
);
5880 TEST(TestVariableTopSetting
);
5881 TEST(TestMaxVariable
);
5882 TEST(TestBocsuCoverage
);
5883 TEST(TestCyrillicTailoring
);
5885 TEST(IncompleteCntTest
);
5886 TEST(BlackBirdTest
);
5888 TEST(BillFairmanTest
);
5890 TEST(TestImplicitTailoring
);
5891 TEST(TestFCDProblem
);
5892 TEST(TestEmptyRule
);
5893 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5895 /*TEST(TestJ831);*/ /* we changed lv locale */
5897 TEST(TestHangulTailoring
);
5899 TEST(TestIncrementalNormalize
);
5900 TEST(TestComposeDecompose
);
5901 TEST(TestCompressOverlap
);
5902 TEST(TestContraction
);
5903 TEST(TestExpansion
);
5904 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5905 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5907 TEST(TestSuppressContractions
);
5909 TEST(TestHebrewUCA
);
5910 TEST(TestPartialSortKeyTermination
);
5915 TEST(TestNumericCollation
);
5916 TEST(TestTibetanConformance
);
5917 TEST(TestPinyinProblem
);
5918 TEST(TestSeparateTrees
);
5919 TEST(TestBeforePinyin
);
5920 TEST(TestBeforeTightening
);
5921 /*TEST(TestMoreBefore);*/
5922 TEST(TestTailorNULL
);
5923 TEST(TestUpperFirstQuaternary
);
5929 TEST(TestSortKeyConsistency
);
5930 TEST(TestVI5913
); /* VI, RO tailored rules */
5931 TEST(TestCroatianSortKey
);
5932 TEST(TestTailor6179
);
5933 TEST(TestUCAPrecontext
);
5934 TEST(TestOutOfBuffer5468
);
5935 TEST(TestSameStrengthList
);
5937 TEST(TestSameStrengthListQuoted
);
5938 TEST(TestSameStrengthListSupplemental
);
5939 TEST(TestSameStrengthListQwerty
);
5940 TEST(TestSameStrengthListQuotedQwerty
);
5941 TEST(TestSameStrengthListRanges
);
5942 TEST(TestSameStrengthListSupplementalRanges
);
5943 TEST(TestSpecialCharacters
);
5944 TEST(TestPrivateUseCharacters
);
5945 TEST(TestPrivateUseCharactersInList
);
5946 TEST(TestPrivateUseCharactersInRange
);
5947 TEST(TestInvalidListsAndRanges
);
5948 TEST(TestImportRulesDeWithPhonebook
);
5949 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5950 /* TEST(TestImportRulesCJKWithUnihan); */
5952 TEST(TestImportWithType
);
5954 TEST(TestBeforeRuleWithScriptReordering
);
5955 TEST(TestNonLeadBytesDuringCollationReordering
);
5956 TEST(TestReorderingAPI
);
5957 TEST(TestReorderingAPIWithRuleCreatedCollator
);
5958 TEST(TestEquivalentReorderingScripts
);
5959 TEST(TestGreekFirstReorder
);
5960 TEST(TestGreekLastReorder
);
5961 TEST(TestNonScriptReorder
);
5962 TEST(TestHaniReorder
);
5963 TEST(TestHaniReorderWithOtherRules
);
5964 TEST(TestMultipleReorder
);
5965 TEST(TestReorderingAcrossCloning
);
5966 TEST(TestReorderWithNumericCollation
);
5968 TEST(TestCaseLevelBufferOverflow
);
5969 TEST(TestNextSortKeyPartJaIdentical
);
5972 #endif /* #if !UCONFIG_NO_COLLATION */