1 /********************************************************************
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
11 * Date Name Description
12 * Madhu Katragadda Ported for C API
13 * 02/19/01 synwee Modified test case for new collation iterator
14 *********************************************************************************/
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_COLLATION
24 #include "unicode/ucol.h"
25 #include "unicode/uloc.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28 #include "unicode/putil.h"
40 extern uint8_t ucol_uprv_getCaseBits(const UChar
*, uint32_t, UErrorCode
*);
/* Registers the collation-iterator test functions of this file on the test
 * tree `root`, each under its own "tscoll/citertst/<TestName>" path. */
42 void addCollIterTest(TestNode
** root
)
44 addTest(root
, &TestPrevious
, "tscoll/citertst/TestPrevious");
45 addTest(root
, &TestOffset
, "tscoll/citertst/TestOffset");
46 addTest(root
, &TestSetText
, "tscoll/citertst/TestSetText");
47 addTest(root
, &TestMaxExpansion
, "tscoll/citertst/TestMaxExpansion");
48 addTest(root
, &TestUnicodeChar
, "tscoll/citertst/TestUnicodeChar");
49 addTest(root
, &TestNormalizedUnicodeChar
,
50 "tscoll/citertst/TestNormalizedUnicodeChar");
51 addTest(root
, &TestNormalization
, "tscoll/citertst/TestNormalization");
52 addTest(root
, &TestBug672
, "tscoll/citertst/TestBug672");
53 addTest(root
, &TestBug672Normalize
, "tscoll/citertst/TestBug672Normalize");
54 addTest(root
, &TestSmallBuffer
, "tscoll/citertst/TestSmallBuffer");
55 addTest(root
, &TestCEs
, "tscoll/citertst/TestCEs");
56 addTest(root
, &TestDiscontiguos
, "tscoll/citertst/TestDiscontiguos");
57 addTest(root
, &TestCEBufferOverflow
, "tscoll/citertst/TestCEBufferOverflow");
58 addTest(root
, &TestCEValidity
, "tscoll/citertst/TestCEValidity");
59 addTest(root
, &TestSortKeyValidity
, "tscoll/citertst/TestSortKeyValidity");
62 /* The locales we support */
/* Locale IDs handed to ucol_open() by TestBug672 and TestBug672Normalize,
 * which loop over indices 0..2 of this table. */
64 static const char * LOCALES
[] = {"en_AU", "en_BE", "en_CA"};
/* Offset regression test run once per LOCALES entry.
 * For each locale it opens a collator plus element iterators over `pattern`
 * ("resume") and `text`, then records ucol_getOffset(titer) at three points:
 *   result[i][0] - after ucol_setOffset(titer, u_strlen(pattern))
 *   result[i][1] - after one ucol_previous(titer)
 *   result[i][2] - after setting the offset to the current offset + 1
 * Finally the rows recorded for the three locales are compared and an error
 * is logged if they differ.
 * NOTE(review): the declarations and several statements are not visible in
 * this view. The uprv_memcmp() calls compare only 3 bytes - confirm that this
 * matches the element type of `result` (it would not cover three 32-bit
 * offsets). */
66 static void TestBug672() {
67 UErrorCode status
= U_ZERO_ERROR
;
73 u_uastrcpy(pattern
, "resume");
74 u_uastrcpy(text
, "Time to resume updating my resume.");
76 for (i
= 0; i
< 3; ++ i
) {
77 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
78 UCollationElements
*pitr
= ucol_openElements(coll
, pattern
, -1,
80 UCollationElements
*titer
= ucol_openElements(coll
, text
, -1,
82 if (U_FAILURE(status
)) {
83 log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
88 log_verbose("locale tested %s\n", LOCALES
[i
]);
90 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
93 if (U_FAILURE(status
)) {
94 log_err("ERROR: reversing collation iterator :%s\n",
100 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
101 if (U_FAILURE(status
)) {
102 log_err("ERROR: setting offset in collator :%s\n",
103 myErrorName(status
));
106 result
[i
][0] = ucol_getOffset(titer
);
107 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
110 ucol_previous(titer
, &status
);
111 result
[i
][1] = ucol_getOffset(titer
);
112 log_verbose("Current offset %d after previous\n", result
[i
][1]);
114 /* Add one to index */
115 log_verbose("Adding one to current offset...\n");
116 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
117 if (U_FAILURE(status
)) {
118 log_err("ERROR: setting offset in collator :%s\n",
119 myErrorName(status
));
122 result
[i
][2] = ucol_getOffset(titer
);
123 log_verbose("Current offset in text = %d\n", result
[i
][2]);
124 ucol_closeElements(pitr
);
125 ucol_closeElements(titer
);
129 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
130 uprv_memcmp(result
[1], result
[2], 3) != 0) {
131 log_err("ERROR: Different locales have different offsets at the same character\n");
137 /* Running this test with normalization enabled showed up a bug in the incremental
138 normalization code. */
/* Same offset scenario as TestBug672, but with UCOL_NORMALIZATION_MODE set to
 * UCOL_ON on each collator before the element iterators are opened.
 * Per LOCALES entry it records ucol_getOffset(titer) after setting the offset
 * to u_strlen(pattern), after one ucol_previous(), and after moving the
 * offset forward by one, then compares the rows across locales.
 * NOTE(review): declarations and some statements are not visible in this
 * view; the uprv_memcmp() calls compare only 3 bytes - confirm against the
 * element type of `result`. */
139 static void TestBug672Normalize() {
140 UErrorCode status
= U_ZERO_ERROR
;
146 u_uastrcpy(pattern
, "resume");
147 u_uastrcpy(text
, "Time to resume updating my resume.");
149 for (i
= 0; i
< 3; ++ i
) {
150 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
151 UCollationElements
*pitr
= NULL
;
152 UCollationElements
*titer
= NULL
;
154 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
156 pitr
= ucol_openElements(coll
, pattern
, -1, &status
);
157 titer
= ucol_openElements(coll
, text
, -1, &status
);
158 if (U_FAILURE(status
)) {
159 log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
160 myErrorName(status
));
164 log_verbose("locale tested %s\n", LOCALES
[i
]);
166 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
169 if (U_FAILURE(status
)) {
170 log_err("ERROR: reversing collation iterator :%s\n",
171 myErrorName(status
));
176 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
177 if (U_FAILURE(status
)) {
178 log_err("ERROR: setting offset in collator :%s\n",
179 myErrorName(status
));
182 result
[i
][0] = ucol_getOffset(titer
);
183 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
186 ucol_previous(titer
, &status
);
187 result
[i
][1] = ucol_getOffset(titer
);
188 log_verbose("Current offset %d after previous\n", result
[i
][1]);
190 /* Add one to index */
191 log_verbose("Adding one to current offset...\n");
192 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
193 if (U_FAILURE(status
)) {
194 log_err("ERROR: setting offset in collator :%s\n",
195 myErrorName(status
));
198 result
[i
][2] = ucol_getOffset(titer
);
199 log_verbose("Current offset in text = %d\n", result
[i
][2]);
200 ucol_closeElements(pitr
);
201 ucol_closeElements(titer
);
205 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
206 uprv_memcmp(result
[1], result
[2], 3) != 0) {
207 log_err("ERROR: Different locales have different offsets at the same character\n");
215 * Test for CollationElementIterator previous and next for the whole set of
216 * unicode characters.
/* Exercises element iterators of the "en_US" collator over batches of code
 * points. The outer loop starts at code point 1 and runs while the value is
 * below 0xFFFE; code points for which u_isdefined() is true are appended to
 * the `source` buffer through the `test` cursor. For every batch an iterator
 * is opened twice: once with an explicit u_strlen(source) length and once
 * with length -1 (the "null termination test").
 * NOTE(review): the checks performed on each iterator and the loop increments
 * are not visible in this view. */
218 static void TestUnicodeChar()
222 UCollationElements
*iter
;
223 UErrorCode status
= U_ZERO_ERROR
;
227 en_us
= ucol_open("en_US", &status
);
228 if (U_FAILURE(status
)){
229 log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
230 myErrorName(status
));
234 for (codepoint
= 1; codepoint
< 0xFFFE;)
238 while (codepoint
% 0xFF != 0)
240 if (u_isdefined(codepoint
))
241 *(test
++) = codepoint
;
245 if (u_isdefined(codepoint
))
246 *(test
++) = codepoint
;
248 if (codepoint
!= 0xFFFF)
252 iter
=ucol_openElements(en_us
, source
, u_strlen(source
), &status
);
253 if(U_FAILURE(status
)){
254 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
255 myErrorName(status
));
259 /* A basic test to see if it's working at all */
260 log_verbose("codepoint testing %x\n", codepoint
);
262 ucol_closeElements(iter
);
264 /* null termination test */
265 iter
=ucol_openElements(en_us
, source
, -1, &status
);
266 if(U_FAILURE(status
)){
267 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
268 myErrorName(status
));
272 /* A basic test to see if it's working at all */
274 ucol_closeElements(iter
);
281 * Test for CollationElementIterator previous and next for the whole set of
282 * unicode characters with normalization on.
/* Counterpart of TestUnicodeChar that uses the "th_TH" collator. Code points
 * from 1 up to (but excluding) 0xFFFE that pass u_isdefined() are gathered in
 * `source`, and for every batch an element iterator is opened first with an
 * explicit u_strlen(source) length and then with length -1.
 * NOTE(review): the checks performed on each iterator and the loop increments
 * are not visible in this view. */
284 static void TestNormalizedUnicodeChar()
288 UCollationElements
*iter
;
289 UErrorCode status
= U_ZERO_ERROR
;
293 /* thai should have normalization on */
294 th_th
= ucol_open("th_TH", &status
);
295 if (U_FAILURE(status
)){
296 log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
297 myErrorName(status
));
301 for (codepoint
= 1; codepoint
< 0xFFFE;)
305 while (codepoint
% 0xFF != 0)
307 if (u_isdefined(codepoint
))
308 *(test
++) = codepoint
;
312 if (u_isdefined(codepoint
))
313 *(test
++) = codepoint
;
315 if (codepoint
!= 0xFFFF)
319 iter
=ucol_openElements(th_th
, source
, u_strlen(source
), &status
);
320 if(U_FAILURE(status
)){
321 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
322 myErrorName(status
));
328 ucol_closeElements(iter
);
330 iter
=ucol_openElements(th_th
, source
, -1, &status
);
331 if(U_FAILURE(status
)){
332 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
333 myErrorName(status
));
339 ucol_closeElements(iter
);
346 * Test the incremental normalization
/* Builds a collator with ucol_openRules() from a tailoring made of a base
 * letter and combining marks (unescaped into `rule` with a capacity of 50),
 * with normalization UCOL_ON and tertiary strength, and then opens element
 * iterators over the unescaped `testdata` strings. Entries 0 and 1 are opened
 * individually; a further entry selected by `count` is opened both with its
 * explicit length and with length -1.
 * Each string is unescaped into `source` with a capacity of 10 UChars.
 * NOTE(review): the loop over `count` and the checks run on each iterator are
 * not visible in this view. */
348 static void TestNormalization()
350 UErrorCode status
= U_ZERO_ERROR
;
352 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
355 int rulelen
= u_unescape(str
, rule
, 50);
357 const char *testdata
[] =
358 {"\\u1ED9", "o\\u0323\\u0302",
359 "\\u0300\\u0315", "\\u0315\\u0300",
360 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
361 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
362 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
363 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
364 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
367 UCollationElements
*iter
;
369 coll
= ucol_openRules(rule
, rulelen
, UCOL_ON
, UCOL_TERTIARY
, NULL
, &status
);
370 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
371 if (U_FAILURE(status
)){
372 log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
373 myErrorName(status
));
377 srclen
= u_unescape(testdata
[0], source
, 10);
378 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
380 ucol_closeElements(iter
);
382 srclen
= u_unescape(testdata
[1], source
, 10);
383 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
385 ucol_closeElements(iter
);
388 srclen
= u_unescape(testdata
[count
], source
, 10);
389 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
391 if (U_FAILURE(status
)){
392 log_err("ERROR: in creation of collator element iterator\n %s\n",
393 myErrorName(status
));
397 ucol_closeElements(iter
);
399 iter
= ucol_openElements(coll
, source
, -1, &status
);
401 if (U_FAILURE(status
)){
402 log_err("ERROR: in creation of collator element iterator\n %s\n",
403 myErrorName(status
));
407 ucol_closeElements(iter
);
414 * Test for CollationElementIterator.previous()
416 * @bug 4108758 - Make sure it works with contracting characters
/* Opens element iterators for a series of collators so that backward and
 * forward iteration can be exercised on each:
 *   - the "en_US" collator over test1,
 *   - c1: rule with contracting sequences (ch, cH, Ch, CH), normalization
 *         UCOL_OFF, over "abchdcba",
 *   - c2: rule with an expansion (c/abd), normalization UCOL_OFF, over "abcd",
 *   - c3: rule with both an expansion and a contraction, normalization
 *         UCOL_DEFAULT, over "abcdbchdc",
 *   - the "th_TH" and "ja_JP" collators over `source`.
 * test1/test2 are heap buffers of 50 UChars and `source` one of 20 UChars,
 * all filled with u_uastrcpy().
 * NOTE(review): the checks run on each iterator, the contents of `source` for
 * the Thai and Japanese cases, and the release of collators and buffers are
 * not visible in this view. The malloc() results are used without a visible
 * NULL check. */
419 static void TestPrevious()
421 UCollator
*coll
=NULL
;
424 UCollator
*c1
, *c2
, *c3
;
425 UCollationElements
*iter
;
426 UErrorCode status
= U_ZERO_ERROR
;
428 test1
=(UChar
*)malloc(sizeof(UChar
) * 50);
429 test2
=(UChar
*)malloc(sizeof(UChar
) * 50);
430 u_uastrcpy(test1
, "What subset of all possible test cases?");
431 u_uastrcpy(test2
, "has the highest probability of detecting");
432 coll
= ucol_open("en_US", &status
);
434 iter
=ucol_openElements(coll
, test1
, u_strlen(test1
), &status
);
435 log_verbose("English locale testing back and forth\n");
436 if(U_FAILURE(status
)){
437 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
438 myErrorName(status
));
442 /* A basic test to see if it's working at all */
444 ucol_closeElements(iter
);
447 /* Test with a contracting character sequence */
448 u_uastrcpy(rule
, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
449 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
451 log_verbose("Contraction rule testing back and forth with no normalization\n");
453 if (c1
== NULL
|| U_FAILURE(status
))
455 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
456 myErrorName(status
));
459 source
=(UChar
*)malloc(sizeof(UChar
) * 20);
460 u_uastrcpy(source
, "abchdcba");
461 iter
=ucol_openElements(c1
, source
, u_strlen(source
), &status
);
462 if(U_FAILURE(status
)){
463 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
464 myErrorName(status
));
468 ucol_closeElements(iter
);
471 /* Test with an expanding character sequence */
472 u_uastrcpy(rule
, "&a < b < c/abd < d");
473 c2
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
474 log_verbose("Expansion rule testing back and forth with no normalization\n");
475 if (c2
== NULL
|| U_FAILURE(status
))
477 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
478 myErrorName(status
));
481 u_uastrcpy(source
, "abcd");
482 iter
=ucol_openElements(c2
, source
, u_strlen(source
), &status
);
483 if(U_FAILURE(status
)){
484 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
485 myErrorName(status
));
489 ucol_closeElements(iter
);
492 u_uastrcpy(rule
, "&a < b < c/aba < d < z < ch");
493 c3
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
494 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
496 if (c3
== NULL
|| U_FAILURE(status
))
498 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
499 myErrorName(status
));
502 u_uastrcpy(source
, "abcdbchdc");
503 iter
=ucol_openElements(c3
, source
, u_strlen(source
), &status
);
504 if(U_FAILURE(status
)){
505 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
506 myErrorName(status
));
510 ucol_closeElements(iter
);
522 coll
= ucol_open("th_TH", &status
);
523 log_verbose("Thai locale testing back and forth with normalization\n");
524 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
525 if(U_FAILURE(status
)){
526 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
527 myErrorName(status
));
531 ucol_closeElements(iter
);
541 coll
= ucol_open("ja_JP", &status
);
542 log_verbose("Japanese locale testing back and forth with normalization\n");
543 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
544 if(U_FAILURE(status
)){
545 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
546 myErrorName(status
));
550 ucol_closeElements(iter
);
559 * Test for getOffset() and setOffset()
/* Exercises ucol_getOffset() and ucol_setOffset() on an "en_US" iterator.
 * Steps visible here:
 *   1. Boundaries: after setOffset(0) ucol_previous() must return
 *      UCOL_NULLORDER, and after setOffset(u_strlen(test1)) ucol_next() must
 *      return UCOL_NULLORDER.
 *   2. After getOrders() has consumed the iterator, the offset must equal
 *      u_strlen(test1).
 *   3. After setOffset(0) the iterator is compared against a freshly opened
 *      `pristine` iterator via assertEqual().
 *   4. With UCOL_NORMALIZATION_MODE set to UCOL_ON, an iterator over the first
 *      4 UChars of test1 is walked forward and then backward while the
 *      reported offsets are checked.
 * NOTE(review): in step 1 the message for the forward check says "end of the
 * backwards iteration", and in step 4 the first forward check compares the
 * offset against 1 while its message says "should be 0" - confirm which is
 * intended. The contents given to test1 for step 4, the malloc() NULL checks
 * and the cleanup code are not visible in this view. */
561 static void TestOffset()
563 UErrorCode status
= U_ZERO_ERROR
;
564 UCollator
*en_us
=NULL
;
565 UCollationElements
*iter
, *pristine
;
568 int32_t orderLength
=0;
570 test1
=(UChar
*)malloc(sizeof(UChar
) * 50);
571 test2
=(UChar
*)malloc(sizeof(UChar
) * 50);
572 u_uastrcpy(test1
, "What subset of all possible test cases?");
573 u_uastrcpy(test2
, "has the highest probability of detecting");
574 en_us
= ucol_open("en_US", &status
);
575 log_verbose("Testing getOffset and setOffset for collations\n");
576 iter
= ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
577 if(U_FAILURE(status
)){
578 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
579 myErrorName(status
));
584 /* testing boundaries */
585 ucol_setOffset(iter
, 0, &status
);
586 if (U_FAILURE(status
) || ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
587 log_err("Error: After setting offset to 0, we should be at the end "
588 "of the backwards iteration");
590 ucol_setOffset(iter
, u_strlen(test1
), &status
);
591 if (U_FAILURE(status
) || ucol_next(iter
, &status
) != UCOL_NULLORDER
) {
592 log_err("Error: After setting offset to end of the string, we should "
593 "be at the end of the backwards iteration");
596 /* Run all the way through the iterator, then get the offset */
598 orders
= getOrders(iter
, &orderLength
);
600 offset
= ucol_getOffset(iter
);
602 if (offset
!= u_strlen(test1
))
604 log_err("offset at end != length %d vs %d\n", offset
,
608 /* Now set the offset back to the beginning and see if it works */
609 pristine
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
610 if(U_FAILURE(status
)){
611 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
612 myErrorName(status
));
616 status
= U_ZERO_ERROR
;
618 ucol_setOffset(iter
, 0, &status
);
619 if (U_FAILURE(status
))
621 log_err("setOffset failed. %s\n", myErrorName(status
));
625 assertEqual(iter
, pristine
);
628 ucol_closeElements(pristine
);
629 ucol_closeElements(iter
);
632 /* testing offsets in normalization buffer */
638 ucol_setAttribute(en_us
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
639 iter
= ucol_openElements(en_us
, test1
, 4, &status
);
640 if(U_FAILURE(status
)){
641 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
642 myErrorName(status
));
648 while (ucol_next(iter
, &status
) != UCOL_NULLORDER
&&
652 if (ucol_getOffset(iter
) != 1) {
653 log_err("ERROR: Offset of iteration should be 0\n");
657 if (ucol_getOffset(iter
) != 4) {
658 log_err("ERROR: Offset of iteration should be 4\n");
662 if (ucol_getOffset(iter
) != 3) {
663 log_err("ERROR: Offset of iteration should be 3\n");
671 while (ucol_previous(iter
, &status
) != UCOL_NULLORDER
&&
675 if (ucol_getOffset(iter
) != 3) {
676 log_err("ERROR: Offset of iteration should be 3\n");
680 if (ucol_getOffset(iter
) != 0) {
681 log_err("ERROR: Offset of iteration should be 0\n");
687 if(U_FAILURE(status
)){
688 log_err("ERROR: in iterating collation elements %s\n",
689 myErrorName(status
));
692 ucol_closeElements(iter
);
/* Exercises ucol_setText() on an "en_US" element iterator.
 * iter1 is opened over test1 and iter2 over test2. iter2 is advanced with
 * ucol_next() for a bounded number of steps (the loop stops once the counter
 * reaches 10 or UCOL_NULLORDER is returned), then re-targeted at test1 with
 * ucol_setText() and compared against iter1 via assertEqual(). Finally
 * ucol_setText() is called with a NULL text and a length of 2, after which
 * ucol_next() is expected to return UCOL_NULLORDER.
 * NOTE(review): on the ucol_next() failure path both iterators are closed;
 * whether the function returns there, and where the collator and the two
 * malloc()ed buffers are released, is not visible in this view. */
701 static void TestSetText()
704 UErrorCode status
= U_ZERO_ERROR
;
705 UCollator
*en_us
=NULL
;
706 UCollationElements
*iter1
, *iter2
;
707 test1
=(UChar
*)malloc(sizeof(UChar
) * 50);
708 test2
=(UChar
*)malloc(sizeof(UChar
) * 50);
709 u_uastrcpy(test1
, "What subset of all possible test cases?");
710 u_uastrcpy(test2
, "has the highest probability of detecting");
711 en_us
= ucol_open("en_US", &status
);
712 log_verbose("testing setText for Collation elements\n");
713 iter1
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
714 if(U_FAILURE(status
)){
715 log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
716 myErrorName(status
));
720 iter2
=ucol_openElements(en_us
, test2
, u_strlen(test2
), &status
);
721 if(U_FAILURE(status
)){
722 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
723 myErrorName(status
));
728 /* Run through the second iterator just to exercise it */
729 c
= ucol_next(iter2
, &status
);
732 while ( ++i
< 10 && (c
!= UCOL_NULLORDER
))
734 if (U_FAILURE(status
))
736 log_err("iter2->next() returned an error. %s\n", myErrorName(status
));
737 ucol_closeElements(iter2
);
738 ucol_closeElements(iter1
);
743 c
= ucol_next(iter2
, &status
);
746 /* Now set it to point to the same string as the first iterator */
747 ucol_setText(iter2
, test1
, u_strlen(test1
), &status
);
748 if (U_FAILURE(status
))
750 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
));
754 assertEqual(iter1
, iter2
);
757 /* Now set it to point to a null string with fake length*/
758 ucol_setText(iter2
, NULL
, 2, &status
);
759 if (U_FAILURE(status
))
761 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status
));
765 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
766 log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
770 ucol_closeElements(iter2
);
771 ucol_closeElements(iter1
);
778 * Test for getMaxExpansion()
/* Exercises ucol_getMaxExpansion() on collators built with ucol_openRules().
 * With the first rule set ("&a < ab < c/aba < d < z < ch"):
 *   - for each `ch` below 0xFFFF the iterator text is set to that single
 *     UChar, the elements are walked backward with ucol_previous(), and the
 *     reported maximum expansion `size` must not be smaller than `count`;
 *   - a series of single-character checks expects exact maxima of 1, 3 and 1,
 *     a two-unit supplementary sequence (built from 0xEFFFD with U16_APPEND)
 *     expects 2, and a last check expects a value no greater than 3.
 * A second collator (see the "special jamo" comment) expects a maximum of 6.
 * If the first collator cannot be opened, log_data_err() is used instead.
 * NOTE(review): the values assigned to `ch` before each check, the arguments
 * of the log_err() calls and the collator cleanup are not visible in this
 * view. Several messages say "< %d" or "> %d" although the condition tests
 * for inequality - confirm the wording. */
780 static void TestMaxExpansion()
782 UErrorCode status
= U_ZERO_ERROR
;
783 UCollator
*coll
;/*= ucol_open("en_US", &status);*/
785 UChar32 unassigned
= 0xEFFFD;
786 UChar supplementary
[2];
788 UBool isError
= FALSE
;
790 UCollationElements
*iter
;/*= ucol_openElements(coll, &ch, 1, &status);*/
791 uint32_t temporder
= 0;
794 u_uastrcpy(rule
, "&a < ab < c/aba < d < z < ch");
795 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
796 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
797 if(U_SUCCESS(status
) && coll
) {
798 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
800 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
807 ucol_setText(iter
, &ch
, 1, &status
);
808 order
= ucol_previous(iter
, &status
);
810 /* thai management */
812 order
= ucol_previous(iter
, &status
);
814 while (U_SUCCESS(status
) &&
815 ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
819 size
= ucol_getMaxExpansion(iter
, order
);
820 if (U_FAILURE(status
) || size
< count
) {
821 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
826 /* testing for exact max expansion */
831 ucol_setText(iter
, &ch
, 1, &status
);
832 order
= ucol_previous(iter
, &status
);
833 size
= ucol_getMaxExpansion(iter
, order
);
834 if (U_FAILURE(status
) || size
!= 1) {
835 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
842 ucol_setText(iter
, &ch
, 1, &status
);
843 temporder
= ucol_previous(iter
, &status
);
845 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 3) {
846 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
851 ucol_setText(iter
, &ch
, 1, &status
);
852 temporder
= ucol_previous(iter
, &status
);
854 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 1) {
855 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
859 U16_APPEND(supplementary
, index
, 2, unassigned
, isError
);
860 ucol_setText(iter
, supplementary
, 2, &status
);
861 sorder
= ucol_previous(iter
, &status
);
863 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, sorder
) != 2) {
864 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
871 ucol_setText(iter
, &ch
, 1, &status
);
872 temporder
= ucol_previous(iter
, &status
);
873 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) > 3) {
874 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
878 ucol_closeElements(iter
);
881 /* testing special jamo &a<\u1160 */
893 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
894 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
895 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
897 temporder
= ucol_previous(iter
, &status
);
898 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 6) {
899 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
903 ucol_closeElements(iter
);
906 log_data_err("Couldn't open collator\n");
/* Walks two collation element iterators forward in lockstep.
 * Each round fetches one element from i1 and one from i2 with ucol_next();
 * a mismatch is reported through log_err() together with the iteration
 * counter and both values. The loop ends once c1 is UCOL_NULLORDER.
 * @param i1 first iterator, advanced by this call
 * @param i2 second iterator, advanced by this call
 * NOTE(review): the condition guarding the log_err() call is not visible in
 * this view - presumably c1 != c2. */
912 static void assertEqual(UCollationElements
*i1
, UCollationElements
*i2
)
916 UErrorCode status
= U_ZERO_ERROR
;
920 c1
= ucol_next(i1
, &status
);
921 c2
= ucol_next(i2
, &status
);
925 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count
, c1
, c2
);
931 while (c1
!= UCOL_NULLORDER
);
935 * Testing iterators with extremely small buffers
/* Exercises element iteration over a long run of combining marks with the
 * "th_TH" collator. `teststr` is filled with 500 UChars alternating between
 * 0x300 (even index) and 0x31A (odd index), while `str` holds the two-unit
 * sequence 0x300 0x31A. The collation elements of both strings are collected
 * with getOrders() and the elements of the long string are compared against
 * orders[0] / orders[1] of the short one. Afterwards ucol_reset(testiter) is
 * called and the iterator's writableBuffer is expected to equal its
 * stackWritableBuffer again. Two more ucol_next() calls precede closing both
 * iterators. If the collator cannot be opened, log_data_err() is used.
 * NOTE(review): the loop that walks `testorders`, the size checks guarding
 * the first two log_err() calls, and the release of the getOrders() results
 * and the collator are not visible in this view. */
937 static void TestSmallBuffer()
939 UErrorCode status
= U_ZERO_ERROR
;
941 UCollationElements
*testiter
,
948 UChar str
[] = {0x300, 0x31A, 0};
950 creating a long string of decomposable characters,
951 since by default the writable buffer is of size 256
953 while (count
< 500) {
954 if ((count
& 1) == 0) {
955 teststr
[count
++] = 0x300;
958 teststr
[count
++] = 0x31A;
962 coll
= ucol_open("th_TH", &status
);
963 if(U_SUCCESS(status
) && coll
) {
964 testiter
= ucol_openElements(coll
, teststr
, 500, &status
);
965 iter
= ucol_openElements(coll
, str
, 2, &status
);
967 orders
= getOrders(iter
, &count
);
969 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
973 this will rearrange the string data to 250 characters of 0x300 first then
974 250 characters of 0x031A
976 testorders
= getOrders(testiter
, &count
);
979 log_err("Error decomposition does not give the right sized collation elements\n");
983 /* UCA collation element for 0x0F76 */
984 if ((count
> 250 && testorders
[-- count
] != orders
[1]) ||
985 (count
<= 250 && testorders
[-- count
] != orders
[0])) {
986 log_err("Error decomposition does not give the right collation element at %d count\n", count
);
994 ucol_reset(testiter
);
995 /* ensures that the writable buffer was cleared */
996 if (testiter
->iteratordata_
.writableBuffer
!=
997 testiter
->iteratordata_
.stackWritableBuffer
) {
998 log_err("Error Writable buffer in collation element iterator not reset\n");
1001 /* ensures closing of elements done properly to clear writable buffer */
1002 ucol_next(testiter
, &status
);
1003 ucol_next(testiter
, &status
);
1004 ucol_closeElements(testiter
);
1005 ucol_closeElements(iter
);
1008 log_data_err("Couldn't open collator\n");
1013 * Snippets of code from genuca
/* Maps one hexadecimal digit character to an int32_t.
 * Three ranges are distinguished: '0'-'9', 'a'-'f' and 'A'-'F'.
 * @param hex character to classify
 * NOTE(review): the return statements of each branch, and the result for any
 * other character, are not visible in this view - presumably the digit's
 * numeric value. */
1015 static int32_t hex2num(char hex
) {
1016 if(hex
>='0' && hex
<='9') {
1018 } else if(hex
>='a' && hex
<='f') {
1020 } else if(hex
>='A' && hex
<='F') {
1028 * Getting codepoints from a string
1029 * @param str character string contain codepoints separated by space and ended
1031 * @param codepoints array for storage, assuming size > 5
1032 * @return position at the end of the codepoint section
/* Parses groups of four hexadecimal digits from `str` into UChar values.
 * The first group starts at str[0]; each group is assembled from four
 * hex2num() results shifted by 12, 8, 4 and 0 bits. While the character
 * right after the current group is not ';', the next group is read starting
 * one character further on.
 * No validation of the digits or of the input length is visible here, so the
 * input is expected to hold well-formed 4-digit groups terminated by ';'.
 * NOTE(review): the statements that advance `codepoints`, terminate the
 * array and return are not visible in this view. */
1034 static char * getCodePoints(char *str
, UChar
*codepoints
) {
1035 char *pStartCP
= str
;
1036 char *pEndCP
= str
+ 4;
1038 *codepoints
= (UChar
)((hex2num(*pStartCP
) << 12) |
1039 (hex2num(*(pStartCP
+ 1)) << 8) |
1040 (hex2num(*(pStartCP
+ 2)) << 4) |
1041 (hex2num(*(pStartCP
+ 3))));
1043 while (*pEndCP
!= ';') {
1044 pStartCP
= pEndCP
+ 1;
1045 *codepoints
= (UChar
)((hex2num(*pStartCP
) << 12) |
1046 (hex2num(*(pStartCP
+ 1)) << 8) |
1047 (hex2num(*(pStartCP
+ 2)) << 4) |
1048 (hex2num(*(pStartCP
+ 3))));
1050 pEndCP
= pStartCP
+ 4;
1057 * Snippets of code from genuca
/* Reads characters from the cursor `*from` until `separator` is reached,
 * copying every character that is not a space into `buffer`.
 * Nothing is done unless *status indicates success on entry.
 * @param from      in/out cursor into the text being parsed
 * @param to        destination for the element
 * @param separator character that ends the element
 * @param status    ICU error code, checked with U_SUCCESS()
 * NOTE(review): the return type, the declaration of `buffer`, the advance of
 * `*from`, the copy into `to` and the return value are not visible in this
 * view; no bound on the number of copied characters is visible either. */
1060 readElement(char **from
, char *to
, char separator
, UErrorCode
*status
)
1062 if (U_SUCCESS(*status
)) {
1065 while (**from
!= separator
) {
1066 if (**from
!= ' ') {
1067 *(buffer
+i
++) = **from
;
1081 * Snippets of code from genuca
/* Combines the leading hex digits of the primary, secondary and tertiary
 * weight strings into one collation element value.
 * Only runs when *status indicates success. The end pointers are placed 4
 * characters into `primary` and 2 characters into `secondary`/`tertiary`;
 * when a string is longer than that, the character at the end pointer is
 * saved first. Each non-empty string is parsed with uprv_strtoul() in base
 * 16, and the result is assembled as
 *   (primary << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK
 *   | (secondary << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK
 *   | tertiary & UCOL_TERTIARYORDERMASK.
 * A saved primary character is written back afterwards.
 * NOTE(review): the return type, the remaining parameters, the temporary
 * truncation of the strings, the handling of primvalue <= 0xFF, the restore
 * of the secondary/tertiary characters and the return statement are not
 * visible in this view. */
1084 getSingleCEValue(char *primary
, char *secondary
, char *tertiary
,
1087 if (U_SUCCESS(*status
)) {
1089 char primsave
= '\0';
1090 char secsave
= '\0';
1091 char tersave
= '\0';
1092 char *primend
= primary
+4;
1093 char *secend
= secondary
+2;
1094 char *terend
= tertiary
+2;
1099 if (uprv_strlen(primary
) > 4) {
1100 primsave
= *primend
;
1104 if (uprv_strlen(secondary
) > 2) {
1109 if (uprv_strlen(tertiary
) > 2) {
1114 primvalue
= (*primary
!='\0')?uprv_strtoul(primary
, &primend
, 16):0;
1115 secvalue
= (*secondary
!='\0')?uprv_strtoul(secondary
, &secend
, 16):0;
1116 tervalue
= (*tertiary
!='\0')?uprv_strtoul(tertiary
, &terend
, 16):0;
1117 if(primvalue
<= 0xFF) {
1121 value
= ((primvalue
<< UCOL_PRIMARYORDERSHIFT
) & UCOL_PRIMARYORDERMASK
)
1122 | ((secvalue
<< UCOL_SECONDARYORDERSHIFT
) & UCOL_SECONDARYORDERMASK
)
1123 | (tervalue
& UCOL_TERTIARYORDERMASK
);
1125 if(primsave
!='\0') {
1126 *primend
= primsave
;
1140 * Getting collation elements generated from a string
1141 * @param str character string contain collation elements contained in [] and
1142 * separated by space
1143 * @param ce array for storage, assuming size > 20
1144 * @param status error status
1145 * @return position at the end of the codepoint section
/* Parses the bracketed collation elements that follow the first '[' in `str`
 * and stores them in `ces`.
 * For every "[primary, secondary, tertiary]" group the three fields are read
 * with readElement() (separators ',', ',' and ']') and the first value is
 * produced by getSingleCEValue(). Further digits of a field become
 * continuation elements: each starts from UCOL_CONTINUATION_MARKER and takes
 * four primary, two secondary and two tertiary hex digits at index CEi, with
 * the upper tertiary digit masked to its low two bits. Parsing then resumes
 * right after the closing ']'.
 * @param str    text holding the bracketed elements
 * @param ces    output array; no bound on the number of stored values is
 *               visible here
 * @param status ICU error code, passed on to the helper calls
 * NOTE(review): the handling of a missing '[' or ']', the increment of CEi,
 * the termination of `ces` and the return value are not visible in this
 * view. */
1147 static char * getCEs(char *str
, uint32_t *ces
, UErrorCode
*status
) {
1148 char *pStartCP
= uprv_strchr(str
, '[');
1152 char secondary
[100];
1155 while (*pStartCP
== '[') {
1156 uint32_t primarycount
= 0;
1157 uint32_t secondarycount
= 0;
1158 uint32_t tertiarycount
= 0;
1160 pEndCP
= strchr(pStartCP
, ']');
1161 if(pEndCP
== NULL
) {
1166 primarycount
= readElement(&pStartCP
, primary
, ',', status
);
1167 secondarycount
= readElement(&pStartCP
, secondary
, ',', status
);
1168 tertiarycount
= readElement(&pStartCP
, tertiary
, ']', status
);
1170 /* I want to get the CEs entered right here, including continuation */
1171 ces
[count
++] = getSingleCEValue(primary
, secondary
, tertiary
, status
);
1172 if (U_FAILURE(*status
)) {
1176 while (2 * CEi
< primarycount
|| CEi
< secondarycount
||
1177 CEi
< tertiarycount
) {
1178 uint32_t value
= UCOL_CONTINUATION_MARKER
; /* Continuation marker */
1179 if (2 * CEi
< primarycount
) {
1180 value
|= ((hex2num(*(primary
+ 4 * CEi
)) & 0xF) << 28);
1181 value
|= ((hex2num(*(primary
+ 4 * CEi
+ 1)) & 0xF) << 24);
1184 if (2 * CEi
+ 1 < primarycount
) {
1185 value
|= ((hex2num(*(primary
+ 4 * CEi
+ 2)) & 0xF) << 20);
1186 value
|= ((hex2num(*(primary
+ 4 * CEi
+ 3)) &0xF) << 16);
1189 if (CEi
< secondarycount
) {
1190 value
|= ((hex2num(*(secondary
+ 2 * CEi
)) & 0xF) << 12);
1191 value
|= ((hex2num(*(secondary
+ 2 * CEi
+ 1)) & 0xF) << 8);
1194 if (CEi
< tertiarycount
) {
1195 value
|= ((hex2num(*(tertiary
+ 2 * CEi
)) & 0x3) << 4);
1196 value
|= (hex2num(*(tertiary
+ 2 * CEi
+ 1)) & 0xF);
1200 ces
[count
++] = value
;
1203 pStartCP
= pEndCP
+ 1;
1210 * Getting the FractionalUCA.txt file stream
/* Opens FractionalUCA.txt for binary reading.
 * The primary path is ctest_dataSrcDir() followed by "unidata" and the file
 * name. The backup path starts from U_TOPSRCDIR plus "data" when U_TOPSRCDIR
 * is defined, and otherwise from loadTestData() followed by four ".." steps
 * and "data"; "unidata" and the file name are appended in both cases. The
 * primary path is tried first, then the backup path; if both fail, an error
 * naming the two paths is logged.
 * NOTE(review): backupPath is a 256-byte buffer filled with unchecked
 * strcpy()/strcat() calls, and errorCode from loadTestData() is not checked
 * in the visible lines. The declaration of newPath and the return statement
 * are not visible in this view - presumably `result` is returned and may be
 * NULL. */
1212 static FileStream
* getFractionalUCA(void)
1215 char backupPath
[256];
1216 FileStream
*result
= NULL
;
1218 /* Look inside ICU_DATA first */
1219 uprv_strcpy(newPath
, ctest_dataSrcDir());
1220 uprv_strcat(newPath
, "unidata" U_FILE_SEP_STRING
);
1221 uprv_strcat(newPath
, "FractionalUCA.txt");
1223 /* As a fallback, try to guess where the source data was located
1224 * at the time ICU was built, and look there.
1226 #if defined (U_TOPSRCDIR)
1227 strcpy(backupPath
, U_TOPSRCDIR U_FILE_SEP_STRING
"data");
1230 UErrorCode errorCode
= U_ZERO_ERROR
;
1231 strcpy(backupPath
, loadTestData(&errorCode
));
1232 strcat(backupPath
, U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
"data");
1235 strcat(backupPath
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"FractionalUCA.txt");
1237 result
= T_FileStream_open(newPath
, "rb");
1239 if (result
== NULL
) {
1240 result
= T_FileStream_open(backupPath
, "rb");
1241 if (result
== NULL
) {
1242 log_err("Failed to open either %s or %s\n", newPath
, backupPath
);
1249 * Testing the CEs returned by the iterator
/* Checks the collation elements produced by the root collator (opened with
 * the empty locale "") against the values listed in FractionalUCA.txt.
 * The file is read line by line. Lines that are empty or start with '#',
 * a newline, a carriage return (0x000D) or '[' are skipped. For the other
 * lines the code points are parsed with getCodePoints() and the expected
 * elements with getCEs(); entries whose first code point is 0xFDD0 get
 * special handling. An element iterator is then opened over the
 * NUL-terminated code points and every value returned by ucol_next() is
 * compared with ces[count]; a mismatch or a failing status is logged.
 * NOTE(review): `codepoints` holds only 5 UChars and no bound check is
 * visible; the declarations of `line` and `ces`, the loop around ucol_next(),
 * the early-exit statements and the closing of the collator are not visible
 * in this view. */
1251 static void TestCEs() {
1252 FileStream
*file
= NULL
;
1255 UChar codepoints
[5];
1257 UErrorCode status
= U_ZERO_ERROR
;
1258 UCollator
*coll
= ucol_open("", &status
);
1259 uint32_t lineNo
= 0;
1261 if (U_FAILURE(status
)) {
1262 log_err("Error in opening root collator\n");
1266 file
= getFractionalUCA();
1269 log_err("*** unable to open input FractionalUCA.txt file ***\n");
1274 while (T_FileStream_readLine(file
, line
, sizeof(line
)) != NULL
) {
1276 UCollationElements
*iter
;
1278 /* skip this line if it is empty or a comment or is a return value
1279 or start of some variable section */
1280 if(line
[0] == 0 || line
[0] == '#' || line
[0] == '\n' ||
1281 line
[0] == 0x000D || line
[0] == '[') {
1285 str
= getCodePoints(line
, codepoints
);
1287 /* these are 'fake' codepoints in the fractional UCA, and are used just
1288 * for positioning of indirect values. They should not go through this
1291 if(*codepoints
== 0xFDD0) {
1295 getCEs(str
, ces
, &status
);
1296 if (U_FAILURE(status
)) {
1297 log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1300 iter
= ucol_openElements(coll
, codepoints
, -1, &status
);
1301 if (U_FAILURE(status
)) {
1302 log_err("Error in opening collation elements\n");
1306 uint32_t ce
= (uint32_t)ucol_next(iter
, &status
);
1307 if (ce
== 0xFFFFFFFF) {
1310 /* we now unconditionally reorder Thai/Lao prevowels, so this
1311 * test would fail if we don't skip here.
1313 if(UCOL_ISTHAIPREVOWEL(*codepoints
) && ce
== 0 && count
== 0) {
1316 if (ce
!= ces
[count
] || U_FAILURE(status
)) {
1317 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1320 if (ces
[count
] == 0) {
1325 ucol_closeElements(iter
);
1328 T_FileStream_close(file
);
1333 * Testing the discontiguous contractions
/*
 * Builds a collator from rulestr and, for every entry of src[], checks that
 * the collation elements produced by iter over the whole source string equal
 * the elements produced by resultiter over pieces of the matching tgt[]
 * entry, where a piece ends at the next U+0020 found with u_strchr.
 * Any mismatch or ICU failure is reported through log_err.
 */
1335 static void TestDiscontiguos() {
1336 const char *rulestr
=
1337 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1339 int rulelen
= u_unescape(rulestr
, rule
, 50);
1340 const char *src
[] = {
1341 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1342 /* base character blocked */
1343 "XD\\u0300", "XD\\u0300\\u0315",
1344 /* non blocking combining character */
1345 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1346 /* blocking combining character */
1347 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1348 /* contraction prefix */
1349 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1350 "X\\u0300\\u031A\\u0315",
1351 /* ends not with a contraction character */
1352 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1353 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1355 const char *tgt
[] = {
1356 /* non blocking combining character */
1357 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1358 /* base character blocked */
1359 "X D \\u0300", "X D \\u0300\\u0315",
1360 /* non blocking combining character */
1361 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1362 /* blocking combining character */
1363 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1364 /* contraction prefix */
1365 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1366 "X\\u0300 \\u031A \\u0315",
1367 /* ends not with a contraction character */
1368 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1369 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1373 UErrorCode status
= U_ZERO_ERROR
;
1375 UCollationElements
*iter
;
1376 UCollationElements
*resultiter
;
/* normalization is switched off (UCOL_OFF) for the rule-based collator */
1378 coll
= ucol_openRules(rule
, rulelen
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
/* both iterators are opened on the first UChar of the rule text only;
   their text is replaced with ucol_setText before they are read */
1379 iter
= ucol_openElements(coll
, rule
, 1, &status
);
1380 resultiter
= ucol_openElements(coll
, rule
, 1, &status
);
1382 if (U_FAILURE(status
)) {
1383 log_err("Error opening collation rules\n");
1387 while (count
< size
) {
/* unescape the source string (at most 20 UChars) and feed it to iter */
1390 int strLen
= u_unescape(src
[count
], str
, 20);
1393 ucol_setText(iter
, str
, strLen
, &status
);
1394 if (U_FAILURE(status
)) {
1395 log_err("Error opening collation iterator\n");
1399 u_unescape(tgt
[count
], tstr
, 20);
1402 log_verbose("count %d\n", count
);
/* e points at the next U+0020 in s; s presumably walks tstr - confirm */
1406 UChar
*e
= u_strchr(s
, 0x20);
1410 ucol_setText(resultiter
, s
, (int32_t)(e
- s
), &status
);
1411 ce
= ucol_next(resultiter
, &status
);
1412 if (U_FAILURE(status
)) {
1413 log_err("Error manipulating collation iterator\n");
/* every element of the current piece must match the next element of iter */
1416 while (ce
!= UCOL_NULLORDER
) {
1417 if (ce
!= (uint32_t)ucol_next(iter
, &status
) ||
1418 U_FAILURE(status
)) {
1419 log_err("Discontiguos contraction test mismatch\n");
1422 ce
= ucol_next(resultiter
, &status
);
1423 if (U_FAILURE(status
)) {
1424 log_err("Error getting next collation element\n");
1437 ucol_closeElements(resultiter
);
1438 ucol_closeElements(iter
);
/*
 * Opens a collator for the rule "&z < AB", builds a string that starts with
 * 'A', continues with memset-filled 0xDC bytes and ends with 'B', and then
 * calls ucol_previous on an iterator over that string.  The test logs an
 * error unless the call returns UCOL_NULLORDER and leaves
 * U_BUFFER_OVERFLOW_ERROR in status.
 */
1442 static void TestCEBufferOverflow()
1444 UChar str
[UCOL_EXPAND_CE_BUFFER_SIZE
+ 1];
1445 UErrorCode status
= U_ZERO_ERROR
;
1448 UCollationElements
*iter
;
1450 u_uastrcpy(rule
, "&z < AB");
1451 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
1452 if (U_FAILURE(status
)) {
1453 log_err("Rule based collator not created for testing ce buffer overflow\n");
1457 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1458 test. this will cause an overflow in getPrev */
1459 str
[0] = 0x0041; /* 'A' */
1460 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
/* byte-wise fill of str[1] .. str[UCOL_EXPAND_CE_BUFFER_SIZE]; the last of
   these elements is overwritten with 'B' right below */
1461 uprv_memset(str
+ 1, 0xDC, sizeof(UChar
) * UCOL_EXPAND_CE_BUFFER_SIZE
);
1462 str
[UCOL_EXPAND_CE_BUFFER_SIZE
] = 0x0042; /* 'B' */
1463 iter
= ucol_openElements(coll
, str
, UCOL_EXPAND_CE_BUFFER_SIZE
+ 1,
/* success requires both UCOL_NULLORDER and U_BUFFER_OVERFLOW_ERROR */
1465 if (ucol_previous(iter
, &status
) != UCOL_NULLORDER
||
1466 status
!= U_BUFFER_OVERFLOW_ERROR
) {
1467 log_err("CE buffer expected to overflow with long string of trail surrogates\n");
1469 ucol_closeElements(iter
);
1474 * Byte bounds checks. Checks if each byte in data is between upper and lower
/*
 * data  - 32-bit value whose bytes are examined
 * upper - highest value of the tested range
 * lower - lowest value of the tested range
 * The visible test takes the low-order byte of data and checks whether it
 * lies outside [lower, upper].
 */
1477 static UBool
checkByteBounds(uint32_t data
, char upper
, char lower
)
/* low-order byte, narrowed to plain char before the comparison
   (NOTE(review): plain char signedness is implementation-defined) */
1481 char b
= (char)(data
& 0xFF);
1482 if (b
> upper
|| b
< lower
) {
1492 * Determines case of the string of codepoints.
1493 * If there are multiple codepoints, the string has to be treated as a contraction.
/*
 * s   - code units to classify
 * len - number of code units in s
 * Returns one of UCOL_LOWER_CASE, UCOL_UPPER_CASE or UCOL_MIXED_CASE,
 * decided from the lower/upper/title flags and, as a last resort, from
 * u_isupper(s[0]).
 */
1496 static uint8_t getCase(const UChar
*s
, uint32_t len
) {
1497 UBool lower
= FALSE
;
1498 UBool upper
= FALSE
;
1499 UBool title
= FALSE
;
1500 UErrorCode status
= U_ZERO_ERROR
;
1502 const UChar
*ps
= s
;
1505 return UCOL_LOWER_CASE
;
/* only lower seen, or no case flag set at all: lower case */
1523 if ((lower
&& !upper
&& !title
) || (!lower
&& !upper
&& !title
)){
1524 return UCOL_LOWER_CASE
;
/* only upper seen: upper case */
1526 if (upper
&& !lower
&& !title
) {
1527 return UCOL_UPPER_CASE
;
1529 /* mix of cases here */
1530 /* len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
1531 if (U_FAILURE(status)) {
1532 log_err("Error normalizing data string\n");
1533 return UCOL_LOWER_CASE;
1536 if ((title
&& len
>= 2) || (lower
&& upper
)) {
1537 return UCOL_MIXED_CASE
;
1539 if (u_isupper(s
[0])) {
1540 return UCOL_UPPER_CASE
;
1542 return UCOL_LOWER_CASE
;
1547 * Checking collation element validity given the boundary arguments.
/*
 * Walks the collation elements of codepoints with ucol_next and logs an
 * error for every element that violates one of the checks below.
 * coll         - collator used to open the element iterator
 * codepoints   - text to iterate over
 * length       - length handed to ucol_openElements
 * primarymax   - a non-zero primary weight below this value is reported
 * secondarymax - a secondary weight in [6, secondarymax] is reported
 */
1549 static UBool
checkCEValidity(const UCollator
*coll
, const UChar
*codepoints
,
1550 int length
, uint32_t primarymax
,
1551 uint32_t secondarymax
)
1553 UErrorCode status
= U_ZERO_ERROR
;
1554 UCollationElements
*iter
= ucol_openElements(coll
, codepoints
, length
,
1559 UBool upper = FALSE;
1560 UBool lower = FALSE;
1563 if (U_FAILURE(status
)) {
1564 log_err("Error creating iterator for testing validity\n");
1567 ce
= ucol_next(iter
, &status
);
1569 while (ce
!= UCOL_NULLORDER
) {
/* split the element into its three weights */
1571 uint32_t primary
= UCOL_PRIMARYORDER(ce
);
1572 uint32_t secondary
= UCOL_SECONDARYORDER(ce
);
1573 uint32_t tertiary
= UCOL_TERTIARYORDER(ce
);
1574 /* uint32_t scasebits = tertiary & 0xC0;*/
1576 if ((tertiary
== 0 && secondary
!= 0) ||
1577 (tertiary
< 0xC0 && secondary
== 0 && primary
!= 0)) {
1578 /* n-1th level is not zero when the nth level is
1579 except for continuations, this is wrong */
1580 log_err("Lower level weight not 0 when high level weight is 0\n");
1584 /* checks if any byte is illegal ie = 01 02 03. */
1585 if (checkByteBounds(ce
, 0x3, 0x1)) {
1586 log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
/* primary weight checks: below primarymax, a 0xFF byte, a byte in 0x01-0x03,
   or a value of 0xFE00 and above on a non-continuation element */
1590 if ((primary
!= 0 && primary
< primarymax
)
1591 || ((primary
& 0xFF) == 0xFF) || (((primary
>>8) & 0xFF) == 0xFF)
1592 || ((primary
& 0xFF) && ((primary
& 0xFF) <= 0x03))
1593 || (((primary
>>8) & 0xFF) && ((primary
>>8) & 0xFF) <= 0x03)
1594 || (primary
>= 0xFE00 && !isContinuation(ce
))) {
1595 log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
1596 primary
, codepoints
[0]);
1599 /* case matching not done since data generated by ken */
1601 if (secondary
>= 6 && secondary
<= secondarymax
) {
1602 log_err("Secondary weight out of range\n");
1608 ce
= ucol_next(iter
, &status
);
1610 ucol_closeElements(iter
);
1613 ucol_closeElements(iter
);
/*
 * Runs checkCEValidity over three kinds of input:
 *   1. the code points of each data line read from the FractionalUCA file,
 *   2. single code units below 0xFFFF that u_isdefined accepts,
 *   3. the characters of every token parsed from the rules of tailored
 *      collators.
 * The first two use the bounds (5, 86), the tailored pass uses (4, 85).
 */
1617 static void TestCEValidity()
1619 /* testing UCA collation elements */
1620 UErrorCode status
= U_ZERO_ERROR
;
1621 /* the collator is opened with the "root" locale; the failure message below still says en_US */
1622 UCollator
*coll
= ucol_open("root", &status
);
1623 /* tailored locales */
1624 char locale
[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1626 FileStream
*file
= getFractionalUCA();
1628 UChar codepoints
[10];
1631 UParseError parseError
;
1632 if (U_FAILURE(status
)) {
1633 log_err("en_US collator creation failed\n");
1636 log_verbose("Testing UCA elements\n");
1638 log_err("Fractional UCA data can not be opened\n");
/* pass 1: lines of the FractionalUCA file; the filter below singles out
   empty lines, '#' comments, line ends and lines starting with '[' */
1642 while (T_FileStream_readLine(file
, line
, sizeof(line
)) != NULL
) {
1643 if(line
[0] == 0 || line
[0] == '#' || line
[0] == '\n' ||
1644 line
[0] == 0x000D || line
[0] == '[') {
1648 getCodePoints(line
, codepoints
);
1649 checkCEValidity(coll
, codepoints
, u_strlen(codepoints
), 5, 86);
1652 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
/* pass 2: one code unit at a time, only those u_isdefined reports as defined */
1654 while (codepoints
[0] < 0xFFFF) {
1655 if (u_isdefined((UChar32
)codepoints
[0])) {
1656 checkCEValidity(coll
, codepoints
, 1, 5, 86);
1663 /* testing tailored collation elements */
1664 log_verbose("Testing tailored elements\n");
/* maxCount is either the size of locale[] or uloc_countAvailable();
   presumably chosen by a test-mode switch - confirm */
1666 maxCount
= sizeof(locale
)/sizeof(locale
[0]);
1668 maxCount
= uloc_countAvailable();
1670 while (count
< maxCount
) {
1671 const UChar
*rules
= NULL
,
1673 UChar
*rulesCopy
= NULL
;
1674 int32_t ruleLen
= 0;
1676 uint32_t chOffset
= 0;
1678 uint32_t exOffset
= 0;
1680 uint32_t prefixOffset
= 0;
1681 uint32_t prefixLen
= 0;
1682 UBool startOfRules
= TRUE
;
1685 UColTokenParser src
;
1686 uint32_t strength
= 0;
1689 loc
= locale
[count
];
1691 loc
= uloc_getAvailable(count
);
1692 if(!hasCollationElements(loc
)) {
1698 log_verbose("Testing CEs for %s\n", loc
);
1700 coll
= ucol_open(loc
, &status
);
1701 if (U_FAILURE(status
)) {
1702 log_err("%s collator creation failed\n", loc
);
1707 rules
= ucol_getRules(coll
, &ruleLen
);
/* working copy of the rules with UCOL_TOK_EXTRA_RULE_SPACE_SIZE spare UChars
   behind it, handed to the token parser through src.
   NOTE(review): the malloc result is used without a NULL check */
1710 rulesCopy
= (UChar
*)malloc((ruleLen
+
1711 UCOL_TOK_EXTRA_RULE_SPACE_SIZE
) * sizeof(UChar
));
1712 uprv_memcpy(rulesCopy
, rules
, ruleLen
* sizeof(UChar
));
1713 src
.current
= src
.source
= rulesCopy
;
1714 src
.end
= rulesCopy
+ ruleLen
;
1715 src
.extraCurrent
= src
.end
;
1716 src
.extraEnd
= src
.end
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
/* pass 3: one checkCEValidity call per parsed token */
1718 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,&status
)) != NULL
) {
1719 strength
= src
.parsedToken
.strength
;
1720 chOffset
= src
.parsedToken
.charsOffset
;
1721 chLen
= src
.parsedToken
.charsLen
;
1722 exOffset
= src
.parsedToken
.extensionOffset
;
1723 exLen
= src
.parsedToken
.extensionLen
;
1724 prefixOffset
= src
.parsedToken
.prefixOffset
;
1725 prefixLen
= src
.parsedToken
.prefixLen
;
1726 specs
= src
.parsedToken
.flags
;
1728 startOfRules
= FALSE
;
/* NOTE(review): codepoints holds 10 UChars and no bound on chLen is visible
   before this copy and the terminator write - confirm chLen < 10 */
1729 uprv_memcpy(codepoints
, src
.source
+ chOffset
,
1730 chLen
* sizeof(UChar
));
1731 codepoints
[chLen
] = 0;
1732 checkCEValidity(coll
, codepoints
, chLen
, 4, 85);
1740 T_FileStream_close(file
);
/*
 * Reports an invalid sort key through log_err: first the code points as
 * 0x%04x values, then the sort key bytes as 0x%02x values.
 * codepoints - text the sort key was generated for
 * length     - number of code units to print
 * sortkey    - sort key bytes
 * sklen      - number of sort key bytes to print
 */
1743 static void printSortKeyError(const UChar
*codepoints
, int length
,
1744 uint8_t *sortkey
, int sklen
)
1747 log_err("Sortkey not valid for ");
1748 while (length
> 0) {
1749 log_err("0x%04x ", *codepoints
);
1753 log_err("\nSortkey : ");
1754 while (count
< sklen
) {
1755 log_err("0x%02x ", sortkey
[count
]);
1762 * Checking sort key validity for all levels
/*
 * For each entry of strength[] sets that strength on coll, generates the
 * sort key of codepoints into a 128-byte buffer and scans its bytes up to
 * the first 0.  Invalid keys are dumped with printSortKeyError.
 * coll       - collator whose attributes are modified by this function
 * codepoints - text to generate sort keys for
 */
1764 static UBool
checkSortKeyValidity(UCollator
*coll
,
1765 const UChar
*codepoints
,
1768 UErrorCode status
= U_ZERO_ERROR
;
1769 UCollationStrength strength
[5] = {UCOL_PRIMARY
, UCOL_SECONDARY
,
1770 UCOL_TERTIARY
, UCOL_QUATERNARY
,
1772 int strengthlen
= 5;
/* NOTE(review): with the bound "< 1" only caselevel 0 satisfies the loop
   condition if caselevel starts at 0 - confirm the UCOL_ON branch is reachable */
1776 while (caselevel
< 1) {
1777 if (caselevel
== 0) {
1778 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
1781 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
1784 while (index
< strengthlen
) {
1787 uint8_t sortkey
[128];
1790 ucol_setStrength(coll
, strength
[index
]);
1791 sklen
= ucol_getSortKey(coll
, codepoints
, length
, sortkey
, 128);
/* NOTE(review): the scan relies on a 0 byte inside the 128-byte buffer -
   confirm the behaviour for keys longer than the buffer */
1792 while (sortkey
[count
] != 0) {
/* a 2 byte is always reported; a 3 byte is reported once count01 is
   positive, except when index is 4 */
1793 if (sortkey
[count
] == 2 || (sortkey
[count
] == 3 && count01
> 0 && index
!= 4)) {
1794 printSortKeyError(codepoints
, length
, sortkey
, sklen
);
1797 if (sortkey
[count
] == 1) {
/* the 0 byte must be the last byte of the key, and count01 must equal
   index + caselevel */
1803 if (count
+ 1 != sklen
|| (count01
!= index
+ caselevel
)) {
1804 printSortKeyError(codepoints
, length
, sortkey
, sklen
);
/*
 * Runs checkSortKeyValidity over three kinds of input:
 *   1. the code points of each data line read from the FractionalUCA file,
 *   2. single code units below 0xFFFF that u_isdefined accepts,
 *   3. the characters of every token parsed from the rules of the collators
 *      opened for the entries of locale[].
 */
1814 static void TestSortKeyValidity(void)
1816 /* testing UCA collation elements */
1817 UErrorCode status
= U_ZERO_ERROR
;
1818 /* en_US has no tailorings */
1819 UCollator
*coll
= ucol_open("en_US", &status
);
1820 /* tailored locales */
1821 char locale
[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
1822 FileStream
*file
= getFractionalUCA();
1824 UChar codepoints
[10];
1826 UParseError parseError
;
1827 if (U_FAILURE(status
)) {
1828 log_err("en_US collator creation failed\n");
1831 log_verbose("Testing UCA elements\n");
1833 log_err("Fractional UCA data can not be opened\n");
/* pass 1: lines of the FractionalUCA file; the filter below singles out
   empty lines, '#' comments, line ends and lines starting with '[' */
1837 while (T_FileStream_readLine(file
, line
, sizeof(line
)) != NULL
) {
1838 if(line
[0] == 0 || line
[0] == '#' || line
[0] == '\n' ||
1839 line
[0] == 0x000D || line
[0] == '[') {
1843 getCodePoints(line
, codepoints
);
1844 checkSortKeyValidity(coll
, codepoints
, u_strlen(codepoints
));
1847 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
/* pass 2: one code unit at a time, only those u_isdefined reports as defined */
1850 while (codepoints
[0] < 0xFFFF) {
1851 if (u_isdefined((UChar32
)codepoints
[0])) {
1852 checkSortKeyValidity(coll
, codepoints
, 1);
1859 /* testing tailored collation elements */
1860 log_verbose("Testing tailored elements\n");
1862 const UChar
*rules
= NULL
,
1864 UChar
*rulesCopy
= NULL
;
1865 int32_t ruleLen
= 0;
1867 uint32_t chOffset
= 0;
1869 uint32_t exOffset
= 0;
1871 uint32_t prefixOffset
= 0;
1872 uint32_t prefixLen
= 0;
1873 UBool startOfRules
= TRUE
;
1876 UColTokenParser src
;
1877 uint32_t strength
= 0;
1880 coll
= ucol_open(locale
[count
], &status
);
1881 if (U_FAILURE(status
)) {
1882 log_err("%s collator creation failed\n", locale
[count
]);
1887 rules
= ucol_getRules(coll
, &ruleLen
);
/* working copy of the rules with UCOL_TOK_EXTRA_RULE_SPACE_SIZE spare UChars
   behind it, handed to the token parser through src.
   NOTE(review): the malloc result is used without a NULL check */
1890 rulesCopy
= (UChar
*)malloc((ruleLen
+
1891 UCOL_TOK_EXTRA_RULE_SPACE_SIZE
) * sizeof(UChar
));
1892 uprv_memcpy(rulesCopy
, rules
, ruleLen
* sizeof(UChar
));
1893 src
.current
= src
.source
= rulesCopy
;
1894 src
.end
= rulesCopy
+ ruleLen
;
1895 src
.extraCurrent
= src
.end
;
1896 src
.extraEnd
= src
.end
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
/* pass 3: one checkSortKeyValidity call per parsed token */
1898 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
,&parseError
, &status
)) != NULL
) {
1899 strength
= src
.parsedToken
.strength
;
1900 chOffset
= src
.parsedToken
.charsOffset
;
1901 chLen
= src
.parsedToken
.charsLen
;
1902 exOffset
= src
.parsedToken
.extensionOffset
;
1903 exLen
= src
.parsedToken
.extensionLen
;
1904 prefixOffset
= src
.parsedToken
.prefixOffset
;
1905 prefixLen
= src
.parsedToken
.prefixLen
;
1906 specs
= src
.parsedToken
.flags
;
1908 startOfRules
= FALSE
;
/* NOTE(review): codepoints holds 10 UChars and no bound on chLen is visible
   before this copy and the terminator write - confirm chLen < 10 */
1909 uprv_memcpy(codepoints
, src
.source
+ chOffset
,
1910 chLen
* sizeof(UChar
));
1911 codepoints
[chLen
] = 0;
1912 checkSortKeyValidity(coll
, codepoints
, chLen
);
1920 T_FileStream_close(file
);
1923 #endif /* #if !UCONFIG_NO_COLLATION */