1 /********************************************************************
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
11 * Date Name Description
12 * Madhu Katragadda Ported for C API
13 * 02/19/01 synwee Modified test case for new collation iterator
14 *********************************************************************************/
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_COLLATION
24 #include "unicode/ucol.h"
25 #include "unicode/uloc.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
38 extern uint8_t ucol_uprv_getCaseBits(const UChar
*, uint32_t, UErrorCode
*);
/* Adds every collation element iterator test in this file to the test tree
   passed in as root. Each test function is handed to addTest() together with
   a path of the form tscoll/citertst/<test name>.
   NOTE(review): the opening and closing braces of this function are not
   present in this listing. */
40 void addCollIterTest(TestNode
** root
)
42 addTest(root
, &TestPrevious
, "tscoll/citertst/TestPrevious");
43 addTest(root
, &TestOffset
, "tscoll/citertst/TestOffset");
44 addTest(root
, &TestSetText
, "tscoll/citertst/TestSetText");
45 addTest(root
, &TestMaxExpansion
, "tscoll/citertst/TestMaxExpansion");
46 addTest(root
, &TestUnicodeChar
, "tscoll/citertst/TestUnicodeChar");
47 addTest(root
, &TestNormalizedUnicodeChar
,
48 "tscoll/citertst/TestNormalizedUnicodeChar");
49 addTest(root
, &TestNormalization
, "tscoll/citertst/TestNormalization");
50 addTest(root
, &TestBug672
, "tscoll/citertst/TestBug672");
51 addTest(root
, &TestBug672Normalize
, "tscoll/citertst/TestBug672Normalize");
52 addTest(root
, &TestSmallBuffer
, "tscoll/citertst/TestSmallBuffer");
53 addTest(root
, &TestCEs
, "tscoll/citertst/TestCEs");
54 addTest(root
, &TestDiscontiguos
, "tscoll/citertst/TestDiscontiguos");
55 addTest(root
, &TestCEBufferOverflow
, "tscoll/citertst/TestCEBufferOverflow");
56 addTest(root
, &TestCEValidity
, "tscoll/citertst/TestCEValidity");
57 addTest(root
, &TestSortKeyValidity
, "tscoll/citertst/TestSortKeyValidity");
60 /* The locales exercised by TestBug672() and TestBug672Normalize(); both loops index LOCALES[0] through LOCALES[2], so the table must keep three entries */
62 static const char * LOCALES
[] = {"en_AU", "en_BE", "en_CA"};
64 static void TestBug672() {
65 UErrorCode status
= U_ZERO_ERROR
;
71 u_uastrcpy(pattern
, "resume");
72 u_uastrcpy(text
, "Time to resume updating my resume.");
74 for (i
= 0; i
< 3; ++ i
) {
75 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
76 UCollationElements
*pitr
= ucol_openElements(coll
, pattern
, -1,
78 UCollationElements
*titer
= ucol_openElements(coll
, text
, -1,
80 if (U_FAILURE(status
)) {
81 log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
86 log_verbose("locale tested %s\n", LOCALES
[i
]);
88 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
91 if (U_FAILURE(status
)) {
92 log_err("ERROR: reversing collation iterator :%s\n",
98 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
99 if (U_FAILURE(status
)) {
100 log_err("ERROR: setting offset in collator :%s\n",
101 myErrorName(status
));
104 result
[i
][0] = ucol_getOffset(titer
);
105 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
108 ucol_previous(titer
, &status
);
109 result
[i
][1] = ucol_getOffset(titer
);
110 log_verbose("Current offset %d after previous\n", result
[i
][1]);
112 /* Add one to index */
113 log_verbose("Adding one to current offset...\n");
114 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
115 if (U_FAILURE(status
)) {
116 log_err("ERROR: setting offset in collator :%s\n",
117 myErrorName(status
));
120 result
[i
][2] = ucol_getOffset(titer
);
121 log_verbose("Current offset in text = %d\n", result
[i
][2]);
122 ucol_closeElements(pitr
);
123 ucol_closeElements(titer
);
127 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
128 uprv_memcmp(result
[1], result
[2], 3) != 0) {
129 log_err("ERROR: Different locales have different offsets at the same character\n");
135 /* Running this test with normalization enabled showed up a bug in the incremental
136 normalization code. */
137 static void TestBug672Normalize() {
138 UErrorCode status
= U_ZERO_ERROR
;
144 u_uastrcpy(pattern
, "resume");
145 u_uastrcpy(text
, "Time to resume updating my resume.");
147 for (i
= 0; i
< 3; ++ i
) {
148 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
149 UCollationElements
*pitr
= NULL
;
150 UCollationElements
*titer
= NULL
;
152 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
154 pitr
= ucol_openElements(coll
, pattern
, -1, &status
);
155 titer
= ucol_openElements(coll
, text
, -1, &status
);
156 if (U_FAILURE(status
)) {
157 log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
158 myErrorName(status
));
162 log_verbose("locale tested %s\n", LOCALES
[i
]);
164 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
167 if (U_FAILURE(status
)) {
168 log_err("ERROR: reversing collation iterator :%s\n",
169 myErrorName(status
));
174 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
175 if (U_FAILURE(status
)) {
176 log_err("ERROR: setting offset in collator :%s\n",
177 myErrorName(status
));
180 result
[i
][0] = ucol_getOffset(titer
);
181 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
184 ucol_previous(titer
, &status
);
185 result
[i
][1] = ucol_getOffset(titer
);
186 log_verbose("Current offset %d after previous\n", result
[i
][1]);
188 /* Add one to index */
189 log_verbose("Adding one to current offset...\n");
190 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
191 if (U_FAILURE(status
)) {
192 log_err("ERROR: setting offset in collator :%s\n",
193 myErrorName(status
));
196 result
[i
][2] = ucol_getOffset(titer
);
197 log_verbose("Current offset in text = %d\n", result
[i
][2]);
198 ucol_closeElements(pitr
);
199 ucol_closeElements(titer
);
203 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
204 uprv_memcmp(result
[1], result
[2], 3) != 0) {
205 log_err("ERROR: Different locales have different offsets at the same character\n");
213 * Test for CollationElementIterator previous and next for the whole set of
214 * unicode characters.
216 static void TestUnicodeChar()
220 UCollationElements
*iter
;
221 UErrorCode status
= U_ZERO_ERROR
;
225 en_us
= ucol_open("en_US", &status
);
226 if (U_FAILURE(status
)){
227 log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
228 myErrorName(status
));
232 for (codepoint
= 1; codepoint
< 0xFFFE;)
236 while (codepoint
% 0xFF != 0)
238 if (u_isdefined(codepoint
))
239 *(test
++) = codepoint
;
243 if (u_isdefined(codepoint
))
244 *(test
++) = codepoint
;
246 if (codepoint
!= 0xFFFF)
250 iter
=ucol_openElements(en_us
, source
, u_strlen(source
), &status
);
251 if(U_FAILURE(status
)){
252 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
253 myErrorName(status
));
257 /* A basic test to see if it's working at all */
258 log_verbose("codepoint testing %x\n", codepoint
);
260 ucol_closeElements(iter
);
262 /* null termination test */
263 iter
=ucol_openElements(en_us
, source
, -1, &status
);
264 if(U_FAILURE(status
)){
265 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
266 myErrorName(status
));
270 /* A basic test to see if it's working at all */
272 ucol_closeElements(iter
);
279 * Test for CollationElementIterator previous and next for the whole set of
280 * unicode characters with normalization on.
282 static void TestNormalizedUnicodeChar()
286 UCollationElements
*iter
;
287 UErrorCode status
= U_ZERO_ERROR
;
291 /* thai should have normalization on */
292 th_th
= ucol_open("th_TH", &status
);
293 if (U_FAILURE(status
)){
294 log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
295 myErrorName(status
));
299 for (codepoint
= 1; codepoint
< 0xFFFE;)
303 while (codepoint
% 0xFF != 0)
305 if (u_isdefined(codepoint
))
306 *(test
++) = codepoint
;
310 if (u_isdefined(codepoint
))
311 *(test
++) = codepoint
;
313 if (codepoint
!= 0xFFFF)
317 iter
=ucol_openElements(th_th
, source
, u_strlen(source
), &status
);
318 if(U_FAILURE(status
)){
319 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
320 myErrorName(status
));
326 ucol_closeElements(iter
);
328 iter
=ucol_openElements(th_th
, source
, -1, &status
);
329 if(U_FAILURE(status
)){
330 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
331 myErrorName(status
));
337 ucol_closeElements(iter
);
344 * Test the incremental normalization
346 static void TestNormalization()
348 UErrorCode status
= U_ZERO_ERROR
;
350 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
353 int rulelen
= u_unescape(str
, rule
, 50);
355 const char *testdata
[] =
356 {"\\u1ED9", "o\\u0323\\u0302",
357 "\\u0300\\u0315", "\\u0315\\u0300",
358 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
359 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
360 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
361 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
362 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
365 UCollationElements
*iter
;
367 coll
= ucol_openRules(rule
, rulelen
, UCOL_ON
, UCOL_TERTIARY
, NULL
, &status
);
368 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
369 if (U_FAILURE(status
)){
370 log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
371 myErrorName(status
));
375 srclen
= u_unescape(testdata
[0], source
, 10);
376 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
378 ucol_closeElements(iter
);
380 srclen
= u_unescape(testdata
[1], source
, 10);
381 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
383 ucol_closeElements(iter
);
386 srclen
= u_unescape(testdata
[count
], source
, 10);
387 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
389 if (U_FAILURE(status
)){
390 log_err("ERROR: in creation of collator element iterator\n %s\n",
391 myErrorName(status
));
395 ucol_closeElements(iter
);
397 iter
= ucol_openElements(coll
, source
, -1, &status
);
399 if (U_FAILURE(status
)){
400 log_err("ERROR: in creation of collator element iterator\n %s\n",
401 myErrorName(status
));
405 ucol_closeElements(iter
);
412 * Test for CollationElementIterator.previous()
414 * @bug 4108758 - Make sure it works with contracting characters
417 static void TestPrevious()
419 UCollator
*coll
=NULL
;
422 UCollator
*c1
, *c2
, *c3
;
423 UCollationElements
*iter
;
424 UErrorCode status
= U_ZERO_ERROR
;
426 test1
=(UChar
*)malloc(sizeof(UChar
) * 50);
427 test2
=(UChar
*)malloc(sizeof(UChar
) * 50);
428 u_uastrcpy(test1
, "What subset of all possible test cases?");
429 u_uastrcpy(test2
, "has the highest probability of detecting");
430 coll
= ucol_open("en_US", &status
);
432 iter
=ucol_openElements(coll
, test1
, u_strlen(test1
), &status
);
433 log_verbose("English locale testing back and forth\n");
434 if(U_FAILURE(status
)){
435 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
436 myErrorName(status
));
440 /* A basic test to see if it's working at all */
442 ucol_closeElements(iter
);
445 /* Test with a contracting character sequence */
446 u_uastrcpy(rule
, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
447 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
449 log_verbose("Contraction rule testing back and forth with no normalization\n");
451 if (c1
== NULL
|| U_FAILURE(status
))
453 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
454 myErrorName(status
));
457 source
=(UChar
*)malloc(sizeof(UChar
) * 20);
458 u_uastrcpy(source
, "abchdcba");
459 iter
=ucol_openElements(c1
, source
, u_strlen(source
), &status
);
460 if(U_FAILURE(status
)){
461 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
462 myErrorName(status
));
466 ucol_closeElements(iter
);
469 /* Test with an expanding character sequence */
470 u_uastrcpy(rule
, "&a < b < c/abd < d");
471 c2
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
472 log_verbose("Expansion rule testing back and forth with no normalization\n");
473 if (c2
== NULL
|| U_FAILURE(status
))
475 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
476 myErrorName(status
));
479 u_uastrcpy(source
, "abcd");
480 iter
=ucol_openElements(c2
, source
, u_strlen(source
), &status
);
481 if(U_FAILURE(status
)){
482 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
483 myErrorName(status
));
487 ucol_closeElements(iter
);
490 u_uastrcpy(rule
, "&a < b < c/aba < d < z < ch");
491 c3
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
492 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
494 if (c3
== NULL
|| U_FAILURE(status
))
496 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
497 myErrorName(status
));
500 u_uastrcpy(source
, "abcdbchdc");
501 iter
=ucol_openElements(c3
, source
, u_strlen(source
), &status
);
502 if(U_FAILURE(status
)){
503 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
504 myErrorName(status
));
508 ucol_closeElements(iter
);
520 coll
= ucol_open("th_TH", &status
);
521 log_verbose("Thai locale testing back and forth with normalization\n");
522 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
523 if(U_FAILURE(status
)){
524 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
525 myErrorName(status
));
529 ucol_closeElements(iter
);
539 coll
= ucol_open("ja_JP", &status
);
540 log_verbose("Japanese locale testing back and forth with normalization\n");
541 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
542 if(U_FAILURE(status
)){
543 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
544 myErrorName(status
));
548 ucol_closeElements(iter
);
557 * Test for getOffset() and setOffset()
/* Exercises ucol_getOffset() and ucol_setOffset() on an en_US collator.
   Visible steps:
     1. Iterate a whole string with getOrders() and check that the offset then
        equals u_strlen() of that string.
     2. Reset the offset to 0 and compare the iterator against a freshly
        opened one with assertEqual().
     3. Turn normalization on, open an iterator over the first 4 code units of
        test1, and check the offsets reported while stepping forwards and then
        backwards.
   NOTE(review): many lines of this function are missing from this listing
   (braces, several declarations, the switch or case structure around the
   offset checks, and any release of test1, test2, orders and en_us), so the
   exact control flow cannot be confirmed here. */
559 static void TestOffset()
561 UErrorCode status
= U_ZERO_ERROR
;
562 UCollator
*en_us
=NULL
;
563 UCollationElements
*iter
, *pristine
;
566 int32_t orderLength
=0;
/* NOTE(review): neither malloc() result is checked in the visible lines, and
   test2 is filled in but not referenced again in the visible lines. */
568 test1
=(UChar
*)malloc(sizeof(UChar
) * 50);
569 test2
=(UChar
*)malloc(sizeof(UChar
) * 50);
570 u_uastrcpy(test1
, "What subset of all possible test cases?");
571 u_uastrcpy(test2
, "has the highest probability of detecting");
572 en_us
= ucol_open("en_US", &status
);
573 log_verbose("Testing getOffset and setOffset for CollationElements\n");
574 iter
= ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
575 if(U_FAILURE(status
)){
576 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
577 myErrorName(status
));
581 /* Run all the way through the iterator, then get the offset */
583 orders
= getOrders(iter
, &orderLength
);
585 offset
= ucol_getOffset(iter
);
587 if (offset
!= u_strlen(test1
))
589 log_err("offset at end != length %d vs %d\n", offset
,
593 /* Now set the offset back to the beginning and see if it works */
594 pristine
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
595 if(U_FAILURE(status
)){
596 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
597 myErrorName(status
));
601 status
= U_ZERO_ERROR
;
603 ucol_setOffset(iter
, 0, &status
);
604 if (U_FAILURE(status
))
606 log_err("setOffset failed. %s\n", myErrorName(status
));
610 assertEqual(iter
, pristine
);
613 ucol_closeElements(pristine
);
614 ucol_closeElements(iter
);
617 /* testing offsets in normalization buffer */
623 ucol_setAttribute(en_us
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
/* Only the first 4 code units of test1 are handed to this iterator. */
624 iter
= ucol_openElements(en_us
, test1
, 4, &status
);
625 if(U_FAILURE(status
)){
626 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
627 myErrorName(status
));
/* Forward pass: the offset is checked after individual ucol_next() calls. */
633 while (ucol_next(iter
, &status
) != UCOL_NULLORDER
&&
/* NOTE(review): this check compares the offset against 1, but the message
   logged on failure reports an expected offset of 0. One of the two looks
   wrong - confirm which value is intended. */
637 if (ucol_getOffset(iter
) != 1) {
638 log_err("ERROR: Offset of iteration should be 0\n");
642 if (ucol_getOffset(iter
) != 4) {
643 log_err("ERROR: Offset of iteration should be 4\n");
647 if (ucol_getOffset(iter
) != 3) {
648 log_err("ERROR: Offset of iteration should be 3\n");
/* Backward pass: the offset is checked after individual ucol_previous() calls. */
656 while (ucol_previous(iter
, &status
) != UCOL_NULLORDER
&&
660 if (ucol_getOffset(iter
) != 3) {
661 log_err("ERROR: Offset of iteration should be 3\n");
665 if (ucol_getOffset(iter
) != 0) {
666 log_err("ERROR: Offset of iteration should be 0\n");
672 if(U_FAILURE(status
)){
673 log_err("ERROR: in iterating collation elements %s\n",
674 myErrorName(status
));
677 ucol_closeElements(iter
);
686 static void TestSetText()
689 UErrorCode status
= U_ZERO_ERROR
;
690 UCollator
*en_us
=NULL
;
691 UCollationElements
*iter1
, *iter2
;
692 test1
=(UChar
*)malloc(sizeof(UChar
) * 50);
693 test2
=(UChar
*)malloc(sizeof(UChar
) * 50);
694 u_uastrcpy(test1
, "What subset of all possible test cases?");
695 u_uastrcpy(test2
, "has the highest probability of detecting");
696 en_us
= ucol_open("en_US", &status
);
697 log_verbose("testing setText for Collation elements\n");
698 iter1
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
699 if(U_FAILURE(status
)){
700 log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
701 myErrorName(status
));
705 iter2
=ucol_openElements(en_us
, test2
, u_strlen(test2
), &status
);
706 if(U_FAILURE(status
)){
707 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
708 myErrorName(status
));
713 /* Run through the second iterator just to exercise it */
714 c
= ucol_next(iter2
, &status
);
717 while ( ++i
< 10 && (c
!= UCOL_NULLORDER
))
719 if (U_FAILURE(status
))
721 log_err("iter2->next() returned an error. %s\n", myErrorName(status
));
722 ucol_closeElements(iter2
);
723 ucol_closeElements(iter1
);
728 c
= ucol_next(iter2
, &status
);
731 /* Now set it to point to the same string as the first iterator */
732 ucol_setText(iter2
, test1
, u_strlen(test1
), &status
);
733 if (U_FAILURE(status
))
735 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
));
739 assertEqual(iter1
, iter2
);
742 /* Now set it to point to a null string with fake length*/
743 ucol_setText(iter2
, NULL
, 2, &status
);
744 if (U_FAILURE(status
))
746 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status
));
750 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
751 log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
755 ucol_closeElements(iter2
);
756 ucol_closeElements(iter1
);
764 static void backAndForth(UCollationElements
*iter
)
766 /* Run through the iterator forwards and stick it into an array */
768 UErrorCode status
= U_ZERO_ERROR
;
769 int32_t orderLength
= 0;
771 orders
= getOrders(iter
, &orderLength
);
774 /* Now go through it backwards and make sure we get the same values */
778 /* synwee : changed */
779 while ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
)
781 if (o
!= orders
[-- index
])
787 while (index
> 0 && orders
[-- index
] == 0)
790 if (o
!= orders
[index
])
792 log_err("Mismatch at index : 0x%x\n", index
);
800 while (index
!= 0 && orders
[index
- 1] == 0) {
806 log_err("Didn't get back to beginning - index is %d\n", index
);
810 if ((o
= ucol_next(iter
, &status
)) != UCOL_NULLORDER
)
812 log_err("Error at %x\n", o
);
815 if ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
)
817 log_err("Error at %x\n", o
);
826 * Test for getMaxExpansion()
828 static void TestMaxExpansion()
830 UErrorCode status
= U_ZERO_ERROR
;
831 UCollator
*coll
;/*= ucol_open("en_US", &status);*/
833 UChar supplementary
[2] = {0xD800, 0xDC00};
835 UCollationElements
*iter
;/*= ucol_openElements(coll, &ch, 1, &status);*/
836 uint32_t temporder
= 0;
839 u_uastrcpy(rule
, "&a < ab < c/aba < d < z < ch");
840 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
841 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
842 if(U_SUCCESS(status
) && coll
) {
843 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
845 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
852 ucol_setText(iter
, &ch
, 1, &status
);
853 order
= ucol_previous(iter
, &status
);
855 /* thai management */
857 order
= ucol_previous(iter
, &status
);
859 while (U_SUCCESS(status
) &&
860 ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
864 size
= ucol_getMaxExpansion(iter
, order
);
865 if (U_FAILURE(status
) || size
< count
) {
866 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
871 /* testing for exact max expansion */
876 ucol_setText(iter
, &ch
, 1, &status
);
877 order
= ucol_previous(iter
, &status
);
878 size
= ucol_getMaxExpansion(iter
, order
);
879 if (U_FAILURE(status
) || size
!= 1) {
880 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
887 ucol_setText(iter
, &ch
, 1, &status
);
888 temporder
= ucol_previous(iter
, &status
);
890 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 3) {
891 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
896 ucol_setText(iter
, &ch
, 1, &status
);
897 temporder
= ucol_previous(iter
, &status
);
899 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 1) {
900 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
904 ucol_setText(iter
, supplementary
, 2, &status
);
905 sorder
= ucol_previous(iter
, &status
);
907 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, sorder
) != 2) {
908 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
915 ucol_setText(iter
, &ch
, 1, &status
);
916 temporder
= ucol_previous(iter
, &status
);
917 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) > 3) {
918 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
922 ucol_closeElements(iter
);
925 /* testing special jamo &a<\u1160 */
937 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
938 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
939 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
941 temporder
= ucol_previous(iter
, &status
);
942 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 6) {
943 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
947 ucol_closeElements(iter
);
950 log_data_err("Couldn't open collator\n");
956 * Return an integer array containing all of the collation orders
957 * returned by calls to next on the specified iterator
/* Collects the values returned by successive ucol_next() calls on iter, until
   UCOL_NULLORDER is returned, into a malloc()ed int32_t array and stores the
   number of collected values in *orderLength.
   The array starts with room for maxSize (100) entries; when more room is
   needed a new buffer is allocated and the existing entries are copied over
   with memcpy().
   NOTE(review): several lines are missing from this listing (the declarations
   of status, order, size and temp, the growth condition, the free() calls and
   the return statement). No check of any malloc() result is visible, and the
   status written by ucol_next() is not inspected in the visible lines.
   Callers in this file treat the returned pointer as the collected array. */
959 static int32_t* getOrders(UCollationElements
*iter
, int32_t *orderLength
)
963 int32_t maxSize
= 100;
966 int32_t *orders
=(int32_t*)malloc(sizeof(int32_t) * maxSize
);
967 status
= U_ZERO_ERROR
;
970 while ((order
=ucol_next(iter
, &status
)) != UCOL_NULLORDER
)
975 temp
= (int32_t*)malloc(sizeof(int32_t) * maxSize
);
977 memcpy(temp
, orders
, size
* sizeof(int32_t));
983 orders
[size
++] = order
;
/* The collected entries are copied into a buffer sized by size.
   NOTE(review): the condition separating the two allocations below is not
   visible here. */
990 temp
= (int32_t*)malloc(sizeof(int32_t) * size
);
994 temp
= (int32_t*)malloc(sizeof(int32_t) * size
);
995 memcpy(temp
, orders
, size
* sizeof(int32_t));
1002 *orderLength
= size
;
/* Steps the iterators i1 and i2 in lockstep with ucol_next() and logs an
   error, including an iteration counter and both values, for a pair of
   elements reported as not equal. The loop condition at the end stops once
   the value read from i1 is UCOL_NULLORDER.
   NOTE(review): the declarations of c1, c2 and count, the loop opener and the
   comparison guarding log_err() are missing from this listing, so the exact
   comparison performed cannot be confirmed here. */
1007 static void assertEqual(UCollationElements
*i1
, UCollationElements
*i2
)
1011 UErrorCode status
= U_ZERO_ERROR
;
1015 c1
= ucol_next(i1
, &status
);
1016 c2
= ucol_next(i2
, &status
);
1020 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count
, c1
, c2
);
1026 while (c1
!= UCOL_NULLORDER
);
1030 * Testing iterators with extremely small buffers
1032 static void TestSmallBuffer()
1034 UErrorCode status
= U_ZERO_ERROR
;
1036 UCollationElements
*testiter
,
1039 int32_t *testorders
,
1043 UChar str
[] = {0x300, 0x31A, 0};
1045 creating a long string of decomposable characters,
1046 since by default the writable buffer is of size 256
1048 while (count
< 500) {
1049 if ((count
& 1) == 0) {
1050 teststr
[count
++] = 0x300;
1053 teststr
[count
++] = 0x31A;
1057 coll
= ucol_open("th_TH", &status
);
1058 if(U_SUCCESS(status
) && coll
) {
1059 testiter
= ucol_openElements(coll
, teststr
, 500, &status
);
1060 iter
= ucol_openElements(coll
, str
, 2, &status
);
1062 orders
= getOrders(iter
, &count
);
1064 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
1068 this will rearrange the string data to 250 characters of 0x300 first then
1069 250 characters of 0x031A
1071 testorders
= getOrders(testiter
, &count
);
1074 log_err("Error decomposition does not give the right sized collation elements\n");
1077 while (count
!= 0) {
1078 /* UCA collation element for 0x0F76 */
1079 if ((count
> 250 && testorders
[-- count
] != orders
[1]) ||
1080 (count
<= 250 && testorders
[-- count
] != orders
[0])) {
1081 log_err("Error decomposition does not give the right collation element at %d count\n", count
);
1089 ucol_reset(testiter
);
1090 /* ensures that the writable buffer was cleared */
1091 if (testiter
->iteratordata_
.writableBuffer
!=
1092 testiter
->iteratordata_
.stackWritableBuffer
) {
1093 log_err("Error Writable buffer in collation element iterator not reset\n");
1096 /* ensures closing of elements done properly to clear writable buffer */
1097 ucol_next(testiter
, &status
);
1098 ucol_next(testiter
, &status
);
1099 ucol_closeElements(testiter
);
1100 ucol_closeElements(iter
);
1103 log_data_err("Couldn't open collator\n");
1108 * Snippets of code from genuca
/* Classifies one character as a hexadecimal digit. The three branches test,
   in order, for the ranges 0-9, a-f and A-F.
   NOTE(review): the statement inside each branch and the final return are not
   present in this listing, so the returned values cannot be confirmed here.
   getCodePoints() shifts the result by 12, 8, 4 and 0 bits and ORs the pieces
   together, i.e. it uses each result as one digit of a four-digit hex value. */
1110 static int32_t hex2num(char hex
) {
1111 if(hex
>='0' && hex
<='9') {
1113 } else if(hex
>='a' && hex
<='f') {
1115 } else if(hex
>='A' && hex
<='F') {
1123 * Getting codepoints from a string
1124 * @param str character string containing codepoints separated by spaces and ended
1126 * @param codepoints array for storage, assuming size > 5
1127 * @return position at the end of the codepoint section
/* Parses hexadecimal code points from the start of str into codepoints.
   The first four characters are converted with hex2num() and packed, most
   significant digit first, into *codepoints. While the character following
   the current four digits is not a semicolon, parsing restarts one character
   further on and the next four digits are packed the same way.
   NOTE(review): the statements that advance codepoints, terminate the array
   and return a value are missing from this listing. The input is not
   validated in the visible lines: four characters are always read per code
   point and a semicolon is assumed to be present. */
1129 static char * getCodePoints(char *str
, UChar
*codepoints
) {
1130 char *pStartCP
= str
;
1131 char *pEndCP
= str
+ 4;
/* First code point: four hex digits starting at str. */
1133 *codepoints
= (UChar
)((hex2num(*pStartCP
) << 12) |
1134 (hex2num(*(pStartCP
+ 1)) << 8) |
1135 (hex2num(*(pStartCP
+ 2)) << 4) |
1136 (hex2num(*(pStartCP
+ 3))));
/* Further code points: skip the single character at pEndCP, then read the
   next four hex digits. */
1138 while (*pEndCP
!= ';') {
1139 pStartCP
= pEndCP
+ 1;
1140 *codepoints
= (UChar
)((hex2num(*pStartCP
) << 12) |
1141 (hex2num(*(pStartCP
+ 1)) << 8) |
1142 (hex2num(*(pStartCP
+ 2)) << 4) |
1143 (hex2num(*(pStartCP
+ 3))));
1145 pEndCP
= pStartCP
+ 4;
1152 * Snippets of code from genuca
/* Reads one element of a collation element description. While the character
   at **from differs from separator, every character that is not a space is
   stored through buffer. Nothing is read unless *status holds a success code.
   NOTE(review): the return type, the declarations of buffer and i, the
   statement that advances *from and the return statement are missing from
   this listing; buffer is presumably an alias of the to parameter - confirm.
   getCEs() stores the result in uint32_t count variables, so a length is
   presumably returned. No bound on the number of characters written is
   visible here. */
1155 readElement(char **from
, char *to
, char separator
, UErrorCode
*status
)
1157 if (U_SUCCESS(*status
)) {
1160 while (**from
!= separator
) {
1161 if (**from
!= ' ') {
1162 *(buffer
+i
++) = **from
;
1176 * Snippets of code from genuca
1179 getSingleCEValue(char *primary
, char *secondary
, char *tertiary
,
1182 if (U_SUCCESS(*status
)) {
1184 char primsave
= '\0';
1185 char secsave
= '\0';
1186 char tersave
= '\0';
1187 char *primend
= primary
+4;
1188 char *secend
= secondary
+2;
1189 char *terend
= tertiary
+2;
1194 if (uprv_strlen(primary
) > 4) {
1195 primsave
= *primend
;
1199 if (uprv_strlen(secondary
) > 2) {
1204 if (uprv_strlen(tertiary
) > 2) {
1209 primvalue
= (*primary
!='\0')?uprv_strtoul(primary
, &primend
, 16):0;
1210 secvalue
= (*secondary
!='\0')?uprv_strtoul(secondary
, &secend
, 16):0;
1211 tervalue
= (*tertiary
!='\0')?uprv_strtoul(tertiary
, &terend
, 16):0;
1212 if(primvalue
<= 0xFF) {
1216 value
= ((primvalue
<< UCOL_PRIMARYORDERSHIFT
) & UCOL_PRIMARYORDERMASK
)
1217 | ((secvalue
<< UCOL_SECONDARYORDERSHIFT
) & UCOL_SECONDARYORDERMASK
)
1218 | (tervalue
& UCOL_TERTIARYORDERMASK
);
1220 if(primsave
!='\0') {
1221 *primend
= primsave
;
1235 * Getting collation elements generated from a string
1236 * @param str character string containing collation elements enclosed in [] and
1237 * separated by spaces
1238 * @param ces array for storage, assuming size > 20
1239 * @param status error status
1240 * @return position at the end of the codepoint section
1242 static char * getCEs(char *str
, uint32_t *ces
, UErrorCode
*status
) {
1243 char *pStartCP
= uprv_strchr(str
, '[');
1247 char secondary
[100];
1250 while (*pStartCP
== '[') {
1251 uint32_t primarycount
= 0;
1252 uint32_t secondarycount
= 0;
1253 uint32_t tertiarycount
= 0;
1255 pEndCP
= strchr(pStartCP
, ']');
1256 if(pEndCP
== NULL
) {
1261 primarycount
= readElement(&pStartCP
, primary
, ',', status
);
1262 secondarycount
= readElement(&pStartCP
, secondary
, ',', status
);
1263 tertiarycount
= readElement(&pStartCP
, tertiary
, ']', status
);
1265 /* I want to get the CEs entered right here, including continuation */
1266 ces
[count
++] = getSingleCEValue(primary
, secondary
, tertiary
, status
);
1267 if (U_FAILURE(*status
)) {
1271 while (2 * CEi
< primarycount
|| CEi
< secondarycount
||
1272 CEi
< tertiarycount
) {
1273 uint32_t value
= UCOL_CONTINUATION_MARKER
; /* Continuation marker */
1274 if (2 * CEi
< primarycount
) {
1275 value
|= ((hex2num(*(primary
+ 4 * CEi
)) & 0xF) << 28);
1276 value
|= ((hex2num(*(primary
+ 4 * CEi
+ 1)) & 0xF) << 24);
1279 if (2 * CEi
+ 1 < primarycount
) {
1280 value
|= ((hex2num(*(primary
+ 4 * CEi
+ 2)) & 0xF) << 20);
1281 value
|= ((hex2num(*(primary
+ 4 * CEi
+ 3)) &0xF) << 16);
1284 if (CEi
< secondarycount
) {
1285 value
|= ((hex2num(*(secondary
+ 2 * CEi
)) & 0xF) << 12);
1286 value
|= ((hex2num(*(secondary
+ 2 * CEi
+ 1)) & 0xF) << 8);
1289 if (CEi
< tertiarycount
) {
1290 value
|= ((hex2num(*(tertiary
+ 2 * CEi
)) & 0x3) << 4);
1291 value
|= (hex2num(*(tertiary
+ 2 * CEi
+ 1)) & 0xF);
1295 ces
[count
++] = value
;
1298 pStartCP
= pEndCP
+ 1;
1305 * Getting the FractionalUCA.txt file stream
/* Opens FractionalUCA.txt for binary reading and returns the stream.
   Two locations are tried in order:
     1. newPath: u_getDataDirectory() followed by the unidata directory.
     2. backupPath: built from U_TOPSRCDIR when that macro is defined,
        otherwise from loadTestData() followed by four parent-directory steps
        and the data directory; in both cases unidata and the file name are
        appended.
   If neither open succeeds, an error naming both paths is logged.
   NOTE(review): backupPath is a fixed 256-byte buffer filled with unchecked
   strcpy() and strcat() calls, and the errorCode written by loadTestData() is
   not inspected in the visible lines. The declaration of newPath, the
   preprocessor else and endif lines and the return statement are missing from
   this listing. */
1307 static FileStream
* getFractionalUCA(void)
1310 char backupPath
[256];
1311 FileStream
*result
= NULL
;
1313 /* Look inside ICU_DATA first */
1314 uprv_strcpy(newPath
, u_getDataDirectory());
1315 uprv_strcat(newPath
, "unidata" U_FILE_SEP_STRING
);
1316 uprv_strcat(newPath
, "FractionalUCA.txt");
1318 /* As a fallback, try to guess where the source data was located
1319 * at the time ICU was built, and look there.
1321 #if defined (U_TOPSRCDIR)
1322 strcpy(backupPath
, U_TOPSRCDIR U_FILE_SEP_STRING
"data");
1325 UErrorCode errorCode
= U_ZERO_ERROR
;
1326 strcpy(backupPath
, loadTestData(&errorCode
));
1327 strcat(backupPath
, U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
"data");
1330 strcat(backupPath
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"FractionalUCA.txt");
1332 result
= T_FileStream_open(newPath
, "rb");
1334 if (result
== NULL
) {
1335 result
= T_FileStream_open(backupPath
, "rb");
1336 if (result
== NULL
) {
1337 log_err("Failed to open either %s or %s\n", newPath
, backupPath
);
1344 * Testing the CEs returned by the iterator
/*
 * TestCEs: opens the root collator and FractionalUCA.txt, then for each data
 * line parses the code points (getCodePoints) and the expected collation
 * elements (getCEs) and compares them, one by one, with the values returned
 * by ucol_next on an iterator opened over the same code points.
 * Mismatches and setup failures are reported through log_err.
 * NOTE(review): this listing is missing several short source lines
 * (declarations of line, str, ces and count, braces, loop and jump
 * statements), so the control flow shown below is incomplete - confirm
 * against the full file before relying on it.
 */
1346 static void TestCEs() {
1347 FileStream
*file
= NULL
;
1350 UChar codepoints
[5];
1352 UErrorCode status
= U_ZERO_ERROR
;
1353 UCollator
*coll
= ucol_open("", &status
);
1354 uint32_t lineNo
= 0;
1356 if (U_FAILURE(status
)) {
1357 log_err("Error in opening root collator\n");
1361 file
= getFractionalUCA();
1364 log_err("*** unable to open input FractionalUCA.txt file ***\n");
/* Walk the data file one line at a time until readLine returns NULL. */
1369 while (T_FileStream_readLine(file
, line
, sizeof(line
)) != NULL
) {
1371 UCollationElements
*iter
;
1373 /* skip this line if it is empty or a comment or is a return value
1374 or start of some variable section */
1375 if(line
[0] == 0 || line
[0] == '#' || line
[0] == '\n' ||
1376 line
[0] == 0x000D || line
[0] == '[') {
/* Fill codepoints from the line; the returned pointer is handed to getCEs below. */
1380 str
= getCodePoints(line
, codepoints
);
1382 /* these are 'fake' codepoints in the fractional UCA, and are used just
1383 * for positioning of indirect values. They should not go through this
1386 if(*codepoints
== 0xFDD0) {
1390 getCEs(str
, ces
, &status
);
1391 if (U_FAILURE(status
)) {
1392 log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1395 iter
= ucol_openElements(coll
, codepoints
, -1, &status
);
1396 if (U_FAILURE(status
)) {
1397 log_err("Error in opening collation elements\n");
1401 uint32_t ce
= (uint32_t)ucol_next(iter
, &status
);
1402 if (ce
== 0xFFFFFFFF) {
1405 /* we now unconditionally reorder Thai/Lao prevowels, so this
1406 * test would fail if we don't skip here.
1408 if(UCOL_ISTHAIPREVOWEL(*codepoints
) && ce
== 0 && count
== 0) {
1411 if (ce
!= ces
[count
] || U_FAILURE(status
)) {
1412 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1415 if (ces
[count
] == 0) {
1420 ucol_closeElements(iter
);
1423 T_FileStream_close(file
);
1428 * Testing the discontiguous contractions
/*
 * TestDiscontiguos: builds a rule based collator from rulestr and, for each
 * entry of src, checks that iterating over the whole string yields the same
 * collation elements as iterating over the space separated segments of the
 * matching tgt entry, segment by segment.
 * Two iterators are used: iter walks the full src string, resultiter is
 * re-targeted with ucol_setText onto each tgt segment.
 * Failures are reported through log_err.
 * NOTE(review): this listing is missing several short source lines
 * (declarations of rule, str, tstr, s, ce, count, size and coll, braces and
 * jump statements) - confirm loop structure against the full file.
 */
1430 static void TestDiscontiguos() {
1431 const char *rulestr
=
1432 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1434 int rulelen
= u_unescape(rulestr
, rule
, 50);
/* Input strings, each iterated as a whole through iter. */
1435 const char *src
[] = {
1436 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1437 /* base character blocked */
1438 "XD\\u0300", "XD\\u0300\\u0315",
1439 /* non blocking combining character */
1440 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1441 /* blocking combining character */
1442 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1443 /* contraction prefix */
1444 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1445 "X\\u0300\\u031A\\u0315",
1446 /* ends not with a contraction character */
1447 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1448 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
/* Expected segmentation of the src entry at the same index; segments are
 * separated by a space (0x20), which is what the u_strchr call below looks for. */
1450 const char *tgt
[] = {
1451 /* non blocking combining character */
1452 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1453 /* base character blocked */
1454 "X D \\u0300", "X D \\u0300\\u0315",
1455 /* non blocking combining character */
1456 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1457 /* blocking combining character */
1458 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1459 /* contraction prefix */
1460 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1461 "X\\u0300 \\u031A \\u0315",
1462 /* ends not with a contraction character */
1463 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1464 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1468 UErrorCode status
= U_ZERO_ERROR
;
1470 UCollationElements
*iter
;
1471 UCollationElements
*resultiter
;
/* Normalization is passed as UCOL_OFF; both iterators are first opened on a
 * one unit placeholder text and re-targeted with ucol_setText later. */
1473 coll
= ucol_openRules(rule
, rulelen
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
1474 iter
= ucol_openElements(coll
, rule
, 1, &status
);
1475 resultiter
= ucol_openElements(coll
, rule
, 1, &status
);
1477 if (U_FAILURE(status
)) {
1478 log_err("Error opening collation rules\n");
1482 while (count
< size
) {
1485 int strLen
= u_unescape(src
[count
], str
, 20);
1488 ucol_setText(iter
, str
, strLen
, &status
);
1489 if (U_FAILURE(status
)) {
1490 log_err("Error opening collation iterator\n");
1494 u_unescape(tgt
[count
], tstr
, 20);
1497 log_verbose("count %d\n", count
);
/* Locate the end of the current space delimited segment of the target. */
1501 UChar
*e
= u_strchr(s
, 0x20);
1505 ucol_setText(resultiter
, s
, (int32_t)(e
- s
), &status
);
1506 ce
= ucol_next(resultiter
, &status
);
1507 if (U_FAILURE(status
)) {
1508 log_err("Error manipulating collation iterator\n");
/* Each CE of the segment must equal the next CE of the full string iterator. */
1511 while (ce
!= UCOL_NULLORDER
) {
1512 if (ce
!= (uint32_t)ucol_next(iter
, &status
) ||
1513 U_FAILURE(status
)) {
1514 log_err("Discontiguos contraction test mismatch\n");
1517 ce
= ucol_next(resultiter
, &status
);
1518 if (U_FAILURE(status
)) {
1519 log_err("Error getting next collation element\n");
1532 ucol_closeElements(resultiter
);
1533 ucol_closeElements(iter
);
/*
 * TestCEBufferOverflow: checks that backwards iteration over a long run of
 * trail surrogates reports a buffer overflow instead of succeeding.
 * The input is the letter A, followed by units whose bytes are all 0xDC,
 * with the last unit overwritten by the letter B; the collator is built from
 * the rule "&z < AB" with normalization passed as UCOL_OFF.
 * The test passes only if ucol_previous returns UCOL_NULLORDER and status
 * is U_BUFFER_OVERFLOW_ERROR; anything else is reported through log_err.
 * NOTE(review): this listing is missing several short source lines
 * (declarations of rule and coll, braces, the tail of the ucol_openElements
 * call, return statements) - confirm against the full file.
 */
1537 static void TestCEBufferOverflow()
1539 UChar str
[UCOL_EXPAND_CE_BUFFER_SIZE
+ 1];
1540 UErrorCode status
= U_ZERO_ERROR
;
1543 UCollationElements
*iter
;
1545 u_uastrcpy(rule
, "&z < AB");
1546 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
1547 if (U_FAILURE(status
)) {
1548 log_err("Rule based collator not created for testing ce buffer overflow\n");
1552 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1553 test. this will cause an overflow in getPrev */
1554 str
[0] = 0x0041; /* 'A' */
1555 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
/* Byte-wise fill of elements 1 .. UCOL_EXPAND_CE_BUFFER_SIZE, i.e. up to and
 * including the last element, which is then overwritten with B below. */
1556 uprv_memset(str
+ 1, 0xDC, sizeof(UChar
) * UCOL_EXPAND_CE_BUFFER_SIZE
);
1557 str
[UCOL_EXPAND_CE_BUFFER_SIZE
] = 0x0042; /* 'B' */
1558 iter
= ucol_openElements(coll
, str
, UCOL_EXPAND_CE_BUFFER_SIZE
+ 1,
/* Expected outcome: UCOL_NULLORDER together with U_BUFFER_OVERFLOW_ERROR. */
1560 if (ucol_previous(iter
, &status
) != UCOL_NULLORDER
||
1561 status
!= U_BUFFER_OVERFLOW_ERROR
) {
1562 log_err("CE buffer expected to overflow with long string of trail surrogates\n");
1564 ucol_closeElements(iter
);
1569 * Byte bounds checks. Checks if each byte in data is between upper and lower
/*
 * checkByteBounds: byte range helper used by the CE validity checks.
 * The visible code takes the low byte of data as a char and tests whether it
 * lies outside the closed range [lower, upper]. Note the parameter order:
 * upper comes before lower.
 * NOTE(review): the loop over the remaining bytes and the return statements
 * are not present in this listing - confirm the return convention against
 * the full file.
 * NOTE(review): b, upper and lower are plain char, whose signedness is
 * implementation-defined, so bytes of 0x80 and above compare differently
 * across platforms; an unsigned byte type would make this portable.
 */
1572 static UBool
checkByteBounds(uint32_t data
, char upper
, char lower
)
1576 char b
= (char)(data
& 0xFF);
1577 if (b
> upper
|| b
< lower
) {
1587 * Determines case of the string of codepoints.
1588 * If it consists of multiple codepoints, it has to be treated as a contraction.
/*
 * getCase: classifies the string s of len code units as UCOL_LOWER_CASE,
 * UCOL_UPPER_CASE or UCOL_MIXED_CASE based on the three flags lower, upper
 * and title.
 * Visible decision order:
 *   - only lower set, or no flag set at all  -> UCOL_LOWER_CASE
 *   - only upper set                         -> UCOL_UPPER_CASE
 *   - title set with len >= 2, or both lower and upper set -> UCOL_MIXED_CASE
 *   - otherwise upper case if u_isupper(s[0]), else lower case
 * NOTE(review): the lines that scan the string and set the flags, and the
 * condition guarding the first early return, are not present in this
 * listing - confirm against the full file.
 */
1591 static uint8_t getCase(const UChar
*s
, uint32_t len
) {
1592 UBool lower
= FALSE
;
1593 UBool upper
= FALSE
;
1594 UBool title
= FALSE
;
1595 UErrorCode status
= U_ZERO_ERROR
;
1597 const UChar
*ps
= s
;
1600 return UCOL_LOWER_CASE
;
/* Only lower case seen, or nothing cased at all: report lower case. */
1618 if ((lower
&& !upper
&& !title
) || (!lower
&& !upper
&& !title
)){
1619 return UCOL_LOWER_CASE
;
/* Only upper case seen. */
1621 if (upper
&& !lower
&& !title
) {
1622 return UCOL_UPPER_CASE
;
1624 /* mix of cases here */
1625 /* len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
1626 if (U_FAILURE(status)) {
1627 log_err("Error normalizing data string\n");
1628 return UCOL_LOWER_CASE;
1631 if ((title
&& len
>= 2) || (lower
&& upper
)) {
1632 return UCOL_MIXED_CASE
;
1634 if (u_isupper(s
[0])) {
1635 return UCOL_UPPER_CASE
;
1637 return UCOL_LOWER_CASE
;
1642 * Checking collation element validity given the boundary arguments.
/*
 * checkCEValidity: iterates over the collation elements that coll produces
 * for codepoints (length units) and reports, through log_err, any element
 * that violates one of the visible rules below.
 *   coll         - collator used to open the element iterator
 *   codepoints   - text whose collation elements are checked
 *   length       - number of units passed to ucol_openElements
 *   primarymax   - a non-zero primary weight below this value is an error
 *   secondarymax - a secondary weight from 6 up to and including this value
 *                  is an error
 * The iterator is closed at two places at the end of the function,
 * presumably one per exit path.
 * NOTE(review): the return statements, the declaration of ce and several
 * braces are not present in this listing, so the UBool return convention
 * cannot be read from here - confirm against the full file.
 */
1644 static UBool
checkCEValidity(const UCollator
*coll
, const UChar
*codepoints
,
1645 int length
, uint32_t primarymax
,
1646 uint32_t secondarymax
)
1648 UErrorCode status
= U_ZERO_ERROR
;
1649 UCollationElements
*iter
= ucol_openElements(coll
, codepoints
, length
,
1654 UBool upper = FALSE;
1655 UBool lower = FALSE;
1658 if (U_FAILURE(status
)) {
1659 log_err("Error creating iterator for testing validity\n");
1662 ce
= ucol_next(iter
, &status
);
1664 while (ce
!= UCOL_NULLORDER
) {
/* Split the element into its three weights. */
1666 uint32_t primary
= UCOL_PRIMARYORDER(ce
);
1667 uint32_t secondary
= UCOL_SECONDARYORDER(ce
);
1668 uint32_t tertiary
= UCOL_TERTIARYORDER(ce
);
1669 /* uint32_t scasebits = tertiary & 0xC0;*/
/* Rule 1: level consistency between tertiary, secondary and primary. */
1671 if ((tertiary
== 0 && secondary
!= 0) ||
1672 (tertiary
< 0xC0 && secondary
== 0 && primary
!= 0)) {
1673 /* n-1th level is not zero when the nth level is
1674 except for continuations, this is wrong */
1675 log_err("Lower level weight not 0 when high level weight is 0\n");
1679 /* checks if any byte is illegal ie = 01 02 03. */
1680 if (checkByteBounds(ce
, 0x3, 0x1)) {
1681 log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
/* Rule 3: primary weight must be zero or at least primarymax, and below
 * 0xFF00 unless the element is a continuation. */
1685 if ((primary
!= 0 && primary
< primarymax
) || (primary
>= 0xFF00 && !isContinuation(ce
))) {
1686 log_err("UCA primary weight out of bounds\n");
1689 /* case matching not done since data generated by ken */
/* Rule 4: secondary weights in the range 6 .. secondarymax are rejected. */
1691 if (secondary
>= 6 && secondary
<= secondarymax
) {
1692 log_err("Secondary weight out of range\n");
1698 ce
= ucol_next(iter
, &status
);
1700 ucol_closeElements(iter
);
1703 ucol_closeElements(iter
);
/*
 * TestCEValidity: runs checkCEValidity over three sets of inputs.
 *   1. Every code point sequence listed in FractionalUCA.txt, using the
 *      en_US collator and the bounds 5 and 86.
 *   2. Single code units, while codepoints[0] is below 0xFFFF, that
 *      u_isdefined accepts, with the same collator and bounds.
 *   3. The character sequence of every token parsed from the rules of the
 *      tailored locales fr_FR, ko_KR, sh_YU, th_TH and zh_CN, using the
 *      bounds 4 and 85.
 * Setup failures are reported through log_err.
 * NOTE(review): codepoints holds 10 units and receives chLen units plus a
 * terminator from the rule parser with no bounds check visible here.
 * NOTE(review): no check of the malloc result is visible here.
 * NOTE(review): this listing is missing several short source lines
 * (declarations of line, count, chLen, exLen, specs and current, braces,
 * loop headers, cleanup calls) - confirm against the full file.
 */
1707 static void TestCEValidity()
1709 /* testing UCA collation elements */
1710 UErrorCode status
= U_ZERO_ERROR
;
1711 /* en_US has no tailorings */
1712 UCollator
*coll
= ucol_open("en_US", &status
);
1713 /* tailored locales */
1714 char locale
[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1715 FileStream
*file
= getFractionalUCA();
1717 UChar codepoints
[10];
1719 UParseError parseError
;
1720 if (U_FAILURE(status
)) {
1721 log_err("en_US collator creation failed\n");
1724 log_verbose("Testing UCA elements\n");
1726 log_err("Fractional UCA data can not be opened\n");
/* Pass 1: sequences listed in the data file; empty, comment, line-end and
 * bracketed section lines are recognised by their first character. */
1730 while (T_FileStream_readLine(file
, line
, sizeof(line
)) != NULL
) {
1731 if(line
[0] == 0 || line
[0] == '#' || line
[0] == '\n' ||
1732 line
[0] == 0x000D || line
[0] == '[') {
1736 getCodePoints(line
, codepoints
);
1737 checkCEValidity(coll
, codepoints
, u_strlen(codepoints
), 5, 86);
1740 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
/* Pass 2: one code unit at a time, skipping undefined code points. */
1742 while (codepoints
[0] < 0xFFFF) {
1743 if (u_isdefined((UChar32
)codepoints
[0])) {
1744 checkCEValidity(coll
, codepoints
, 1, 5, 86);
1751 /* testing tailored collation elements */
1752 log_verbose("Testing tailored elements\n");
1754 const UChar
*rules
= NULL
,
1756 UChar
*rulesCopy
= NULL
;
1757 int32_t ruleLen
= 0;
1759 uint32_t chOffset
= 0;
1761 uint32_t exOffset
= 0;
1763 uint32_t prefixOffset
= 0;
1764 uint32_t prefixLen
= 0;
1765 UBool startOfRules
= TRUE
;
1768 UColTokenParser src
;
1769 uint32_t strength
= 0;
1772 coll
= ucol_open(locale
[count
], &status
);
1773 if (U_FAILURE(status
)) {
1774 log_err("%s collator creation failed\n", locale
[count
]);
1779 rules
= ucol_getRules(coll
, &ruleLen
);
/* Copy the rules into a buffer with UCOL_TOK_EXTRA_RULE_SPACE_SIZE spare
 * units; extraCurrent and extraEnd below delimit that spare region. */
1782 rulesCopy
= (UChar
*)malloc((ruleLen
+
1783 UCOL_TOK_EXTRA_RULE_SPACE_SIZE
) * sizeof(UChar
));
1784 uprv_memcpy(rulesCopy
, rules
, ruleLen
* sizeof(UChar
));
1785 src
.current
= src
.source
= rulesCopy
;
1786 src
.end
= rulesCopy
+ ruleLen
;
1787 src
.extraCurrent
= src
.end
;
1788 src
.extraEnd
= src
.end
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
/* Pass 3: validate the character sequence of every parsed rule token. */
1790 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,&status
)) != NULL
) {
1791 strength
= src
.parsedToken
.strength
;
1792 chOffset
= src
.parsedToken
.charsOffset
;
1793 chLen
= src
.parsedToken
.charsLen
;
1794 exOffset
= src
.parsedToken
.extensionOffset
;
1795 exLen
= src
.parsedToken
.extensionLen
;
1796 prefixOffset
= src
.parsedToken
.prefixOffset
;
1797 prefixLen
= src
.parsedToken
.prefixLen
;
1798 specs
= src
.parsedToken
.flags
;
1800 startOfRules
= FALSE
;
/* NOTE(review): chLen is not compared with the size of codepoints here. */
1801 uprv_memcpy(codepoints
, src
.source
+ chOffset
,
1802 chLen
* sizeof(UChar
));
1803 codepoints
[chLen
] = 0;
1804 checkCEValidity(coll
, codepoints
, chLen
, 4, 85);
1812 T_FileStream_close(file
);
/*
 * printSortKeyError: reports an invalid sort key through log_err.
 * It first logs the offending code points in hexadecimal and then the sort
 * key bytes in hexadecimal.
 *   codepoints - text the sort key was generated for
 *   length     - number of units to print from codepoints
 *   sortkey    - sort key bytes
 *   sklen      - number of bytes to print from sortkey
 * NOTE(review): the declaration of count and the statements that advance
 * codepoints, length and count are not present in this listing - confirm
 * against the full file.
 */
1815 static void printSortKeyError(const UChar
*codepoints
, int length
,
1816 uint8_t *sortkey
, int sklen
)
1819 log_err("Sortkey not valid for ");
1820 while (length
> 0) {
1821 log_err("0x%04x ", *codepoints
);
1825 log_err("\nSortkey : ");
1826 while (count
< sklen
) {
1827 log_err("0x%02x ", sortkey
[count
]);
1834 * Checking sort key validity for all levels
/*
 * checkSortKeyValidity: generates sort keys for codepoints at each entry of
 * the strength table and checks the byte layout of every key.
 *   coll       - collator; its case level attribute and strength are
 *                changed by this function
 *   codepoints - text to generate sort keys for
 *   length     - number of units in codepoints (the parameter is declared
 *                on a line not present in this listing)
 * Visible checks on each key, scanned up to its first zero byte:
 *   - a byte equal to 2 is an error
 *   - a byte equal to 3 is an error once count01 is above zero, except when
 *     index is 4
 *   - a byte equal to 1 is treated specially (the body is not in this
 *     listing; presumably it increments count01)
 *   - after the scan, count + 1 must equal sklen and count01 must equal
 *     index + caselevel
 * Violations are reported with printSortKeyError.
 * NOTE(review): with the loop bound caselevel < 1, the branch that sets
 * UCOL_CASE_LEVEL to UCOL_ON looks unreachable if caselevel starts at 0 -
 * confirm whether the case level pass is meant to be disabled.
 * NOTE(review): the return statements, the fifth strength entry and several
 * declarations are not present in this listing - confirm against the full
 * file.
 */
1836 static UBool
checkSortKeyValidity(UCollator
*coll
,
1837 const UChar
*codepoints
,
1840 UErrorCode status
= U_ZERO_ERROR
;
1841 UCollationStrength strength
[5] = {UCOL_PRIMARY
, UCOL_SECONDARY
,
1842 UCOL_TERTIARY
, UCOL_QUATERNARY
,
1844 int strengthlen
= 5;
1848 while (caselevel
< 1) {
1849 if (caselevel
== 0) {
1850 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
1853 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
/* One sort key per strength table entry. */
1856 while (index
< strengthlen
) {
1859 uint8_t sortkey
[128];
1862 ucol_setStrength(coll
, strength
[index
]);
1863 sklen
= ucol_getSortKey(coll
, codepoints
, length
, sortkey
, 128);
/* Scan the key up to its terminating zero byte. */
1864 while (sortkey
[count
] != 0) {
1865 if (sortkey
[count
] == 2 || (sortkey
[count
] == 3 && count01
> 0 && index
!= 4)) {
1866 printSortKeyError(codepoints
, length
, sortkey
, sklen
);
1869 if (sortkey
[count
] == 1) {
/* The terminator must be the last byte and the number of 01 bytes must match
 * index + caselevel. */
1875 if (count
+ 1 != sklen
|| (count01
!= index
+ caselevel
)) {
1876 printSortKeyError(codepoints
, length
, sortkey
, sklen
);
/*
 * TestSortKeyValidity: runs checkSortKeyValidity over three sets of inputs.
 *   1. Every code point sequence listed in FractionalUCA.txt, using the
 *      en_US collator.
 *   2. Single code units, while codepoints[0] is below 0xFFFF, that
 *      u_isdefined accepts, with the same collator.
 *   3. The character sequence of every token parsed from the rules of the
 *      tailored locales fr_FR, ko_KR, sh_YU, th_TH and zh_CN.
 * Setup failures are reported through log_err.
 * NOTE(review): codepoints holds 10 units and receives chLen units plus a
 * terminator from the rule parser with no bounds check visible here.
 * NOTE(review): no check of the malloc result is visible here.
 * NOTE(review): this listing is missing several short source lines
 * (declarations of line, count, chLen, exLen, specs and current, braces,
 * loop headers, cleanup calls) - confirm against the full file.
 */
1886 static void TestSortKeyValidity(void)
1888 /* testing UCA collation elements */
1889 UErrorCode status
= U_ZERO_ERROR
;
1890 /* en_US has no tailorings */
1891 UCollator
*coll
= ucol_open("en_US", &status
);
1892 /* tailored locales */
1893 char locale
[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
1894 FileStream
*file
= getFractionalUCA();
1896 UChar codepoints
[10];
1898 UParseError parseError
;
1899 if (U_FAILURE(status
)) {
1900 log_err("en_US collator creation failed\n");
1903 log_verbose("Testing UCA elements\n");
1905 log_err("Fractional UCA data can not be opened\n");
/* Pass 1: sequences listed in the data file; empty, comment, line-end and
 * bracketed section lines are recognised by their first character. */
1909 while (T_FileStream_readLine(file
, line
, sizeof(line
)) != NULL
) {
1910 if(line
[0] == 0 || line
[0] == '#' || line
[0] == '\n' ||
1911 line
[0] == 0x000D || line
[0] == '[') {
1915 getCodePoints(line
, codepoints
);
1916 checkSortKeyValidity(coll
, codepoints
, u_strlen(codepoints
));
1919 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
/* Pass 2: one code unit at a time, skipping undefined code points. */
1922 while (codepoints
[0] < 0xFFFF) {
1923 if (u_isdefined((UChar32
)codepoints
[0])) {
1924 checkSortKeyValidity(coll
, codepoints
, 1);
1931 /* testing tailored collation elements */
1932 log_verbose("Testing tailored elements\n");
1934 const UChar
*rules
= NULL
,
1936 UChar
*rulesCopy
= NULL
;
1937 int32_t ruleLen
= 0;
1939 uint32_t chOffset
= 0;
1941 uint32_t exOffset
= 0;
1943 uint32_t prefixOffset
= 0;
1944 uint32_t prefixLen
= 0;
1945 UBool startOfRules
= TRUE
;
1948 UColTokenParser src
;
1949 uint32_t strength
= 0;
1952 coll
= ucol_open(locale
[count
], &status
);
1953 if (U_FAILURE(status
)) {
1954 log_err("%s collator creation failed\n", locale
[count
]);
1959 rules
= ucol_getRules(coll
, &ruleLen
);
/* Copy the rules into a buffer with UCOL_TOK_EXTRA_RULE_SPACE_SIZE spare
 * units; extraCurrent and extraEnd below delimit that spare region. */
1962 rulesCopy
= (UChar
*)malloc((ruleLen
+
1963 UCOL_TOK_EXTRA_RULE_SPACE_SIZE
) * sizeof(UChar
));
1964 uprv_memcpy(rulesCopy
, rules
, ruleLen
* sizeof(UChar
));
1965 src
.current
= src
.source
= rulesCopy
;
1966 src
.end
= rulesCopy
+ ruleLen
;
1967 src
.extraCurrent
= src
.end
;
1968 src
.extraEnd
= src
.end
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
/* Pass 3: validate the sort key of every parsed rule token. */
1970 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
,&parseError
, &status
)) != NULL
) {
1971 strength
= src
.parsedToken
.strength
;
1972 chOffset
= src
.parsedToken
.charsOffset
;
1973 chLen
= src
.parsedToken
.charsLen
;
1974 exOffset
= src
.parsedToken
.extensionOffset
;
1975 exLen
= src
.parsedToken
.extensionLen
;
1976 prefixOffset
= src
.parsedToken
.prefixOffset
;
1977 prefixLen
= src
.parsedToken
.prefixLen
;
1978 specs
= src
.parsedToken
.flags
;
1980 startOfRules
= FALSE
;
/* NOTE(review): chLen is not compared with the size of codepoints here. */
1981 uprv_memcpy(codepoints
, src
.source
+ chOffset
,
1982 chLen
* sizeof(UChar
));
1983 codepoints
[chLen
] = 0;
1984 checkSortKeyValidity(coll
, codepoints
, chLen
);
1992 T_FileStream_close(file
);
1995 #endif /* #if !UCONFIG_NO_COLLATION */