1 /********************************************************************
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
11 * Date Name Description
12 * Madhu Katragadda Ported for C API
13 * 02/19/01 synwee Modified test case for new collation iterator
14 *********************************************************************************/
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_COLLATION
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "unicode/uchar.h"
28 #include "unicode/ustring.h"
29 #include "unicode/putil.h"
41 extern uint8_t ucol_uprv_getCaseBits(const UChar
*, uint32_t, UErrorCode
*);
/* Registers the collation-iterator tests of this file on the test tree passed
   in as root. Each addTest call pairs one test function with its
   "tscoll/citertst/<TestName>" path. */
43 void addCollIterTest(TestNode
** root
)
45 addTest(root
, &TestPrevious
, "tscoll/citertst/TestPrevious");
46 addTest(root
, &TestOffset
, "tscoll/citertst/TestOffset");
47 addTest(root
, &TestSetText
, "tscoll/citertst/TestSetText");
48 addTest(root
, &TestMaxExpansion
, "tscoll/citertst/TestMaxExpansion");
49 addTest(root
, &TestUnicodeChar
, "tscoll/citertst/TestUnicodeChar");
50 addTest(root
, &TestNormalizedUnicodeChar
,
51 "tscoll/citertst/TestNormalizedUnicodeChar");
52 addTest(root
, &TestNormalization
, "tscoll/citertst/TestNormalization");
53 addTest(root
, &TestBug672
, "tscoll/citertst/TestBug672");
54 addTest(root
, &TestBug672Normalize
, "tscoll/citertst/TestBug672Normalize");
55 addTest(root
, &TestSmallBuffer
, "tscoll/citertst/TestSmallBuffer");
56 addTest(root
, &TestDiscontiguos
, "tscoll/citertst/TestDiscontiguos");
57 addTest(root
, &TestSearchCollatorElements
, "tscoll/citertst/TestSearchCollatorElements");
60 /* Locales opened in turn by TestBug672 and TestBug672Normalize. Both loop over indices 0..2, so their bound of 3 must track the number of entries here. */
62 static const char * LOCALES
[] = {"en_AU", "en_BE", "en_CA"};
/*
 * Regression test named after bug 672.
 * For each entry of LOCALES it opens a collator plus two element iterators,
 * one over the pattern "resume" and one over the longer text. It then records
 * three offsets of the text iterator in result[i]:
 *   [0] after ucol_setOffset to the length of the pattern,
 *   [1] after one ucol_previous,
 *   [2] after ucol_setOffset to the current offset plus one.
 * After the loop the rows are compared and any difference between locales is
 * logged as an error.
 */
64 static void TestBug672() {
65 UErrorCode status
= U_ZERO_ERROR
;
71 u_uastrcpy(pattern
, "resume");
72 u_uastrcpy(text
, "Time to resume updating my resume.");
74 for (i
= 0; i
< 3; ++ i
) {
/* NOTE(review): no ucol_close(coll) is visible in this excerpt; confirm the
   collator opened here is released on every iteration and on the early
   error paths. */
75 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
/* Length -1 asks the iterator to treat the text as NUL-terminated. */
76 UCollationElements
*pitr
= ucol_openElements(coll
, pattern
, -1,
78 UCollationElements
*titer
= ucol_openElements(coll
, text
, -1,
80 if (U_FAILURE(status
)) {
81 log_err_status(status
, "ERROR: in creation of either the collator or the collation iterator :%s\n",
86 log_verbose("locale tested %s\n", LOCALES
[i
]);
/* Run the pattern iterator forward until it reports UCOL_NULLORDER. */
88 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
91 if (U_FAILURE(status
)) {
92 log_err("ERROR: reversing collation iterator :%s\n",
98 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
99 if (U_FAILURE(status
)) {
100 log_err("ERROR: setting offset in collator :%s\n",
101 myErrorName(status
));
104 result
[i
][0] = ucol_getOffset(titer
);
105 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
108 ucol_previous(titer
, &status
);
109 result
[i
][1] = ucol_getOffset(titer
);
110 log_verbose("Current offset %d after previous\n", result
[i
][1]);
112 /* Add one to index */
113 log_verbose("Adding one to current offset...\n");
114 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
115 if (U_FAILURE(status
)) {
116 log_err("ERROR: setting offset in collator :%s\n",
117 myErrorName(status
));
120 result
[i
][2] = ucol_getOffset(titer
);
121 log_verbose("Current offset in text = %d\n", result
[i
][2]);
122 ucol_closeElements(pitr
);
123 ucol_closeElements(titer
);
/* NOTE(review): the length argument 3 is presumably a byte count (memcmp
   semantics). The declaration of result is not visible here, but unless its
   elements are one byte wide this compares less than a full row of three
   offsets. Confirm, and consider 3 * sizeof(result[0][0]). */
127 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
128 uprv_memcmp(result
[1], result
[2], 3) != 0) {
129 log_err("ERROR: Different locales have different offsets at the same character\n");
135 /* Running this test with normalization enabled showed up a bug in the incremental
136 normalization code. */
/*
 * Variant of TestBug672 that switches UCOL_NORMALIZATION_MODE to UCOL_ON on
 * each collator before the element iterators are opened. The recorded offsets
 * and the final cross-locale comparison are the same as in TestBug672.
 */
137 static void TestBug672Normalize() {
138 UErrorCode status
= U_ZERO_ERROR
;
144 u_uastrcpy(pattern
, "resume");
145 u_uastrcpy(text
, "Time to resume updating my resume.");
147 for (i
= 0; i
< 3; ++ i
) {
/* NOTE(review): no ucol_close(coll) is visible in this excerpt; confirm the
   collator is released on every iteration and on the early error paths. */
148 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
149 UCollationElements
*pitr
= NULL
;
150 UCollationElements
*titer
= NULL
;
/* Normalization is enabled before the iterators exist, so both iterators are
   opened on a collator that already has it on. */
152 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
154 pitr
= ucol_openElements(coll
, pattern
, -1, &status
);
155 titer
= ucol_openElements(coll
, text
, -1, &status
);
156 if (U_FAILURE(status
)) {
157 log_err_status(status
, "ERROR: in creation of either the collator or the collation iterator :%s\n",
158 myErrorName(status
));
162 log_verbose("locale tested %s\n", LOCALES
[i
]);
/* Run the pattern iterator forward until it reports UCOL_NULLORDER. */
164 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
167 if (U_FAILURE(status
)) {
168 log_err("ERROR: reversing collation iterator :%s\n",
169 myErrorName(status
));
174 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
175 if (U_FAILURE(status
)) {
176 log_err("ERROR: setting offset in collator :%s\n",
177 myErrorName(status
));
180 result
[i
][0] = ucol_getOffset(titer
);
181 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
184 ucol_previous(titer
, &status
);
185 result
[i
][1] = ucol_getOffset(titer
);
186 log_verbose("Current offset %d after previous\n", result
[i
][1]);
188 /* Add one to index */
189 log_verbose("Adding one to current offset...\n");
190 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
191 if (U_FAILURE(status
)) {
192 log_err("ERROR: setting offset in collator :%s\n",
193 myErrorName(status
));
196 result
[i
][2] = ucol_getOffset(titer
);
197 log_verbose("Current offset in text = %d\n", result
[i
][2]);
198 ucol_closeElements(pitr
);
199 ucol_closeElements(titer
);
/* NOTE(review): the length argument 3 is presumably a byte count (memcmp
   semantics). Unless the elements of result are one byte wide this compares
   less than a full row of three offsets. Confirm, and consider
   3 * sizeof(result[0][0]). */
203 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
204 uprv_memcmp(result
[1], result
[2], 3) != 0) {
205 log_err("ERROR: Different locales have different offsets at the same character\n");
213 * Test for CollationElementIterator previous and next for the whole set of
214 * unicode characters.
/*
 * Opens an "en_US" collator and walks code points from 1 up to (but not
 * including) 0xFFFE in batches. Within a batch every code point that
 * u_isdefined accepts is appended to the source buffer through the test
 * pointer. Each batch is then opened as an element iterator twice: once with
 * an explicit length and once with length -1 (NUL-terminated).
 */
216 static void TestUnicodeChar()
220 UCollationElements
*iter
;
221 UErrorCode status
= U_ZERO_ERROR
;
225 en_us
= ucol_open("en_US", &status
);
226 if (U_FAILURE(status
)){
227 log_err_status(status
, "ERROR: in creation of collation data using ucol_open()\n %s\n",
228 myErrorName(status
));
232 for (codepoint
= 1; codepoint
< 0xFFFE;)
/* A batch ends when codepoint reaches a multiple of 0xFF (255). */
236 while (codepoint
% 0xFF != 0)
238 if (u_isdefined(codepoint
))
239 *(test
++) = codepoint
;
243 if (u_isdefined(codepoint
))
244 *(test
++) = codepoint
;
246 if (codepoint
!= 0xFFFF)
250 iter
=ucol_openElements(en_us
, source
, u_strlen(source
), &status
);
251 if(U_FAILURE(status
)){
252 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
253 myErrorName(status
));
257 /* A basic test to see if it's working at all */
258 log_verbose("codepoint testing %x\n", codepoint
);
260 ucol_closeElements(iter
);
262 /* null termination test */
263 iter
=ucol_openElements(en_us
, source
, -1, &status
);
264 if(U_FAILURE(status
)){
265 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
266 myErrorName(status
));
270 /* A basic test to see if it's working at all */
272 ucol_closeElements(iter
);
279 * Test for CollationElementIterator previous and next for the whole set of
280 * unicode characters with normalization on.
/*
 * Same batch walk as TestUnicodeChar, but against a "th_TH" collator, which
 * the comment below expects to have normalization switched on. Each batch of
 * defined code points is opened as an element iterator once with an explicit
 * length and once with length -1 (NUL-terminated).
 */
282 static void TestNormalizedUnicodeChar()
286 UCollationElements
*iter
;
287 UErrorCode status
= U_ZERO_ERROR
;
291 /* thai should have normalization on */
292 th_th
= ucol_open("th_TH", &status
);
293 if (U_FAILURE(status
)){
294 log_err_status(status
, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
295 myErrorName(status
));
299 for (codepoint
= 1; codepoint
< 0xFFFE;)
/* A batch ends when codepoint reaches a multiple of 0xFF (255). */
303 while (codepoint
% 0xFF != 0)
305 if (u_isdefined(codepoint
))
306 *(test
++) = codepoint
;
310 if (u_isdefined(codepoint
))
311 *(test
++) = codepoint
;
313 if (codepoint
!= 0xFFFF)
317 iter
=ucol_openElements(th_th
, source
, u_strlen(source
), &status
);
318 if(U_FAILURE(status
)){
319 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
320 myErrorName(status
));
326 ucol_closeElements(iter
);
/* Second pass over the same buffer, this time as a NUL-terminated string. */
328 iter
=ucol_openElements(th_th
, source
, -1, &status
);
329 if(U_FAILURE(status
)){
330 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
331 myErrorName(status
));
337 ucol_closeElements(iter
);
344 * Test the incremental normalization
/*
 * Builds a collator from a custom rule string (unescaped into rule with a
 * capacity of 50 units) with normalization on and tertiary strength, then
 * opens element iterators over the entries of testdata. Each entry is
 * unescaped into source with a capacity of 10 units. Iterators are opened
 * with the explicit unescaped length and, in the last section, with length -1.
 */
346 static void TestNormalization()
348 UErrorCode status
= U_ZERO_ERROR
;
350 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
353 int rulelen
= u_unescape(str
, rule
, 50);
355 const char *testdata
[] =
356 {"\\u1ED9", "o\\u0323\\u0302",
357 "\\u0300\\u0315", "\\u0315\\u0300",
358 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
359 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
360 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
361 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
362 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
365 UCollationElements
*iter
;
367 coll
= ucol_openRules(rule
, rulelen
, UCOL_ON
, UCOL_TERTIARY
, NULL
, &status
);
/* The status of ucol_openRules is first examined after this attribute call. */
368 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
369 if (U_FAILURE(status
)){
370 log_err_status(status
, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
371 myErrorName(status
));
/* The first two entries are handled individually before the indexed section. */
375 srclen
= u_unescape(testdata
[0], source
, 10);
376 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
378 ucol_closeElements(iter
);
380 srclen
= u_unescape(testdata
[1], source
, 10);
381 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
383 ucol_closeElements(iter
);
386 srclen
= u_unescape(testdata
[count
], source
, 10);
387 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
389 if (U_FAILURE(status
)){
390 log_err("ERROR: in creation of collator element iterator\n %s\n",
391 myErrorName(status
));
395 ucol_closeElements(iter
);
/* Same source buffer again, opened as a NUL-terminated string. */
397 iter
= ucol_openElements(coll
, source
, -1, &status
);
399 if (U_FAILURE(status
)){
400 log_err("ERROR: in creation of collator element iterator\n %s\n",
401 myErrorName(status
));
405 ucol_closeElements(iter
);
412 * Test for CollationElementIterator.previous()
414 * @bug 4108758 - Make sure it works with contracting characters
/*
 * Exercises element iterators over several collators in turn:
 *   - "en_US" over test1,
 *   - c1: rule set containing the contractions ch, cH, Ch, CH,
 *   - c2: rule set containing the expansion c/abd,
 *   - c3: rule set containing both an expansion (c/aba) and a contraction (ch),
 *   - "th_TH" and "ja_JP" over the current contents of source.
 * For each one an iterator is opened, its status checked, and the iterator
 * closed again; the checks performed in between are not visible here.
 */
417 static void TestPrevious()
419 UCollator
*coll
=NULL
;
422 UCollator
*c1
, *c2
, *c3
;
423 UCollationElements
*iter
;
424 UErrorCode status
= U_ZERO_ERROR
;
428 u_uastrcpy(test1
, "What subset of all possible test cases?");
429 u_uastrcpy(test2
, "has the highest probability of detecting");
430 coll
= ucol_open("en_US", &status
);
432 iter
=ucol_openElements(coll
, test1
, u_strlen(test1
), &status
);
433 log_verbose("English locale testing back and forth\n");
434 if(U_FAILURE(status
)){
435 log_err_status(status
, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
436 myErrorName(status
));
440 /* A basic test to see if it's working at all */
442 ucol_closeElements(iter
);
445 /* Test with a contracting character sequence */
446 u_uastrcpy(rule
, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
447 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
449 log_verbose("Contraction rule testing back and forth with no normalization\n");
451 if (c1
== NULL
|| U_FAILURE(status
))
453 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
454 myErrorName(status
));
/* NOTE(review): the malloc result is used by u_uastrcpy without a NULL check,
   and no matching free is visible in this excerpt; confirm both. */
457 source
=(UChar
*)malloc(sizeof(UChar
) * 20);
458 u_uastrcpy(source
, "abchdcba");
459 iter
=ucol_openElements(c1
, source
, u_strlen(source
), &status
);
460 if(U_FAILURE(status
)){
461 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
462 myErrorName(status
));
466 ucol_closeElements(iter
);
469 /* Test with an expanding character sequence */
470 u_uastrcpy(rule
, "&a < b < c/abd < d");
471 c2
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
472 log_verbose("Expansion rule testing back and forth with no normalization\n");
/* NOTE(review): the failure message below says "contracting sequence" although
   this section tests an expansion rule; the wording looks copied from the c1
   check. */
473 if (c2
== NULL
|| U_FAILURE(status
))
475 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
476 myErrorName(status
));
479 u_uastrcpy(source
, "abcd");
480 iter
=ucol_openElements(c2
, source
, u_strlen(source
), &status
);
481 if(U_FAILURE(status
)){
482 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
483 myErrorName(status
));
487 ucol_closeElements(iter
);
/* Combined case: the rule has both an expansion (c/aba) and a contraction (ch).
   Unlike c1 and c2, normalization is passed as UCOL_DEFAULT here. */
490 u_uastrcpy(rule
, "&a < b < c/aba < d < z < ch");
491 c3
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
492 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
494 if (c3
== NULL
|| U_FAILURE(status
))
496 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
497 myErrorName(status
));
500 u_uastrcpy(source
, "abcdbchdc");
501 iter
=ucol_openElements(c3
, source
, u_strlen(source
), &status
);
502 if(U_FAILURE(status
)){
503 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
504 myErrorName(status
));
508 ucol_closeElements(iter
);
/* NOTE(review): coll is reassigned here and again for ja_JP below; no
   ucol_close of the earlier collator is visible in this excerpt - confirm. */
520 coll
= ucol_open("th_TH", &status
);
521 log_verbose("Thai locale testing back and forth with normalization\n");
522 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
523 if(U_FAILURE(status
)){
524 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
525 myErrorName(status
));
529 ucol_closeElements(iter
);
539 coll
= ucol_open("ja_JP", &status
);
540 log_verbose("Japanese locale testing back and forth with normalization\n");
541 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
542 if(U_FAILURE(status
)){
543 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
544 myErrorName(status
));
548 ucol_closeElements(iter
);
555 * Test for getOffset() and setOffset()
/*
 * Tests ucol_getOffset and ucol_setOffset on an "en_US" iterator over test1:
 *   1. boundaries: at offset 0 ucol_previous must return UCOL_NULLORDER, and
 *      at offset strlen ucol_next must return UCOL_NULLORDER;
 *   2. after getOrders has consumed the iterator, the offset must equal the
 *      text length;
 *   3. after resetting the offset to 0 the iterator must produce the same
 *      sequence as a freshly opened one (checked with assertEqual);
 *   4. with normalization on, offsets are checked during forward and backward
 *      iteration over a 4-unit text (expected values 1, 4, 3 forward and
 *      3, 1, 0 backward).
 */
557 static void TestOffset()
559 UErrorCode status
= U_ZERO_ERROR
;
560 UCollator
*en_us
=NULL
;
561 UCollationElements
*iter
, *pristine
;
563 OrderAndOffset
*orders
;
564 int32_t orderLength
=0;
569 u_uastrcpy(test1
, "What subset of all possible test cases?");
570 u_uastrcpy(test2
, "has the highest probability of detecting");
571 en_us
= ucol_open("en_US", &status
);
572 log_verbose("Testing getOffset and setOffset for collations\n");
573 iter
= ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
574 if(U_FAILURE(status
)){
575 log_err_status(status
, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
576 myErrorName(status
));
581 /* testing boundaries */
582 ucol_setOffset(iter
, 0, &status
);
583 if (U_FAILURE(status
) || ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
584 log_err("Error: After setting offset to 0, we should be at the end "
585 "of the backwards iteration");
587 ucol_setOffset(iter
, u_strlen(test1
), &status
);
/* NOTE(review): this check calls ucol_next, yet the message below speaks of
   the backwards iteration; the wording looks copied from the previous check. */
588 if (U_FAILURE(status
) || ucol_next(iter
, &status
) != UCOL_NULLORDER
) {
589 log_err("Error: After setting offset to end of the string, we should "
590 "be at the end of the backwards iteration");
593 /* Run all the way through the iterator, then get the offset */
595 orders
= getOrders(iter
, &orderLength
);
597 offset
= ucol_getOffset(iter
);
599 if (offset
!= u_strlen(test1
))
601 log_err("offset at end != length %d vs %d\n", offset
,
605 /* Now set the offset back to the beginning and see if it works */
606 pristine
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
607 if(U_FAILURE(status
)){
608 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
609 myErrorName(status
));
613 status
= U_ZERO_ERROR
;
615 ucol_setOffset(iter
, 0, &status
);
616 if (U_FAILURE(status
))
618 log_err("setOffset failed. %s\n", myErrorName(status
));
/* The rewound iterator must now yield the same elements as the fresh one. */
622 assertEqual(iter
, pristine
);
625 ucol_closeElements(pristine
);
626 ucol_closeElements(iter
);
629 /* testing offsets in normalization buffer */
635 ucol_setAttribute(en_us
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
/* Only the first 4 units of test1 are iterated; the lines that fill them for
   this section are not visible in this excerpt. */
636 iter
= ucol_openElements(en_us
, test1
, 4, &status
);
637 if(U_FAILURE(status
)){
638 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
639 myErrorName(status
));
645 while (ucol_next(iter
, &status
) != UCOL_NULLORDER
&&
649 if (ucol_getOffset(iter
) != 1) {
650 log_err("ERROR: Offset of iteration should be 1\n");
654 if (ucol_getOffset(iter
) != 4) {
655 log_err("ERROR: Offset of iteration should be 4\n");
659 if (ucol_getOffset(iter
) != 3) {
660 log_err("ERROR: Offset of iteration should be 3\n");
668 while (ucol_previous(iter
, &status
) != UCOL_NULLORDER
&&
673 if (ucol_getOffset(iter
) != 3) {
674 log_err("ERROR: Offset of iteration should be 3\n");
678 if (ucol_getOffset(iter
) != 1) {
679 log_err("ERROR: Offset of iteration should be 1\n");
683 if (ucol_getOffset(iter
) != 0) {
684 log_err("ERROR: Offset of iteration should be 0\n");
690 if(U_FAILURE(status
)){
691 log_err("ERROR: in iterating collation elements %s\n",
692 myErrorName(status
));
695 ucol_closeElements(iter
);
/*
 * Tests ucol_setText. Two "en_US" iterators are opened, iter1 over test1 and
 * iter2 over test2. iter2 is advanced a few elements, then re-pointed at test1
 * with ucol_setText and compared with iter1 through assertEqual. Finally
 * ucol_setText(iter2, NULL, 2) is expected to set U_ILLEGAL_ARGUMENT_ERROR.
 */
702 static void TestSetText()
705 UErrorCode status
= U_ZERO_ERROR
;
706 UCollator
*en_us
=NULL
;
707 UCollationElements
*iter1
, *iter2
;
711 u_uastrcpy(test1
, "What subset of all possible test cases?");
712 u_uastrcpy(test2
, "has the highest probability of detecting");
713 en_us
= ucol_open("en_US", &status
);
714 log_verbose("testing setText for Collation elements\n");
715 iter1
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
716 if(U_FAILURE(status
)){
717 log_err_status(status
, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
718 myErrorName(status
));
722 iter2
=ucol_openElements(en_us
, test2
, u_strlen(test2
), &status
);
723 if(U_FAILURE(status
)){
724 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
725 myErrorName(status
));
730 /* Run through the second iterator just to exercise it */
731 c
= ucol_next(iter2
, &status
);
/* Stops once the counter reaches 10 or the iterator reports UCOL_NULLORDER. */
734 while ( ++i
< 10 && (c
!= UCOL_NULLORDER
))
736 if (U_FAILURE(status
))
/* On an iteration error both iterators are closed before leaving. */
738 log_err("iter2->next() returned an error. %s\n", myErrorName(status
));
739 ucol_closeElements(iter2
);
740 ucol_closeElements(iter1
);
745 c
= ucol_next(iter2
, &status
);
748 /* Now set it to point to the same string as the first iterator */
749 ucol_setText(iter2
, test1
, u_strlen(test1
), &status
);
750 if (U_FAILURE(status
))
752 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
));
756 assertEqual(iter1
, iter2
);
759 /* Now set it to point to a null string with fake length*/
760 ucol_setText(iter2
, NULL
, 2, &status
);
/* Exactly U_ILLEGAL_ARGUMENT_ERROR is required; any other status is an error. */
761 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
)
763 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
764 myErrorName(status
));
767 ucol_closeElements(iter2
);
768 ucol_closeElements(iter1
);
773 * Test for getMaxExpansion()
/*
 * Tests ucol_getMaxExpansion on a collator built from a rule set containing
 * an expansion (c/aba) and a contraction (ch).
 *   - For code units below 0xFFFF the last collation element is fetched with
 *     ucol_previous and the reported maximum expansion must not be smaller
 *     than the count compared against it.
 *   - A series of single-character checks expects exact maxima of 1, 3 and 1.
 *   - The supplementary code point 0xEFFFD, encoded with U16_APPEND, expects 2.
 *   - One further character must not exceed 3.
 *   - A second collator is opened for the special jamo case, where 6 is
 *     expected; the rule text for it is not visible in this excerpt.
 * NOTE(review): every failure message prints the code point with %d, and some
 * use a comparison sign that differs from the check actually made.
 */
775 static void TestMaxExpansion()
777 UErrorCode status
= U_ZERO_ERROR
;
778 UCollator
*coll
;/*= ucol_open("en_US", &status);*/
780 UChar32 unassigned
= 0xEFFFD;
781 UChar supplementary
[2];
782 uint32_t stringOffset
= 0;
783 UBool isError
= FALSE
;
785 UCollationElements
*iter
;/*= ucol_openElements(coll, &ch, 1, &status);*/
786 uint32_t temporder
= 0;
789 u_uastrcpy(rule
, "&a < ab < c/aba < d < z < ch");
790 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
791 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
792 if(U_SUCCESS(status
) && coll
) {
793 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
795 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
/* The iterator is re-pointed at the single unit ch and read from the end. */
802 ucol_setText(iter
, &ch
, 1, &status
);
803 order
= ucol_previous(iter
, &status
);
805 /* thai management */
807 order
= ucol_previous(iter
, &status
);
809 while (U_SUCCESS(status
) &&
810 ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
814 size
= ucol_getMaxExpansion(iter
, order
);
815 if (U_FAILURE(status
) || size
< count
) {
816 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
821 /* testing for exact max expansion */
826 ucol_setText(iter
, &ch
, 1, &status
);
827 order
= ucol_previous(iter
, &status
);
828 size
= ucol_getMaxExpansion(iter
, order
);
/* NOTE(review): the check is for inequality with 1 but the message says "<". */
829 if (U_FAILURE(status
) || size
!= 1) {
830 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
837 ucol_setText(iter
, &ch
, 1, &status
);
838 temporder
= ucol_previous(iter
, &status
);
840 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 3) {
841 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
846 ucol_setText(iter
, &ch
, 1, &status
);
847 temporder
= ucol_previous(iter
, &status
);
849 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 1) {
850 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
/* Encodes the supplementary code point into the 2-unit buffer. */
854 U16_APPEND(supplementary
, stringOffset
, 2, unassigned
, isError
);
855 (void)isError
; /* Suppress set but not used warning. */
856 ucol_setText(iter
, supplementary
, 2, &status
);
857 sorder
= ucol_previous(iter
, &status
);
/* NOTE(review): the check is for inequality with 2 but the message says "<". */
859 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, sorder
) != 2) {
860 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
867 ucol_setText(iter
, &ch
, 1, &status
);
868 temporder
= ucol_previous(iter
, &status
);
869 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) > 3) {
870 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
874 ucol_closeElements(iter
);
877 /* testing special jamo &a<\u1160 */
/* NOTE(review): coll is reassigned here; no ucol_close of the first collator
   is visible in this excerpt - confirm. */
889 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
890 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
891 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
893 temporder
= ucol_previous(iter
, &status
);
/* NOTE(review): the check is for inequality with 6 but the message says ">". */
894 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 6) {
895 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
899 ucol_closeElements(iter
);
902 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
/*
 * Helper that advances the two iterators i1 and i2 in lockstep with ucol_next
 * and logs an error naming the iteration count and both elements when they
 * differ. The loop ends once i1 returns UCOL_NULLORDER.
 * Called by TestOffset and TestSetText.
 */
908 static void assertEqual(UCollationElements
*i1
, UCollationElements
*i2
)
912 UErrorCode status
= U_ZERO_ERROR
;
916 c1
= ucol_next(i1
, &status
);
917 c2
= ucol_next(i2
, &status
);
/* NOTE(review): the log text spells the helper name "assetEqual". */
921 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count
, c1
, c2
);
927 while (c1
!= UCOL_NULLORDER
);
931 * Testing iterators with extremely small buffers
/*
 * Tests element iteration over a text long enough to outgrow the default
 * writable buffer mentioned in the comment text below. teststr is filled with
 * 500 units made of 0x300 and 0x31A, and a "th_TH" collator is opened. The
 * element list obtained through getOrders for teststr is compared against the
 * list for the two-unit reference string str. Afterwards the long iterator is
 * reset, advanced twice and closed, to exercise clean-up of the buffer.
 */
933 static void TestSmallBuffer()
935 UErrorCode status
= U_ZERO_ERROR
;
937 UCollationElements
*testiter
,
940 OrderAndOffset
*testorders
,
944 UChar str
[] = {0x300, 0x31A, 0};
946 creating a long string of decomposable characters,
947 since by default the writable buffer is of size 256
949 while (count
< 500) {
/* Even indices receive 0x300; the branch storing 0x31A follows below. */
950 if ((count
& 1) == 0) {
951 teststr
[count
++] = 0x300;
954 teststr
[count
++] = 0x31A;
958 coll
= ucol_open("th_TH", &status
);
959 if(U_SUCCESS(status
) && coll
) {
960 testiter
= ucol_openElements(coll
, teststr
, 500, &status
);
961 iter
= ucol_openElements(coll
, str
, 2, &status
);
/* count is reused from here on as the out-parameter of getOrders. */
963 orders
= getOrders(iter
, &count
);
965 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
969 this will rearrange the string data to 250 characters of 0x300 first then
970 250 characters of 0x031A
972 testorders
= getOrders(testiter
, &count
);
975 log_err("Error decomposition does not give the right sized collation elements\n");
/* NOTE(review): the comment below names 0x0F76, which nothing visible in this
   function uses; the comparison is against the elements of str. The condition
   also decrements count as a side effect of the comparison itself. */
979 /* UCA collation element for 0x0F76 */
980 if ((count
> 250 && testorders
[-- count
].order
!= orders
[1].order
) ||
981 (count
<= 250 && testorders
[-- count
].order
!= orders
[0].order
)) {
982 log_err("Error decomposition does not give the right collation element at %d count\n", count
);
990 ucol_reset(testiter
);
992 /* ensures closing of elements done properly to clear writable buffer */
993 ucol_next(testiter
, &status
);
994 ucol_next(testiter
, &status
);
995 ucol_closeElements(testiter
);
996 ucol_closeElements(iter
);
999 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
1004 * Testing the discontiguous contractions
/*
 * Tests discontiguous contractions using a collator built from custom rules
 * with normalization off. For every index, src[count] is unescaped and given
 * to iter, while tgt[count] holds the same text split by spaces (0x20) into
 * the pieces expected to collate as units. Each piece is fed to resultiter,
 * and its elements must match, one by one, the next elements produced by iter.
 */
1006 static void TestDiscontiguos() {
1007 const char *rulestr
=
1008 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1010 int rulelen
= u_unescape(rulestr
, rule
, 50);
1011 const char *src
[] = {
1012 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1013 /* base character blocked */
1014 "XD\\u0300", "XD\\u0300\\u0315",
1015 /* non blocking combining character */
1016 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1017 /* blocking combining character */
1018 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1019 /* contraction prefix */
1020 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1021 "X\\u0300\\u031A\\u0315",
1022 /* ends not with a contraction character */
1023 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1024 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
/* tgt is index-aligned with src: entry k is the expected split of src[k]. */
1026 const char *tgt
[] = {
1027 /* non blocking combining character */
1028 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1029 /* base character blocked */
1030 "X D \\u0300", "X D \\u0300\\u0315",
1031 /* non blocking combining character */
1032 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1033 /* blocking combining character */
1034 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1035 /* contraction prefix */
1036 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1037 "X\\u0300 \\u031A \\u0315",
1038 /* ends not with a contraction character */
1039 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1040 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1044 UErrorCode status
= U_ZERO_ERROR
;
1046 UCollationElements
*iter
;
1047 UCollationElements
*resultiter
;
1049 coll
= ucol_openRules(rule
, rulelen
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
/* Both iterators are opened on a placeholder (the first unit of rule); their
   real text is supplied with ucol_setText inside the loop. */
1050 iter
= ucol_openElements(coll
, rule
, 1, &status
);
1051 resultiter
= ucol_openElements(coll
, rule
, 1, &status
);
1053 if (U_FAILURE(status
)) {
1054 log_err_status(status
, "Error opening collation rules -> %s\n", u_errorName(status
));
1058 while (count
< size
) {
1061 int strLen
= u_unescape(src
[count
], str
, 20);
1064 ucol_setText(iter
, str
, strLen
, &status
);
1065 if (U_FAILURE(status
)) {
1066 log_err("Error opening collation iterator\n");
1070 u_unescape(tgt
[count
], tstr
, 20);
1073 log_verbose("count %d\n", count
);
/* e marks the next space separator within the expected string. */
1077 UChar
*e
= u_strchr(s
, 0x20);
1081 ucol_setText(resultiter
, s
, (int32_t)(e
- s
), &status
);
1082 ce
= ucol_next(resultiter
, &status
);
1083 if (U_FAILURE(status
)) {
1084 log_err("Error manipulating collation iterator\n");
1087 while (ce
!= UCOL_NULLORDER
) {
1088 if (ce
!= (uint32_t)ucol_next(iter
, &status
) ||
1089 U_FAILURE(status
)) {
1090 log_err("Discontiguos contraction test mismatch\n");
1093 ce
= ucol_next(resultiter
, &status
);
1094 if (U_FAILURE(status
)) {
1095 log_err("Error getting next collation element\n");
1108 ucol_closeElements(resultiter
);
1109 ucol_closeElements(iter
);
1114 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1115 * normalization on AND jamo tailoring, among other things.
1117 * Note: This test is sensitive to changes of the root collator,
1118 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1119 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1120 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1121 * For example, the DUCET's artificial secondary CE in the ae-ligature
1122 * may map to two 32-bit iterator CEs (as it did until ICU 52).
/* Text scanned by TestSearchCollatorElements. Every sample is preceded by a
   0x0020 space. */
1124 static const UChar tsceText
[] = { /* Nothing in here should be ignorable */
1125 0x0020, 0xAC00, /* simple LV Hangul */
1126 0x0020, 0xAC01, /* simple LVT Hangul */
1127 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1128 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1129 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1130 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1131 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1132 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1133 0x0020, 0x00E6, /* small letter ae, expands */
1134 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
/* Used as the text length for ucol_openElements and as the end offset for
   ucol_setOffset in TestSearchCollatorElements; presumably the element count
   of tsceText. */
1137 enum { kLen_tsceText
= UPRV_LENGTHOF(tsceText
) };
/* Expected offsets for the "root" entry of tsceItems. */
1139 static const int32_t rootStandardOffsets
[] = {
1148 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1153 enum { kLen_rootStandardOffsets
= UPRV_LENGTHOF(rootStandardOffsets
) };
/* Expected offsets for the "root@collation=search" entry of tsceItems. */
1155 static const int32_t rootSearchOffsets
[] = {
1163 20, 21,22,22,23,23,23,24,
1164 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1169 enum { kLen_rootSearchOffsets
= UPRV_LENGTHOF(rootSearchOffsets
) };
/* Fields of one test item: the locale name handed to ucol_open and the table
   of expected offsets for that locale. */
1172 const char * locale
;
1173 const int32_t * offsets
;
/* TestSearchCollatorElements walks this table until it meets an entry whose
   locale is NULL. NOTE(review): that terminating entry is not visible in this
   excerpt - confirm it is present. */
1177 static const TSCEItem tsceItems
[] = {
1178 { "root", rootStandardOffsets
, kLen_rootStandardOffsets
},
1179 { "root@collation=search", rootSearchOffsets
, kLen_rootSearchOffsets
},
/*
 * For every entry of tsceItems (up to the one whose locale is NULL):
 *   1. opens a collator for the locale and an element iterator over tsceText;
 *   2. iterates forward, reading ucol_getOffset before each ucol_next and
 *      comparing it with the next expected offset; an element value of 0, a
 *      mismatch, or more/fewer elements than expected are logged as errors;
 *   3. moves the iterator to the end of the text and repeats the checks
 *      backwards with ucol_previous, walking the expected offsets from the end
 *      of the table towards its start.
 * A failure of ucol_open is reported through log_data_err, a failure of
 * ucol_openElements through log_err.
 */
1183 static void TestSearchCollatorElements(void)
1185 const TSCEItem
* tsceItemPtr
;
1186 for (tsceItemPtr
= tsceItems
; tsceItemPtr
->locale
!= NULL
; tsceItemPtr
++) {
1187 UErrorCode status
= U_ZERO_ERROR
;
/* NOTE(review): no ucol_close(ucol) is visible in this excerpt; confirm the
   collator is released for every item. */
1188 UCollator
* ucol
= ucol_open(tsceItemPtr
->locale
, &status
);
1189 if ( U_SUCCESS(status
) ) {
1190 UCollationElements
* uce
= ucol_openElements(ucol
, tsceText
, kLen_tsceText
, &status
);
1191 if ( U_SUCCESS(status
) ) {
1192 int32_t offset
, element
;
1193 const int32_t * nextOffsetPtr
;
1194 const int32_t * limitOffsetPtr
;
/* Forward pass: expected offsets are consumed from the start of the table. */
1196 nextOffsetPtr
= tsceItemPtr
->offsets
;
1197 limitOffsetPtr
= tsceItemPtr
->offsets
+ tsceItemPtr
->offsetsLen
;
/* The offset is sampled before the element is fetched. */
1199 offset
= ucol_getOffset(uce
);
1200 element
= ucol_next(uce
, &status
);
1201 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr
->locale
, offset
, element
);
1202 if ( element
== 0 ) {
1203 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr
->locale
);
1205 if ( nextOffsetPtr
< limitOffsetPtr
) {
1206 if (offset
!= *nextOffsetPtr
) {
1207 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1208 tsceItemPtr
->locale
, *nextOffsetPtr
, offset
);
/* After the first mismatch the expected-offset cursor is moved to the limit. */
1209 nextOffsetPtr
= limitOffsetPtr
;
1214 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr
->locale
);
1216 } while ( U_SUCCESS(status
) && element
!= UCOL_NULLORDER
);
1217 if ( nextOffsetPtr
< limitOffsetPtr
) {
1218 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr
->locale
);
/* Backward pass. NOTE(review): status is reset unconditionally right after
   ucol_setOffset, so a failure of that call is never reported. */
1221 ucol_setOffset(uce
, kLen_tsceText
, &status
);
1222 status
= U_ZERO_ERROR
;
/* The cursor starts one past the last expected offset and the limit is the
   start of the table; the step that moves the cursor back is presumably on a
   line not visible in this excerpt. */
1223 nextOffsetPtr
= tsceItemPtr
->offsets
+ tsceItemPtr
->offsetsLen
;
1224 limitOffsetPtr
= tsceItemPtr
->offsets
;
1226 offset
= ucol_getOffset(uce
);
1227 element
= ucol_previous(uce
, &status
);
1228 if ( element
== 0 ) {
1229 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr
->locale
);
1231 if ( nextOffsetPtr
> limitOffsetPtr
) {
1233 if (offset
!= *nextOffsetPtr
) {
1234 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1235 tsceItemPtr
->locale
, *nextOffsetPtr
, offset
);
1236 nextOffsetPtr
= limitOffsetPtr
;
1240 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr
->locale
);
1242 } while ( U_SUCCESS(status
) && element
!= UCOL_NULLORDER
);
1243 if ( nextOffsetPtr
> limitOffsetPtr
) {
1244 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr
->locale
);
1247 ucol_closeElements(uce
);
1249 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr
->locale
, u_errorName(status
) );
1253 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr
->locale
, u_errorName(status
) );
1258 #endif /* #if !UCONFIG_NO_COLLATION */