1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 * Modification History:
13 * Date Name Description
14 * Madhu Katragadda Ported for C API
15 * 02/19/01 synwee Modified test case for new collation iterator
16 *********************************************************************************/
18 * Collation Iterator tests.
19 * (Let me reiterate my position...)
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_COLLATION
26 #include "unicode/ucol.h"
27 #include "unicode/ucoleitr.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utf16.h"
32 #include "unicode/putil.h"
/* Extern declaration of a helper taking a UChar pointer, a uint32_t and a
 * UErrorCode pointer and returning a uint8_t. No call to it is visible in
 * this listing. */
44 extern uint8_t ucol_uprv_getCaseBits(const UChar
*, uint32_t, UErrorCode
*);
/* Registers every test function of this file with the test tree rooted at
 * `root`. Each addTest() call pairs a function pointer with the path
 * "tscoll/citertst/<FunctionName>".
 * NOTE(review): the braces of this function are not visible in this listing
 * (the embedded line numbers skip 47 and 61). */
46 void addCollIterTest(TestNode
** root
)
48 addTest(root
, &TestPrevious
, "tscoll/citertst/TestPrevious");
49 addTest(root
, &TestOffset
, "tscoll/citertst/TestOffset");
50 addTest(root
, &TestSetText
, "tscoll/citertst/TestSetText");
51 addTest(root
, &TestMaxExpansion
, "tscoll/citertst/TestMaxExpansion");
52 addTest(root
, &TestUnicodeChar
, "tscoll/citertst/TestUnicodeChar");
53 addTest(root
, &TestNormalizedUnicodeChar
,
54 "tscoll/citertst/TestNormalizedUnicodeChar");
55 addTest(root
, &TestNormalization
, "tscoll/citertst/TestNormalization");
56 addTest(root
, &TestBug672
, "tscoll/citertst/TestBug672");
57 addTest(root
, &TestBug672Normalize
, "tscoll/citertst/TestBug672Normalize");
58 addTest(root
, &TestSmallBuffer
, "tscoll/citertst/TestSmallBuffer");
59 addTest(root
, &TestDiscontiguos
, "tscoll/citertst/TestDiscontiguos");
60 addTest(root
, &TestSearchCollatorElements
, "tscoll/citertst/TestSearchCollatorElements");
63 /* The locales we support */
/* Three locale IDs; TestBug672 and TestBug672Normalize index this array with
 * i = 0..2 and pass each entry to ucol_open(). */
65 static const char * LOCALES
[] = {"en_AU", "en_BE", "en_CA"};
/* Regression test (per its name, for bug 672). For each of the three LOCALES
 * entries it opens a collator, one element iterator over `pattern` and one
 * over `text`, repositions the text iterator with ucol_setOffset() and
 * ucol_previous(), and stores three observed offsets in result[i][0..2].
 * After the loop the rows of `result` are compared and an error is logged if
 * they differ.
 * NOTE(review): the embedded listing numbers skip, so the declarations of
 * pattern, text, i and result, the tails of several statements, and the
 * closing braces are not visible here. */
67 static void TestBug672() {
68 UErrorCode status
= U_ZERO_ERROR
;
74 u_uastrcpy(pattern
, "resume");
75 u_uastrcpy(text
, "Time to resume updating my resume.");
77 for (i
= 0; i
< 3; ++ i
) {
78 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
/* Length -1 is passed for both iterators (also described as the
 * "null termination test" elsewhere in this file). */
79 UCollationElements
*pitr
= ucol_openElements(coll
, pattern
, -1,
81 UCollationElements
*titer
= ucol_openElements(coll
, text
, -1,
/* One status check covers ucol_open() and both ucol_openElements() calls. */
83 if (U_FAILURE(status
)) {
84 log_err_status(status
, "ERROR: in creation of either the collator or the collation iterator :%s\n",
89 log_verbose("locale tested %s\n", LOCALES
[i
]);
/* Step the pattern iterator forwards until UCOL_NULLORDER; the rest of the
 * loop condition is on a line missing from this listing. */
91 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
/* NOTE(review): the message says "reversing" although the loop above calls
 * ucol_next() — confirm the wording is intended. */
94 if (U_FAILURE(status
)) {
95 log_err("ERROR: reversing collation iterator :%s\n",
/* Place the text iterator at an offset equal to the length of `pattern`. */
101 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
102 if (U_FAILURE(status
)) {
103 log_err("ERROR: setting offset in collator :%s\n",
104 myErrorName(status
));
107 result
[i
][0] = ucol_getOffset(titer
);
108 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
/* Step back one collation element and record the offset afterwards. */
111 ucol_previous(titer
, &status
);
112 result
[i
][1] = ucol_getOffset(titer
);
113 log_verbose("Current offset %d after previous\n", result
[i
][1]);
115 /* Add one to index */
116 log_verbose("Adding one to current offset...\n");
117 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
118 if (U_FAILURE(status
)) {
119 log_err("ERROR: setting offset in collator :%s\n",
120 myErrorName(status
));
123 result
[i
][2] = ucol_getOffset(titer
);
124 log_verbose("Current offset in text = %d\n", result
[i
][2]);
125 ucol_closeElements(pitr
);
126 ucol_closeElements(titer
);
/* Compare row 0 with row 1 and row 1 with row 2.
 * NOTE(review): the length argument is 3. Assuming uprv_memcmp takes a byte
 * count like memcmp, this covers a whole three-element row only if the
 * elements of `result` are one byte wide; the declaration is not visible
 * here — confirm, and consider sizeof(result[0]). */
130 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
131 uprv_memcmp(result
[1], result
[2], 3) != 0) {
132 log_err("ERROR: Different locales have different offsets at the same character\n");
138 /* Running this test with normalization enabled showed up a bug in the incremental
139 normalization code. */
/* Same sequence of steps as TestBug672, except that UCOL_NORMALIZATION_MODE
 * is set to UCOL_ON on each collator before the two element iterators are
 * opened. Three offsets per locale are stored in result[i][0..2] and the rows
 * are compared after the loop.
 * NOTE(review): declarations of pattern, text, i and result, plus closing
 * braces, fall on lines missing from this listing. */
140 static void TestBug672Normalize() {
141 UErrorCode status
= U_ZERO_ERROR
;
147 u_uastrcpy(pattern
, "resume");
148 u_uastrcpy(text
, "Time to resume updating my resume.");
150 for (i
= 0; i
< 3; ++ i
) {
151 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
152 UCollationElements
*pitr
= NULL
;
153 UCollationElements
*titer
= NULL
;
/* Normalization is switched on before the iterators are created. */
155 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
157 pitr
= ucol_openElements(coll
, pattern
, -1, &status
);
158 titer
= ucol_openElements(coll
, text
, -1, &status
);
/* One status check covers the open, the attribute change and both iterators. */
159 if (U_FAILURE(status
)) {
160 log_err_status(status
, "ERROR: in creation of either the collator or the collation iterator :%s\n",
161 myErrorName(status
));
165 log_verbose("locale tested %s\n", LOCALES
[i
]);
/* Step the pattern iterator forwards until UCOL_NULLORDER; the rest of the
 * loop condition is on a line missing from this listing. */
167 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
/* NOTE(review): the message says "reversing" although the loop above calls
 * ucol_next() — confirm the wording is intended. */
170 if (U_FAILURE(status
)) {
171 log_err("ERROR: reversing collation iterator :%s\n",
172 myErrorName(status
));
/* Place the text iterator at an offset equal to the length of `pattern`. */
177 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
178 if (U_FAILURE(status
)) {
179 log_err("ERROR: setting offset in collator :%s\n",
180 myErrorName(status
));
183 result
[i
][0] = ucol_getOffset(titer
);
184 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
/* Step back one collation element and record the offset afterwards. */
187 ucol_previous(titer
, &status
);
188 result
[i
][1] = ucol_getOffset(titer
);
189 log_verbose("Current offset %d after previous\n", result
[i
][1]);
191 /* Add one to index */
192 log_verbose("Adding one to current offset...\n");
193 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
194 if (U_FAILURE(status
)) {
195 log_err("ERROR: setting offset in collator :%s\n",
196 myErrorName(status
));
199 result
[i
][2] = ucol_getOffset(titer
);
200 log_verbose("Current offset in text = %d\n", result
[i
][2]);
201 ucol_closeElements(pitr
);
202 ucol_closeElements(titer
);
/* Compare row 0 with row 1 and row 1 with row 2.
 * NOTE(review): the length argument is 3. Assuming uprv_memcmp takes a byte
 * count like memcmp, this covers a whole three-element row only if the
 * elements of `result` are one byte wide; the declaration is not visible
 * here — confirm, and consider sizeof(result[0]). */
206 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
207 uprv_memcmp(result
[1], result
[2], 3) != 0) {
208 log_err("ERROR: Different locales have different offsets at the same character\n");
216 * Test for CollationElementIterator previous and next for the whole set of
217 * unicode characters.
/* Opens an "en_US" collator and walks `codepoint` from 1 up to (but not
 * including) 0xFFFE. Defined code points (u_isdefined) are appended through
 * the `test` pointer; an element iterator is then opened over `source` twice:
 * once with an explicit u_strlen() length and once with length -1.
 * NOTE(review): the declarations of en_us, source, test and codepoint, the
 * increments of codepoint, and the per-iterator checks are on lines missing
 * from this listing. */
219 static void TestUnicodeChar()
223 UCollationElements
*iter
;
224 UErrorCode status
= U_ZERO_ERROR
;
228 en_us
= ucol_open("en_US", &status
);
229 if (U_FAILURE(status
)){
230 log_err_status(status
, "ERROR: in creation of collation data using ucol_open()\n %s\n",
231 myErrorName(status
));
/* The for statement has no increment clause; codepoint is advanced on lines
 * not visible here. */
235 for (codepoint
= 1; codepoint
< 0xFFFE;)
/* The inner loop runs until codepoint is a multiple of 0xFF (255). */
239 while (codepoint
% 0xFF != 0)
241 if (u_isdefined(codepoint
))
242 *(test
++) = codepoint
;
246 if (u_isdefined(codepoint
))
247 *(test
++) = codepoint
;
249 if (codepoint
!= 0xFFFF)
/* Iterator opened with an explicit length. */
253 iter
=ucol_openElements(en_us
, source
, u_strlen(source
), &status
);
254 if(U_FAILURE(status
)){
255 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
256 myErrorName(status
));
260 /* A basic test to see if it's working at all */
261 log_verbose("codepoint testing %x\n", codepoint
);
263 ucol_closeElements(iter
);
265 /* null termination test */
266 iter
=ucol_openElements(en_us
, source
, -1, &status
);
267 if(U_FAILURE(status
)){
268 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
269 myErrorName(status
));
273 /* A basic test to see if it's working at all */
275 ucol_closeElements(iter
);
282 * Test for CollationElementIterator previous and next for the whole set of
283 * unicode characters with normalization on.
/* Same structure as TestUnicodeChar, but the collator is opened for "th_TH".
 * Defined code points below 0xFFFE are appended through the `test` pointer,
 * and an element iterator is opened over `source` with an explicit length and
 * again with length -1.
 * NOTE(review): the declarations of th_th, source, test and codepoint, the
 * increments of codepoint, and the per-iterator checks are on lines missing
 * from this listing. */
285 static void TestNormalizedUnicodeChar()
289 UCollationElements
*iter
;
290 UErrorCode status
= U_ZERO_ERROR
;
294 /* thai should have normalization on */
295 th_th
= ucol_open("th_TH", &status
);
296 if (U_FAILURE(status
)){
297 log_err_status(status
, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
298 myErrorName(status
));
/* The for statement has no increment clause; codepoint is advanced on lines
 * not visible here. */
302 for (codepoint
= 1; codepoint
< 0xFFFE;)
/* The inner loop runs until codepoint is a multiple of 0xFF (255). */
306 while (codepoint
% 0xFF != 0)
308 if (u_isdefined(codepoint
))
309 *(test
++) = codepoint
;
313 if (u_isdefined(codepoint
))
314 *(test
++) = codepoint
;
316 if (codepoint
!= 0xFFFF)
/* Iterator opened with an explicit length. */
320 iter
=ucol_openElements(th_th
, source
, u_strlen(source
), &status
);
321 if(U_FAILURE(status
)){
322 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
323 myErrorName(status
));
329 ucol_closeElements(iter
);
/* Iterator opened again with length -1. */
331 iter
=ucol_openElements(th_th
, source
, -1, &status
);
332 if(U_FAILURE(status
)){
333 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
334 myErrorName(status
));
340 ucol_closeElements(iter
);
347 * Test the incremental normalization
/* Builds a collator from an escaped rule string (u_unescape into `rule`),
 * turns UCOL_NORMALIZATION_MODE on, and opens element iterators over the
 * unescaped entries of `testdata`: entries 0 and 1 individually, then entries
 * indexed by `count`, each with an explicit length and again with length -1.
 * NOTE(review): the declarations of str, rule, source, srclen, count and
 * coll, the loop header over `count`, and the checks run on each iterator are
 * on lines missing from this listing. */
349 static void TestNormalization()
351 UErrorCode status
= U_ZERO_ERROR
;
353 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
/* At most 50 UChars of the rule are unescaped into `rule`. */
356 int rulelen
= u_unescape(str
, rule
, 50);
358 const char *testdata
[] =
359 {"\\u1ED9", "o\\u0323\\u0302",
360 "\\u0300\\u0315", "\\u0315\\u0300",
361 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
362 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
363 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
364 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
365 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
368 UCollationElements
*iter
;
370 coll
= ucol_openRules(rule
, rulelen
, UCOL_ON
, UCOL_TERTIARY
, NULL
, &status
);
371 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
372 if (U_FAILURE(status
)){
373 log_err_status(status
, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
374 myErrorName(status
));
/* Each test string is unescaped into `source` with a capacity of 10 UChars. */
378 srclen
= u_unescape(testdata
[0], source
, 10);
379 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
381 ucol_closeElements(iter
);
383 srclen
= u_unescape(testdata
[1], source
, 10);
384 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
386 ucol_closeElements(iter
);
389 srclen
= u_unescape(testdata
[count
], source
, 10);
390 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
392 if (U_FAILURE(status
)){
393 log_err("ERROR: in creation of collator element iterator\n %s\n",
394 myErrorName(status
));
398 ucol_closeElements(iter
);
/* Same source again, this time with length -1. */
400 iter
= ucol_openElements(coll
, source
, -1, &status
);
402 if (U_FAILURE(status
)){
403 log_err("ERROR: in creation of collator element iterator\n %s\n",
404 myErrorName(status
));
408 ucol_closeElements(iter
);
415 * Test for CollationElementIterator.previous()
417 * @bug 4108758 - Make sure it works with contracting characters
/* Opens element iterators over several collators in turn: "en_US" over
 * test1, three rule-based collators (c1, c2, c3) over short strings copied
 * into a malloc'ed `source` buffer, then "th_TH" and "ja_JP" over `source`.
 * Each iterator is closed after use.
 * NOTE(review): the declarations of rule, source, test1 and test2, the
 * back-and-forth checks run on each iterator, the assignments to `source`
 * before the Thai and Japanese sections, and any ucol_close()/free() calls
 * are on lines missing from this listing. */
420 static void TestPrevious()
422 UCollator
*coll
=NULL
;
425 UCollator
*c1
, *c2
, *c3
;
426 UCollationElements
*iter
;
427 UErrorCode status
= U_ZERO_ERROR
;
431 u_uastrcpy(test1
, "What subset of all possible test cases?");
432 u_uastrcpy(test2
, "has the highest probability of detecting");
433 coll
= ucol_open("en_US", &status
);
435 iter
=ucol_openElements(coll
, test1
, u_strlen(test1
), &status
);
436 log_verbose("English locale testing back and forth\n");
437 if(U_FAILURE(status
)){
438 log_err_status(status
, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
439 myErrorName(status
));
443 /* A basic test to see if it's working at all */
445 ucol_closeElements(iter
);
448 /* Test with a contracting character sequence */
449 u_uastrcpy(rule
, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
450 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
452 log_verbose("Contraction rule testing back and forth with no normalization\n");
454 if (c1
== NULL
|| U_FAILURE(status
))
456 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
457 myErrorName(status
));
/* Room for 20 UChars; the longest string copied in below is 9 characters.
 * NOTE(review): the malloc result is passed to u_uastrcpy on the next line
 * without a NULL check. */
460 source
=(UChar
*)malloc(sizeof(UChar
) * 20);
461 u_uastrcpy(source
, "abchdcba");
462 iter
=ucol_openElements(c1
, source
, u_strlen(source
), &status
);
463 if(U_FAILURE(status
)){
464 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
465 myErrorName(status
));
469 ucol_closeElements(iter
);
472 /* Test with an expanding character sequence */
473 u_uastrcpy(rule
, "&a < b < c/abd < d");
474 c2
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
475 log_verbose("Expansion rule testing back and forth with no normalization\n");
/* NOTE(review): the failure message below says "contracting sequence" even
 * though this section is labelled as the expansion test — likely copied
 * from the c1 case; confirm. */
476 if (c2
== NULL
|| U_FAILURE(status
))
478 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
479 myErrorName(status
));
482 u_uastrcpy(source
, "abcd");
483 iter
=ucol_openElements(c2
, source
, u_strlen(source
), &status
);
484 if(U_FAILURE(status
)){
485 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
486 myErrorName(status
));
490 ucol_closeElements(iter
);
/* Third rule set: contains both an expansion (c/aba) and "ch". */
493 u_uastrcpy(rule
, "&a < b < c/aba < d < z < ch");
494 c3
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
495 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
497 if (c3
== NULL
|| U_FAILURE(status
))
499 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
500 myErrorName(status
));
503 u_uastrcpy(source
, "abcdbchdc");
504 iter
=ucol_openElements(c3
, source
, u_strlen(source
), &status
);
505 if(U_FAILURE(status
)){
506 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
507 myErrorName(status
));
511 ucol_closeElements(iter
);
/* `coll` is reassigned here; whether the earlier "en_US" collator was closed
 * first cannot be seen in this listing. */
523 coll
= ucol_open("th_TH", &status
);
524 log_verbose("Thai locale testing back and forth with normalization\n");
525 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
526 if(U_FAILURE(status
)){
527 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
528 myErrorName(status
));
532 ucol_closeElements(iter
);
542 coll
= ucol_open("ja_JP", &status
);
543 log_verbose("Japanese locale testing back and forth with normalization\n");
544 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
545 if(U_FAILURE(status
)){
546 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
547 myErrorName(status
));
551 ucol_closeElements(iter
);
558 * Test for getOffset() and setOffset()
/* Exercises ucol_getOffset()/ucol_setOffset() on an "en_US" element iterator
 * over test1: boundary positions (offset 0 and end of string), the offset
 * after a full run through getOrders(), resetting to 0 and comparing against
 * a freshly opened `pristine` iterator via assertEqual(), and finally offset
 * checks during forward and backward iteration over the first 4 UChars of
 * test1 with normalization switched on.
 * NOTE(review): the declarations of test1, test2 and offset, the contents
 * written into test1 for the normalization section, and the bodies/guards
 * around the individual offset checks are on lines missing from this
 * listing. */
560 static void TestOffset()
562 UErrorCode status
= U_ZERO_ERROR
;
563 UCollator
*en_us
=NULL
;
564 UCollationElements
*iter
, *pristine
;
566 OrderAndOffset
*orders
;
567 int32_t orderLength
=0;
572 u_uastrcpy(test1
, "What subset of all possible test cases?");
573 u_uastrcpy(test2
, "has the highest probability of detecting");
574 en_us
= ucol_open("en_US", &status
);
575 log_verbose("Testing getOffset and setOffset for collations\n");
576 iter
= ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
577 if(U_FAILURE(status
)){
578 log_err_status(status
, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
579 myErrorName(status
));
584 /* testing boundaries */
/* At offset 0 a backward step is expected to return UCOL_NULLORDER. */
585 ucol_setOffset(iter
, 0, &status
);
586 if (U_FAILURE(status
) || ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
587 log_err("Error: After setting offset to 0, we should be at the end "
588 "of the backwards iteration");
/* At the end of the string a forward step is expected to return
 * UCOL_NULLORDER.
 * NOTE(review): the message below says "backwards iteration" although this
 * check uses ucol_next(); neither message here ends with a newline. */
590 ucol_setOffset(iter
, u_strlen(test1
), &status
);
591 if (U_FAILURE(status
) || ucol_next(iter
, &status
) != UCOL_NULLORDER
) {
592 log_err("Error: After setting offset to end of the string, we should "
593 "be at the end of the backwards iteration");
596 /* Run all the way through the iterator, then get the offset */
598 orders
= getOrders(iter
, &orderLength
);
600 offset
= ucol_getOffset(iter
);
602 if (offset
!= u_strlen(test1
))
604 log_err("offset at end != length %d vs %d\n", offset
,
608 /* Now set the offset back to the beginning and see if it works */
609 pristine
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
610 if(U_FAILURE(status
)){
611 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
612 myErrorName(status
));
/* status is cleared before the next ucol_setOffset() call. */
616 status
= U_ZERO_ERROR
;
618 ucol_setOffset(iter
, 0, &status
);
619 if (U_FAILURE(status
))
621 log_err("setOffset failed. %s\n", myErrorName(status
));
/* The rewound iterator should yield the same elements as a fresh one. */
625 assertEqual(iter
, pristine
);
628 ucol_closeElements(pristine
);
629 ucol_closeElements(iter
);
632 /* testing offsets in normalization buffer */
638 ucol_setAttribute(en_us
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
/* Only the first 4 UChars of test1 are iterated in this section. */
639 iter
= ucol_openElements(en_us
, test1
, 4, &status
);
640 if(U_FAILURE(status
)){
641 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
642 myErrorName(status
));
/* Forward pass: expected offsets 1, 4 and 3 are checked; the conditions
 * selecting which check applies are not visible here. */
648 while (ucol_next(iter
, &status
) != UCOL_NULLORDER
&&
652 if (ucol_getOffset(iter
) != 1) {
653 log_err("ERROR: Offset of iteration should be 1\n");
657 if (ucol_getOffset(iter
) != 4) {
658 log_err("ERROR: Offset of iteration should be 4\n");
662 if (ucol_getOffset(iter
) != 3) {
663 log_err("ERROR: Offset of iteration should be 3\n");
/* Backward pass: expected offsets 3, 1 and 0 are checked. */
671 while (ucol_previous(iter
, &status
) != UCOL_NULLORDER
&&
676 if (ucol_getOffset(iter
) != 3) {
677 log_err("ERROR: Offset of iteration should be 3\n");
681 if (ucol_getOffset(iter
) != 1) {
682 log_err("ERROR: Offset of iteration should be 1\n");
686 if (ucol_getOffset(iter
) != 0) {
687 log_err("ERROR: Offset of iteration should be 0\n");
693 if(U_FAILURE(status
)){
694 log_err("ERROR: in iterating collation elements %s\n",
695 myErrorName(status
));
698 ucol_closeElements(iter
);
/* Exercises ucol_setText(): opens iter1 over test1 and iter2 over test2 with
 * an "en_US" collator, advances iter2 a few steps, retargets iter2 at test1
 * and compares it with iter1 via assertEqual(), then checks that
 * ucol_setText(iter2, NULL, 2, ...) reports U_ILLEGAL_ARGUMENT_ERROR.
 * NOTE(review): the declarations of test1, test2, c and i, the braces, and
 * whatever separates the error-path cleanup from the following statements
 * are on lines missing from this listing. */
705 static void TestSetText()
708 UErrorCode status
= U_ZERO_ERROR
;
709 UCollator
*en_us
=NULL
;
710 UCollationElements
*iter1
, *iter2
;
714 u_uastrcpy(test1
, "What subset of all possible test cases?");
715 u_uastrcpy(test2
, "has the highest probability of detecting");
716 en_us
= ucol_open("en_US", &status
);
717 log_verbose("testing setText for Collation elements\n");
718 iter1
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
719 if(U_FAILURE(status
)){
720 log_err_status(status
, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
721 myErrorName(status
));
725 iter2
=ucol_openElements(en_us
, test2
, u_strlen(test2
), &status
);
726 if(U_FAILURE(status
)){
727 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
728 myErrorName(status
));
733 /* Run through the second iterator just to exercise it */
734 c
= ucol_next(iter2
, &status
);
/* The loop stops once ++i reaches 10 or c is UCOL_NULLORDER. */
737 while ( ++i
< 10 && (c
!= UCOL_NULLORDER
))
/* On a failed status both iterators are closed here. */
739 if (U_FAILURE(status
))
741 log_err("iter2->next() returned an error. %s\n", myErrorName(status
));
742 ucol_closeElements(iter2
);
743 ucol_closeElements(iter1
);
748 c
= ucol_next(iter2
, &status
);
751 /* Now set it to point to the same string as the first iterator */
752 ucol_setText(iter2
, test1
, u_strlen(test1
), &status
);
753 if (U_FAILURE(status
))
755 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
));
759 assertEqual(iter1
, iter2
);
762 /* Now set it to point to a null string with fake length*/
763 ucol_setText(iter2
, NULL
, 2, &status
);
/* Any status other than U_ILLEGAL_ARGUMENT_ERROR is reported as a failure. */
764 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
)
766 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
767 myErrorName(status
));
770 ucol_closeElements(iter2
);
771 ucol_closeElements(iter1
);
776 * Test for getMaxExpansion()
/* Exercises ucol_getMaxExpansion() on a rule-based collator built from
 * "&a < ab < c/aba < d < z < ch". For single UChars `ch` below 0xFFFF it sets
 * the iterator text to that one character, steps backwards, and checks the
 * reported maximum expansion against the number of elements seen. It then
 * checks several specific expected values (1, 3, 1, 2 for a supplementary
 * code point, and an upper bound of 3). A second collator is built for the
 * "special jamo" case, where the expected value is 6.
 * NOTE(review): the declarations of ch, rule, order, sorder, size and count,
 * every assignment to ch, the second rule string, the log_err arguments, and
 * any ucol_close() calls are on lines missing from this listing. */
778 static void TestMaxExpansion()
780 UErrorCode status
= U_ZERO_ERROR
;
781 UCollator
*coll
;/*= ucol_open("en_US", &status);*/
783 UChar32 unassigned
= 0xEFFFD;
784 UChar supplementary
[2];
785 uint32_t stringOffset
= 0;
786 UBool isError
= FALSE
;
788 UCollationElements
*iter
;/*= ucol_openElements(coll, &ch, 1, &status);*/
789 uint32_t temporder
= 0;
792 u_uastrcpy(rule
, "&a < ab < c/aba < d < z < ch");
793 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
794 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
/* Everything below runs only if the collator was created; the failure
 * message is logged near the end of the function. */
795 if(U_SUCCESS(status
) && coll
) {
796 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
798 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
805 ucol_setText(iter
, &ch
, 1, &status
);
806 order
= ucol_previous(iter
, &status
);
808 /* thai management */
810 order
= ucol_previous(iter
, &status
);
/* Step backwards through the remaining elements of this character. */
812 while (U_SUCCESS(status
) &&
813 ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
/* The reported maximum must not be smaller than `count`. */
817 size
= ucol_getMaxExpansion(iter
, order
);
818 if (U_FAILURE(status
) || size
< count
) {
819 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
824 /* testing for exact max expansion */
829 ucol_setText(iter
, &ch
, 1, &status
);
830 order
= ucol_previous(iter
, &status
);
831 size
= ucol_getMaxExpansion(iter
, order
);
/* NOTE(review): the condition tests size != 1 while the message text says
 * "< %d"; the relation words in several messages of this function differ
 * from the conditions they report — confirm the wording. */
832 if (U_FAILURE(status
) || size
!= 1) {
833 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
840 ucol_setText(iter
, &ch
, 1, &status
);
841 temporder
= ucol_previous(iter
, &status
);
843 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 3) {
844 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
849 ucol_setText(iter
, &ch
, 1, &status
);
850 temporder
= ucol_previous(iter
, &status
);
852 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 1) {
853 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
/* Encode the code point `unassigned` (0xEFFFD) into the two-unit buffer
 * `supplementary` and iterate over it. */
857 U16_APPEND(supplementary
, stringOffset
, 2, unassigned
, isError
);
858 (void)isError
; /* Suppress set but not used warning. */
859 ucol_setText(iter
, supplementary
, 2, &status
);
860 sorder
= ucol_previous(iter
, &status
);
862 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, sorder
) != 2) {
863 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
870 ucol_setText(iter
, &ch
, 1, &status
);
871 temporder
= ucol_previous(iter
, &status
);
872 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) > 3) {
873 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
877 ucol_closeElements(iter
);
880 /* testing special jamo &a<\u1160 */
/* `coll` is reassigned here; whether the first collator was closed
 * beforehand cannot be seen in this listing. */
892 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
893 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
894 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
896 temporder
= ucol_previous(iter
, &status
);
897 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 6) {
898 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
902 ucol_closeElements(iter
);
905 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
/* Steps two element iterators forwards in lockstep with ucol_next() and logs
 * an error, including the iteration count and both values, when they differ.
 * The loop ends when the element from i1 is UCOL_NULLORDER.
 * NOTE(review): the declarations of c1, c2 and count, the comparison guarding
 * the log_err call, and the opening of the loop are on lines missing from
 * this listing. The log text spells the function name "assetEqual"; it is a
 * runtime string and is left unchanged here. */
911 static void assertEqual(UCollationElements
*i1
, UCollationElements
*i2
)
915 UErrorCode status
= U_ZERO_ERROR
;
919 c1
= ucol_next(i1
, &status
);
920 c2
= ucol_next(i2
, &status
);
924 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count
, c1
, c2
);
930 while (c1
!= UCOL_NULLORDER
);
934 * Testing iterators with extremely small buffers
/* Fills `teststr` with 500 UChars alternating 0x300 (even indexes) and 0x31A,
 * opens a "th_TH" collator, and compares the collation elements of that long
 * string (testorders) with those of the two-character string `str` (orders).
 * Afterwards the long iterator is reset, advanced twice and closed.
 * NOTE(review): the declarations of coll, iter, orders, teststr and count,
 * the size checks guarding the first two log_err calls, the loop around the
 * element comparison, and any freeing of orders/testorders or closing of the
 * collator are on lines missing from this listing. */
936 static void TestSmallBuffer()
938 UErrorCode status
= U_ZERO_ERROR
;
940 UCollationElements
*testiter
,
943 OrderAndOffset
*testorders
,
947 UChar str
[] = {0x300, 0x31A, 0};
949 creating a long string of decomposable characters,
950 since by default the writable buffer is of size 256
952 while (count
< 500) {
953 if ((count
& 1) == 0) {
954 teststr
[count
++] = 0x300;
957 teststr
[count
++] = 0x31A;
961 coll
= ucol_open("th_TH", &status
);
962 if(U_SUCCESS(status
) && coll
) {
963 testiter
= ucol_openElements(coll
, teststr
, 500, &status
);
964 iter
= ucol_openElements(coll
, str
, 2, &status
);
/* `count` is reused as the output length of getOrders(). */
966 orders
= getOrders(iter
, &count
);
968 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
972 this will rearrange the string data to 250 characters of 0x300 first then
973 250 characters of 0x031A
975 testorders
= getOrders(testiter
, &count
);
978 log_err("Error decomposition does not give the right sized collation elements\n");
982 /* UCA collation element for 0x0F76 */
/* Indexes above 250 are compared with orders[1], the rest with orders[0].
 * NOTE(review): `-- count` is a side effect inside the condition; when the
 * first comparison finds equality, the second operand is evaluated with the
 * already-decremented count, so count may be decremented twice in one
 * evaluation around the 250 boundary — confirm this is intended. */
983 if ((count
> 250 && testorders
[-- count
].order
!= orders
[1].order
) ||
984 (count
<= 250 && testorders
[-- count
].order
!= orders
[0].order
)) {
985 log_err("Error decomposition does not give the right collation element at %d count\n", count
);
993 ucol_reset(testiter
);
995 /* ensures closing of elements done properly to clear writable buffer */
996 ucol_next(testiter
, &status
);
997 ucol_next(testiter
, &status
);
998 ucol_closeElements(testiter
);
999 ucol_closeElements(iter
);
1002 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
1007 * Testing the discontiguous contractions
/* Builds a rule-based collator from an escaped rule string and, for each
 * index `count` below `size`, compares two element streams: the elements of
 * src[count] read through `iter`, and the elements of the space-separated
 * segments of tgt[count] read one segment at a time through `resultiter`.
 * A mismatch is logged as "Discontiguos contraction test mismatch".
 * NOTE(review): the declarations of rule, str, tstr, s, ce, coll, count and
 * size, the closing braces of the two arrays, the handling of the last
 * segment (where u_strchr finds no space), and any ucol_close() call are on
 * lines missing from this listing. */
1009 static void TestDiscontiguos() {
1010 const char *rulestr
=
1011 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
/* At most 50 UChars of the rule are unescaped into `rule`. */
1013 int rulelen
= u_unescape(rulestr
, rule
, 50);
/* Input strings, in escaped form. */
1014 const char *src
[] = {
1015 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1016 /* base character blocked */
1017 "XD\\u0300", "XD\\u0300\\u0315",
1018 /* non blocking combining character */
1019 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1020 /* blocking combining character */
1021 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1022 /* contraction prefix */
1023 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1024 "X\\u0300\\u031A\\u0315",
1025 /* ends not with a contraction character */
1026 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1027 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
/* Expected segmentation for each src entry; segments are separated by a
 * space (0x20), which is what u_strchr searches for below. */
1029 const char *tgt
[] = {
1030 /* non blocking combining character */
1031 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1032 /* base character blocked */
1033 "X D \\u0300", "X D \\u0300\\u0315",
1034 /* non blocking combining character */
1035 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1036 /* blocking combining character */
1037 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1038 /* contraction prefix */
1039 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1040 "X\\u0300 \\u031A \\u0315",
1041 /* ends not with a contraction character */
1042 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1043 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1047 UErrorCode status
= U_ZERO_ERROR
;
1049 UCollationElements
*iter
;
1050 UCollationElements
*resultiter
;
1052 coll
= ucol_openRules(rule
, rulelen
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
/* Both iterators are first opened over one UChar of `rule`; their text is
 * replaced with ucol_setText() inside the loop. */
1053 iter
= ucol_openElements(coll
, rule
, 1, &status
);
1054 resultiter
= ucol_openElements(coll
, rule
, 1, &status
);
1056 if (U_FAILURE(status
)) {
1057 log_err_status(status
, "Error opening collation rules -> %s\n", u_errorName(status
));
1061 while (count
< size
) {
/* Each string is unescaped into a buffer with a capacity of 20 UChars. */
1064 int strLen
= u_unescape(src
[count
], str
, 20);
1067 ucol_setText(iter
, str
, strLen
, &status
);
1068 if (U_FAILURE(status
)) {
1069 log_err("Error opening collation iterator\n");
1073 u_unescape(tgt
[count
], tstr
, 20);
1076 log_verbose("count %d\n", count
);
/* `e` marks the next space after `s`; the segment [s, e) is iterated. */
1080 UChar
*e
= u_strchr(s
, 0x20);
1084 ucol_setText(resultiter
, s
, (int32_t)(e
- s
), &status
);
1085 ce
= ucol_next(resultiter
, &status
);
1086 if (U_FAILURE(status
)) {
1087 log_err("Error manipulating collation iterator\n");
/* Every element of the segment must equal the next element from `iter`. */
1090 while (ce
!= UCOL_NULLORDER
) {
1091 if (ce
!= ucol_next(iter
, &status
) ||
1092 U_FAILURE(status
)) {
1093 log_err("Discontiguos contraction test mismatch\n");
1096 ce
= ucol_next(resultiter
, &status
);
1097 if (U_FAILURE(status
)) {
1098 log_err("Error getting next collation element\n");
1111 ucol_closeElements(resultiter
);
1112 ucol_closeElements(iter
);
1117 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1118 * normalization on AND jamo tailoring, among other things.
1120 * Note: This test is sensitive to changes of the root collator,
1121 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1122 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1123 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1124 * For example, the DUCET's artificial secondary CE in the ae-ligature
1125 * may map to two 32-bit iterator CEs (as it did until ICU 52).
/* Data for TestSearchCollatorElements: the text to iterate (tsceText), two
 * tables of expected offsets (rootStandardOffsets, rootSearchOffsets), and
 * tsceItems, which pairs a locale ID with an offsets table and its length.
 * NOTE(review): most rows of the two offset tables, the opening of the
 * TSCEItem struct (including its offsetsLen member), and any terminating
 * entry of tsceItems are on lines missing from this listing. */
1127 static const UChar tsceText
[] = { /* Nothing in here should be ignorable */
1128 0x0020, 0xAC00, /* simple LV Hangul */
1129 0x0020, 0xAC01, /* simple LVT Hangul */
1130 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1131 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1132 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1133 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1134 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1135 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1136 0x0020, 0x00E6, /* small letter ae, expands */
1137 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
/* Element count of tsceText, used as the iterator length and end offset. */
1140 enum { kLen_tsceText
= UPRV_LENGTHOF(tsceText
) };
/* Offsets expected for the "root" locale (see tsceItems). */
1142 static const int32_t rootStandardOffsets
[] = {
1151 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1156 enum { kLen_rootStandardOffsets
= UPRV_LENGTHOF(rootStandardOffsets
) };
/* Offsets expected for the "root@collation=search" locale (see tsceItems). */
1158 static const int32_t rootSearchOffsets
[] = {
1166 20, 21,22,22,23,23,23,24,
1167 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1172 enum { kLen_rootSearchOffsets
= UPRV_LENGTHOF(rootSearchOffsets
) };
/* Visible members of the per-locale test item: locale ID and offsets table. */
1175 const char * locale
;
1176 const int32_t * offsets
;
1180 static const TSCEItem tsceItems
[] = {
1181 { "root", rootStandardOffsets
, kLen_rootStandardOffsets
},
1182 { "root@collation=search", rootSearchOffsets
, kLen_rootSearchOffsets
},
/* For each entry of tsceItems (iteration stops at an entry whose locale is
 * NULL), opens a collator for that locale and an element iterator over
 * tsceText, then checks the offset reported by ucol_getOffset() before every
 * ucol_next() against the entry's offsets table read front to back, and
 * before every ucol_previous() against the same table read back to front.
 * Element value 0, too many elements and too few elements are each logged as
 * errors.
 * NOTE(review): the `do {` lines, the pointer advances, the else branches and
 * closing braces, and any ucol_close() call are on lines missing from this
 * listing. */
1186 static void TestSearchCollatorElements(void)
1188 const TSCEItem
* tsceItemPtr
;
1189 for (tsceItemPtr
= tsceItems
; tsceItemPtr
->locale
!= NULL
; tsceItemPtr
++) {
1190 UErrorCode status
= U_ZERO_ERROR
;
1191 UCollator
* ucol
= ucol_open(tsceItemPtr
->locale
, &status
);
1192 if ( U_SUCCESS(status
) ) {
1193 UCollationElements
* uce
= ucol_openElements(ucol
, tsceText
, kLen_tsceText
, &status
);
1194 if ( U_SUCCESS(status
) ) {
1195 int32_t offset
, element
;
1196 const int32_t * nextOffsetPtr
;
1197 const int32_t * limitOffsetPtr
;
/* Forward pass: walk the expected offsets from first to one-past-last. */
1199 nextOffsetPtr
= tsceItemPtr
->offsets
;
1200 limitOffsetPtr
= tsceItemPtr
->offsets
+ tsceItemPtr
->offsetsLen
;
/* The offset is sampled before the element is fetched. */
1202 offset
= ucol_getOffset(uce
);
1203 element
= ucol_next(uce
, &status
);
1204 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr
->locale
, offset
, element
);
1205 if ( element
== 0 ) {
1206 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr
->locale
);
1208 if ( nextOffsetPtr
< limitOffsetPtr
) {
1209 if (offset
!= *nextOffsetPtr
) {
1210 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1211 tsceItemPtr
->locale
, *nextOffsetPtr
, offset
);
/* After a mismatch the cursor is moved to the limit, so no further offsets
 * are compared in this pass. */
1212 nextOffsetPtr
= limitOffsetPtr
;
1217 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr
->locale
);
1219 } while ( U_SUCCESS(status
) && element
!= UCOL_NULLORDER
);
1220 if ( nextOffsetPtr
< limitOffsetPtr
) {
1221 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr
->locale
);
/* Backward pass: start at the end of the text.
 * NOTE(review): status is reset immediately after ucol_setOffset(), so a
 * failure reported by that call is discarded — confirm this is intended. */
1224 ucol_setOffset(uce
, kLen_tsceText
, &status
);
1225 status
= U_ZERO_ERROR
;
1226 nextOffsetPtr
= tsceItemPtr
->offsets
+ tsceItemPtr
->offsetsLen
;
1227 limitOffsetPtr
= tsceItemPtr
->offsets
;
1229 offset
= ucol_getOffset(uce
);
1230 element
= ucol_previous(uce
, &status
);
1231 if ( element
== 0 ) {
1232 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr
->locale
);
1234 if ( nextOffsetPtr
> limitOffsetPtr
) {
1236 if (offset
!= *nextOffsetPtr
) {
1237 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1238 tsceItemPtr
->locale
, *nextOffsetPtr
, offset
);
1239 nextOffsetPtr
= limitOffsetPtr
;
1243 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr
->locale
);
1245 } while ( U_SUCCESS(status
) && element
!= UCOL_NULLORDER
);
1246 if ( nextOffsetPtr
> limitOffsetPtr
) {
1247 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr
->locale
);
1250 ucol_closeElements(uce
);
1252 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr
->locale
, u_errorName(status
) );
/* A collator that cannot be opened is reported through log_data_err rather
 * than log_err. */
1256 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr
->locale
, u_errorName(status
) );
1261 #endif /* #if !UCONFIG_NO_COLLATION */