1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 * Modification History:
13 * Date Name Description
14 * Madhu Katragadda Ported for C API
15 * 02/19/01 synwee Modified test case for new collation iterator
16 *********************************************************************************/
18 * Collation Iterator tests.
19 * (Let me reiterate my position...)
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_COLLATION
26 #include "unicode/ucol.h"
27 #include "unicode/ucoleitr.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/putil.h"
43 extern uint8_t ucol_uprv_getCaseBits(const UChar
*, uint32_t, UErrorCode
*);
45 void addCollIterTest(TestNode
** root
)
47 addTest(root
, &TestPrevious
, "tscoll/citertst/TestPrevious");
48 addTest(root
, &TestOffset
, "tscoll/citertst/TestOffset");
49 addTest(root
, &TestSetText
, "tscoll/citertst/TestSetText");
50 addTest(root
, &TestMaxExpansion
, "tscoll/citertst/TestMaxExpansion");
51 addTest(root
, &TestUnicodeChar
, "tscoll/citertst/TestUnicodeChar");
52 addTest(root
, &TestNormalizedUnicodeChar
,
53 "tscoll/citertst/TestNormalizedUnicodeChar");
54 addTest(root
, &TestNormalization
, "tscoll/citertst/TestNormalization");
55 addTest(root
, &TestBug672
, "tscoll/citertst/TestBug672");
56 addTest(root
, &TestBug672Normalize
, "tscoll/citertst/TestBug672Normalize");
57 addTest(root
, &TestSmallBuffer
, "tscoll/citertst/TestSmallBuffer");
58 addTest(root
, &TestDiscontiguos
, "tscoll/citertst/TestDiscontiguos");
59 addTest(root
, &TestSearchCollatorElements
, "tscoll/citertst/TestSearchCollatorElements");
62 /* The locales we support */
64 static const char * LOCALES
[] = {"en_AU", "en_BE", "en_CA"};
66 static void TestBug672() {
67 UErrorCode status
= U_ZERO_ERROR
;
73 u_uastrcpy(pattern
, "resume");
74 u_uastrcpy(text
, "Time to resume updating my resume.");
76 for (i
= 0; i
< 3; ++ i
) {
77 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
78 UCollationElements
*pitr
= ucol_openElements(coll
, pattern
, -1,
80 UCollationElements
*titer
= ucol_openElements(coll
, text
, -1,
82 if (U_FAILURE(status
)) {
83 log_err_status(status
, "ERROR: in creation of either the collator or the collation iterator :%s\n",
88 log_verbose("locale tested %s\n", LOCALES
[i
]);
90 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
93 if (U_FAILURE(status
)) {
94 log_err("ERROR: reversing collation iterator :%s\n",
100 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
101 if (U_FAILURE(status
)) {
102 log_err("ERROR: setting offset in collator :%s\n",
103 myErrorName(status
));
106 result
[i
][0] = ucol_getOffset(titer
);
107 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
110 ucol_previous(titer
, &status
);
111 result
[i
][1] = ucol_getOffset(titer
);
112 log_verbose("Current offset %d after previous\n", result
[i
][1]);
114 /* Add one to index */
115 log_verbose("Adding one to current offset...\n");
116 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
117 if (U_FAILURE(status
)) {
118 log_err("ERROR: setting offset in collator :%s\n",
119 myErrorName(status
));
122 result
[i
][2] = ucol_getOffset(titer
);
123 log_verbose("Current offset in text = %d\n", result
[i
][2]);
124 ucol_closeElements(pitr
);
125 ucol_closeElements(titer
);
129 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
130 uprv_memcmp(result
[1], result
[2], 3) != 0) {
131 log_err("ERROR: Different locales have different offsets at the same character\n");
137 /* Running this test with normalization enabled showed up a bug in the incremental
138 normalization code. */
139 static void TestBug672Normalize() {
140 UErrorCode status
= U_ZERO_ERROR
;
146 u_uastrcpy(pattern
, "resume");
147 u_uastrcpy(text
, "Time to resume updating my resume.");
149 for (i
= 0; i
< 3; ++ i
) {
150 UCollator
*coll
= ucol_open(LOCALES
[i
], &status
);
151 UCollationElements
*pitr
= NULL
;
152 UCollationElements
*titer
= NULL
;
154 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
156 pitr
= ucol_openElements(coll
, pattern
, -1, &status
);
157 titer
= ucol_openElements(coll
, text
, -1, &status
);
158 if (U_FAILURE(status
)) {
159 log_err_status(status
, "ERROR: in creation of either the collator or the collation iterator :%s\n",
160 myErrorName(status
));
164 log_verbose("locale tested %s\n", LOCALES
[i
]);
166 while (ucol_next(pitr
, &status
) != UCOL_NULLORDER
&&
169 if (U_FAILURE(status
)) {
170 log_err("ERROR: reversing collation iterator :%s\n",
171 myErrorName(status
));
176 ucol_setOffset(titer
, u_strlen(pattern
), &status
);
177 if (U_FAILURE(status
)) {
178 log_err("ERROR: setting offset in collator :%s\n",
179 myErrorName(status
));
182 result
[i
][0] = ucol_getOffset(titer
);
183 log_verbose("Text iterator set to offset %d\n", result
[i
][0]);
186 ucol_previous(titer
, &status
);
187 result
[i
][1] = ucol_getOffset(titer
);
188 log_verbose("Current offset %d after previous\n", result
[i
][1]);
190 /* Add one to index */
191 log_verbose("Adding one to current offset...\n");
192 ucol_setOffset(titer
, ucol_getOffset(titer
) + 1, &status
);
193 if (U_FAILURE(status
)) {
194 log_err("ERROR: setting offset in collator :%s\n",
195 myErrorName(status
));
198 result
[i
][2] = ucol_getOffset(titer
);
199 log_verbose("Current offset in text = %d\n", result
[i
][2]);
200 ucol_closeElements(pitr
);
201 ucol_closeElements(titer
);
205 if (uprv_memcmp(result
[0], result
[1], 3) != 0 ||
206 uprv_memcmp(result
[1], result
[2], 3) != 0) {
207 log_err("ERROR: Different locales have different offsets at the same character\n");
215 * Test for CollationElementIterator previous and next over the defined
216 * characters of the Unicode Basic Multilingual Plane (BMP).
218 static void TestUnicodeChar()
222 UCollationElements
*iter
;
223 UErrorCode status
= U_ZERO_ERROR
;
227 en_us
= ucol_open("en_US", &status
);
228 if (U_FAILURE(status
)){
229 log_err_status(status
, "ERROR: in creation of collation data using ucol_open()\n %s\n",
230 myErrorName(status
));
234 for (codepoint
= 1; codepoint
< 0xFFFE;)
238 while (codepoint
% 0xFF != 0)
240 if (u_isdefined(codepoint
))
241 *(test
++) = codepoint
;
245 if (u_isdefined(codepoint
))
246 *(test
++) = codepoint
;
248 if (codepoint
!= 0xFFFF)
252 iter
=ucol_openElements(en_us
, source
, u_strlen(source
), &status
);
253 if(U_FAILURE(status
)){
254 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
255 myErrorName(status
));
259 /* A basic test to see if it's working at all */
260 log_verbose("codepoint testing %x\n", codepoint
);
262 ucol_closeElements(iter
);
264 /* null termination test */
265 iter
=ucol_openElements(en_us
, source
, -1, &status
);
266 if(U_FAILURE(status
)){
267 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
268 myErrorName(status
));
272 /* A basic test to see if it's working at all */
274 ucol_closeElements(iter
);
281 * Test for CollationElementIterator previous and next over the defined
282 * characters of the Unicode Basic Multilingual Plane (BMP), with normalization on.
284 static void TestNormalizedUnicodeChar()
288 UCollationElements
*iter
;
289 UErrorCode status
= U_ZERO_ERROR
;
293 /* thai should have normalization on */
294 th_th
= ucol_open("th_TH", &status
);
295 if (U_FAILURE(status
)){
296 log_err_status(status
, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
297 myErrorName(status
));
301 for (codepoint
= 1; codepoint
< 0xFFFE;)
305 while (codepoint
% 0xFF != 0)
307 if (u_isdefined(codepoint
))
308 *(test
++) = codepoint
;
312 if (u_isdefined(codepoint
))
313 *(test
++) = codepoint
;
315 if (codepoint
!= 0xFFFF)
319 iter
=ucol_openElements(th_th
, source
, u_strlen(source
), &status
);
320 if(U_FAILURE(status
)){
321 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
322 myErrorName(status
));
328 ucol_closeElements(iter
);
330 iter
=ucol_openElements(th_th
, source
, -1, &status
);
331 if(U_FAILURE(status
)){
332 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
333 myErrorName(status
));
339 ucol_closeElements(iter
);
346 * Test the incremental normalization
348 static void TestNormalization()
350 UErrorCode status
= U_ZERO_ERROR
;
352 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
355 int rulelen
= u_unescape(str
, rule
, 50);
357 const char *testdata
[] =
358 {"\\u1ED9", "o\\u0323\\u0302",
359 "\\u0300\\u0315", "\\u0315\\u0300",
360 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
361 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
362 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
363 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
364 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
367 UCollationElements
*iter
;
369 coll
= ucol_openRules(rule
, rulelen
, UCOL_ON
, UCOL_TERTIARY
, NULL
, &status
);
370 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
371 if (U_FAILURE(status
)){
372 log_err_status(status
, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
373 myErrorName(status
));
377 srclen
= u_unescape(testdata
[0], source
, 10);
378 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
380 ucol_closeElements(iter
);
382 srclen
= u_unescape(testdata
[1], source
, 10);
383 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
385 ucol_closeElements(iter
);
388 srclen
= u_unescape(testdata
[count
], source
, 10);
389 iter
= ucol_openElements(coll
, source
, srclen
, &status
);
391 if (U_FAILURE(status
)){
392 log_err("ERROR: in creation of collator element iterator\n %s\n",
393 myErrorName(status
));
397 ucol_closeElements(iter
);
399 iter
= ucol_openElements(coll
, source
, -1, &status
);
401 if (U_FAILURE(status
)){
402 log_err("ERROR: in creation of collator element iterator\n %s\n",
403 myErrorName(status
));
407 ucol_closeElements(iter
);
414 * Test for CollationElementIterator.previous()
416 * @bug 4108758 - Make sure it works with contracting characters
419 static void TestPrevious()
421 UCollator
*coll
=NULL
;
424 UCollator
*c1
, *c2
, *c3
;
425 UCollationElements
*iter
;
426 UErrorCode status
= U_ZERO_ERROR
;
430 u_uastrcpy(test1
, "What subset of all possible test cases?");
431 u_uastrcpy(test2
, "has the highest probability of detecting");
432 coll
= ucol_open("en_US", &status
);
434 iter
=ucol_openElements(coll
, test1
, u_strlen(test1
), &status
);
435 log_verbose("English locale testing back and forth\n");
436 if(U_FAILURE(status
)){
437 log_err_status(status
, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
438 myErrorName(status
));
442 /* A basic test to see if it's working at all */
444 ucol_closeElements(iter
);
447 /* Test with a contracting character sequence */
448 u_uastrcpy(rule
, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
449 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
451 log_verbose("Contraction rule testing back and forth with no normalization\n");
453 if (c1
== NULL
|| U_FAILURE(status
))
455 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
456 myErrorName(status
));
459 source
=(UChar
*)malloc(sizeof(UChar
) * 20);
460 u_uastrcpy(source
, "abchdcba");
461 iter
=ucol_openElements(c1
, source
, u_strlen(source
), &status
);
462 if(U_FAILURE(status
)){
463 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
464 myErrorName(status
));
468 ucol_closeElements(iter
);
471 /* Test with an expanding character sequence */
472 u_uastrcpy(rule
, "&a < b < c/abd < d");
473 c2
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
474 log_verbose("Expansion rule testing back and forth with no normalization\n");
475 if (c2
== NULL
|| U_FAILURE(status
))
477 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
478 myErrorName(status
));
481 u_uastrcpy(source
, "abcd");
482 iter
=ucol_openElements(c2
, source
, u_strlen(source
), &status
);
483 if(U_FAILURE(status
)){
484 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
485 myErrorName(status
));
489 ucol_closeElements(iter
);
492 u_uastrcpy(rule
, "&a < b < c/aba < d < z < ch");
493 c3
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
494 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
496 if (c3
== NULL
|| U_FAILURE(status
))
498 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
499 myErrorName(status
));
502 u_uastrcpy(source
, "abcdbchdc");
503 iter
=ucol_openElements(c3
, source
, u_strlen(source
), &status
);
504 if(U_FAILURE(status
)){
505 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
506 myErrorName(status
));
510 ucol_closeElements(iter
);
522 coll
= ucol_open("th_TH", &status
);
523 log_verbose("Thai locale testing back and forth with normalization\n");
524 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
525 if(U_FAILURE(status
)){
526 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
527 myErrorName(status
));
531 ucol_closeElements(iter
);
541 coll
= ucol_open("ja_JP", &status
);
542 log_verbose("Japanese locale testing back and forth with normalization\n");
543 iter
=ucol_openElements(coll
, source
, u_strlen(source
), &status
);
544 if(U_FAILURE(status
)){
545 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
546 myErrorName(status
));
550 ucol_closeElements(iter
);
557 * Test for getOffset() and setOffset()
559 static void TestOffset()
561 UErrorCode status
= U_ZERO_ERROR
;
562 UCollator
*en_us
=NULL
;
563 UCollationElements
*iter
, *pristine
;
565 OrderAndOffset
*orders
;
566 int32_t orderLength
=0;
571 u_uastrcpy(test1
, "What subset of all possible test cases?");
572 u_uastrcpy(test2
, "has the highest probability of detecting");
573 en_us
= ucol_open("en_US", &status
);
574 log_verbose("Testing getOffset and setOffset for collations\n");
575 iter
= ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
576 if(U_FAILURE(status
)){
577 log_err_status(status
, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
578 myErrorName(status
));
583 /* testing boundaries */
584 ucol_setOffset(iter
, 0, &status
);
585 if (U_FAILURE(status
) || ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
586 log_err("Error: After setting offset to 0, we should be at the end "
587 "of the backwards iteration");
589 ucol_setOffset(iter
, u_strlen(test1
), &status
);
590 if (U_FAILURE(status
) || ucol_next(iter
, &status
) != UCOL_NULLORDER
) {
591 log_err("Error: After setting offset to end of the string, we should "
592 "be at the end of the backwards iteration");
595 /* Run all the way through the iterator, then get the offset */
597 orders
= getOrders(iter
, &orderLength
);
599 offset
= ucol_getOffset(iter
);
601 if (offset
!= u_strlen(test1
))
603 log_err("offset at end != length %d vs %d\n", offset
,
607 /* Now set the offset back to the beginning and see if it works */
608 pristine
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
609 if(U_FAILURE(status
)){
610 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
611 myErrorName(status
));
615 status
= U_ZERO_ERROR
;
617 ucol_setOffset(iter
, 0, &status
);
618 if (U_FAILURE(status
))
620 log_err("setOffset failed. %s\n", myErrorName(status
));
624 assertEqual(iter
, pristine
);
627 ucol_closeElements(pristine
);
628 ucol_closeElements(iter
);
631 /* testing offsets in normalization buffer */
637 ucol_setAttribute(en_us
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
638 iter
= ucol_openElements(en_us
, test1
, 4, &status
);
639 if(U_FAILURE(status
)){
640 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
641 myErrorName(status
));
647 while (ucol_next(iter
, &status
) != UCOL_NULLORDER
&&
651 if (ucol_getOffset(iter
) != 1) {
652 log_err("ERROR: Offset of iteration should be 1\n");
656 if (ucol_getOffset(iter
) != 4) {
657 log_err("ERROR: Offset of iteration should be 4\n");
661 if (ucol_getOffset(iter
) != 3) {
662 log_err("ERROR: Offset of iteration should be 3\n");
670 while (ucol_previous(iter
, &status
) != UCOL_NULLORDER
&&
675 if (ucol_getOffset(iter
) != 3) {
676 log_err("ERROR: Offset of iteration should be 3\n");
680 if (ucol_getOffset(iter
) != 1) {
681 log_err("ERROR: Offset of iteration should be 1\n");
685 if (ucol_getOffset(iter
) != 0) {
686 log_err("ERROR: Offset of iteration should be 0\n");
692 if(U_FAILURE(status
)){
693 log_err("ERROR: in iterating collation elements %s\n",
694 myErrorName(status
));
697 ucol_closeElements(iter
);
704 static void TestSetText()
707 UErrorCode status
= U_ZERO_ERROR
;
708 UCollator
*en_us
=NULL
;
709 UCollationElements
*iter1
, *iter2
;
713 u_uastrcpy(test1
, "What subset of all possible test cases?");
714 u_uastrcpy(test2
, "has the highest probability of detecting");
715 en_us
= ucol_open("en_US", &status
);
716 log_verbose("testing setText for Collation elements\n");
717 iter1
=ucol_openElements(en_us
, test1
, u_strlen(test1
), &status
);
718 if(U_FAILURE(status
)){
719 log_err_status(status
, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
720 myErrorName(status
));
724 iter2
=ucol_openElements(en_us
, test2
, u_strlen(test2
), &status
);
725 if(U_FAILURE(status
)){
726 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
727 myErrorName(status
));
732 /* Run through the second iterator just to exercise it */
733 c
= ucol_next(iter2
, &status
);
736 while ( ++i
< 10 && (c
!= UCOL_NULLORDER
))
738 if (U_FAILURE(status
))
740 log_err("iter2->next() returned an error. %s\n", myErrorName(status
));
741 ucol_closeElements(iter2
);
742 ucol_closeElements(iter1
);
747 c
= ucol_next(iter2
, &status
);
750 /* Now set it to point to the same string as the first iterator */
751 ucol_setText(iter2
, test1
, u_strlen(test1
), &status
);
752 if (U_FAILURE(status
))
754 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
));
758 assertEqual(iter1
, iter2
);
761 /* Now set it to point to a null string with fake length*/
762 ucol_setText(iter2
, NULL
, 2, &status
);
763 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
)
765 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
766 myErrorName(status
));
769 ucol_closeElements(iter2
);
770 ucol_closeElements(iter1
);
775 * Test for getMaxExpansion()
777 static void TestMaxExpansion()
779 UErrorCode status
= U_ZERO_ERROR
;
780 UCollator
*coll
;/*= ucol_open("en_US", &status);*/
782 UChar32 unassigned
= 0xEFFFD;
783 UChar supplementary
[2];
784 uint32_t stringOffset
= 0;
785 UBool isError
= FALSE
;
787 UCollationElements
*iter
;/*= ucol_openElements(coll, &ch, 1, &status);*/
788 uint32_t temporder
= 0;
791 u_uastrcpy(rule
, "&a < ab < c/aba < d < z < ch");
792 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
793 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
794 if(U_SUCCESS(status
) && coll
) {
795 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
797 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
804 ucol_setText(iter
, &ch
, 1, &status
);
805 order
= ucol_previous(iter
, &status
);
807 /* thai management */
809 order
= ucol_previous(iter
, &status
);
811 while (U_SUCCESS(status
) &&
812 ucol_previous(iter
, &status
) != UCOL_NULLORDER
) {
816 size
= ucol_getMaxExpansion(iter
, order
);
817 if (U_FAILURE(status
) || size
< count
) {
818 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
823 /* testing for exact max expansion */
828 ucol_setText(iter
, &ch
, 1, &status
);
829 order
= ucol_previous(iter
, &status
);
830 size
= ucol_getMaxExpansion(iter
, order
);
831 if (U_FAILURE(status
) || size
!= 1) {
832 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
839 ucol_setText(iter
, &ch
, 1, &status
);
840 temporder
= ucol_previous(iter
, &status
);
842 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 3) {
843 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
848 ucol_setText(iter
, &ch
, 1, &status
);
849 temporder
= ucol_previous(iter
, &status
);
851 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 1) {
852 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
856 U16_APPEND(supplementary
, stringOffset
, 2, unassigned
, isError
);
857 (void)isError
; /* Suppress set but not used warning. */
858 ucol_setText(iter
, supplementary
, 2, &status
);
859 sorder
= ucol_previous(iter
, &status
);
861 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, sorder
) != 2) {
862 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
869 ucol_setText(iter
, &ch
, 1, &status
);
870 temporder
= ucol_previous(iter
, &status
);
871 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) > 3) {
872 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
876 ucol_closeElements(iter
);
879 /* testing special jamo &a<\u1160 */
891 coll
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
892 UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
893 iter
= ucol_openElements(coll
, &ch
, 1, &status
);
895 temporder
= ucol_previous(iter
, &status
);
896 if (U_FAILURE(status
) || ucol_getMaxExpansion(iter
, temporder
) != 6) {
897 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
901 ucol_closeElements(iter
);
904 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
910 static void assertEqual(UCollationElements
*i1
, UCollationElements
*i2
)
914 UErrorCode status
= U_ZERO_ERROR
;
918 c1
= ucol_next(i1
, &status
);
919 c2
= ucol_next(i2
, &status
);
923 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count
, c1
, c2
);
929 while (c1
!= UCOL_NULLORDER
);
933 * Testing iterators with extremely small buffers
935 static void TestSmallBuffer()
937 UErrorCode status
= U_ZERO_ERROR
;
939 UCollationElements
*testiter
,
942 OrderAndOffset
*testorders
,
946 UChar str
[] = {0x300, 0x31A, 0};
948 creating a long string of decomposable characters,
949 since by default the writable buffer is of size 256
951 while (count
< 500) {
952 if ((count
& 1) == 0) {
953 teststr
[count
++] = 0x300;
956 teststr
[count
++] = 0x31A;
960 coll
= ucol_open("th_TH", &status
);
961 if(U_SUCCESS(status
) && coll
) {
962 testiter
= ucol_openElements(coll
, teststr
, 500, &status
);
963 iter
= ucol_openElements(coll
, str
, 2, &status
);
965 orders
= getOrders(iter
, &count
);
967 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
971 this will rearrange the string data to 250 characters of 0x300 first then
972 250 characters of 0x031A
974 testorders
= getOrders(testiter
, &count
);
977 log_err("Error decomposition does not give the right sized collation elements\n");
981 /* UCA collation element for 0x0F76 */
982 if ((count
> 250 && testorders
[-- count
].order
!= orders
[1].order
) ||
983 (count
<= 250 && testorders
[-- count
].order
!= orders
[0].order
)) {
984 log_err("Error decomposition does not give the right collation element at %d count\n", count
);
992 ucol_reset(testiter
);
994 /* ensures closing of elements done properly to clear writable buffer */
995 ucol_next(testiter
, &status
);
996 ucol_next(testiter
, &status
);
997 ucol_closeElements(testiter
);
998 ucol_closeElements(iter
);
1001 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
1006 * Testing the discontiguous contractions
1008 static void TestDiscontiguos() {
1009 const char *rulestr
=
1010 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1012 int rulelen
= u_unescape(rulestr
, rule
, 50);
1013 const char *src
[] = {
1014 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1015 /* base character blocked */
1016 "XD\\u0300", "XD\\u0300\\u0315",
1017 /* non blocking combining character */
1018 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1019 /* blocking combining character */
1020 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1021 /* contraction prefix */
1022 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1023 "X\\u0300\\u031A\\u0315",
1024 /* ends not with a contraction character */
1025 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1026 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1028 const char *tgt
[] = {
1029 /* non blocking combining character */
1030 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1031 /* base character blocked */
1032 "X D \\u0300", "X D \\u0300\\u0315",
1033 /* non blocking combining character */
1034 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1035 /* blocking combining character */
1036 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1037 /* contraction prefix */
1038 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1039 "X\\u0300 \\u031A \\u0315",
1040 /* ends not with a contraction character */
1041 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1042 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1046 UErrorCode status
= U_ZERO_ERROR
;
1048 UCollationElements
*iter
;
1049 UCollationElements
*resultiter
;
1051 coll
= ucol_openRules(rule
, rulelen
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
1052 iter
= ucol_openElements(coll
, rule
, 1, &status
);
1053 resultiter
= ucol_openElements(coll
, rule
, 1, &status
);
1055 if (U_FAILURE(status
)) {
1056 log_err_status(status
, "Error opening collation rules -> %s\n", u_errorName(status
));
1060 while (count
< size
) {
1063 int strLen
= u_unescape(src
[count
], str
, 20);
1066 ucol_setText(iter
, str
, strLen
, &status
);
1067 if (U_FAILURE(status
)) {
1068 log_err("Error opening collation iterator\n");
1072 u_unescape(tgt
[count
], tstr
, 20);
1075 log_verbose("count %d\n", count
);
1079 UChar
*e
= u_strchr(s
, 0x20);
1083 ucol_setText(resultiter
, s
, (int32_t)(e
- s
), &status
);
1084 ce
= ucol_next(resultiter
, &status
);
1085 if (U_FAILURE(status
)) {
1086 log_err("Error manipulating collation iterator\n");
1089 while (ce
!= UCOL_NULLORDER
) {
1090 if (ce
!= (uint32_t)ucol_next(iter
, &status
) ||
1091 U_FAILURE(status
)) {
1092 log_err("Discontiguos contraction test mismatch\n");
1095 ce
= ucol_next(resultiter
, &status
);
1096 if (U_FAILURE(status
)) {
1097 log_err("Error getting next collation element\n");
1110 ucol_closeElements(resultiter
);
1111 ucol_closeElements(iter
);
1116 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1117 * normalization on AND jamo tailoring, among other things.
1119 * Note: This test is sensitive to changes of the root collator,
1120 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1121 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1122 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1123 * For example, the DUCET's artificial secondary CE in the ae-ligature
1124 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1126 static const UChar tsceText
[] = { /* Nothing in here should be ignorable */
1127 0x0020, 0xAC00, /* simple LV Hangul */
1128 0x0020, 0xAC01, /* simple LVT Hangul */
1129 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1130 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1131 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1132 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1133 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1134 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1135 0x0020, 0x00E6, /* small letter ae, expands */
1136 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
1139 enum { kLen_tsceText
= UPRV_LENGTHOF(tsceText
) };
1141 static const int32_t rootStandardOffsets
[] = {
1150 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1155 enum { kLen_rootStandardOffsets
= UPRV_LENGTHOF(rootStandardOffsets
) };
1157 static const int32_t rootSearchOffsets
[] = {
1165 20, 21,22,22,23,23,23,24,
1166 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1171 enum { kLen_rootSearchOffsets
= UPRV_LENGTHOF(rootSearchOffsets
) };
1174 const char * locale
;
1175 const int32_t * offsets
;
1179 static const TSCEItem tsceItems
[] = {
1180 { "root", rootStandardOffsets
, kLen_rootStandardOffsets
},
1181 { "root@collation=search", rootSearchOffsets
, kLen_rootSearchOffsets
},
1185 static void TestSearchCollatorElements(void)
1187 const TSCEItem
* tsceItemPtr
;
1188 for (tsceItemPtr
= tsceItems
; tsceItemPtr
->locale
!= NULL
; tsceItemPtr
++) {
1189 UErrorCode status
= U_ZERO_ERROR
;
1190 UCollator
* ucol
= ucol_open(tsceItemPtr
->locale
, &status
);
1191 if ( U_SUCCESS(status
) ) {
1192 UCollationElements
* uce
= ucol_openElements(ucol
, tsceText
, kLen_tsceText
, &status
);
1193 if ( U_SUCCESS(status
) ) {
1194 int32_t offset
, element
;
1195 const int32_t * nextOffsetPtr
;
1196 const int32_t * limitOffsetPtr
;
1198 nextOffsetPtr
= tsceItemPtr
->offsets
;
1199 limitOffsetPtr
= tsceItemPtr
->offsets
+ tsceItemPtr
->offsetsLen
;
1201 offset
= ucol_getOffset(uce
);
1202 element
= ucol_next(uce
, &status
);
1203 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr
->locale
, offset
, element
);
1204 if ( element
== 0 ) {
1205 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr
->locale
);
1207 if ( nextOffsetPtr
< limitOffsetPtr
) {
1208 if (offset
!= *nextOffsetPtr
) {
1209 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1210 tsceItemPtr
->locale
, *nextOffsetPtr
, offset
);
1211 nextOffsetPtr
= limitOffsetPtr
;
1216 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr
->locale
);
1218 } while ( U_SUCCESS(status
) && element
!= UCOL_NULLORDER
);
1219 if ( nextOffsetPtr
< limitOffsetPtr
) {
1220 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr
->locale
);
1223 ucol_setOffset(uce
, kLen_tsceText
, &status
);
1224 status
= U_ZERO_ERROR
;
1225 nextOffsetPtr
= tsceItemPtr
->offsets
+ tsceItemPtr
->offsetsLen
;
1226 limitOffsetPtr
= tsceItemPtr
->offsets
;
1228 offset
= ucol_getOffset(uce
);
1229 element
= ucol_previous(uce
, &status
);
1230 if ( element
== 0 ) {
1231 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr
->locale
);
1233 if ( nextOffsetPtr
> limitOffsetPtr
) {
1235 if (offset
!= *nextOffsetPtr
) {
1236 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1237 tsceItemPtr
->locale
, *nextOffsetPtr
, offset
);
1238 nextOffsetPtr
= limitOffsetPtr
;
1242 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr
->locale
);
1244 } while ( U_SUCCESS(status
) && element
!= UCOL_NULLORDER
);
1245 if ( nextOffsetPtr
> limitOffsetPtr
) {
1246 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr
->locale
);
1249 ucol_closeElements(uce
);
1251 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr
->locale
, u_errorName(status
) );
1255 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr
->locale
, u_errorName(status
) );
1260 #endif /* #if !UCONFIG_NO_COLLATION */