1 /********************************************************************
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_COLLATION
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
16 #include "unicode/schriter.h"
17 #include "unicode/chariter.h"
18 #include "unicode/uchar.h"
// File-scope error code, initialised to U_ZERO_ERROR. The constructor that follows passes it
// to Collator::createInstance and then inspects it with U_FAILURE.
// NOTE(review): most test methods in this file declare their own local variable named status,
// which shadows this one inside those methods.
21 static UErrorCode status
= U_ZERO_ERROR
;
// Constructor: initialises the two sample strings test1 and test2 and creates the en_us
// collator for Locale::getUS(). The Collator* returned by createInstance is converted to
// RuleBasedCollator* with a C-style cast. If status reports a failure, errcheckln is called
// with the status and its u_errorName() text.
// NOTE(review): the embedded numbering skips 26, 29 and 30, so lines are absent from this
// listing; what else happens in the failure branch cannot be confirmed from here.
23 CollationIteratorTest::CollationIteratorTest()
24 : test1("What subset of all possible test cases?", ""),
25 test2("has the highest probability of detecting", "")
27 en_us
= (RuleBasedCollator
*)Collator::createInstance(Locale::getUS(), status
);
28 if(U_FAILURE(status
)) {
31 errcheckln(status
, "Collator creation failed with %s", u_errorName(status
));
// Destructor. Only the signature line is present in this listing; the body, and whatever it
// releases, is not visible here.
37 CollationIteratorTest::~CollationIteratorTest()
43 * Test for CollationElementIterator previous and next for the whole set of
// TestUnicodeChar: iterates codepoint from 1 while it stays below 0xFFFE. An inner while loop
// runs until codepoint is a multiple of 0xFF, u_isdefined() gates work on the current code
// point, and 0xFFFF is special-cased. A collation element iterator over source is then
// created from the en_us collator.
// NOTE(review): the for header has an empty increment clause, so codepoint has to be advanced
// inside the body; those statements are not visible in this listing.
// NOTE(review): the statements guarded by each u_isdefined() check, and the declaration of
// source, are also absent from this listing.
46 void CollationIteratorTest::TestUnicodeChar()
48 CollationElementIterator
*iter
;
52 for (codepoint
= 1; codepoint
< 0xFFFE;)
56 while (codepoint
% 0xFF != 0)
58 if (u_isdefined(codepoint
))
63 if (u_isdefined(codepoint
))
66 if (codepoint
!= 0xFFFF)
// A fresh iterator is requested from en_us for the text accumulated in source.
69 iter
= en_us
->createCollationElementIterator(source
);
70 /* A basic test to see if it's working at all */
77 * Test for CollationElementIterator.previous()
79 * @bug 4108758 - Make sure it works with contracting characters
// TestPrevious: builds collation element iterators over several collators in turn: en_us with
// test1, then rule-based collators c1 (contraction rules), c2 (expansion rule), c3 (both),
// and finally locale collators c4 (th_TH) and c5 (ja_JP).
// The local status declared here shadows the file-scope status.
// NOTE(review): the calls that actually exercise each iterator, and any delete statements for
// the iterators and collators, fall on lines that are absent from this listing.
82 void CollationIteratorTest::TestPrevious(/* char* par */)
84 UErrorCode status
= U_ZERO_ERROR
;
85 CollationElementIterator
*iter
= en_us
->createCollationElementIterator(test1
);
87 // A basic test to see if it's working at all
91 // Test with a contracting character sequence
93 RuleBasedCollator
*c1
= NULL
;
94 c1
= new RuleBasedCollator(
95 (UnicodeString
)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status
);
// Both a null pointer and a failing status are treated as a construction failure.
97 if (c1
== NULL
|| U_FAILURE(status
))
99 errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
// iter is re-pointed at a new iterator here and again for c2, c3, c4 and c5.
// NOTE(review): confirm the previous iterator is deleted before each reassignment.
105 iter
= c1
->createCollationElementIterator(source
);
110 // Test with an expanding character sequence
111 RuleBasedCollator
*c2
= NULL
;
112 c2
= new RuleBasedCollator((UnicodeString
)"&a < b < c/abd < d", status
);
114 if (c2
== NULL
|| U_FAILURE(status
))
116 errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
122 iter
= c2
->createCollationElementIterator(source
);
// Third rule set combines an expansion (c/aba) with a contraction (ch).
128 RuleBasedCollator
*c3
= NULL
;
129 c3
= new RuleBasedCollator((UnicodeString
)"&a < b < c/aba < d < z < ch", status
);
131 if (c3
== NULL
|| U_FAILURE(status
))
133 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
138 source
= "abcdbchdc";
139 iter
= c3
->createCollationElementIterator(source
);
// Thai sample text followed by abc, iterated with the th_TH collator.
145 source
= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
147 Collator
*c4
= Collator::createInstance(Locale("th", "TH", ""), status
);
// NOTE(review): a failure is reported here, but the next visible statement casts c4 and
// dereferences it. Only embedded line 150 lies between them, so unless that line returns,
// a failed createInstance leads to a dereference of a possibly null c4 -- confirm.
148 if(U_FAILURE(status
)){
149 errln("Couldn't create a collator");
151 iter
= ((RuleBasedCollator
*)c4
)->createCollationElementIterator(source
);
156 source
= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
157 Collator
*c5
= Collator::createInstance(Locale("ja", "JP", ""), status
);
// NOTE(review): c5 is cast and dereferenced on line 159 before status is checked on line 160;
// if createInstance fails and yields a null pointer this dereference happens first. The status
// check presumably belongs ahead of the createCollationElementIterator call.
159 iter
= ((RuleBasedCollator
*)c5
)->createCollationElementIterator(source
);
160 if(U_FAILURE(status
)){
161 errln("Couldn't create Japanese collator\n");
169 * Test for getOffset() and setOffset()
// TestOffset: exercises CollationElementIterator::setOffset() and getOffset().
//   1. Boundary checks: after setOffset(0) previous() must return NULLORDER, and after
//      setOffset(test1.length()) next() must return NULLORDER.
//   2. After reading every order through getOrders(), getOffset() must equal test1.length().
//   3. After setOffset(0) the iterator must match a freshly created one (assertEqual).
//   4. Setting the offset into the middle of a contraction or of a surrogate pair must give
//      the same orders as setting it to the start of that unit (compared with uprv_memcmp).
// getOrders and Order are defined outside this listing; getOrders presumably returns an array
// and writes its element count into the second argument -- TODO confirm.
// NOTE(review): iter, order and order2 are reassigned several times below. Statements that
// release the previous iterator or arrays are not visible in this listing -- confirm they exist.
171 void CollationIteratorTest::TestOffset(/* char* par */)
173 CollationElementIterator
*iter
= en_us
->createCollationElementIterator(test1
);
174 UErrorCode status
= U_ZERO_ERROR
;
175 // testing boundaries
176 iter
->setOffset(0, status
);
177 if (U_FAILURE(status
) || iter
->previous(status
) != CollationElementIterator::NULLORDER
) {
178 errln("Error: After setting offset to 0, we should be at the end "
179 "of the backwards iteration");
181 iter
->setOffset(test1
.length(), status
);
// NOTE(review): this branch checks next() at the end of the text, yet the message below still
// speaks of the backwards iteration -- presumably copied from the previous check; confirm.
182 if (U_FAILURE(status
) || iter
->next(status
) != CollationElementIterator::NULLORDER
) {
183 errln("Error: After setting offset to end of the string, we should "
184 "be at the end of the backwards iteration");
187 // Run all the way through the iterator, then get the offset
188 int32_t orderLength
= 0;
189 Order
*orders
= getOrders(*iter
, orderLength
);
191 int32_t offset
= iter
->getOffset();
193 if (offset
!= test1
.length())
195 UnicodeString
msg1("offset at end != length: ");
196 UnicodeString
msg2(" vs ");
198 errln(msg1
+ offset
+ msg2
+ test1
.length());
201 // Now set the offset back to the beginning and see if it works
202 CollationElementIterator
*pristine
= en_us
->createCollationElementIterator(test1
);
204 iter
->setOffset(0, status
);
206 if (U_FAILURE(status
))
208 errln("setOffset failed.");
// The rewound iterator is compared element by element against the untouched one.
212 assertEqual(*iter
, *pristine
);
219 // setting offset in the middle of a contraction
220 UnicodeString contraction
= "change";
// status is reset so that earlier failures do not mask the collator construction result.
221 status
= U_ZERO_ERROR
;
222 RuleBasedCollator
tailored("& a < ch", status
);
// NOTE(review): the message mentions a Spanish collator, but the collator checked here is the
// one built from the rule string on the line above -- confirm the intended wording.
223 if (U_FAILURE(status
)) {
224 errln("Error: in creation of Spanish collator - %s", u_errorName(status
));
227 iter
= tailored
.createCollationElementIterator(contraction
);
228 Order
*order
= getOrders(*iter
, orderLength
);
229 iter
->setOffset(1, status
); // sets offset in the middle of ch
230 int32_t order2Length
= 0;
231 Order
*order2
= getOrders(*iter
, order2Length
);
// Equal lengths are checked first, so the byte comparison only runs over arrays of equal size.
232 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
233 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
// Same contraction check, this time with the contraction in the middle of the text.
238 contraction
= "peache";
239 iter
= tailored
.createCollationElementIterator(contraction
);
240 iter
->setOffset(3, status
);
241 order
= getOrders(*iter
, orderLength
);
242 iter
->setOffset(4, status
); // sets offset in the middle of ch
243 order2
= getOrders(*iter
, order2Length
);
244 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
245 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
250 // setting offset in the middle of a surrogate pair
251 UnicodeString surrogate
= UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape();
252 iter
= tailored
.createCollationElementIterator(surrogate
);
253 order
= getOrders(*iter
, orderLength
);
254 iter
->setOffset(1, status
); // sets offset in the middle of surrogate
255 order2
= getOrders(*iter
, order2Length
);
256 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
257 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
// Same surrogate check with the pair placed after six ordinary characters.
262 surrogate
= UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape();
263 iter
= tailored
.createCollationElementIterator(surrogate
);
264 iter
->setOffset(6, status
);
265 order
= getOrders(*iter
, orderLength
);
266 iter
->setOffset(7, status
); // sets offset in the middle of surrogate
267 order2
= getOrders(*iter
, order2Length
);
268 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
269 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
274 // TODO: try iterating halfway through a messy string.
// TestSetText: exercises both setText overloads of CollationElementIterator.
//   - iter2 (created over test2) is advanced a few steps, then re-targeted at test1 via
//     setText(UnicodeString) and compared with iter1 through assertEqual.
//   - The same comparison is repeated after setText(CharacterIterator).
//   - Finally iter1 is pointed at an empty string through each overload, and next() is
//     required to return NULLORDER straight away.
// NOTE(review): the declaration of the counter i and any delete statements for iter1, iter2
// and chariter are on lines absent from this listing.
280 void CollationIteratorTest::TestSetText(/* char* par */)
282 CollationElementIterator
*iter1
= en_us
->createCollationElementIterator(test1
);
283 CollationElementIterator
*iter2
= en_us
->createCollationElementIterator(test2
);
284 UErrorCode status
= U_ZERO_ERROR
;
286 // Run through the second iterator just to exercise it
287 int32_t c
= iter2
->next(status
);
// Stops after fewer than ten steps or as soon as the iterator is exhausted.
290 while ( ++i
< 10 && c
!= CollationElementIterator::NULLORDER
)
292 if (U_FAILURE(status
))
294 errln("iter2->next() returned an error.");
299 c
= iter2
->next(status
);
302 // Now set it to point to the same string as the first iterator
303 iter2
->setText(test1
, status
);
305 if (U_FAILURE(status
))
307 errln("call to iter2->setText(test1) failed.");
311 assertEqual(*iter1
, *iter2
);
314 //now use the overloaded setText(CharacterIterator&, UErrorCode) function to set the text
315 CharacterIterator
* chariter
= new StringCharacterIterator(test1
);
316 iter2
->setText(*chariter
, status
);
317 if (U_FAILURE(status
))
319 errln("call to iter2->setText(chariter(test1)) failed.");
323 assertEqual(*iter1
, *iter2
);
326 // test for an empty string
327 UnicodeString
empty("");
328 iter1
->setText(empty
, status
);
329 if (U_FAILURE(status
)
330 || iter1
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
331 errln("Empty string should have no CEs.");
// chariter was created above as a StringCharacterIterator, so the C-style downcast is used to
// reach its setText(UnicodeString) overload.
333 ((StringCharacterIterator
*)chariter
)->setText(empty
);
334 iter1
->setText(*chariter
, status
);
335 if (U_FAILURE(status
)
336 || iter1
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
337 errln("Empty string should have no CEs.");
345 * Test for getMaxExpansion()
// TestMaxExpansion: builds a RuleBasedCollator from a rule string containing an expansion
// (c/aba) and a contraction (ch). While ch stays below 0xFFFF and status is successful, it
// points one shared iterator at a one-character string, reads the last order with previous(),
// keeps calling previous() until NULLORDER, and compares getMaxExpansion(order) with count.
// NOTE(review): the declarations of ch, order and count, the statement that advances ch and
// the body of the inner while loop are on lines absent from this listing.
// NOTE(review): coll is dereferenced on line 355 with no visible check of the pointer or of
// status after construction -- confirm.
347 void CollationIteratorTest::TestMaxExpansion(/* char* par */)
349 UErrorCode status
= U_ZERO_ERROR
;
350 UnicodeString
rule("&a < ab < c/aba < d < z < ch");
351 RuleBasedCollator
*coll
= new RuleBasedCollator(rule
, status
);
353 UnicodeString
str(ch
);
355 CollationElementIterator
*iter
= coll
->createCollationElementIterator(str
);
357 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
// NOTE(review): this str is declared inside the loop braces and shadows the str declared on
// line 353.
361 UnicodeString
str(ch
);
362 iter
->setText(str
, status
);
363 order
= iter
->previous(status
);
365 /* thai management */
// If the last order is ignorable, step back once more and use that order instead.
366 if (CollationElementIterator::isIgnorable(order
))
367 order
= iter
->previous(status
);
369 while (U_SUCCESS(status
)
370 && iter
->previous(status
) != (int32_t)CollationElementIterator::NULLORDER
)
// NOTE(review): the expansion comparison is only evaluated when status is a FAILURE, because
// the left operand of && is U_FAILURE(status). On every successful pass the check is skipped,
// so a too-small getMaxExpansion() value would go unreported. U_SUCCESS(status) was presumably
// intended here -- confirm and fix in the full file.
375 if (U_FAILURE(status
) && iter
->getMaxExpansion(order
) < count
) {
376 errln("Failure at codepoint %d, maximum expansion count < %d\n",
// TestClearBuffers: checks that setOffset() discards partially consumed expansion state.
// Sequence on an iterator over the text abcd with rules where d expands:
//   1. next() once and remember the first element as e0.
//   2. setOffset(3) to reach the expanding character and call next() once, consuming only
//      part of the expansion.
//   3. setOffset(0) and call next() again; the result e is reported next to e0.
// Every call that takes status is followed by a U_FAILURE check with its own message.
// NOTE(review): the condition guarding the final errln (presumably e differing from e0) and
// any delete statements for i and c are on lines absent from this listing.
388 void CollationIteratorTest::TestClearBuffers(/* char* par */)
390 UErrorCode status
= U_ZERO_ERROR
;
391 RuleBasedCollator
*c
= new RuleBasedCollator((UnicodeString
)"&a < b < c & ab = d", status
);
393 if (c
== NULL
|| U_FAILURE(status
))
395 errln("Couldn't create a RuleBasedCollator.");
400 UnicodeString
source("abcd");
401 CollationElementIterator
*i
= c
->createCollationElementIterator(source
);
402 int32_t e0
= i
->next(status
); // save the first collation element
404 if (U_FAILURE(status
))
406 errln("call to i->next() failed. err=%s", u_errorName(status
));
410 i
->setOffset(3, status
); // go to the expanding character
412 if (U_FAILURE(status
))
414 errln("call to i->setOffset(3) failed. err=%s", u_errorName(status
));
418 i
->next(status
); // but only use up half of it
420 if (U_FAILURE(status
))
422 errln("call to i->next() failed. err=%s", u_errorName(status
));
426 i
->setOffset(0, status
); // go back to the beginning
428 if (U_FAILURE(status
))
430 errln("call to i->setOffset(0) failed. err=%s", u_errorName(status
));
434 int32_t e
= i
->next(status
); // and get this one again
436 if (U_FAILURE(status
))
438 errln("call to i->next() failed. err=%s", u_errorName(status
));
442 errln("got 0x%X, expected 0x%X", e
, e0
);
454 * Testing the assignment operator
// TestAssignment: checks that copies of a CollationElementIterator compare equal to the
// original, first over the text abcd and then, with normalization switched on, over a base
// letter followed by two combining marks.
// The default collator returned as Collator* is converted to RuleBasedCollator* with a
// C-style cast.
// NOTE(review): between the alternating == and != checks on iter4 and iter5 there are
// statements on lines absent from this listing (presumably calls that advance or rewind one
// of the iterators); the expected results cannot be verified from here.
456 void CollationIteratorTest::TestAssignment()
458 UErrorCode status
= U_ZERO_ERROR
;
459 RuleBasedCollator
*coll
=
460 (RuleBasedCollator
*)Collator::createInstance(status
);
462 if (coll
== NULL
|| U_FAILURE(status
))
464 errln("Couldn't create a default collator.");
468 UnicodeString
source("abcd");
469 CollationElementIterator
*iter1
=
470 coll
->createCollationElementIterator(source
);
// NOTE(review): this is a declaration with an initialiser, which in C++ invokes the copy
// constructor rather than operator=. As written it duplicates the iter3 check below and the
// assignment operator itself is not exercised by the visible lines -- confirm intent.
472 CollationElementIterator iter2
= *iter1
;
474 if (*iter1
!= iter2
) {
475 errln("Fail collation iterator assignment does not produce the same elements");
478 CollationElementIterator
iter3(*iter1
);
480 if (*iter1
!= iter3
) {
481 errln("Fail collation iterator copy constructor does not produce the same elements");
// Second scenario: combining sequence with normalization mode enabled on the collator.
484 source
= CharsToUnicodeString("a\\u0300\\u0325");
485 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
486 CollationElementIterator
*iter4
487 = coll
->createCollationElementIterator(source
);
488 CollationElementIterator
iter5(*iter4
);
// NOTE(review): iter5 is copy-constructed, while the message below speaks of assignment.
489 if (*iter4
!= iter5
) {
490 errln("collation iterator assignment does not produce the same elements");
// In the checks that follow, each message states the relation that was expected to hold.
493 if (U_FAILURE(status
) || *iter4
== iter5
) {
494 errln("collation iterator not equal");
497 if (U_FAILURE(status
) || *iter4
!= iter5
) {
498 errln("collation iterator equal");
501 if (U_FAILURE(status
) || *iter4
== iter5
) {
502 errln("collation iterator not equal");
505 if (U_FAILURE(status
) || *iter4
!= iter5
) {
506 errln("collation iterator equal");
// A copy taken at this later point must also compare equal to its source.
508 CollationElementIterator
iter6(*iter4
);
509 if (*iter4
!= iter6
) {
510 errln("collation iterator equal");
513 if (U_FAILURE(status
) || *iter4
== iter5
) {
514 errln("collation iterator not equal");
517 if (U_FAILURE(status
) || *iter4
!= iter5
) {
518 errln("collation iterator equal");
521 if (U_FAILURE(status
) || *iter4
== iter5
) {
522 errln("collation iterator not equal");
525 if (U_FAILURE(status
) || *iter4
!= iter5
) {
526 errln("collation iterator equal");
534 * Testing the constructors
// TestConstructors: creates iterators over the same text through both factory overloads
// (CharacterIterator and UnicodeString) and requires them to compare equal, first for test1
// and then for an empty string. For the empty string, next() on each iterator must return
// NULLORDER.
// NOTE(review): iter1 and iter2 are reassigned for the empty-string case; statements that
// delete the first pair, and the final cleanup, are not visible in this listing.
536 void CollationIteratorTest::TestConstructors()
538 UErrorCode status
= U_ZERO_ERROR
;
539 RuleBasedCollator
*coll
=
540 (RuleBasedCollator
*)Collator::createInstance(status
);
541 if (coll
== NULL
|| U_FAILURE(status
))
543 errln("Couldn't create a default collator.");
547 // testing protected constructor with character iterator as argument
548 StringCharacterIterator
chariter(test1
);
549 CollationElementIterator
*iter1
=
550 coll
->createCollationElementIterator(chariter
);
// NOTE(review): the factory call above takes no status argument, so this check can only
// reflect the status left by createInstance; it does not detect a failed iterator creation.
551 if (U_FAILURE(status
)) {
552 errln("Couldn't create collation element iterator with character iterator.");
555 CollationElementIterator
*iter2
=
556 coll
->createCollationElementIterator(test1
);
558 // initially the 2 collation element iterators should be the same
// Covers self-comparison of each iterator as well as both argument orders of operator!=.
559 if (*iter1
!= *iter1
|| *iter2
!= *iter2
|| *iter1
!= *iter2
560 || *iter2
!= *iter1
) {
561 errln("CollationElementIterators constructed with the same string data should be the same at the start");
563 assertEqual(*iter1
, *iter2
);
568 // tests empty strings
569 UnicodeString
empty("");
570 iter1
= coll
->createCollationElementIterator(empty
);
571 chariter
.setText(empty
);
572 iter2
= coll
->createCollationElementIterator(chariter
);
573 if (*iter1
!= *iter1
|| *iter2
!= *iter2
|| *iter1
!= *iter2
574 || *iter2
!= *iter1
) {
575 errln("CollationElementIterators constructed with the same string data should be the same at the start");
577 if (iter1
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
578 errln("Empty string should have no CEs.");
580 if (iter2
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
581 errln("Empty string should have no CEs.");
589 * Testing the strength order
// TestStrengthOrder: feeds the fixed value 0x0123ABCD to strengthOrder() on one iterator
// while the strength of its collator is changed, and checks the result:
//   PRIMARY   -> 0x01230000
//   SECONDARY -> 0x0123AB00
//   TERTIARY  -> the value unchanged
// The iterator is created once, so the checks rely on it observing strength changes made on
// coll afterwards.
// NOTE(review): the condition guarding the errln on line 609 (presumably a null check on
// iter) and any delete statements are on lines absent from this listing.
591 void CollationIteratorTest::TestStrengthOrder()
593 int order
= 0x0123ABCD;
595 UErrorCode status
= U_ZERO_ERROR
;
596 RuleBasedCollator
*coll
=
597 (RuleBasedCollator
*)Collator::createInstance(status
);
598 if (coll
== NULL
|| U_FAILURE(status
))
600 errln("Couldn't create a default collator.");
604 coll
->setStrength(Collator::PRIMARY
);
605 CollationElementIterator
*iter
=
606 coll
->createCollationElementIterator(test1
);
609 errln("Couldn't create a collation element iterator from default collator");
613 if (iter
->strengthOrder(order
) != 0x01230000) {
614 errln("Strength order for a primary strength collator should be the first 2 bytes");
618 coll
->setStrength(Collator::SECONDARY
);
619 if (iter
->strengthOrder(order
) != 0x0123AB00) {
620 errln("Strength order for a secondary strength collator should be the third byte");
624 coll
->setStrength(Collator::TERTIARY
);
// NOTE(review): this check expects the full value back, yet the message repeats the
// secondary-strength wording about the third byte -- presumably a copy-paste; confirm.
625 if (iter
->strengthOrder(order
) != order
) {
626 errln("Strength order for a tertiary strength collator should be the third byte");
634 * Return a string containing all of the collation orders
635 * returned by calls to next on the specified iterator
// orderString: drains iter with next() until NULLORDER is returned and passes each order to
// appendHex together with the constant 8 and target.
// Parameters: iter is advanced by this call; target receives the appended text.
// Returns a UnicodeString reference (presumably target itself; the return statement is on a
// line absent from this listing).
// NOTE(review): status is local and is not inspected in the visible lines; appendHex is
// defined elsewhere and the 8 is presumably a digit count -- TODO confirm.
637 UnicodeString
&CollationIteratorTest::orderString(CollationElementIterator
&iter
, UnicodeString
&target
)
640 UErrorCode status
= U_ZERO_ERROR
;
642 while ((order
= iter
.next(status
)) != CollationElementIterator::NULLORDER
)
645 appendHex(order
, 8, target
);
// assertEqual: advances both iterators in lock step with next(status) and reports a mismatch
// through errln, printing the running count and both values in hexadecimal. Iteration ends
// once c1 equals NULLORDER.
// Both iterators are consumed by this call.
// NOTE(review): the trailing while followed by a semicolon suggests a do/while loop whose
// opening line, the comparison guarding errln and the update of count are all on lines absent
// from this listing. status is not inspected in the visible lines.
652 void CollationIteratorTest::assertEqual(CollationElementIterator
&i1
, CollationElementIterator
&i2
)
654 int32_t c1
, c2
, count
= 0;
655 UErrorCode status
= U_ZERO_ERROR
;
659 c1
= i1
.next(status
);
660 c2
= i2
.next(status
);
664 errln(" %d: strength(0x%X) != strength(0x%X)", count
, c1
, c2
);
670 while (c1
!= CollationElementIterator::NULLORDER
);
// runIndexedTest: test dispatcher. For index 0 through 8 it stores the test name in name and,
// when exec is true, runs the matching test method; any other index sets name to an empty
// string. The fourth parameter is unnamed and unused.
// Note that the case order (TestPrevious first, TestUnicodeChar sixth) differs from the order
// in which the methods are defined in this file.
// NOTE(review): the conditions guarding the logln banner and the dataerrln call, and the
// switch header itself, are on lines absent from this listing.
673 void CollationIteratorTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* /*par*/)
677 logln("Collation Iteration Tests: ");
683 case 0: name
= "TestPrevious"; if (exec
) TestPrevious(/* par */); break;
684 case 1: name
= "TestOffset"; if (exec
) TestOffset(/* par */); break;
685 case 2: name
= "TestSetText"; if (exec
) TestSetText(/* par */); break;
686 case 3: name
= "TestMaxExpansion"; if (exec
) TestMaxExpansion(/* par */); break;
687 case 4: name
= "TestClearBuffers"; if (exec
) TestClearBuffers(/* par */); break;
688 case 5: name
= "TestUnicodeChar"; if (exec
) TestUnicodeChar(/* par */); break;
689 case 6: name
= "TestAssignment"; if (exec
) TestAssignment(/* par */); break;
690 case 7: name
= "TestConstructors"; if (exec
) TestConstructors(/* par */); break;
691 case 8: name
= "TestStrengthOrder"; if (exec
) TestStrengthOrder(/* par */); break;
692 default: name
= ""; break;
695 dataerrln("Class iterator not instantiated");
700 #endif /* #if !UCONFIG_NO_COLLATION */