1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
9 #include "unicode/utypes.h"
11 #if !UCONFIG_NO_COLLATION
13 #include "unicode/coll.h"
14 #include "unicode/tblcoll.h"
15 #include "unicode/unistr.h"
16 #include "unicode/sortkey.h"
18 #include "unicode/schriter.h"
19 #include "unicode/chariter.h"
20 #include "unicode/uchar.h"
// File-scope ICU error code, initialized to U_ZERO_ERROR.
// The constructor below passes it to Collator::createInstance() and tests it
// with U_FAILURE(). Most test methods in this listing declare their own local
// `status`, which shadows this one inside those methods.
23 static UErrorCode status
= U_ZERO_ERROR
;
// Constructor: initializes the two sample strings test1 and test2 and creates
// the en_us collator that the test methods iterate with.
// A failed creation is reported through errcheckln() together with the ICU
// error name obtained from u_errorName().
// NOTE(review): the Collator* returned by createInstance() is C-cast to
// RuleBasedCollator*; this assumes the factory yields a RuleBasedCollator for
// Locale::getUS() -- TODO confirm.
25 CollationIteratorTest::CollationIteratorTest()
26 : test1("What subset of all possible test cases?", ""),
27 test2("has the highest probability of detecting", "")
29 en_us
= (RuleBasedCollator
*)Collator::createInstance(Locale::getUS(), status
);
// `status` here is the file-scope variable; no local declaration is visible.
30 if(U_FAILURE(status
)) {
33 errcheckln(status
, "Collator creation failed with %s", u_errorName(status
));
// Destructor. Its body is not visible in this listing.
// NOTE(review): presumably releases en_us, which the constructor allocates --
// verify against the full source.
39 CollationIteratorTest::~CollationIteratorTest()
45 * Test for CollationElementIterator previous and next for the whole set of
// Walks code points from 1 up to (but excluding) 0xFFFE and builds collation
// element iterators from en_us over `source`.
// NOTE(review): the declarations of `codepoint` and `source`, the statements
// that advance `codepoint`, and the bodies of the if-statements are not visible
// in this listing, so the exact batching logic cannot be confirmed from here.
48 void CollationIteratorTest::TestUnicodeChar()
50 CollationElementIterator
*iter
;
// Outer loop has no increment clause; codepoint must be advanced in the body.
54 for (codepoint
= 1; codepoint
< 0xFFFE;)
// Inner loop runs until codepoint reaches a multiple of 0xFF.
58 while (codepoint
% 0xFF != 0)
60 if (u_isdefined(codepoint
))
65 if (u_isdefined(codepoint
))
// 0xFFFF is singled out by this guard.
68 if (codepoint
!= 0xFFFF)
71 iter
= en_us
->createCollationElementIterator(source
);
72 /* A basic test to see if it's working at all */
79 * Test for CollationElementIterator.previous()
81 * @bug 4108758 - Make sure it works with contracting characters
// Exercises CollationElementIterator on a series of collators:
//   - en_us over test1 (basic check),
//   - c1: rules containing the contraction `ch`,
//   - c2: rules containing an expansion (c/abd),
//   - c3: rules containing both an expansion and a contraction,
//   - c4: the th_TH locale collator,
//   - c5: the ja_JP locale collator.
// Each collator creation is followed by a failure check that reports via errln().
// NOTE(review): the statements that actually drive each iterator, and any
// cleanup of iter/c1..c5, are not visible in this listing.
84 void CollationIteratorTest::TestPrevious(/* char* par */)
86 UErrorCode status
= U_ZERO_ERROR
;
87 CollationElementIterator
*iter
= en_us
->createCollationElementIterator(test1
);
89 // A basic test to see if it's working at all
93 // Test with a contracting character sequence
95 RuleBasedCollator
*c1
= NULL
;
96 c1
= new RuleBasedCollator(
97 (UnicodeString
)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status
);
99 if (c1
== NULL
|| U_FAILURE(status
))
101 errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
107 iter
= c1
->createCollationElementIterator(source
);
112 // Test with an expanding character sequence
113 RuleBasedCollator
*c2
= NULL
;
114 c2
= new RuleBasedCollator((UnicodeString
)"&a < b < c/abd < d", status
);
116 if (c2
== NULL
|| U_FAILURE(status
))
118 errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
124 iter
= c2
->createCollationElementIterator(source
);
// Third rule set: expansion (c/aba) combined with the contraction ch.
130 RuleBasedCollator
*c3
= NULL
;
131 c3
= new RuleBasedCollator((UnicodeString
)"&a < b < c/aba < d < z < ch", status
);
133 if (c3
== NULL
|| U_FAILURE(status
))
135 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
140 source
= "abcdbchdc";
141 iter
= c3
->createCollationElementIterator(source
);
// Thai sample text followed by abc, iterated with the th_TH collator.
147 source
= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
149 Collator
*c4
= Collator::createInstance(Locale("th", "TH", ""), status
);
150 if(U_FAILURE(status
)){
151 errln("Couldn't create a collator");
// C-cast assumes the locale collator is a RuleBasedCollator -- TODO confirm.
153 iter
= ((RuleBasedCollator
*)c4
)->createCollationElementIterator(source
);
158 source
= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
159 Collator
*c5
= Collator::createInstance(Locale("ja", "JP", ""), status
);
// NOTE(review): c5 is dereferenced here before the U_FAILURE(status) check
// that follows; if creation failed this would use an invalid pointer. The
// check looks like it belongs before this statement -- confirm.
161 iter
= ((RuleBasedCollator
*)c5
)->createCollationElementIterator(source
);
162 if(U_FAILURE(status
)){
163 errln("Couldn't create Japanese collator\n");
171 * Test for getOffset() and setOffset()
// Tests CollationElementIterator::setOffset() and getOffset():
//   1. boundaries: at offset 0 previous() must return NULLORDER, and at offset
//      test1.length() next() must return NULLORDER;
//   2. after getOrders() has consumed the iterator, getOffset() must equal
//      test1.length();
//   3. after setOffset(0) the iterator must match a freshly created one;
//   4. an offset inside the contraction ch (rule & a < ch) must yield the same
//      orders as the offset at the start of that contraction;
//   5. an offset inside a surrogate pair must yield the same orders as the
//      offset at the start of that pair.
// NOTE(review): iter, orders, order and order2 are reassigned repeatedly; any
// matching delete statements are not visible in this listing.
173 void CollationIteratorTest::TestOffset(/* char* par */)
175 CollationElementIterator
*iter
= en_us
->createCollationElementIterator(test1
);
176 UErrorCode status
= U_ZERO_ERROR
;
177 // testing boundaries
178 iter
->setOffset(0, status
);
179 if (U_FAILURE(status
) || iter
->previous(status
) != CollationElementIterator::NULLORDER
) {
180 errln("Error: After setting offset to 0, we should be at the end "
181 "of the backwards iteration");
183 iter
->setOffset(test1
.length(), status
);
// NOTE(review): this check calls next(), yet the message below still says
// backwards iteration; the wording looks copied from the previous check.
184 if (U_FAILURE(status
) || iter
->next(status
) != CollationElementIterator::NULLORDER
) {
185 errln("Error: After setting offset to end of the string, we should "
186 "be at the end of the backwards iteration");
189 // Run all the way through the iterator, then get the offset
190 int32_t orderLength
= 0;
191 Order
*orders
= getOrders(*iter
, orderLength
);
193 int32_t offset
= iter
->getOffset();
195 if (offset
!= test1
.length())
197 UnicodeString
msg1("offset at end != length: ");
198 UnicodeString
msg2(" vs ");
200 errln(msg1
+ offset
+ msg2
+ test1
.length());
203 // Now set the offset back to the beginning and see if it works
204 CollationElementIterator
*pristine
= en_us
->createCollationElementIterator(test1
);
206 iter
->setOffset(0, status
);
208 if (U_FAILURE(status
))
210 errln("setOffset failed.");
214 assertEqual(*iter
, *pristine
);
221 // setting offset in the middle of a contraction
222 UnicodeString contraction
= "change";
223 status
= U_ZERO_ERROR
;
224 RuleBasedCollator
tailored("& a < ch", status
);
// NOTE(review): the message mentions a Spanish collator, but the collator is
// built from the explicit rule string above, not from a locale.
225 if (U_FAILURE(status
)) {
226 errln("Error: in creation of Spanish collator - %s", u_errorName(status
));
// Case: contraction at the start of the text; compare orders from the initial
// position against orders after setOffset(1).
229 iter
= tailored
.createCollationElementIterator(contraction
);
230 Order
*order
= getOrders(*iter
, orderLength
);
231 iter
->setOffset(1, status
); // sets offset in the middle of ch
232 int32_t order2Length
= 0;
233 Order
*order2
= getOrders(*iter
, order2Length
);
234 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
235 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
// Case: contraction in the middle of the text; offset 3 is the c, offset 4 the h.
240 contraction
= "peache";
241 iter
= tailored
.createCollationElementIterator(contraction
);
242 iter
->setOffset(3, status
);
243 order
= getOrders(*iter
, orderLength
);
244 iter
->setOffset(4, status
); // sets offset in the middle of ch
245 order2
= getOrders(*iter
, order2Length
);
246 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
247 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
252 // setting offset in the middle of a surrogate pair
253 UnicodeString surrogate
= UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape();
254 iter
= tailored
.createCollationElementIterator(surrogate
);
255 order
= getOrders(*iter
, orderLength
);
256 iter
->setOffset(1, status
); // sets offset in the middle of surrogate
257 order2
= getOrders(*iter
, order2Length
);
258 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
259 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
// Case: surrogate pair after six BMP characters; offsets 6 and 7 straddle it.
264 surrogate
= UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape();
265 iter
= tailored
.createCollationElementIterator(surrogate
);
266 iter
->setOffset(6, status
);
267 order
= getOrders(*iter
, orderLength
);
268 iter
->setOffset(7, status
); // sets offset in the middle of surrogate
269 order2
= getOrders(*iter
, order2Length
);
270 if (orderLength
!= order2Length
|| uprv_memcmp(order
, order2
, orderLength
* sizeof(Order
)) != 0) {
271 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
276 // TODO: try iterating halfway through a messy string.
// Tests both setText() overloads of CollationElementIterator:
//   - iter2 is first advanced over test2, then retargeted to test1 with
//     setText(UnicodeString) and compared against iter1 via assertEqual();
//   - the same comparison is repeated after setText(CharacterIterator);
//   - finally an empty string is set through both overloads and next() must
//     return NULLORDER.
// NOTE(review): the declaration of the counter `i` and any cleanup of
// iter1/iter2/chariter are not visible in this listing.
282 void CollationIteratorTest::TestSetText(/* char* par */)
284 CollationElementIterator
*iter1
= en_us
->createCollationElementIterator(test1
);
285 CollationElementIterator
*iter2
= en_us
->createCollationElementIterator(test2
);
286 UErrorCode status
= U_ZERO_ERROR
;
288 // Run through the second iterator just to exercise it
289 int32_t c
= iter2
->next(status
);
// Bounded warm-up: stops when ++i reaches 10 or the iterator is exhausted.
292 while ( ++i
< 10 && c
!= CollationElementIterator::NULLORDER
)
294 if (U_FAILURE(status
))
296 errln("iter2->next() returned an error.");
301 c
= iter2
->next(status
);
304 // Now set it to point to the same string as the first iterator
305 iter2
->setText(test1
, status
);
307 if (U_FAILURE(status
))
309 errln("call to iter2->setText(test1) failed.");
313 assertEqual(*iter1
, *iter2
);
316 //now use the overloaded setText(CharacterIterator&, UErrorCode) function to set the text
317 CharacterIterator
* chariter
= new StringCharacterIterator(test1
);
318 iter2
->setText(*chariter
, status
);
319 if (U_FAILURE(status
))
321 errln("call to iter2->setText(chariter(test1)) failed.");
325 assertEqual(*iter1
, *iter2
);
328 // test for an empty string
329 UnicodeString
empty("");
330 iter1
->setText(empty
, status
);
331 if (U_FAILURE(status
)
332 || iter1
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
333 errln("Empty string should have no CEs.");
// Same empty-text check, this time through the CharacterIterator overload.
// The downcast is valid because chariter was created above as a
// StringCharacterIterator.
335 ((StringCharacterIterator
*)chariter
)->setText(empty
);
336 iter1
->setText(*chariter
, status
);
337 if (U_FAILURE(status
)
338 || iter1
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
339 errln("Empty string should have no CEs.");
347 * Test for getMaxExpansion()
// Tests getMaxExpansion(): for each character `ch` below 0xFFFF the text is
// set to that single character, the iterator is walked backwards with
// previous(), and the result of getMaxExpansion(order) is compared to `count`.
// NOTE(review): the declarations of ch, order and count, the increment of ch
// and the body of the counting loop are not visible in this listing;
// presumably count is the number of collation elements produced -- confirm.
349 void CollationIteratorTest::TestMaxExpansion(/* char* par */)
351 UErrorCode status
= U_ZERO_ERROR
;
352 UnicodeString
rule("&a < ab < c/aba < d < z < ch");
353 RuleBasedCollator
*coll
= new RuleBasedCollator(rule
, status
);
355 UnicodeString
str(ch
);
357 CollationElementIterator
*iter
= coll
->createCollationElementIterator(str
);
359 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
// This str is a second declaration of the same name inside the loop.
363 UnicodeString
str(ch
);
364 iter
->setText(str
, status
);
365 order
= iter
->previous(status
);
367 /* thai management */
// If the last element is ignorable, step back once more to get the order
// that is passed to getMaxExpansion() below.
368 if (CollationElementIterator::isIgnorable(order
))
369 order
= iter
->previous(status
);
371 while (U_SUCCESS(status
)
372 && iter
->previous(status
) != (int32_t)CollationElementIterator::NULLORDER
)
// NOTE(review): the condition requires U_FAILURE(status), so the expansion
// comparison is only evaluated after an error. U_SUCCESS looks intended;
// as written the check is effectively disabled -- confirm before changing,
// since enabling it may surface failures.
377 if (U_FAILURE(status
) && iter
->getMaxExpansion(order
) < count
) {
378 errln("Failure at codepoint %d, maximum expansion count < %d\n",
// Checks that repositioning the iterator discards a partially consumed
// expansion: the first collation element of abcd is saved, the iterator is
// moved to offset 3 and advanced once, then reset to offset 0; the next
// element is fetched again for comparison with the saved one.
// Every iterator call is followed by a U_FAILURE(status) check that reports
// the ICU error name.
// NOTE(review): the comparison guarding the final errln() and any cleanup of
// `i` and `c` are not visible in this listing.
390 void CollationIteratorTest::TestClearBuffers(/* char* par */)
392 UErrorCode status
= U_ZERO_ERROR
;
393 RuleBasedCollator
*c
= new RuleBasedCollator((UnicodeString
)"&a < b < c & ab = d", status
);
395 if (c
== NULL
|| U_FAILURE(status
))
397 errln("Couldn't create a RuleBasedCollator.");
402 UnicodeString
source("abcd");
403 CollationElementIterator
*i
= c
->createCollationElementIterator(source
);
404 int32_t e0
= i
->next(status
); // save the first collation element
406 if (U_FAILURE(status
))
408 errln("call to i->next() failed. err=%s", u_errorName(status
));
412 i
->setOffset(3, status
); // go to the expanding character
414 if (U_FAILURE(status
))
416 errln("call to i->setOffset(3) failed. err=%s", u_errorName(status
));
420 i
->next(status
); // but only use up half of it
422 if (U_FAILURE(status
))
424 errln("call to i->next() failed. err=%s", u_errorName(status
));
428 i
->setOffset(0, status
); // go back to the beginning
430 if (U_FAILURE(status
))
432 errln("call to i->setOffset(0) failed. err=%s", u_errorName(status
));
436 int32_t e
= i
->next(status
); // and get this one again
438 if (U_FAILURE(status
))
440 errln("call to i->next() failed. err=%s", u_errorName(status
));
// Reports the re-read element e next to the saved element e0.
444 errln("got 0x%X, expected 0x%X", e
, e0
);
456 * Testing the assignment operator
// Tests copying of CollationElementIterator objects: copy-initialization
// (iter2), copy construction (iter3, iter5, iter6), and operator== and
// operator!= between an iterator and its copy.
// NOTE(review): the alternating == and != checks on iter4 and iter5 only make
// sense if the iterators are advanced between them; those statements, and any
// cleanup of coll/iter1/iter4, are not visible in this listing.
458 void CollationIteratorTest::TestAssignment()
460 UErrorCode status
= U_ZERO_ERROR
;
461 RuleBasedCollator
*coll
=
462 (RuleBasedCollator
*)Collator::createInstance(status
);
464 if (coll
== NULL
|| U_FAILURE(status
))
466 errln("Couldn't create a default collator.");
470 UnicodeString
source("abcd");
471 CollationElementIterator
*iter1
=
472 coll
->createCollationElementIterator(source
);
// Copy-initialization from *iter1.
474 CollationElementIterator iter2
= *iter1
;
476 if (*iter1
!= iter2
) {
477 errln("Fail collation iterator assignment does not produce the same elements");
480 CollationElementIterator
iter3(*iter1
);
482 if (*iter1
!= iter3
) {
483 errln("Fail collation iterator copy constructor does not produce the same elements");
// Second scenario: a base letter followed by two combining marks, with
// normalization switched on for the collator.
486 source
= CharsToUnicodeString("a\\u0300\\u0325");
487 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
488 CollationElementIterator
*iter4
489 = coll
->createCollationElementIterator(source
);
// NOTE(review): iter5 is copy-constructed, although the message below
// speaks of assignment.
490 CollationElementIterator
iter5(*iter4
);
491 if (*iter4
!= iter5
) {
492 errln("collation iterator assignment does not produce the same elements");
// NOTE(review): in the checks below the message text describes the state
// that was expected rather than the one detected (for example the text
// reads not equal when the condition tests ==) -- read with care.
495 if (U_FAILURE(status
) || *iter4
== iter5
) {
496 errln("collation iterator not equal");
499 if (U_FAILURE(status
) || *iter4
!= iter5
) {
500 errln("collation iterator equal");
503 if (U_FAILURE(status
) || *iter4
== iter5
) {
504 errln("collation iterator not equal");
507 if (U_FAILURE(status
) || *iter4
!= iter5
) {
508 errln("collation iterator equal");
510 CollationElementIterator
iter6(*iter4
);
511 if (*iter4
!= iter6
) {
512 errln("collation iterator equal");
515 if (U_FAILURE(status
) || *iter4
== iter5
) {
516 errln("collation iterator not equal");
519 if (U_FAILURE(status
) || *iter4
!= iter5
) {
520 errln("collation iterator equal");
523 if (U_FAILURE(status
) || *iter4
== iter5
) {
524 errln("collation iterator not equal");
527 if (U_FAILURE(status
) || *iter4
!= iter5
) {
528 errln("collation iterator equal");
536 * Testing the constructors
// Tests creation of collation element iterators from a CharacterIterator and
// from a UnicodeString over the same text (test1, then an empty string).
// The two iterators must compare equal to themselves and to each other, and
// for empty text next() must return NULLORDER.
// NOTE(review): cleanup of coll/iter1/iter2 is not visible in this listing;
// iter1 and iter2 are reassigned for the empty-string case.
538 void CollationIteratorTest::TestConstructors()
540 UErrorCode status
= U_ZERO_ERROR
;
541 RuleBasedCollator
*coll
=
542 (RuleBasedCollator
*)Collator::createInstance(status
);
543 if (coll
== NULL
|| U_FAILURE(status
))
545 errln("Couldn't create a default collator.");
549 // testing protected constructor with character iterator as argument
550 StringCharacterIterator
chariter(test1
);
551 CollationElementIterator
*iter1
=
552 coll
->createCollationElementIterator(chariter
);
// NOTE(review): the call above is not given `status`, so this check can only
// reflect the earlier createInstance() result -- confirm whether a NULL check
// on iter1 was intended.
553 if (U_FAILURE(status
)) {
554 errln("Couldn't create collation element iterator with character iterator.");
557 CollationElementIterator
*iter2
=
558 coll
->createCollationElementIterator(test1
);
560 // initially the 2 collation element iterators should be the same
// Checks reflexivity and symmetry of operator!= in a single condition.
561 if (*iter1
!= *iter1
|| *iter2
!= *iter2
|| *iter1
!= *iter2
562 || *iter2
!= *iter1
) {
563 errln("CollationElementIterators constructed with the same string data should be the same at the start");
565 assertEqual(*iter1
, *iter2
);
570 // tests empty strings
571 UnicodeString
empty("");
572 iter1
= coll
->createCollationElementIterator(empty
);
573 chariter
.setText(empty
);
574 iter2
= coll
->createCollationElementIterator(chariter
);
575 if (*iter1
!= *iter1
|| *iter2
!= *iter2
|| *iter1
!= *iter2
576 || *iter2
!= *iter1
) {
577 errln("CollationElementIterators constructed with the same string data should be the same at the start");
579 if (iter1
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
580 errln("Empty string should have no CEs.");
582 if (iter2
->next(status
) != (int32_t)CollationElementIterator::NULLORDER
) {
583 errln("Empty string should have no CEs.");
591 * Testing the strength order
// Tests CollationElementIterator::strengthOrder() for the sample order
// 0x0123ABCD under three collator strengths:
//   PRIMARY   -> expects 0x01230000
//   SECONDARY -> expects 0x0123AB00
//   TERTIARY  -> expects the order unchanged
// The strength is changed on the collator after the iterator was created.
// NOTE(review): the condition guarding the iterator-creation errln() and any
// cleanup of coll/iter are not visible in this listing.
593 void CollationIteratorTest::TestStrengthOrder()
595 int order
= 0x0123ABCD;
597 UErrorCode status
= U_ZERO_ERROR
;
598 RuleBasedCollator
*coll
=
599 (RuleBasedCollator
*)Collator::createInstance(status
);
600 if (coll
== NULL
|| U_FAILURE(status
))
602 errln("Couldn't create a default collator.");
606 coll
->setStrength(Collator::PRIMARY
);
607 CollationElementIterator
*iter
=
608 coll
->createCollationElementIterator(test1
);
611 errln("Couldn't create a collation element iterator from default collator");
615 if (iter
->strengthOrder(order
) != 0x01230000) {
616 errln("Strength order for a primary strength collator should be the first 2 bytes");
620 coll
->setStrength(Collator::SECONDARY
);
621 if (iter
->strengthOrder(order
) != 0x0123AB00) {
622 errln("Strength order for a secondary strength collator should be the third byte");
626 coll
->setStrength(Collator::TERTIARY
);
// NOTE(review): the expected value here is the full order, but the message
// below repeats the secondary wording (third byte); it looks copy-pasted.
627 if (iter
->strengthOrder(order
) != order
) {
628 errln("Strength order for a tertiary strength collator should be the third byte");
636 * Return a string containing all of the collation orders
637 * returned by calls to next on the specified iterator
// Appends every collation order returned by iter.next() to `target`, one
// appendHex(order, 8, target) call per order, until next() returns NULLORDER.
// @param iter    iterator to drain; it is advanced by this call.
// @param target  string that receives the appended output.
// @return a UnicodeString reference; presumably `target` itself, but the
//         return statement is not visible in this listing -- confirm.
// NOTE(review): `status` is local and is not inspected in the visible lines.
639 UnicodeString
&CollationIteratorTest::orderString(CollationElementIterator
&iter
, UnicodeString
&target
)
642 UErrorCode status
= U_ZERO_ERROR
;
644 while ((order
= iter
.next(status
)) != CollationElementIterator::NULLORDER
)
647 appendHex(order
, 8, target
);
// Steps two iterators in lockstep with next() and reports a mismatch through
// errln(), printing the position `count` and both orders in hex.
// Iteration continues until i1 returns NULLORDER.
// @param i1  first iterator; advanced by this call.
// @param i2  second iterator; advanced by this call.
// NOTE(review): the comparison guarding errln(), the update of `count` and the
// loop opener are not visible in this listing; the trailing while with a
// semicolon suggests a do-while loop -- confirm.
654 void CollationIteratorTest::assertEqual(CollationElementIterator
&i1
, CollationElementIterator
&i2
)
656 int32_t c1
, c2
, count
= 0;
657 UErrorCode status
= U_ZERO_ERROR
;
661 c1
= i1
.next(status
);
662 c2
= i2
.next(status
);
666 errln(" %d: strength(0x%X) != strength(0x%X)", count
, c1
, c2
);
672 while (c1
!= CollationElementIterator::NULLORDER
);
// Test dispatcher: maps `index` to a test name and, when `exec` is true, runs
// the corresponding test method.
// @param index  zero-based test number (0 through 8 are defined).
// @param exec   when false only `name` is set and no test is run.
// @param name   receives the test name; set to an empty string for any index
//               outside the defined range.
// NOTE(review): the switch header and the condition guarding the dataerrln()
// call are not visible in this listing; presumably dataerrln() reports a
// missing en_us collator -- confirm.
675 void CollationIteratorTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* /*par*/)
679 logln("Collation Iteration Tests: ");
685 case 0: name
= "TestPrevious"; if (exec
) TestPrevious(/* par */); break;
686 case 1: name
= "TestOffset"; if (exec
) TestOffset(/* par */); break;
687 case 2: name
= "TestSetText"; if (exec
) TestSetText(/* par */); break;
688 case 3: name
= "TestMaxExpansion"; if (exec
) TestMaxExpansion(/* par */); break;
689 case 4: name
= "TestClearBuffers"; if (exec
) TestClearBuffers(/* par */); break;
690 case 5: name
= "TestUnicodeChar"; if (exec
) TestUnicodeChar(/* par */); break;
691 case 6: name
= "TestAssignment"; if (exec
) TestAssignment(/* par */); break;
692 case 7: name
= "TestConstructors"; if (exec
) TestConstructors(/* par */); break;
693 case 8: name
= "TestStrengthOrder"; if (exec
) TestStrengthOrder(/* par */); break;
694 default: name
= ""; break;
697 dataerrln("Class iterator not instantiated");
702 #endif /* #if !UCONFIG_NO_COLLATION */