1 /********************************************************************
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_COLLATION
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
16 #include "unicode/schriter.h"
17 #include "unicode/chariter.h"
18 #include "unicode/uchar.h"
21 #define ARRAY_LENGTH(array) (sizeof array / sizeof array[0])
// File-scope ICU error code, initialised to U_ZERO_ERROR.
// The constructor below appears to pass this variable to
// Collator::createInstance() (no local declaration is visible there).
// NOTE(review): the test methods in this file declare their own local
// `UErrorCode status`, which shadows this variable inside those methods.
15 static UErrorCode status
= U_ZERO_ERROR
;
// Constructor: initialises the two sample strings test1 and test2, then asks
// Collator::createInstance() for a Locale::getUS() collator and stores it in
// en_us. If the status reports failure, the ICU error name is logged via errln().
// NOTE(review): the Collator* result is converted to RuleBasedCollator* with an
// unchecked C-style cast — confirm createInstance() always yields that type here.
25 CollationIteratorTest::CollationIteratorTest()
26 : test1("What subset of all possible test cases?", ""),
27 test2("has the highest probability of detecting", "")
29 en_us
= (RuleBasedCollator
*)Collator::createInstance(Locale::getUS(), status
);
30 if(U_FAILURE(status
)) {
33 errln("Collator creation failed with %s", u_errorName(status
));
// Destructor. Its body is not visible in this listing.
// NOTE(review): presumably releases en_us, which the constructor allocates — confirm.
39 CollationIteratorTest::~CollationIteratorTest()
45 * Test for CollationElementIterator previous and next for the whole set of
// Walks code points starting at 1 while codepoint < 0xFFFE. The inner loop
// keeps going while codepoint % 0xFF != 0, and u_isdefined() is consulted
// before a code point is used. For each batch a CollationElementIterator is
// created from en_us over `source`.
// NOTE(review): the statements that build `source`, advance `codepoint` and
// exercise the iterator are not visible in this listing.
48 void CollationIteratorTest::TestUnicodeChar()
50 CollationElementIterator
*iter
;
54 for (codepoint
= 1; codepoint
< 0xFFFE;)
58 while (codepoint
% 0xFF != 0)
60 if (u_isdefined(codepoint
))
65 if (u_isdefined(codepoint
))
68 if (codepoint
!= 0xFFFF)
71 iter
= en_us
->createCollationElementIterator(source
);
72 /* A basic test to see if it's working at all */
79 * Test for CollationElementIterator.previous()
81 * @bug 4108758 - Make sure it works with contracting characters
// Builds collation element iterators for a series of collators: the shared
// en_us collator over test1, three rule-based collators (contracting rule,
// expanding rule, and both combined), a Thai-locale collator and a
// Japanese-locale collator. Collator creation failures are reported via errln().
// NOTE(review): the calls that actually drive each iterator backwards and
// forwards are not visible in this listing.
84 void CollationIteratorTest::TestPrevious(/* char* par */)
86 UErrorCode status
= U_ZERO_ERROR
;
87 CollationElementIterator
*iter
= en_us
->createCollationElementIterator(test1
);
89 // A basic test to see if it's working at all
93 // Test with a contracting character sequence
95 RuleBasedCollator
*c1
= NULL
;
96 c1
= new RuleBasedCollator(
97 (UnicodeString
)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status
);
99 if (c1
== NULL
|| U_FAILURE(status
))
101 errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
107 iter
= c1
->createCollationElementIterator(source
);
112 // Test with an expanding character sequence
113 RuleBasedCollator
*c2
= NULL
;
114 c2
= new RuleBasedCollator((UnicodeString
)"&a < b < c/abd < d", status
);
116 if (c2
== NULL
|| U_FAILURE(status
))
118 errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
124 iter
= c2
->createCollationElementIterator(source
);
// Test with rules that contain both an expansion (c/aba) and a contraction (ch)
130 RuleBasedCollator
*c3
= NULL
;
131 c3
= new RuleBasedCollator((UnicodeString
)"&a < b < c/aba < d < z < ch", status
);
133 if (c3
== NULL
|| U_FAILURE(status
))
135 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
140 source
= "abcdbchdc";
141 iter
= c3
->createCollationElementIterator(source
);
// Test with a collator for Locale("th", "TH", "") over an escaped Thai string
147 source
= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
149 Collator
*c4
= Collator::createInstance(Locale("th", "TH", ""), status
);
150 if(U_FAILURE(status
)){
151 errln("Couldn't create a collator");
153 iter
= ((RuleBasedCollator
*)c4
)->createCollationElementIterator(source
);
// Test with a collator for Locale("ja", "JP", "")
158 source
= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
159 Collator
*c5
= Collator::createInstance(Locale("ja", "JP", ""), status
);
// NOTE(review): c5 is dereferenced here before the status check that follows;
// if createInstance() can return NULL on failure this is a null dereference.
// The Thai case above checks status first — consider the same order here.
161 iter
= ((RuleBasedCollator
*)c5
)->createCollationElementIterator(source
);
162 if(U_FAILURE(status
)){
163 errln("Couldn't create Japanese collator\n");
171 * Test for getOffset() and setOffset()
// Exercises setOffset()/getOffset() on an iterator over test1:
//  - after setOffset(0), previous() is expected to return UCOL_NULLORDER;
//  - after setOffset(test1.length()), next() is expected to return UCOL_NULLORDER;
//  - after collecting all orders with getOrders(), getOffset() is expected to
//    equal test1.length();
//  - after setOffset(0) again, the iterator is compared with a freshly created
//    one via assertEqual().
173 void CollationIteratorTest::TestOffset(/* char* par */)
175 CollationElementIterator
*iter
= en_us
->createCollationElementIterator(test1
);
176 UErrorCode status
= U_ZERO_ERROR
;
177 // testing boundaries
178 iter
->setOffset(0, status
);
179 if (U_FAILURE(status
) || iter
->previous(status
) != UCOL_NULLORDER
) {
180 errln("Error: After setting offset to 0, we should be at the end "
181 "of the backwards iteration");
183 iter
->setOffset(test1
.length(), status
);
// NOTE(review): this branch checks next(), i.e. forward iteration, but the
// message below still says "backwards iteration" — looks like a copy/paste slip.
184 if (U_FAILURE(status
) || iter
->next(status
) != UCOL_NULLORDER
) {
185 errln("Error: After setting offset to end of the string, we should "
186 "be at the end of the backwards iteration");
189 // Run all the way through the iterator, then get the offset
190 int32_t orderLength
= 0;
191 int32_t *orders
= getOrders(*iter
, orderLength
);
193 int32_t offset
= iter
->getOffset();
195 if (offset
!= test1
.length())
197 UnicodeString
msg1("offset at end != length: ");
198 UnicodeString
msg2(" vs ");
200 errln(msg1
+ offset
+ msg2
+ test1
.length());
203 // Now set the offset back to the beginning and see if it works
204 CollationElementIterator
*pristine
= en_us
->createCollationElementIterator(test1
);
206 iter
->setOffset(0, status
);
208 if (U_FAILURE(status
))
210 errln("setOffset failed.");
214 assertEqual(*iter
, *pristine
);
217 // TODO: try iterating halfway through a messy string.
// Exercises both setText() overloads of CollationElementIterator:
//  - iter2 (created over test2) is advanced a few elements, then re-pointed at
//    test1 with setText(UnicodeString) and compared with iter1 via assertEqual();
//  - the same is repeated with setText(CharacterIterator&);
//  - finally both overloads are given an empty string and next() is expected
//    to return UCOL_NULLORDER straight away.
227 void CollationIteratorTest::TestSetText(/* char* par */)
229 CollationElementIterator
*iter1
= en_us
->createCollationElementIterator(test1
);
230 CollationElementIterator
*iter2
= en_us
->createCollationElementIterator(test2
);
231 UErrorCode status
= U_ZERO_ERROR
;
233 // Run through the second iterator just to exercise it
234 int32_t c
= iter2
->next(status
);
// The loop stops after at most 10 iterations or at the end of the elements.
// NOTE(review): the declaration of `i` is not visible in this listing.
237 while ( ++i
< 10 && c
!= CollationElementIterator::NULLORDER
)
239 if (U_FAILURE(status
))
241 errln("iter2->next() returned an error.");
246 c
= iter2
->next(status
);
249 // Now set it to point to the same string as the first iterator
250 iter2
->setText(test1
, status
);
252 if (U_FAILURE(status
))
254 errln("call to iter2->setText(test1) failed.");
258 assertEqual(*iter1
, *iter2
);
261 // Now use the overloaded setText(CharacterIterator&, UErrorCode) function to set the text
262 CharacterIterator
* chariter
= new StringCharacterIterator(test1
);
263 iter2
->setText(*chariter
, status
);
264 if (U_FAILURE(status
))
266 errln("call to iter2->setText(chariter(test1)) failed.");
270 assertEqual(*iter1
, *iter2
);
273 // test for an empty string
274 UnicodeString
empty("");
275 iter1
->setText(empty
, status
);
276 if (U_FAILURE(status
)
277 || iter1
->next(status
) != (int32_t)UCOL_NULLORDER
) {
278 errln("Empty string should have no CEs.");
// Same empty-string check, this time through the CharacterIterator overload.
280 ((StringCharacterIterator
*)chariter
)->setText(empty
);
281 iter1
->setText(*chariter
, status
);
282 if (U_FAILURE(status
)
283 || iter1
->next(status
) != (int32_t)UCOL_NULLORDER
) {
284 errln("Empty string should have no CEs.");
292 * Test for getMaxExpansion()
// For a collator built from the rule "&a < ab < c/aba < d < z < ch", loops
// while ch < 0xFFFF and status is still successful. Each pass sets the
// iterator text to the single character `ch`, reads the last collation element
// with previous(), steps once more if that element is ignorable, and then
// keeps calling previous() until UCOL_NULLORDER. The result is compared with
// getMaxExpansion(order).
// NOTE(review): the declarations of `ch`, `order` and `count`, and the
// statements that update `ch` and `count`, are not visible in this listing.
294 void CollationIteratorTest::TestMaxExpansion(/* char* par */)
296 UErrorCode status
= U_ZERO_ERROR
;
297 UnicodeString
rule("&a < ab < c/aba < d < z < ch");
298 RuleBasedCollator
*coll
= new RuleBasedCollator(rule
, status
);
300 UnicodeString
str(ch
);
302 CollationElementIterator
*iter
= coll
->createCollationElementIterator(str
);
304 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
// NOTE(review): this `str` shadows the `str` declared before the loop.
308 UnicodeString
str(ch
);
309 iter
->setText(str
, status
);
310 order
= iter
->previous(status
);
312 /* thai management */
313 if (CollationElementIterator::isIgnorable(order
))
314 order
= iter
->previous(status
);
316 while (U_SUCCESS(status
)
317 && iter
->previous(status
) != (int32_t)UCOL_NULLORDER
)
// NOTE(review): because of the `&&`, getMaxExpansion(order) < count is only
// evaluated when status has ALREADY failed, so on a successful run the
// expansion-count check can never report anything. Confirm whether
// `U_FAILURE(status) || ...` (or U_SUCCESS) was intended.
322 if (U_FAILURE(status
) && iter
->getMaxExpansion(order
) < count
) {
323 errln("Failure at codepoint %d, maximum expansion count < %d\n",
// Checks that repositioning an iterator part-way through an expansion does not
// leave stale elements behind. With the rule "&a < b < c & ab = d" and source
// "abcd": remember the first element (e0), jump to offset 3, consume one
// element, jump back to offset 0 and read the first element again (e).
// Every ICU call is followed by a status check that logs via errln().
335 void CollationIteratorTest::TestClearBuffers(/* char* par */)
337 UErrorCode status
= U_ZERO_ERROR
;
338 RuleBasedCollator
*c
= new RuleBasedCollator((UnicodeString
)"&a < b < c & ab = d", status
);
340 if (c
== NULL
|| U_FAILURE(status
))
342 errln("Couldn't create a RuleBasedCollator.");
347 UnicodeString
source("abcd");
348 CollationElementIterator
*i
= c
->createCollationElementIterator(source
);
349 int32_t e0
= i
->next(status
); // save the first collation element
351 if (U_FAILURE(status
))
353 errln("call to i->next() failed. err=%s", u_errorName(status
));
357 i
->setOffset(3, status
); // go to the expanding character
359 if (U_FAILURE(status
))
361 errln("call to i->setOffset(3) failed. err=%s", u_errorName(status
));
365 i
->next(status
); // but only use up half of it
367 if (U_FAILURE(status
))
369 errln("call to i->next() failed. err=%s", u_errorName(status
));
373 i
->setOffset(0, status
); // go back to the beginning
375 if (U_FAILURE(status
))
377 errln("call to i->setOffset(0) failed. err=%s", u_errorName(status
));
381 int32_t e
= i
->next(status
); // and get this one again
383 if (U_FAILURE(status
))
385 errln("call to i->next() failed. err=%s", u_errorName(status
));
// NOTE(review): presumably guarded by a comparison of e and e0 on a line that
// is not visible in this listing — confirm.
389 errln("got 0x%X, expected 0x%X", e
, e0
);
401 * Testing the assignment operator
// Checks that copies of a CollationElementIterator compare equal to their
// source, first over "abcd" and then over "a\u0300\u0325" with
// UCOL_NORMALIZATION_MODE switched on. A series of ==/!= checks between
// *iter4 and its copy iter5 follows.
// NOTE(review): the statements that advance iter4/iter5 between those checks
// are not visible in this listing.
// NOTE(review): in the ==/!= series each errln() text names the relation that
// was EXPECTED (e.g. "not equal" is logged when the iterators compare equal).
403 void CollationIteratorTest::TestAssignment()
405 UErrorCode status
= U_ZERO_ERROR
;
406 RuleBasedCollator
*coll
=
407 (RuleBasedCollator
*)Collator::createInstance(status
);
409 if (coll
== NULL
|| U_FAILURE(status
))
411 errln("Couldn't create a default collator.");
415 UnicodeString
source("abcd");
416 CollationElementIterator
*iter1
=
417 coll
->createCollationElementIterator(source
);
// NOTE(review): `T x = y;` is copy-initialisation, so this invokes the copy
// constructor, not operator=, although the message below says "assignment".
419 CollationElementIterator iter2
= *iter1
;
421 if (*iter1
!= iter2
) {
422 errln("Fail collation iterator assignment does not produce the same elements");
425 CollationElementIterator
iter3(*iter1
);
427 if (*iter1
!= iter3
) {
428 errln("Fail collation iterator copy constructor does not produce the same elements");
// Repeat with a base letter followed by two combining marks, normalization on.
431 source
= CharsToUnicodeString("a\\u0300\\u0325");
432 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
433 CollationElementIterator
*iter4
434 = coll
->createCollationElementIterator(source
);
435 CollationElementIterator
iter5(*iter4
);
436 if (*iter4
!= iter5
) {
437 errln("collation iterator assignment does not produce the same elements");
440 if (U_FAILURE(status
) || *iter4
== iter5
) {
441 errln("collation iterator not equal");
444 if (U_FAILURE(status
) || *iter4
!= iter5
) {
445 errln("collation iterator equal");
448 if (U_FAILURE(status
) || *iter4
== iter5
) {
449 errln("collation iterator not equal");
452 if (U_FAILURE(status
) || *iter4
!= iter5
) {
453 errln("collation iterator equal");
// A copy taken from iter4 at this point must also compare equal to it.
455 CollationElementIterator
iter6(*iter4
);
456 if (*iter4
!= iter6
) {
457 errln("collation iterator equal");
460 if (U_FAILURE(status
) || *iter4
== iter5
) {
461 errln("collation iterator not equal");
464 if (U_FAILURE(status
) || *iter4
!= iter5
) {
465 errln("collation iterator equal");
468 if (U_FAILURE(status
) || *iter4
== iter5
) {
469 errln("collation iterator not equal");
472 if (U_FAILURE(status
) || *iter4
!= iter5
) {
473 errln("collation iterator equal");
481 * Testing the constructors
// Creates iterators from the default collator in two ways — from a
// StringCharacterIterator and from a UnicodeString — over the same text, and
// checks that they compare equal (including each against itself and in both
// operand orders) and produce the same elements via assertEqual().
// The same is repeated for an empty string, where next() is expected to return
// UCOL_NULLORDER immediately.
483 void CollationIteratorTest::TestConstructors()
485 UErrorCode status
= U_ZERO_ERROR
;
486 RuleBasedCollator
*coll
=
487 (RuleBasedCollator
*)Collator::createInstance(status
);
488 if (coll
== NULL
|| U_FAILURE(status
))
490 errln("Couldn't create a default collator.");
494 // testing protected constructor with character iterator as argument
495 StringCharacterIterator
chariter(test1
);
496 CollationElementIterator
*iter1
=
497 coll
->createCollationElementIterator(chariter
);
// NOTE(review): createCollationElementIterator() is not given `status`, so
// this check cannot observe a failure of the call above; it only re-tests the
// value left by createInstance().
498 if (U_FAILURE(status
)) {
499 errln("Couldn't create collation element iterator with character iterator.");
502 CollationElementIterator
*iter2
=
503 coll
->createCollationElementIterator(test1
);
505 // initially the 2 collation element iterators should be the same
// The self-comparisons also exercise operator!= on identical operands.
506 if (*iter1
!= *iter1
|| *iter2
!= *iter2
|| *iter1
!= *iter2
507 || *iter2
!= *iter1
) {
508 errln("CollationElementIterators constructed with the same string data should be the same at the start");
510 assertEqual(*iter1
, *iter2
);
515 // tests empty strings
516 UnicodeString
empty("");
517 iter1
= coll
->createCollationElementIterator(empty
);
518 chariter
.setText(empty
);
519 iter2
= coll
->createCollationElementIterator(chariter
);
520 if (*iter1
!= *iter1
|| *iter2
!= *iter2
|| *iter1
!= *iter2
521 || *iter2
!= *iter1
) {
522 errln("CollationElementIterators constructed with the same string data should be the same at the start");
524 if (iter1
->next(status
) != (int32_t)UCOL_NULLORDER
) {
525 errln("Empty string should have no CEs.");
527 if (iter2
->next(status
) != (int32_t)UCOL_NULLORDER
) {
528 errln("Empty string should have no CEs.");
536 * Testing the strength order
// Checks CollationElementIterator::strengthOrder() on the sample value
// 0x0123ABCD for three collator strengths:
//   PRIMARY   -> expects 0x01230000
//   SECONDARY -> expects 0x0123AB00
//   TERTIARY  -> expects the value unchanged
// The iterator is created once, while the collator is at PRIMARY strength;
// the later checks change the strength on `coll` and reuse the same iterator.
538 void CollationIteratorTest::TestStrengthOrder()
540 int order
= 0x0123ABCD;
542 UErrorCode status
= U_ZERO_ERROR
;
543 RuleBasedCollator
*coll
=
544 (RuleBasedCollator
*)Collator::createInstance(status
);
545 if (coll
== NULL
|| U_FAILURE(status
))
547 errln("Couldn't create a default collator.");
551 coll
->setStrength(Collator::PRIMARY
);
552 CollationElementIterator
*iter
=
553 coll
->createCollationElementIterator(test1
);
// NOTE(review): presumably guarded by a check on `iter` that is not visible in
// this listing — confirm.
556 errln("Couldn't create a collation element iterator from default collator");
560 if (iter
->strengthOrder(order
) != 0x01230000) {
561 errln("Strength order for a primary strength collator should be the first 2 bytes");
565 coll
->setStrength(Collator::SECONDARY
);
566 if (iter
->strengthOrder(order
) != 0x0123AB00) {
567 errln("Strength order for a secondary strength collator should be the third byte");
571 coll
->setStrength(Collator::TERTIARY
);
// NOTE(review): this check expects the full, unmodified order, but the message
// repeats the secondary-strength wording ("the third byte") — looks like a
// copy/paste slip in the message text.
572 if (iter
->strengthOrder(order
) != order
) {
573 errln("Strength order for a tertiary strength collator should be the third byte");
581 * Return a string containing all of the collation orders
582 * returned by calls to next on the specified iterator
// Calls iter.next() until it returns CollationElementIterator::NULLORDER and
// passes each order to appendHex(order, 8, target).
// @param iter   iterator to drain; it is advanced by this call
// @param target string handed to appendHex() for every order
// NOTE(review): the declaration of `order` and the return statement are not
// visible in this listing; the function presumably returns `target`.
584 UnicodeString
&CollationIteratorTest::orderString(CollationElementIterator
&iter
, UnicodeString
&target
)
587 UErrorCode status
= U_ZERO_ERROR
;
589 while ((order
= iter
.next(status
)) != CollationElementIterator::NULLORDER
)
592 appendHex(order
, 8, target
);
// Steps two iterators in lock-step with next() and reports, via errln(), the
// position and the two element values when they disagree.
// @param i1 first iterator; advanced by this call
// @param i2 second iterator; advanced by this call
// NOTE(review): the comparison guarding errln() and the loop header are not
// visible in this listing; the trailing `while (...)` is presumably the
// condition of a do/while that stops once i1 returns NULLORDER.
599 void CollationIteratorTest::assertEqual(CollationElementIterator
&i1
, CollationElementIterator
&i2
)
601 int32_t c1
, c2
, count
= 0;
602 UErrorCode status
= U_ZERO_ERROR
;
606 c1
= i1
.next(status
);
607 c2
= i2
.next(status
);
611 errln(" %d: strength(0x%X) != strength(0x%X)", count
, c1
, c2
);
617 while (c1
!= CollationElementIterator::NULLORDER
);
// Test dispatcher: maps `index` (0..8) to a test name and, when `exec` is
// true, runs the corresponding test method. Any other index sets `name` to "".
// @param index zero-based test number
// @param exec  when true the selected test is executed; `name` is set either way
// @param name  out-parameter receiving the test's name
// NOTE(review): the switch header and the condition guarding the final errln()
// are not visible in this listing.
620 void CollationIteratorTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* /*par*/)
624 logln("Collation Iteration Tests: ");
630 case 0: name
= "TestPrevious"; if (exec
) TestPrevious(/* par */); break;
631 case 1: name
= "TestOffset"; if (exec
) TestOffset(/* par */); break;
632 case 2: name
= "TestSetText"; if (exec
) TestSetText(/* par */); break;
633 case 3: name
= "TestMaxExpansion"; if (exec
) TestMaxExpansion(/* par */); break;
634 case 4: name
= "TestClearBuffers"; if (exec
) TestClearBuffers(/* par */); break;
635 case 5: name
= "TestUnicodeChar"; if (exec
) TestUnicodeChar(/* par */); break;
636 case 6: name
= "TestAssignment"; if (exec
) TestAssignment(/* par */); break;
637 case 7: name
= "TestConstructors"; if (exec
) TestConstructors(/* par */); break;
638 case 8: name
= "TestStrengthOrder"; if (exec
) TestStrengthOrder(/* par */); break;
639 default: name
= ""; break;
642 errln("Class iterator not instantiated");
647 #endif /* #if !UCONFIG_NO_COLLATION */