]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/itercoll.cpp
ICU-57132.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / itercoll.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 #include "unicode/utypes.h"
8
9 #if !UCONFIG_NO_COLLATION
10
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
15 #include "itercoll.h"
16 #include "unicode/schriter.h"
17 #include "unicode/chariter.h"
18 #include "unicode/uchar.h"
19 #include "cmemory.h"
20
21 static UErrorCode status = U_ZERO_ERROR;
22
23 CollationIteratorTest::CollationIteratorTest()
24 : test1("What subset of all possible test cases?", ""),
25 test2("has the highest probability of detecting", "")
26 {
27 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
28 if(U_FAILURE(status)) {
29 delete en_us;
30 en_us = 0;
31 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
32 return;
33 }
34
35 }
36
37 CollationIteratorTest::~CollationIteratorTest()
38 {
39 delete en_us;
40 }
41
42 /**
43 * Test for CollationElementIterator previous and next for the whole set of
44 * unicode characters.
45 */
46 void CollationIteratorTest::TestUnicodeChar()
47 {
48 CollationElementIterator *iter;
49 UChar codepoint;
50 UnicodeString source;
51
52 for (codepoint = 1; codepoint < 0xFFFE;)
53 {
54 source.remove();
55
56 while (codepoint % 0xFF != 0)
57 {
58 if (u_isdefined(codepoint))
59 source += codepoint;
60 codepoint ++;
61 }
62
63 if (u_isdefined(codepoint))
64 source += codepoint;
65
66 if (codepoint != 0xFFFF)
67 codepoint ++;
68
69 iter = en_us->createCollationElementIterator(source);
70 /* A basic test to see if it's working at all */
71 backAndForth(*iter);
72 delete iter;
73 }
74 }
75
76 /**
77 * Test for CollationElementIterator.previous()
78 *
79 * @bug 4108758 - Make sure it works with contracting characters
80 *
81 */
82 void CollationIteratorTest::TestPrevious(/* char* par */)
83 {
84 UErrorCode status = U_ZERO_ERROR;
85 CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
86
87 // A basic test to see if it's working at all
88 backAndForth(*iter);
89 delete iter;
90
91 // Test with a contracting character sequence
92 UnicodeString source;
93 RuleBasedCollator *c1 = NULL;
94 c1 = new RuleBasedCollator(
95 (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status);
96
97 if (c1 == NULL || U_FAILURE(status))
98 {
99 errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
100 delete c1;
101 return;
102 }
103
104 source = "abchdcba";
105 iter = c1->createCollationElementIterator(source);
106 backAndForth(*iter);
107 delete iter;
108 delete c1;
109
110 // Test with an expanding character sequence
111 RuleBasedCollator *c2 = NULL;
112 c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status);
113
114 if (c2 == NULL || U_FAILURE(status))
115 {
116 errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
117 delete c2;
118 return;
119 }
120
121 source = "abcd";
122 iter = c2->createCollationElementIterator(source);
123 backAndForth(*iter);
124 delete iter;
125 delete c2;
126
127 // Now try both
128 RuleBasedCollator *c3 = NULL;
129 c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status);
130
131 if (c3 == NULL || U_FAILURE(status))
132 {
133 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
134 delete c3;
135 return;
136 }
137
138 source = "abcdbchdc";
139 iter = c3->createCollationElementIterator(source);
140 backAndForth(*iter);
141 delete iter;
142 delete c3;
143
144 status=U_ZERO_ERROR;
145 source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
146
147 Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status);
148 if(U_FAILURE(status)){
149 errln("Couldn't create a collator");
150 }
151 iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source);
152 backAndForth(*iter);
153 delete iter;
154 delete c4;
155
156 source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
157 Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status);
158
159 iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source);
160 if(U_FAILURE(status)){
161 errln("Couldn't create Japanese collator\n");
162 }
163 backAndForth(*iter);
164 delete iter;
165 delete c5;
166 }
167
168 /**
169 * Test for getOffset() and setOffset()
170 */
171 void CollationIteratorTest::TestOffset(/* char* par */)
172 {
173 CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
174 UErrorCode status = U_ZERO_ERROR;
175 // testing boundaries
176 iter->setOffset(0, status);
177 if (U_FAILURE(status) || iter->previous(status) != CollationElementIterator::NULLORDER) {
178 errln("Error: After setting offset to 0, we should be at the end "
179 "of the backwards iteration");
180 }
181 iter->setOffset(test1.length(), status);
182 if (U_FAILURE(status) || iter->next(status) != CollationElementIterator::NULLORDER) {
183 errln("Error: After setting offset to end of the string, we should "
184 "be at the end of the backwards iteration");
185 }
186
187 // Run all the way through the iterator, then get the offset
188 int32_t orderLength = 0;
189 Order *orders = getOrders(*iter, orderLength);
190
191 int32_t offset = iter->getOffset();
192
193 if (offset != test1.length())
194 {
195 UnicodeString msg1("offset at end != length: ");
196 UnicodeString msg2(" vs ");
197
198 errln(msg1 + offset + msg2 + test1.length());
199 }
200
201 // Now set the offset back to the beginning and see if it works
202 CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);
203
204 iter->setOffset(0, status);
205
206 if (U_FAILURE(status))
207 {
208 errln("setOffset failed.");
209 }
210 else
211 {
212 assertEqual(*iter, *pristine);
213 }
214
215 delete pristine;
216 delete[] orders;
217 delete iter;
218
219 // setting offset in the middle of a contraction
220 UnicodeString contraction = "change";
221 status = U_ZERO_ERROR;
222 RuleBasedCollator tailored("& a < ch", status);
223 if (U_FAILURE(status)) {
224 errln("Error: in creation of Spanish collator - %s", u_errorName(status));
225 return;
226 }
227 iter = tailored.createCollationElementIterator(contraction);
228 Order *order = getOrders(*iter, orderLength);
229 iter->setOffset(1, status); // sets offset in the middle of ch
230 int32_t order2Length = 0;
231 Order *order2 = getOrders(*iter, order2Length);
232 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
233 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
234 }
235 delete[] order;
236 delete[] order2;
237 delete iter;
238 contraction = "peache";
239 iter = tailored.createCollationElementIterator(contraction);
240 iter->setOffset(3, status);
241 order = getOrders(*iter, orderLength);
242 iter->setOffset(4, status); // sets offset in the middle of ch
243 order2 = getOrders(*iter, order2Length);
244 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
245 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
246 }
247 delete[] order;
248 delete[] order2;
249 delete iter;
250 // setting offset in the middle of a surrogate pair
251 UnicodeString surrogate = UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape();
252 iter = tailored.createCollationElementIterator(surrogate);
253 order = getOrders(*iter, orderLength);
254 iter->setOffset(1, status); // sets offset in the middle of surrogate
255 order2 = getOrders(*iter, order2Length);
256 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
257 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
258 }
259 delete[] order;
260 delete[] order2;
261 delete iter;
262 surrogate = UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape();
263 iter = tailored.createCollationElementIterator(surrogate);
264 iter->setOffset(6, status);
265 order = getOrders(*iter, orderLength);
266 iter->setOffset(7, status); // sets offset in the middle of surrogate
267 order2 = getOrders(*iter, order2Length);
268 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
269 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
270 }
271 delete[] order;
272 delete[] order2;
273 delete iter;
274 // TODO: try iterating halfway through a messy string.
275 }
276
277 /**
278 * Test for setText()
279 */
280 void CollationIteratorTest::TestSetText(/* char* par */)
281 {
282 CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
283 CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
284 UErrorCode status = U_ZERO_ERROR;
285
286 // Run through the second iterator just to exercise it
287 int32_t c = iter2->next(status);
288 int32_t i = 0;
289
290 while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
291 {
292 if (U_FAILURE(status))
293 {
294 errln("iter2->next() returned an error.");
295 delete iter2;
296 delete iter1;
297 }
298
299 c = iter2->next(status);
300 }
301
302 // Now set it to point to the same string as the first iterator
303 iter2->setText(test1, status);
304
305 if (U_FAILURE(status))
306 {
307 errln("call to iter2->setText(test1) failed.");
308 }
309 else
310 {
311 assertEqual(*iter1, *iter2);
312 }
313 iter1->reset();
314 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
315 CharacterIterator* chariter = new StringCharacterIterator(test1);
316 iter2->setText(*chariter, status);
317 if (U_FAILURE(status))
318 {
319 errln("call to iter2->setText(chariter(test1)) failed.");
320 }
321 else
322 {
323 assertEqual(*iter1, *iter2);
324 }
325
326 // test for an empty string
327 UnicodeString empty("");
328 iter1->setText(empty, status);
329 if (U_FAILURE(status)
330 || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
331 errln("Empty string should have no CEs.");
332 }
333 ((StringCharacterIterator *)chariter)->setText(empty);
334 iter1->setText(*chariter, status);
335 if (U_FAILURE(status)
336 || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
337 errln("Empty string should have no CEs.");
338 }
339 delete chariter;
340 delete iter2;
341 delete iter1;
342 }
343
344 /** @bug 4108762
345 * Test for getMaxExpansion()
346 */
347 void CollationIteratorTest::TestMaxExpansion(/* char* par */)
348 {
349 UErrorCode status = U_ZERO_ERROR;
350 UnicodeString rule("&a < ab < c/aba < d < z < ch");
351 RuleBasedCollator *coll = new RuleBasedCollator(rule, status);
352 UChar ch = 0;
353 UnicodeString str(ch);
354
355 CollationElementIterator *iter = coll->createCollationElementIterator(str);
356
357 while (ch < 0xFFFF && U_SUCCESS(status)) {
358 int count = 1;
359 uint32_t order;
360 ch ++;
361 UnicodeString str(ch);
362 iter->setText(str, status);
363 order = iter->previous(status);
364
365 /* thai management */
366 if (CollationElementIterator::isIgnorable(order))
367 order = iter->previous(status);
368
369 while (U_SUCCESS(status)
370 && iter->previous(status) != (int32_t)CollationElementIterator::NULLORDER)
371 {
372 count ++;
373 }
374
375 if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
376 errln("Failure at codepoint %d, maximum expansion count < %d\n",
377 ch, count);
378 }
379 }
380
381 delete iter;
382 delete coll;
383 }
384
385 /*
386 * @bug 4157299
387 */
388 void CollationIteratorTest::TestClearBuffers(/* char* par */)
389 {
390 UErrorCode status = U_ZERO_ERROR;
391 RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);
392
393 if (c == NULL || U_FAILURE(status))
394 {
395 errln("Couldn't create a RuleBasedCollator.");
396 delete c;
397 return;
398 }
399
400 UnicodeString source("abcd");
401 CollationElementIterator *i = c->createCollationElementIterator(source);
402 int32_t e0 = i->next(status); // save the first collation element
403
404 if (U_FAILURE(status))
405 {
406 errln("call to i->next() failed. err=%s", u_errorName(status));
407 }
408 else
409 {
410 i->setOffset(3, status); // go to the expanding character
411
412 if (U_FAILURE(status))
413 {
414 errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
415 }
416 else
417 {
418 i->next(status); // but only use up half of it
419
420 if (U_FAILURE(status))
421 {
422 errln("call to i->next() failed. err=%s", u_errorName(status));
423 }
424 else
425 {
426 i->setOffset(0, status); // go back to the beginning
427
428 if (U_FAILURE(status))
429 {
430 errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
431 }
432 else
433 {
434 int32_t e = i->next(status); // and get this one again
435
436 if (U_FAILURE(status))
437 {
438 errln("call to i->next() failed. err=%s", u_errorName(status));
439 }
440 else if (e != e0)
441 {
442 errln("got 0x%X, expected 0x%X", e, e0);
443 }
444 }
445 }
446 }
447 }
448
449 delete i;
450 delete c;
451 }
452
453 /**
454 * Testing the assignment operator
455 */
456 void CollationIteratorTest::TestAssignment()
457 {
458 UErrorCode status = U_ZERO_ERROR;
459 RuleBasedCollator *coll =
460 (RuleBasedCollator *)Collator::createInstance(status);
461
462 if (coll == NULL || U_FAILURE(status))
463 {
464 errln("Couldn't create a default collator.");
465 return;
466 }
467
468 UnicodeString source("abcd");
469 CollationElementIterator *iter1 =
470 coll->createCollationElementIterator(source);
471
472 CollationElementIterator iter2 = *iter1;
473
474 if (*iter1 != iter2) {
475 errln("Fail collation iterator assignment does not produce the same elements");
476 }
477
478 CollationElementIterator iter3(*iter1);
479
480 if (*iter1 != iter3) {
481 errln("Fail collation iterator copy constructor does not produce the same elements");
482 }
483
484 source = CharsToUnicodeString("a\\u0300\\u0325");
485 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
486 CollationElementIterator *iter4
487 = coll->createCollationElementIterator(source);
488 CollationElementIterator iter5(*iter4);
489 if (*iter4 != iter5) {
490 errln("collation iterator assignment does not produce the same elements");
491 }
492 iter4->next(status);
493 if (U_FAILURE(status) || *iter4 == iter5) {
494 errln("collation iterator not equal");
495 }
496 iter5.next(status);
497 if (U_FAILURE(status) || *iter4 != iter5) {
498 errln("collation iterator equal");
499 }
500 iter4->next(status);
501 if (U_FAILURE(status) || *iter4 == iter5) {
502 errln("collation iterator not equal");
503 }
504 iter5.next(status);
505 if (U_FAILURE(status) || *iter4 != iter5) {
506 errln("collation iterator equal");
507 }
508 CollationElementIterator iter6(*iter4);
509 if (*iter4 != iter6) {
510 errln("collation iterator equal");
511 }
512 iter4->next(status);
513 if (U_FAILURE(status) || *iter4 == iter5) {
514 errln("collation iterator not equal");
515 }
516 iter5.next(status);
517 if (U_FAILURE(status) || *iter4 != iter5) {
518 errln("collation iterator equal");
519 }
520 iter4->next(status);
521 if (U_FAILURE(status) || *iter4 == iter5) {
522 errln("collation iterator not equal");
523 }
524 iter5.next(status);
525 if (U_FAILURE(status) || *iter4 != iter5) {
526 errln("collation iterator equal");
527 }
528 delete iter1;
529 delete iter4;
530 delete coll;
531 }
532
533 /**
534 * Testing the constructors
535 */
536 void CollationIteratorTest::TestConstructors()
537 {
538 UErrorCode status = U_ZERO_ERROR;
539 RuleBasedCollator *coll =
540 (RuleBasedCollator *)Collator::createInstance(status);
541 if (coll == NULL || U_FAILURE(status))
542 {
543 errln("Couldn't create a default collator.");
544 return;
545 }
546
547 // testing protected constructor with character iterator as argument
548 StringCharacterIterator chariter(test1);
549 CollationElementIterator *iter1 =
550 coll->createCollationElementIterator(chariter);
551 if (U_FAILURE(status)) {
552 errln("Couldn't create collation element iterator with character iterator.");
553 return;
554 }
555 CollationElementIterator *iter2 =
556 coll->createCollationElementIterator(test1);
557
558 // initially the 2 collation element iterators should be the same
559 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
560 || *iter2 != *iter1) {
561 errln("CollationElementIterators constructed with the same string data should be the same at the start");
562 }
563 assertEqual(*iter1, *iter2);
564
565 delete iter1;
566 delete iter2;
567
568 // tests empty strings
569 UnicodeString empty("");
570 iter1 = coll->createCollationElementIterator(empty);
571 chariter.setText(empty);
572 iter2 = coll->createCollationElementIterator(chariter);
573 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
574 || *iter2 != *iter1) {
575 errln("CollationElementIterators constructed with the same string data should be the same at the start");
576 }
577 if (iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
578 errln("Empty string should have no CEs.");
579 }
580 if (iter2->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
581 errln("Empty string should have no CEs.");
582 }
583 delete iter1;
584 delete iter2;
585 delete coll;
586 }
587
588 /**
589 * Testing the strength order
590 */
591 void CollationIteratorTest::TestStrengthOrder()
592 {
593 int order = 0x0123ABCD;
594
595 UErrorCode status = U_ZERO_ERROR;
596 RuleBasedCollator *coll =
597 (RuleBasedCollator *)Collator::createInstance(status);
598 if (coll == NULL || U_FAILURE(status))
599 {
600 errln("Couldn't create a default collator.");
601 return;
602 }
603
604 coll->setStrength(Collator::PRIMARY);
605 CollationElementIterator *iter =
606 coll->createCollationElementIterator(test1);
607
608 if (iter == NULL) {
609 errln("Couldn't create a collation element iterator from default collator");
610 return;
611 }
612
613 if (iter->strengthOrder(order) != 0x01230000) {
614 errln("Strength order for a primary strength collator should be the first 2 bytes");
615 return;
616 }
617
618 coll->setStrength(Collator::SECONDARY);
619 if (iter->strengthOrder(order) != 0x0123AB00) {
620 errln("Strength order for a secondary strength collator should be the third byte");
621 return;
622 }
623
624 coll->setStrength(Collator::TERTIARY);
625 if (iter->strengthOrder(order) != order) {
626 errln("Strength order for a tertiary strength collator should be the third byte");
627 return;
628 }
629 delete iter;
630 delete coll;
631 }
632
633 /**
634 * Return a string containing all of the collation orders
635 * returned by calls to next on the specified iterator
636 */
637 UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target)
638 {
639 int32_t order;
640 UErrorCode status = U_ZERO_ERROR;
641
642 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
643 {
644 target += "0x";
645 appendHex(order, 8, target);
646 target += " ";
647 }
648
649 return target;
650 }
651
652 void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
653 {
654 int32_t c1, c2, count = 0;
655 UErrorCode status = U_ZERO_ERROR;
656
657 do
658 {
659 c1 = i1.next(status);
660 c2 = i2.next(status);
661
662 if (c1 != c2)
663 {
664 errln(" %d: strength(0x%X) != strength(0x%X)", count, c1, c2);
665 break;
666 }
667
668 count += 1;
669 }
670 while (c1 != CollationElementIterator::NULLORDER);
671 }
672
673 void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
674 {
675 if (exec)
676 {
677 logln("Collation Iteration Tests: ");
678 }
679
680 if(en_us) {
681 switch (index)
682 {
683 case 0: name = "TestPrevious"; if (exec) TestPrevious(/* par */); break;
684 case 1: name = "TestOffset"; if (exec) TestOffset(/* par */); break;
685 case 2: name = "TestSetText"; if (exec) TestSetText(/* par */); break;
686 case 3: name = "TestMaxExpansion"; if (exec) TestMaxExpansion(/* par */); break;
687 case 4: name = "TestClearBuffers"; if (exec) TestClearBuffers(/* par */); break;
688 case 5: name = "TestUnicodeChar"; if (exec) TestUnicodeChar(/* par */); break;
689 case 6: name = "TestAssignment"; if (exec) TestAssignment(/* par */); break;
690 case 7: name = "TestConstructors"; if (exec) TestConstructors(/* par */); break;
691 case 8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break;
692 default: name = ""; break;
693 }
694 } else {
695 dataerrln("Class iterator not instantiated");
696 name = "";
697 }
698 }
699
700 #endif /* #if !UCONFIG_NO_COLLATION */