]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/regcoll.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / regcoll.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f 3/********************************************************************
729e4ab9 4 * COPYRIGHT:
2ca993e8 5 * Copyright (c) 1997-2016, International Business Machines Corporation and
b75a7d8f
A
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9#include "unicode/utypes.h"
10
11#if !UCONFIG_NO_COLLATION
12
13#include "unicode/coll.h"
57a6839d 14#include "unicode/localpointer.h"
b75a7d8f
A
15#include "unicode/tblcoll.h"
16#include "unicode/unistr.h"
17#include "unicode/sortkey.h"
18#include "regcoll.h"
19#include "sfwdchit.h"
729e4ab9
A
20#include "testutil.h"
21#include "cmemory.h"
b75a7d8f 22
b75a7d8f
A
23CollationRegressionTest::CollationRegressionTest()
24{
25 UErrorCode status = U_ZERO_ERROR;
26
27 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
28 if(U_FAILURE(status)) {
29 delete en_us;
30 en_us = 0;
729e4ab9 31 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
b75a7d8f
A
32 return;
33 }
34}
35
36CollationRegressionTest::~CollationRegressionTest()
37{
38 delete en_us;
39}
40
41
42 // @bug 4048446
43//
44// CollationElementIterator.reset() doesn't work
45//
46void CollationRegressionTest::Test4048446(/* char* par */)
47{
48 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
51 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
52 UErrorCode status = U_ZERO_ERROR;
53
54 if (i1 == NULL|| i2 == NULL)
55 {
56 errln("Could not create CollationElementIterator's");
57 delete i1;
58 delete i2;
59 return;
60 }
61
62 while (i1->next(status) != CollationElementIterator::NULLORDER)
63 {
64 if (U_FAILURE(status))
65 {
66 errln("error calling next()");
67
68 delete i1;
69 delete i2;
70 return;
71 }
72 }
73
74 i1->reset();
75
76 assertEqual(*i1, *i2);
77
78 delete i1;
79 delete i2;
80}
81
82// @bug 4051866
83//
84// Collator -> rules -> Collator round-trip broken for expanding characters
85//
86void CollationRegressionTest::Test4051866(/* char* par */)
87{
b75a7d8f
A
88 UnicodeString rules;
89 UErrorCode status = U_ZERO_ERROR;
90
57a6839d 91 rules += "&n < o ";
b75a7d8f
A
92 rules += "& oe ,o";
93 rules += (UChar)0x3080;
94 rules += "& oe ,";
95 rules += (UChar)0x1530;
96 rules += " ,O";
97 rules += "& OE ,O";
98 rules += (UChar)0x3080;
99 rules += "& OE ,";
100 rules += (UChar)0x1520;
101 rules += "< p ,P";
102
103 // Build a collator containing expanding characters
b331163b
A
104 LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status);
105 if (U_FAILURE(status)) {
106 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
107 return;
108 }
b75a7d8f
A
109
110 // Build another using the rules from the first
b331163b 111 LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status);
57a6839d 112 if (U_FAILURE(status)) {
b331163b 113 errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status));
57a6839d
A
114 return;
115 }
b75a7d8f
A
116
117 // Make sure they're the same
118 if (!(c1->getRules() == c2->getRules()))
119 {
120 errln("Rules are not equal");
121 }
b75a7d8f
A
122}
123
124// @bug 4053636
125//
126// Collator thinks "black-bird" == "black"
127//
128void CollationRegressionTest::Test4053636(/* char* par */)
129{
130 if (en_us->equals("black_bird", "black"))
131 {
132 errln("black-bird == black");
133 }
134}
135
136// @bug 4054238
137//
138// CollationElementIterator will not work correctly if the associated
139// Collator object's mode is changed
140//
141void CollationRegressionTest::Test4054238(/* char* par */)
142{
143 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
144 const UnicodeString test3(chars3);
145 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
146
147 // NOTE: The Java code uses en_us to create the CollationElementIterators
148 // but I'm pretty sure that's wrong, so I've changed this to use c.
149 UErrorCode status = U_ZERO_ERROR;
150 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
151 CollationElementIterator *i1 = c->createCollationElementIterator(test3);
152 delete i1;
153 delete c;
154}
155
156// @bug 4054734
157//
158// Collator::IDENTICAL documented but not implemented
159//
160void CollationRegressionTest::Test4054734(/* char* par */)
161{
162 /*
163 Here's the original Java:
164
165 String[] decomp = {
166 "\u0001", "<", "\u0002",
167 "\u0001", "=", "\u0001",
168 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
169 "\u00C0", "=", "A\u0300" // Decomp should make these equal
170 };
729e4ab9 171
b75a7d8f
A
172 String[] nodecomp = {
173 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
174 };
175 */
176
177 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
178 {
179 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
180 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
181 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
182 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
183 };
184
729e4ab9 185
b75a7d8f
A
186 UErrorCode status = U_ZERO_ERROR;
187 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
188
189 c->setStrength(Collator::IDENTICAL);
190
191 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
2ca993e8 192 compareArray(*c, decomp, UPRV_LENGTHOF(decomp));
b75a7d8f
A
193
194 delete c;
195}
196
197// @bug 4054736
198//
199// Full Decomposition mode not implemented
200//
201void CollationRegressionTest::Test4054736(/* char* par */)
202{
203 UErrorCode status = U_ZERO_ERROR;
204 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
205
206 c->setStrength(Collator::SECONDARY);
207 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
208
209 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
210 {
211 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
212 };
213
2ca993e8 214 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
215
216 delete c;
217}
218
219// @bug 4058613
220//
729e4ab9 221// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
b75a7d8f
A
222//
223void CollationRegressionTest::Test4058613(/* char* par */)
224{
225 // Creating a default collator doesn't work when Korean is the default
226 // locale
729e4ab9 227
b75a7d8f
A
228 Locale oldDefault = Locale::getDefault();
229 UErrorCode status = U_ZERO_ERROR;
729e4ab9 230
b75a7d8f
A
231 Locale::setDefault(Locale::getKorean(), status);
232
233 if (U_FAILURE(status))
234 {
235 errln("Could not set default locale to Locale::KOREAN");
236 return;
237 }
238
239 Collator *c = NULL;
729e4ab9 240
b75a7d8f
A
241 c = Collator::createInstance("en_US", status);
242
243 if (c == NULL || U_FAILURE(status))
244 {
245 errln("Could not create a Korean collator");
246 Locale::setDefault(oldDefault, status);
247 delete c;
248 return;
249 }
729e4ab9 250
b75a7d8f
A
251 // Since the fix to this bug was to turn off decomposition for Korean collators,
252 // ensure that's what we got
253 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
254 {
255 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
256 }
257
258 delete c;
259
260 Locale::setDefault(oldDefault, status);
261}
262
263// @bug 4059820
264//
265// RuleBasedCollator.getRules does not return the exact pattern as input
266// for expanding character sequences
267//
268void CollationRegressionTest::Test4059820(/* char* par */)
269{
270 UErrorCode status = U_ZERO_ERROR;
271
272 RuleBasedCollator *c = NULL;
57a6839d 273 UnicodeString rules = "&9 < a < b , c/a < d < z";
729e4ab9 274
b75a7d8f
A
275 c = new RuleBasedCollator(rules, status);
276
277 if (c == NULL || U_FAILURE(status))
278 {
279 errln("Failure building a collator.");
280 delete c;
281 return;
282 }
283
284 if ( c->getRules().indexOf("c/a") == -1)
285 {
286 errln("returned rules do not contain 'c/a'");
287 }
288
289 delete c;
290}
291
292// @bug 4060154
293//
294// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
295//
296void CollationRegressionTest::Test4060154(/* char* par */)
297{
298 UErrorCode status = U_ZERO_ERROR;
299 UnicodeString rules;
300
57a6839d 301 rules += "&f < g, G < h, H < i, I < j, J";
b75a7d8f
A
302 rules += " & H < ";
303 rules += (UChar)0x0131;
304 rules += ", ";
305 rules += (UChar)0x0130;
306 rules += ", i, I";
307
308 RuleBasedCollator *c = NULL;
729e4ab9 309
b75a7d8f
A
310 c = new RuleBasedCollator(rules, status);
311
312 if (c == NULL || U_FAILURE(status))
313 {
314 errln("failure building collator.");
315 delete c;
316 return;
317 }
318
319 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
320
321 /*
322 String[] tertiary = {
323 "A", "<", "B",
324 "H", "<", "\u0131",
325 "H", "<", "I",
326 "\u0131", "<", "\u0130",
327 "\u0130", "<", "i",
328 "\u0130", ">", "H",
329 };
330*/
331
332 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
333 {
334 {0x41, 0}, {0x3c, 0}, {0x42, 0},
335 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
336 {0x48, 0}, {0x3c, 0}, {0x49, 0},
337 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
338 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
339 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
340 };
341
342 c->setStrength(Collator::TERTIARY);
2ca993e8 343 compareArray(*c, tertiary, UPRV_LENGTHOF(tertiary));
b75a7d8f
A
344
345 /*
346 String[] secondary = {
347 "H", "<", "I",
348 "\u0131", "=", "\u0130",
349 };
350*/
351 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
352 {
353 {0x48, 0}, {0x3c, 0}, {0x49, 0},
354 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
355 };
356
357 c->setStrength(Collator::PRIMARY);
2ca993e8 358 compareArray(*c, secondary, UPRV_LENGTHOF(secondary));
b75a7d8f
A
359
360 delete c;
73c04bcf 361}
b75a7d8f
A
362
363// @bug 4062418
364//
365// Secondary/Tertiary comparison incorrect in French Secondary
366//
367void CollationRegressionTest::Test4062418(/* char* par */)
368{
369 UErrorCode status = U_ZERO_ERROR;
370
371 RuleBasedCollator *c = NULL;
729e4ab9
A
372
373 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
b75a7d8f
A
374
375 if (c == NULL || U_FAILURE(status))
376 {
729e4ab9 377 errln("Failed to create collator for Locale::getCanadaFrench()");
b75a7d8f
A
378 delete c;
379 return;
380 }
381
382 c->setStrength(Collator::SECONDARY);
383
384/*
385 String[] tests = {
386 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
387 };
388*/
389 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
390 {
391 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
392 };
393
2ca993e8 394 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
395
396 delete c;
397}
398
399// @bug 4065540
400//
401// Collator::compare() method broken if either string contains spaces
402//
403void CollationRegressionTest::Test4065540(/* char* par */)
404{
405 if (en_us->compare("abcd e", "abcd f") == 0)
406 {
407 errln("'abcd e' == 'abcd f'");
408 }
409}
410
411// @bug 4066189
412//
413// Unicode characters need to be recursively decomposed to get the
414// correct result. For example,
415// u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
416//
417void CollationRegressionTest::Test4066189(/* char* par */)
418{
419 static const UChar chars1[] = {0x1EB1, 0};
420 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
421 const UnicodeString test1(chars1);
422 const UnicodeString test2(chars2);
423 UErrorCode status = U_ZERO_ERROR;
424
425 // NOTE: The java code used en_us to create the
426 // CollationElementIterator's. I'm pretty sure that
427 // was wrong, so I've change the code to use c1 and c2
428 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
429 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
430 CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
431
432 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
433 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
434 CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
435
436 assertEqual(*i1, *i2);
437
438 delete i2;
439 delete c2;
440 delete i1;
441 delete c1;
442}
443
444// @bug 4066696
445//
446// French secondary collation checking at the end of compare iteration fails
447//
448void CollationRegressionTest::Test4066696(/* char* par */)
449{
450 UErrorCode status = U_ZERO_ERROR;
451 RuleBasedCollator *c = NULL;
729e4ab9
A
452
453 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
b75a7d8f
A
454
455 if (c == NULL || U_FAILURE(status))
456 {
729e4ab9 457 errln("Failure creating collator for Locale::getCanadaFrench()");
b75a7d8f
A
458 delete c;
459 return;
460 }
461
462 c->setStrength(Collator::SECONDARY);
463
464/*
465 String[] tests = {
466 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
467 };
468
469 should be:
470
471 String[] tests = {
472 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
473 };
474
475*/
476
477 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
478 {
479 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
480 };
481
2ca993e8 482 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
483
484 delete c;
485}
486
487// @bug 4076676
488//
489// Bad canonicalization of same-class combining characters
490//
491void CollationRegressionTest::Test4076676(/* char* par */)
492{
493 // These combining characters are all in the same class, so they should not
494 // be reordered, and they should compare as unequal.
495 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
496 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
497
498 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
499 c->setStrength(Collator::TERTIARY);
500
501 if (c->compare(s1,s2) == 0)
502 {
503 errln("Same-class combining chars were reordered");
504 }
505
506 delete c;
507}
508
509// @bug 4079231
510//
511// RuleBasedCollator::operator==(NULL) throws NullPointerException
512//
513void CollationRegressionTest::Test4079231(/* char* par */)
514{
515 // I don't think there's any way to write this test
516 // in C++. The following is equivalent to the Java,
517 // but doesn't compile 'cause NULL can't be converted
518 // to Collator&
519 //
520 // if (en_us->operator==(NULL))
521 // {
522 // errln("en_us->operator==(NULL) returned TRUE");
523 // }
524
525 /*
526 try {
527 if (en_us->equals(null)) {
528 errln("en_us->equals(null) returned true");
529 }
530 }
531 catch (Exception e) {
532 errln("en_us->equals(null) threw " + e.toString());
533 }
534*/
535}
536
537// @bug 4078588
538//
539// RuleBasedCollator breaks on "< a < bb" rule
540//
541void CollationRegressionTest::Test4078588(/* char *par */)
542{
543 UErrorCode status = U_ZERO_ERROR;
57a6839d 544 RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
b75a7d8f
A
545
546 if (rbc == NULL || U_FAILURE(status))
547 {
548 errln("Failed to create RuleBasedCollator.");
549 delete rbc;
550 return;
551 }
552
553 Collator::EComparisonResult result = rbc->compare("a","bb");
554
555 if (result != Collator::LESS)
556 {
557 errln((UnicodeString)"Compare(a,bb) returned " + (int)result
558 + (UnicodeString)"; expected -1");
559 }
560
561 delete rbc;
562}
563
564// @bug 4081866
565//
566// Combining characters in different classes not reordered properly.
567//
568void CollationRegressionTest::Test4081866(/* char* par */)
569{
570 // These combining characters are all in different classes,
571 // so they should be reordered and the strings should compare as equal.
572 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
573 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
574
575 UErrorCode status = U_ZERO_ERROR;
576 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
577 c->setStrength(Collator::TERTIARY);
729e4ab9 578
b75a7d8f
A
579 // Now that the default collators are set to NO_DECOMPOSITION
580 // (as a result of fixing bug 4114077), we must set it explicitly
581 // when we're testing reordering behavior. -- lwerner, 5/5/98
582 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
583
584 if (c->compare(s1,s2) != 0)
585 {
586 errln("Combining chars were not reordered");
587 }
588
589 delete c;
590}
591
592// @bug 4087241
593//
594// string comparison errors in Scandinavian collators
595//
596void CollationRegressionTest::Test4087241(/* char* par */)
597{
598 UErrorCode status = U_ZERO_ERROR;
599 Locale da_DK("da", "DK");
600 RuleBasedCollator *c = NULL;
729e4ab9 601
b75a7d8f
A
602 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
603
604 if (c == NULL || U_FAILURE(status))
605 {
606 errln("Failed to create collator for da_DK locale");
607 delete c;
608 return;
609 }
610
611 c->setStrength(Collator::SECONDARY);
612
613 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
614 {
615 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
57a6839d 616 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring
b75a7d8f
A
617 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
618 };
619
2ca993e8 620 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
621
622 delete c;
623}
624
625// @bug 4087243
626//
627// CollationKey takes ignorable strings into account when it shouldn't
628//
629void CollationRegressionTest::Test4087243(/* char* par */)
630{
631 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
632 c->setStrength(Collator::TERTIARY);
633
634 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
635 {
636 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
637 };
638
2ca993e8 639 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
640
641 delete c;
642}
643
644// @bug 4092260
645//
646// Mu/micro conflict
647// Micro symbol and greek lowercase letter Mu should sort identically
648//
649void CollationRegressionTest::Test4092260(/* char* par */)
650{
651 UErrorCode status = U_ZERO_ERROR;
652 Locale el("el", "");
653 Collator *c = NULL;
729e4ab9 654
b75a7d8f
A
655 c = Collator::createInstance(el, status);
656
657 if (c == NULL || U_FAILURE(status))
658 {
659 errln("Failed to create collator for el locale.");
660 delete c;
661 return;
662 }
663
664 // These now have tertiary differences in UCA
665 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
666
667 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
668 {
669 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
670 };
671
2ca993e8 672 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
673
674 delete c;
675}
676
677// @bug 4095316
678//
679void CollationRegressionTest::Test4095316(/* char* par */)
680{
681 UErrorCode status = U_ZERO_ERROR;
682 Locale el_GR("el", "GR");
683 Collator *c = Collator::createInstance(el_GR, status);
684
685 if (c == NULL || U_FAILURE(status))
686 {
687 errln("Failed to create collator for el_GR locale");
688 delete c;
689 return;
690 }
691 // These now have tertiary differences in UCA
692 //c->setStrength(Collator::TERTIARY);
693 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
694
695 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
696 {
697 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
698 };
699
2ca993e8 700 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
701
702 delete c;
703}
704
705// @bug 4101940
706//
707void CollationRegressionTest::Test4101940(/* char* par */)
708{
709 UErrorCode status = U_ZERO_ERROR;
710 RuleBasedCollator *c = NULL;
57a6839d 711 UnicodeString rules = "&9 < a < b";
b75a7d8f 712 UnicodeString nothing = "";
729e4ab9 713
b75a7d8f
A
714 c = new RuleBasedCollator(rules, status);
715
716 if (c == NULL || U_FAILURE(status))
717 {
718 errln("Failed to create RuleBasedCollator");
719 delete c;
720 return;
721 }
722
723 CollationElementIterator *i = c->createCollationElementIterator(nothing);
724 i->reset();
725
726 if (i->next(status) != CollationElementIterator::NULLORDER)
727 {
728 errln("next did not return NULLORDER");
729 }
730
731 delete i;
732 delete c;
733}
734
735// @bug 4103436
736//
737// Collator::compare not handling spaces properly
738//
739void CollationRegressionTest::Test4103436(/* char* par */)
740{
741 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
742 c->setStrength(Collator::TERTIARY);
743
744 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
745 {
746 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
748 };
749
2ca993e8 750 compareArray(*c, tests, UPRV_LENGTHOF(tests));
b75a7d8f
A
751
752 delete c;
753}
754
755// @bug 4114076
756//
757// Collation not Unicode conformant with Hangul syllables
758//
759void CollationRegressionTest::Test4114076(/* char* par */)
760{
761 UErrorCode status = U_ZERO_ERROR;
762 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
763 c->setStrength(Collator::TERTIARY);
764
765 //
766 // With Canonical decomposition, Hangul syllables should get decomposed
767 // into Jamo, but Jamo characters should not be decomposed into
768 // conjoining Jamo
769 //
770 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
771 {
772 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
773 };
774
775 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
2ca993e8 776 compareArray(*c, test1, UPRV_LENGTHOF(test1));
b75a7d8f
A
777
778 // From UTR #15:
779 // *In earlier versions of Unicode, jamo characters like ksf
729e4ab9 780 // had compatibility mappings to kf + sf. These mappings were
b75a7d8f
A
781 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
782 // That is, the following test is obsolete as of 2.1.9
783
784//obsolete- // With Full decomposition, it should go all the way down to
785//obsolete- // conjoining Jamo characters.
786//obsolete- //
787//obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
788//obsolete- {
789//obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
790//obsolete- };
791//obsolete-
792//obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
2ca993e8 793//obsolete- compareArray(*c, test2, UPRV_LENGTHOF(test2));
b75a7d8f
A
794
795 delete c;
796}
797
798
799// @bug 4124632
800//
801// Collator::getCollationKey was hanging on certain character sequences
802//
803void CollationRegressionTest::Test4124632(/* char* par */)
804{
805 UErrorCode status = U_ZERO_ERROR;
806 Collator *coll = NULL;
729e4ab9 807
b75a7d8f 808 coll = Collator::createInstance(Locale::getJapan(), status);
729e4ab9 809
b75a7d8f
A
810 if (coll == NULL || U_FAILURE(status))
811 {
812 errln("Failed to create collator for Locale::JAPAN");
813 delete coll;
46f4442e 814 return;
b75a7d8f
A
815 }
816
817 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
818 CollationKey key;
729e4ab9 819
b75a7d8f
A
820 coll->getCollationKey(test, key, status);
821
822 if (key.isBogus() || U_FAILURE(status))
823 {
824 errln("CollationKey creation failed.");
825 }
826
827 delete coll;
828}
829
830// @bug 4132736
831//
832// sort order of french words with multiple accents has errors
833//
834void CollationRegressionTest::Test4132736(/* char* par */)
835{
836 UErrorCode status = U_ZERO_ERROR;
837
838 Collator *c = NULL;
729e4ab9
A
839
840 c = Collator::createInstance(Locale::getCanadaFrench(), status);
b75a7d8f 841 c->setStrength(Collator::TERTIARY);
729e4ab9 842
b75a7d8f
A
843 if (c == NULL || U_FAILURE(status))
844 {
729e4ab9 845 errln("Failed to create a collator for Locale::getCanadaFrench()");
b75a7d8f 846 delete c;
46f4442e 847 return;
b75a7d8f
A
848 }
849
850 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
851 {
852 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
853 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
854 };
855
2ca993e8 856 compareArray(*c, test1, UPRV_LENGTHOF(test1));
b75a7d8f
A
857
858 delete c;
859}
860
861// @bug 4133509
862//
863// The sorting using java.text.CollationKey is not in the exact order
864//
865void CollationRegressionTest::Test4133509(/* char* par */)
866{
867 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
868 {
869 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
870 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
871 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
872 };
873
2ca993e8 874 compareArray(*en_us, test1, UPRV_LENGTHOF(test1));
b75a7d8f
A
875}
876
877// @bug 4114077
878//
729e4ab9 879// Collation with decomposition off doesn't work for Europe
b75a7d8f
A
880//
881void CollationRegressionTest::Test4114077(/* char* par */)
882{
883 // Ensure that we get the same results with decomposition off
884 // as we do with it on....
729e4ab9 885
b75a7d8f
A
886 UErrorCode status = U_ZERO_ERROR;
887 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
888 c->setStrength(Collator::TERTIARY);
729e4ab9 889
b75a7d8f
A
890 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
891 {
892 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
893 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
894 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
895 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
896 // -> a, ring, acute
897 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
898 };
899
900 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
2ca993e8 901 compareArray(*c, test1, UPRV_LENGTHOF(test1));
b75a7d8f
A
902
903 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
904 {
905 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
906 };
907
908 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
2ca993e8 909 compareArray(*c, test2, UPRV_LENGTHOF(test2));
b75a7d8f
A
910
911 delete c;
912}
913
914// @bug 4141640
915//
729e4ab9 916// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
b75a7d8f
A
917//
918void CollationRegressionTest::Test4141640(/* char* par */)
919{
920 //
921 // Rather than just creating a Swedish collator, we might as well
922 // try to instantiate one for every locale available on the system
923 // in order to prevent this sort of bug from cropping up in the future
924 //
925 UErrorCode status = U_ZERO_ERROR;
926 int32_t i, localeCount;
927 const Locale *locales = Locale::getAvailableLocales(localeCount);
729e4ab9 928
b75a7d8f
A
929 for (i = 0; i < localeCount; i += 1)
930 {
931 Collator *c = NULL;
932
933 status = U_ZERO_ERROR;
934 c = Collator::createInstance(locales[i], status);
935
936 if (c == NULL || U_FAILURE(status))
937 {
938 UnicodeString msg, localeName;
939
940 msg += "Could not create collator for locale ";
941 msg += locales[i].getName();
942
943 errln(msg);
944 }
945
946 delete c;
947 }
948}
949
950// @bug 4139572
951//
729e4ab9 952// getCollationKey throws exception for spanish text
b75a7d8f
A
953// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
954//
955void CollationRegressionTest::Test4139572(/* char* par */)
956{
957 //
958 // Code pasted straight from the bug report
959 // (and then translated to C++ ;-)
960 //
961 // create spanish locale and collator
962 UErrorCode status = U_ZERO_ERROR;
963 Locale l("es", "es");
964 Collator *col = NULL;
729e4ab9 965
b75a7d8f
A
966 col = Collator::createInstance(l, status);
967
968 if (col == NULL || U_FAILURE(status))
969 {
970 errln("Failed to create a collator for es_es locale.");
971 delete col;
972 return;
973 }
974
975 CollationKey key;
976
977 // this spanish phrase kills it!
978 col->getCollationKey("Nombre De Objeto", key, status);
979
980 if (key.isBogus() || U_FAILURE(status))
981 {
982 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
983 }
984
985 delete col;
986}
987/* HSYS : RuleBasedCollator::compare() performance enhancements
988 compare() does not create CollationElementIterator() anymore.*/
729e4ab9 989
b75a7d8f
A
990class My4146160Collator : public RuleBasedCollator
991{
992public:
993 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
994 ~My4146160Collator();
995
996 CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
997
998 CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
729e4ab9 999
b75a7d8f
A
1000 static int32_t count;
1001};
1002
1003int32_t My4146160Collator::count = 0;
1004
1005My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
1006 : RuleBasedCollator(rbc.getRules(), status)
1007{
1008}
1009
1010My4146160Collator::~My4146160Collator()
1011{
1012}
1013
1014CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1015{
1016 count += 1;
1017 return RuleBasedCollator::createCollationElementIterator(text);
1018}
1019
1020CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1021{
1022 count += 1;
1023 return RuleBasedCollator::createCollationElementIterator(text);
1024}
1025
1026// @bug 4146160
1027//
1028// RuleBasedCollator doesn't use createCollationElementIterator internally
1029//
1030void CollationRegressionTest::Test4146160(/* char* par */)
1031{
1032#if 0
1033 //
1034 // Use a custom collator class whose createCollationElementIterator
1035 // methods increment a count....
1036 //
1037 UErrorCode status = U_ZERO_ERROR;
1038 CollationKey key;
1039
1040 My4146160Collator::count = 0;
1041 My4146160Collator *mc = NULL;
729e4ab9 1042
b75a7d8f
A
1043 mc = new My4146160Collator(*en_us, status);
1044
1045 if (mc == NULL || U_FAILURE(status))
1046 {
1047 errln("Failed to create a My4146160Collator.");
1048 delete mc;
1049 return;
1050 }
1051
1052 mc->getCollationKey("1", key, status);
1053
1054 if (key.isBogus() || U_FAILURE(status))
1055 {
1056 errln("Failure to get a CollationKey from a My4146160Collator.");
1057 delete mc;
1058 return;
1059 }
1060
1061 if (My4146160Collator::count < 1)
1062 {
1063 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1064 }
1065
1066 My4146160Collator::count = 0;
1067 mc->compare("1", "2");
1068
1069 if (My4146160Collator::count < 1)
1070 {
1071 errln("My4146160Collator::createtCollationElementIterator not called for compare");
1072 }
1073
1074 delete mc;
1075#endif
1076}
729e4ab9 1077
57a6839d
A
1078void CollationRegressionTest::Test4179216() {
1079 // you can position a CollationElementIterator in the middle of
1080 // a contracting character sequence, yielding a bogus collation
1081 // element
1082 IcuTestErrorCode errorCode(*this, "Test4179216");
1083 RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
1084 UnicodeString testText = "church church catcatcher runcrunchynchy";
1085 CollationElementIterator *iter = coll.createCollationElementIterator(testText);
1086
1087 // test that the "ch" combination works properly
1088 iter->setOffset(4, errorCode);
1089 int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1090
1091 iter->reset();
1092 int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1093
1094 iter->setOffset(5, errorCode);
1095 int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1096
1097 // Compares and prints only 16-bit primary weights.
1098 if (elt4 != elt0 || elt5 != elt0) {
1099 errln("The collation elements at positions 0 (0x%04x), "
1100 "4 (0x%04x), and 5 (0x%04x) don't match.",
1101 elt0, elt4, elt5);
1102 }
1103
1104 // test that the "cat" combination works properly
1105 iter->setOffset(14, errorCode);
1106 int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1107
1108 iter->setOffset(15, errorCode);
1109 int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1110
1111 iter->setOffset(16, errorCode);
1112 int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1113
1114 iter->setOffset(17, errorCode);
1115 int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1116
1117 iter->setOffset(18, errorCode);
1118 int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1119
1120 iter->setOffset(19, errorCode);
1121 int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1122
1123 // Compares and prints only 16-bit primary weights.
1124 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
1125 || elt14 != elt18 || elt14 != elt19) {
1126 errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1127 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1128 "elt18 = 0x%04x, elt19 = 0x%04x",
1129 elt14, elt15, elt16, elt17, elt18, elt19);
1130 }
1131
1132 // now generate a complete list of the collation elements,
1133 // first using next() and then using setOffset(), and
1134 // make sure both interfaces return the same set of elements
1135 iter->reset();
1136
1137 int32_t elt = iter->next(errorCode);
1138 int32_t count = 0;
1139 while (elt != CollationElementIterator::NULLORDER) {
1140 ++count;
1141 elt = iter->next(errorCode);
1142 }
1143
1144 LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
1145 LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
1146 int32_t lastPos = 0;
1147
1148 iter->reset();
1149 elt = iter->next(errorCode);
1150 count = 0;
1151 while (elt != CollationElementIterator::NULLORDER) {
1152 nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1153 lastPos = iter->getOffset();
1154 elt = iter->next(errorCode);
1155 }
1156 int32_t nextElementsLength = count;
1157 count = 0;
1158 for (int32_t i = 0; i < testText.length(); ) {
1159 iter->setOffset(i, errorCode);
1160 lastPos = iter->getOffset();
1161 elt = iter->next(errorCode);
1162 setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1163 i = iter->getOffset();
1164 }
1165 for (int32_t i = 0; i < nextElementsLength; i++) {
1166 if (nextElements[i] == setOffsetElements[i]) {
1167 logln(nextElements[i]);
1168 } else {
1169 errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
1170 ", but setOffset() yielded " + setOffsetElements[i]);
1171 }
1172 }
1173 delete iter;
1174}
1175
729e4ab9
A
1176// Ticket 7189
1177//
1178// nextSortKeyPart incorrect for EO_S1 collation
1179static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1180 UCharIterator uiter;
1181 uint32_t state[2] = { 0, 0 };
1182 int32_t keyLen;
1183 int32_t count = 8;
1184
1185 uiter_setString(&uiter, text, len);
1186 keyLen = 0;
1187 while (TRUE) {
1188 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1189 if (U_FAILURE(status)) {
1190 return -1;
1191 }
1192 if (keyPartLen == 0) {
1193 break;
1194 }
1195 keyLen += keyPartLen;
1196 }
1197 return keyLen;
1198}
1199
1200void CollationRegressionTest::TestT7189() {
1201 UErrorCode status = U_ZERO_ERROR;
1202 UCollator *coll;
1203 uint32_t i;
1204
1205 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1206 // "Achter De Hoven"
1207 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1208 // "ABC"
1209 { 0x41, 0x42, 0x43, 0x00 },
1210 // "HELLO world!"
1211 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1212 };
1213
1214 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1215 // "Achter de Hoven"
1216 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1217 // "abc"
1218 { 0x61, 0x62, 0x63, 0x00 },
1219 // "hello world!"
1220 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1221 };
1222
1223 // Open the collator
1224 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1225 if (U_FAILURE(status)) {
1226 errln("Failed to create a collator for short string EO_S1");
1227 return;
1228 }
1229
2ca993e8 1230 for (i = 0; i < UPRV_LENGTHOF(text1); i++) {
729e4ab9
A
1231 uint8_t key1[100], key2[100];
1232 int32_t len1, len2;
1233
1234 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1235 if (U_FAILURE(status)) {
1236 errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1237 break;
1238 }
1239 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1240 if (U_FAILURE(status)) {
1241 errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1242 break;
1243 }
1244
1245 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1246 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1));
1247 } else {
1248 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : "
1249 + TestUtility::hex(key2, len2));
1250 }
1251 }
1252 ucol_close(coll);
1253}
1254
4388f060
A
1255void CollationRegressionTest::TestCaseFirstCompression() {
1256 RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
1257 UErrorCode status = U_ZERO_ERROR;
1258
1259 // default
1260 caseFirstCompressionSub(col, "default");
1261
1262 // Upper first
1263 col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1264 if (U_FAILURE(status)) {
1265 errln("Failed to set UCOL_UPPER_FIRST");
1266 return;
1267 }
1268 caseFirstCompressionSub(col, "upper first");
1269
1270 // Lower first
1271 col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1272 if (U_FAILURE(status)) {
1273 errln("Failed to set UCOL_LOWER_FIRST");
1274 return;
1275 }
1276 caseFirstCompressionSub(col, "lower first");
1277
1278 delete col;
1279}
1280
1281void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1282 const int32_t maxLength = 50;
1283
1284 UChar str1[maxLength];
1285 UChar str2[maxLength];
1286
1287 CollationKey key1, key2;
1288
1289 for (int32_t len = 1; len <= maxLength; len++) {
1290 int32_t i = 0;
1291 for (; i < len - 1; i++) {
1292 str1[i] = str2[i] = (UChar)0x61; // 'a'
1293 }
1294 str1[i] = (UChar)0x41; // 'A'
1295 str2[i] = (UChar)0x61; // 'a'
1296
1297 UErrorCode status = U_ZERO_ERROR;
1298 col->getCollationKey(str1, len, key1, status);
1299 col->getCollationKey(str2, len, key2, status);
1300
1301 UCollationResult cmpKey = key1.compareTo(key2, status);
1302 UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1303
1304 if (U_FAILURE(status)) {
1305 errln("Error in caseFirstCompressionSub");
1306 } else if (cmpKey != cmpCol) {
1307 errln((UnicodeString)"Inconsistent comparison(" + opt
1308 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1309 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1310 }
1311 }
1312}
1313
57a6839d
A
1314void CollationRegressionTest::TestTrailingComment() {
1315 // ICU ticket #8070:
1316 // Check that the rule parser handles a comment without terminating end-of-line.
1317 IcuTestErrorCode errorCode(*this, "TestTrailingComment");
1318 RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
1319 UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
1320 assertTrue("c<b", coll.compare(c, b) < 0);
1321 assertTrue("b<a", coll.compare(b, a) < 0);
1322}
4388f060 1323
57a6839d
A
1324void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1325 // ICU ticket #9959:
1326 // Forbid rules with a before-reset followed by a stronger relation.
1327 IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
1328 RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
1329 if(errorCode.isSuccess()) {
1330 errln("should forbid before-2-reset followed by primary relation");
1331 } else {
1332 errorCode.reset();
1333 }
1334 RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
1335 if(errorCode.isSuccess()) {
1336 errln("should forbid before-3-reset followed by primary or secondary relation");
1337 } else {
1338 errorCode.reset();
1339 }
1340}
4388f060 1341
b75a7d8f
A
1342void CollationRegressionTest::compareArray(Collator &c,
1343 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1344 int32_t testCount)
1345{
1346 int32_t i;
1347 Collator::EComparisonResult expectedResult = Collator::EQUAL;
1348
1349 for (i = 0; i < testCount; i += 3)
1350 {
1351 UnicodeString source(tests[i]);
1352 UnicodeString comparison(tests[i + 1]);
1353 UnicodeString target(tests[i + 2]);
1354
1355 if (comparison == "<")
1356 {
1357 expectedResult = Collator::LESS;
1358 }
1359 else if (comparison == ">")
1360 {
1361 expectedResult = Collator::GREATER;
1362 }
1363 else if (comparison == "=")
1364 {
1365 expectedResult = Collator::EQUAL;
1366 }
1367 else
1368 {
1369 UnicodeString bogus1("Bogus comparison string \"");
1370 UnicodeString bogus2("\"");
1371 errln(bogus1 + comparison + bogus2);
1372 }
1373
1374 Collator::EComparisonResult compareResult = c.compare(source, target);
1375
1376 CollationKey sourceKey, targetKey;
1377 UErrorCode status = U_ZERO_ERROR;
1378
1379 c.getCollationKey(source, sourceKey, status);
1380
1381 if (U_FAILURE(status))
1382 {
1383 errln("Couldn't get collationKey for source");
1384 continue;
1385 }
1386
1387 c.getCollationKey(target, targetKey, status);
1388
1389 if (U_FAILURE(status))
1390 {
1391 errln("Couldn't get collationKey for target");
1392 continue;
1393 }
1394
1395 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1396
1397 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1398
1399 }
1400}
1401
1402void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1403{
1404 int32_t c1, c2, count = 0;
1405 UErrorCode status = U_ZERO_ERROR;
1406
1407 do
1408 {
1409 c1 = i1.next(status);
1410 c2 = i2.next(status);
1411
1412 if (c1 != c2)
1413 {
1414 UnicodeString msg, msg1(" ");
729e4ab9 1415
b75a7d8f
A
1416 msg += msg1 + count;
1417 msg += ": strength(0x";
1418 appendHex(c1, 8, msg);
1419 msg += ") != strength(0x";
1420 appendHex(c2, 8, msg);
1421 msg += ")";
1422
1423 errln(msg);
1424 break;
1425 }
1426
1427 count += 1;
1428 }
1429 while (c1 != CollationElementIterator::NULLORDER);
1430}
1431
1432void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1433{
1434 if (exec)
1435 {
1436 logln("Collation Regression Tests: ");
1437 }
1438
57a6839d
A
1439 if(en_us == NULL) {
1440 dataerrln("Class collator not instantiated");
1441 name = "";
1442 return;
b75a7d8f 1443 }
57a6839d
A
1444 TESTCASE_AUTO_BEGIN;
1445 TESTCASE_AUTO(Test4048446);
1446 TESTCASE_AUTO(Test4051866);
1447 TESTCASE_AUTO(Test4053636);
1448 TESTCASE_AUTO(Test4054238);
1449 TESTCASE_AUTO(Test4054734);
1450 TESTCASE_AUTO(Test4054736);
1451 TESTCASE_AUTO(Test4058613);
1452 TESTCASE_AUTO(Test4059820);
1453 TESTCASE_AUTO(Test4060154);
1454 TESTCASE_AUTO(Test4062418);
1455 TESTCASE_AUTO(Test4065540);
1456 TESTCASE_AUTO(Test4066189);
1457 TESTCASE_AUTO(Test4066696);
1458 TESTCASE_AUTO(Test4076676);
1459 TESTCASE_AUTO(Test4078588);
1460 TESTCASE_AUTO(Test4079231);
1461 TESTCASE_AUTO(Test4081866);
1462 TESTCASE_AUTO(Test4087241);
1463 TESTCASE_AUTO(Test4087243);
1464 TESTCASE_AUTO(Test4092260);
1465 TESTCASE_AUTO(Test4095316);
1466 TESTCASE_AUTO(Test4101940);
1467 TESTCASE_AUTO(Test4103436);
1468 TESTCASE_AUTO(Test4114076);
1469 TESTCASE_AUTO(Test4114077);
1470 TESTCASE_AUTO(Test4124632);
1471 TESTCASE_AUTO(Test4132736);
1472 TESTCASE_AUTO(Test4133509);
1473 TESTCASE_AUTO(Test4139572);
1474 TESTCASE_AUTO(Test4141640);
1475 TESTCASE_AUTO(Test4146160);
1476 TESTCASE_AUTO(Test4179216);
1477 TESTCASE_AUTO(TestT7189);
1478 TESTCASE_AUTO(TestCaseFirstCompression);
1479 TESTCASE_AUTO(TestTrailingComment);
1480 TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
1481 TESTCASE_AUTO_END;
b75a7d8f
A
1482}
1483
1484#endif /* #if !UCONFIG_NO_COLLATION */