1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2012-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * created on: 2012apr27
11 * created by: Markus W. Scherer
14 #include "unicode/utypes.h"
16 #if !UCONFIG_NO_COLLATION
18 #include "unicode/coll.h"
19 #include "unicode/errorcode.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/normalizer2.h"
22 #include "unicode/sortkey.h"
23 #include "unicode/std_string.h"
24 #include "unicode/strenum.h"
25 #include "unicode/tblcoll.h"
26 #include "unicode/uiter.h"
27 #include "unicode/uniset.h"
28 #include "unicode/unistr.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/ustring.h"
33 #include "collation.h"
34 #include "collationdata.h"
35 #include "collationfcd.h"
36 #include "collationiterator.h"
37 #include "collationroot.h"
38 #include "collationrootelements.h"
39 #include "collationruleparser.h"
40 #include "collationweights.h"
43 #include "normalizer2impl.h"
46 #include "uitercollationiterator.h"
47 #include "utf16collationiterator.h"
48 #include "utf8collationiterator.h"
53 class CodePointIterator
;
55 // TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey)
57 class CollationTest
: public IntlTest
{
60 : fcd(NULL
), nfd(NULL
),
68 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=NULL
);
72 void TestNulTerminated();
73 void TestIllegalUTF8();
74 void TestShortFCDData();
76 void TestCollationWeights();
77 void TestRootElements();
78 void TestTailoredElements();
79 void TestDataDriven();
82 void checkFCD(const char *name
, CollationIterator
&ci
, CodePointIterator
&cpi
);
83 void checkAllocWeights(CollationWeights
&cw
,
84 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
85 int32_t someLength
, int32_t minCount
);
87 static UnicodeString
printSortKey(const uint8_t *p
, int32_t length
);
88 static UnicodeString
printCollationKey(const CollationKey
&key
);
90 // Helpers & fields for data-driven test.
91 static UBool
isCROrLF(UChar c
) { return c
== 0xa || c
== 0xd; }
92 static UBool
isSpace(UChar c
) { return c
== 9 || c
== 0x20 || c
== 0x3000; }
93 static UBool
isSectionStarter(UChar c
) { return c
== 0x25 || c
== 0x2a || c
== 0x40; } // %*@
// Advances index i over consecutive isSpace() characters of the current
// fileLine, starting at position i.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the advanced index is returned — confirm.
94 int32_t skipSpaces(int32_t i
) {
95 while(isSpace(fileLine
[i
])) { ++i
; }
99 UBool
readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
100 void parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
, UErrorCode
&errorCode
);
101 Collation::Level
parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
);
102 void parseAndSetAttribute(IcuTestErrorCode
&errorCode
);
103 void parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
);
104 void buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
105 void setRootCollator(IcuTestErrorCode
&errorCode
);
106 void setLocaleCollator(IcuTestErrorCode
&errorCode
);
108 UBool
needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const;
110 UBool
getSortKeyParts(const UChar
*s
, int32_t length
,
111 CharString
&dest
, int32_t partSize
,
112 IcuTestErrorCode
&errorCode
);
113 UBool
getCollationKey(const char *norm
, const UnicodeString
&line
,
114 const UChar
*s
, int32_t length
,
115 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
116 UBool
getMergedCollationKey(const UChar
*s
, int32_t length
,
117 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
118 UBool
checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
119 const UnicodeString
&prevString
, const UnicodeString
&s
,
120 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
121 IcuTestErrorCode
&errorCode
);
122 void checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
124 const Normalizer2
*fcd
, *nfd
;
125 UnicodeString fileLine
;
126 int32_t fileLineNumber
;
127 UnicodeString fileTestName
;
// Factory for this test suite: heap-allocates a new CollationTest and
// returns it through the IntlTest base pointer.
// NOTE(review): ownership passes to the caller as far as this code shows
// (plain `new`, no deleter visible here) — confirm who deletes it.
131 extern IntlTest
*createCollationTest() {
132 return new CollationTest();
135 void CollationTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
137 logln("TestSuite CollationTest: ");
140 TESTCASE_AUTO(TestMinMax
);
141 TESTCASE_AUTO(TestImplicits
);
142 TESTCASE_AUTO(TestNulTerminated
);
143 TESTCASE_AUTO(TestIllegalUTF8
);
144 TESTCASE_AUTO(TestShortFCDData
);
145 TESTCASE_AUTO(TestFCD
);
146 TESTCASE_AUTO(TestCollationWeights
);
147 TESTCASE_AUTO(TestRootElements
);
148 TESTCASE_AUTO(TestTailoredElements
);
149 TESTCASE_AUTO(TestDataDriven
);
// Checks the collation elements that the root collator produces for the
// two special code points U+FFFE and U+FFFF.
// The string <FFFE, FFFF> must map to exactly two CEs; the expected values
// are built from Collation::MERGE_SEPARATOR_PRIMARY (for U+FFFE) and
// Collation::MAX_PRIMARY (for U+FFFF).
// Requires the root collator to be a RuleBasedCollator because it uses
// RuleBasedCollator::internalGetCEs().
153 void CollationTest::TestMinMax() {
154 IcuTestErrorCode
errorCode(*this, "TestMinMax");
// Load the root collator into the `coll` member; give up if that failed.
156 setRootCollator(errorCode
);
157 if(errorCode
.isFailure()) {
161 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
);
163 errln("the root collator is not a RuleBasedCollator");
// The UnicodeString below is a read-only alias of this static buffer.
167 static const UChar s
[2] = { 0xfffe, 0xffff };
168 UVector64
ces(errorCode
);
169 rbc
->internalGetCEs(UnicodeString(FALSE
, s
, 2), ces
, errorCode
);
170 errorCode
.assertSuccess();
171 if(ces
.size() != 2) {
172 errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces
.size());
// First CE: U+FFFE.
175 int64_t ce
= ces
.elementAti(0);
176 int64_t expected
= Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY
);
// NOTE(review): ce is 64 bits wide; where `long` is 32 bits the cast below
// keeps only the low half, so the primary weight (stored in the upper
// 32 bits) would not appear in the message — confirm and consider %llx.
178 errln("CE(U+fffe)=%04lx != 02..", (long)ce
);
// Second CE: U+FFFF.
181 ce
= ces
.elementAti(1);
182 expected
= Collation::makeCE(Collation::MAX_PRIMARY
);
// NOTE(review): same 64-bit to `long` truncation concern as above.
184 errln("CE(U+ffff)=%04lx != max..", (long)ce
);
188 void CollationTest::TestImplicits() {
189 IcuTestErrorCode
errorCode(*this, "TestImplicits");
191 const CollationData
*cd
= CollationRoot::getData(errorCode
);
192 if(errorCode
.errDataIfFailureAndReset("CollationRoot::getData()")) {
196 // Implicit primary weights should be assigned for the following sets,
197 // and sort in ascending order by set and then code point.
198 // See http://www.unicode.org/reports/tr10/#Implicit_Weights
200 // core Han Unified Ideographs
201 UnicodeSet
coreHan("[\\p{unified_ideograph}&"
202 "[\\p{Block=CJK_Unified_Ideographs}"
203 "\\p{Block=CJK_Compatibility_Ideographs}]]",
205 // all other Unified Han ideographs
206 UnicodeSet
otherHan("[\\p{unified ideograph}-"
207 "[\\p{Block=CJK_Unified_Ideographs}"
208 "\\p{Block=CJK_Compatibility_Ideographs}]]",
210 UnicodeSet
unassigned("[[:Cn:][:Cs:][:Co:]]", errorCode
);
211 unassigned
.remove(0xfffe, 0xffff); // These have special CLDR root mappings.
213 // Starting with CLDR 26/ICU 54, the root Han order may instead be
214 // the Unihan radical-stroke order.
215 // The tests should pass either way, so we only test the order of a small set of Han characters
216 // whose radical-stroke order is the same as their code point order.
217 UnicodeSet
someHanInCPOrder(
218 "[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48"
219 "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]",
221 UnicodeSet
inOrder(someHanInCPOrder
);
222 inOrder
.addAll(unassigned
).freeze();
223 if(errorCode
.errIfFailureAndReset("UnicodeSet")) {
226 const UnicodeSet
*sets
[] = { &coreHan
, &otherHan
, &unassigned
};
228 uint32_t prevPrimary
= 0;
229 UTF16CollationIterator
ci(cd
, FALSE
, NULL
, NULL
, NULL
);
230 for(int32_t i
= 0; i
< UPRV_LENGTHOF(sets
); ++i
) {
231 LocalPointer
<UnicodeSetIterator
> iter(new UnicodeSetIterator(*sets
[i
]));
232 while(iter
->next()) {
233 UChar32 c
= iter
->getCodepoint();
235 ci
.setText(s
.getBuffer(), s
.getBuffer() + s
.length());
236 int64_t ce
= ci
.nextCE(errorCode
);
237 int64_t ce2
= ci
.nextCE(errorCode
);
238 if(errorCode
.errIfFailureAndReset("CollationIterator.nextCE()")) {
241 if(ce
== Collation::NO_CE
|| ce2
!= Collation::NO_CE
) {
242 errln("CollationIterator.nextCE(U+%04lx) did not yield exactly one CE", (long)c
);
245 if((ce
& 0xffffffff) != Collation::COMMON_SEC_AND_TER_CE
) {
246 errln("CollationIterator.nextCE(U+%04lx) has non-common sec/ter weights: %08lx",
247 (long)c
, (long)(ce
& 0xffffffff));
250 uint32_t primary
= (uint32_t)(ce
>> 32);
251 if(!(primary
> prevPrimary
) && inOrder
.contains(c
) && inOrder
.contains(prev
)) {
252 errln("CE(U+%04lx)=%04lx.. not greater than CE(U+%04lx)=%04lx..",
253 (long)c
, (long)primary
, (long)prev
, (long)prevPrimary
);
256 prevPrimary
= primary
;
// Compares two UTF16CollationIterators over the same text "ab":
// one is given an explicit limit pointer, the other gets a NULL limit
// (the NUL-terminated form, per the error message below).
// Both are stepped with nextCE() in lockstep until the first one
// reports Collation::NO_CE.
261 void CollationTest::TestNulTerminated() {
262 IcuTestErrorCode
errorCode(*this, "TestNulTerminated");
263 const CollationData
*data
= CollationRoot::getData(errorCode
);
264 if(errorCode
.errDataIfFailureAndReset("CollationRoot::getData()")) {
// "abab" followed by a terminating NUL.
268 static const UChar s
[] = { 0x61, 0x62, 0x61, 0x62, 0 };
// ci1 covers s[0..2) with an explicit limit.
270 UTF16CollationIterator
ci1(data
, FALSE
, s
, s
, s
+ 2);
// ci2 starts at s[2] and has no limit pointer; it must stop at the NUL.
271 UTF16CollationIterator
ci2(data
, FALSE
, s
+ 2, s
+ 2, NULL
);
272 for(int32_t i
= 0;; ++i
) {
273 int64_t ce1
= ci1
.nextCE(errorCode
);
274 int64_t ce2
= ci2
.nextCE(errorCode
);
275 if(errorCode
.errIfFailureAndReset("CollationIterator.nextCE()")) {
279 errln("CollationIterator.nextCE(with length) != nextCE(NUL-terminated) at CE %d", (int)i
);
// NO_CE marks the end of the CE sequence.
282 if(ce1
== Collation::NO_CE
) { break; }
286 void CollationTest::TestIllegalUTF8() {
287 IcuTestErrorCode
errorCode(*this, "TestIllegalUTF8");
289 setRootCollator(errorCode
);
290 if(errorCode
.isFailure()) {
294 coll
->setAttribute(UCOL_STRENGTH
, UCOL_IDENTICAL
, errorCode
);
296 static const char *strings
[] = {
297 // string with U+FFFD == illegal byte sequence
298 u8
"a\uFFFDz", "a\x80z", // trail byte
299 u8
"a\uFFFD\uFFFDz", "a\xc1\x81z", // non-shortest form
300 u8
"a\uFFFD\uFFFD\uFFFDz", "a\xe0\x82\x83z", // non-shortest form
301 u8
"a\uFFFD\uFFFD\uFFFDz", "a\xed\xa0\x80z", // lead surrogate: would be U+D800
302 u8
"a\uFFFD\uFFFD\uFFFDz", "a\xed\xbf\xbfz", // trail surrogate: would be U+DFFF
303 u8
"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf0\x8f\xbf\xbfz", // non-shortest form
304 u8
"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf4\x90\x80\x80z" // out of range: would be U+110000
307 for(int32_t i
= 0; i
< UPRV_LENGTHOF(strings
); i
+= 2) {
308 StringPiece
fffd(strings
[i
]);
309 StringPiece
illegal(strings
[i
+ 1]);
310 UCollationResult order
= coll
->compareUTF8(fffd
, illegal
, errorCode
);
311 if(order
!= UCOL_EQUAL
) {
312 errln("compareUTF8(pair %d: U+FFFD, illegal UTF-8)=%d != UCOL_EQUAL",
// Scans the supplementary code point range U+10000..U+10FFFF in blocks of
// 0x400 code points. For every block in which src contains at least one
// code point, the lead surrogate U16_LEAD(c) of the block start is added
// to dest.
// src and dest may be the same set (see the calls in TestShortFCDData()).
// NOTE(review): the loop header has no increment and the statement that
// advances c is not visible in this excerpt; presumably c is set to `next`
// at the end of each iteration — confirm.
320 void addLeadSurrogatesForSupplementary(const UnicodeSet
&src
, UnicodeSet
&dest
) {
321 for(UChar32 c
= 0x10000; c
< 0x110000;) {
// One past the last code point of the current 0x400-wide block.
322 UChar32 next
= c
+ 0x400;
323 if(src
.containsSome(c
, next
- 1)) {
324 dest
.add(U16_LEAD(c
));
332 void CollationTest::TestShortFCDData() {
333 // See CollationFCD class comments.
334 IcuTestErrorCode
errorCode(*this, "TestShortFCDData");
335 UnicodeSet
expectedLccc("[:^lccc=0:]", errorCode
);
336 errorCode
.assertSuccess();
337 expectedLccc
.add(0xdc00, 0xdfff); // add all trail surrogates
338 addLeadSurrogatesForSupplementary(expectedLccc
, expectedLccc
);
339 UnicodeSet lccc
; // actual
340 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
341 if(CollationFCD::hasLccc(c
)) { lccc
.add(c
); }
343 UnicodeSet
diff(expectedLccc
);
344 diff
.removeAll(lccc
);
345 diff
.remove(0x10000, 0x10ffff); // hasLccc() only works for the BMP
346 UnicodeString
empty("[]");
347 UnicodeString diffString
;
348 diff
.toPattern(diffString
, TRUE
);
349 assertEquals("CollationFCD::hasLccc() expected-actual", empty
, diffString
);
351 diff
.removeAll(expectedLccc
);
352 diff
.toPattern(diffString
, TRUE
);
353 assertEquals("CollationFCD::hasLccc() actual-expected", empty
, diffString
, TRUE
);
355 UnicodeSet
expectedTccc("[:^tccc=0:]", errorCode
);
356 if (errorCode
.isSuccess()) {
357 addLeadSurrogatesForSupplementary(expectedLccc
, expectedTccc
);
358 addLeadSurrogatesForSupplementary(expectedTccc
, expectedTccc
);
359 UnicodeSet tccc
; // actual
360 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
361 if(CollationFCD::hasTccc(c
)) { tccc
.add(c
); }
364 diff
.removeAll(tccc
);
365 diff
.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP
366 assertEquals("CollationFCD::hasTccc() expected-actual", empty
, diffString
);
368 diff
.removeAll(expectedTccc
);
369 diff
.toPattern(diffString
, TRUE
);
370 assertEquals("CollationFCD::hasTccc() actual-expected", empty
, diffString
);
374 class CodePointIterator
{
376 CodePointIterator(const UChar32
*cp
, int32_t length
) : cp(cp
), length(length
), pos(0) {}
377 void resetToStart() { pos
= 0; }
378 UChar32
next() { return (pos
< length
) ? cp
[pos
++] : U_SENTINEL
; }
379 UChar32
previous() { return (pos
> 0) ? cp
[--pos
] : U_SENTINEL
; }
380 int32_t getLength() const { return length
; }
381 int getIndex() const { return (int)pos
; }
388 void CollationTest::checkFCD(const char *name
,
389 CollationIterator
&ci
, CodePointIterator
&cpi
) {
390 IcuTestErrorCode
errorCode(*this, "checkFCD");
392 // Iterate forward to the limit.
394 UChar32 c1
= ci
.nextCodePoint(errorCode
);
395 UChar32 c2
= cpi
.next();
397 errln("%s.nextCodePoint(to limit, 1st pass) = U+%04lx != U+%04lx at %d",
398 name
, (long)c1
, (long)c2
, cpi
.getIndex());
401 if(c1
< 0) { break; }
404 // Iterate backward most of the way.
405 for(int32_t n
= (cpi
.getLength() * 2) / 3; n
> 0; --n
) {
406 UChar32 c1
= ci
.previousCodePoint(errorCode
);
407 UChar32 c2
= cpi
.previous();
409 errln("%s.previousCodePoint() = U+%04lx != U+%04lx at %d",
410 name
, (long)c1
, (long)c2
, cpi
.getIndex());
417 UChar32 c1
= ci
.nextCodePoint(errorCode
);
418 UChar32 c2
= cpi
.next();
420 errln("%s.nextCodePoint(to limit again) = U+%04lx != U+%04lx at %d",
421 name
, (long)c1
, (long)c2
, cpi
.getIndex());
424 if(c1
< 0) { break; }
427 // Iterate backward to the start.
429 UChar32 c1
= ci
.previousCodePoint(errorCode
);
430 UChar32 c2
= cpi
.previous();
432 errln("%s.previousCodePoint(to start) = U+%04lx != U+%04lx at %d",
433 name
, (long)c1
, (long)c2
, cpi
.getIndex());
436 if(c1
< 0) { break; }
440 void CollationTest::TestFCD() {
441 IcuTestErrorCode
errorCode(*this, "TestFCD");
442 const CollationData
*data
= CollationRoot::getData(errorCode
);
443 if(errorCode
.errDataIfFailureAndReset("CollationRoot::getData()")) {
447 // Input string, not FCD, NUL-terminated.
448 static const UChar s
[] = {
449 0x308, 0xe1, 0x62, 0x301, 0x327, 0x430, 0x62,
450 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
451 0x327, 0x308, // ccc=202, 230
452 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
453 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F),
454 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D),
456 0xe7, // Character with tccc!=0 decomposed together with mis-ordered sequence.
457 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), U16_LEAD(0x1D165), U16_TRAIL(0x1D165),
458 0xe1, // Character with tccc!=0 decomposed together with decomposed sequence.
459 0xf73, 0xf75, // Tibetan composite vowels must be decomposed.
463 // Expected code points.
464 static const UChar32 cp
[] = {
465 0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
466 0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
469 0x63, 0x327, 0x1D165, 0x1D16D,
471 0xf71, 0xf71, 0xf72, 0xf74, 0x301,
475 FCDUTF16CollationIterator
u16ci(data
, FALSE
, s
, s
, NULL
);
476 if(errorCode
.errIfFailureAndReset("FCDUTF16CollationIterator constructor")) {
479 CodePointIterator
cpi(cp
, UPRV_LENGTHOF(cp
));
480 checkFCD("FCDUTF16CollationIterator", u16ci
, cpi
);
484 UnicodeString(s
).toUTF8String(utf8
);
485 FCDUTF8CollationIterator
u8ci(data
, FALSE
,
486 reinterpret_cast<const uint8_t *>(utf8
.c_str()), 0, -1);
487 if(errorCode
.errIfFailureAndReset("FCDUTF8CollationIterator constructor")) {
490 checkFCD("FCDUTF8CollationIterator", u8ci
, cpi
);
494 uiter_setString(&iter
, s
, UPRV_LENGTHOF(s
) - 1); // -1: without the terminating NUL
495 FCDUIterCollationIterator
uici(data
, FALSE
, iter
, 0);
496 if(errorCode
.errIfFailureAndReset("FCDUIterCollationIterator constructor")) {
499 checkFCD("FCDUIterCollationIterator", uici
, cpi
);
502 void CollationTest::checkAllocWeights(CollationWeights
&cw
,
503 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
504 int32_t someLength
, int32_t minCount
) {
505 if(!cw
.allocWeights(lowerLimit
, upperLimit
, n
)) {
506 errln("CollationWeights::allocWeights(%lx, %lx, %ld) = FALSE",
507 (long)lowerLimit
, (long)upperLimit
, (long)n
);
510 uint32_t previous
= lowerLimit
;
511 int32_t count
= 0; // number of weights that have someLength
512 for(int32_t i
= 0; i
< n
; ++i
) {
513 uint32_t w
= cw
.nextWeight();
514 if(w
== 0xffffffff) {
515 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
516 "returns only %ld weights",
517 (long)lowerLimit
, (long)upperLimit
, (long)n
, (long)i
);
520 if(!(previous
< w
&& w
< upperLimit
)) {
521 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
522 "number %ld -> %lx not between %lx and %lx",
523 (long)lowerLimit
, (long)upperLimit
, (long)n
,
524 (long)(i
+ 1), (long)w
, (long)previous
, (long)upperLimit
);
527 if(CollationWeights::lengthOfWeight(w
) == someLength
) { ++count
; }
529 if(count
< minCount
) {
530 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
531 "returns only %ld < %ld weights of length %d",
532 (long)lowerLimit
, (long)upperLimit
, (long)n
,
533 (long)count
, (long)minCount
, (int)someLength
);
537 void CollationTest::TestCollationWeights() {
540 // Non-compressible primaries use 254 second bytes 02..FF.
541 logln("CollationWeights.initForPrimary(non-compressible)");
542 cw
.initForPrimary(FALSE
);
543 // Expect 1 weight 11 and 254 weights 12xx.
544 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 1, 1);
545 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 2, 254);
546 // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202.
547 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 260, 2, 255);
548 // Expect 254 two-byte weights from the ranges 10ff and 11xx.
549 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 600, 2, 254);
550 // Expect 254^2=64516 three-byte weights.
551 // During computation, there should be 3 three-byte ranges
552 // 10ffff, 11xxxx, 120202.
553 // The middle one should be split 64515:1,
554 // and the newly-split-off range and the last ranged lengthened.
555 checkAllocWeights(cw
, 0x10fffe00, 0x12020300, 1 + 64516 + 254 + 1, 3, 64516);
556 // Expect weights 1102 & 1103.
557 checkAllocWeights(cw
, 0x10ff0000, 0x11040000, 2, 2, 2);
558 // Expect weights 102102 & 102103.
559 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
561 // Compressible primaries use 251 second bytes 04..FE.
562 logln("CollationWeights.initForPrimary(compressible)");
563 cw
.initForPrimary(TRUE
);
564 // Expect 1 weight 11 and 251 weights 12xx.
565 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 1, 1);
566 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 2, 251);
567 // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204.
568 checkAllocWeights(cw
, 0x10fdfe40, 0x12050300, 260, 2, 252);
569 // Expect weights 1104 & 1105.
570 checkAllocWeights(cw
, 0x10fe0000, 0x11060000, 2, 2, 2);
571 // Expect weights 102102 & 102103.
572 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
574 // Secondary and tertiary weights use only bytes 3 & 4.
575 logln("CollationWeights.initForSecondary()");
576 cw
.initForSecondary();
577 // Expect weights fbxx and all four fc..ff.
578 checkAllocWeights(cw
, 0xfb20, 0x10000, 20, 3, 4);
580 logln("CollationWeights.initForTertiary()");
581 cw
.initForTertiary();
582 // Expect weights 3dxx and both 3e & 3f.
583 checkAllocWeights(cw
, 0x3d02, 0x4000, 10, 3, 2);
588 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
,
589 uint32_t p
, uint32_t s
, uint32_t ctq
) {
590 uint32_t p1
= p
>> 24;
591 uint32_t p2
= (p
>> 16) & 0xff;
592 uint32_t p3
= (p
>> 8) & 0xff;
593 uint32_t p4
= p
& 0xff;
594 uint32_t s1
= s
>> 8;
595 uint32_t s2
= s
& 0xff;
596 // ctq = Case, Tertiary, Quaternary
597 uint32_t c
= (ctq
& Collation::CASE_MASK
) >> 14;
598 uint32_t t
= ctq
& Collation::ONLY_TERTIARY_MASK
;
599 uint32_t t1
= t
>> 8;
600 uint32_t t2
= t
& 0xff;
601 uint32_t q
= ctq
& Collation::QUATERNARY_MASK
;
602 // No leading zero bytes.
603 if((p
!= 0 && p1
== 0) || (s
!= 0 && s1
== 0) || (t
!= 0 && t1
== 0)) {
606 // No intermediate zero bytes.
607 if(p1
!= 0 && p2
== 0 && (p
& 0xffff) != 0) {
610 if(p2
!= 0 && p3
== 0 && p4
!= 0) {
613 // Minimum & maximum lead bytes.
614 if((p1
!= 0 && p1
<= Collation::MERGE_SEPARATOR_BYTE
) ||
615 s1
== Collation::LEVEL_SEPARATOR_BYTE
||
616 t1
== Collation::LEVEL_SEPARATOR_BYTE
|| t1
> 0x3f) {
622 // The valid byte range for the second primary byte depends on compressibility.
624 if(data
.isCompressibleLeadByte(p1
)) {
625 if(p2
<= Collation::PRIMARY_COMPRESSION_LOW_BYTE
||
626 Collation::PRIMARY_COMPRESSION_HIGH_BYTE
<= p2
) {
630 if(p2
<= Collation::LEVEL_SEPARATOR_BYTE
) {
635 // Other bytes just need to avoid the level separator.
636 // Trailing zeros are ok.
637 U_ASSERT(Collation::LEVEL_SEPARATOR_BYTE
== 1);
638 if(p3
== Collation::LEVEL_SEPARATOR_BYTE
|| p4
== Collation::LEVEL_SEPARATOR_BYTE
||
639 s2
== Collation::LEVEL_SEPARATOR_BYTE
|| t2
== Collation::LEVEL_SEPARATOR_BYTE
) {
646 // Completely ignorable CE.
647 // Quaternary CEs are not supported.
648 if(c
!= 0 || q
!= 0) {
653 if(t
< re
.getTertiaryBoundary() || c
!= 2) {
659 if(s
< re
.getSecondaryBoundary() || t
== 0 || t
>= re
.getTertiaryBoundary()) {
665 if(s
== 0 || (Collation::COMMON_WEIGHT16
< s
&& s
<= re
.getLastCommonSecondary()) ||
666 s
>= re
.getSecondaryBoundary()) {
669 if(t
== 0 || t
>= re
.getTertiaryBoundary()) {
// Convenience overload: validates a whole 64-bit collation element.
// Splits ce into the primary weight (upper 32 bits), the secondary weight
// (bits 31..16) and the combined case/tertiary/quaternary field
// (bits 15..0), then forwards to the five-argument isValidCE().
676 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
, int64_t ce
) {
677 uint32_t p
= (uint32_t)(ce
>> 32);
// Low 32 bits: secondary in the upper half, case/tertiary/quaternary below.
678 uint32_t secTer
= (uint32_t)ce
;
679 return isValidCE(re
, data
, p
, secTer
>> 16, secTer
& 0xffff);
682 class RootElementsIterator
{
684 RootElementsIterator(const CollationData
&root
)
686 elements(root
.rootElements
), length(root
.rootElementsLength
),
688 index((int32_t)elements
[CollationRootElements::IX_FIRST_TERTIARY_INDEX
]) {}
691 if(index
>= length
) { return FALSE
; }
692 uint32_t p
= elements
[index
];
693 if(p
== CollationRootElements::PRIMARY_SENTINEL
) { return FALSE
; }
694 if((p
& CollationRootElements::SEC_TER_DELTA_FLAG
) != 0) {
696 secTer
= p
& ~CollationRootElements::SEC_TER_DELTA_FLAG
;
699 if((p
& CollationRootElements::PRIMARY_STEP_MASK
) != 0) {
700 // End of a range, enumerate the primaries in the range.
701 int32_t step
= (int32_t)p
& CollationRootElements::PRIMARY_STEP_MASK
;
704 // Finished the range, return the next CE after it.
709 // Return the next primary in this range.
710 UBool isCompressible
= data
.isCompressiblePrimary(pri
);
711 if((pri
& 0xffff) == 0) {
712 pri
= Collation::incTwoBytePrimaryByOffset(pri
, isCompressible
, step
);
714 pri
= Collation::incThreeBytePrimaryByOffset(pri
, isCompressible
, step
);
718 // Simple primary CE.
721 // Does this have an explicit below-common sec/ter unit,
722 // or does it imply a common one?
723 if(index
== length
) {
724 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
726 secTer
= elements
[index
];
727 if((secTer
& CollationRootElements::SEC_TER_DELTA_FLAG
) == 0) {
729 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
731 secTer
&= ~CollationRootElements::SEC_TER_DELTA_FLAG
;
732 if(secTer
> Collation::COMMON_SEC_AND_TER_CE
) {
734 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
736 // Explicit sec/ter below common/common.
744 uint32_t getPrimary() const { return pri
; }
745 uint32_t getSecTer() const { return secTer
; }
748 const CollationData
&data
;
749 const uint32_t *elements
;
759 void CollationTest::TestRootElements() {
760 IcuTestErrorCode
errorCode(*this, "TestRootElements");
761 const CollationData
*root
= CollationRoot::getData(errorCode
);
762 if(errorCode
.errDataIfFailureAndReset("CollationRoot::getData()")) {
765 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
766 RootElementsIterator
iter(*root
);
768 // We check each root CE for validity,
769 // and we also verify that there is a tailoring gap between each two CEs.
770 CollationWeights cw1c
; // compressible primary weights
771 CollationWeights cw1u
; // uncompressible primary weights
772 CollationWeights cw2
;
773 CollationWeights cw3
;
775 cw1c
.initForPrimary(TRUE
);
776 cw1u
.initForPrimary(FALSE
);
777 cw2
.initForSecondary();
778 cw3
.initForTertiary();
780 // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
781 // nor the special merge-separator CE for U+FFFE.
782 uint32_t prevPri
= 0;
783 uint32_t prevSec
= 0;
784 uint32_t prevTer
= 0;
786 uint32_t pri
= iter
.getPrimary();
787 uint32_t secTer
= iter
.getSecTer();
788 // CollationRootElements CEs must have 0 case and quaternary bits.
789 if((secTer
& Collation::CASE_AND_QUATERNARY_MASK
) != 0) {
790 errln("CollationRootElements CE has non-zero case and/or quaternary bits: %08lx %08lx",
791 (long)pri
, (long)secTer
);
793 uint32_t sec
= secTer
>> 16;
794 uint32_t ter
= secTer
& Collation::ONLY_TERTIARY_MASK
;
796 if(pri
== 0 && sec
== 0 && ter
!= 0) {
797 // Tertiary CEs must have uppercase bits,
798 // but they are not stored in the CollationRootElements.
801 if(!isValidCE(rootElements
, *root
, pri
, sec
, ctq
)) {
802 errln("invalid root CE %08lx %08lx", (long)pri
, (long)secTer
);
805 uint32_t newWeight
= 0;
806 if(prevPri
== 0 || prevPri
>= Collation::FFFD_PRIMARY
) {
807 // There is currently no tailoring gap after primary ignorables,
808 // and we forbid tailoring after U+FFFD and U+FFFF.
809 } else if(root
->isCompressiblePrimary(prevPri
)) {
810 if(!cw1c
.allocWeights(prevPri
, pri
, 1)) {
811 errln("no primary/compressible tailoring gap between %08lx and %08lx",
812 (long)prevPri
, (long)pri
);
814 newWeight
= cw1c
.nextWeight();
817 if(!cw1u
.allocWeights(prevPri
, pri
, 1)) {
818 errln("no primary/uncompressible tailoring gap between %08lx and %08lx",
819 (long)prevPri
, (long)pri
);
821 newWeight
= cw1u
.nextWeight();
824 if(newWeight
!= 0 && !(prevPri
< newWeight
&& newWeight
< pri
)) {
825 errln("mis-allocated primary weight, should get %08lx < %08lx < %08lx",
826 (long)prevPri
, (long)newWeight
, (long)pri
);
828 } else if(sec
!= prevSec
) {
829 uint32_t lowerLimit
=
830 prevSec
== 0 ? rootElements
.getSecondaryBoundary() - 0x100 : prevSec
;
831 if(!cw2
.allocWeights(lowerLimit
, sec
, 1)) {
832 errln("no secondary tailoring gap between %04x and %04x", lowerLimit
, sec
);
834 uint32_t newWeight
= cw2
.nextWeight();
835 if(!(prevSec
< newWeight
&& newWeight
< sec
)) {
836 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
837 (long)lowerLimit
, (long)newWeight
, (long)sec
);
840 } else if(ter
!= prevTer
) {
841 uint32_t lowerLimit
=
842 prevTer
== 0 ? rootElements
.getTertiaryBoundary() - 0x100 : prevTer
;
843 if(!cw3
.allocWeights(lowerLimit
, ter
, 1)) {
844 errln("no teriary tailoring gap between %04x and %04x", lowerLimit
, ter
);
846 uint32_t newWeight
= cw3
.nextWeight();
847 if(!(prevTer
< newWeight
&& newWeight
< ter
)) {
848 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
849 (long)lowerLimit
, (long)newWeight
, (long)ter
);
853 errln("duplicate root CE %08lx %08lx", (long)pri
, (long)secTer
);
862 void CollationTest::TestTailoredElements() {
863 IcuTestErrorCode
errorCode(*this, "TestTailoredElements");
864 const CollationData
*root
= CollationRoot::getData(errorCode
);
865 if(errorCode
.errDataIfFailureAndReset("CollationRoot::getData()")) {
868 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
870 UHashtable
*prevLocales
= uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, errorCode
);
871 if(errorCode
.errIfFailureAndReset("failed to create a hash table")) {
874 uhash_setKeyDeleter(prevLocales
, uprv_free
);
875 // TestRootElements() tests the root collator which does not have tailorings.
876 uhash_puti(prevLocales
, uprv_strdup(""), 1, errorCode
);
877 uhash_puti(prevLocales
, uprv_strdup("root"), 1, errorCode
);
878 uhash_puti(prevLocales
, uprv_strdup("root@collation=standard"), 1, errorCode
);
880 UVector64
ces(errorCode
);
881 LocalPointer
<StringEnumeration
> locales(Collator::getAvailableLocales());
882 U_ASSERT(locales
.isValid());
883 const char *localeID
= "root";
885 Locale
locale(localeID
);
886 LocalPointer
<StringEnumeration
> types(
887 Collator::getKeywordValuesForLocale("collation", locale
, FALSE
, errorCode
));
888 errorCode
.assertSuccess();
889 const char *type
; // first: default type
890 while((type
= types
->next(NULL
, errorCode
)) != NULL
) {
891 if(strncmp(type
, "private-", 8) == 0) {
892 errln("Collator::getKeywordValuesForLocale(%s) returns private collation keyword: %s",
895 Locale
localeWithType(locale
);
896 localeWithType
.setKeywordValue("collation", type
, errorCode
);
897 errorCode
.assertSuccess();
898 LocalPointer
<Collator
> coll(Collator::createInstance(localeWithType
, errorCode
));
899 if(errorCode
.errIfFailureAndReset("Collator::createInstance(%s)",
900 localeWithType
.getName())) {
903 Locale actual
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, errorCode
);
904 if(uhash_geti(prevLocales
, actual
.getName()) != 0) {
907 uhash_puti(prevLocales
, uprv_strdup(actual
.getName()), 1, errorCode
);
908 errorCode
.assertSuccess();
909 logln("TestTailoredElements(): requested %s -> actual %s",
910 localeWithType
.getName(), actual
.getName());
911 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
.getAlias());
915 // Note: It would be better to get tailored strings such that we can
916 // identify the prefix, and only get the CEs for the prefix+string,
917 // not also for the prefix.
918 // There is currently no API for that.
919 // It would help in an unusual case where a contraction starting in the prefix
920 // extends past its end, and we do not see the intended mapping.
921 // For example, for a mapping p|st, if there is also a contraction ps,
922 // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
923 LocalPointer
<UnicodeSet
> tailored(coll
->getTailoredSet(errorCode
));
924 errorCode
.assertSuccess();
925 UnicodeSetIterator
iter(*tailored
);
927 const UnicodeString
&s
= iter
.getString();
928 ces
.removeAllElements();
929 rbc
->internalGetCEs(s
, ces
, errorCode
);
930 errorCode
.assertSuccess();
931 for(int32_t i
= 0; i
< ces
.size(); ++i
) {
932 int64_t ce
= ces
.elementAti(i
);
933 if(!isValidCE(rootElements
, *root
, ce
)) {
934 errln("invalid tailored CE %016llx at CE index %d from string:",
935 (long long)ce
, (int)i
);
941 } while((localeID
= locales
->next(NULL
, errorCode
)) != NULL
);
942 uhash_close(prevLocales
);
945 UnicodeString
CollationTest::printSortKey(const uint8_t *p
, int32_t length
) {
947 for(int32_t i
= 0; i
< length
; ++i
) {
948 if(i
> 0) { s
.append((UChar
)0x20); }
951 s
.append((UChar
)0x2e); // period
953 s
.append((UChar
)0x7c); // vertical bar
// Formats the bytes of a CollationKey for log output.
// Fetches the key's byte array together with its length via
// CollationKey::getByteArray() and delegates the formatting to printSortKey().
// NOTE(review): the declaration of `length` is not visible in this excerpt;
// it is filled in by getByteArray() — confirm its type matches printSortKey().
961 UnicodeString
CollationTest::printCollationKey(const CollationKey
&key
) {
963 const uint8_t *p
= key
.getByteArray(length
);
964 return printSortKey(p
, length
);
967 UBool
CollationTest::readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
970 const UChar
*line
= ucbuf_readline(f
, &lineLength
, errorCode
);
971 if(line
== NULL
|| errorCode
.isFailure()) {
976 // Strip trailing CR/LF, comments, and spaces.
977 const UChar
*comment
= u_memchr(line
, 0x23, lineLength
); // '#'
978 if(comment
!= NULL
) {
979 lineLength
= (int32_t)(comment
- line
);
981 while(lineLength
> 0 && isCROrLF(line
[lineLength
- 1])) { --lineLength
; }
983 while(lineLength
> 0 && isSpace(line
[lineLength
- 1])) { --lineLength
; }
984 if(lineLength
!= 0) {
985 fileLine
.setTo(FALSE
, line
, lineLength
);
988 // Empty line, continue.
// Parses one whitespace-delimited token of fileLine beginning at start.
// The token may have the form "prefix|string": the text before the first '|'
// (0x7c) inside the token is unescaped into prefix, the remainder into s.
// An empty prefix before '|' or an empty string is reported with errln() and
// sets errorCode to U_PARSE_ERROR.
// start is an in/out parameter; it is advanced past the '|' when one is found.
// NOTE(review): the final update of start and the handling of a missing '|'
// are not visible in this excerpt.
992 void CollationTest::parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
,
993 UErrorCode
&errorCode
) {
994 int32_t length
= fileLine
.length();
// Find the end of the token: first space at or after start, or end of line.
996 for(i
= start
; i
< length
&& !isSpace(fileLine
[i
]); ++i
) {}
997 int32_t pipeIndex
= fileLine
.indexOf((UChar
)0x7c, start
, i
- start
); // '|'
999 prefix
= fileLine
.tempSubStringBetween(start
, pipeIndex
).unescape();
1000 if(prefix
.isEmpty()) {
1001 errln("empty prefix on line %d", (int)fileLineNumber
);
1003 errorCode
= U_PARSE_ERROR
;
1006 start
= pipeIndex
+ 1;
1010 s
= fileLine
.tempSubStringBetween(start
, i
).unescape();
1012 errln("empty string on line %d", (int)fileLineNumber
);
1014 errorCode
= U_PARSE_ERROR
;
// Parses a "* compare" data line held in fileLine: a relation followed by
// spaces and one test string, which is stored in s.
// The relation starts with '<' (0x3c), optionally refined by the second
// character, or is '=' (0x3d) which maps to Collation::ZERO_LEVEL. The
// accepted spellings listed in the error message are = < <1 <2 <c <3 <4 <i.
// Returns the parsed level. On a syntax error (no relation, no space after
// the relation, a prefix on the test string, or trailing text) it reports
// the problem with errln(), sets U_PARSE_ERROR and returns NO_LEVEL.
// NOTE(review): the switch that maps the second character to each level is
// only partly visible in this excerpt.
1020 Collation::Level
CollationTest::parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
) {
1021 Collation::Level relation
;
1023 if(fileLine
[0] == 0x3c) { // <
1024 UChar second
= fileLine
[1];
1028 relation
= Collation::PRIMARY_LEVEL
;
1031 relation
= Collation::SECONDARY_LEVEL
;
1034 relation
= Collation::TERTIARY_LEVEL
;
1037 relation
= Collation::QUATERNARY_LEVEL
;
1040 relation
= Collation::CASE_LEVEL
;
1043 relation
= Collation::IDENTICAL_LEVEL
;
1046 relation
= Collation::NO_LEVEL
;
1050 } else if(fileLine
[0] == 0x3d) { // =
1051 relation
= Collation::ZERO_LEVEL
;
// A relation must have been consumed and must be followed by a space.
1056 if(start
== 0 || !isSpace(fileLine
[start
])) {
1057 errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line %d", (int)fileLineNumber
);
1059 errorCode
.set(U_PARSE_ERROR
);
1060 return Collation::NO_LEVEL
;
1062 start
= skipSpaces(start
);
1063 UnicodeString prefix
;
1064 parseString(start
, prefix
, s
, errorCode
);
// Test strings, unlike other uses of parseString(), must not carry a prefix.
1065 if(errorCode
.isSuccess() && !prefix
.isEmpty()) {
1066 errln("prefix string not allowed for test string: on line %d", (int)fileLineNumber
);
1068 errorCode
.set(U_PARSE_ERROR
);
1069 return Collation::NO_LEVEL
;
// Nothing may follow the test string.
1071 if(start
< fileLine
.length()) {
1072 errln("unexpected line contents after test string on line %d", (int)fileLineNumber
);
1074 errorCode
.set(U_PARSE_ERROR
);
1075 return Collation::NO_LEVEL
;
// Lookup table from attribute names used in the test data file to
// UColAttribute constants. parseAndSetAttribute() searches it linearly and
// reads the .name and .attr members of each entry.
1080 static const struct {
1084 { "backwards", UCOL_FRENCH_COLLATION
},
1085 { "alternate", UCOL_ALTERNATE_HANDLING
},
1086 { "caseFirst", UCOL_CASE_FIRST
},
1087 { "caseLevel", UCOL_CASE_LEVEL
},
1088 // UCOL_NORMALIZATION_MODE is turned on and off automatically.
1089 { "strength", UCOL_STRENGTH
},
1090 // UCOL_HIRAGANA_QUATERNARY_MODE is deprecated.
1091 { "numeric", UCOL_NUMERIC_COLLATION
}
// Lookup table from attribute value names used in the test data file to
// UColAttributeValue constants. parseAndSetAttribute() searches it linearly
// and reads the .name and .value members of each entry.
// NOTE(review): at least one entry (original line 1105) is not visible in
// this excerpt.
1094 static const struct {
1096 UColAttributeValue value
;
1097 } attributeValues
[] = {
1098 { "default", UCOL_DEFAULT
},
1099 { "primary", UCOL_PRIMARY
},
1100 { "secondary", UCOL_SECONDARY
},
1101 { "tertiary", UCOL_TERTIARY
},
1102 { "quaternary", UCOL_QUATERNARY
},
1103 { "identical", UCOL_IDENTICAL
},
1104 { "off", UCOL_OFF
},
1106 { "shifted", UCOL_SHIFTED
},
1107 { "non-ignorable", UCOL_NON_IGNORABLE
},
1108 { "lower", UCOL_LOWER_FIRST
},
1109 { "upper", UCOL_UPPER_FIRST
}
// Parses an attribute line held in fileLine and applies it to coll.
// The line is dispatched here by TestDataDriven() when it starts with '%'
// followed by a space; parsing begins after that first character.
// Three forms are handled:
//   - a line whose text starts with "reorder": forwarded to
//     parseAndSetReorderCodes();
//   - "maxVariable=<space|punct|symbol|currency>": coll->setMaxVariable();
//   - "<name>=<value>": name and value are looked up in the attributes and
//     attributeValues tables and passed to coll->setAttribute().
// A missing '=', an unknown name or an unknown value is reported with errln()
// and sets U_PARSE_ERROR; failures of the setter calls are reported too.
// NOTE(review): max, attr and value are declared without initializers; the
// error branches presumably return before they are used -- not visible here.
// NOTE(review): the comment below implies coll may be NULL; the guard
// around the coll-> calls is not visible in this excerpt.
1112 void CollationTest::parseAndSetAttribute(IcuTestErrorCode
&errorCode
) {
1113 // Parse attributes even if the Collator could not be created,
1114 // in order to report syntax errors.
1115 int32_t start
= skipSpaces(1);
1116 int32_t equalPos
= fileLine
.indexOf((UChar
)0x3d);
1118 if(fileLine
.compare(start
, 7, UNICODE_STRING("reorder", 7)) == 0) {
1119 parseAndSetReorderCodes(start
+ 7, errorCode
);
1122 errln("missing '=' on line %d", (int)fileLineNumber
);
1124 errorCode
.set(U_PARSE_ERROR
);
// Split the line into the attribute name and the value around '='.
1128 UnicodeString attrString
= fileLine
.tempSubStringBetween(start
, equalPos
);
1129 UnicodeString valueString
= fileLine
.tempSubString(equalPos
+1);
// maxVariable takes a reorder code rather than a UColAttributeValue,
// so it is handled separately from the table-driven attributes.
1130 if(attrString
== UNICODE_STRING("maxVariable", 11)) {
1131 UColReorderCode max
;
1132 if(valueString
== UNICODE_STRING("space", 5)) {
1133 max
= UCOL_REORDER_CODE_SPACE
;
1134 } else if(valueString
== UNICODE_STRING("punct", 5)) {
1135 max
= UCOL_REORDER_CODE_PUNCTUATION
;
1136 } else if(valueString
== UNICODE_STRING("symbol", 6)) {
1137 max
= UCOL_REORDER_CODE_SYMBOL
;
1138 } else if(valueString
== UNICODE_STRING("currency", 8)) {
1139 max
= UCOL_REORDER_CODE_CURRENCY
;
1141 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1143 errorCode
.set(U_PARSE_ERROR
);
1147 coll
->setMaxVariable(max
, errorCode
);
1148 if(errorCode
.isFailure()) {
1149 errln("setMaxVariable() failed on line %d: %s",
1150 (int)fileLineNumber
, errorCode
.errorName());
// Linear search of the attributes table; reaching its end means the name
// is unknown.
1160 for(int32_t i
= 0;; ++i
) {
1161 if(i
== UPRV_LENGTHOF(attributes
)) {
1162 errln("invalid attribute name on line %d", (int)fileLineNumber
);
1164 errorCode
.set(U_PARSE_ERROR
);
1167 if(attrString
== UnicodeString(attributes
[i
].name
, -1, US_INV
)) {
1168 attr
= attributes
[i
].attr
;
1173 UColAttributeValue value
;
// Same linear search for the value name.
1174 for(int32_t i
= 0;; ++i
) {
1175 if(i
== UPRV_LENGTHOF(attributeValues
)) {
1176 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1178 errorCode
.set(U_PARSE_ERROR
);
1181 if(valueString
== UnicodeString(attributeValues
[i
].name
, -1, US_INV
)) {
1182 value
= attributeValues
[i
].value
;
1188 coll
->setAttribute(attr
, value
, errorCode
);
1189 if(errorCode
.isFailure()) {
1190 errln("illegal attribute=value combination on line %d: %s",
1191 (int)fileLineNumber
, errorCode
.errorName());
// Parses the space-separated reorder code names in fileLine, beginning at
// index start, and applies them to coll with setReorderCodes().
// Each name is converted to invariant chars and resolved with
// CollationRuleParser::getReorderCode(); the name "default" (compared
// case-insensitively) maps to UCOL_REORDER_CODE_DEFAULT. Any other
// unresolved name is reported with errln() and sets U_PARSE_ERROR.
// NOTE(review): the condition under which the "default"/error branch is
// entered (presumably a negative code) is not visible in this excerpt.
1199 void CollationTest::parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
) {
1200 UVector32
reorderCodes(errorCode
);
1201 while(start
< fileLine
.length()) {
1202 start
= skipSpaces(start
);
// [start, limit) delimits the next name.
1203 int32_t limit
= start
;
1204 while(limit
< fileLine
.length() && !isSpace(fileLine
[limit
])) { ++limit
; }
1206 name
.appendInvariantChars(fileLine
.tempSubStringBetween(start
, limit
), errorCode
);
1207 int32_t code
= CollationRuleParser::getReorderCode(name
.data());
1209 if(uprv_stricmp(name
.data(), "default") == 0) {
1210 code
= UCOL_REORDER_CODE_DEFAULT
; // -1
1212 errln("invalid reorder code '%s' on line %d", name
.data(), (int)fileLineNumber
);
1214 errorCode
.set(U_PARSE_ERROR
);
1218 reorderCodes
.addElement(code
, errorCode
);
1222 coll
->setReorderCodes(reorderCodes
.getBuffer(), reorderCodes
.size(), errorCode
);
1223 if(errorCode
.isFailure()) {
1224 errln("setReorderCodes() failed on line %d: %s",
1225 (int)fileLineNumber
, errorCode
.errorName());
// Reads the rule lines of an "@ rules" section and builds a
// RuleBasedCollator from them, storing it in the member coll.
// Lines are read until the next section starter (or end of input); each
// line is unescaped and appended to the rule string. The first unhandled
// line stays in fileLine for the caller.
// On construction failure the error name, the reason string, the rules
// offset and the pre/post context snippet of the UParseError are logged.
// On success the reason string is asserted to be empty.
// NOTE(review): coll receives a raw new-expression; the release of any
// previous collator is not visible in this excerpt -- confirm.
1233 void CollationTest::buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1234 UnicodeString rules
;
1235 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1236 rules
.append(fileLine
.unescape());
1238 if(errorCode
.isFailure()) { return; }
1241 UParseError parseError
;
1242 UnicodeString reason
;
1244 coll
= new RuleBasedCollator(rules
, parseError
, reason
, errorCode
);
1246 errln("unable to allocate a new collator");
1247 errorCode
.set(U_MEMORY_ALLOCATION_ERROR
);
1250 if(errorCode
.isFailure()) {
1251 dataerrln("RuleBasedCollator(rules) failed - %s", errorCode
.errorName());
1252 infoln(UnicodeString(" reason: ") + reason
);
1253 if(parseError
.offset
>= 0) { infoln(" rules offset: %d", (int)parseError
.offset
); }
1254 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
1255 infoln(UnicodeString(" snippet: ...") +
1256 parseError
.preContext
+ "(!)" + parseError
.postContext
+ "...");
1262 assertEquals("no error reason when RuleBasedCollator(rules) succeeds",
1263 UnicodeString(), reason
);
// Sets the member coll to a collator for the root locale.
// Does nothing if errorCode already indicates a failure; reports a data
// error with dataerrln() if Collator::createInstance() fails.
1267 void CollationTest::setRootCollator(IcuTestErrorCode
&errorCode
) {
1268 if(errorCode
.isFailure()) { return; }
1270 coll
= Collator::createInstance(Locale::getRoot(), errorCode
);
1271 if(errorCode
.isFailure()) {
1272 dataerrln("unable to create a root collator");
// Handles an "@ locale <id>" line: creates a collator for the locale ID
// that follows the 9-character "@ locale " prefix of fileLine and stores it
// in the member coll.
// '@' is not an invariant character, so it is replaced by '*' in fileLine
// before appendInvariantChars() and restored in the converted char string.
// An empty ID, a conversion failure or a bogus Locale is reported as
// "invalid language tag" and sets U_PARSE_ERROR unless an error is already
// set. A createInstance() failure is reported with dataerrln().
// NOTE(review): the guards around the '@' replacement/restoration (for the
// case where no '@' is found) are not visible in this excerpt.
1277 void CollationTest::setLocaleCollator(IcuTestErrorCode
&errorCode
) {
1278 if(errorCode
.isFailure()) { return; }
1281 int32_t at
= fileLine
.indexOf((UChar
)0x40, 9); // @ is not invariant
1283 fileLine
.setCharAt(at
, (UChar
)0x2a); // *
1285 CharString localeID
;
1286 localeID
.appendInvariantChars(fileLine
.tempSubString(9), errorCode
);
// localeID starts at fileLine index 9, hence the index adjustment.
1288 localeID
.data()[at
- 9] = '@';
1290 Locale
locale(localeID
.data());
1291 if(fileLine
.length() == 9 || errorCode
.isFailure() || locale
.isBogus()) {
1292 errln("invalid language tag on line %d", (int)fileLineNumber
);
1294 if(errorCode
.isSuccess()) { errorCode
.set(U_PARSE_ERROR
); }
1298 logln("creating a collator for locale ID %s", locale
.getName());
1299 coll
= Collator::createInstance(locale
, errorCode
);
1300 if(errorCode
.isFailure()) {
1301 dataerrln("unable to create a collator for locale %s on line %d",
1302 locale
.getName(), (int)fileLineNumber
);
// Decides whether s must be compared with normalization turned on.
// Returns TRUE if errorCode is already a failure, if s does not pass the
// FCD check of the member fcd, or if s contains U+0F71 immediately followed
// by U+0F73, U+0F75 or U+0F81.
// NOTE(review): the final return for the "no normalization needed" case is
// not visible in this excerpt.
1310 UBool
CollationTest::needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
1311 if(U_FAILURE(errorCode
) || !fcd
->isNormalized(s
, errorCode
)) { return TRUE
; }
1312 // In some sequences with Tibetan composite vowel signs,
1313 // even if the string passes the FCD check,
1314 // those composites must be decomposed.
1315 // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81.
1317 while((index
= s
.indexOf((UChar
)0xf71, index
)) >= 0) {
// Step past the U+0F71 and inspect the following code unit, if any.
1318 if(++index
< s
.length()) {
1320 if(c
== 0xf73 || c
== 0xf75 || c
== 0xf81) { return TRUE
; }
// Builds the sort key for s[0..length) in dest by calling
// coll->internalNextSortKeyPart() on a UCharIterator over the string with a
// part buffer capacity of partSize, appending each part to dest.
// partSize must not exceed the capacity of the local part buffer (asserted).
// A part shorter than partSize marks the end of the key.
// Returns FALSE immediately if errorCode is already a failure, otherwise
// errorCode.isSuccess() after the work is done.
// NOTE(review): the enclosing loop and the adjustment that appends the
// terminating 00 byte are not visible in this excerpt.
1326 UBool
CollationTest::getSortKeyParts(const UChar
*s
, int32_t length
,
1327 CharString
&dest
, int32_t partSize
,
1328 IcuTestErrorCode
&errorCode
) {
1329 if(errorCode
.isFailure()) { return FALSE
; }
1331 U_ASSERT(partSize
<= UPRV_LENGTHOF(part
));
1333 uiter_setString(&iter
, s
, length
);
// Iteration state carried between internalNextSortKeyPart() calls.
1334 uint32_t state
[2] = { 0, 0 };
1336 int32_t partLength
= coll
->internalNextSortKeyPart(&iter
, state
, part
, partSize
, errorCode
);
1337 UBool done
= partLength
< partSize
;
1339 // At the end, append the next byte as well which should be 00.
1342 dest
.append(reinterpret_cast<char *>(part
), partLength
, errorCode
);
1344 return errorCode
.isSuccess();
// Computes the collation key for s[0..length) into key and sanity-checks it.
//   norm: label of the current normalization setting, used only in messages.
//   line: the data file line that produced s, logged on failure.
// Checks performed, each reported with errln() plus diagnostic infoln():
//   - coll->getCollationKey() succeeds;
//   - the key is non-empty and its last byte is 00;
//   - no 00 byte occurs before the last byte;
//   - the number of 01 level separators equals numLevels - 1, where
//     numLevels is derived from the strength and case-level attributes;
//   - the key assembled by getSortKeyParts() (internalNextSortKeyPart())
//     is byte-identical to the key, for each part size in partSizes.
// Returns FALSE if errorCode is already a failure.
// NOTE(review): the remaining return statements and part of the numLevels
// computation are not visible in this excerpt.
1349 UBool
CollationTest::getCollationKey(const char *norm
, const UnicodeString
&line
,
1350 const UChar
*s
, int32_t length
,
1351 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1352 if(errorCode
.isFailure()) { return FALSE
; }
1353 coll
->getCollationKey(s
, length
, key
, errorCode
);
1354 if(errorCode
.isFailure()) {
1355 infoln(fileTestName
);
1356 errln("Collator(%s).getCollationKey() failed: %s",
1357 norm
, errorCode
.errorName());
1362 const uint8_t *keyBytes
= key
.getByteArray(keyLength
);
// A valid key has at least the terminating 00 byte.
1363 if(keyLength
== 0 || keyBytes
[keyLength
- 1] != 0) {
1364 infoln(fileTestName
);
1365 errln("Collator(%s).getCollationKey() wrote an empty or unterminated key",
1368 infoln(printCollationKey(key
));
1372 int32_t numLevels
= coll
->getAttribute(UCOL_STRENGTH
, errorCode
);
1373 if(numLevels
< UCOL_IDENTICAL
) {
1378 if(coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
) {
1381 errorCode
.assertSuccess();
// Scan everything except the terminator: 00 is illegal, 01 separates levels.
1382 int32_t numLevelSeparators
= 0;
1383 for(int32_t i
= 0; i
< (keyLength
- 1); ++i
) {
1384 uint8_t b
= keyBytes
[i
];
1386 infoln(fileTestName
);
1387 errln("Collator(%s).getCollationKey() contains a 00 byte", norm
);
1389 infoln(printCollationKey(key
));
1392 if(b
== 1) { ++numLevelSeparators
; }
1394 if(numLevelSeparators
!= (numLevels
- 1)) {
1395 infoln(fileTestName
);
1396 errln("Collator(%s).getCollationKey() has %d level separators for %d levels",
1397 norm
, (int)numLevelSeparators
, (int)numLevels
);
1399 infoln(printCollationKey(key
));
1403 // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
1404 static const int32_t partSizes
[] = { 32, 3, 1 };
1405 for(int32_t psi
= 0; psi
< UPRV_LENGTHOF(partSizes
); ++psi
) {
1406 int32_t partSize
= partSizes
[psi
];
// Pass the loop's partSize (not a fixed 32) so that every listed part size
// is actually exercised and the messages below report the size that ran.
1408 if(!getSortKeyParts(s
, length
, parts
, partSize
, errorCode
)) {
1409 infoln(fileTestName
);
1410 errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
1411 norm
, (int)partSize
, errorCode
.errorName());
1415 if(keyLength
!= parts
.length() || uprv_memcmp(keyBytes
, parts
.data(), keyLength
) != 0) {
1416 infoln(fileTestName
);
1417 errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
1418 norm
, (int)partSize
);
1420 infoln(printCollationKey(key
));
1421 infoln(printSortKey(reinterpret_cast<uint8_t *>(parts
.data()), parts
.length()));
1429 * Changes the key to the merged segments of the U+FFFE-separated substrings of s.
1430 * Leaves key unchanged if s does not contain U+FFFE.
1431 * @return TRUE if the key was successfully changed
// Replaces key with the merge of the sort keys of the substrings of
// s[0..length) that are separated by U+FFFE. A negative length means s is
// NUL-terminated (u_strlen() is used).
// For each segment, the segment's key (key2) is merged onto the key built so
// far (key1) with ucol_mergeSortkeys(); the first segment's key is copied
// as is. The destination buffer is reused when it is large enough,
// otherwise reallocated to 200 bytes, double the old capacity, or the
// required minimum.
// Returns FALSE if errorCode is already a failure.
// NOTE(review): the loop advance, the remaining return statements and the
// update of mergedKeyLength after merging are not visible in this excerpt.
1433 UBool
CollationTest::getMergedCollationKey(const UChar
*s
, int32_t length
,
1434 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1435 if(errorCode
.isFailure()) { return FALSE
; }
1436 LocalMemory
<uint8_t> mergedKey
;
1437 int32_t mergedKeyLength
= 0;
1438 int32_t mergedKeyCapacity
= 0;
1439 int32_t sLength
= (length
>= 0) ? length
: u_strlen(s
);
1440 int32_t segmentStart
= 0;
1441 for(int32_t i
= 0;;) {
1443 if(segmentStart
== 0) {
1444 // s does not contain any U+FFFE.
1447 } else if(s
[i
] != 0xfffe) {
1451 // Get the sort key for another segment and merge it into mergedKey.
1452 CollationKey
key1(mergedKey
.getAlias(), mergedKeyLength
); // copies the bytes
1454 coll
->getCollationKey(s
+ segmentStart
, i
- segmentStart
, key2
, errorCode
);
1455 int32_t key1Length
, key2Length
;
1456 const uint8_t *key1Bytes
= key1
.getByteArray(key1Length
);
1457 const uint8_t *key2Bytes
= key2
.getByteArray(key2Length
);
// Upper bound for the merged length; one byte less when key1 is non-empty.
1459 int32_t minCapacity
= key1Length
+ key2Length
;
1460 if(key1Length
> 0) { --minCapacity
; }
1461 if(minCapacity
<= mergedKeyCapacity
) {
1462 dest
= mergedKey
.getAlias();
1464 if(minCapacity
<= 200) {
1465 mergedKeyCapacity
= 200;
1466 } else if(minCapacity
<= 2 * mergedKeyCapacity
) {
1467 mergedKeyCapacity
*= 2;
1469 mergedKeyCapacity
= minCapacity
;
1471 dest
= mergedKey
.allocateInsteadAndReset(mergedKeyCapacity
);
1473 U_ASSERT(dest
!= NULL
|| mergedKeyCapacity
== 0);
1474 if(key1Length
== 0) {
1475 // key2 is the sort key for the first segment.
1476 uprv_memcpy(dest
, key2Bytes
, key2Length
);
1477 mergedKeyLength
= key2Length
;
1480 ucol_mergeSortkeys(key1Bytes
, key1Length
, key2Bytes
, key2Length
,
1481 dest
, mergedKeyCapacity
);
1483 if(i
== sLength
) { break; }
1486 key
= CollationKey(mergedKey
.getAlias(), mergedKeyLength
);
1493 * Replaces unpaired surrogates with U+FFFD.
1494 * Returns s if no replacement was made, otherwise buffer.
// Copies s into buffer with each surrogate code point (as returned by
// char32At(), i.e. an unpaired surrogate) replaced by U+FFFD.
// The copy is lazy: unchanged text before a replacement is appended to
// buffer only once a surrogate is found, so buffer stays empty when no
// replacement was needed.
// NOTE(review): the index advance and both return statements are not
// visible in this excerpt.
1496 const UnicodeString
&surrogatesToFFFD(const UnicodeString
&s
, UnicodeString
&buffer
) {
1498 while(i
< s
.length()) {
1499 UChar32 c
= s
.char32At(i
);
1500 if(U_IS_SURROGATE(c
)) {
// Catch up: copy the text between the end of buffer and position i.
1501 if(buffer
.length() < i
) {
1502 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
1504 buffer
.append((UChar
)0xfffd);
1508 if(buffer
.isEmpty()) {
// Copy the unchanged tail after the last replacement.
1511 if(buffer
.length() < i
) {
1512 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
// Determines the collation level at which two sort keys first differ.
// Returns Collation::NO_LEVEL when order is UCOL_EQUAL. Otherwise the key
// bytes are compared from the start; the level counter begins at
// PRIMARY_LEVEL and is updated at each Collation::LEVEL_SEPARATOR_BYTE seen
// before the first differing byte. collHasCaseLevel affects the handling of
// Collation::CASE_LEVEL.
// NOTE(review): the level increment and the final return are not visible in
// this excerpt. The loop has no explicit length bound; it relies on the two
// keys differing when order != UCOL_EQUAL.
1517 int32_t getDifferenceLevel(const CollationKey
&prevKey
, const CollationKey
&key
,
1518 UCollationResult order
, UBool collHasCaseLevel
) {
1519 if(order
== UCOL_EQUAL
) {
1520 return Collation::NO_LEVEL
;
1522 int32_t prevKeyLength
;
1523 const uint8_t *prevBytes
= prevKey
.getByteArray(prevKeyLength
);
1525 const uint8_t *bytes
= key
.getByteArray(keyLength
);
1526 int32_t level
= Collation::PRIMARY_LEVEL
;
1527 for(int32_t i
= 0;; ++i
) {
1528 uint8_t b
= prevBytes
[i
];
1529 if(b
!= bytes
[i
]) { break; }
1530 if(b
== Collation::LEVEL_SEPARATOR_BYTE
) {
1532 if(level
== Collation::CASE_LEVEL
&& !collHasCaseLevel
) {
// Verifies that coll orders prevString and s as expected through every
// comparison API exercised below, in both argument orders where applicable:
//   - compare(UnicodeString, UnicodeString)
//   - compare(UChar *, -1, ...) on the NUL-terminated buffers
//   - compareUTF8() and internalCompareUTF8(..., -1, ...)
//   - compare(UCharIterator, UCharIterator)
//   - CollationKey::compareTo() on the keys from getCollationKey()
//   - compareTo() on the merged keys from getMergedCollationKey()
// It also checks that the level of the first sort key difference equals
// expectedLevel, and that the merged keys differ on the same level.
//   norm: label of the normalization setting, used only in messages.
//   prevFileLine: data file line of prevString, logged on failure.
// Each mismatch is reported with errln() followed by infoln() diagnostics
// including both sort keys.
// Returns FALSE if errorCode is already a failure or a key cannot be built.
// NOTE(review): the return statements after each failure report and the
// guards using containNUL are not visible in this excerpt.
1542 UBool
CollationTest::checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
1543 const UnicodeString
&prevString
, const UnicodeString
&s
,
1544 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
1545 IcuTestErrorCode
&errorCode
) {
1546 if(errorCode
.isFailure()) { return FALSE
; }
1548 // Get the sort keys first, for error debug output.
1549 CollationKey prevKey
;
1550 if(!getCollationKey(norm
, prevFileLine
, prevString
.getBuffer(), prevString
.length(),
1551 prevKey
, errorCode
)) {
1555 if(!getCollationKey(norm
, fileLine
, s
.getBuffer(), s
.length(), key
, errorCode
)) { return FALSE
; }
// UTF-16 UnicodeString comparison, previous vs. current.
1557 UCollationResult order
= coll
->compare(prevString
, s
, errorCode
);
1558 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1559 infoln(fileTestName
);
1560 errln("line %d Collator(%s).compare(previous, current) wrong order: %d != %d (%s)",
1561 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1562 infoln(prevFileLine
);
1564 infoln(printCollationKey(prevKey
));
1565 infoln(printCollationKey(key
));
// Swapped arguments must give the negated result.
1568 order
= coll
->compare(s
, prevString
, errorCode
);
1569 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1570 infoln(fileTestName
);
1571 errln("line %d Collator(%s).compare(current, previous) wrong order: %d != %d (%s)",
1572 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1573 infoln(prevFileLine
);
1575 infoln(printCollationKey(prevKey
));
1576 infoln(printCollationKey(key
));
1579 // Test NUL-termination if the strings do not contain NUL characters.
1580 UBool containNUL
= prevString
.indexOf((UChar
)0) >= 0 || s
.indexOf((UChar
)0) >= 0;
1582 order
= coll
->compare(prevString
.getBuffer(), -1, s
.getBuffer(), -1, errorCode
);
1583 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1584 infoln(fileTestName
);
1585 errln("line %d Collator(%s).compare(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1586 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1587 infoln(prevFileLine
);
1589 infoln(printCollationKey(prevKey
));
1590 infoln(printCollationKey(key
));
1593 order
= coll
->compare(s
.getBuffer(), -1, prevString
.getBuffer(), -1, errorCode
);
1594 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1595 infoln(fileTestName
);
1596 errln("line %d Collator(%s).compare(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1597 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1598 infoln(prevFileLine
);
1600 infoln(printCollationKey(prevKey
));
1601 infoln(printCollationKey(key
));
1606 // compare(UTF-16) treats unpaired surrogates like unassigned code points.
1607 // Unpaired surrogates cannot be converted to UTF-8.
1608 // Create valid UTF-16 strings if necessary, and use those for
1609 // both the expected compare() result and for the input to compare(UTF-8).
1610 UnicodeString prevBuffer
, sBuffer
;
1611 const UnicodeString
&prevValid
= surrogatesToFFFD(prevString
, prevBuffer
);
1612 const UnicodeString
&sValid
= surrogatesToFFFD(s
, sBuffer
);
1613 std::string prevUTF8
, sUTF8
;
1614 UnicodeString(prevValid
).toUTF8String(prevUTF8
);
1615 UnicodeString(sValid
).toUTF8String(sUTF8
);
1616 UCollationResult expectedUTF8Order
;
// Identity of the returned references tells whether any replacement happened.
1617 if(&prevValid
== &prevString
&& &sValid
== &s
) {
1618 expectedUTF8Order
= expectedOrder
;
1620 expectedUTF8Order
= coll
->compare(prevValid
, sValid
, errorCode
);
1623 order
= coll
->compareUTF8(prevUTF8
, sUTF8
, errorCode
);
1624 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1625 infoln(fileTestName
);
1626 errln("line %d Collator(%s).compareUTF8(previous, current) wrong order: %d != %d (%s)",
1627 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1628 infoln(prevFileLine
);
1630 infoln(printCollationKey(prevKey
));
1631 infoln(printCollationKey(key
));
1634 order
= coll
->compareUTF8(sUTF8
, prevUTF8
, errorCode
);
1635 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1636 infoln(fileTestName
);
1637 errln("line %d Collator(%s).compareUTF8(current, previous) wrong order: %d != %d (%s)",
1638 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1639 infoln(prevFileLine
);
1641 infoln(printCollationKey(prevKey
));
1642 infoln(printCollationKey(key
));
1645 // Test NUL-termination if the strings do not contain NUL characters.
1647 order
= coll
->internalCompareUTF8(prevUTF8
.c_str(), -1, sUTF8
.c_str(), -1, errorCode
);
1648 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1649 infoln(fileTestName
);
1650 errln("line %d Collator(%s).internalCompareUTF8(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1651 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1652 infoln(prevFileLine
);
1654 infoln(printCollationKey(prevKey
));
1655 infoln(printCollationKey(key
));
1658 order
= coll
->internalCompareUTF8(sUTF8
.c_str(), -1, prevUTF8
.c_str(), -1, errorCode
);
1659 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1660 infoln(fileTestName
);
1661 errln("line %d Collator(%s).internalCompareUTF8(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1662 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1663 infoln(prevFileLine
);
1665 infoln(printCollationKey(prevKey
));
1666 infoln(printCollationKey(key
));
// Comparison through UCharIterator objects over the original strings.
1671 UCharIterator leftIter
;
1672 UCharIterator rightIter
;
1673 uiter_setString(&leftIter
, prevString
.getBuffer(), prevString
.length());
1674 uiter_setString(&rightIter
, s
.getBuffer(), s
.length());
1675 order
= coll
->compare(leftIter
, rightIter
, errorCode
);
1676 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1677 infoln(fileTestName
);
1678 errln("line %d Collator(%s).compare(UCharIterator: previous, current) "
1679 "wrong order: %d != %d (%s)",
1680 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1681 infoln(prevFileLine
);
1683 infoln(printCollationKey(prevKey
));
1684 infoln(printCollationKey(key
));
// Sort key comparison must agree with the string comparisons.
1688 order
= prevKey
.compareTo(key
, errorCode
);
1689 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1690 infoln(fileTestName
);
1691 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo() wrong order: %d != %d (%s)",
1692 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1693 infoln(prevFileLine
);
1695 infoln(printCollationKey(prevKey
));
1696 infoln(printCollationKey(key
));
// The level of the first key difference must match the relation in the data.
1699 UBool collHasCaseLevel
= coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
;
1700 int32_t level
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1701 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
1702 if(level
!= expectedLevel
) {
1703 infoln(fileTestName
);
1704 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
1705 (int)fileLineNumber
, norm
, order
, level
, expectedLevel
);
1706 infoln(prevFileLine
);
1708 infoln(printCollationKey(prevKey
));
1709 infoln(printCollationKey(key
));
1714 // If either string contains U+FFFE, then their sort keys must compare the same as
1715 // the merged sort keys of each string's between-FFFE segments.
1717 // It is not required that
1718 // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
1719 // only that those two methods yield the same order.
1721 // Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
1722 if((getMergedCollationKey(prevString
.getBuffer(), prevString
.length(), prevKey
, errorCode
) |
1723 getMergedCollationKey(s
.getBuffer(), s
.length(), key
, errorCode
)) ||
1724 errorCode
.isFailure()) {
1725 order
= prevKey
.compareTo(key
, errorCode
);
1726 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1727 infoln(fileTestName
);
1728 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1729 "(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)",
1730 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1731 infoln(prevFileLine
);
1733 infoln(printCollationKey(prevKey
));
1734 infoln(printCollationKey(key
));
// The merged keys must differ on the same level as the unmerged keys.
1737 int32_t mergedLevel
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1738 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
1739 if(mergedLevel
!= level
) {
1740 infoln(fileTestName
);
1741 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1742 "(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d",
1743 (int)fileLineNumber
, norm
, order
, mergedLevel
, level
);
1744 infoln(prevFileLine
);
1746 infoln(printCollationKey(prevKey
));
1747 infoln(printCollationKey(key
));
// Processes the lines of a "* compare" section: each line holds a relation
// and a test string, which is compared against the string of the previous
// line (initially the empty string, labelled "(none)").
// A '=' relation (ZERO_LEVEL) expects UCOL_EQUAL, every other relation
// expects UCOL_LESS; the relation itself is the expected difference level.
// Each pair is checked with checkCompareTwo() up to three times:
//   - with normalization off, only if neither string needs normalization;
//   - with normalization on;
//   - on the NFD forms of both strings, if either is not already NFD.
// The label passed to checkCompareTwo() names the normalization mode that
// was just set, so failure messages identify the mode that actually ran.
// Reading stops at the next section starter, which stays in fileLine.
// NOTE(review): the handling of a parse failure, the NULL-collator skip and
// the conditions guarding the second pass are not visible in this excerpt.
1755 void CollationTest::checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1756 if(errorCode
.isFailure()) { return; }
1757 UnicodeString prevFileLine
= UNICODE_STRING("(none)", 6);
1758 UnicodeString prevString
, s
;
1759 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
1760 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1761 // Parse the line even if it will be ignored (when we do not have a Collator)
1762 // in order to report syntax issues.
1763 Collation::Level relation
= parseRelationAndString(s
, errorCode
);
1764 if(errorCode
.isFailure()) {
1769 // We were unable to create the Collator but continue with tests.
1770 // Ignore test data for this Collator.
1771 // The next Collator creation might work.
1774 UCollationResult expectedOrder
= (relation
== Collation::ZERO_LEVEL
) ? UCOL_EQUAL
: UCOL_LESS
;
1775 Collation::Level expectedLevel
= relation
;
1776 s
.getTerminatedBuffer(); // Ensure NUL-termination.
// Normalization may only be switched off when both inputs are safe without it.
1778 if(!needsNormalization(prevString
, errorCode
) && !needsNormalization(s
, errorCode
)) {
1779 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, errorCode
);
1780 isOk
= checkCompareTwo("normalization=off", prevFileLine
, prevString
, s
,
1781 expectedOrder
, expectedLevel
, errorCode
);
1784 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, errorCode
);
1785 isOk
= checkCompareTwo("normalization=on", prevFileLine
, prevString
, s
,
1786 expectedOrder
, expectedLevel
, errorCode
);
// Repeat on the NFD forms when the inputs are not already normalized.
1788 if(isOk
&& (!nfd
->isNormalized(prevString
, errorCode
) || !nfd
->isNormalized(s
, errorCode
))) {
1789 UnicodeString pn
= nfd
->normalize(prevString
, errorCode
);
1790 UnicodeString n
= nfd
->normalize(s
, errorCode
);
1791 pn
.getTerminatedBuffer();
1792 n
.getTerminatedBuffer();
1793 errorCode
.assertSuccess();
1794 isOk
= checkCompareTwo("NFD input", prevFileLine
, pn
, n
,
1795 expectedOrder
, expectedLevel
, errorCode
);
1798 errorCode
.reset(); // already reported
1800 prevFileLine
= fileLine
;
1802 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
// Entry point of the data-driven collation test.
// Obtains the FCD and NFD normalizers, opens collationtest.txt from the
// source test data directory as UTF-8, and processes it section by section.
// Every section must begin with a section-starter line, dispatched as:
//   "** test: ..."  -> remembered in fileTestName
//   "@ root"        -> setRootCollator()
//   "@ locale ..."  -> setLocaleCollator()
//   "@ rules"       -> buildTailoring()
//   "% ..."         -> parseAndSetAttribute()
//   "* compare"     -> checkCompareStrings()
// Anything else is reported as a syntax error.
// NOTE(review): the early returns after the two setup failures and the
// clearing of fileLine after each handled line are not visible in this
// excerpt.
1806 void CollationTest::TestDataDriven() {
1807 IcuTestErrorCode
errorCode(*this, "TestDataDriven");
1809 fcd
= Normalizer2Factory::getFCDInstance(errorCode
);
1810 nfd
= Normalizer2::getNFDInstance(errorCode
);
1811 if(errorCode
.errDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) {
1815 CharString
path(getSourceTestData(errorCode
), errorCode
);
1816 path
.appendPathPart("collationtest.txt", errorCode
);
1817 const char *codePage
= "UTF-8";
1818 LocalUCHARBUFPointer
f(ucbuf_open(path
.data(), &codePage
, TRUE
, FALSE
, errorCode
));
1819 if(errorCode
.errIfFailureAndReset("ucbuf_open(collationtest.txt)")) {
1822 // Read a new line if necessary.
1823 // Sub-parsers leave the first line set that they do not handle.
1824 while(errorCode
.isSuccess() && (!fileLine
.isEmpty() || readNonEmptyLine(f
.getAlias(), errorCode
))) {
1825 if(!isSectionStarter(fileLine
[0])) {
1826 errln("syntax error on line %d", (int)fileLineNumber
);
1830 if(fileLine
.startsWith(UNICODE_STRING("** test: ", 9))) {
1831 fileTestName
= fileLine
;
1834 } else if(fileLine
== UNICODE_STRING("@ root", 6)) {
1835 setRootCollator(errorCode
);
1837 } else if(fileLine
.startsWith(UNICODE_STRING("@ locale ", 9))) {
1838 setLocaleCollator(errorCode
);
1840 } else if(fileLine
== UNICODE_STRING("@ rules", 7)) {
1841 buildTailoring(f
.getAlias(), errorCode
);
1842 } else if(fileLine
[0] == 0x25 && isSpace(fileLine
[1])) { // %
1843 parseAndSetAttribute(errorCode
);
1844 } else if(fileLine
== UNICODE_STRING("* compare", 9)) {
1845 checkCompareStrings(f
.getAlias(), errorCode
);
1847 errln("syntax error on line %d", (int)fileLineNumber
);
1854 #endif // !UCONFIG_NO_COLLATION