1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2012-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * created on: 2012apr27
11 * created by: Markus W. Scherer
14 #include "unicode/utypes.h"
16 #if !UCONFIG_NO_COLLATION
18 #include "unicode/coll.h"
19 #include "unicode/errorcode.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/normalizer2.h"
22 #include "unicode/sortkey.h"
23 #include "unicode/std_string.h"
24 #include "unicode/strenum.h"
25 #include "unicode/tblcoll.h"
26 #include "unicode/uiter.h"
27 #include "unicode/uniset.h"
28 #include "unicode/unistr.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/ustring.h"
33 #include "collation.h"
34 #include "collationdata.h"
35 #include "collationfcd.h"
36 #include "collationiterator.h"
37 #include "collationroot.h"
38 #include "collationrootelements.h"
39 #include "collationruleparser.h"
40 #include "collationweights.h"
43 #include "normalizer2impl.h"
46 #include "uitercollationiterator.h"
47 #include "utf16collationiterator.h"
48 #include "utf8collationiterator.h"
53 class CodePointIterator
;
55 // TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey)
57 class CollationTest
: public IntlTest
{
60 : fcd(NULL
), nfd(NULL
),
68 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=NULL
);
72 void TestNulTerminated();
73 void TestIllegalUTF8();
74 void TestShortFCDData();
76 void TestCollationWeights();
77 void TestRootElements();
78 void TestTailoredElements();
79 void TestDataDriven();
82 void checkFCD(const char *name
, CollationIterator
&ci
, CodePointIterator
&cpi
);
83 void checkAllocWeights(CollationWeights
&cw
,
84 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
85 int32_t someLength
, int32_t minCount
);
87 static UnicodeString
printSortKey(const uint8_t *p
, int32_t length
);
88 static UnicodeString
printCollationKey(const CollationKey
&key
);
90 // Helpers & fields for data-driven test.
91 static UBool
isCROrLF(UChar c
) { return c
== 0xa || c
== 0xd; }
92 static UBool
isSpace(UChar c
) { return c
== 9 || c
== 0x20 || c
== 0x3000; }
93 static UBool
isSectionStarter(UChar c
) { return c
== 0x25 || c
== 0x2a || c
== 0x40; } // %*@
94 int32_t skipSpaces(int32_t i
) {
95 while(isSpace(fileLine
[i
])) { ++i
; }
99 UBool
readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
100 void parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
, UErrorCode
&errorCode
);
101 Collation::Level
parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
);
102 void parseAndSetAttribute(IcuTestErrorCode
&errorCode
);
103 void parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
);
104 void buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
105 void setRootCollator(IcuTestErrorCode
&errorCode
);
106 void setLocaleCollator(IcuTestErrorCode
&errorCode
);
108 UBool
needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const;
110 UBool
getSortKeyParts(const UChar
*s
, int32_t length
,
111 CharString
&dest
, int32_t partSize
,
112 IcuTestErrorCode
&errorCode
);
113 UBool
getCollationKey(const char *norm
, const UnicodeString
&line
,
114 const UChar
*s
, int32_t length
,
115 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
116 UBool
getMergedCollationKey(const UChar
*s
, int32_t length
,
117 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
118 UBool
checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
119 const UnicodeString
&prevString
, const UnicodeString
&s
,
120 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
121 IcuTestErrorCode
&errorCode
);
122 void checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
124 const Normalizer2
*fcd
, *nfd
;
125 UnicodeString fileLine
;
126 int32_t fileLineNumber
;
127 UnicodeString fileTestName
;
131 extern IntlTest
*createCollationTest() {
132 return new CollationTest();
135 void CollationTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
137 logln("TestSuite CollationTest: ");
140 TESTCASE_AUTO(TestMinMax
);
141 TESTCASE_AUTO(TestImplicits
);
142 TESTCASE_AUTO(TestNulTerminated
);
143 TESTCASE_AUTO(TestIllegalUTF8
);
144 TESTCASE_AUTO(TestShortFCDData
);
145 TESTCASE_AUTO(TestFCD
);
146 TESTCASE_AUTO(TestCollationWeights
);
147 TESTCASE_AUTO(TestRootElements
);
148 TESTCASE_AUTO(TestTailoredElements
);
149 TESTCASE_AUTO(TestDataDriven
);
153 void CollationTest::TestMinMax() {
154 IcuTestErrorCode
errorCode(*this, "TestMinMax");
156 setRootCollator(errorCode
);
157 if(errorCode
.isFailure()) {
161 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
);
163 errln("the root collator is not a RuleBasedCollator");
167 static const UChar s
[2] = { 0xfffe, 0xffff };
168 UVector64
ces(errorCode
);
169 rbc
->internalGetCEs(UnicodeString(FALSE
, s
, 2), ces
, errorCode
);
170 errorCode
.assertSuccess();
171 if(ces
.size() != 2) {
172 errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces
.size());
175 int64_t ce
= ces
.elementAti(0);
176 int64_t expected
= Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY
);
178 errln("CE(U+fffe)=%04lx != 02..", (long)ce
);
181 ce
= ces
.elementAti(1);
182 expected
= Collation::makeCE(Collation::MAX_PRIMARY
);
184 errln("CE(U+ffff)=%04lx != max..", (long)ce
);
188 void CollationTest::TestImplicits() {
189 IcuTestErrorCode
errorCode(*this, "TestImplicits");
191 const CollationData
*cd
= CollationRoot::getData(errorCode
);
192 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
196 // Implicit primary weights should be assigned for the following sets,
197 // and sort in ascending order by set and then code point.
198 // See http://www.unicode.org/reports/tr10/#Implicit_Weights
200 // core Han Unified Ideographs
201 UnicodeSet
coreHan("[\\p{unified_ideograph}&"
202 "[\\p{Block=CJK_Unified_Ideographs}"
203 "\\p{Block=CJK_Compatibility_Ideographs}]]",
205 // all other Unified Han ideographs
206 UnicodeSet
otherHan("[\\p{unified ideograph}-"
207 "[\\p{Block=CJK_Unified_Ideographs}"
208 "\\p{Block=CJK_Compatibility_Ideographs}]]",
210 UnicodeSet
unassigned("[[:Cn:][:Cs:][:Co:]]", errorCode
);
211 unassigned
.remove(0xfffe, 0xffff); // These have special CLDR root mappings.
213 // Starting with CLDR 26/ICU 54, the root Han order may instead be
214 // the Unihan radical-stroke order.
215 // The tests should pass either way, so we only test the order of a small set of Han characters
216 // whose radical-stroke order is the same as their code point order.
217 UnicodeSet
someHanInCPOrder(
218 "[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48"
219 "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]",
221 UnicodeSet
inOrder(someHanInCPOrder
);
222 inOrder
.addAll(unassigned
).freeze();
223 if(errorCode
.logIfFailureAndReset("UnicodeSet")) {
226 const UnicodeSet
*sets
[] = { &coreHan
, &otherHan
, &unassigned
};
228 uint32_t prevPrimary
= 0;
229 UTF16CollationIterator
ci(cd
, FALSE
, NULL
, NULL
, NULL
);
230 for(int32_t i
= 0; i
< UPRV_LENGTHOF(sets
); ++i
) {
231 LocalPointer
<UnicodeSetIterator
> iter(new UnicodeSetIterator(*sets
[i
]));
232 while(iter
->next()) {
233 UChar32 c
= iter
->getCodepoint();
235 ci
.setText(s
.getBuffer(), s
.getBuffer() + s
.length());
236 int64_t ce
= ci
.nextCE(errorCode
);
237 int64_t ce2
= ci
.nextCE(errorCode
);
238 if(errorCode
.logIfFailureAndReset("CollationIterator.nextCE()")) {
241 if(ce
== Collation::NO_CE
|| ce2
!= Collation::NO_CE
) {
242 errln("CollationIterator.nextCE(U+%04lx) did not yield exactly one CE", (long)c
);
245 if((ce
& 0xffffffff) != Collation::COMMON_SEC_AND_TER_CE
) {
246 errln("CollationIterator.nextCE(U+%04lx) has non-common sec/ter weights: %08lx",
247 (long)c
, (long)(ce
& 0xffffffff));
250 uint32_t primary
= (uint32_t)(ce
>> 32);
251 if(!(primary
> prevPrimary
) && inOrder
.contains(c
) && inOrder
.contains(prev
)) {
252 errln("CE(U+%04lx)=%04lx.. not greater than CE(U+%04lx)=%04lx..",
253 (long)c
, (long)primary
, (long)prev
, (long)prevPrimary
);
256 prevPrimary
= primary
;
261 void CollationTest::TestNulTerminated() {
262 IcuTestErrorCode
errorCode(*this, "TestNulTerminated");
263 const CollationData
*data
= CollationRoot::getData(errorCode
);
264 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
268 static const UChar s
[] = { 0x61, 0x62, 0x61, 0x62, 0 };
270 UTF16CollationIterator
ci1(data
, FALSE
, s
, s
, s
+ 2);
271 UTF16CollationIterator
ci2(data
, FALSE
, s
+ 2, s
+ 2, NULL
);
272 for(int32_t i
= 0;; ++i
) {
273 int64_t ce1
= ci1
.nextCE(errorCode
);
274 int64_t ce2
= ci2
.nextCE(errorCode
);
275 if(errorCode
.logIfFailureAndReset("CollationIterator.nextCE()")) {
279 errln("CollationIterator.nextCE(with length) != nextCE(NUL-terminated) at CE %d", (int)i
);
282 if(ce1
== Collation::NO_CE
) { break; }
286 void CollationTest::TestIllegalUTF8() {
287 IcuTestErrorCode
errorCode(*this, "TestIllegalUTF8");
289 setRootCollator(errorCode
);
290 if(errorCode
.isFailure()) {
294 coll
->setAttribute(UCOL_STRENGTH
, UCOL_IDENTICAL
, errorCode
);
296 static const char *strings
[] = {
299 // illegal byte sequences
300 "a\x80z", // trail byte
301 "a\xc1\x81z", // non-shortest form
302 "a\xe0\x82\x83z", // non-shortest form
303 "a\xed\xa0\x80z", // lead surrogate: would be U+D800
304 "a\xed\xbf\xbfz", // trail surrogate: would be U+DFFF
305 "a\xf0\x8f\xbf\xbfz", // non-shortest form
306 "a\xf4\x90\x80\x80z" // out of range: would be U+110000
309 StringPiece
fffd(strings
[0]);
310 for(int32_t i
= 1; i
< UPRV_LENGTHOF(strings
); ++i
) {
311 StringPiece
illegal(strings
[i
]);
312 UCollationResult order
= coll
->compareUTF8(fffd
, illegal
, errorCode
);
313 if(order
!= UCOL_EQUAL
) {
314 errln("compareUTF8(U+FFFD, string %d with illegal UTF-8)=%d != UCOL_EQUAL",
322 void addLeadSurrogatesForSupplementary(const UnicodeSet
&src
, UnicodeSet
&dest
) {
323 for(UChar32 c
= 0x10000; c
< 0x110000;) {
324 UChar32 next
= c
+ 0x400;
325 if(src
.containsSome(c
, next
- 1)) {
326 dest
.add(U16_LEAD(c
));
334 void CollationTest::TestShortFCDData() {
335 // See CollationFCD class comments.
336 IcuTestErrorCode
errorCode(*this, "TestShortFCDData");
337 UnicodeSet
expectedLccc("[:^lccc=0:]", errorCode
);
338 errorCode
.assertSuccess();
339 expectedLccc
.add(0xdc00, 0xdfff); // add all trail surrogates
340 addLeadSurrogatesForSupplementary(expectedLccc
, expectedLccc
);
341 UnicodeSet lccc
; // actual
342 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
343 if(CollationFCD::hasLccc(c
)) { lccc
.add(c
); }
345 UnicodeSet
diff(expectedLccc
);
346 diff
.removeAll(lccc
);
347 diff
.remove(0x10000, 0x10ffff); // hasLccc() only works for the BMP
348 UnicodeString
empty("[]");
349 UnicodeString diffString
;
350 diff
.toPattern(diffString
, TRUE
);
351 assertEquals("CollationFCD::hasLccc() expected-actual", empty
, diffString
);
353 diff
.removeAll(expectedLccc
);
354 diff
.toPattern(diffString
, TRUE
);
355 assertEquals("CollationFCD::hasLccc() actual-expected", empty
, diffString
, TRUE
);
357 UnicodeSet
expectedTccc("[:^tccc=0:]", errorCode
);
358 if (errorCode
.isSuccess()) {
359 addLeadSurrogatesForSupplementary(expectedLccc
, expectedTccc
);
360 addLeadSurrogatesForSupplementary(expectedTccc
, expectedTccc
);
361 UnicodeSet tccc
; // actual
362 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
363 if(CollationFCD::hasTccc(c
)) { tccc
.add(c
); }
366 diff
.removeAll(tccc
);
367 diff
.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP
368 assertEquals("CollationFCD::hasTccc() expected-actual", empty
, diffString
);
370 diff
.removeAll(expectedTccc
);
371 diff
.toPattern(diffString
, TRUE
);
372 assertEquals("CollationFCD::hasTccc() actual-expected", empty
, diffString
);
376 class CodePointIterator
{
378 CodePointIterator(const UChar32
*cp
, int32_t length
) : cp(cp
), length(length
), pos(0) {}
379 void resetToStart() { pos
= 0; }
380 UChar32
next() { return (pos
< length
) ? cp
[pos
++] : U_SENTINEL
; }
381 UChar32
previous() { return (pos
> 0) ? cp
[--pos
] : U_SENTINEL
; }
382 int32_t getLength() const { return length
; }
383 int getIndex() const { return (int)pos
; }
390 void CollationTest::checkFCD(const char *name
,
391 CollationIterator
&ci
, CodePointIterator
&cpi
) {
392 IcuTestErrorCode
errorCode(*this, "checkFCD");
394 // Iterate forward to the limit.
396 UChar32 c1
= ci
.nextCodePoint(errorCode
);
397 UChar32 c2
= cpi
.next();
399 errln("%s.nextCodePoint(to limit, 1st pass) = U+%04lx != U+%04lx at %d",
400 name
, (long)c1
, (long)c2
, cpi
.getIndex());
403 if(c1
< 0) { break; }
406 // Iterate backward most of the way.
407 for(int32_t n
= (cpi
.getLength() * 2) / 3; n
> 0; --n
) {
408 UChar32 c1
= ci
.previousCodePoint(errorCode
);
409 UChar32 c2
= cpi
.previous();
411 errln("%s.previousCodePoint() = U+%04lx != U+%04lx at %d",
412 name
, (long)c1
, (long)c2
, cpi
.getIndex());
419 UChar32 c1
= ci
.nextCodePoint(errorCode
);
420 UChar32 c2
= cpi
.next();
422 errln("%s.nextCodePoint(to limit again) = U+%04lx != U+%04lx at %d",
423 name
, (long)c1
, (long)c2
, cpi
.getIndex());
426 if(c1
< 0) { break; }
429 // Iterate backward to the start.
431 UChar32 c1
= ci
.previousCodePoint(errorCode
);
432 UChar32 c2
= cpi
.previous();
434 errln("%s.previousCodePoint(to start) = U+%04lx != U+%04lx at %d",
435 name
, (long)c1
, (long)c2
, cpi
.getIndex());
438 if(c1
< 0) { break; }
// Runs checkFCD() over the same non-FCD input with three iterator flavors
// (FCDUTF16CollationIterator, FCDUTF8CollationIterator, FCDUIterCollationIterator),
// comparing each against the expected code point sequence in cp[].
// NOTE(review): this listing is missing some short lines (array rows,
// returns, closing braces), so the arrays below are not complete as shown.
442 void CollationTest::TestFCD() {
443 IcuTestErrorCode
errorCode(*this, "TestFCD");
444 const CollationData
*data
= CollationRoot::getData(errorCode
);
445 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
449 // Input string, not FCD, NUL-terminated.
450 static const UChar s
[] = {
451 0x308, 0xe1, 0x62, 0x301, 0x327, 0x430, 0x62,
452 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
453 0x327, 0x308, // ccc=202, 230
454 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
455 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F),
456 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D),
458 0xe7, // Character with tccc!=0 decomposed together with mis-ordered sequence.
459 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), U16_LEAD(0x1D165), U16_TRAIL(0x1D165),
460 0xe1, // Character with tccc!=0 decomposed together with decomposed sequence.
461 0xf73, 0xf75, // Tibetan composite vowels must be decomposed.
465 // Expected code points.
466 static const UChar32 cp
[] = {
467 0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
468 0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
471 0x63, 0x327, 0x1D165, 0x1D16D,
473 0xf71, 0xf71, 0xf72, 0xf74, 0x301,
// UTF-16 iterator over the NUL-terminated input (limit == NULL).
477 FCDUTF16CollationIterator
u16ci(data
, FALSE
, s
, s
, NULL
);
478 if(errorCode
.logIfFailureAndReset("FCDUTF16CollationIterator constructor")) {
481 CodePointIterator
cpi(cp
, UPRV_LENGTHOF(cp
));
482 checkFCD("FCDUTF16CollationIterator", u16ci
, cpi
);
// Same input converted to UTF-8; a length of -1 is passed for the byte string.
486 UnicodeString(s
).toUTF8String(utf8
);
487 FCDUTF8CollationIterator
u8ci(data
, FALSE
,
488 reinterpret_cast<const uint8_t *>(utf8
.c_str()), 0, -1);
489 if(errorCode
.logIfFailureAndReset("FCDUTF8CollationIterator constructor")) {
492 checkFCD("FCDUTF8CollationIterator", u8ci
, cpi
);
// Same input through a UCharIterator set up with uiter_setString().
496 uiter_setString(&iter
, s
, UPRV_LENGTHOF(s
) - 1); // -1: without the terminating NUL
497 FCDUIterCollationIterator
uici(data
, FALSE
, iter
, 0);
498 if(errorCode
.logIfFailureAndReset("FCDUIterCollationIterator constructor")) {
501 checkFCD("FCDUIterCollationIterator", uici
, cpi
);
504 void CollationTest::checkAllocWeights(CollationWeights
&cw
,
505 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
506 int32_t someLength
, int32_t minCount
) {
507 if(!cw
.allocWeights(lowerLimit
, upperLimit
, n
)) {
508 errln("CollationWeights::allocWeights(%lx, %lx, %ld) = FALSE",
509 (long)lowerLimit
, (long)upperLimit
, (long)n
);
512 uint32_t previous
= lowerLimit
;
513 int32_t count
= 0; // number of weights that have someLength
514 for(int32_t i
= 0; i
< n
; ++i
) {
515 uint32_t w
= cw
.nextWeight();
516 if(w
== 0xffffffff) {
517 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
518 "returns only %ld weights",
519 (long)lowerLimit
, (long)upperLimit
, (long)n
, (long)i
);
522 if(!(previous
< w
&& w
< upperLimit
)) {
523 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
524 "number %ld -> %lx not between %lx and %lx",
525 (long)lowerLimit
, (long)upperLimit
, (long)n
,
526 (long)(i
+ 1), (long)w
, (long)previous
, (long)upperLimit
);
529 if(CollationWeights::lengthOfWeight(w
) == someLength
) { ++count
; }
531 if(count
< minCount
) {
532 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
533 "returns only %ld < %ld weights of length %d",
534 (long)lowerLimit
, (long)upperLimit
, (long)n
,
535 (long)count
, (long)minCount
, (int)someLength
);
539 void CollationTest::TestCollationWeights() {
542 // Non-compressible primaries use 254 second bytes 02..FF.
543 logln("CollationWeights.initForPrimary(non-compressible)");
544 cw
.initForPrimary(FALSE
);
545 // Expect 1 weight 11 and 254 weights 12xx.
546 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 1, 1);
547 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 2, 254);
548 // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202.
549 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 260, 2, 255);
550 // Expect 254 two-byte weights from the ranges 10ff and 11xx.
551 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 600, 2, 254);
552 // Expect 254^2=64516 three-byte weights.
553 // During computation, there should be 3 three-byte ranges
554 // 10ffff, 11xxxx, 120202.
555 // The middle one should be split 64515:1,
556 // and the newly-split-off range and the last ranged lengthened.
557 checkAllocWeights(cw
, 0x10fffe00, 0x12020300, 1 + 64516 + 254 + 1, 3, 64516);
558 // Expect weights 1102 & 1103.
559 checkAllocWeights(cw
, 0x10ff0000, 0x11040000, 2, 2, 2);
560 // Expect weights 102102 & 102103.
561 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
563 // Compressible primaries use 251 second bytes 04..FE.
564 logln("CollationWeights.initForPrimary(compressible)");
565 cw
.initForPrimary(TRUE
);
566 // Expect 1 weight 11 and 251 weights 12xx.
567 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 1, 1);
568 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 2, 251);
569 // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204.
570 checkAllocWeights(cw
, 0x10fdfe40, 0x12050300, 260, 2, 252);
571 // Expect weights 1104 & 1105.
572 checkAllocWeights(cw
, 0x10fe0000, 0x11060000, 2, 2, 2);
573 // Expect weights 102102 & 102103.
574 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
576 // Secondary and tertiary weights use only bytes 3 & 4.
577 logln("CollationWeights.initForSecondary()");
578 cw
.initForSecondary();
579 // Expect weights fbxx and all four fc..ff.
580 checkAllocWeights(cw
, 0xfb20, 0x10000, 20, 3, 4);
582 logln("CollationWeights.initForTertiary()");
583 cw
.initForTertiary();
584 // Expect weights 3dxx and both 3e & 3f.
585 checkAllocWeights(cw
, 0x3d02, 0x4000, 10, 3, 2);
590 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
,
591 uint32_t p
, uint32_t s
, uint32_t ctq
) {
592 uint32_t p1
= p
>> 24;
593 uint32_t p2
= (p
>> 16) & 0xff;
594 uint32_t p3
= (p
>> 8) & 0xff;
595 uint32_t p4
= p
& 0xff;
596 uint32_t s1
= s
>> 8;
597 uint32_t s2
= s
& 0xff;
598 // ctq = Case, Tertiary, Quaternary
599 uint32_t c
= (ctq
& Collation::CASE_MASK
) >> 14;
600 uint32_t t
= ctq
& Collation::ONLY_TERTIARY_MASK
;
601 uint32_t t1
= t
>> 8;
602 uint32_t t2
= t
& 0xff;
603 uint32_t q
= ctq
& Collation::QUATERNARY_MASK
;
604 // No leading zero bytes.
605 if((p
!= 0 && p1
== 0) || (s
!= 0 && s1
== 0) || (t
!= 0 && t1
== 0)) {
608 // No intermediate zero bytes.
609 if(p1
!= 0 && p2
== 0 && (p
& 0xffff) != 0) {
612 if(p2
!= 0 && p3
== 0 && p4
!= 0) {
615 // Minimum & maximum lead bytes.
616 if((p1
!= 0 && p1
<= Collation::MERGE_SEPARATOR_BYTE
) ||
617 s1
== Collation::LEVEL_SEPARATOR_BYTE
||
618 t1
== Collation::LEVEL_SEPARATOR_BYTE
|| t1
> 0x3f) {
624 // The valid byte range for the second primary byte depends on compressibility.
626 if(data
.isCompressibleLeadByte(p1
)) {
627 if(p2
<= Collation::PRIMARY_COMPRESSION_LOW_BYTE
||
628 Collation::PRIMARY_COMPRESSION_HIGH_BYTE
<= p2
) {
632 if(p2
<= Collation::LEVEL_SEPARATOR_BYTE
) {
637 // Other bytes just need to avoid the level separator.
638 // Trailing zeros are ok.
639 U_ASSERT(Collation::LEVEL_SEPARATOR_BYTE
== 1);
640 if(p3
== Collation::LEVEL_SEPARATOR_BYTE
|| p4
== Collation::LEVEL_SEPARATOR_BYTE
||
641 s2
== Collation::LEVEL_SEPARATOR_BYTE
|| t2
== Collation::LEVEL_SEPARATOR_BYTE
) {
648 // Completely ignorable CE.
649 // Quaternary CEs are not supported.
650 if(c
!= 0 || q
!= 0) {
655 if(t
< re
.getTertiaryBoundary() || c
!= 2) {
661 if(s
< re
.getSecondaryBoundary() || t
== 0 || t
>= re
.getTertiaryBoundary()) {
667 if(s
== 0 || (Collation::COMMON_WEIGHT16
< s
&& s
<= re
.getLastCommonSecondary()) ||
668 s
>= re
.getSecondaryBoundary()) {
671 if(t
== 0 || t
>= re
.getTertiaryBoundary()) {
678 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
, int64_t ce
) {
679 uint32_t p
= (uint32_t)(ce
>> 32);
680 uint32_t secTer
= (uint32_t)ce
;
681 return isValidCE(re
, data
, p
, secTer
>> 16, secTer
& 0xffff);
684 class RootElementsIterator
{
686 RootElementsIterator(const CollationData
&root
)
688 elements(root
.rootElements
), length(root
.rootElementsLength
),
690 index((int32_t)elements
[CollationRootElements::IX_FIRST_TERTIARY_INDEX
]) {}
693 if(index
>= length
) { return FALSE
; }
694 uint32_t p
= elements
[index
];
695 if(p
== CollationRootElements::PRIMARY_SENTINEL
) { return FALSE
; }
696 if((p
& CollationRootElements::SEC_TER_DELTA_FLAG
) != 0) {
698 secTer
= p
& ~CollationRootElements::SEC_TER_DELTA_FLAG
;
701 if((p
& CollationRootElements::PRIMARY_STEP_MASK
) != 0) {
702 // End of a range, enumerate the primaries in the range.
703 int32_t step
= (int32_t)p
& CollationRootElements::PRIMARY_STEP_MASK
;
706 // Finished the range, return the next CE after it.
711 // Return the next primary in this range.
712 UBool isCompressible
= data
.isCompressiblePrimary(pri
);
713 if((pri
& 0xffff) == 0) {
714 pri
= Collation::incTwoBytePrimaryByOffset(pri
, isCompressible
, step
);
716 pri
= Collation::incThreeBytePrimaryByOffset(pri
, isCompressible
, step
);
720 // Simple primary CE.
723 // Does this have an explicit below-common sec/ter unit,
724 // or does it imply a common one?
725 if(index
== length
) {
726 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
728 secTer
= elements
[index
];
729 if((secTer
& CollationRootElements::SEC_TER_DELTA_FLAG
) == 0) {
731 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
733 secTer
&= ~CollationRootElements::SEC_TER_DELTA_FLAG
;
734 if(secTer
> Collation::COMMON_SEC_AND_TER_CE
) {
736 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
738 // Explicit sec/ter below common/common.
746 uint32_t getPrimary() const { return pri
; }
747 uint32_t getSecTer() const { return secTer
; }
750 const CollationData
&data
;
751 const uint32_t *elements
;
761 void CollationTest::TestRootElements() {
762 IcuTestErrorCode
errorCode(*this, "TestRootElements");
763 const CollationData
*root
= CollationRoot::getData(errorCode
);
764 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
767 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
768 RootElementsIterator
iter(*root
);
770 // We check each root CE for validity,
771 // and we also verify that there is a tailoring gap between each two CEs.
772 CollationWeights cw1c
; // compressible primary weights
773 CollationWeights cw1u
; // uncompressible primary weights
774 CollationWeights cw2
;
775 CollationWeights cw3
;
777 cw1c
.initForPrimary(TRUE
);
778 cw1u
.initForPrimary(FALSE
);
779 cw2
.initForSecondary();
780 cw3
.initForTertiary();
782 // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
783 // nor the special merge-separator CE for U+FFFE.
784 uint32_t prevPri
= 0;
785 uint32_t prevSec
= 0;
786 uint32_t prevTer
= 0;
788 uint32_t pri
= iter
.getPrimary();
789 uint32_t secTer
= iter
.getSecTer();
790 // CollationRootElements CEs must have 0 case and quaternary bits.
791 if((secTer
& Collation::CASE_AND_QUATERNARY_MASK
) != 0) {
792 errln("CollationRootElements CE has non-zero case and/or quaternary bits: %08lx %08lx",
793 (long)pri
, (long)secTer
);
795 uint32_t sec
= secTer
>> 16;
796 uint32_t ter
= secTer
& Collation::ONLY_TERTIARY_MASK
;
798 if(pri
== 0 && sec
== 0 && ter
!= 0) {
799 // Tertiary CEs must have uppercase bits,
800 // but they are not stored in the CollationRootElements.
803 if(!isValidCE(rootElements
, *root
, pri
, sec
, ctq
)) {
804 errln("invalid root CE %08lx %08lx", (long)pri
, (long)secTer
);
807 uint32_t newWeight
= 0;
808 if(prevPri
== 0 || prevPri
>= Collation::FFFD_PRIMARY
) {
809 // There is currently no tailoring gap after primary ignorables,
810 // and we forbid tailoring after U+FFFD and U+FFFF.
811 } else if(root
->isCompressiblePrimary(prevPri
)) {
812 if(!cw1c
.allocWeights(prevPri
, pri
, 1)) {
813 errln("no primary/compressible tailoring gap between %08lx and %08lx",
814 (long)prevPri
, (long)pri
);
816 newWeight
= cw1c
.nextWeight();
819 if(!cw1u
.allocWeights(prevPri
, pri
, 1)) {
820 errln("no primary/uncompressible tailoring gap between %08lx and %08lx",
821 (long)prevPri
, (long)pri
);
823 newWeight
= cw1u
.nextWeight();
826 if(newWeight
!= 0 && !(prevPri
< newWeight
&& newWeight
< pri
)) {
827 errln("mis-allocated primary weight, should get %08lx < %08lx < %08lx",
828 (long)prevPri
, (long)newWeight
, (long)pri
);
830 } else if(sec
!= prevSec
) {
831 uint32_t lowerLimit
=
832 prevSec
== 0 ? rootElements
.getSecondaryBoundary() - 0x100 : prevSec
;
833 if(!cw2
.allocWeights(lowerLimit
, sec
, 1)) {
834 errln("no secondary tailoring gap between %04x and %04x", lowerLimit
, sec
);
836 uint32_t newWeight
= cw2
.nextWeight();
837 if(!(prevSec
< newWeight
&& newWeight
< sec
)) {
838 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
839 (long)lowerLimit
, (long)newWeight
, (long)sec
);
842 } else if(ter
!= prevTer
) {
843 uint32_t lowerLimit
=
844 prevTer
== 0 ? rootElements
.getTertiaryBoundary() - 0x100 : prevTer
;
845 if(!cw3
.allocWeights(lowerLimit
, ter
, 1)) {
846 errln("no teriary tailoring gap between %04x and %04x", lowerLimit
, ter
);
848 uint32_t newWeight
= cw3
.nextWeight();
849 if(!(prevTer
< newWeight
&& newWeight
< ter
)) {
850 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
851 (long)lowerLimit
, (long)newWeight
, (long)ter
);
855 errln("duplicate root CE %08lx %08lx", (long)pri
, (long)secTer
);
864 void CollationTest::TestTailoredElements() {
865 IcuTestErrorCode
errorCode(*this, "TestTailoredElements");
866 const CollationData
*root
= CollationRoot::getData(errorCode
);
867 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
870 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
872 UHashtable
*prevLocales
= uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, errorCode
);
873 if(errorCode
.logIfFailureAndReset("failed to create a hash table")) {
876 uhash_setKeyDeleter(prevLocales
, uprv_free
);
877 // TestRootElements() tests the root collator which does not have tailorings.
878 uhash_puti(prevLocales
, uprv_strdup(""), 1, errorCode
);
879 uhash_puti(prevLocales
, uprv_strdup("root"), 1, errorCode
);
880 uhash_puti(prevLocales
, uprv_strdup("root@collation=standard"), 1, errorCode
);
882 UVector64
ces(errorCode
);
883 LocalPointer
<StringEnumeration
> locales(Collator::getAvailableLocales());
884 U_ASSERT(locales
.isValid());
885 const char *localeID
= "root";
887 Locale
locale(localeID
);
888 LocalPointer
<StringEnumeration
> types(
889 Collator::getKeywordValuesForLocale("collation", locale
, FALSE
, errorCode
));
890 errorCode
.assertSuccess();
891 const char *type
; // first: default type
892 while((type
= types
->next(NULL
, errorCode
)) != NULL
) {
893 if(strncmp(type
, "private-", 8) == 0) {
894 errln("Collator::getKeywordValuesForLocale(%s) returns private collation keyword: %s",
897 Locale
localeWithType(locale
);
898 localeWithType
.setKeywordValue("collation", type
, errorCode
);
899 errorCode
.assertSuccess();
900 LocalPointer
<Collator
> coll(Collator::createInstance(localeWithType
, errorCode
));
901 if(errorCode
.logIfFailureAndReset("Collator::createInstance(%s)",
902 localeWithType
.getName())) {
905 Locale actual
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, errorCode
);
906 if(uhash_geti(prevLocales
, actual
.getName()) != 0) {
909 uhash_puti(prevLocales
, uprv_strdup(actual
.getName()), 1, errorCode
);
910 errorCode
.assertSuccess();
911 logln("TestTailoredElements(): requested %s -> actual %s",
912 localeWithType
.getName(), actual
.getName());
913 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
.getAlias());
917 // Note: It would be better to get tailored strings such that we can
918 // identify the prefix, and only get the CEs for the prefix+string,
919 // not also for the prefix.
920 // There is currently no API for that.
921 // It would help in an unusual case where a contraction starting in the prefix
922 // extends past its end, and we do not see the intended mapping.
923 // For example, for a mapping p|st, if there is also a contraction ps,
924 // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
925 LocalPointer
<UnicodeSet
> tailored(coll
->getTailoredSet(errorCode
));
926 errorCode
.assertSuccess();
927 UnicodeSetIterator
iter(*tailored
);
929 const UnicodeString
&s
= iter
.getString();
930 ces
.removeAllElements();
931 rbc
->internalGetCEs(s
, ces
, errorCode
);
932 errorCode
.assertSuccess();
933 for(int32_t i
= 0; i
< ces
.size(); ++i
) {
934 int64_t ce
= ces
.elementAti(i
);
935 if(!isValidCE(rootElements
, *root
, ce
)) {
936 errln("invalid tailored CE %016llx at CE index %d from string:",
937 (long long)ce
, (int)i
);
943 } while((localeID
= locales
->next(NULL
, errorCode
)) != NULL
);
944 uhash_close(prevLocales
);
// Builds a printable form of the `length` sort key bytes at p.
// Entries are separated by a space (U+0020); the visible branches emit a
// period (U+002E) and a vertical bar (U+007C) for particular byte values.
// NOTE(review): the branch conditions, the handling of all other byte values
// and the return statement are on lines missing from this listing.
947 UnicodeString
CollationTest::printSortKey(const uint8_t *p
, int32_t length
) {
949 for(int32_t i
= 0; i
< length
; ++i
) {
950 if(i
> 0) { s
.append((UChar
)0x20); }
953 s
.append((UChar
)0x2e); // period
955 s
.append((UChar
)0x7c); // vertical bar
963 UnicodeString
CollationTest::printCollationKey(const CollationKey
&key
) {
965 const uint8_t *p
= key
.getByteArray(length
);
966 return printSortKey(p
, length
);
// Reads lines from f via ucbuf_readline() and strips a '#' comment,
// trailing CR/LF and trailing spaces; a line that is non-empty after
// stripping is stored in fileLine.
// NOTE(review): fileLine.setTo(FALSE, ...) is given the reader's buffer
// pointer; presumably fileLine is only usable until the next read. The loop
// header and the return statements are on lines missing from this listing.
969 UBool
CollationTest::readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
972 const UChar
*line
= ucbuf_readline(f
, &lineLength
, errorCode
);
// End of input or read error.
973 if(line
== NULL
|| errorCode
.isFailure()) {
978 // Strip trailing CR/LF, comments, and spaces.
979 const UChar
*comment
= u_memchr(line
, 0x23, lineLength
); // '#'
980 if(comment
!= NULL
) {
// Cut the line off at the comment character.
981 lineLength
= (int32_t)(comment
- line
);
983 while(lineLength
> 0 && isCROrLF(line
[lineLength
- 1])) { --lineLength
; }
985 while(lineLength
> 0 && isSpace(line
[lineLength
- 1])) { --lineLength
; }
986 if(lineLength
!= 0) {
987 fileLine
.setTo(FALSE
, line
, lineLength
);
990 // Empty line, continue.
// Parses one space-delimited token of fileLine beginning at index start.
// If the token contains a '|', the part before it is unescaped into prefix;
// the part after it (or the whole token) is unescaped into s.
// An empty prefix or an empty string is reported with errln() and sets
// errorCode to U_PARSE_ERROR.
// NOTE(review): start is an in/out parameter; the visible code moves it past
// the '|', and its final value is set on a line missing from this listing.
994 void CollationTest::parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
,
995 UErrorCode
&errorCode
) {
996 int32_t length
= fileLine
.length();
// Find the end of the token: the next space or the end of the line.
998 for(i
= start
; i
< length
&& !isSpace(fileLine
[i
]); ++i
) {}
999 int32_t pipeIndex
= fileLine
.indexOf((UChar
)0x7c, start
, i
- start
); // '|'
1000 if(pipeIndex
>= 0) {
1001 prefix
= fileLine
.tempSubStringBetween(start
, pipeIndex
).unescape();
1002 if(prefix
.isEmpty()) {
1003 errln("empty prefix on line %d", (int)fileLineNumber
);
1005 errorCode
= U_PARSE_ERROR
;
1008 start
= pipeIndex
+ 1;
1012 s
= fileLine
.tempSubStringBetween(start
, i
).unescape();
1014 errln("empty string on line %d", (int)fileLineNumber
);
1016 errorCode
= U_PARSE_ERROR
;
// Parses a comparison line held in fileLine: a relation followed by one
// test string, which is returned through `s`.
//
// The relation begins with '<' (U+003C) or '=' (U+003D). For '<', the second
// character of the line selects the level; the error message below lists the
// accepted spellings as "= < <1 <2 <c <3 <4 <i". '=' yields ZERO_LEVEL.
// NOTE(review): the tests on `second` that pick each level are not visible in
// this excerpt.
//
// Each of these problems calls errln(), sets errorCode to U_PARSE_ERROR and
// returns Collation::NO_LEVEL:
//   - no relation, or no space after it;
//   - the test string carries a "prefix|" part (not allowed here);
//   - anything remains on the line after the string.
1022 Collation::Level
CollationTest::parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
) {
1023 Collation::Level relation
;
1025 if(fileLine
[0] == 0x3c) { // <
1026 UChar second
= fileLine
[1];
1030 relation
= Collation::PRIMARY_LEVEL
;
1033 relation
= Collation::SECONDARY_LEVEL
;
1036 relation
= Collation::TERTIARY_LEVEL
;
1039 relation
= Collation::QUATERNARY_LEVEL
;
1042 relation
= Collation::CASE_LEVEL
;
1045 relation
= Collation::IDENTICAL_LEVEL
;
1048 relation
= Collation::NO_LEVEL
;
1052 } else if(fileLine
[0] == 0x3d) { // =
1053 relation
= Collation::ZERO_LEVEL
;
// start == 0 means no relation was recognized; the relation must also be
// followed by a space.
1058 if(start
== 0 || !isSpace(fileLine
[start
])) {
1059 errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line %d", (int)fileLineNumber
);
1061 errorCode
.set(U_PARSE_ERROR
);
1062 return Collation::NO_LEVEL
;
1064 start
= skipSpaces(start
);
1065 UnicodeString prefix
;
1066 parseString(start
, prefix
, s
, errorCode
);
1067 if(errorCode
.isSuccess() && !prefix
.isEmpty()) {
1068 errln("prefix string not allowed for test string: on line %d", (int)fileLineNumber
);
1070 errorCode
.set(U_PARSE_ERROR
);
1071 return Collation::NO_LEVEL
;
// Anything left after the parsed string is a syntax error.
1073 if(start
< fileLine
.length()) {
1074 errln("unexpected line contents after test string on line %d", (int)fileLineNumber
);
1076 errorCode
.set(U_PARSE_ERROR
);
1077 return Collation::NO_LEVEL
;
// Lookup table pairing attribute names that may appear in the test data file
// with the corresponding UColAttribute constant. parseAndSetAttribute()
// searches it linearly through the entries' `name` and `attr` fields.
1082 static const struct {
1086 { "backwards", UCOL_FRENCH_COLLATION
},
1087 { "alternate", UCOL_ALTERNATE_HANDLING
},
1088 { "caseFirst", UCOL_CASE_FIRST
},
1089 { "caseLevel", UCOL_CASE_LEVEL
},
1090 // UCOL_NORMALIZATION_MODE is turned on and off automatically.
1091 { "strength", UCOL_STRENGTH
},
1092 // UCOL_HIRAGANA_QUATERNARY_MODE is deprecated.
1093 { "numeric", UCOL_NUMERIC_COLLATION
}
// Lookup table pairing attribute value names that may appear in the test data
// file with the corresponding UColAttributeValue constant.
// parseAndSetAttribute() searches it linearly through the entries' `name` and
// `value` fields. The table is shared by all attributes; an unsuitable
// attribute/value pairing is reported when setAttribute() fails.
1096 static const struct {
1098 UColAttributeValue value
;
1099 } attributeValues
[] = {
1100 { "default", UCOL_DEFAULT
},
1101 { "primary", UCOL_PRIMARY
},
1102 { "secondary", UCOL_SECONDARY
},
1103 { "tertiary", UCOL_TERTIARY
},
1104 { "quaternary", UCOL_QUATERNARY
},
1105 { "identical", UCOL_IDENTICAL
},
1106 { "off", UCOL_OFF
},
1108 { "shifted", UCOL_SHIFTED
},
1109 { "non-ignorable", UCOL_NON_IGNORABLE
},
1110 { "lower", UCOL_LOWER_FIRST
},
1111 { "upper", UCOL_UPPER_FIRST
}
// Parses an attribute line from fileLine and applies it to the collator `coll`.
//
// Parsing starts after skipping spaces from index 1. Three forms are handled:
//   - "reorder ..."  : a line starting with "reorder" is delegated to
//                      parseAndSetReorderCodes() (a missing '=' otherwise
//                      produces a "missing '='" parse error);
//   - "maxVariable=" : value space/punct/symbol/currency, applied with
//                      coll->setMaxVariable();
//   - "name=value"   : looked up in the `attributes` and `attributeValues`
//                      tables and applied with coll->setAttribute().
// Unknown names or values call errln() and set U_PARSE_ERROR; failures of the
// setter calls are reported with the ICU error name.
// NOTE(review): the guards that skip the setter calls when no Collator exists
// are not visible in this excerpt.
1114 void CollationTest::parseAndSetAttribute(IcuTestErrorCode
&errorCode
) {
1115 // Parse attributes even if the Collator could not be created,
1116 // in order to report syntax errors.
1117 int32_t start
= skipSpaces(1);
1118 int32_t equalPos
= fileLine
.indexOf((UChar
)0x3d);
1120 if(fileLine
.compare(start
, 7, UNICODE_STRING("reorder", 7)) == 0) {
1121 parseAndSetReorderCodes(start
+ 7, errorCode
);
1124 errln("missing '=' on line %d", (int)fileLineNumber
);
1126 errorCode
.set(U_PARSE_ERROR
);
// Split "name=value" around the '='.
1130 UnicodeString attrString
= fileLine
.tempSubStringBetween(start
, equalPos
);
1131 UnicodeString valueString
= fileLine
.tempSubString(equalPos
+1);
// maxVariable takes a reorder code and has its own setter, so it is handled
// separately from the generic attribute tables.
1132 if(attrString
== UNICODE_STRING("maxVariable", 11)) {
1133 UColReorderCode max
;
1134 if(valueString
== UNICODE_STRING("space", 5)) {
1135 max
= UCOL_REORDER_CODE_SPACE
;
1136 } else if(valueString
== UNICODE_STRING("punct", 5)) {
1137 max
= UCOL_REORDER_CODE_PUNCTUATION
;
1138 } else if(valueString
== UNICODE_STRING("symbol", 6)) {
1139 max
= UCOL_REORDER_CODE_SYMBOL
;
1140 } else if(valueString
== UNICODE_STRING("currency", 8)) {
1141 max
= UCOL_REORDER_CODE_CURRENCY
;
1143 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1145 errorCode
.set(U_PARSE_ERROR
);
1149 coll
->setMaxVariable(max
, errorCode
);
1150 if(errorCode
.isFailure()) {
1151 errln("setMaxVariable() failed on line %d: %s",
1152 (int)fileLineNumber
, errorCode
.errorName());
// Linear search of the attribute-name table; reaching the end of the table
// means the name is unknown.
1162 for(int32_t i
= 0;; ++i
) {
1163 if(i
== UPRV_LENGTHOF(attributes
)) {
1164 errln("invalid attribute name on line %d", (int)fileLineNumber
);
1166 errorCode
.set(U_PARSE_ERROR
);
1169 if(attrString
== UnicodeString(attributes
[i
].name
, -1, US_INV
)) {
1170 attr
= attributes
[i
].attr
;
1175 UColAttributeValue value
;
// Same linear search for the value name.
1176 for(int32_t i
= 0;; ++i
) {
1177 if(i
== UPRV_LENGTHOF(attributeValues
)) {
1178 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1180 errorCode
.set(U_PARSE_ERROR
);
1183 if(valueString
== UnicodeString(attributeValues
[i
].name
, -1, US_INV
)) {
1184 value
= attributeValues
[i
].value
;
1190 coll
->setAttribute(attr
, value
, errorCode
);
1191 if(errorCode
.isFailure()) {
1192 errln("illegal attribute=value combination on line %d: %s",
1193 (int)fileLineNumber
, errorCode
.errorName());
// Parses the space-separated reorder code names in fileLine from index `start`
// to the end of the line and applies them with coll->setReorderCodes().
//
// Each name is converted to invariant chars and resolved with
// CollationRuleParser::getReorderCode(). The name "default" (compared
// case-insensitively) maps to UCOL_REORDER_CODE_DEFAULT. An unresolvable name
// calls errln() and sets U_PARSE_ERROR. A failure of setReorderCodes() is
// reported with the ICU error name.
// NOTE(review): the condition on `code` that triggers the "default"/error
// fallback is not visible in this excerpt.
1201 void CollationTest::parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
) {
1202 UVector32
reorderCodes(errorCode
);
1203 while(start
< fileLine
.length()) {
1204 start
= skipSpaces(start
);
// [start, limit) delimits the next name: limit stops at a space or line end.
1205 int32_t limit
= start
;
1206 while(limit
< fileLine
.length() && !isSpace(fileLine
[limit
])) { ++limit
; }
1208 name
.appendInvariantChars(fileLine
.tempSubStringBetween(start
, limit
), errorCode
);
1209 int32_t code
= CollationRuleParser::getReorderCode(name
.data());
1211 if(uprv_stricmp(name
.data(), "default") == 0) {
1212 code
= UCOL_REORDER_CODE_DEFAULT
; // -1
1214 errln("invalid reorder code '%s' on line %d", name
.data(), (int)fileLineNumber
);
1216 errorCode
.set(U_PARSE_ERROR
);
1220 reorderCodes
.addElement(code
, errorCode
);
1224 coll
->setReorderCodes(reorderCodes
.getBuffer(), reorderCodes
.size(), errorCode
);
1225 if(errorCode
.isFailure()) {
1226 errln("setReorderCodes() failed on line %d: %s",
1227 (int)fileLineNumber
, errorCode
.errorName());
// Reads collation rules from `f` and builds a RuleBasedCollator from them,
// storing it in the member `coll`.
//
// Lines are consumed until end of input or until a line whose first character
// is a section starter; each line is unescaped and appended to the rule
// string. On construction failure the error name, the parser's reason string
// and, when available, the rule offset and the pre/post context snippet are
// logged. On success the reason string is asserted to be empty.
// NOTE(review): what happens to `coll` and errorCode after a failed build is
// not visible in this excerpt.
1235 void CollationTest::buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1236 UnicodeString rules
;
1237 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1238 rules
.append(fileLine
.unescape());
1240 if(errorCode
.isFailure()) { return; }
1243 UParseError parseError
;
1244 UnicodeString reason
;
1246 coll
= new RuleBasedCollator(rules
, parseError
, reason
, errorCode
);
1248 errln("unable to allocate a new collator");
1249 errorCode
.set(U_MEMORY_ALLOCATION_ERROR
);
1252 if(errorCode
.isFailure()) {
// dataerrln(): reported as a data-related failure by the test framework.
1253 dataerrln("RuleBasedCollator(rules) failed - %s", errorCode
.errorName());
1254 infoln(UnicodeString(" reason: ") + reason
);
1255 if(parseError
.offset
>= 0) { infoln(" rules offset: %d", (int)parseError
.offset
); }
1256 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
1257 infoln(UnicodeString(" snippet: ...") +
1258 parseError
.preContext
+ "(!)" + parseError
.postContext
+ "...");
1264 assertEquals("no error reason when RuleBasedCollator(rules) succeeds",
1265 UnicodeString(), reason
);
// Replaces the member `coll` with a collator for the root locale, created via
// Collator::createInstance(Locale::getRoot()). Does nothing if errorCode
// already indicates failure; a creation failure is reported with dataerrln().
// NOTE(review): disposal of any previous collator and the post-failure
// handling are not visible in this excerpt.
1269 void CollationTest::setRootCollator(IcuTestErrorCode
&errorCode
) {
1270 if(errorCode
.isFailure()) { return; }
1272 coll
= Collator::createInstance(Locale::getRoot(), errorCode
);
1273 if(errorCode
.isFailure()) {
1274 dataerrln("unable to create a root collator");
// Creates a collator for the locale named on the current fileLine and stores
// it in the member `coll`.
//
// The locale ID is the text from index 9 onward (the caller matches the
// 9-character prefix "@ locale "). Because '@' is not an invariant character,
// it is temporarily replaced with '*' in fileLine so that
// appendInvariantChars() accepts the text, and then restored in the converted
// char buffer. An empty ID, a conversion failure, or a bogus Locale is
// reported as "invalid language tag" with U_PARSE_ERROR (unless errorCode
// already holds a failure). A creation failure is reported with dataerrln().
1279 void CollationTest::setLocaleCollator(IcuTestErrorCode
&errorCode
) {
1280 if(errorCode
.isFailure()) { return; }
1283 int32_t at
= fileLine
.indexOf((UChar
)0x40, 9); // @ is not invariant
1285 fileLine
.setCharAt(at
, (UChar
)0x2a); // *
1287 CharString localeID
;
1288 localeID
.appendInvariantChars(fileLine
.tempSubString(9), errorCode
);
// Restore the '@' at the matching offset of the converted buffer
// (fileLine index `at` corresponds to localeID index at - 9).
1290 localeID
.data()[at
- 9] = '@';
1292 Locale
locale(localeID
.data());
1293 if(fileLine
.length() == 9 || errorCode
.isFailure() || locale
.isBogus()) {
1294 errln("invalid language tag on line %d", (int)fileLineNumber
);
1296 if(errorCode
.isSuccess()) { errorCode
.set(U_PARSE_ERROR
); }
1300 logln("creating a collator for locale ID %s", locale
.getName());
1301 coll
= Collator::createInstance(locale
, errorCode
);
1302 if(errorCode
.isFailure()) {
1303 dataerrln("unable to create a collator for locale %s on line %d",
1304 locale
.getName(), (int)fileLineNumber
);
// Decides whether `s` must be collated with normalization turned on.
//
// Returns TRUE when errorCode is already a failure, when `s` is not
// normalized according to the `fcd` normalizer, or when `s` contains U+0F71
// immediately followed by U+0F73, U+0F75 or U+0F81 (see the comment below on
// Tibetan composite vowel signs).
// NOTE(review): the final return for the "no normalization needed" case is
// outside this excerpt; presumably FALSE.
1312 UBool
CollationTest::needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
1313 if(U_FAILURE(errorCode
) || !fcd
->isNormalized(s
, errorCode
)) { return TRUE
; }
1314 // In some sequences with Tibetan composite vowel signs,
1315 // even if the string passes the FCD check,
1316 // those composites must be decomposed.
1317 // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81.
1319 while((index
= s
.indexOf((UChar
)0xf71, index
)) >= 0) {
// ++index moves to the character after the 0F71 that was found; it also
// makes the next indexOf() search resume past this occurrence.
1320 if(++index
< s
.length()) {
1322 if(c
== 0xf73 || c
== 0xf75 || c
== 0xf81) { return TRUE
; }
// Builds the sort key for s[0..length) piecewise with
// coll->internalNextSortKeyPart(), requesting `partSize` bytes per call, and
// appends the pieces to `dest`.
//
// A call that yields fewer than partSize bytes marks the last part (`done`).
// Returns FALSE immediately if errorCode is already a failure; otherwise the
// visible return yields errorCode.isSuccess().
// partSize must not exceed the capacity of the local `part` buffer (asserted).
// NOTE(review): the declaration of `part`, the enclosing loop, and the
// adjustment of partLength for the trailing 00 byte are not visible in this
// excerpt.
1328 UBool
CollationTest::getSortKeyParts(const UChar
*s
, int32_t length
,
1329 CharString
&dest
, int32_t partSize
,
1330 IcuTestErrorCode
&errorCode
) {
1331 if(errorCode
.isFailure()) { return FALSE
; }
1333 U_ASSERT(partSize
<= UPRV_LENGTHOF(part
));
1335 uiter_setString(&iter
, s
, length
);
// Iteration state carried between internalNextSortKeyPart() calls; starts zeroed.
1336 uint32_t state
[2] = { 0, 0 };
1338 int32_t partLength
= coll
->internalNextSortKeyPart(&iter
, state
, part
, partSize
, errorCode
);
1339 UBool done
= partLength
< partSize
;
1341 // At the end, append the next byte as well which should be 00.
1344 dest
.append(reinterpret_cast<char *>(part
), partLength
, errorCode
);
1346 return errorCode
.isSuccess();
// Computes the collation key of s[0..length) into `key` and sanity-checks it.
//
// `norm` is a label for the current normalization setting, used only in
// diagnostics. Checks performed on the key bytes:
//   - the key is non-empty and its last byte is 00;
//   - (per the error message) no 00 byte occurs before the last byte;
//   - the number of 01 level-separator bytes equals numLevels - 1, where
//     numLevels is derived from the strength and case-level attributes;
//   - rebuilding the key piecewise with internalNextSortKeyPart() yields
//     exactly the same bytes for each part size in partSizes.
// Returns FALSE immediately if errorCode is already a failure.
// NOTE(review): the numLevels adjustments and the failure returns are in lines
// not visible in this excerpt.
//
// Fix: the part-size loop now passes the current partSize to
// getSortKeyParts(). It previously passed the constant 32 on every iteration,
// so the sizes 3 and 1 were never exercised although the diagnostics named
// them.
1351 UBool
CollationTest::getCollationKey(const char *norm
, const UnicodeString
&line
,
1352 const UChar
*s
, int32_t length
,
1353 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1354 if(errorCode
.isFailure()) { return FALSE
; }
1355 coll
->getCollationKey(s
, length
, key
, errorCode
);
1356 if(errorCode
.isFailure()) {
1357 infoln(fileTestName
);
1358 errln("Collator(%s).getCollationKey() failed: %s",
1359 norm
, errorCode
.errorName());
1364 const uint8_t *keyBytes
= key
.getByteArray(keyLength
);
1365 if(keyLength
== 0 || keyBytes
[keyLength
- 1] != 0) {
1366 infoln(fileTestName
);
1367 errln("Collator(%s).getCollationKey() wrote an empty or unterminated key",
1370 infoln(printCollationKey(key
));
1374 int32_t numLevels
= coll
->getAttribute(UCOL_STRENGTH
, errorCode
);
1375 if(numLevels
< UCOL_IDENTICAL
) {
1380 if(coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
) {
1383 errorCode
.assertSuccess();
// Scan all bytes except the final terminator.
1384 int32_t numLevelSeparators
= 0;
1385 for(int32_t i
= 0; i
< (keyLength
- 1); ++i
) {
1386 uint8_t b
= keyBytes
[i
];
1388 infoln(fileTestName
);
1389 errln("Collator(%s).getCollationKey() contains a 00 byte", norm
);
1391 infoln(printCollationKey(key
));
1394 if(b
== 1) { ++numLevelSeparators
; }
1396 if(numLevelSeparators
!= (numLevels
- 1)) {
1397 infoln(fileTestName
);
1398 errln("Collator(%s).getCollationKey() has %d level separators for %d levels",
1399 norm
, (int)numLevelSeparators
, (int)numLevels
);
1401 infoln(printCollationKey(key
));
1405 // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
1406 static const int32_t partSizes
[] = { 32, 3, 1 };
1407 for(int32_t psi
= 0; psi
< UPRV_LENGTHOF(partSizes
); ++psi
) {
1408 int32_t partSize
= partSizes
[psi
];
// Use the part size selected for this iteration, not a fixed value.
1410 if(!getSortKeyParts(s
, length
, parts
, partSize, errorCode
)) {
1411 infoln(fileTestName
);
1412 errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
1413 norm
, (int)partSize
, errorCode
.errorName());
1417 if(keyLength
!= parts
.length() || uprv_memcmp(keyBytes
, parts
.data(), keyLength
) != 0) {
1418 infoln(fileTestName
);
1419 errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
1420 norm
, (int)partSize
);
1422 infoln(printCollationKey(key
));
1423 infoln(printSortKey(reinterpret_cast<uint8_t *>(parts
.data()), parts
.length()));
1431 * Changes the key to the merged segments of the U+FFFE-separated substrings of s.
1432 * Leaves key unchanged if s does not contain U+FFFE.
1433 * @return TRUE if the key was successfully changed
// Builds the merged sort key of the U+FFFE-separated segments of `s` and, on
// success, assigns it to `key`.
//
// `length` may be negative, in which case u_strlen(s) is used. For each
// segment a collation key is computed (key2) and merged with the key
// accumulated so far (key1, a copy of mergedKey) using ucol_mergeSortkeys().
// The first segment's key is copied as-is. The destination buffer grows on
// demand: at least 200 bytes, doubled when doubling suffices, otherwise
// exactly the required capacity.
// Returns FALSE immediately if errorCode is already a failure.
// NOTE(review): the end-of-string test, the segmentStart update and the
// return statements are not visible in this excerpt.
1435 UBool
CollationTest::getMergedCollationKey(const UChar
*s
, int32_t length
,
1436 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1437 if(errorCode
.isFailure()) { return FALSE
; }
1438 LocalMemory
<uint8_t> mergedKey
;
1439 int32_t mergedKeyLength
= 0;
1440 int32_t mergedKeyCapacity
= 0;
1441 int32_t sLength
= (length
>= 0) ? length
: u_strlen(s
);
1442 int32_t segmentStart
= 0;
1443 for(int32_t i
= 0;;) {
1445 if(segmentStart
== 0) {
1446 // s does not contain any U+FFFE.
1449 } else if(s
[i
] != 0xfffe) {
1453 // Get the sort key for another segment and merge it into mergedKey.
1454 CollationKey
key1(mergedKey
.getAlias(), mergedKeyLength
); // copies the bytes
1456 coll
->getCollationKey(s
+ segmentStart
, i
- segmentStart
, key2
, errorCode
);
1457 int32_t key1Length
, key2Length
;
1458 const uint8_t *key1Bytes
= key1
.getByteArray(key1Length
);
1459 const uint8_t *key2Bytes
= key2
.getByteArray(key2Length
);
// Required capacity: both keys, minus one byte when key1 is non-empty.
1461 int32_t minCapacity
= key1Length
+ key2Length
;
1462 if(key1Length
> 0) { --minCapacity
; }
1463 if(minCapacity
<= mergedKeyCapacity
) {
1464 dest
= mergedKey
.getAlias();
1466 if(minCapacity
<= 200) {
1467 mergedKeyCapacity
= 200;
1468 } else if(minCapacity
<= 2 * mergedKeyCapacity
) {
1469 mergedKeyCapacity
*= 2;
1471 mergedKeyCapacity
= minCapacity
;
// Reallocating is safe here: key1 holds its own copy of the old bytes.
1473 dest
= mergedKey
.allocateInsteadAndReset(mergedKeyCapacity
);
1475 U_ASSERT(dest
!= NULL
|| mergedKeyCapacity
== 0);
1476 if(key1Length
== 0) {
1477 // key2 is the sort key for the first segment.
1478 uprv_memcpy(dest
, key2Bytes
, key2Length
);
1479 mergedKeyLength
= key2Length
;
1482 ucol_mergeSortkeys(key1Bytes
, key1Length
, key2Bytes
, key2Length
,
1483 dest
, mergedKeyCapacity
);
1485 if(i
== sLength
) { break; }
1488 key
= CollationKey(mergedKey
.getAlias(), mergedKeyLength
);
1495 * Replaces unpaired surrogates with U+FFFD.
1496 * Returns s if no replacement was made, otherwise buffer.
// Produces a version of `s` in which every code point that is a surrogate
// (i.e. an unpaired surrogate, since char32At() combines valid pairs) is
// replaced with U+FFFD.
//
// `buffer` is filled lazily: text is copied into it only once the first
// replacement is needed, in chunks covering everything not yet copied.
// If `buffer` is still empty after the scan, no replacement was made;
// otherwise the remaining tail of `s` is appended to it.
// NOTE(review): the advance of `i` and the return statements are not visible
// in this excerpt.
1498 const UnicodeString
&surrogatesToFFFD(const UnicodeString
&s
, UnicodeString
&buffer
) {
1500 while(i
< s
.length()) {
1501 UChar32 c
= s
.char32At(i
);
1502 if(U_IS_SURROGATE(c
)) {
// Catch up: copy the unmodified text between the last copied position and i.
1503 if(buffer
.length() < i
) {
1504 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
1506 buffer
.append((UChar
)0xfffd);
1510 if(buffer
.isEmpty()) {
1513 if(buffer
.length() < i
) {
1514 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
// Determines the collation level at which two sort keys first differ.
//
// Returns Collation::NO_LEVEL when `order` is UCOL_EQUAL. Otherwise both keys
// are walked byte by byte, starting at PRIMARY_LEVEL, until the first
// differing byte. Level separator bytes common to both keys are used to track
// the current level, with special handling of CASE_LEVEL when the collator
// has no case level (collHasCaseLevel is false).
// The loop has no explicit length bound: it relies on the keys differing
// somewhere, which the UCOL_EQUAL early return is meant to guarantee.
// NOTE(review): the level increments and the final return are not visible in
// this excerpt; presumably the level is advanced per separator and skips
// CASE_LEVEL when absent -- confirm.
1519 int32_t getDifferenceLevel(const CollationKey
&prevKey
, const CollationKey
&key
,
1520 UCollationResult order
, UBool collHasCaseLevel
) {
1521 if(order
== UCOL_EQUAL
) {
1522 return Collation::NO_LEVEL
;
1524 int32_t prevKeyLength
;
1525 const uint8_t *prevBytes
= prevKey
.getByteArray(prevKeyLength
);
1527 const uint8_t *bytes
= key
.getByteArray(keyLength
);
1528 int32_t level
= Collation::PRIMARY_LEVEL
;
1529 for(int32_t i
= 0;; ++i
) {
1530 uint8_t b
= prevBytes
[i
];
1531 if(b
!= bytes
[i
]) { break; }
1532 if(b
== Collation::LEVEL_SEPARATOR_BYTE
) {
1534 if(level
== Collation::CASE_LEVEL
&& !collHasCaseLevel
) {
// Verifies that every comparison API of the collator `coll` agrees with the
// expected relation between `prevString` and `s`.
//
// Parameters:
//   norm          - label for the normalization setting, used in diagnostics;
//   prevFileLine  - test-file line that produced prevString, for diagnostics;
//   prevString, s - the two strings to compare (previous, current);
//   expectedOrder - expected result of comparing prevString with s;
//   expectedLevel - expected difference level, or Collation::NO_LEVEL to skip
//                   the level checks;
//   errorCode     - in/out ICU error code; FALSE is returned at once if it
//                   already indicates failure.
//
// Checks, each in both directions where applicable (the reverse direction
// expects -expectedOrder):
//   1. compare(UnicodeString, UnicodeString)
//   2. compare(const UChar *, -1, ...) with NUL-terminated input
//   3. compareUTF8() and internalCompareUTF8(), on input whose unpaired
//      surrogates were replaced with U+FFFD
//   4. compare(UCharIterator, UCharIterator)
//   5. CollationKey::compareTo() and the level of the first key difference
//   6. the same order and level for keys merged from U+FFFE-separated segments
// Every failure prints the test name, the previous line and both sort keys.
// NOTE(review): the return statements after each failure and the guards
// around the NUL-terminated checks are not visible in this excerpt.
1544 UBool
CollationTest::checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
1545 const UnicodeString
&prevString
, const UnicodeString
&s
,
1546 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
1547 IcuTestErrorCode
&errorCode
) {
1548 if(errorCode
.isFailure()) { return FALSE
; }
1550 // Get the sort keys first, for error debug output.
1551 CollationKey prevKey
;
1552 if(!getCollationKey(norm
, prevFileLine
, prevString
.getBuffer(), prevString
.length(),
1553 prevKey
, errorCode
)) {
1557 if(!getCollationKey(norm
, fileLine
, s
.getBuffer(), s
.length(), key
, errorCode
)) { return FALSE
; }
// Check 1: UnicodeString comparison, forward then reversed.
1559 UCollationResult order
= coll
->compare(prevString
, s
, errorCode
);
1560 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1561 infoln(fileTestName
);
1562 errln("line %d Collator(%s).compare(previous, current) wrong order: %d != %d (%s)",
1563 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1564 infoln(prevFileLine
);
1566 infoln(printCollationKey(prevKey
));
1567 infoln(printCollationKey(key
));
1570 order
= coll
->compare(s
, prevString
, errorCode
);
1571 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1572 infoln(fileTestName
);
1573 errln("line %d Collator(%s).compare(current, previous) wrong order: %d != %d (%s)",
1574 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1575 infoln(prevFileLine
);
1577 infoln(printCollationKey(prevKey
));
1578 infoln(printCollationKey(key
));
1581 // Test NUL-termination if the strings do not contain NUL characters.
1582 UBool containNUL
= prevString
.indexOf((UChar
)0) >= 0 || s
.indexOf((UChar
)0) >= 0;
// Check 2: length -1 makes compare() rely on NUL termination of the buffers.
1584 order
= coll
->compare(prevString
.getBuffer(), -1, s
.getBuffer(), -1, errorCode
);
1585 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1586 infoln(fileTestName
);
1587 errln("line %d Collator(%s).compare(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1588 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1589 infoln(prevFileLine
);
1591 infoln(printCollationKey(prevKey
));
1592 infoln(printCollationKey(key
));
1595 order
= coll
->compare(s
.getBuffer(), -1, prevString
.getBuffer(), -1, errorCode
);
1596 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1597 infoln(fileTestName
);
1598 errln("line %d Collator(%s).compare(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1599 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1600 infoln(prevFileLine
);
1602 infoln(printCollationKey(prevKey
));
1603 infoln(printCollationKey(key
));
1608 // compare(UTF-16) treats unpaired surrogates like unassigned code points.
1609 // Unpaired surrogates cannot be converted to UTF-8.
1610 // Create valid UTF-16 strings if necessary, and use those for
1611 // both the expected compare() result and for the input to compare(UTF-8).
1612 UnicodeString prevBuffer
, sBuffer
;
1613 const UnicodeString
&prevValid
= surrogatesToFFFD(prevString
, prevBuffer
);
1614 const UnicodeString
&sValid
= surrogatesToFFFD(s
, sBuffer
);
1615 std::string prevUTF8
, sUTF8
;
1616 UnicodeString(prevValid
).toUTF8String(prevUTF8
);
1617 UnicodeString(sValid
).toUTF8String(sUTF8
);
1618 UCollationResult expectedUTF8Order
;
// Address comparison: surrogatesToFFFD() hands back its input unchanged when
// no replacement was needed, so the file's expected order still applies.
1619 if(&prevValid
== &prevString
&& &sValid
== &s
) {
1620 expectedUTF8Order
= expectedOrder
;
1622 expectedUTF8Order
= coll
->compare(prevValid
, sValid
, errorCode
);
// Check 3: UTF-8 comparison, forward then reversed.
1625 order
= coll
->compareUTF8(prevUTF8
, sUTF8
, errorCode
);
1626 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1627 infoln(fileTestName
);
1628 errln("line %d Collator(%s).compareUTF8(previous, current) wrong order: %d != %d (%s)",
1629 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1630 infoln(prevFileLine
);
1632 infoln(printCollationKey(prevKey
));
1633 infoln(printCollationKey(key
));
1636 order
= coll
->compareUTF8(sUTF8
, prevUTF8
, errorCode
);
1637 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1638 infoln(fileTestName
);
1639 errln("line %d Collator(%s).compareUTF8(current, previous) wrong order: %d != %d (%s)",
1640 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1641 infoln(prevFileLine
);
1643 infoln(printCollationKey(prevKey
));
1644 infoln(printCollationKey(key
));
1647 // Test NUL-termination if the strings do not contain NUL characters.
1649 order
= coll
->internalCompareUTF8(prevUTF8
.c_str(), -1, sUTF8
.c_str(), -1, errorCode
);
1650 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1651 infoln(fileTestName
);
1652 errln("line %d Collator(%s).internalCompareUTF8(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1653 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1654 infoln(prevFileLine
);
1656 infoln(printCollationKey(prevKey
));
1657 infoln(printCollationKey(key
));
1660 order
= coll
->internalCompareUTF8(sUTF8
.c_str(), -1, prevUTF8
.c_str(), -1, errorCode
);
1661 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1662 infoln(fileTestName
);
1663 errln("line %d Collator(%s).internalCompareUTF8(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1664 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1665 infoln(prevFileLine
);
1667 infoln(printCollationKey(prevKey
));
1668 infoln(printCollationKey(key
));
// Check 4: comparison through UCharIterator objects over the UTF-16 buffers.
1673 UCharIterator leftIter
;
1674 UCharIterator rightIter
;
1675 uiter_setString(&leftIter
, prevString
.getBuffer(), prevString
.length());
1676 uiter_setString(&rightIter
, s
.getBuffer(), s
.length());
1677 order
= coll
->compare(leftIter
, rightIter
, errorCode
);
1678 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1679 infoln(fileTestName
);
1680 errln("line %d Collator(%s).compare(UCharIterator: previous, current) "
1681 "wrong order: %d != %d (%s)",
1682 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1683 infoln(prevFileLine
);
1685 infoln(printCollationKey(prevKey
));
1686 infoln(printCollationKey(key
));
// Check 5: sort key comparison, then the level of the first key difference.
1690 order
= prevKey
.compareTo(key
, errorCode
);
1691 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1692 infoln(fileTestName
);
1693 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo() wrong order: %d != %d (%s)",
1694 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1695 infoln(prevFileLine
);
1697 infoln(printCollationKey(prevKey
));
1698 infoln(printCollationKey(key
));
1701 UBool collHasCaseLevel
= coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
;
1702 int32_t level
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1703 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
1704 if(level
!= expectedLevel
) {
1705 infoln(fileTestName
);
1706 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
1707 (int)fileLineNumber
, norm
, order
, level
, expectedLevel
);
1708 infoln(prevFileLine
);
1710 infoln(printCollationKey(prevKey
));
1711 infoln(printCollationKey(key
));
1716 // If either string contains U+FFFE, then their sort keys must compare the same as
1717 // the merged sort keys of each string's between-FFFE segments.
1719 // It is not required that
1720 // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
1721 // only that those two methods yield the same order.
1723 // Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
1724 if((getMergedCollationKey(prevString
.getBuffer(), prevString
.length(), prevKey
, errorCode
) |
1725 getMergedCollationKey(s
.getBuffer(), s
.length(), key
, errorCode
)) ||
1726 errorCode
.isFailure()) {
// Check 6: prevKey and key now hold merged keys wherever merging applied.
1727 order
= prevKey
.compareTo(key
, errorCode
);
1728 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1729 infoln(fileTestName
);
1730 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1731 "(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)",
1732 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1733 infoln(prevFileLine
);
1735 infoln(printCollationKey(prevKey
));
1736 infoln(printCollationKey(key
));
1739 int32_t mergedLevel
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1740 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
// The merged keys must differ on the same level as the unmerged keys did.
1741 if(mergedLevel
!= level
) {
1742 infoln(fileTestName
);
1743 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1744 "(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d",
1745 (int)fileLineNumber
, norm
, order
, mergedLevel
, level
);
1746 infoln(prevFileLine
);
1748 infoln(printCollationKey(prevKey
));
1749 infoln(printCollationKey(key
));
// Runs one "* compare" section of the test data file.
//
// Lines are read from `f` until end of input or the next section starter.
// Each line is parsed with parseRelationAndString() and compared against the
// previous string via checkCompareTwo():
//   - with normalization off, but only if neither string needs normalization;
//   - with normalization on;
//   - again on the NFD forms when either string is not already NFD.
// A relation of ZERO_LEVEL expects UCOL_EQUAL, every other relation expects
// UCOL_LESS. Both strings are kept NUL-terminated because checkCompareTwo()
// also exercises the NUL-terminated compare() overloads.
// NOTE(review): the handling of parse errors, of a missing Collator, and the
// update of prevString are in lines not visible in this excerpt.
//
// Fix: the `norm` labels passed to checkCompareTwo() now match the
// UCOL_NORMALIZATION_MODE value set just before each call. They were
// previously swapped, so failure diagnostics named the opposite mode.
1757 void CollationTest::checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1758 if(errorCode
.isFailure()) { return; }
1759 UnicodeString prevFileLine
= UNICODE_STRING("(none)", 6);
1760 UnicodeString prevString
, s
;
1761 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
1762 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1763 // Parse the line even if it will be ignored (when we do not have a Collator)
1764 // in order to report syntax issues.
1765 Collation::Level relation
= parseRelationAndString(s
, errorCode
);
1766 if(errorCode
.isFailure()) {
1771 // We were unable to create the Collator but continue with tests.
1772 // Ignore test data for this Collator.
1773 // The next Collator creation might work.
1776 UCollationResult expectedOrder
= (relation
== Collation::ZERO_LEVEL
) ? UCOL_EQUAL
: UCOL_LESS
;
1777 Collation::Level expectedLevel
= relation
;
1778 s
.getTerminatedBuffer(); // Ensure NUL-termination.
// Normalization may only be switched off when neither input requires it.
1780 if(!needsNormalization(prevString
, errorCode
) && !needsNormalization(s
, errorCode
)) {
1781 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, errorCode
);
1782 isOk
= checkCompareTwo("normalization=off", prevFileLine
, prevString
, s
,
1783 expectedOrder
, expectedLevel
, errorCode
);
1786 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, errorCode
);
1787 isOk
= checkCompareTwo("normalization=on", prevFileLine
, prevString
, s
,
1788 expectedOrder
, expectedLevel
, errorCode
);
// Repeat with NFD-normalized input when that differs from the original input.
1790 if(isOk
&& (!nfd
->isNormalized(prevString
, errorCode
) || !nfd
->isNormalized(s
, errorCode
))) {
1791 UnicodeString pn
= nfd
->normalize(prevString
, errorCode
);
1792 UnicodeString n
= nfd
->normalize(s
, errorCode
);
1793 pn
.getTerminatedBuffer();
1794 n
.getTerminatedBuffer();
1795 errorCode
.assertSuccess();
1796 isOk
= checkCompareTwo("NFD input", prevFileLine
, pn
, n
,
1797 expectedOrder
, expectedLevel
, errorCode
);
1800 errorCode
.reset(); // already reported
1802 prevFileLine
= fileLine
;
1804 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
// Data-driven collation test: interprets the sections of collationtest.txt.
//
// Setup: obtains the FCD and NFD normalizer instances and opens the test file
// from the source test data directory as UTF-8; a failure of either step is
// logged and reset.
// Main loop: every section must begin with a section-starter line, which is
// dispatched as follows:
//   "** test: ..." - remembered in fileTestName for diagnostics
//   "@ root"       - setRootCollator()
//   "@ locale ..." - setLocaleCollator()
//   "@ rules"      - buildTailoring()
//   "% ..."        - parseAndSetAttribute()
//   "* compare"    - checkCompareStrings()
// Anything else is reported as a syntax error.
// NOTE(review): the early returns after setup failures and the clearing of
// fileLine after simple directives are not visible in this excerpt.
1808 void CollationTest::TestDataDriven() {
1809 IcuTestErrorCode
errorCode(*this, "TestDataDriven");
1811 fcd
= Normalizer2Factory::getFCDInstance(errorCode
);
1812 nfd
= Normalizer2::getNFDInstance(errorCode
);
1813 if(errorCode
.logDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) {
1817 CharString
path(getSourceTestData(errorCode
), errorCode
);
1818 path
.appendPathPart("collationtest.txt", errorCode
);
1819 const char *codePage
= "UTF-8";
1820 LocalUCHARBUFPointer
f(ucbuf_open(path
.data(), &codePage
, TRUE
, FALSE
, errorCode
));
1821 if(errorCode
.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) {
1824 // Read a new line if necessary.
1825 // Sub-parsers leave the first line set that they do not handle.
1826 while(errorCode
.isSuccess() && (!fileLine
.isEmpty() || readNonEmptyLine(f
.getAlias(), errorCode
))) {
1827 if(!isSectionStarter(fileLine
[0])) {
1828 errln("syntax error on line %d", (int)fileLineNumber
);
1832 if(fileLine
.startsWith(UNICODE_STRING("** test: ", 9))) {
1833 fileTestName
= fileLine
;
1836 } else if(fileLine
== UNICODE_STRING("@ root", 6)) {
1837 setRootCollator(errorCode
);
1839 } else if(fileLine
.startsWith(UNICODE_STRING("@ locale ", 9))) {
1840 setLocaleCollator(errorCode
);
1842 } else if(fileLine
== UNICODE_STRING("@ rules", 7)) {
1843 buildTailoring(f
.getAlias(), errorCode
);
1844 } else if(fileLine
[0] == 0x25 && isSpace(fileLine
[1])) { // %
1845 parseAndSetAttribute(errorCode
);
1846 } else if(fileLine
== UNICODE_STRING("* compare", 9)) {
1847 checkCompareStrings(f
.getAlias(), errorCode
);
1849 errln("syntax error on line %d", (int)fileLineNumber
);
1856 #endif // !UCONFIG_NO_COLLATION