2 *******************************************************************************
3 * Copyright (C) 2012-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
8 * created on: 2012apr27
9 * created by: Markus W. Scherer
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "unicode/coll.h"
17 #include "unicode/errorcode.h"
18 #include "unicode/localpointer.h"
19 #include "unicode/normalizer2.h"
20 #include "unicode/sortkey.h"
21 #include "unicode/std_string.h"
22 #include "unicode/strenum.h"
23 #include "unicode/tblcoll.h"
24 #include "unicode/uiter.h"
25 #include "unicode/uniset.h"
26 #include "unicode/unistr.h"
27 #include "unicode/usetiter.h"
28 #include "unicode/ustring.h"
31 #include "collation.h"
32 #include "collationdata.h"
33 #include "collationfcd.h"
34 #include "collationiterator.h"
35 #include "collationroot.h"
36 #include "collationrootelements.h"
37 #include "collationruleparser.h"
38 #include "collationweights.h"
41 #include "normalizer2impl.h"
44 #include "uitercollationiterator.h"
45 #include "utf16collationiterator.h"
46 #include "utf8collationiterator.h"
51 class CodePointIterator
;
53 // TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey)
// Collation test suite, derived from IntlTest. Declares the test cases,
// their checking helpers, and the line-parsing helpers and fields used by
// the data-driven test.
55 class CollationTest
: public IntlTest
{
// Constructor initializer list: both normalizer pointers start out as NULL.
58 : fcd(NULL
), nfd(NULL
),
// Test dispatcher; the trailing `par` argument is optional (defaults to NULL).
66 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=NULL
);
// Individual test cases.
70 void TestNulTerminated();
71 void TestIllegalUTF8();
72 void TestShortFCDData();
74 void TestCollationWeights();
75 void TestRootElements();
76 void TestTailoredElements();
77 void TestDataDriven();
// Helpers shared by the FCD and weight-allocation tests.
80 void checkFCD(const char *name
, CollationIterator
&ci
, CodePointIterator
&cpi
);
81 void checkAllocWeights(CollationWeights
&cw
,
82 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
83 int32_t someLength
, int32_t minCount
);
// Formatting helpers that turn sort key bytes into a printable UnicodeString.
85 static UnicodeString
printSortKey(const uint8_t *p
, int32_t length
);
86 static UnicodeString
printCollationKey(const CollationKey
&key
);
88 // Helpers & fields for data-driven test.
// TRUE for U+000A or U+000D.
89 static UBool
isCROrLF(UChar c
) { return c
== 0xa || c
== 0xd; }
// TRUE for U+0009, U+0020 or U+3000.
90 static UBool
isSpace(UChar c
) { return c
== 9 || c
== 0x20 || c
== 0x3000; }
// TRUE for one of the three characters listed in the trailing comment.
91 static UBool
isSectionStarter(UChar c
) { return c
== 0x25 || c
== 0x2a || c
== 0x40; } // %*@
// Advances i past characters of fileLine for which isSpace() is TRUE.
92 int32_t skipSpaces(int32_t i
) {
93 while(isSpace(fileLine
[i
])) { ++i
; }
// Parsing and setup helpers; all of them report problems through the
// error code reference they receive.
97 UBool
readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
98 void parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
, UErrorCode
&errorCode
);
99 Collation::Level
parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
);
100 void parseAndSetAttribute(IcuTestErrorCode
&errorCode
);
101 void parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
);
102 void buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
103 void setRootCollator(IcuTestErrorCode
&errorCode
);
104 void setLocaleCollator(IcuTestErrorCode
&errorCode
);
106 UBool
needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const;
// Sort key / collation key producers and comparison checks.
108 UBool
getSortKeyParts(const UChar
*s
, int32_t length
,
109 CharString
&dest
, int32_t partSize
,
110 IcuTestErrorCode
&errorCode
);
111 UBool
getCollationKey(const char *norm
, const UnicodeString
&line
,
112 const UChar
*s
, int32_t length
,
113 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
114 UBool
getMergedCollationKey(const UChar
*s
, int32_t length
,
115 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
116 UBool
checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
117 const UnicodeString
&prevString
, const UnicodeString
&s
,
118 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
119 IcuTestErrorCode
&errorCode
);
120 void checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
// State: normalizer pointers, plus the current line, its number and the
// current test name of the data file being read.
122 const Normalizer2
*fcd
, *nfd
;
123 UnicodeString fileLine
;
124 int32_t fileLineNumber
;
125 UnicodeString fileTestName
;
// Factory function: returns a newly allocated CollationTest through an
// IntlTest pointer.
129 extern IntlTest
*createCollationTest() {
130 return new CollationTest();
// Test dispatcher for this suite. Logs the suite name and lists every test
// method through TESTCASE_AUTO; the fourth parameter is unused here.
// NOTE(review): the index-to-test mapping is defined by the TESTCASE_AUTO
// macro family, which is not visible in this file — confirm there.
133 void CollationTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
135 logln("TestSuite CollationTest: ");
138 TESTCASE_AUTO(TestMinMax
);
139 TESTCASE_AUTO(TestImplicits
);
140 TESTCASE_AUTO(TestNulTerminated
);
141 TESTCASE_AUTO(TestIllegalUTF8
);
142 TESTCASE_AUTO(TestShortFCDData
);
143 TESTCASE_AUTO(TestFCD
);
144 TESTCASE_AUTO(TestCollationWeights
);
145 TESTCASE_AUTO(TestRootElements
);
146 TESTCASE_AUTO(TestTailoredElements
);
147 TESTCASE_AUTO(TestDataDriven
);
// Checks the CEs that the root collator returns for the two-unit string
// <U+FFFE, U+FFFF>. Exactly two CEs are expected; the expected values are
// built from MERGE_SEPARATOR_PRIMARY and MAX_PRIMARY, and a mismatch is
// reported through errln().
151 void CollationTest::TestMinMax() {
152 IcuTestErrorCode
errorCode(*this, "TestMinMax");
154 setRootCollator(errorCode
);
155 if(errorCode
.isFailure()) {
// Downcast so that internalGetCEs() can be called on the collator below.
159 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
);
161 errln("the root collator is not a RuleBasedCollator");
165 static const UChar s
[2] = { 0xfffe, 0xffff };
166 UVector64
ces(errorCode
);
167 rbc
->internalGetCEs(UnicodeString(FALSE
, s
, 2), ces
, errorCode
);
168 errorCode
.assertSuccess();
169 if(ces
.size() != 2) {
170 errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces
.size());
// First CE: the one for U+FFFE.
173 int64_t ce
= ces
.elementAti(0);
174 int64_t expected
= Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY
);
176 errln("CE(U+fffe)=%04lx != 02..", (long)ce
);
// Second CE: the one for U+FFFF.
179 ce
= ces
.elementAti(1);
180 expected
= Collation::makeCE(Collation::MAX_PRIMARY
);
182 errln("CE(U+ffff)=%04lx != max..", (long)ce
);
// Iterates over three code point sets (core Han, other Han, unassigned) and
// checks that each code point maps to exactly one CE with common
// secondary/tertiary weights, and that primaries increase for code points
// that are members of the `inOrder` set.
186 void CollationTest::TestImplicits() {
187 IcuTestErrorCode
errorCode(*this, "TestImplicits");
189 const CollationData
*cd
= CollationRoot::getData(errorCode
);
190 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
194 // Implicit primary weights should be assigned for the following sets,
195 // and sort in ascending order by set and then code point.
196 // See http://www.unicode.org/reports/tr10/#Implicit_Weights
198 // core Han Unified Ideographs
199 UnicodeSet
coreHan("[\\p{unified_ideograph}&"
200 "[\\p{Block=CJK_Unified_Ideographs}"
201 "\\p{Block=CJK_Compatibility_Ideographs}]]",
203 // all other Unified Han ideographs
204 UnicodeSet
otherHan("[\\p{unified ideograph}-"
205 "[\\p{Block=CJK_Unified_Ideographs}"
206 "\\p{Block=CJK_Compatibility_Ideographs}]]",
208 UnicodeSet
unassigned("[[:Cn:][:Cs:][:Co:]]", errorCode
);
209 unassigned
.remove(0xfffe, 0xffff); // These have special CLDR root mappings.
211 // Starting with CLDR 26/ICU 54, the root Han order may instead be
212 // the Unihan radical-stroke order.
213 // The tests should pass either way, so we only test the order of a small set of Han characters
214 // whose radical-stroke order is the same as their code point order.
215 UnicodeSet
someHanInCPOrder(
216 "[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48"
217 "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]",
// inOrder = the selected Han characters plus all unassigned code points.
219 UnicodeSet
inOrder(someHanInCPOrder
);
220 inOrder
.addAll(unassigned
).freeze();
221 if(errorCode
.logIfFailureAndReset("UnicodeSet")) {
224 const UnicodeSet
*sets
[] = { &coreHan
, &otherHan
, &unassigned
};
226 uint32_t prevPrimary
= 0;
227 UTF16CollationIterator
ci(cd
, FALSE
, NULL
, NULL
, NULL
);
228 for(int32_t i
= 0; i
< UPRV_LENGTHOF(sets
); ++i
) {
229 LocalPointer
<UnicodeSetIterator
> iter(new UnicodeSetIterator(*sets
[i
]));
230 while(iter
->next()) {
231 UChar32 c
= iter
->getCodepoint();
233 ci
.setText(s
.getBuffer(), s
.getBuffer() + s
.length());
// Fetch two CEs: the first must be real, the second must be NO_CE.
234 int64_t ce
= ci
.nextCE(errorCode
);
235 int64_t ce2
= ci
.nextCE(errorCode
);
236 if(errorCode
.logIfFailureAndReset("CollationIterator.nextCE()")) {
239 if(ce
== Collation::NO_CE
|| ce2
!= Collation::NO_CE
) {
240 errln("CollationIterator.nextCE(U+%04lx) did not yield exactly one CE", (long)c
);
// The lower 32 bits must equal the common secondary/tertiary value.
243 if((ce
& 0xffffffff) != Collation::COMMON_SEC_AND_TER_CE
) {
244 errln("CollationIterator.nextCE(U+%04lx) has non-common sec/ter weights: %08lx",
245 (long)c
, (long)(ce
& 0xffffffff));
// The primary weight is the upper 32 bits of the CE.
248 uint32_t primary
= (uint32_t)(ce
>> 32);
249 if(!(primary
> prevPrimary
) && inOrder
.contains(c
) && inOrder
.contains(prev
)) {
250 errln("CE(U+%04lx)=%04lx.. not greater than CE(U+%04lx)=%04lx..",
251 (long)c
, (long)primary
, (long)prev
, (long)prevPrimary
);
254 prevPrimary
= primary
;
// Runs two UTF16CollationIterators over the same two-unit text "ab": one
// given an explicit limit pointer, the other given a NULL limit so that it
// stops at the terminating NUL. A difference between their CE streams is
// reported; the loop ends when the first iterator returns NO_CE.
259 void CollationTest::TestNulTerminated() {
260 IcuTestErrorCode
errorCode(*this, "TestNulTerminated");
261 const CollationData
*data
= CollationRoot::getData(errorCode
);
262 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
266 static const UChar s
[] = { 0x61, 0x62, 0x61, 0x62, 0 };
// ci1 covers s[0..2) with an explicit limit; ci2 starts at s+2 with a NULL limit.
268 UTF16CollationIterator
ci1(data
, FALSE
, s
, s
, s
+ 2);
269 UTF16CollationIterator
ci2(data
, FALSE
, s
+ 2, s
+ 2, NULL
);
270 for(int32_t i
= 0;; ++i
) {
271 int64_t ce1
= ci1
.nextCE(errorCode
);
272 int64_t ce2
= ci2
.nextCE(errorCode
);
273 if(errorCode
.logIfFailureAndReset("CollationIterator.nextCE()")) {
277 errln("CollationIterator.nextCE(with length) != nextCE(NUL-terminated) at CE %d", (int)i
);
280 if(ce1
== Collation::NO_CE
) { break; }
// With the root collator at identical strength, compares strings[0] against
// each of the following strings, which contain illegal UTF-8 byte
// sequences, and expects compareUTF8() to return UCOL_EQUAL every time.
284 void CollationTest::TestIllegalUTF8() {
285 IcuTestErrorCode
errorCode(*this, "TestIllegalUTF8");
287 setRootCollator(errorCode
);
288 if(errorCode
.isFailure()) {
292 coll
->setAttribute(UCOL_STRENGTH
, UCOL_IDENTICAL
, errorCode
);
294 static const char *strings
[] = {
297 // illegal byte sequences
298 "a\x80z", // trail byte
299 "a\xc1\x81z", // non-shortest form
300 "a\xe0\x82\x83z", // non-shortest form
301 "a\xed\xa0\x80z", // lead surrogate: would be U+D800
302 "a\xed\xbf\xbfz", // trail surrogate: would be U+DFFF
303 "a\xf0\x8f\xbf\xbfz", // non-shortest form
304 "a\xf4\x90\x80\x80z" // out of range: would be U+110000
// strings[0] is the reference string; the loop starts at index 1.
307 StringPiece
fffd(strings
[0]);
308 for(int32_t i
= 1; i
< UPRV_LENGTHOF(strings
); ++i
) {
309 StringPiece
illegal(strings
[i
]);
310 UCollationResult order
= coll
->compareUTF8(fffd
, illegal
, errorCode
);
311 if(order
!= UCOL_EQUAL
) {
312 errln("compareUTF8(U+FFFD, string %d with illegal UTF-8)=%d != UCOL_EQUAL",
// Walks the supplementary range U+10000..U+10FFFF in blocks of 0x400 code
// points. For each block of which src contains at least one code point,
// the lead surrogate U16_LEAD(c) of the block start is added to dest.
// src and dest may be the same set (callers in this file pass the same
// object for both).
320 void addLeadSurrogatesForSupplementary(const UnicodeSet
&src
, UnicodeSet
&dest
) {
321 for(UChar32 c
= 0x10000; c
< 0x110000;) {
// `next` is the first code point of the following 0x400-wide block.
322 UChar32 next
= c
+ 0x400;
323 if(src
.containsSome(c
, next
- 1)) {
324 dest
.add(U16_LEAD(c
));
// Compares the results of CollationFCD::hasLccc() and hasTccc() for every
// BMP code point against sets built from the [:^lccc=0:] and [:^tccc=0:]
// properties, extended with surrogates. The set differences in both
// directions are printed as patterns and compared with "[]".
332 void CollationTest::TestShortFCDData() {
333 // See CollationFCD class comments.
334 IcuTestErrorCode
errorCode(*this, "TestShortFCDData");
335 UnicodeSet
expectedLccc("[:^lccc=0:]", errorCode
);
336 errorCode
.assertSuccess();
337 expectedLccc
.add(0xdc00, 0xdfff); // add all trail surrogates
338 addLeadSurrogatesForSupplementary(expectedLccc
, expectedLccc
);
// Collect the set that hasLccc() actually reports for U+0000..U+FFFF.
339 UnicodeSet lccc
; // actual
340 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
341 if(CollationFCD::hasLccc(c
)) { lccc
.add(c
); }
// expected minus actual, restricted to the BMP.
343 UnicodeSet
diff(expectedLccc
);
344 diff
.removeAll(lccc
);
345 diff
.remove(0x10000, 0x10ffff); // hasLccc() only works for the BMP
346 UnicodeString
empty("[]");
347 UnicodeString diffString
;
348 diff
.toPattern(diffString
, TRUE
);
349 assertEquals("CollationFCD::hasLccc() expected-actual", empty
, diffString
);
351 diff
.removeAll(expectedLccc
);
352 diff
.toPattern(diffString
, TRUE
);
353 assertEquals("CollationFCD::hasLccc() actual-expected", empty
, diffString
, TRUE
);
// Same procedure for the tccc data; lead surrogates are added for both the
// lccc and the tccc expected sets.
355 UnicodeSet
expectedTccc("[:^tccc=0:]", errorCode
);
356 if (errorCode
.isSuccess()) {
357 addLeadSurrogatesForSupplementary(expectedLccc
, expectedTccc
);
358 addLeadSurrogatesForSupplementary(expectedTccc
, expectedTccc
);
359 UnicodeSet tccc
; // actual
360 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
361 if(CollationFCD::hasTccc(c
)) { tccc
.add(c
); }
364 diff
.removeAll(tccc
);
365 diff
.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP
366 assertEquals("CollationFCD::hasTccc() expected-actual", empty
, diffString
);
368 diff
.removeAll(expectedTccc
);
369 diff
.toPattern(diffString
, TRUE
);
370 assertEquals("CollationFCD::hasTccc() actual-expected", empty
, diffString
);
// Minimal bidirectional cursor over an array of code points. next() and
// previous() return U_SENTINEL instead of moving past either end.
374 class CodePointIterator
{
// Stores the array pointer and length as given; the position starts at 0.
376 CodePointIterator(const UChar32
*cp
, int32_t length
) : cp(cp
), length(length
), pos(0) {}
377 void resetToStart() { pos
= 0; }
// Returns the element at the current position and then advances.
378 UChar32
next() { return (pos
< length
) ? cp
[pos
++] : U_SENTINEL
; }
// Steps back first and then returns the element at the new position.
379 UChar32
previous() { return (pos
> 0) ? cp
[--pos
] : U_SENTINEL
; }
380 int32_t getLength() const { return length
; }
// Current position, converted to int (used as a %d argument by callers).
381 int getIndex() const { return (int)pos
; }
// Walks a CollationIterator and a CodePointIterator with the expected code
// points in lockstep and reports any difference. The walk has four phases:
// forward to the limit, backward for two thirds of the length, forward to
// the limit again, and backward to the start. `name` labels the messages.
388 void CollationTest::checkFCD(const char *name
,
389 CollationIterator
&ci
, CodePointIterator
&cpi
) {
390 IcuTestErrorCode
errorCode(*this, "checkFCD");
392 // Iterate forward to the limit.
394 UChar32 c1
= ci
.nextCodePoint(errorCode
);
395 UChar32 c2
= cpi
.next();
397 errln("%s.nextCodePoint(to limit, 1st pass) = U+%04lx != U+%04lx at %d",
398 name
, (long)c1
, (long)c2
, cpi
.getIndex());
// A negative code point ends this phase.
401 if(c1
< 0) { break; }
404 // Iterate backward most of the way.
405 for(int32_t n
= (cpi
.getLength() * 2) / 3; n
> 0; --n
) {
406 UChar32 c1
= ci
.previousCodePoint(errorCode
);
407 UChar32 c2
= cpi
.previous();
409 errln("%s.previousCodePoint() = U+%04lx != U+%04lx at %d",
410 name
, (long)c1
, (long)c2
, cpi
.getIndex());
// Third phase: forward to the limit again (see the message text below).
417 UChar32 c1
= ci
.nextCodePoint(errorCode
);
418 UChar32 c2
= cpi
.next();
420 errln("%s.nextCodePoint(to limit again) = U+%04lx != U+%04lx at %d",
421 name
, (long)c1
, (long)c2
, cpi
.getIndex());
424 if(c1
< 0) { break; }
427 // Iterate backward to the start.
429 UChar32 c1
= ci
.previousCodePoint(errorCode
);
430 UChar32 c2
= cpi
.previous();
432 errln("%s.previousCodePoint(to start) = U+%04lx != U+%04lx at %d",
433 name
, (long)c1
, (long)c2
, cpi
.getIndex());
436 if(c1
< 0) { break; }
// Feeds one non-FCD input string to the UTF-16, UTF-8 and UCharIterator
// based FCD collation iterators and uses checkFCD() to compare the code
// points each of them delivers with the expected array `cp`.
440 void CollationTest::TestFCD() {
441 IcuTestErrorCode
errorCode(*this, "TestFCD");
442 const CollationData
*data
= CollationRoot::getData(errorCode
);
443 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
447 // Input string, not FCD, NUL-terminated.
448 static const UChar s
[] = {
449 0x308, 0xe1, 0x62, 0x301, 0x327, 0x430, 0x62,
450 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
451 0x327, 0x308, // ccc=202, 230
452 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
453 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F),
454 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D),
456 0xe7, // Character with tccc!=0 decomposed together with mis-ordered sequence.
457 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), U16_LEAD(0x1D165), U16_TRAIL(0x1D165),
458 0xe1, // Character with tccc!=0 decomposed together with decomposed sequence.
459 0xf73, 0xf75, // Tibetan composite vowels must be decomposed.
463 // Expected code points.
464 static const UChar32 cp
[] = {
465 0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
466 0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
469 0x63, 0x327, 0x1D165, 0x1D16D,
471 0xf71, 0xf71, 0xf72, 0xf74, 0x301,
// UTF-16 iterator over the NUL-terminated input (NULL limit).
475 FCDUTF16CollationIterator
u16ci(data
, FALSE
, s
, s
, NULL
);
476 if(errorCode
.logIfFailureAndReset("FCDUTF16CollationIterator constructor")) {
479 CodePointIterator
cpi(cp
, UPRV_LENGTHOF(cp
));
480 checkFCD("FCDUTF16CollationIterator", u16ci
, cpi
);
// UTF-8 iterator over the same text converted with toUTF8String().
482 #if U_HAVE_STD_STRING
485 UnicodeString(s
).toUTF8String(utf8
);
486 FCDUTF8CollationIterator
u8ci(data
, FALSE
,
487 reinterpret_cast<const uint8_t *>(utf8
.c_str()), 0, -1);
488 if(errorCode
.logIfFailureAndReset("FCDUTF8CollationIterator constructor")) {
491 checkFCD("FCDUTF8CollationIterator", u8ci
, cpi
);
// UCharIterator-based iterator over the same UTF-16 text.
496 uiter_setString(&iter
, s
, UPRV_LENGTHOF(s
) - 1); // -1: without the terminating NUL
497 FCDUIterCollationIterator
uici(data
, FALSE
, iter
, 0);
498 if(errorCode
.logIfFailureAndReset("FCDUIterCollationIterator constructor")) {
501 checkFCD("FCDUIterCollationIterator", uici
, cpi
);
// Asks `cw` to allocate n weights between lowerLimit and upperLimit and
// then fetches them one by one. Reports an error if allocation fails, if
// nextWeight() returns 0xffffffff before n weights were delivered, if a
// weight is not strictly between `previous` (initially lowerLimit) and
// upperLimit, or if fewer than minCount weights have length someLength.
504 void CollationTest::checkAllocWeights(CollationWeights
&cw
,
505 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
506 int32_t someLength
, int32_t minCount
) {
507 if(!cw
.allocWeights(lowerLimit
, upperLimit
, n
)) {
508 errln("CollationWeights::allocWeights(%lx, %lx, %ld) = FALSE",
509 (long)lowerLimit
, (long)upperLimit
, (long)n
);
512 uint32_t previous
= lowerLimit
;
513 int32_t count
= 0; // number of weights that have someLength
514 for(int32_t i
= 0; i
< n
; ++i
) {
515 uint32_t w
= cw
.nextWeight();
// 0xffffffff from nextWeight() is treated as "no more weights".
516 if(w
== 0xffffffff) {
517 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
518 "returns only %ld weights",
519 (long)lowerLimit
, (long)upperLimit
, (long)n
, (long)i
);
522 if(!(previous
< w
&& w
< upperLimit
)) {
523 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
524 "number %ld -> %lx not between %lx and %lx",
525 (long)lowerLimit
, (long)upperLimit
, (long)n
,
526 (long)(i
+ 1), (long)w
, (long)previous
, (long)upperLimit
);
529 if(CollationWeights::lengthOfWeight(w
) == someLength
) { ++count
; }
531 if(count
< minCount
) {
532 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
533 "returns only %ld < %ld weights of length %d",
534 (long)lowerLimit
, (long)upperLimit
, (long)n
,
535 (long)count
, (long)minCount
, (int)someLength
);
// Exercises CollationWeights through checkAllocWeights() in four
// configurations: non-compressible primaries, compressible primaries,
// secondaries and tertiaries. The arguments of each call are
// (lowerLimit, upperLimit, n, someLength, minCount).
539 void CollationTest::TestCollationWeights() {
542 // Non-compressible primaries use 254 second bytes 02..FF.
543 logln("CollationWeights.initForPrimary(non-compressible)");
544 cw
.initForPrimary(FALSE
);
545 // Expect 1 weight 11 and 254 weights 12xx.
546 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 1, 1);
547 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 2, 254);
548 // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202.
549 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 260, 2, 255);
550 // Expect 254 two-byte weights from the ranges 10ff and 11xx.
551 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 600, 2, 254);
552 // Expect 254^2=64516 three-byte weights.
553 // During computation, there should be 3 three-byte ranges
554 // 10ffff, 11xxxx, 120202.
555 // The middle one should be split 64515:1,
556 // and the newly-split-off range and the last range lengthened.
557 checkAllocWeights(cw
, 0x10fffe00, 0x12020300, 1 + 64516 + 254 + 1, 3, 64516);
558 // Expect weights 1102 & 1103.
559 checkAllocWeights(cw
, 0x10ff0000, 0x11040000, 2, 2, 2);
560 // Expect weights 102102 & 102103.
561 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
563 // Compressible primaries use 251 second bytes 04..FE.
564 logln("CollationWeights.initForPrimary(compressible)");
565 cw
.initForPrimary(TRUE
);
566 // Expect 1 weight 11 and 251 weights 12xx.
567 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 1, 1);
568 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 2, 251);
569 // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204.
570 checkAllocWeights(cw
, 0x10fdfe40, 0x12050300, 260, 2, 252);
571 // Expect weights 1104 & 1105.
572 checkAllocWeights(cw
, 0x10fe0000, 0x11060000, 2, 2, 2);
573 // Expect weights 102102 & 102103.
574 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
576 // Secondary and tertiary weights use only bytes 3 & 4.
577 logln("CollationWeights.initForSecondary()");
578 cw
.initForSecondary();
579 // Expect weights fbxx and all four fc..ff.
580 checkAllocWeights(cw
, 0xfb20, 0x10000, 20, 3, 4);
582 logln("CollationWeights.initForTertiary()");
583 cw
.initForTertiary();
584 // Expect weights 3dxx and both 3e & 3f.
585 checkAllocWeights(cw
, 0x3d02, 0x4000, 10, 3, 2);
// Structural validity check for one collation element, given as its
// primary weight p, secondary weight s, and the 16 bits ctq that hold
// case, tertiary and quaternary information. The weights are split into
// bytes and tested against a sequence of rules.
// NOTE(review): the statement executed when a rule matches is not visible
// in this excerpt for any of the rules — presumably `return FALSE`; confirm.
590 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
,
591 uint32_t p
, uint32_t s
, uint32_t ctq
) {
// Split the primary into its four bytes, most significant first.
592 uint32_t p1
= p
>> 24;
593 uint32_t p2
= (p
>> 16) & 0xff;
594 uint32_t p3
= (p
>> 8) & 0xff;
595 uint32_t p4
= p
& 0xff;
596 uint32_t s1
= s
>> 8;
597 uint32_t s2
= s
& 0xff;
598 // ctq = Case, Tertiary, Quaternary
599 uint32_t c
= (ctq
& Collation::CASE_MASK
) >> 14;
600 uint32_t t
= ctq
& Collation::ONLY_TERTIARY_MASK
;
601 uint32_t t1
= t
>> 8;
602 uint32_t t2
= t
& 0xff;
603 uint32_t q
= ctq
& Collation::QUATERNARY_MASK
;
604 // No leading zero bytes.
605 if((p
!= 0 && p1
== 0) || (s
!= 0 && s1
== 0) || (t
!= 0 && t1
== 0)) {
608 // No intermediate zero bytes.
609 if(p1
!= 0 && p2
== 0 && (p
& 0xffff) != 0) {
612 if(p2
!= 0 && p3
== 0 && p4
!= 0) {
615 // Minimum & maximum lead bytes.
616 if((p1
!= 0 && p1
<= Collation::MERGE_SEPARATOR_BYTE
) ||
617 s1
== Collation::LEVEL_SEPARATOR_BYTE
||
618 t1
== Collation::LEVEL_SEPARATOR_BYTE
|| t1
> 0x3f) {
624 // The valid byte range for the second primary byte depends on compressibility.
626 if(data
.isCompressibleLeadByte(p1
)) {
627 if(p2
<= Collation::PRIMARY_COMPRESSION_LOW_BYTE
||
628 Collation::PRIMARY_COMPRESSION_HIGH_BYTE
<= p2
) {
632 if(p2
<= Collation::LEVEL_SEPARATOR_BYTE
) {
637 // Other bytes just need to avoid the level separator.
638 // Trailing zeros are ok.
639 U_ASSERT(Collation::LEVEL_SEPARATOR_BYTE
== 1);
640 if(p3
== Collation::LEVEL_SEPARATOR_BYTE
|| p4
== Collation::LEVEL_SEPARATOR_BYTE
||
641 s2
== Collation::LEVEL_SEPARATOR_BYTE
|| t2
== Collation::LEVEL_SEPARATOR_BYTE
) {
// The remaining rules compare s and t with the boundaries reported by `re`.
// NOTE(review): the conditions that select which of the following rules
// applies are not visible in this excerpt.
648 // Completely ignorable CE.
649 // Quaternary CEs are not supported.
650 if(c
!= 0 || q
!= 0) {
655 if(t
< re
.getTertiaryBoundary() || c
!= 2) {
661 if(s
< re
.getSecondaryBoundary() || t
== 0 || t
>= re
.getTertiaryBoundary()) {
667 if(s
== 0 || (Collation::COMMON_WEIGHT16
< s
&& s
<= re
.getLastCommonSecondary()) ||
668 s
>= re
.getSecondaryBoundary()) {
671 if(t
== 0 || t
>= re
.getTertiaryBoundary()) {
678 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
, int64_t ce
) {
679 uint32_t p
= (uint32_t)(ce
>> 32);
680 uint32_t secTer
= (uint32_t)ce
;
681 return isValidCE(re
, data
, p
, secTer
>> 16, secTer
& 0xffff);
// Iterator over the root elements array of a CollationData object. Each
// step yields a primary weight and a secondary/tertiary value, available
// through getPrimary() and getSecTer(). Array entries with
// SEC_TER_DELTA_FLAG are sec/ter units; entries with PRIMARY_STEP_MASK
// bits mark the end of a primary range.
684 class RootElementsIterator
{
// Iteration starts at the index stored in the IX_FIRST_TERTIARY_INDEX slot.
686 RootElementsIterator(const CollationData
&root
)
688 elements(root
.rootElements
), length(root
.rootElementsLength
),
690 index((int32_t)elements
[CollationRootElements::IX_FIRST_TERTIARY_INDEX
]) {}
// Stops (returns FALSE) at the end of the array or at the primary sentinel.
693 if(index
>= length
) { return FALSE
; }
694 uint32_t p
= elements
[index
];
695 if(p
== CollationRootElements::PRIMARY_SENTINEL
) { return FALSE
; }
696 if((p
& CollationRootElements::SEC_TER_DELTA_FLAG
) != 0) {
// Sec/ter unit: strip the flag bit to get the value.
698 secTer
= p
& ~CollationRootElements::SEC_TER_DELTA_FLAG
;
701 if((p
& CollationRootElements::PRIMARY_STEP_MASK
) != 0) {
702 // End of a range, enumerate the primaries in the range.
703 int32_t step
= (int32_t)p
& CollationRootElements::PRIMARY_STEP_MASK
;
706 // Finished the range, return the next CE after it.
711 // Return the next primary in this range.
712 UBool isCompressible
= data
.isCompressiblePrimary(pri
);
// Primaries whose lower 16 bits are zero are incremented as two-byte
// primaries, all others as three-byte primaries.
713 if((pri
& 0xffff) == 0) {
714 pri
= Collation::incTwoBytePrimaryByOffset(pri
, isCompressible
, step
);
716 pri
= Collation::incThreeBytePrimaryByOffset(pri
, isCompressible
, step
);
720 // Simple primary CE.
723 // Does this have an explicit below-common sec/ter unit,
724 // or does it imply a common one?
725 if(index
== length
) {
726 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
728 secTer
= elements
[index
];
729 if((secTer
& CollationRootElements::SEC_TER_DELTA_FLAG
) == 0) {
731 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
733 secTer
&= ~CollationRootElements::SEC_TER_DELTA_FLAG
;
734 if(secTer
> Collation::COMMON_SEC_AND_TER_CE
) {
736 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
738 // Explicit sec/ter below common/common.
// Accessors for the values produced by the most recent step.
746 uint32_t getPrimary() const { return pri
; }
747 uint32_t getSecTer() const { return secTer
; }
750 const CollationData
&data
;
751 const uint32_t *elements
;
// Walks all root CEs with a RootElementsIterator. Each CE is checked with
// isValidCE(), and between consecutive CEs the test tries to allocate one
// new weight on the level where they first differ (primary, secondary or
// tertiary) to prove that a tailoring gap exists. Two identical consecutive
// CEs are reported as duplicates.
761 void CollationTest::TestRootElements() {
762 IcuTestErrorCode
errorCode(*this, "TestRootElements");
763 const CollationData
*root
= CollationRoot::getData(errorCode
);
764 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
767 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
768 RootElementsIterator
iter(*root
);
770 // We check each root CE for validity,
771 // and we also verify that there is a tailoring gap between each two CEs.
772 CollationWeights cw1c
; // compressible primary weights
773 CollationWeights cw1u
; // uncompressible primary weights
774 CollationWeights cw2
;
775 CollationWeights cw3
;
777 cw1c
.initForPrimary(TRUE
);
778 cw1u
.initForPrimary(FALSE
);
779 cw2
.initForSecondary();
780 cw3
.initForTertiary();
782 // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
783 // nor the special merge-separator CE for U+FFFE.
784 uint32_t prevPri
= 0;
785 uint32_t prevSec
= 0;
786 uint32_t prevTer
= 0;
788 uint32_t pri
= iter
.getPrimary();
789 uint32_t secTer
= iter
.getSecTer();
790 // CollationRootElements CEs must have 0 case and quaternary bits.
791 if((secTer
& Collation::CASE_AND_QUATERNARY_MASK
) != 0) {
792 errln("CollationRootElements CE has non-zero case and/or quaternary bits: %08lx %08lx",
793 (long)pri
, (long)secTer
);
// Split secTer: secondary in the upper 16 bits, tertiary via the mask.
795 uint32_t sec
= secTer
>> 16;
796 uint32_t ter
= secTer
& Collation::ONLY_TERTIARY_MASK
;
798 if(pri
== 0 && sec
== 0 && ter
!= 0) {
799 // Tertiary CEs must have uppercase bits,
800 // but they are not stored in the CollationRootElements.
803 if(!isValidCE(rootElements
, *root
, pri
, sec
, ctq
)) {
804 errln("invalid root CE %08lx %08lx", (long)pri
, (long)secTer
);
// Primary difference: pick the allocator that matches the compressibility
// of the previous primary.
807 uint32_t newWeight
= 0;
808 if(prevPri
== 0 || prevPri
>= Collation::FFFD_PRIMARY
) {
809 // There is currently no tailoring gap after primary ignorables,
810 // and we forbid tailoring after U+FFFD and U+FFFF.
811 } else if(root
->isCompressiblePrimary(prevPri
)) {
812 if(!cw1c
.allocWeights(prevPri
, pri
, 1)) {
813 errln("no primary/compressible tailoring gap between %08lx and %08lx",
814 (long)prevPri
, (long)pri
);
816 newWeight
= cw1c
.nextWeight();
819 if(!cw1u
.allocWeights(prevPri
, pri
, 1)) {
820 errln("no primary/uncompressible tailoring gap between %08lx and %08lx",
821 (long)prevPri
, (long)pri
);
823 newWeight
= cw1u
.nextWeight();
// newWeight stays 0 when no allocation was attempted or it failed.
826 if(newWeight
!= 0 && !(prevPri
< newWeight
&& newWeight
< pri
)) {
827 errln("mis-allocated primary weight, should get %08lx < %08lx < %08lx",
828 (long)prevPri
, (long)newWeight
, (long)pri
);
// Secondary difference: when the previous secondary is 0, the lower limit is
// 0x100 below the secondary boundary.
830 } else if(sec
!= prevSec
) {
831 uint32_t lowerLimit
=
832 prevSec
== 0 ? rootElements
.getSecondaryBoundary() - 0x100 : prevSec
;
833 if(!cw2
.allocWeights(lowerLimit
, sec
, 1)) {
834 errln("no secondary tailoring gap between %04x and %04x", lowerLimit
, sec
);
836 uint32_t newWeight
= cw2
.nextWeight();
837 if(!(prevSec
< newWeight
&& newWeight
< sec
)) {
// NOTE(review): the arguments are cast to long but the format uses %04x,
// not %04lx — confirm against errln()'s formatting contract.
838 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
839 (long)lowerLimit
, (long)newWeight
, (long)sec
);
// Tertiary difference: same scheme with the tertiary boundary.
842 } else if(ter
!= prevTer
) {
843 uint32_t lowerLimit
=
844 prevTer
== 0 ? rootElements
.getTertiaryBoundary() - 0x100 : prevTer
;
845 if(!cw3
.allocWeights(lowerLimit
, ter
, 1)) {
// NOTE(review): "teriary" in the message below looks like a misspelling of
// "tertiary".
846 errln("no teriary tailoring gap between %04x and %04x", lowerLimit
, ter
);
848 uint32_t newWeight
= cw3
.nextWeight();
849 if(!(prevTer
< newWeight
&& newWeight
< ter
)) {
// NOTE(review): this is the tertiary branch but the message says
// "secondary"; it also pairs %04x with long arguments.
850 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
851 (long)lowerLimit
, (long)newWeight
, (long)ter
);
855 errln("duplicate root CE %08lx %08lx", (long)pri
, (long)secTer
);
// Starting with "root" and continuing with the locales from
// Collator::getAvailableLocales(), creates a collator for every collation
// keyword value of the locale and validates, with isValidCE(), each CE of
// each string in the collator's tailored set. Actual locale names that were
// already handled are remembered in the hash table `prevLocales`.
864 void CollationTest::TestTailoredElements() {
865 IcuTestErrorCode
errorCode(*this, "TestTailoredElements");
866 const CollationData
*root
= CollationRoot::getData(errorCode
);
867 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
870 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
872 UHashtable
*prevLocales
= uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, errorCode
);
873 if(errorCode
.logIfFailureAndReset("failed to create a hash table")) {
// Keys are uprv_strdup() copies; the table frees them with uprv_free.
876 uhash_setKeyDeleter(prevLocales
, uprv_free
);
877 // TestRootElements() tests the root collator which does not have tailorings.
878 uhash_puti(prevLocales
, uprv_strdup(""), 1, errorCode
);
879 uhash_puti(prevLocales
, uprv_strdup("root"), 1, errorCode
);
880 uhash_puti(prevLocales
, uprv_strdup("root@collation=standard"), 1, errorCode
);
882 UVector64
ces(errorCode
);
883 LocalPointer
<StringEnumeration
> locales(Collator::getAvailableLocales());
884 U_ASSERT(locales
.isValid());
885 const char *localeID
= "root";
887 Locale
locale(localeID
);
888 LocalPointer
<StringEnumeration
> types(
889 Collator::getKeywordValuesForLocale("collation", locale
, FALSE
, errorCode
));
890 errorCode
.assertSuccess();
891 const char *type
; // first: default type
892 while((type
= types
->next(NULL
, errorCode
)) != NULL
) {
// Keyword values that begin with "private-" are reported as an error.
893 if(strncmp(type
, "private-", 8) == 0) {
894 errln("Collator::getKeywordValuesForLocale(%s) returns private collation keyword: %s",
897 Locale
localeWithType(locale
);
898 localeWithType
.setKeywordValue("collation", type
, errorCode
);
899 errorCode
.assertSuccess();
900 LocalPointer
<Collator
> coll(Collator::createInstance(localeWithType
, errorCode
));
901 if(errorCode
.logIfFailureAndReset("Collator::createInstance(%s)",
902 localeWithType
.getName())) {
// A non-zero lookup result means this actual locale was recorded before.
905 Locale actual
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, errorCode
);
906 if(uhash_geti(prevLocales
, actual
.getName()) != 0) {
909 uhash_puti(prevLocales
, uprv_strdup(actual
.getName()), 1, errorCode
);
910 errorCode
.assertSuccess();
911 logln("TestTailoredElements(): requested %s -> actual %s",
912 localeWithType
.getName(), actual
.getName());
913 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
.getAlias());
917 // Note: It would be better to get tailored strings such that we can
918 // identify the prefix, and only get the CEs for the prefix+string,
919 // not also for the prefix.
920 // There is currently no API for that.
921 // It would help in an unusual case where a contraction starting in the prefix
922 // extends past its end, and we do not see the intended mapping.
923 // For example, for a mapping p|st, if there is also a contraction ps,
924 // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
925 LocalPointer
<UnicodeSet
> tailored(coll
->getTailoredSet(errorCode
));
926 errorCode
.assertSuccess();
927 UnicodeSetIterator
iter(*tailored
);
// For each tailored string: collect its CEs and validate them one by one.
929 const UnicodeString
&s
= iter
.getString();
930 ces
.removeAllElements();
931 rbc
->internalGetCEs(s
, ces
, errorCode
);
932 errorCode
.assertSuccess();
933 for(int32_t i
= 0; i
< ces
.size(); ++i
) {
934 int64_t ce
= ces
.elementAti(i
);
935 if(!isValidCE(rootElements
, *root
, ce
)) {
936 errln("invalid tailored CE %016llx at CE index %d from string:",
937 (long long)ce
, (int)i
);
// Continue with the next available locale until the enumeration is exhausted.
943 } while((localeID
= locales
->next(NULL
, errorCode
)) != NULL
);
944 uhash_close(prevLocales
);
// Builds a printable UnicodeString from `length` sort key bytes at p.
// A space (U+0020) is inserted before every item except the first; a
// period or a vertical bar is appended for particular bytes.
// NOTE(review): the conditions that select the period or the bar, and the
// output for other bytes, are not visible in this excerpt.
947 UnicodeString
CollationTest::printSortKey(const uint8_t *p
, int32_t length
) {
949 for(int32_t i
= 0; i
< length
; ++i
) {
950 if(i
> 0) { s
.append((UChar
)0x20); }
953 s
.append((UChar
)0x2e); // period
955 s
.append((UChar
)0x7c); // vertical bar
// Convenience wrapper: fetches the byte array and its length from the
// CollationKey and formats them with printSortKey().
963 UnicodeString
CollationTest::printCollationKey(const CollationKey
&key
) {
965 const uint8_t *p
= key
.getByteArray(length
);
966 return printSortKey(p
, length
);
// Reads lines from f with ucbuf_readline(). Each line is cut at the first
// '#', then trailing CR/LF and trailing spaces are removed. A line that is
// non-empty after stripping is stored in fileLine; an empty one is skipped.
// NOTE(review): the return statements are not visible in this excerpt.
969 UBool
CollationTest::readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
972 const UChar
*line
= ucbuf_readline(f
, &lineLength
, errorCode
);
// A NULL line or a failure ends the reading.
973 if(line
== NULL
|| errorCode
.isFailure()) {
978 // Strip trailing CR/LF, comments, and spaces.
979 const UChar
*comment
= u_memchr(line
, 0x23, lineLength
); // '#'
980 if(comment
!= NULL
) {
981 lineLength
= (int32_t)(comment
- line
);
983 while(lineLength
> 0 && isCROrLF(line
[lineLength
- 1])) { --lineLength
; }
985 while(lineLength
> 0 && isSpace(line
[lineLength
- 1])) { --lineLength
; }
986 if(lineLength
!= 0) {
// setTo() is called with FALSE as its first argument.
987 fileLine
.setTo(FALSE
, line
, lineLength
);
990 // Empty line, continue.
// Parses one space-delimited token of fileLine beginning at `start`.
// If the token contains '|', the text before it is unescaped into `prefix`
// and `start` moves to just after the '|'. The text from `start` to the end
// of the token is unescaped into `s`. An empty prefix or an empty string is
// logged and errorCode is set to U_PARSE_ERROR.
994 void CollationTest::parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
,
995 UErrorCode
&errorCode
) {
996 int32_t length
= fileLine
.length();
// Find the end of the token: the first space or the end of the line.
998 for(i
= start
; i
< length
&& !isSpace(fileLine
[i
]); ++i
) {}
999 int32_t pipeIndex
= fileLine
.indexOf((UChar
)0x7c, start
, i
- start
); // '|'
1000 if(pipeIndex
>= 0) {
1001 prefix
= fileLine
.tempSubStringBetween(start
, pipeIndex
).unescape();
1002 if(prefix
.isEmpty()) {
1003 errln("empty prefix on line %d", (int)fileLineNumber
);
1005 errorCode
= U_PARSE_ERROR
;
1008 start
= pipeIndex
+ 1;
1012 s
= fileLine
.tempSubStringBetween(start
, i
).unescape();
1014 errln("empty string on line %d", (int)fileLineNumber
);
1016 errorCode
= U_PARSE_ERROR
;
1022 Collation::Level
CollationTest::parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
) {
1023 Collation::Level relation
;
1025 if(fileLine
[0] == 0x3c) { // <
1026 UChar second
= fileLine
[1];
1030 relation
= Collation::PRIMARY_LEVEL
;
1033 relation
= Collation::SECONDARY_LEVEL
;
1036 relation
= Collation::TERTIARY_LEVEL
;
1039 relation
= Collation::QUATERNARY_LEVEL
;
1042 relation
= Collation::CASE_LEVEL
;
1045 relation
= Collation::IDENTICAL_LEVEL
;
1048 relation
= Collation::NO_LEVEL
;
1052 } else if(fileLine
[0] == 0x3d) { // =
1053 relation
= Collation::ZERO_LEVEL
;
1058 if(start
== 0 || !isSpace(fileLine
[start
])) {
1059 errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line %d", (int)fileLineNumber
);
1061 errorCode
.set(U_PARSE_ERROR
);
1062 return Collation::NO_LEVEL
;
1064 start
= skipSpaces(start
);
1065 UnicodeString prefix
;
1066 parseString(start
, prefix
, s
, errorCode
);
1067 if(errorCode
.isSuccess() && !prefix
.isEmpty()) {
1068 errln("prefix string not allowed for test string: on line %d", (int)fileLineNumber
);
1070 errorCode
.set(U_PARSE_ERROR
);
1071 return Collation::NO_LEVEL
;
1073 if(start
< fileLine
.length()) {
1074 errln("unexpected line contents after test string on line %d", (int)fileLineNumber
);
1076 errorCode
.set(U_PARSE_ERROR
);
1077 return Collation::NO_LEVEL
;
1082 static const struct {
1086 { "backwards", UCOL_FRENCH_COLLATION
},
1087 { "alternate", UCOL_ALTERNATE_HANDLING
},
1088 { "caseFirst", UCOL_CASE_FIRST
},
1089 { "caseLevel", UCOL_CASE_LEVEL
},
1090 // UCOL_NORMALIZATION_MODE is turned on and off automatically.
1091 { "strength", UCOL_STRENGTH
},
1092 // UCOL_HIRAGANA_QUATERNARY_MODE is deprecated.
1093 { "numeric", UCOL_NUMERIC_COLLATION
}
1096 static const struct {
1098 UColAttributeValue value
;
1099 } attributeValues
[] = {
1100 { "default", UCOL_DEFAULT
},
1101 { "primary", UCOL_PRIMARY
},
1102 { "secondary", UCOL_SECONDARY
},
1103 { "tertiary", UCOL_TERTIARY
},
1104 { "quaternary", UCOL_QUATERNARY
},
1105 { "identical", UCOL_IDENTICAL
},
1106 { "off", UCOL_OFF
},
1108 { "shifted", UCOL_SHIFTED
},
1109 { "non-ignorable", UCOL_NON_IGNORABLE
},
1110 { "lower", UCOL_LOWER_FIRST
},
1111 { "upper", UCOL_UPPER_FIRST
}
1114 void CollationTest::parseAndSetAttribute(IcuTestErrorCode
&errorCode
) {
1115 // Parse attributes even if the Collator could not be created,
1116 // in order to report syntax errors.
1117 int32_t start
= skipSpaces(1);
1118 int32_t equalPos
= fileLine
.indexOf(0x3d);
1120 if(fileLine
.compare(start
, 7, UNICODE_STRING("reorder", 7)) == 0) {
1121 parseAndSetReorderCodes(start
+ 7, errorCode
);
1124 errln("missing '=' on line %d", (int)fileLineNumber
);
1126 errorCode
.set(U_PARSE_ERROR
);
1130 UnicodeString attrString
= fileLine
.tempSubStringBetween(start
, equalPos
);
1131 UnicodeString valueString
= fileLine
.tempSubString(equalPos
+1);
1132 if(attrString
== UNICODE_STRING("maxVariable", 11)) {
1133 UColReorderCode max
;
1134 if(valueString
== UNICODE_STRING("space", 5)) {
1135 max
= UCOL_REORDER_CODE_SPACE
;
1136 } else if(valueString
== UNICODE_STRING("punct", 5)) {
1137 max
= UCOL_REORDER_CODE_PUNCTUATION
;
1138 } else if(valueString
== UNICODE_STRING("symbol", 6)) {
1139 max
= UCOL_REORDER_CODE_SYMBOL
;
1140 } else if(valueString
== UNICODE_STRING("currency", 8)) {
1141 max
= UCOL_REORDER_CODE_CURRENCY
;
1143 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1145 errorCode
.set(U_PARSE_ERROR
);
1149 coll
->setMaxVariable(max
, errorCode
);
1150 if(errorCode
.isFailure()) {
1151 errln("setMaxVariable() failed on line %d: %s",
1152 (int)fileLineNumber
, errorCode
.errorName());
1162 for(int32_t i
= 0;; ++i
) {
1163 if(i
== UPRV_LENGTHOF(attributes
)) {
1164 errln("invalid attribute name on line %d", (int)fileLineNumber
);
1166 errorCode
.set(U_PARSE_ERROR
);
1169 if(attrString
== UnicodeString(attributes
[i
].name
, -1, US_INV
)) {
1170 attr
= attributes
[i
].attr
;
1175 UColAttributeValue value
;
1176 for(int32_t i
= 0;; ++i
) {
1177 if(i
== UPRV_LENGTHOF(attributeValues
)) {
1178 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1180 errorCode
.set(U_PARSE_ERROR
);
1183 if(valueString
== UnicodeString(attributeValues
[i
].name
, -1, US_INV
)) {
1184 value
= attributeValues
[i
].value
;
1190 coll
->setAttribute(attr
, value
, errorCode
);
1191 if(errorCode
.isFailure()) {
1192 errln("illegal attribute=value combination on line %d: %s",
1193 (int)fileLineNumber
, errorCode
.errorName());
1201 void CollationTest::parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
) {
1202 UVector32
reorderCodes(errorCode
);
1203 while(start
< fileLine
.length()) {
1204 start
= skipSpaces(start
);
1205 int32_t limit
= start
;
1206 while(limit
< fileLine
.length() && !isSpace(fileLine
[limit
])) { ++limit
; }
1208 name
.appendInvariantChars(fileLine
.tempSubStringBetween(start
, limit
), errorCode
);
1209 int32_t code
= CollationRuleParser::getReorderCode(name
.data());
1211 if(uprv_stricmp(name
.data(), "default") == 0) {
1212 code
= UCOL_REORDER_CODE_DEFAULT
; // -1
1214 errln("invalid reorder code '%s' on line %d", name
.data(), (int)fileLineNumber
);
1216 errorCode
.set(U_PARSE_ERROR
);
1220 reorderCodes
.addElement(code
, errorCode
);
1224 coll
->setReorderCodes(reorderCodes
.getBuffer(), reorderCodes
.size(), errorCode
);
1225 if(errorCode
.isFailure()) {
1226 errln("setReorderCodes() failed on line %d: %s",
1227 (int)fileLineNumber
, errorCode
.errorName());
1235 void CollationTest::buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1236 UnicodeString rules
;
1237 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1238 rules
.append(fileLine
.unescape());
1240 if(errorCode
.isFailure()) { return; }
1243 UParseError parseError
;
1244 UnicodeString reason
;
1246 coll
= new RuleBasedCollator(rules
, parseError
, reason
, errorCode
);
1248 errln("unable to allocate a new collator");
1249 errorCode
.set(U_MEMORY_ALLOCATION_ERROR
);
1252 if(errorCode
.isFailure()) {
1253 dataerrln("RuleBasedCollator(rules) failed - %s", errorCode
.errorName());
1254 infoln(UnicodeString(" reason: ") + reason
);
1255 if(parseError
.offset
>= 0) { infoln(" rules offset: %d", (int)parseError
.offset
); }
1256 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
1257 infoln(UnicodeString(" snippet: ...") +
1258 parseError
.preContext
+ "(!)" + parseError
.postContext
+ "...");
1264 assertEquals("no error reason when RuleBasedCollator(rules) succeeds",
1265 UnicodeString(), reason
);
1269 void CollationTest::setRootCollator(IcuTestErrorCode
&errorCode
) {
1270 if(errorCode
.isFailure()) { return; }
1272 coll
= Collator::createInstance(Locale::getRoot(), errorCode
);
1273 if(errorCode
.isFailure()) {
1274 dataerrln("unable to create a root collator");
1279 void CollationTest::setLocaleCollator(IcuTestErrorCode
&errorCode
) {
1280 if(errorCode
.isFailure()) { return; }
1283 int32_t at
= fileLine
.indexOf((UChar
)0x40, 9); // @ is not invariant
1285 fileLine
.setCharAt(at
, (UChar
)0x2a); // *
1287 CharString localeID
;
1288 localeID
.appendInvariantChars(fileLine
.tempSubString(9), errorCode
);
1290 localeID
.data()[at
- 9] = '@';
1292 Locale
locale(localeID
.data());
1293 if(fileLine
.length() == 9 || errorCode
.isFailure() || locale
.isBogus()) {
1294 errln("invalid language tag on line %d", (int)fileLineNumber
);
1296 if(errorCode
.isSuccess()) { errorCode
.set(U_PARSE_ERROR
); }
1300 logln("creating a collator for locale ID %s", locale
.getName());
1301 coll
= Collator::createInstance(locale
, errorCode
);
1302 if(errorCode
.isFailure()) {
1303 dataerrln("unable to create a collator for locale %s on line %d",
1304 locale
.getName(), (int)fileLineNumber
);
1312 UBool
CollationTest::needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
1313 if(U_FAILURE(errorCode
) || !fcd
->isNormalized(s
, errorCode
)) { return TRUE
; }
1314 // In some sequences with Tibetan composite vowel signs,
1315 // even if the string passes the FCD check,
1316 // those composites must be decomposed.
1317 // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81.
1319 while((index
= s
.indexOf((UChar
)0xf71, index
)) >= 0) {
1320 if(++index
< s
.length()) {
1322 if(c
== 0xf73 || c
== 0xf75 || c
== 0xf81) { return TRUE
; }
1328 UBool
CollationTest::getSortKeyParts(const UChar
*s
, int32_t length
,
1329 CharString
&dest
, int32_t partSize
,
1330 IcuTestErrorCode
&errorCode
) {
1331 if(errorCode
.isFailure()) { return FALSE
; }
1333 U_ASSERT(partSize
<= UPRV_LENGTHOF(part
));
1335 uiter_setString(&iter
, s
, length
);
1336 uint32_t state
[2] = { 0, 0 };
1338 int32_t partLength
= coll
->internalNextSortKeyPart(&iter
, state
, part
, partSize
, errorCode
);
1339 UBool done
= partLength
< partSize
;
1341 // At the end, append the next byte as well which should be 00.
1344 dest
.append(reinterpret_cast<char *>(part
), partLength
, errorCode
);
1346 return errorCode
.isSuccess();
1351 UBool
CollationTest::getCollationKey(const char *norm
, const UnicodeString
&line
,
1352 const UChar
*s
, int32_t length
,
1353 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1354 if(errorCode
.isFailure()) { return FALSE
; }
1355 coll
->getCollationKey(s
, length
, key
, errorCode
);
1356 if(errorCode
.isFailure()) {
1357 infoln(fileTestName
);
1358 errln("Collator(%s).getCollationKey() failed: %s",
1359 norm
, errorCode
.errorName());
1364 const uint8_t *keyBytes
= key
.getByteArray(keyLength
);
1365 if(keyLength
== 0 || keyBytes
[keyLength
- 1] != 0) {
1366 infoln(fileTestName
);
1367 errln("Collator(%s).getCollationKey() wrote an empty or unterminated key",
1370 infoln(printCollationKey(key
));
1374 int32_t numLevels
= coll
->getAttribute(UCOL_STRENGTH
, errorCode
);
1375 if(numLevels
< UCOL_IDENTICAL
) {
1380 if(coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
) {
1383 errorCode
.assertSuccess();
1384 int32_t numLevelSeparators
= 0;
1385 for(int32_t i
= 0; i
< (keyLength
- 1); ++i
) {
1386 uint8_t b
= keyBytes
[i
];
1388 infoln(fileTestName
);
1389 errln("Collator(%s).getCollationKey() contains a 00 byte", norm
);
1391 infoln(printCollationKey(key
));
1394 if(b
== 1) { ++numLevelSeparators
; }
1396 if(numLevelSeparators
!= (numLevels
- 1)) {
1397 infoln(fileTestName
);
1398 errln("Collator(%s).getCollationKey() has %d level separators for %d levels",
1399 norm
, (int)numLevelSeparators
, (int)numLevels
);
1401 infoln(printCollationKey(key
));
1405 // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
1406 static const int32_t partSizes
[] = { 32, 3, 1 };
1407 for(int32_t psi
= 0; psi
< UPRV_LENGTHOF(partSizes
); ++psi
) {
1408 int32_t partSize
= partSizes
[psi
];
1410 if(!getSortKeyParts(s
, length
, parts
, 32, errorCode
)) {
1411 infoln(fileTestName
);
1412 errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
1413 norm
, (int)partSize
, errorCode
.errorName());
1417 if(keyLength
!= parts
.length() || uprv_memcmp(keyBytes
, parts
.data(), keyLength
) != 0) {
1418 infoln(fileTestName
);
1419 errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
1420 norm
, (int)partSize
);
1422 infoln(printCollationKey(key
));
1423 infoln(printSortKey(reinterpret_cast<uint8_t *>(parts
.data()), parts
.length()));
1431 * Changes the key to the merged segments of the U+FFFE-separated substrings of s.
1432 * Leaves key unchanged if s does not contain U+FFFE.
1433 * @return TRUE if the key was successfully changed
1435 UBool
CollationTest::getMergedCollationKey(const UChar
*s
, int32_t length
,
1436 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1437 if(errorCode
.isFailure()) { return FALSE
; }
1438 LocalMemory
<uint8_t> mergedKey
;
1439 int32_t mergedKeyLength
= 0;
1440 int32_t mergedKeyCapacity
= 0;
1441 int32_t sLength
= (length
>= 0) ? length
: u_strlen(s
);
1442 int32_t segmentStart
= 0;
1443 for(int32_t i
= 0;;) {
1445 if(segmentStart
== 0) {
1446 // s does not contain any U+FFFE.
1449 } else if(s
[i
] != 0xfffe) {
1453 // Get the sort key for another segment and merge it into mergedKey.
1454 CollationKey
key1(mergedKey
.getAlias(), mergedKeyLength
); // copies the bytes
1456 coll
->getCollationKey(s
+ segmentStart
, i
- segmentStart
, key2
, errorCode
);
1457 int32_t key1Length
, key2Length
;
1458 const uint8_t *key1Bytes
= key1
.getByteArray(key1Length
);
1459 const uint8_t *key2Bytes
= key2
.getByteArray(key2Length
);
1461 int32_t minCapacity
= key1Length
+ key2Length
;
1462 if(key1Length
> 0) { --minCapacity
; }
1463 if(minCapacity
<= mergedKeyCapacity
) {
1464 dest
= mergedKey
.getAlias();
1466 if(minCapacity
<= 200) {
1467 mergedKeyCapacity
= 200;
1468 } else if(minCapacity
<= 2 * mergedKeyCapacity
) {
1469 mergedKeyCapacity
*= 2;
1471 mergedKeyCapacity
= minCapacity
;
1473 dest
= mergedKey
.allocateInsteadAndReset(mergedKeyCapacity
);
1475 U_ASSERT(dest
!= NULL
|| mergedKeyCapacity
== 0);
1476 if(key1Length
== 0) {
1477 // key2 is the sort key for the first segment.
1478 uprv_memcpy(dest
, key2Bytes
, key2Length
);
1479 mergedKeyLength
= key2Length
;
1482 ucol_mergeSortkeys(key1Bytes
, key1Length
, key2Bytes
, key2Length
,
1483 dest
, mergedKeyCapacity
);
1485 if(i
== sLength
) { break; }
1488 key
= CollationKey(mergedKey
.getAlias(), mergedKeyLength
);
1495 * Replaces unpaired surrogates with U+FFFD.
1496 * Returns s if no replacement was made, otherwise buffer.
1498 const UnicodeString
&surrogatesToFFFD(const UnicodeString
&s
, UnicodeString
&buffer
) {
1500 while(i
< s
.length()) {
1501 UChar32 c
= s
.char32At(i
);
1502 if(U_IS_SURROGATE(c
)) {
1503 if(buffer
.length() < i
) {
1504 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
1506 buffer
.append((UChar
)0xfffd);
1510 if(buffer
.isEmpty()) {
1513 if(buffer
.length() < i
) {
1514 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
1519 int32_t getDifferenceLevel(const CollationKey
&prevKey
, const CollationKey
&key
,
1520 UCollationResult order
, UBool collHasCaseLevel
) {
1521 if(order
== UCOL_EQUAL
) {
1522 return Collation::NO_LEVEL
;
1524 int32_t prevKeyLength
;
1525 const uint8_t *prevBytes
= prevKey
.getByteArray(prevKeyLength
);
1527 const uint8_t *bytes
= key
.getByteArray(keyLength
);
1528 int32_t level
= Collation::PRIMARY_LEVEL
;
1529 for(int32_t i
= 0;; ++i
) {
1530 uint8_t b
= prevBytes
[i
];
1531 if(b
!= bytes
[i
]) { break; }
1532 if(b
== Collation::LEVEL_SEPARATOR_BYTE
) {
1534 if(level
== Collation::CASE_LEVEL
&& !collHasCaseLevel
) {
1544 UBool
CollationTest::checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
1545 const UnicodeString
&prevString
, const UnicodeString
&s
,
1546 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
1547 IcuTestErrorCode
&errorCode
) {
1548 if(errorCode
.isFailure()) { return FALSE
; }
1550 // Get the sort keys first, for error debug output.
1551 CollationKey prevKey
;
1552 if(!getCollationKey(norm
, prevFileLine
, prevString
.getBuffer(), prevString
.length(),
1553 prevKey
, errorCode
)) {
1557 if(!getCollationKey(norm
, fileLine
, s
.getBuffer(), s
.length(), key
, errorCode
)) { return FALSE
; }
1559 UCollationResult order
= coll
->compare(prevString
, s
, errorCode
);
1560 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1561 infoln(fileTestName
);
1562 errln("line %d Collator(%s).compare(previous, current) wrong order: %d != %d (%s)",
1563 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1564 infoln(prevFileLine
);
1566 infoln(printCollationKey(prevKey
));
1567 infoln(printCollationKey(key
));
1570 order
= coll
->compare(s
, prevString
, errorCode
);
1571 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1572 infoln(fileTestName
);
1573 errln("line %d Collator(%s).compare(current, previous) wrong order: %d != %d (%s)",
1574 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1575 infoln(prevFileLine
);
1577 infoln(printCollationKey(prevKey
));
1578 infoln(printCollationKey(key
));
1581 // Test NUL-termination if the strings do not contain NUL characters.
1582 UBool containNUL
= prevString
.indexOf((UChar
)0) >= 0 || s
.indexOf((UChar
)0) >= 0;
1584 order
= coll
->compare(prevString
.getBuffer(), -1, s
.getBuffer(), -1, errorCode
);
1585 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1586 infoln(fileTestName
);
1587 errln("line %d Collator(%s).compare(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1588 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1589 infoln(prevFileLine
);
1591 infoln(printCollationKey(prevKey
));
1592 infoln(printCollationKey(key
));
1595 order
= coll
->compare(s
.getBuffer(), -1, prevString
.getBuffer(), -1, errorCode
);
1596 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1597 infoln(fileTestName
);
1598 errln("line %d Collator(%s).compare(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1599 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1600 infoln(prevFileLine
);
1602 infoln(printCollationKey(prevKey
));
1603 infoln(printCollationKey(key
));
1608 #if U_HAVE_STD_STRING
1609 // compare(UTF-16) treats unpaired surrogates like unassigned code points.
1610 // Unpaired surrogates cannot be converted to UTF-8.
1611 // Create valid UTF-16 strings if necessary, and use those for
1612 // both the expected compare() result and for the input to compare(UTF-8).
1613 UnicodeString prevBuffer
, sBuffer
;
1614 const UnicodeString
&prevValid
= surrogatesToFFFD(prevString
, prevBuffer
);
1615 const UnicodeString
&sValid
= surrogatesToFFFD(s
, sBuffer
);
1616 std::string prevUTF8
, sUTF8
;
1617 UnicodeString(prevValid
).toUTF8String(prevUTF8
);
1618 UnicodeString(sValid
).toUTF8String(sUTF8
);
1619 UCollationResult expectedUTF8Order
;
1620 if(&prevValid
== &prevString
&& &sValid
== &s
) {
1621 expectedUTF8Order
= expectedOrder
;
1623 expectedUTF8Order
= coll
->compare(prevValid
, sValid
, errorCode
);
1626 order
= coll
->compareUTF8(prevUTF8
, sUTF8
, errorCode
);
1627 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1628 infoln(fileTestName
);
1629 errln("line %d Collator(%s).compareUTF8(previous, current) wrong order: %d != %d (%s)",
1630 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1631 infoln(prevFileLine
);
1633 infoln(printCollationKey(prevKey
));
1634 infoln(printCollationKey(key
));
1637 order
= coll
->compareUTF8(sUTF8
, prevUTF8
, errorCode
);
1638 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1639 infoln(fileTestName
);
1640 errln("line %d Collator(%s).compareUTF8(current, previous) wrong order: %d != %d (%s)",
1641 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1642 infoln(prevFileLine
);
1644 infoln(printCollationKey(prevKey
));
1645 infoln(printCollationKey(key
));
1648 // Test NUL-termination if the strings do not contain NUL characters.
1650 order
= coll
->internalCompareUTF8(prevUTF8
.c_str(), -1, sUTF8
.c_str(), -1, errorCode
);
1651 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1652 infoln(fileTestName
);
1653 errln("line %d Collator(%s).internalCompareUTF8(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1654 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1655 infoln(prevFileLine
);
1657 infoln(printCollationKey(prevKey
));
1658 infoln(printCollationKey(key
));
1661 order
= coll
->internalCompareUTF8(sUTF8
.c_str(), -1, prevUTF8
.c_str(), -1, errorCode
);
1662 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1663 infoln(fileTestName
);
1664 errln("line %d Collator(%s).internalCompareUTF8(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1665 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1666 infoln(prevFileLine
);
1668 infoln(printCollationKey(prevKey
));
1669 infoln(printCollationKey(key
));
1675 UCharIterator leftIter
;
1676 UCharIterator rightIter
;
1677 uiter_setString(&leftIter
, prevString
.getBuffer(), prevString
.length());
1678 uiter_setString(&rightIter
, s
.getBuffer(), s
.length());
1679 order
= coll
->compare(leftIter
, rightIter
, errorCode
);
1680 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1681 infoln(fileTestName
);
1682 errln("line %d Collator(%s).compare(UCharIterator: previous, current) "
1683 "wrong order: %d != %d (%s)",
1684 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1685 infoln(prevFileLine
);
1687 infoln(printCollationKey(prevKey
));
1688 infoln(printCollationKey(key
));
1692 order
= prevKey
.compareTo(key
, errorCode
);
1693 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1694 infoln(fileTestName
);
1695 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo() wrong order: %d != %d (%s)",
1696 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1697 infoln(prevFileLine
);
1699 infoln(printCollationKey(prevKey
));
1700 infoln(printCollationKey(key
));
1703 UBool collHasCaseLevel
= coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
;
1704 int32_t level
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1705 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
1706 if(level
!= expectedLevel
) {
1707 infoln(fileTestName
);
1708 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
1709 (int)fileLineNumber
, norm
, order
, level
, expectedLevel
);
1710 infoln(prevFileLine
);
1712 infoln(printCollationKey(prevKey
));
1713 infoln(printCollationKey(key
));
1718 // If either string contains U+FFFE, then their sort keys must compare the same as
1719 // the merged sort keys of each string's between-FFFE segments.
1721 // It is not required that
1722 // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
1723 // only that those two methods yield the same order.
1725 // Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
1726 if((getMergedCollationKey(prevString
.getBuffer(), prevString
.length(), prevKey
, errorCode
) |
1727 getMergedCollationKey(s
.getBuffer(), s
.length(), key
, errorCode
)) ||
1728 errorCode
.isFailure()) {
1729 order
= prevKey
.compareTo(key
, errorCode
);
1730 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1731 infoln(fileTestName
);
1732 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1733 "(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)",
1734 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1735 infoln(prevFileLine
);
1737 infoln(printCollationKey(prevKey
));
1738 infoln(printCollationKey(key
));
1741 int32_t mergedLevel
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1742 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
1743 if(mergedLevel
!= level
) {
1744 infoln(fileTestName
);
1745 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1746 "(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d",
1747 (int)fileLineNumber
, norm
, order
, mergedLevel
, level
);
1748 infoln(prevFileLine
);
1750 infoln(printCollationKey(prevKey
));
1751 infoln(printCollationKey(key
));
1759 void CollationTest::checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1760 if(errorCode
.isFailure()) { return; }
1761 UnicodeString prevFileLine
= UNICODE_STRING("(none)", 6);
1762 UnicodeString prevString
, s
;
1763 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
1764 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1765 // Parse the line even if it will be ignored (when we do not have a Collator)
1766 // in order to report syntax issues.
1767 Collation::Level relation
= parseRelationAndString(s
, errorCode
);
1768 if(errorCode
.isFailure()) {
1773 // We were unable to create the Collator but continue with tests.
1774 // Ignore test data for this Collator.
1775 // The next Collator creation might work.
1778 UCollationResult expectedOrder
= (relation
== Collation::ZERO_LEVEL
) ? UCOL_EQUAL
: UCOL_LESS
;
1779 Collation::Level expectedLevel
= relation
;
1780 s
.getTerminatedBuffer(); // Ensure NUL-termination.
1782 if(!needsNormalization(prevString
, errorCode
) && !needsNormalization(s
, errorCode
)) {
1783 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, errorCode
);
1784 isOk
= checkCompareTwo("normalization=on", prevFileLine
, prevString
, s
,
1785 expectedOrder
, expectedLevel
, errorCode
);
1788 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, errorCode
);
1789 isOk
= checkCompareTwo("normalization=off", prevFileLine
, prevString
, s
,
1790 expectedOrder
, expectedLevel
, errorCode
);
1792 if(isOk
&& (!nfd
->isNormalized(prevString
, errorCode
) || !nfd
->isNormalized(s
, errorCode
))) {
1793 UnicodeString pn
= nfd
->normalize(prevString
, errorCode
);
1794 UnicodeString n
= nfd
->normalize(s
, errorCode
);
1795 pn
.getTerminatedBuffer();
1796 n
.getTerminatedBuffer();
1797 errorCode
.assertSuccess();
1798 isOk
= checkCompareTwo("NFD input", prevFileLine
, pn
, n
,
1799 expectedOrder
, expectedLevel
, errorCode
);
1802 errorCode
.reset(); // already reported
1804 prevFileLine
= fileLine
;
1806 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
1810 void CollationTest::TestDataDriven() {
1811 IcuTestErrorCode
errorCode(*this, "TestDataDriven");
1813 fcd
= Normalizer2Factory::getFCDInstance(errorCode
);
1814 nfd
= Normalizer2::getNFDInstance(errorCode
);
1815 if(errorCode
.logDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) {
1819 CharString
path(getSourceTestData(errorCode
), errorCode
);
1820 path
.appendPathPart("collationtest.txt", errorCode
);
1821 const char *codePage
= "UTF-8";
1822 LocalUCHARBUFPointer
f(ucbuf_open(path
.data(), &codePage
, TRUE
, FALSE
, errorCode
));
1823 if(errorCode
.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) {
1826 // Read a new line if necessary.
1827 // Sub-parsers leave the first line set that they do not handle.
1828 while(errorCode
.isSuccess() && (!fileLine
.isEmpty() || readNonEmptyLine(f
.getAlias(), errorCode
))) {
1829 if(!isSectionStarter(fileLine
[0])) {
1830 errln("syntax error on line %d", (int)fileLineNumber
);
1834 if(fileLine
.startsWith(UNICODE_STRING("** test: ", 9))) {
1835 fileTestName
= fileLine
;
1838 } else if(fileLine
== UNICODE_STRING("@ root", 6)) {
1839 setRootCollator(errorCode
);
1841 } else if(fileLine
.startsWith(UNICODE_STRING("@ locale ", 9))) {
1842 setLocaleCollator(errorCode
);
1844 } else if(fileLine
== UNICODE_STRING("@ rules", 7)) {
1845 buildTailoring(f
.getAlias(), errorCode
);
1846 } else if(fileLine
[0] == 0x25 && isSpace(fileLine
[1])) { // %
1847 parseAndSetAttribute(errorCode
);
1848 } else if(fileLine
== UNICODE_STRING("* compare", 9)) {
1849 checkCompareStrings(f
.getAlias(), errorCode
);
1851 errln("syntax error on line %d", (int)fileLineNumber
);
1858 #endif // !UCONFIG_NO_COLLATION