2 *******************************************************************************
3 * Copyright (C) 2012-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
8 * created on: 2012apr27
9 * created by: Markus W. Scherer
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "unicode/coll.h"
17 #include "unicode/errorcode.h"
18 #include "unicode/localpointer.h"
19 #include "unicode/normalizer2.h"
20 #include "unicode/sortkey.h"
21 #include "unicode/std_string.h"
22 #include "unicode/strenum.h"
23 #include "unicode/tblcoll.h"
24 #include "unicode/uiter.h"
25 #include "unicode/uniset.h"
26 #include "unicode/unistr.h"
27 #include "unicode/usetiter.h"
28 #include "unicode/ustring.h"
31 #include "collation.h"
32 #include "collationdata.h"
33 #include "collationfcd.h"
34 #include "collationiterator.h"
35 #include "collationroot.h"
36 #include "collationrootelements.h"
37 #include "collationruleparser.h"
38 #include "collationweights.h"
41 #include "normalizer2impl.h"
44 #include "uitercollationiterator.h"
45 #include "utf16collationiterator.h"
46 #include "utf8collationiterator.h"
51 // TODO: Move to ucbuf.h
// Declares LocalUCHARBUFPointer via the U_DEFINE_LOCAL_OPEN_POINTER macro
// (unicode/localpointer.h): a scoped holder for a UCHARBUF that is released
// with ucbuf_close().
52 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer
, UCHARBUF
, ucbuf_close
);
54 class CodePointIterator
;
56 // TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey)
58 class CollationTest
: public IntlTest
{
61 : fcd(NULL
), nfd(NULL
),
69 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=NULL
);
73 void TestNulTerminated();
74 void TestIllegalUTF8();
75 void TestShortFCDData();
77 void TestCollationWeights();
78 void TestRootElements();
79 void TestTailoredElements();
80 void TestDataDriven();
83 void checkFCD(const char *name
, CollationIterator
&ci
, CodePointIterator
&cpi
);
84 void checkAllocWeights(CollationWeights
&cw
,
85 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
86 int32_t someLength
, int32_t minCount
);
88 static UnicodeString
printSortKey(const uint8_t *p
, int32_t length
);
89 static UnicodeString
printCollationKey(const CollationKey
&key
);
91 // Helpers & fields for data-driven test.
92 static UBool
isCROrLF(UChar c
) { return c
== 0xa || c
== 0xd; }
93 static UBool
isSpace(UChar c
) { return c
== 9 || c
== 0x20 || c
== 0x3000; }
94 static UBool
isSectionStarter(UChar c
) { return c
== 0x25 || c
== 0x2a || c
== 0x40; } // %*@
// Starting at index i, steps forward over characters of fileLine for which
// isSpace() is true.
// NOTE(review): the statement that hands the advanced index back to the
// caller is not visible in this view — confirm against the full file.
95 int32_t skipSpaces(int32_t i
) {
96 while(isSpace(fileLine
[i
])) { ++i
; }
100 UBool
readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
101 void parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
, UErrorCode
&errorCode
);
102 Collation::Level
parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
);
103 void parseAndSetAttribute(IcuTestErrorCode
&errorCode
);
104 void parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
);
105 void buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
106 void setRootCollator(IcuTestErrorCode
&errorCode
);
107 void setLocaleCollator(IcuTestErrorCode
&errorCode
);
109 UBool
needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const;
111 UBool
getSortKeyParts(const UChar
*s
, int32_t length
,
112 CharString
&dest
, int32_t partSize
,
113 IcuTestErrorCode
&errorCode
);
114 UBool
getCollationKey(const char *norm
, const UnicodeString
&line
,
115 const UChar
*s
, int32_t length
,
116 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
117 UBool
getMergedCollationKey(const UChar
*s
, int32_t length
,
118 CollationKey
&key
, IcuTestErrorCode
&errorCode
);
119 UBool
checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
120 const UnicodeString
&prevString
, const UnicodeString
&s
,
121 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
122 IcuTestErrorCode
&errorCode
);
123 void checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
);
125 const Normalizer2
*fcd
, *nfd
;
126 UnicodeString fileLine
;
127 int32_t fileLineNumber
;
128 UnicodeString fileTestName
;
// Factory entry point: allocates a CollationTest with new and returns it
// through an IntlTest pointer.
// NOTE(review): the caller presumably takes ownership of the returned
// object — confirm against the test framework.
132 extern IntlTest
*createCollationTest() {
133 return new CollationTest();
136 void CollationTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
138 logln("TestSuite CollationTest: ");
141 TESTCASE_AUTO(TestMinMax
);
142 TESTCASE_AUTO(TestImplicits
);
143 TESTCASE_AUTO(TestNulTerminated
);
144 TESTCASE_AUTO(TestIllegalUTF8
);
145 TESTCASE_AUTO(TestShortFCDData
);
146 TESTCASE_AUTO(TestFCD
);
147 TESTCASE_AUTO(TestCollationWeights
);
148 TESTCASE_AUTO(TestRootElements
);
149 TESTCASE_AUTO(TestTailoredElements
);
150 TESTCASE_AUTO(TestDataDriven
);
154 void CollationTest::TestMinMax() {
155 IcuTestErrorCode
errorCode(*this, "TestMinMax");
157 setRootCollator(errorCode
);
158 if(errorCode
.isFailure()) {
162 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
);
164 errln("the root collator is not a RuleBasedCollator");
168 static const UChar s
[2] = { 0xfffe, 0xffff };
169 UVector64
ces(errorCode
);
170 rbc
->internalGetCEs(UnicodeString(FALSE
, s
, 2), ces
, errorCode
);
171 errorCode
.assertSuccess();
172 if(ces
.size() != 2) {
173 errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces
.size());
176 int64_t ce
= ces
.elementAti(0);
177 int64_t expected
= Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY
);
179 errln("CE(U+fffe)=%04lx != 02..", (long)ce
);
182 ce
= ces
.elementAti(1);
183 expected
= Collation::makeCE(Collation::MAX_PRIMARY
);
185 errln("CE(U+ffff)=%04lx != max..", (long)ce
);
189 void CollationTest::TestImplicits() {
190 IcuTestErrorCode
errorCode(*this, "TestImplicits");
192 const CollationData
*cd
= CollationRoot::getData(errorCode
);
193 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
197 // Implicit primary weights should be assigned for the following sets,
198 // and sort in ascending order by set and then code point.
199 // See http://www.unicode.org/reports/tr10/#Implicit_Weights
201 // core Han Unified Ideographs
202 UnicodeSet
coreHan("[\\p{unified_ideograph}&"
203 "[\\p{Block=CJK_Unified_Ideographs}"
204 "\\p{Block=CJK_Compatibility_Ideographs}]]",
206 // all other Unified Han ideographs
207 UnicodeSet
otherHan("[\\p{unified ideograph}-"
208 "[\\p{Block=CJK_Unified_Ideographs}"
209 "\\p{Block=CJK_Compatibility_Ideographs}]]",
211 UnicodeSet
unassigned("[[:Cn:][:Cs:][:Co:]]", errorCode
);
212 unassigned
.remove(0xfffe, 0xffff); // These have special CLDR root mappings.
214 // Starting with CLDR 26/ICU 54, the root Han order may instead be
215 // the Unihan radical-stroke order.
216 // The tests should pass either way, so we only test the order of a small set of Han characters
217 // whose radical-stroke order is the same as their code point order.
218 UnicodeSet
someHanInCPOrder(
219 "[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48"
220 "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]",
222 UnicodeSet
inOrder(someHanInCPOrder
);
223 inOrder
.addAll(unassigned
).freeze();
224 if(errorCode
.logIfFailureAndReset("UnicodeSet")) {
227 const UnicodeSet
*sets
[] = { &coreHan
, &otherHan
, &unassigned
};
229 uint32_t prevPrimary
= 0;
230 UTF16CollationIterator
ci(cd
, FALSE
, NULL
, NULL
, NULL
);
231 for(int32_t i
= 0; i
< UPRV_LENGTHOF(sets
); ++i
) {
232 LocalPointer
<UnicodeSetIterator
> iter(new UnicodeSetIterator(*sets
[i
]));
233 while(iter
->next()) {
234 UChar32 c
= iter
->getCodepoint();
236 ci
.setText(s
.getBuffer(), s
.getBuffer() + s
.length());
237 int64_t ce
= ci
.nextCE(errorCode
);
238 int64_t ce2
= ci
.nextCE(errorCode
);
239 if(errorCode
.logIfFailureAndReset("CollationIterator.nextCE()")) {
242 if(ce
== Collation::NO_CE
|| ce2
!= Collation::NO_CE
) {
243 errln("CollationIterator.nextCE(U+%04lx) did not yield exactly one CE", (long)c
);
246 if((ce
& 0xffffffff) != Collation::COMMON_SEC_AND_TER_CE
) {
247 errln("CollationIterator.nextCE(U+%04lx) has non-common sec/ter weights: %08lx",
248 (long)c
, (long)(ce
& 0xffffffff));
251 uint32_t primary
= (uint32_t)(ce
>> 32);
252 if(!(primary
> prevPrimary
) && inOrder
.contains(c
) && inOrder
.contains(prev
)) {
253 errln("CE(U+%04lx)=%04lx.. not greater than CE(U+%04lx)=%04lx..",
254 (long)c
, (long)primary
, (long)prev
, (long)prevPrimary
);
257 prevPrimary
= primary
;
262 void CollationTest::TestNulTerminated() {
263 IcuTestErrorCode
errorCode(*this, "TestNulTerminated");
264 const CollationData
*data
= CollationRoot::getData(errorCode
);
265 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
269 static const UChar s
[] = { 0x61, 0x62, 0x61, 0x62, 0 };
271 UTF16CollationIterator
ci1(data
, FALSE
, s
, s
, s
+ 2);
272 UTF16CollationIterator
ci2(data
, FALSE
, s
+ 2, s
+ 2, NULL
);
273 for(int32_t i
= 0;; ++i
) {
274 int64_t ce1
= ci1
.nextCE(errorCode
);
275 int64_t ce2
= ci2
.nextCE(errorCode
);
276 if(errorCode
.logIfFailureAndReset("CollationIterator.nextCE()")) {
280 errln("CollationIterator.nextCE(with length) != nextCE(NUL-terminated) at CE %d", (int)i
);
283 if(ce1
== Collation::NO_CE
) { break; }
287 void CollationTest::TestIllegalUTF8() {
288 IcuTestErrorCode
errorCode(*this, "TestIllegalUTF8");
290 setRootCollator(errorCode
);
291 if(errorCode
.isFailure()) {
295 coll
->setAttribute(UCOL_STRENGTH
, UCOL_IDENTICAL
, errorCode
);
297 static const char *strings
[] = {
300 // illegal byte sequences
301 "a\x80z", // trail byte
302 "a\xc1\x81z", // non-shortest form
303 "a\xe0\x82\x83z", // non-shortest form
304 "a\xed\xa0\x80z", // lead surrogate: would be U+D800
305 "a\xed\xbf\xbfz", // trail surrogate: would be U+DFFF
306 "a\xf0\x8f\xbf\xbfz", // non-shortest form
307 "a\xf4\x90\x80\x80z" // out of range: would be U+110000
310 StringPiece
fffd(strings
[0]);
311 for(int32_t i
= 1; i
< UPRV_LENGTHOF(strings
); ++i
) {
312 StringPiece
illegal(strings
[i
]);
313 UCollationResult order
= coll
->compareUTF8(fffd
, illegal
, errorCode
);
314 if(order
!= UCOL_EQUAL
) {
315 errln("compareUTF8(U+FFFD, string %d with illegal UTF-8)=%d != UCOL_EQUAL",
// Walks the supplementary code points (0x10000..0x10FFFF) in blocks of 0x400
// and, for each block of which src contains at least one code point, adds
// U16_LEAD(c) for the block's first code point c to dest.
// src and dest may be the same set (TestShortFCDData calls it that way).
// NOTE(review): the statement that advances c to the next block is not
// visible in this view — confirm against the full file.
323 void addLeadSurrogatesForSupplementary(const UnicodeSet
&src
, UnicodeSet
&dest
) {
324 for(UChar32 c
= 0x10000; c
< 0x110000;) {
// [c, next - 1] is the 0x400-wide block that starts at c.
325 UChar32 next
= c
+ 0x400;
326 if(src
.containsSome(c
, next
- 1)) {
327 dest
.add(U16_LEAD(c
));
335 void CollationTest::TestShortFCDData() {
336 // See CollationFCD class comments.
337 IcuTestErrorCode
errorCode(*this, "TestShortFCDData");
338 UnicodeSet
expectedLccc("[:^lccc=0:]", errorCode
);
339 errorCode
.assertSuccess();
340 expectedLccc
.add(0xdc00, 0xdfff); // add all trail surrogates
341 addLeadSurrogatesForSupplementary(expectedLccc
, expectedLccc
);
342 UnicodeSet lccc
; // actual
343 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
344 if(CollationFCD::hasLccc(c
)) { lccc
.add(c
); }
346 UnicodeSet
diff(expectedLccc
);
347 diff
.removeAll(lccc
);
348 diff
.remove(0x10000, 0x10ffff); // hasLccc() only works for the BMP
349 UnicodeString
empty("[]");
350 UnicodeString diffString
;
351 diff
.toPattern(diffString
, TRUE
);
352 assertEquals("CollationFCD::hasLccc() expected-actual", empty
, diffString
);
354 diff
.removeAll(expectedLccc
);
355 diff
.toPattern(diffString
, TRUE
);
356 assertEquals("CollationFCD::hasLccc() actual-expected", empty
, diffString
, TRUE
);
358 UnicodeSet
expectedTccc("[:^tccc=0:]", errorCode
);
359 if (errorCode
.isSuccess()) {
360 addLeadSurrogatesForSupplementary(expectedLccc
, expectedTccc
);
361 addLeadSurrogatesForSupplementary(expectedTccc
, expectedTccc
);
362 UnicodeSet tccc
; // actual
363 for(UChar32 c
= 0; c
<= 0xffff; ++c
) {
364 if(CollationFCD::hasTccc(c
)) { tccc
.add(c
); }
367 diff
.removeAll(tccc
);
368 diff
.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP
369 assertEquals("CollationFCD::hasTccc() expected-actual", empty
, diffString
);
371 diff
.removeAll(expectedTccc
);
372 diff
.toPattern(diffString
, TRUE
);
373 assertEquals("CollationFCD::hasTccc() actual-expected", empty
, diffString
);
// Bidirectional cursor over a caller-supplied array of code points.
// Only the pointer and the length are stored; the array itself is not copied.
377 class CodePointIterator
{
// Starts at position 0 of the length code points at cp.
379 CodePointIterator(const UChar32
*cp
, int32_t length
) : cp(cp
), length(length
), pos(0) {}
// Moves the cursor back to position 0.
380 void resetToStart() { pos
= 0; }
// Returns the code point at the cursor and advances,
// or U_SENTINEL once the cursor has reached length.
381 UChar32
next() { return (pos
< length
) ? cp
[pos
++] : U_SENTINEL
; }
// Steps the cursor back and returns the code point there,
// or U_SENTINEL when the cursor is already at position 0.
382 UChar32
previous() { return (pos
> 0) ? cp
[--pos
] : U_SENTINEL
; }
// Number of code points in the array.
383 int32_t getLength() const { return length
; }
// Current cursor position, converted to int.
384 int getIndex() const { return (int)pos
; }
391 void CollationTest::checkFCD(const char *name
,
392 CollationIterator
&ci
, CodePointIterator
&cpi
) {
393 IcuTestErrorCode
errorCode(*this, "checkFCD");
395 // Iterate forward to the limit.
397 UChar32 c1
= ci
.nextCodePoint(errorCode
);
398 UChar32 c2
= cpi
.next();
400 errln("%s.nextCodePoint(to limit, 1st pass) = U+%04lx != U+%04lx at %d",
401 name
, (long)c1
, (long)c2
, cpi
.getIndex());
404 if(c1
< 0) { break; }
407 // Iterate backward most of the way.
408 for(int32_t n
= (cpi
.getLength() * 2) / 3; n
> 0; --n
) {
409 UChar32 c1
= ci
.previousCodePoint(errorCode
);
410 UChar32 c2
= cpi
.previous();
412 errln("%s.previousCodePoint() = U+%04lx != U+%04lx at %d",
413 name
, (long)c1
, (long)c2
, cpi
.getIndex());
420 UChar32 c1
= ci
.nextCodePoint(errorCode
);
421 UChar32 c2
= cpi
.next();
423 errln("%s.nextCodePoint(to limit again) = U+%04lx != U+%04lx at %d",
424 name
, (long)c1
, (long)c2
, cpi
.getIndex());
427 if(c1
< 0) { break; }
430 // Iterate backward to the start.
432 UChar32 c1
= ci
.previousCodePoint(errorCode
);
433 UChar32 c2
= cpi
.previous();
435 errln("%s.previousCodePoint(to start) = U+%04lx != U+%04lx at %d",
436 name
, (long)c1
, (long)c2
, cpi
.getIndex());
439 if(c1
< 0) { break; }
443 void CollationTest::TestFCD() {
444 IcuTestErrorCode
errorCode(*this, "TestFCD");
445 const CollationData
*data
= CollationRoot::getData(errorCode
);
446 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
450 // Input string, not FCD, NUL-terminated.
451 static const UChar s
[] = {
452 0x308, 0xe1, 0x62, 0x301, 0x327, 0x430, 0x62,
453 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
454 0x327, 0x308, // ccc=202, 230
455 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
456 U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F),
457 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D),
459 0xe7, // Character with tccc!=0 decomposed together with mis-ordered sequence.
460 U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), U16_LEAD(0x1D165), U16_TRAIL(0x1D165),
461 0xe1, // Character with tccc!=0 decomposed together with decomposed sequence.
462 0xf73, 0xf75, // Tibetan composite vowels must be decomposed.
466 // Expected code points.
467 static const UChar32 cp
[] = {
468 0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
469 0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
472 0x63, 0x327, 0x1D165, 0x1D16D,
474 0xf71, 0xf71, 0xf72, 0xf74, 0x301,
478 FCDUTF16CollationIterator
u16ci(data
, FALSE
, s
, s
, NULL
);
479 if(errorCode
.logIfFailureAndReset("FCDUTF16CollationIterator constructor")) {
482 CodePointIterator
cpi(cp
, UPRV_LENGTHOF(cp
));
483 checkFCD("FCDUTF16CollationIterator", u16ci
, cpi
);
485 #if U_HAVE_STD_STRING
488 UnicodeString(s
).toUTF8String(utf8
);
489 FCDUTF8CollationIterator
u8ci(data
, FALSE
,
490 reinterpret_cast<const uint8_t *>(utf8
.c_str()), 0, -1);
491 if(errorCode
.logIfFailureAndReset("FCDUTF8CollationIterator constructor")) {
494 checkFCD("FCDUTF8CollationIterator", u8ci
, cpi
);
499 uiter_setString(&iter
, s
, UPRV_LENGTHOF(s
) - 1); // -1: without the terminating NUL
500 FCDUIterCollationIterator
uici(data
, FALSE
, iter
, 0);
501 if(errorCode
.logIfFailureAndReset("FCDUIterCollationIterator constructor")) {
504 checkFCD("FCDUIterCollationIterator", uici
, cpi
);
507 void CollationTest::checkAllocWeights(CollationWeights
&cw
,
508 uint32_t lowerLimit
, uint32_t upperLimit
, int32_t n
,
509 int32_t someLength
, int32_t minCount
) {
510 if(!cw
.allocWeights(lowerLimit
, upperLimit
, n
)) {
511 errln("CollationWeights::allocWeights(%lx, %lx, %ld) = FALSE",
512 (long)lowerLimit
, (long)upperLimit
, (long)n
);
515 uint32_t previous
= lowerLimit
;
516 int32_t count
= 0; // number of weights that have someLength
517 for(int32_t i
= 0; i
< n
; ++i
) {
518 uint32_t w
= cw
.nextWeight();
519 if(w
== 0xffffffff) {
520 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
521 "returns only %ld weights",
522 (long)lowerLimit
, (long)upperLimit
, (long)n
, (long)i
);
525 if(!(previous
< w
&& w
< upperLimit
)) {
526 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
527 "number %ld -> %lx not between %lx and %lx",
528 (long)lowerLimit
, (long)upperLimit
, (long)n
,
529 (long)(i
+ 1), (long)w
, (long)previous
, (long)upperLimit
);
532 if(CollationWeights::lengthOfWeight(w
) == someLength
) { ++count
; }
534 if(count
< minCount
) {
535 errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
536 "returns only %ld < %ld weights of length %d",
537 (long)lowerLimit
, (long)upperLimit
, (long)n
,
538 (long)count
, (long)minCount
, (int)someLength
);
542 void CollationTest::TestCollationWeights() {
545 // Non-compressible primaries use 254 second bytes 02..FF.
546 logln("CollationWeights.initForPrimary(non-compressible)");
547 cw
.initForPrimary(FALSE
);
548 // Expect 1 weight 11 and 254 weights 12xx.
549 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 1, 1);
550 checkAllocWeights(cw
, 0x10000000, 0x13000000, 255, 2, 254);
551 // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202.
552 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 260, 2, 255);
553 // Expect 254 two-byte weights from the ranges 10ff and 11xx.
554 checkAllocWeights(cw
, 0x10fefe40, 0x12030300, 600, 2, 254);
555 // Expect 254^2=64516 three-byte weights.
556 // During computation, there should be 3 three-byte ranges
557 // 10ffff, 11xxxx, 120202.
558 // The middle one should be split 64515:1,
559 // and the newly-split-off range and the last range lengthened.
560 checkAllocWeights(cw
, 0x10fffe00, 0x12020300, 1 + 64516 + 254 + 1, 3, 64516);
561 // Expect weights 1102 & 1103.
562 checkAllocWeights(cw
, 0x10ff0000, 0x11040000, 2, 2, 2);
563 // Expect weights 102102 & 102103.
564 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
566 // Compressible primaries use 251 second bytes 04..FE.
567 logln("CollationWeights.initForPrimary(compressible)");
568 cw
.initForPrimary(TRUE
);
569 // Expect 1 weight 11 and 251 weights 12xx.
570 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 1, 1);
571 checkAllocWeights(cw
, 0x10000000, 0x13000000, 252, 2, 251);
572 // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204.
573 checkAllocWeights(cw
, 0x10fdfe40, 0x12050300, 260, 2, 252);
574 // Expect weights 1104 & 1105.
575 checkAllocWeights(cw
, 0x10fe0000, 0x11060000, 2, 2, 2);
576 // Expect weights 102102 & 102103.
577 checkAllocWeights(cw
, 0x1020ff00, 0x10210400, 2, 3, 2);
579 // Secondary and tertiary weights use only bytes 3 & 4.
580 logln("CollationWeights.initForSecondary()");
581 cw
.initForSecondary();
582 // Expect weights fbxx and all four fc..ff.
583 checkAllocWeights(cw
, 0xfb20, 0x10000, 20, 3, 4);
585 logln("CollationWeights.initForTertiary()");
586 cw
.initForTertiary();
587 // Expect weights 3dxx and both 3e & 3f.
588 checkAllocWeights(cw
, 0x3d02, 0x4000, 10, 3, 2);
593 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
,
594 uint32_t p
, uint32_t s
, uint32_t ctq
) {
595 uint32_t p1
= p
>> 24;
596 uint32_t p2
= (p
>> 16) & 0xff;
597 uint32_t p3
= (p
>> 8) & 0xff;
598 uint32_t p4
= p
& 0xff;
599 uint32_t s1
= s
>> 8;
600 uint32_t s2
= s
& 0xff;
601 // ctq = Case, Tertiary, Quaternary
602 uint32_t c
= (ctq
& Collation::CASE_MASK
) >> 14;
603 uint32_t t
= ctq
& Collation::ONLY_TERTIARY_MASK
;
604 uint32_t t1
= t
>> 8;
605 uint32_t t2
= t
& 0xff;
606 uint32_t q
= ctq
& Collation::QUATERNARY_MASK
;
607 // No leading zero bytes.
608 if((p
!= 0 && p1
== 0) || (s
!= 0 && s1
== 0) || (t
!= 0 && t1
== 0)) {
611 // No intermediate zero bytes.
612 if(p1
!= 0 && p2
== 0 && (p
& 0xffff) != 0) {
615 if(p2
!= 0 && p3
== 0 && p4
!= 0) {
618 // Minimum & maximum lead bytes.
619 if((p1
!= 0 && p1
<= Collation::MERGE_SEPARATOR_BYTE
) ||
620 s1
== Collation::LEVEL_SEPARATOR_BYTE
||
621 t1
== Collation::LEVEL_SEPARATOR_BYTE
|| t1
> 0x3f) {
627 // The valid byte range for the second primary byte depends on compressibility.
629 if(data
.isCompressibleLeadByte(p1
)) {
630 if(p2
<= Collation::PRIMARY_COMPRESSION_LOW_BYTE
||
631 Collation::PRIMARY_COMPRESSION_HIGH_BYTE
<= p2
) {
635 if(p2
<= Collation::LEVEL_SEPARATOR_BYTE
) {
640 // Other bytes just need to avoid the level separator.
641 // Trailing zeros are ok.
642 U_ASSERT(Collation::LEVEL_SEPARATOR_BYTE
== 1);
643 if(p3
== Collation::LEVEL_SEPARATOR_BYTE
|| p4
== Collation::LEVEL_SEPARATOR_BYTE
||
644 s2
== Collation::LEVEL_SEPARATOR_BYTE
|| t2
== Collation::LEVEL_SEPARATOR_BYTE
) {
651 // Completely ignorable CE.
652 // Quaternary CEs are not supported.
653 if(c
!= 0 || q
!= 0) {
658 if(t
< re
.getTertiaryBoundary() || c
!= 2) {
664 if(s
< re
.getSecondaryBoundary() || t
== 0 || t
>= re
.getTertiaryBoundary()) {
670 if(s
== 0 || (Collation::COMMON_WEIGHT16
< s
&& s
<= re
.getLastCommonSecondary()) ||
671 s
>= re
.getSecondaryBoundary()) {
674 if(t
== 0 || t
>= re
.getTertiaryBoundary()) {
// Convenience overload for a packed 64-bit CE: unpacks it and forwards to
// the isValidCE(re, data, p, s, ctq) overload.
681 UBool
isValidCE(const CollationRootElements
&re
, const CollationData
&data
, int64_t ce
) {
// Upper 32 bits of the CE: passed on as p.
682 uint32_t p
= (uint32_t)(ce
>> 32);
// Lower 32 bits of the CE.
683 uint32_t secTer
= (uint32_t)ce
;
// Upper 16 bits of secTer are passed as s, lower 16 bits as ctq.
684 return isValidCE(re
, data
, p
, secTer
>> 16, secTer
& 0xffff);
687 class RootElementsIterator
{
689 RootElementsIterator(const CollationData
&root
)
691 elements(root
.rootElements
), length(root
.rootElementsLength
),
693 index((int32_t)elements
[CollationRootElements::IX_FIRST_TERTIARY_INDEX
]) {}
696 if(index
>= length
) { return FALSE
; }
697 uint32_t p
= elements
[index
];
698 if(p
== CollationRootElements::PRIMARY_SENTINEL
) { return FALSE
; }
699 if((p
& CollationRootElements::SEC_TER_DELTA_FLAG
) != 0) {
701 secTer
= p
& ~CollationRootElements::SEC_TER_DELTA_FLAG
;
704 if((p
& CollationRootElements::PRIMARY_STEP_MASK
) != 0) {
705 // End of a range, enumerate the primaries in the range.
706 int32_t step
= (int32_t)p
& CollationRootElements::PRIMARY_STEP_MASK
;
709 // Finished the range, return the next CE after it.
714 // Return the next primary in this range.
715 UBool isCompressible
= data
.isCompressiblePrimary(pri
);
716 if((pri
& 0xffff) == 0) {
717 pri
= Collation::incTwoBytePrimaryByOffset(pri
, isCompressible
, step
);
719 pri
= Collation::incThreeBytePrimaryByOffset(pri
, isCompressible
, step
);
723 // Simple primary CE.
726 // Does this have an explicit below-common sec/ter unit,
727 // or does it imply a common one?
728 if(index
== length
) {
729 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
731 secTer
= elements
[index
];
732 if((secTer
& CollationRootElements::SEC_TER_DELTA_FLAG
) == 0) {
734 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
736 secTer
&= ~CollationRootElements::SEC_TER_DELTA_FLAG
;
737 if(secTer
> Collation::COMMON_SEC_AND_TER_CE
) {
739 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
741 // Explicit sec/ter below common/common.
749 uint32_t getPrimary() const { return pri
; }
750 uint32_t getSecTer() const { return secTer
; }
753 const CollationData
&data
;
754 const uint32_t *elements
;
764 void CollationTest::TestRootElements() {
765 IcuTestErrorCode
errorCode(*this, "TestRootElements");
766 const CollationData
*root
= CollationRoot::getData(errorCode
);
767 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
770 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
771 RootElementsIterator
iter(*root
);
773 // We check each root CE for validity,
774 // and we also verify that there is a tailoring gap between each two CEs.
775 CollationWeights cw1c
; // compressible primary weights
776 CollationWeights cw1u
; // uncompressible primary weights
777 CollationWeights cw2
;
778 CollationWeights cw3
;
780 cw1c
.initForPrimary(TRUE
);
781 cw1u
.initForPrimary(FALSE
);
782 cw2
.initForSecondary();
783 cw3
.initForTertiary();
785 // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
786 // nor the special merge-separator CE for U+FFFE.
787 uint32_t prevPri
= 0;
788 uint32_t prevSec
= 0;
789 uint32_t prevTer
= 0;
791 uint32_t pri
= iter
.getPrimary();
792 uint32_t secTer
= iter
.getSecTer();
793 // CollationRootElements CEs must have 0 case and quaternary bits.
794 if((secTer
& Collation::CASE_AND_QUATERNARY_MASK
) != 0) {
795 errln("CollationRootElements CE has non-zero case and/or quaternary bits: %08lx %08lx",
796 (long)pri
, (long)secTer
);
798 uint32_t sec
= secTer
>> 16;
799 uint32_t ter
= secTer
& Collation::ONLY_TERTIARY_MASK
;
801 if(pri
== 0 && sec
== 0 && ter
!= 0) {
802 // Tertiary CEs must have uppercase bits,
803 // but they are not stored in the CollationRootElements.
806 if(!isValidCE(rootElements
, *root
, pri
, sec
, ctq
)) {
807 errln("invalid root CE %08lx %08lx", (long)pri
, (long)secTer
);
810 uint32_t newWeight
= 0;
811 if(prevPri
== 0 || prevPri
>= Collation::FFFD_PRIMARY
) {
812 // There is currently no tailoring gap after primary ignorables,
813 // and we forbid tailoring after U+FFFD and U+FFFF.
814 } else if(root
->isCompressiblePrimary(prevPri
)) {
815 if(!cw1c
.allocWeights(prevPri
, pri
, 1)) {
816 errln("no primary/compressible tailoring gap between %08lx and %08lx",
817 (long)prevPri
, (long)pri
);
819 newWeight
= cw1c
.nextWeight();
822 if(!cw1u
.allocWeights(prevPri
, pri
, 1)) {
823 errln("no primary/uncompressible tailoring gap between %08lx and %08lx",
824 (long)prevPri
, (long)pri
);
826 newWeight
= cw1u
.nextWeight();
829 if(newWeight
!= 0 && !(prevPri
< newWeight
&& newWeight
< pri
)) {
830 errln("mis-allocated primary weight, should get %08lx < %08lx < %08lx",
831 (long)prevPri
, (long)newWeight
, (long)pri
);
833 } else if(sec
!= prevSec
) {
834 uint32_t lowerLimit
=
835 prevSec
== 0 ? rootElements
.getSecondaryBoundary() - 0x100 : prevSec
;
836 if(!cw2
.allocWeights(lowerLimit
, sec
, 1)) {
837 errln("no secondary tailoring gap between %04x and %04x", lowerLimit
, sec
);
839 uint32_t newWeight
= cw2
.nextWeight();
840 if(!(prevSec
< newWeight
&& newWeight
< sec
)) {
841 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
842 (long)lowerLimit
, (long)newWeight
, (long)sec
);
845 } else if(ter
!= prevTer
) {
846 uint32_t lowerLimit
=
847 prevTer
== 0 ? rootElements
.getTertiaryBoundary() - 0x100 : prevTer
;
848 if(!cw3
.allocWeights(lowerLimit
, ter
, 1)) {
849 errln("no teriary tailoring gap between %04x and %04x", lowerLimit
, ter
);
851 uint32_t newWeight
= cw3
.nextWeight();
852 if(!(prevTer
< newWeight
&& newWeight
< ter
)) {
853 errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
854 (long)lowerLimit
, (long)newWeight
, (long)ter
);
858 errln("duplicate root CE %08lx %08lx", (long)pri
, (long)secTer
);
867 void CollationTest::TestTailoredElements() {
868 IcuTestErrorCode
errorCode(*this, "TestTailoredElements");
869 const CollationData
*root
= CollationRoot::getData(errorCode
);
870 if(errorCode
.logDataIfFailureAndReset("CollationRoot::getData()")) {
873 CollationRootElements
rootElements(root
->rootElements
, root
->rootElementsLength
);
875 UHashtable
*prevLocales
= uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, errorCode
);
876 if(errorCode
.logIfFailureAndReset("failed to create a hash table")) {
879 uhash_setKeyDeleter(prevLocales
, uprv_free
);
880 // TestRootElements() tests the root collator which does not have tailorings.
881 uhash_puti(prevLocales
, uprv_strdup(""), 1, errorCode
);
882 uhash_puti(prevLocales
, uprv_strdup("root"), 1, errorCode
);
883 uhash_puti(prevLocales
, uprv_strdup("root@collation=standard"), 1, errorCode
);
885 UVector64
ces(errorCode
);
886 LocalPointer
<StringEnumeration
> locales(Collator::getAvailableLocales());
887 U_ASSERT(locales
.isValid());
888 const char *localeID
= "root";
890 Locale
locale(localeID
);
891 LocalPointer
<StringEnumeration
> types(
892 Collator::getKeywordValuesForLocale("collation", locale
, FALSE
, errorCode
));
893 errorCode
.assertSuccess();
894 const char *type
; // first: default type
895 while((type
= types
->next(NULL
, errorCode
)) != NULL
) {
896 if(strncmp(type
, "private-", 8) == 0) {
897 errln("Collator::getKeywordValuesForLocale(%s) returns private collation keyword: %s",
900 Locale
localeWithType(locale
);
901 localeWithType
.setKeywordValue("collation", type
, errorCode
);
902 errorCode
.assertSuccess();
903 LocalPointer
<Collator
> coll(Collator::createInstance(localeWithType
, errorCode
));
904 if(errorCode
.logIfFailureAndReset("Collator::createInstance(%s)",
905 localeWithType
.getName())) {
908 Locale actual
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, errorCode
);
909 if(uhash_geti(prevLocales
, actual
.getName()) != 0) {
912 uhash_puti(prevLocales
, uprv_strdup(actual
.getName()), 1, errorCode
);
913 errorCode
.assertSuccess();
914 logln("TestTailoredElements(): requested %s -> actual %s",
915 localeWithType
.getName(), actual
.getName());
916 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
.getAlias());
920 // Note: It would be better to get tailored strings such that we can
921 // identify the prefix, and only get the CEs for the prefix+string,
922 // not also for the prefix.
923 // There is currently no API for that.
924 // It would help in an unusual case where a contraction starting in the prefix
925 // extends past its end, and we do not see the intended mapping.
926 // For example, for a mapping p|st, if there is also a contraction ps,
927 // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
928 LocalPointer
<UnicodeSet
> tailored(coll
->getTailoredSet(errorCode
));
929 errorCode
.assertSuccess();
930 UnicodeSetIterator
iter(*tailored
);
932 const UnicodeString
&s
= iter
.getString();
933 ces
.removeAllElements();
934 rbc
->internalGetCEs(s
, ces
, errorCode
);
935 errorCode
.assertSuccess();
936 for(int32_t i
= 0; i
< ces
.size(); ++i
) {
937 int64_t ce
= ces
.elementAti(i
);
938 if(!isValidCE(rootElements
, *root
, ce
)) {
939 errln("invalid tailored CE %016llx at CE index %d from string:",
940 (long long)ce
, (int)i
);
946 } while((localeID
= locales
->next(NULL
, errorCode
)) != NULL
);
947 uhash_close(prevLocales
);
950 UnicodeString
CollationTest::printSortKey(const uint8_t *p
, int32_t length
) {
952 for(int32_t i
= 0; i
< length
; ++i
) {
953 if(i
> 0) { s
.append((UChar
)0x20); }
956 s
.append((UChar
)0x2e); // period
958 s
.append((UChar
)0x7c); // vertical bar
// Formats a CollationKey for log output: fetches the key's byte array via
// key.getByteArray(length) and returns what printSortKey() produces for
// those bytes.
// NOTE(review): the declaration of length is not visible in this view.
966 UnicodeString
CollationTest::printCollationKey(const CollationKey
&key
) {
968 const uint8_t *p
= key
.getByteArray(length
);
969 return printSortKey(p
, length
);
972 UBool
CollationTest::readNonEmptyLine(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
975 const UChar
*line
= ucbuf_readline(f
, &lineLength
, errorCode
);
976 if(line
== NULL
|| errorCode
.isFailure()) {
981 // Strip trailing CR/LF, comments, and spaces.
982 const UChar
*comment
= u_memchr(line
, 0x23, lineLength
); // '#'
983 if(comment
!= NULL
) {
984 lineLength
= (int32_t)(comment
- line
);
986 while(lineLength
> 0 && isCROrLF(line
[lineLength
- 1])) { --lineLength
; }
988 while(lineLength
> 0 && isSpace(line
[lineLength
- 1])) { --lineLength
; }
989 if(lineLength
!= 0) {
990 fileLine
.setTo(FALSE
, line
, lineLength
);
993 // Empty line, continue.
997 void CollationTest::parseString(int32_t &start
, UnicodeString
&prefix
, UnicodeString
&s
,
998 UErrorCode
&errorCode
) {
999 int32_t length
= fileLine
.length();
1001 for(i
= start
; i
< length
&& !isSpace(fileLine
[i
]); ++i
) {}
1002 int32_t pipeIndex
= fileLine
.indexOf((UChar
)0x7c, start
, i
- start
); // '|'
1003 if(pipeIndex
>= 0) {
1004 prefix
= fileLine
.tempSubStringBetween(start
, pipeIndex
).unescape();
1005 if(prefix
.isEmpty()) {
1006 errln("empty prefix on line %d", (int)fileLineNumber
);
1008 errorCode
= U_PARSE_ERROR
;
1011 start
= pipeIndex
+ 1;
1015 s
= fileLine
.tempSubStringBetween(start
, i
).unescape();
1017 errln("empty string on line %d", (int)fileLineNumber
);
1019 errorCode
= U_PARSE_ERROR
;
1025 Collation::Level
CollationTest::parseRelationAndString(UnicodeString
&s
, IcuTestErrorCode
&errorCode
) {
1026 Collation::Level relation
;
1028 if(fileLine
[0] == 0x3c) { // <
1029 UChar second
= fileLine
[1];
1033 relation
= Collation::PRIMARY_LEVEL
;
1036 relation
= Collation::SECONDARY_LEVEL
;
1039 relation
= Collation::TERTIARY_LEVEL
;
1042 relation
= Collation::QUATERNARY_LEVEL
;
1045 relation
= Collation::CASE_LEVEL
;
1048 relation
= Collation::IDENTICAL_LEVEL
;
1051 relation
= Collation::NO_LEVEL
;
1055 } else if(fileLine
[0] == 0x3d) { // =
1056 relation
= Collation::ZERO_LEVEL
;
1061 if(start
== 0 || !isSpace(fileLine
[start
])) {
1062 errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line %d", (int)fileLineNumber
);
1064 errorCode
.set(U_PARSE_ERROR
);
1065 return Collation::NO_LEVEL
;
1067 start
= skipSpaces(start
);
1068 UnicodeString prefix
;
1069 parseString(start
, prefix
, s
, errorCode
);
1070 if(errorCode
.isSuccess() && !prefix
.isEmpty()) {
1071 errln("prefix string not allowed for test string: on line %d", (int)fileLineNumber
);
1073 errorCode
.set(U_PARSE_ERROR
);
1074 return Collation::NO_LEVEL
;
1076 if(start
< fileLine
.length()) {
1077 errln("unexpected line contents after test string on line %d", (int)fileLineNumber
);
1079 errorCode
.set(U_PARSE_ERROR
);
1080 return Collation::NO_LEVEL
;
1085 static const struct {
1089 { "backwards", UCOL_FRENCH_COLLATION
},
1090 { "alternate", UCOL_ALTERNATE_HANDLING
},
1091 { "caseFirst", UCOL_CASE_FIRST
},
1092 { "caseLevel", UCOL_CASE_LEVEL
},
1093 // UCOL_NORMALIZATION_MODE is turned on and off automatically.
1094 { "strength", UCOL_STRENGTH
},
1095 // UCOL_HIRAGANA_QUATERNARY_MODE is deprecated.
1096 { "numeric", UCOL_NUMERIC_COLLATION
}
1099 static const struct {
1101 UColAttributeValue value
;
1102 } attributeValues
[] = {
1103 { "default", UCOL_DEFAULT
},
1104 { "primary", UCOL_PRIMARY
},
1105 { "secondary", UCOL_SECONDARY
},
1106 { "tertiary", UCOL_TERTIARY
},
1107 { "quaternary", UCOL_QUATERNARY
},
1108 { "identical", UCOL_IDENTICAL
},
1109 { "off", UCOL_OFF
},
1111 { "shifted", UCOL_SHIFTED
},
1112 { "non-ignorable", UCOL_NON_IGNORABLE
},
1113 { "lower", UCOL_LOWER_FIRST
},
1114 { "upper", UCOL_UPPER_FIRST
}
1117 void CollationTest::parseAndSetAttribute(IcuTestErrorCode
&errorCode
) {
1118 // Parse attributes even if the Collator could not be created,
1119 // in order to report syntax errors.
1120 int32_t start
= skipSpaces(1);
1121 int32_t equalPos
= fileLine
.indexOf(0x3d);
1123 if(fileLine
.compare(start
, 7, UNICODE_STRING("reorder", 7)) == 0) {
1124 parseAndSetReorderCodes(start
+ 7, errorCode
);
1127 errln("missing '=' on line %d", (int)fileLineNumber
);
1129 errorCode
.set(U_PARSE_ERROR
);
1133 UnicodeString attrString
= fileLine
.tempSubStringBetween(start
, equalPos
);
1134 UnicodeString valueString
= fileLine
.tempSubString(equalPos
+1);
1135 if(attrString
== UNICODE_STRING("maxVariable", 11)) {
1136 UColReorderCode max
;
1137 if(valueString
== UNICODE_STRING("space", 5)) {
1138 max
= UCOL_REORDER_CODE_SPACE
;
1139 } else if(valueString
== UNICODE_STRING("punct", 5)) {
1140 max
= UCOL_REORDER_CODE_PUNCTUATION
;
1141 } else if(valueString
== UNICODE_STRING("symbol", 6)) {
1142 max
= UCOL_REORDER_CODE_SYMBOL
;
1143 } else if(valueString
== UNICODE_STRING("currency", 8)) {
1144 max
= UCOL_REORDER_CODE_CURRENCY
;
1146 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1148 errorCode
.set(U_PARSE_ERROR
);
1152 coll
->setMaxVariable(max
, errorCode
);
1153 if(errorCode
.isFailure()) {
1154 errln("setMaxVariable() failed on line %d: %s",
1155 (int)fileLineNumber
, errorCode
.errorName());
1165 for(int32_t i
= 0;; ++i
) {
1166 if(i
== UPRV_LENGTHOF(attributes
)) {
1167 errln("invalid attribute name on line %d", (int)fileLineNumber
);
1169 errorCode
.set(U_PARSE_ERROR
);
1172 if(attrString
== UnicodeString(attributes
[i
].name
, -1, US_INV
)) {
1173 attr
= attributes
[i
].attr
;
1178 UColAttributeValue value
;
1179 for(int32_t i
= 0;; ++i
) {
1180 if(i
== UPRV_LENGTHOF(attributeValues
)) {
1181 errln("invalid attribute value name on line %d", (int)fileLineNumber
);
1183 errorCode
.set(U_PARSE_ERROR
);
1186 if(valueString
== UnicodeString(attributeValues
[i
].name
, -1, US_INV
)) {
1187 value
= attributeValues
[i
].value
;
1193 coll
->setAttribute(attr
, value
, errorCode
);
1194 if(errorCode
.isFailure()) {
1195 errln("illegal attribute=value combination on line %d: %s",
1196 (int)fileLineNumber
, errorCode
.errorName());
1204 void CollationTest::parseAndSetReorderCodes(int32_t start
, IcuTestErrorCode
&errorCode
) {
1205 UVector32
reorderCodes(errorCode
);
1206 while(start
< fileLine
.length()) {
1207 start
= skipSpaces(start
);
1208 int32_t limit
= start
;
1209 while(limit
< fileLine
.length() && !isSpace(fileLine
[limit
])) { ++limit
; }
1211 name
.appendInvariantChars(fileLine
.tempSubStringBetween(start
, limit
), errorCode
);
1212 int32_t code
= CollationRuleParser::getReorderCode(name
.data());
1214 if(uprv_stricmp(name
.data(), "default") == 0) {
1215 code
= UCOL_REORDER_CODE_DEFAULT
; // -1
1217 errln("invalid reorder code '%s' on line %d", name
.data(), (int)fileLineNumber
);
1219 errorCode
.set(U_PARSE_ERROR
);
1223 reorderCodes
.addElement(code
, errorCode
);
1227 coll
->setReorderCodes(reorderCodes
.getBuffer(), reorderCodes
.size(), errorCode
);
1228 if(errorCode
.isFailure()) {
1229 errln("setReorderCodes() failed on line %d: %s",
1230 (int)fileLineNumber
, errorCode
.errorName());
1238 void CollationTest::buildTailoring(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1239 UnicodeString rules
;
1240 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1241 rules
.append(fileLine
.unescape());
1243 if(errorCode
.isFailure()) { return; }
1246 UParseError parseError
;
1247 UnicodeString reason
;
1249 coll
= new RuleBasedCollator(rules
, parseError
, reason
, errorCode
);
1251 errln("unable to allocate a new collator");
1252 errorCode
.set(U_MEMORY_ALLOCATION_ERROR
);
1255 if(errorCode
.isFailure()) {
1256 dataerrln("RuleBasedCollator(rules) failed - %s", errorCode
.errorName());
1257 infoln(UnicodeString(" reason: ") + reason
);
1258 if(parseError
.offset
>= 0) { infoln(" rules offset: %d", (int)parseError
.offset
); }
1259 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
1260 infoln(UnicodeString(" snippet: ...") +
1261 parseError
.preContext
+ "(!)" + parseError
.postContext
+ "...");
1267 assertEquals("no error reason when RuleBasedCollator(rules) succeeds",
1268 UnicodeString(), reason
);
1272 void CollationTest::setRootCollator(IcuTestErrorCode
&errorCode
) {
1273 if(errorCode
.isFailure()) { return; }
1275 coll
= Collator::createInstance(Locale::getRoot(), errorCode
);
1276 if(errorCode
.isFailure()) {
1277 dataerrln("unable to create a root collator");
1282 void CollationTest::setLocaleCollator(IcuTestErrorCode
&errorCode
) {
1283 if(errorCode
.isFailure()) { return; }
1286 int32_t at
= fileLine
.indexOf((UChar
)0x40, 9); // @ is not invariant
1288 fileLine
.setCharAt(at
, (UChar
)0x2a); // *
1290 CharString localeID
;
1291 localeID
.appendInvariantChars(fileLine
.tempSubString(9), errorCode
);
1293 localeID
.data()[at
- 9] = '@';
1295 Locale
locale(localeID
.data());
1296 if(fileLine
.length() == 9 || errorCode
.isFailure() || locale
.isBogus()) {
1297 errln("invalid language tag on line %d", (int)fileLineNumber
);
1299 if(errorCode
.isSuccess()) { errorCode
.set(U_PARSE_ERROR
); }
1303 logln("creating a collator for locale ID %s", locale
.getName());
1304 coll
= Collator::createInstance(locale
, errorCode
);
1305 if(errorCode
.isFailure()) {
1306 dataerrln("unable to create a collator for locale %s on line %d",
1307 locale
.getName(), (int)fileLineNumber
);
1315 UBool
CollationTest::needsNormalization(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
1316 if(U_FAILURE(errorCode
) || !fcd
->isNormalized(s
, errorCode
)) { return TRUE
; }
1317 // In some sequences with Tibetan composite vowel signs,
1318 // even if the string passes the FCD check,
1319 // those composites must be decomposed.
1320 // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81.
1322 while((index
= s
.indexOf((UChar
)0xf71, index
)) >= 0) {
1323 if(++index
< s
.length()) {
1325 if(c
== 0xf73 || c
== 0xf75 || c
== 0xf81) { return TRUE
; }
1331 UBool
CollationTest::getSortKeyParts(const UChar
*s
, int32_t length
,
1332 CharString
&dest
, int32_t partSize
,
1333 IcuTestErrorCode
&errorCode
) {
1334 if(errorCode
.isFailure()) { return FALSE
; }
1336 U_ASSERT(partSize
<= UPRV_LENGTHOF(part
));
1338 uiter_setString(&iter
, s
, length
);
1339 uint32_t state
[2] = { 0, 0 };
1341 int32_t partLength
= coll
->internalNextSortKeyPart(&iter
, state
, part
, partSize
, errorCode
);
1342 UBool done
= partLength
< partSize
;
1344 // At the end, append the next byte as well which should be 00.
1347 dest
.append(reinterpret_cast<char *>(part
), partLength
, errorCode
);
1349 return errorCode
.isSuccess();
1354 UBool
CollationTest::getCollationKey(const char *norm
, const UnicodeString
&line
,
1355 const UChar
*s
, int32_t length
,
1356 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1357 if(errorCode
.isFailure()) { return FALSE
; }
1358 coll
->getCollationKey(s
, length
, key
, errorCode
);
1359 if(errorCode
.isFailure()) {
1360 infoln(fileTestName
);
1361 errln("Collator(%s).getCollationKey() failed: %s",
1362 norm
, errorCode
.errorName());
1367 const uint8_t *keyBytes
= key
.getByteArray(keyLength
);
1368 if(keyLength
== 0 || keyBytes
[keyLength
- 1] != 0) {
1369 infoln(fileTestName
);
1370 errln("Collator(%s).getCollationKey() wrote an empty or unterminated key",
1373 infoln(printCollationKey(key
));
1377 int32_t numLevels
= coll
->getAttribute(UCOL_STRENGTH
, errorCode
);
1378 if(numLevels
< UCOL_IDENTICAL
) {
1383 if(coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
) {
1386 errorCode
.assertSuccess();
1387 int32_t numLevelSeparators
= 0;
1388 for(int32_t i
= 0; i
< (keyLength
- 1); ++i
) {
1389 uint8_t b
= keyBytes
[i
];
1391 infoln(fileTestName
);
1392 errln("Collator(%s).getCollationKey() contains a 00 byte", norm
);
1394 infoln(printCollationKey(key
));
1397 if(b
== 1) { ++numLevelSeparators
; }
1399 if(numLevelSeparators
!= (numLevels
- 1)) {
1400 infoln(fileTestName
);
1401 errln("Collator(%s).getCollationKey() has %d level separators for %d levels",
1402 norm
, (int)numLevelSeparators
, (int)numLevels
);
1404 infoln(printCollationKey(key
));
1408 // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
1409 static const int32_t partSizes
[] = { 32, 3, 1 };
1410 for(int32_t psi
= 0; psi
< UPRV_LENGTHOF(partSizes
); ++psi
) {
1411 int32_t partSize
= partSizes
[psi
];
1413 if(!getSortKeyParts(s
, length
, parts
, 32, errorCode
)) {
1414 infoln(fileTestName
);
1415 errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
1416 norm
, (int)partSize
, errorCode
.errorName());
1420 if(keyLength
!= parts
.length() || uprv_memcmp(keyBytes
, parts
.data(), keyLength
) != 0) {
1421 infoln(fileTestName
);
1422 errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
1423 norm
, (int)partSize
);
1425 infoln(printCollationKey(key
));
1426 infoln(printSortKey(reinterpret_cast<uint8_t *>(parts
.data()), parts
.length()));
1434 * Changes the key to the merged segments of the U+FFFE-separated substrings of s.
1435 * Leaves key unchanged if s does not contain U+FFFE.
1436 * @return TRUE if the key was successfully changed
1438 UBool
CollationTest::getMergedCollationKey(const UChar
*s
, int32_t length
,
1439 CollationKey
&key
, IcuTestErrorCode
&errorCode
) {
1440 if(errorCode
.isFailure()) { return FALSE
; }
1441 LocalMemory
<uint8_t> mergedKey
;
1442 int32_t mergedKeyLength
= 0;
1443 int32_t mergedKeyCapacity
= 0;
1444 int32_t sLength
= (length
>= 0) ? length
: u_strlen(s
);
1445 int32_t segmentStart
= 0;
1446 for(int32_t i
= 0;;) {
1448 if(segmentStart
== 0) {
1449 // s does not contain any U+FFFE.
1452 } else if(s
[i
] != 0xfffe) {
1456 // Get the sort key for another segment and merge it into mergedKey.
1457 CollationKey
key1(mergedKey
.getAlias(), mergedKeyLength
); // copies the bytes
1459 coll
->getCollationKey(s
+ segmentStart
, i
- segmentStart
, key2
, errorCode
);
1460 int32_t key1Length
, key2Length
;
1461 const uint8_t *key1Bytes
= key1
.getByteArray(key1Length
);
1462 const uint8_t *key2Bytes
= key2
.getByteArray(key2Length
);
1464 int32_t minCapacity
= key1Length
+ key2Length
;
1465 if(key1Length
> 0) { --minCapacity
; }
1466 if(minCapacity
<= mergedKeyCapacity
) {
1467 dest
= mergedKey
.getAlias();
1469 if(minCapacity
<= 200) {
1470 mergedKeyCapacity
= 200;
1471 } else if(minCapacity
<= 2 * mergedKeyCapacity
) {
1472 mergedKeyCapacity
*= 2;
1474 mergedKeyCapacity
= minCapacity
;
1476 dest
= mergedKey
.allocateInsteadAndReset(mergedKeyCapacity
);
1478 U_ASSERT(dest
!= NULL
|| mergedKeyCapacity
== 0);
1479 if(key1Length
== 0) {
1480 // key2 is the sort key for the first segment.
1481 uprv_memcpy(dest
, key2Bytes
, key2Length
);
1482 mergedKeyLength
= key2Length
;
1485 ucol_mergeSortkeys(key1Bytes
, key1Length
, key2Bytes
, key2Length
,
1486 dest
, mergedKeyCapacity
);
1488 if(i
== sLength
) { break; }
1491 key
= CollationKey(mergedKey
.getAlias(), mergedKeyLength
);
1498 * Replaces unpaired surrogates with U+FFFD.
1499 * Returns s if no replacement was made, otherwise buffer.
1501 const UnicodeString
&surrogatesToFFFD(const UnicodeString
&s
, UnicodeString
&buffer
) {
1503 while(i
< s
.length()) {
1504 UChar32 c
= s
.char32At(i
);
1505 if(U_IS_SURROGATE(c
)) {
1506 if(buffer
.length() < i
) {
1507 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
1509 buffer
.append((UChar
)0xfffd);
1513 if(buffer
.isEmpty()) {
1516 if(buffer
.length() < i
) {
1517 buffer
.append(s
, buffer
.length(), i
- buffer
.length());
1522 int32_t getDifferenceLevel(const CollationKey
&prevKey
, const CollationKey
&key
,
1523 UCollationResult order
, UBool collHasCaseLevel
) {
1524 if(order
== UCOL_EQUAL
) {
1525 return Collation::NO_LEVEL
;
1527 int32_t prevKeyLength
;
1528 const uint8_t *prevBytes
= prevKey
.getByteArray(prevKeyLength
);
1530 const uint8_t *bytes
= key
.getByteArray(keyLength
);
1531 int32_t level
= Collation::PRIMARY_LEVEL
;
1532 for(int32_t i
= 0;; ++i
) {
1533 uint8_t b
= prevBytes
[i
];
1534 if(b
!= bytes
[i
]) { break; }
1535 if(b
== Collation::LEVEL_SEPARATOR_BYTE
) {
1537 if(level
== Collation::CASE_LEVEL
&& !collHasCaseLevel
) {
1547 UBool
CollationTest::checkCompareTwo(const char *norm
, const UnicodeString
&prevFileLine
,
1548 const UnicodeString
&prevString
, const UnicodeString
&s
,
1549 UCollationResult expectedOrder
, Collation::Level expectedLevel
,
1550 IcuTestErrorCode
&errorCode
) {
1551 if(errorCode
.isFailure()) { return FALSE
; }
1553 // Get the sort keys first, for error debug output.
1554 CollationKey prevKey
;
1555 if(!getCollationKey(norm
, prevFileLine
, prevString
.getBuffer(), prevString
.length(),
1556 prevKey
, errorCode
)) {
1560 if(!getCollationKey(norm
, fileLine
, s
.getBuffer(), s
.length(), key
, errorCode
)) { return FALSE
; }
1562 UCollationResult order
= coll
->compare(prevString
, s
, errorCode
);
1563 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1564 infoln(fileTestName
);
1565 errln("line %d Collator(%s).compare(previous, current) wrong order: %d != %d (%s)",
1566 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1567 infoln(prevFileLine
);
1569 infoln(printCollationKey(prevKey
));
1570 infoln(printCollationKey(key
));
1573 order
= coll
->compare(s
, prevString
, errorCode
);
1574 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1575 infoln(fileTestName
);
1576 errln("line %d Collator(%s).compare(current, previous) wrong order: %d != %d (%s)",
1577 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1578 infoln(prevFileLine
);
1580 infoln(printCollationKey(prevKey
));
1581 infoln(printCollationKey(key
));
1584 // Test NUL-termination if the strings do not contain NUL characters.
1585 UBool containNUL
= prevString
.indexOf((UChar
)0) >= 0 || s
.indexOf((UChar
)0) >= 0;
1587 order
= coll
->compare(prevString
.getBuffer(), -1, s
.getBuffer(), -1, errorCode
);
1588 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1589 infoln(fileTestName
);
1590 errln("line %d Collator(%s).compare(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1591 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1592 infoln(prevFileLine
);
1594 infoln(printCollationKey(prevKey
));
1595 infoln(printCollationKey(key
));
1598 order
= coll
->compare(s
.getBuffer(), -1, prevString
.getBuffer(), -1, errorCode
);
1599 if(order
!= -expectedOrder
|| errorCode
.isFailure()) {
1600 infoln(fileTestName
);
1601 errln("line %d Collator(%s).compare(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1602 (int)fileLineNumber
, norm
, order
, -expectedOrder
, errorCode
.errorName());
1603 infoln(prevFileLine
);
1605 infoln(printCollationKey(prevKey
));
1606 infoln(printCollationKey(key
));
1611 #if U_HAVE_STD_STRING
1612 // compare(UTF-16) treats unpaired surrogates like unassigned code points.
1613 // Unpaired surrogates cannot be converted to UTF-8.
1614 // Create valid UTF-16 strings if necessary, and use those for
1615 // both the expected compare() result and for the input to compare(UTF-8).
1616 UnicodeString prevBuffer
, sBuffer
;
1617 const UnicodeString
&prevValid
= surrogatesToFFFD(prevString
, prevBuffer
);
1618 const UnicodeString
&sValid
= surrogatesToFFFD(s
, sBuffer
);
1619 std::string prevUTF8
, sUTF8
;
1620 UnicodeString(prevValid
).toUTF8String(prevUTF8
);
1621 UnicodeString(sValid
).toUTF8String(sUTF8
);
1622 UCollationResult expectedUTF8Order
;
1623 if(&prevValid
== &prevString
&& &sValid
== &s
) {
1624 expectedUTF8Order
= expectedOrder
;
1626 expectedUTF8Order
= coll
->compare(prevValid
, sValid
, errorCode
);
1629 order
= coll
->compareUTF8(prevUTF8
, sUTF8
, errorCode
);
1630 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1631 infoln(fileTestName
);
1632 errln("line %d Collator(%s).compareUTF8(previous, current) wrong order: %d != %d (%s)",
1633 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1634 infoln(prevFileLine
);
1636 infoln(printCollationKey(prevKey
));
1637 infoln(printCollationKey(key
));
1640 order
= coll
->compareUTF8(sUTF8
, prevUTF8
, errorCode
);
1641 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1642 infoln(fileTestName
);
1643 errln("line %d Collator(%s).compareUTF8(current, previous) wrong order: %d != %d (%s)",
1644 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1645 infoln(prevFileLine
);
1647 infoln(printCollationKey(prevKey
));
1648 infoln(printCollationKey(key
));
1651 // Test NUL-termination if the strings do not contain NUL characters.
1653 order
= coll
->internalCompareUTF8(prevUTF8
.c_str(), -1, sUTF8
.c_str(), -1, errorCode
);
1654 if(order
!= expectedUTF8Order
|| errorCode
.isFailure()) {
1655 infoln(fileTestName
);
1656 errln("line %d Collator(%s).internalCompareUTF8(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1657 (int)fileLineNumber
, norm
, order
, expectedUTF8Order
, errorCode
.errorName());
1658 infoln(prevFileLine
);
1660 infoln(printCollationKey(prevKey
));
1661 infoln(printCollationKey(key
));
1664 order
= coll
->internalCompareUTF8(sUTF8
.c_str(), -1, prevUTF8
.c_str(), -1, errorCode
);
1665 if(order
!= -expectedUTF8Order
|| errorCode
.isFailure()) {
1666 infoln(fileTestName
);
1667 errln("line %d Collator(%s).internalCompareUTF8(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1668 (int)fileLineNumber
, norm
, order
, -expectedUTF8Order
, errorCode
.errorName());
1669 infoln(prevFileLine
);
1671 infoln(printCollationKey(prevKey
));
1672 infoln(printCollationKey(key
));
1678 UCharIterator leftIter
;
1679 UCharIterator rightIter
;
1680 uiter_setString(&leftIter
, prevString
.getBuffer(), prevString
.length());
1681 uiter_setString(&rightIter
, s
.getBuffer(), s
.length());
1682 order
= coll
->compare(leftIter
, rightIter
, errorCode
);
1683 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1684 infoln(fileTestName
);
1685 errln("line %d Collator(%s).compare(UCharIterator: previous, current) "
1686 "wrong order: %d != %d (%s)",
1687 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1688 infoln(prevFileLine
);
1690 infoln(printCollationKey(prevKey
));
1691 infoln(printCollationKey(key
));
1695 order
= prevKey
.compareTo(key
, errorCode
);
1696 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1697 infoln(fileTestName
);
1698 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo() wrong order: %d != %d (%s)",
1699 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1700 infoln(prevFileLine
);
1702 infoln(printCollationKey(prevKey
));
1703 infoln(printCollationKey(key
));
1706 UBool collHasCaseLevel
= coll
->getAttribute(UCOL_CASE_LEVEL
, errorCode
) == UCOL_ON
;
1707 int32_t level
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1708 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
1709 if(level
!= expectedLevel
) {
1710 infoln(fileTestName
);
1711 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
1712 (int)fileLineNumber
, norm
, order
, level
, expectedLevel
);
1713 infoln(prevFileLine
);
1715 infoln(printCollationKey(prevKey
));
1716 infoln(printCollationKey(key
));
1721 // If either string contains U+FFFE, then their sort keys must compare the same as
1722 // the merged sort keys of each string's between-FFFE segments.
1724 // It is not required that
1725 // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
1726 // only that those two methods yield the same order.
1728 // Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
1729 if((getMergedCollationKey(prevString
.getBuffer(), prevString
.length(), prevKey
, errorCode
) |
1730 getMergedCollationKey(s
.getBuffer(), s
.length(), key
, errorCode
)) ||
1731 errorCode
.isFailure()) {
1732 order
= prevKey
.compareTo(key
, errorCode
);
1733 if(order
!= expectedOrder
|| errorCode
.isFailure()) {
1734 infoln(fileTestName
);
1735 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1736 "(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)",
1737 (int)fileLineNumber
, norm
, order
, expectedOrder
, errorCode
.errorName());
1738 infoln(prevFileLine
);
1740 infoln(printCollationKey(prevKey
));
1741 infoln(printCollationKey(key
));
1744 int32_t mergedLevel
= getDifferenceLevel(prevKey
, key
, order
, collHasCaseLevel
);
1745 if(order
!= UCOL_EQUAL
&& expectedLevel
!= Collation::NO_LEVEL
) {
1746 if(mergedLevel
!= level
) {
1747 infoln(fileTestName
);
1748 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
1749 "(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d",
1750 (int)fileLineNumber
, norm
, order
, mergedLevel
, level
);
1751 infoln(prevFileLine
);
1753 infoln(printCollationKey(prevKey
));
1754 infoln(printCollationKey(key
));
1762 void CollationTest::checkCompareStrings(UCHARBUF
*f
, IcuTestErrorCode
&errorCode
) {
1763 if(errorCode
.isFailure()) { return; }
1764 UnicodeString prevFileLine
= UNICODE_STRING("(none)", 6);
1765 UnicodeString prevString
, s
;
1766 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
1767 while(readNonEmptyLine(f
, errorCode
) && !isSectionStarter(fileLine
[0])) {
1768 // Parse the line even if it will be ignored (when we do not have a Collator)
1769 // in order to report syntax issues.
1770 Collation::Level relation
= parseRelationAndString(s
, errorCode
);
1771 if(errorCode
.isFailure()) {
1776 // We were unable to create the Collator but continue with tests.
1777 // Ignore test data for this Collator.
1778 // The next Collator creation might work.
1781 UCollationResult expectedOrder
= (relation
== Collation::ZERO_LEVEL
) ? UCOL_EQUAL
: UCOL_LESS
;
1782 Collation::Level expectedLevel
= relation
;
1783 s
.getTerminatedBuffer(); // Ensure NUL-termination.
1785 if(!needsNormalization(prevString
, errorCode
) && !needsNormalization(s
, errorCode
)) {
1786 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, errorCode
);
1787 isOk
= checkCompareTwo("normalization=on", prevFileLine
, prevString
, s
,
1788 expectedOrder
, expectedLevel
, errorCode
);
1791 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, errorCode
);
1792 isOk
= checkCompareTwo("normalization=off", prevFileLine
, prevString
, s
,
1793 expectedOrder
, expectedLevel
, errorCode
);
1795 if(isOk
&& (!nfd
->isNormalized(prevString
, errorCode
) || !nfd
->isNormalized(s
, errorCode
))) {
1796 UnicodeString pn
= nfd
->normalize(prevString
, errorCode
);
1797 UnicodeString n
= nfd
->normalize(s
, errorCode
);
1798 pn
.getTerminatedBuffer();
1799 n
.getTerminatedBuffer();
1800 errorCode
.assertSuccess();
1801 isOk
= checkCompareTwo("NFD input", prevFileLine
, pn
, n
,
1802 expectedOrder
, expectedLevel
, errorCode
);
1805 errorCode
.reset(); // already reported
1807 prevFileLine
= fileLine
;
1809 prevString
.getTerminatedBuffer(); // Ensure NUL-termination.
1813 void CollationTest::TestDataDriven() {
1814 IcuTestErrorCode
errorCode(*this, "TestDataDriven");
1816 fcd
= Normalizer2Factory::getFCDInstance(errorCode
);
1817 nfd
= Normalizer2::getNFDInstance(errorCode
);
1818 if(errorCode
.logDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) {
1822 CharString
path(getSourceTestData(errorCode
), errorCode
);
1823 path
.appendPathPart("collationtest.txt", errorCode
);
1824 const char *codePage
= "UTF-8";
1825 LocalUCHARBUFPointer
f(ucbuf_open(path
.data(), &codePage
, TRUE
, FALSE
, errorCode
));
1826 if(errorCode
.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) {
1829 // Read a new line if necessary.
1830 // Sub-parsers leave the first line set that they do not handle.
1831 while(errorCode
.isSuccess() && (!fileLine
.isEmpty() || readNonEmptyLine(f
.getAlias(), errorCode
))) {
1832 if(!isSectionStarter(fileLine
[0])) {
1833 errln("syntax error on line %d", (int)fileLineNumber
);
1837 if(fileLine
.startsWith(UNICODE_STRING("** test: ", 9))) {
1838 fileTestName
= fileLine
;
1841 } else if(fileLine
== UNICODE_STRING("@ root", 6)) {
1842 setRootCollator(errorCode
);
1844 } else if(fileLine
.startsWith(UNICODE_STRING("@ locale ", 9))) {
1845 setLocaleCollator(errorCode
);
1847 } else if(fileLine
== UNICODE_STRING("@ rules", 7)) {
1848 buildTailoring(f
.getAlias(), errorCode
);
1849 } else if(fileLine
[0] == 0x25 && isSpace(fileLine
[1])) { // %
1850 parseAndSetAttribute(errorCode
);
1851 } else if(fileLine
== UNICODE_STRING("* compare", 9)) {
1852 checkCompareStrings(f
.getAlias(), errorCode
);
1854 errln("syntax error on line %d", (int)fileLineNumber
);
1861 #endif // !UCONFIG_NO_COLLATION