]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
57a6839d A |
3 | /* |
4 | ******************************************************************************* | |
b331163b | 5 | * Copyright (C) 2012-2015, International Business Machines |
57a6839d A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * collationtest.cpp | |
9 | * | |
10 | * created on: 2012apr27 | |
11 | * created by: Markus W. Scherer | |
12 | */ | |
13 | ||
14 | #include "unicode/utypes.h" | |
15 | ||
16 | #if !UCONFIG_NO_COLLATION | |
17 | ||
18 | #include "unicode/coll.h" | |
19 | #include "unicode/errorcode.h" | |
20 | #include "unicode/localpointer.h" | |
21 | #include "unicode/normalizer2.h" | |
22 | #include "unicode/sortkey.h" | |
23 | #include "unicode/std_string.h" | |
24 | #include "unicode/strenum.h" | |
25 | #include "unicode/tblcoll.h" | |
26 | #include "unicode/uiter.h" | |
27 | #include "unicode/uniset.h" | |
28 | #include "unicode/unistr.h" | |
29 | #include "unicode/usetiter.h" | |
30 | #include "unicode/ustring.h" | |
31 | #include "charstr.h" | |
32 | #include "cmemory.h" | |
33 | #include "collation.h" | |
34 | #include "collationdata.h" | |
35 | #include "collationfcd.h" | |
36 | #include "collationiterator.h" | |
37 | #include "collationroot.h" | |
38 | #include "collationrootelements.h" | |
39 | #include "collationruleparser.h" | |
40 | #include "collationweights.h" | |
41 | #include "cstring.h" | |
42 | #include "intltest.h" | |
43 | #include "normalizer2impl.h" | |
44 | #include "ucbuf.h" | |
45 | #include "uhash.h" | |
46 | #include "uitercollationiterator.h" | |
47 | #include "utf16collationiterator.h" | |
48 | #include "utf8collationiterator.h" | |
49 | #include "uvectr32.h" | |
50 | #include "uvectr64.h" | |
51 | #include "writesrc.h" | |
52 | ||
57a6839d A |
53 | class CodePointIterator; |
54 | ||
55 | // TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey) | |
56 | ||
57 | class CollationTest : public IntlTest { | |
58 | public: | |
59 | CollationTest() | |
60 | : fcd(NULL), nfd(NULL), | |
61 | fileLineNumber(0), | |
62 | coll(NULL) {} | |
63 | ||
64 | ~CollationTest() { | |
65 | delete coll; | |
66 | } | |
67 | ||
68 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); | |
69 | ||
70 | void TestMinMax(); | |
71 | void TestImplicits(); | |
72 | void TestNulTerminated(); | |
73 | void TestIllegalUTF8(); | |
74 | void TestShortFCDData(); | |
75 | void TestFCD(); | |
76 | void TestCollationWeights(); | |
77 | void TestRootElements(); | |
78 | void TestTailoredElements(); | |
79 | void TestDataDriven(); | |
80 | ||
81 | private: | |
82 | void checkFCD(const char *name, CollationIterator &ci, CodePointIterator &cpi); | |
83 | void checkAllocWeights(CollationWeights &cw, | |
84 | uint32_t lowerLimit, uint32_t upperLimit, int32_t n, | |
85 | int32_t someLength, int32_t minCount); | |
86 | ||
87 | static UnicodeString printSortKey(const uint8_t *p, int32_t length); | |
88 | static UnicodeString printCollationKey(const CollationKey &key); | |
89 | ||
90 | // Helpers & fields for data-driven test. | |
91 | static UBool isCROrLF(UChar c) { return c == 0xa || c == 0xd; } | |
92 | static UBool isSpace(UChar c) { return c == 9 || c == 0x20 || c == 0x3000; } | |
93 | static UBool isSectionStarter(UChar c) { return c == 0x25 || c == 0x2a || c == 0x40; } // %*@ | |
94 | int32_t skipSpaces(int32_t i) { | |
95 | while(isSpace(fileLine[i])) { ++i; } | |
96 | return i; | |
97 | } | |
98 | ||
b331163b | 99 | UBool readNonEmptyLine(UCHARBUF *f, IcuTestErrorCode &errorCode); |
57a6839d A |
100 | void parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, UErrorCode &errorCode); |
101 | Collation::Level parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode); | |
102 | void parseAndSetAttribute(IcuTestErrorCode &errorCode); | |
103 | void parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode); | |
104 | void buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode); | |
105 | void setRootCollator(IcuTestErrorCode &errorCode); | |
106 | void setLocaleCollator(IcuTestErrorCode &errorCode); | |
107 | ||
108 | UBool needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const; | |
109 | ||
110 | UBool getSortKeyParts(const UChar *s, int32_t length, | |
111 | CharString &dest, int32_t partSize, | |
112 | IcuTestErrorCode &errorCode); | |
113 | UBool getCollationKey(const char *norm, const UnicodeString &line, | |
114 | const UChar *s, int32_t length, | |
115 | CollationKey &key, IcuTestErrorCode &errorCode); | |
b331163b A |
116 | UBool getMergedCollationKey(const UChar *s, int32_t length, |
117 | CollationKey &key, IcuTestErrorCode &errorCode); | |
57a6839d A |
118 | UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine, |
119 | const UnicodeString &prevString, const UnicodeString &s, | |
120 | UCollationResult expectedOrder, Collation::Level expectedLevel, | |
121 | IcuTestErrorCode &errorCode); | |
122 | void checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode); | |
123 | ||
124 | const Normalizer2 *fcd, *nfd; | |
125 | UnicodeString fileLine; | |
126 | int32_t fileLineNumber; | |
127 | UnicodeString fileTestName; | |
128 | Collator *coll; | |
129 | }; | |
130 | ||
131 | extern IntlTest *createCollationTest() { | |
132 | return new CollationTest(); | |
133 | } | |
134 | ||
135 | void CollationTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
136 | if(exec) { | |
137 | logln("TestSuite CollationTest: "); | |
138 | } | |
139 | TESTCASE_AUTO_BEGIN; | |
140 | TESTCASE_AUTO(TestMinMax); | |
141 | TESTCASE_AUTO(TestImplicits); | |
142 | TESTCASE_AUTO(TestNulTerminated); | |
143 | TESTCASE_AUTO(TestIllegalUTF8); | |
144 | TESTCASE_AUTO(TestShortFCDData); | |
145 | TESTCASE_AUTO(TestFCD); | |
146 | TESTCASE_AUTO(TestCollationWeights); | |
147 | TESTCASE_AUTO(TestRootElements); | |
148 | TESTCASE_AUTO(TestTailoredElements); | |
149 | TESTCASE_AUTO(TestDataDriven); | |
150 | TESTCASE_AUTO_END; | |
151 | } | |
152 | ||
153 | void CollationTest::TestMinMax() { | |
154 | IcuTestErrorCode errorCode(*this, "TestMinMax"); | |
155 | ||
156 | setRootCollator(errorCode); | |
157 | if(errorCode.isFailure()) { | |
158 | errorCode.reset(); | |
159 | return; | |
160 | } | |
161 | RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll); | |
162 | if(rbc == NULL) { | |
163 | errln("the root collator is not a RuleBasedCollator"); | |
164 | return; | |
165 | } | |
166 | ||
167 | static const UChar s[2] = { 0xfffe, 0xffff }; | |
168 | UVector64 ces(errorCode); | |
169 | rbc->internalGetCEs(UnicodeString(FALSE, s, 2), ces, errorCode); | |
170 | errorCode.assertSuccess(); | |
171 | if(ces.size() != 2) { | |
172 | errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces.size()); | |
173 | return; | |
174 | } | |
175 | int64_t ce = ces.elementAti(0); | |
b331163b | 176 | int64_t expected = Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY); |
57a6839d | 177 | if(ce != expected) { |
b331163b | 178 | errln("CE(U+fffe)=%04lx != 02..", (long)ce); |
57a6839d A |
179 | } |
180 | ||
181 | ce = ces.elementAti(1); | |
182 | expected = Collation::makeCE(Collation::MAX_PRIMARY); | |
183 | if(ce != expected) { | |
184 | errln("CE(U+ffff)=%04lx != max..", (long)ce); | |
185 | } | |
186 | } | |
187 | ||
188 | void CollationTest::TestImplicits() { | |
189 | IcuTestErrorCode errorCode(*this, "TestImplicits"); | |
190 | ||
191 | const CollationData *cd = CollationRoot::getData(errorCode); | |
0f5d89e8 | 192 | if(errorCode.errDataIfFailureAndReset("CollationRoot::getData()")) { |
57a6839d A |
193 | return; |
194 | } | |
195 | ||
196 | // Implicit primary weights should be assigned for the following sets, | |
197 | // and sort in ascending order by set and then code point. | |
198 | // See http://www.unicode.org/reports/tr10/#Implicit_Weights | |
b331163b | 199 | |
57a6839d A |
200 | // core Han Unified Ideographs |
201 | UnicodeSet coreHan("[\\p{unified_ideograph}&" | |
202 | "[\\p{Block=CJK_Unified_Ideographs}" | |
203 | "\\p{Block=CJK_Compatibility_Ideographs}]]", | |
204 | errorCode); | |
205 | // all other Unified Han ideographs | |
206 | UnicodeSet otherHan("[\\p{unified ideograph}-" | |
207 | "[\\p{Block=CJK_Unified_Ideographs}" | |
208 | "\\p{Block=CJK_Compatibility_Ideographs}]]", | |
209 | errorCode); | |
210 | UnicodeSet unassigned("[[:Cn:][:Cs:][:Co:]]", errorCode); | |
211 | unassigned.remove(0xfffe, 0xffff); // These have special CLDR root mappings. | |
b331163b A |
212 | |
213 | // Starting with CLDR 26/ICU 54, the root Han order may instead be | |
214 | // the Unihan radical-stroke order. | |
215 | // The tests should pass either way, so we only test the order of a small set of Han characters | |
216 | // whose radical-stroke order is the same as their code point order. | |
217 | UnicodeSet someHanInCPOrder( | |
218 | "[\\u4E00-\\u4E16\\u4E18-\\u4E2B\\u4E2D-\\u4E3C\\u4E3E-\\u4E48" | |
219 | "\\u4E4A-\\u4E60\\u4E63-\\u4E8F\\u4E91-\\u4F63\\u4F65-\\u50F1\\u50F3-\\u50F6]", | |
220 | errorCode); | |
221 | UnicodeSet inOrder(someHanInCPOrder); | |
222 | inOrder.addAll(unassigned).freeze(); | |
0f5d89e8 | 223 | if(errorCode.errIfFailureAndReset("UnicodeSet")) { |
57a6839d A |
224 | return; |
225 | } | |
226 | const UnicodeSet *sets[] = { &coreHan, &otherHan, &unassigned }; | |
227 | UChar32 prev = 0; | |
228 | uint32_t prevPrimary = 0; | |
229 | UTF16CollationIterator ci(cd, FALSE, NULL, NULL, NULL); | |
b331163b | 230 | for(int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) { |
57a6839d A |
231 | LocalPointer<UnicodeSetIterator> iter(new UnicodeSetIterator(*sets[i])); |
232 | while(iter->next()) { | |
233 | UChar32 c = iter->getCodepoint(); | |
234 | UnicodeString s(c); | |
235 | ci.setText(s.getBuffer(), s.getBuffer() + s.length()); | |
236 | int64_t ce = ci.nextCE(errorCode); | |
237 | int64_t ce2 = ci.nextCE(errorCode); | |
0f5d89e8 | 238 | if(errorCode.errIfFailureAndReset("CollationIterator.nextCE()")) { |
57a6839d A |
239 | return; |
240 | } | |
241 | if(ce == Collation::NO_CE || ce2 != Collation::NO_CE) { | |
242 | errln("CollationIterator.nextCE(U+%04lx) did not yield exactly one CE", (long)c); | |
243 | continue; | |
244 | } | |
245 | if((ce & 0xffffffff) != Collation::COMMON_SEC_AND_TER_CE) { | |
246 | errln("CollationIterator.nextCE(U+%04lx) has non-common sec/ter weights: %08lx", | |
247 | (long)c, (long)(ce & 0xffffffff)); | |
248 | continue; | |
249 | } | |
250 | uint32_t primary = (uint32_t)(ce >> 32); | |
b331163b | 251 | if(!(primary > prevPrimary) && inOrder.contains(c) && inOrder.contains(prev)) { |
57a6839d A |
252 | errln("CE(U+%04lx)=%04lx.. not greater than CE(U+%04lx)=%04lx..", |
253 | (long)c, (long)primary, (long)prev, (long)prevPrimary); | |
254 | } | |
255 | prev = c; | |
256 | prevPrimary = primary; | |
257 | } | |
258 | } | |
259 | } | |
260 | ||
261 | void CollationTest::TestNulTerminated() { | |
262 | IcuTestErrorCode errorCode(*this, "TestNulTerminated"); | |
263 | const CollationData *data = CollationRoot::getData(errorCode); | |
0f5d89e8 | 264 | if(errorCode.errDataIfFailureAndReset("CollationRoot::getData()")) { |
57a6839d A |
265 | return; |
266 | } | |
267 | ||
268 | static const UChar s[] = { 0x61, 0x62, 0x61, 0x62, 0 }; | |
269 | ||
270 | UTF16CollationIterator ci1(data, FALSE, s, s, s + 2); | |
271 | UTF16CollationIterator ci2(data, FALSE, s + 2, s + 2, NULL); | |
272 | for(int32_t i = 0;; ++i) { | |
273 | int64_t ce1 = ci1.nextCE(errorCode); | |
274 | int64_t ce2 = ci2.nextCE(errorCode); | |
0f5d89e8 | 275 | if(errorCode.errIfFailureAndReset("CollationIterator.nextCE()")) { |
57a6839d A |
276 | return; |
277 | } | |
278 | if(ce1 != ce2) { | |
279 | errln("CollationIterator.nextCE(with length) != nextCE(NUL-terminated) at CE %d", (int)i); | |
280 | break; | |
281 | } | |
282 | if(ce1 == Collation::NO_CE) { break; } | |
283 | } | |
284 | } | |
285 | ||
286 | void CollationTest::TestIllegalUTF8() { | |
287 | IcuTestErrorCode errorCode(*this, "TestIllegalUTF8"); | |
288 | ||
289 | setRootCollator(errorCode); | |
290 | if(errorCode.isFailure()) { | |
291 | errorCode.reset(); | |
292 | return; | |
293 | } | |
294 | coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, errorCode); | |
295 | ||
296 | static const char *strings[] = { | |
0f5d89e8 A |
297 | // string with U+FFFD == illegal byte sequence |
298 | u8"a\uFFFDz", "a\x80z", // trail byte | |
299 | u8"a\uFFFD\uFFFDz", "a\xc1\x81z", // non-shortest form | |
300 | u8"a\uFFFD\uFFFD\uFFFDz", "a\xe0\x82\x83z", // non-shortest form | |
301 | u8"a\uFFFD\uFFFD\uFFFDz", "a\xed\xa0\x80z", // lead surrogate: would be U+D800 | |
302 | u8"a\uFFFD\uFFFD\uFFFDz", "a\xed\xbf\xbfz", // trail surrogate: would be U+DFFF | |
303 | u8"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf0\x8f\xbf\xbfz", // non-shortest form | |
304 | u8"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf4\x90\x80\x80z" // out of range: would be U+110000 | |
57a6839d A |
305 | }; |
306 | ||
0f5d89e8 A |
307 | for(int32_t i = 0; i < UPRV_LENGTHOF(strings); i += 2) { |
308 | StringPiece fffd(strings[i]); | |
309 | StringPiece illegal(strings[i + 1]); | |
57a6839d A |
310 | UCollationResult order = coll->compareUTF8(fffd, illegal, errorCode); |
311 | if(order != UCOL_EQUAL) { | |
0f5d89e8 | 312 | errln("compareUTF8(pair %d: U+FFFD, illegal UTF-8)=%d != UCOL_EQUAL", |
57a6839d A |
313 | (int)i, order); |
314 | } | |
315 | } | |
316 | } | |
317 | ||
318 | namespace { | |
319 | ||
320 | void addLeadSurrogatesForSupplementary(const UnicodeSet &src, UnicodeSet &dest) { | |
321 | for(UChar32 c = 0x10000; c < 0x110000;) { | |
322 | UChar32 next = c + 0x400; | |
323 | if(src.containsSome(c, next - 1)) { | |
324 | dest.add(U16_LEAD(c)); | |
325 | } | |
326 | c = next; | |
327 | } | |
328 | } | |
329 | ||
330 | } // namespace | |
331 | ||
332 | void CollationTest::TestShortFCDData() { | |
333 | // See CollationFCD class comments. | |
334 | IcuTestErrorCode errorCode(*this, "TestShortFCDData"); | |
335 | UnicodeSet expectedLccc("[:^lccc=0:]", errorCode); | |
336 | errorCode.assertSuccess(); | |
337 | expectedLccc.add(0xdc00, 0xdfff); // add all trail surrogates | |
338 | addLeadSurrogatesForSupplementary(expectedLccc, expectedLccc); | |
339 | UnicodeSet lccc; // actual | |
340 | for(UChar32 c = 0; c <= 0xffff; ++c) { | |
341 | if(CollationFCD::hasLccc(c)) { lccc.add(c); } | |
342 | } | |
343 | UnicodeSet diff(expectedLccc); | |
344 | diff.removeAll(lccc); | |
345 | diff.remove(0x10000, 0x10ffff); // hasLccc() only works for the BMP | |
346 | UnicodeString empty("[]"); | |
347 | UnicodeString diffString; | |
348 | diff.toPattern(diffString, TRUE); | |
349 | assertEquals("CollationFCD::hasLccc() expected-actual", empty, diffString); | |
350 | diff = lccc; | |
351 | diff.removeAll(expectedLccc); | |
352 | diff.toPattern(diffString, TRUE); | |
353 | assertEquals("CollationFCD::hasLccc() actual-expected", empty, diffString, TRUE); | |
354 | ||
355 | UnicodeSet expectedTccc("[:^tccc=0:]", errorCode); | |
356 | if (errorCode.isSuccess()) { | |
357 | addLeadSurrogatesForSupplementary(expectedLccc, expectedTccc); | |
358 | addLeadSurrogatesForSupplementary(expectedTccc, expectedTccc); | |
359 | UnicodeSet tccc; // actual | |
360 | for(UChar32 c = 0; c <= 0xffff; ++c) { | |
361 | if(CollationFCD::hasTccc(c)) { tccc.add(c); } | |
362 | } | |
363 | diff = expectedTccc; | |
364 | diff.removeAll(tccc); | |
365 | diff.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP | |
366 | assertEquals("CollationFCD::hasTccc() expected-actual", empty, diffString); | |
367 | diff = tccc; | |
368 | diff.removeAll(expectedTccc); | |
369 | diff.toPattern(diffString, TRUE); | |
370 | assertEquals("CollationFCD::hasTccc() actual-expected", empty, diffString); | |
371 | } | |
372 | } | |
373 | ||
374 | class CodePointIterator { | |
375 | public: | |
376 | CodePointIterator(const UChar32 *cp, int32_t length) : cp(cp), length(length), pos(0) {} | |
377 | void resetToStart() { pos = 0; } | |
378 | UChar32 next() { return (pos < length) ? cp[pos++] : U_SENTINEL; } | |
379 | UChar32 previous() { return (pos > 0) ? cp[--pos] : U_SENTINEL; } | |
380 | int32_t getLength() const { return length; } | |
381 | int getIndex() const { return (int)pos; } | |
382 | private: | |
383 | const UChar32 *cp; | |
384 | int32_t length; | |
385 | int32_t pos; | |
386 | }; | |
387 | ||
388 | void CollationTest::checkFCD(const char *name, | |
389 | CollationIterator &ci, CodePointIterator &cpi) { | |
390 | IcuTestErrorCode errorCode(*this, "checkFCD"); | |
391 | ||
392 | // Iterate forward to the limit. | |
393 | for(;;) { | |
394 | UChar32 c1 = ci.nextCodePoint(errorCode); | |
395 | UChar32 c2 = cpi.next(); | |
396 | if(c1 != c2) { | |
397 | errln("%s.nextCodePoint(to limit, 1st pass) = U+%04lx != U+%04lx at %d", | |
398 | name, (long)c1, (long)c2, cpi.getIndex()); | |
399 | return; | |
400 | } | |
401 | if(c1 < 0) { break; } | |
402 | } | |
403 | ||
404 | // Iterate backward most of the way. | |
405 | for(int32_t n = (cpi.getLength() * 2) / 3; n > 0; --n) { | |
406 | UChar32 c1 = ci.previousCodePoint(errorCode); | |
407 | UChar32 c2 = cpi.previous(); | |
408 | if(c1 != c2) { | |
409 | errln("%s.previousCodePoint() = U+%04lx != U+%04lx at %d", | |
410 | name, (long)c1, (long)c2, cpi.getIndex()); | |
411 | return; | |
412 | } | |
413 | } | |
414 | ||
415 | // Forward again. | |
416 | for(;;) { | |
417 | UChar32 c1 = ci.nextCodePoint(errorCode); | |
418 | UChar32 c2 = cpi.next(); | |
419 | if(c1 != c2) { | |
420 | errln("%s.nextCodePoint(to limit again) = U+%04lx != U+%04lx at %d", | |
421 | name, (long)c1, (long)c2, cpi.getIndex()); | |
422 | return; | |
423 | } | |
424 | if(c1 < 0) { break; } | |
425 | } | |
426 | ||
427 | // Iterate backward to the start. | |
428 | for(;;) { | |
429 | UChar32 c1 = ci.previousCodePoint(errorCode); | |
430 | UChar32 c2 = cpi.previous(); | |
431 | if(c1 != c2) { | |
432 | errln("%s.previousCodePoint(to start) = U+%04lx != U+%04lx at %d", | |
433 | name, (long)c1, (long)c2, cpi.getIndex()); | |
434 | return; | |
435 | } | |
436 | if(c1 < 0) { break; } | |
437 | } | |
438 | } | |
439 | ||
440 | void CollationTest::TestFCD() { | |
441 | IcuTestErrorCode errorCode(*this, "TestFCD"); | |
442 | const CollationData *data = CollationRoot::getData(errorCode); | |
0f5d89e8 | 443 | if(errorCode.errDataIfFailureAndReset("CollationRoot::getData()")) { |
57a6839d A |
444 | return; |
445 | } | |
446 | ||
447 | // Input string, not FCD, NUL-terminated. | |
448 | static const UChar s[] = { | |
449 | 0x308, 0xe1, 0x62, 0x301, 0x327, 0x430, 0x62, | |
450 | U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216 | |
451 | 0x327, 0x308, // ccc=202, 230 | |
452 | U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226 | |
453 | U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), | |
454 | U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), | |
455 | 0xac01, | |
456 | 0xe7, // Character with tccc!=0 decomposed together with mis-ordered sequence. | |
457 | U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), U16_LEAD(0x1D165), U16_TRAIL(0x1D165), | |
458 | 0xe1, // Character with tccc!=0 decomposed together with decomposed sequence. | |
459 | 0xf73, 0xf75, // Tibetan composite vowels must be decomposed. | |
460 | 0x4e00, 0xf81, | |
461 | 0 | |
462 | }; | |
463 | // Expected code points. | |
464 | static const UChar32 cp[] = { | |
465 | 0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62, | |
466 | 0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308, | |
467 | 0x1D15F, 0x1D16D, | |
468 | 0xac01, | |
469 | 0x63, 0x327, 0x1D165, 0x1D16D, | |
470 | 0x61, | |
471 | 0xf71, 0xf71, 0xf72, 0xf74, 0x301, | |
472 | 0x4e00, 0xf71, 0xf80 | |
473 | }; | |
474 | ||
475 | FCDUTF16CollationIterator u16ci(data, FALSE, s, s, NULL); | |
0f5d89e8 | 476 | if(errorCode.errIfFailureAndReset("FCDUTF16CollationIterator constructor")) { |
57a6839d A |
477 | return; |
478 | } | |
b331163b | 479 | CodePointIterator cpi(cp, UPRV_LENGTHOF(cp)); |
57a6839d A |
480 | checkFCD("FCDUTF16CollationIterator", u16ci, cpi); |
481 | ||
57a6839d A |
482 | cpi.resetToStart(); |
483 | std::string utf8; | |
484 | UnicodeString(s).toUTF8String(utf8); | |
485 | FCDUTF8CollationIterator u8ci(data, FALSE, | |
486 | reinterpret_cast<const uint8_t *>(utf8.c_str()), 0, -1); | |
0f5d89e8 | 487 | if(errorCode.errIfFailureAndReset("FCDUTF8CollationIterator constructor")) { |
57a6839d A |
488 | return; |
489 | } | |
490 | checkFCD("FCDUTF8CollationIterator", u8ci, cpi); | |
57a6839d A |
491 | |
492 | cpi.resetToStart(); | |
493 | UCharIterator iter; | |
b331163b | 494 | uiter_setString(&iter, s, UPRV_LENGTHOF(s) - 1); // -1: without the terminating NUL |
57a6839d | 495 | FCDUIterCollationIterator uici(data, FALSE, iter, 0); |
0f5d89e8 | 496 | if(errorCode.errIfFailureAndReset("FCDUIterCollationIterator constructor")) { |
57a6839d A |
497 | return; |
498 | } | |
499 | checkFCD("FCDUIterCollationIterator", uici, cpi); | |
500 | } | |
501 | ||
502 | void CollationTest::checkAllocWeights(CollationWeights &cw, | |
503 | uint32_t lowerLimit, uint32_t upperLimit, int32_t n, | |
504 | int32_t someLength, int32_t minCount) { | |
505 | if(!cw.allocWeights(lowerLimit, upperLimit, n)) { | |
506 | errln("CollationWeights::allocWeights(%lx, %lx, %ld) = FALSE", | |
507 | (long)lowerLimit, (long)upperLimit, (long)n); | |
508 | return; | |
509 | } | |
510 | uint32_t previous = lowerLimit; | |
511 | int32_t count = 0; // number of weights that have someLength | |
512 | for(int32_t i = 0; i < n; ++i) { | |
513 | uint32_t w = cw.nextWeight(); | |
514 | if(w == 0xffffffff) { | |
515 | errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() " | |
516 | "returns only %ld weights", | |
517 | (long)lowerLimit, (long)upperLimit, (long)n, (long)i); | |
518 | return; | |
519 | } | |
520 | if(!(previous < w && w < upperLimit)) { | |
521 | errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() " | |
522 | "number %ld -> %lx not between %lx and %lx", | |
523 | (long)lowerLimit, (long)upperLimit, (long)n, | |
524 | (long)(i + 1), (long)w, (long)previous, (long)upperLimit); | |
525 | return; | |
526 | } | |
527 | if(CollationWeights::lengthOfWeight(w) == someLength) { ++count; } | |
528 | } | |
529 | if(count < minCount) { | |
530 | errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() " | |
531 | "returns only %ld < %ld weights of length %d", | |
532 | (long)lowerLimit, (long)upperLimit, (long)n, | |
533 | (long)count, (long)minCount, (int)someLength); | |
534 | } | |
535 | } | |
536 | ||
537 | void CollationTest::TestCollationWeights() { | |
538 | CollationWeights cw; | |
539 | ||
540 | // Non-compressible primaries use 254 second bytes 02..FF. | |
541 | logln("CollationWeights.initForPrimary(non-compressible)"); | |
542 | cw.initForPrimary(FALSE); | |
543 | // Expect 1 weight 11 and 254 weights 12xx. | |
544 | checkAllocWeights(cw, 0x10000000, 0x13000000, 255, 1, 1); | |
545 | checkAllocWeights(cw, 0x10000000, 0x13000000, 255, 2, 254); | |
546 | // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202. | |
547 | checkAllocWeights(cw, 0x10fefe40, 0x12030300, 260, 2, 255); | |
548 | // Expect 254 two-byte weights from the ranges 10ff and 11xx. | |
549 | checkAllocWeights(cw, 0x10fefe40, 0x12030300, 600, 2, 254); | |
550 | // Expect 254^2=64516 three-byte weights. | |
551 | // During computation, there should be 3 three-byte ranges | |
552 | // 10ffff, 11xxxx, 120202. | |
553 | // The middle one should be split 64515:1, | |
554 | // and the newly-split-off range and the last ranged lengthened. | |
555 | checkAllocWeights(cw, 0x10fffe00, 0x12020300, 1 + 64516 + 254 + 1, 3, 64516); | |
556 | // Expect weights 1102 & 1103. | |
557 | checkAllocWeights(cw, 0x10ff0000, 0x11040000, 2, 2, 2); | |
558 | // Expect weights 102102 & 102103. | |
559 | checkAllocWeights(cw, 0x1020ff00, 0x10210400, 2, 3, 2); | |
560 | ||
561 | // Compressible primaries use 251 second bytes 04..FE. | |
562 | logln("CollationWeights.initForPrimary(compressible)"); | |
563 | cw.initForPrimary(TRUE); | |
564 | // Expect 1 weight 11 and 251 weights 12xx. | |
565 | checkAllocWeights(cw, 0x10000000, 0x13000000, 252, 1, 1); | |
566 | checkAllocWeights(cw, 0x10000000, 0x13000000, 252, 2, 251); | |
567 | // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204. | |
568 | checkAllocWeights(cw, 0x10fdfe40, 0x12050300, 260, 2, 252); | |
569 | // Expect weights 1104 & 1105. | |
570 | checkAllocWeights(cw, 0x10fe0000, 0x11060000, 2, 2, 2); | |
571 | // Expect weights 102102 & 102103. | |
572 | checkAllocWeights(cw, 0x1020ff00, 0x10210400, 2, 3, 2); | |
573 | ||
574 | // Secondary and tertiary weights use only bytes 3 & 4. | |
575 | logln("CollationWeights.initForSecondary()"); | |
576 | cw.initForSecondary(); | |
577 | // Expect weights fbxx and all four fc..ff. | |
578 | checkAllocWeights(cw, 0xfb20, 0x10000, 20, 3, 4); | |
579 | ||
580 | logln("CollationWeights.initForTertiary()"); | |
581 | cw.initForTertiary(); | |
582 | // Expect weights 3dxx and both 3e & 3f. | |
583 | checkAllocWeights(cw, 0x3d02, 0x4000, 10, 3, 2); | |
584 | } | |
585 | ||
586 | namespace { | |
587 | ||
588 | UBool isValidCE(const CollationRootElements &re, const CollationData &data, | |
589 | uint32_t p, uint32_t s, uint32_t ctq) { | |
590 | uint32_t p1 = p >> 24; | |
591 | uint32_t p2 = (p >> 16) & 0xff; | |
592 | uint32_t p3 = (p >> 8) & 0xff; | |
593 | uint32_t p4 = p & 0xff; | |
594 | uint32_t s1 = s >> 8; | |
595 | uint32_t s2 = s & 0xff; | |
596 | // ctq = Case, Tertiary, Quaternary | |
597 | uint32_t c = (ctq & Collation::CASE_MASK) >> 14; | |
598 | uint32_t t = ctq & Collation::ONLY_TERTIARY_MASK; | |
599 | uint32_t t1 = t >> 8; | |
600 | uint32_t t2 = t & 0xff; | |
601 | uint32_t q = ctq & Collation::QUATERNARY_MASK; | |
602 | // No leading zero bytes. | |
603 | if((p != 0 && p1 == 0) || (s != 0 && s1 == 0) || (t != 0 && t1 == 0)) { | |
604 | return FALSE; | |
605 | } | |
606 | // No intermediate zero bytes. | |
607 | if(p1 != 0 && p2 == 0 && (p & 0xffff) != 0) { | |
608 | return FALSE; | |
609 | } | |
610 | if(p2 != 0 && p3 == 0 && p4 != 0) { | |
611 | return FALSE; | |
612 | } | |
613 | // Minimum & maximum lead bytes. | |
614 | if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) || | |
b331163b A |
615 | s1 == Collation::LEVEL_SEPARATOR_BYTE || |
616 | t1 == Collation::LEVEL_SEPARATOR_BYTE || t1 > 0x3f) { | |
57a6839d A |
617 | return FALSE; |
618 | } | |
619 | if(c > 2) { | |
620 | return FALSE; | |
621 | } | |
622 | // The valid byte range for the second primary byte depends on compressibility. | |
623 | if(p2 != 0) { | |
624 | if(data.isCompressibleLeadByte(p1)) { | |
625 | if(p2 <= Collation::PRIMARY_COMPRESSION_LOW_BYTE || | |
626 | Collation::PRIMARY_COMPRESSION_HIGH_BYTE <= p2) { | |
627 | return FALSE; | |
628 | } | |
629 | } else { | |
630 | if(p2 <= Collation::LEVEL_SEPARATOR_BYTE) { | |
631 | return FALSE; | |
632 | } | |
633 | } | |
634 | } | |
635 | // Other bytes just need to avoid the level separator. | |
636 | // Trailing zeros are ok. | |
637 | U_ASSERT(Collation::LEVEL_SEPARATOR_BYTE == 1); | |
638 | if(p3 == Collation::LEVEL_SEPARATOR_BYTE || p4 == Collation::LEVEL_SEPARATOR_BYTE || | |
639 | s2 == Collation::LEVEL_SEPARATOR_BYTE || t2 == Collation::LEVEL_SEPARATOR_BYTE) { | |
640 | return FALSE; | |
641 | } | |
642 | // Well-formed CEs. | |
643 | if(p == 0) { | |
644 | if(s == 0) { | |
645 | if(t == 0) { | |
646 | // Completely ignorable CE. | |
647 | // Quaternary CEs are not supported. | |
648 | if(c != 0 || q != 0) { | |
649 | return FALSE; | |
650 | } | |
651 | } else { | |
652 | // Tertiary CE. | |
653 | if(t < re.getTertiaryBoundary() || c != 2) { | |
654 | return FALSE; | |
655 | } | |
656 | } | |
657 | } else { | |
658 | // Secondary CE. | |
659 | if(s < re.getSecondaryBoundary() || t == 0 || t >= re.getTertiaryBoundary()) { | |
660 | return FALSE; | |
661 | } | |
662 | } | |
663 | } else { | |
664 | // Primary CE. | |
665 | if(s == 0 || (Collation::COMMON_WEIGHT16 < s && s <= re.getLastCommonSecondary()) || | |
666 | s >= re.getSecondaryBoundary()) { | |
667 | return FALSE; | |
668 | } | |
669 | if(t == 0 || t >= re.getTertiaryBoundary()) { | |
670 | return FALSE; | |
671 | } | |
672 | } | |
673 | return TRUE; | |
674 | } | |
675 | ||
676 | UBool isValidCE(const CollationRootElements &re, const CollationData &data, int64_t ce) { | |
677 | uint32_t p = (uint32_t)(ce >> 32); | |
678 | uint32_t secTer = (uint32_t)ce; | |
679 | return isValidCE(re, data, p, secTer >> 16, secTer & 0xffff); | |
680 | } | |
681 | ||
682 | class RootElementsIterator { | |
683 | public: | |
684 | RootElementsIterator(const CollationData &root) | |
685 | : data(root), | |
686 | elements(root.rootElements), length(root.rootElementsLength), | |
687 | pri(0), secTer(0), | |
688 | index((int32_t)elements[CollationRootElements::IX_FIRST_TERTIARY_INDEX]) {} | |
689 | ||
690 | UBool next() { | |
691 | if(index >= length) { return FALSE; } | |
692 | uint32_t p = elements[index]; | |
693 | if(p == CollationRootElements::PRIMARY_SENTINEL) { return FALSE; } | |
694 | if((p & CollationRootElements::SEC_TER_DELTA_FLAG) != 0) { | |
695 | ++index; | |
696 | secTer = p & ~CollationRootElements::SEC_TER_DELTA_FLAG; | |
697 | return TRUE; | |
698 | } | |
699 | if((p & CollationRootElements::PRIMARY_STEP_MASK) != 0) { | |
700 | // End of a range, enumerate the primaries in the range. | |
701 | int32_t step = (int32_t)p & CollationRootElements::PRIMARY_STEP_MASK; | |
702 | p &= 0xffffff00; | |
703 | if(pri == p) { | |
704 | // Finished the range, return the next CE after it. | |
705 | ++index; | |
706 | return next(); | |
707 | } | |
708 | U_ASSERT(pri < p); | |
709 | // Return the next primary in this range. | |
710 | UBool isCompressible = data.isCompressiblePrimary(pri); | |
711 | if((pri & 0xffff) == 0) { | |
712 | pri = Collation::incTwoBytePrimaryByOffset(pri, isCompressible, step); | |
713 | } else { | |
714 | pri = Collation::incThreeBytePrimaryByOffset(pri, isCompressible, step); | |
715 | } | |
716 | return TRUE; | |
717 | } | |
718 | // Simple primary CE. | |
719 | ++index; | |
720 | pri = p; | |
b331163b A |
721 | // Does this have an explicit below-common sec/ter unit, |
722 | // or does it imply a common one? | |
723 | if(index == length) { | |
724 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
725 | } else { | |
726 | secTer = elements[index]; | |
727 | if((secTer & CollationRootElements::SEC_TER_DELTA_FLAG) == 0) { | |
728 | // No sec/ter delta. | |
729 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
730 | } else { | |
731 | secTer &= ~CollationRootElements::SEC_TER_DELTA_FLAG; | |
732 | if(secTer > Collation::COMMON_SEC_AND_TER_CE) { | |
733 | // Implied sec/ter. | |
734 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
735 | } else { | |
736 | // Explicit sec/ter below common/common. | |
737 | ++index; | |
738 | } | |
739 | } | |
740 | } | |
57a6839d A |
741 | return TRUE; |
742 | } | |
743 | ||
744 | uint32_t getPrimary() const { return pri; } | |
745 | uint32_t getSecTer() const { return secTer; } | |
746 | ||
747 | private: | |
748 | const CollationData &data; | |
749 | const uint32_t *elements; | |
750 | int32_t length; | |
751 | ||
752 | uint32_t pri; | |
753 | uint32_t secTer; | |
754 | int32_t index; | |
755 | }; | |
756 | ||
757 | } // namespace | |
758 | ||
759 | void CollationTest::TestRootElements() { | |
760 | IcuTestErrorCode errorCode(*this, "TestRootElements"); | |
761 | const CollationData *root = CollationRoot::getData(errorCode); | |
0f5d89e8 | 762 | if(errorCode.errDataIfFailureAndReset("CollationRoot::getData()")) { |
57a6839d A |
763 | return; |
764 | } | |
765 | CollationRootElements rootElements(root->rootElements, root->rootElementsLength); | |
766 | RootElementsIterator iter(*root); | |
767 | ||
768 | // We check each root CE for validity, | |
769 | // and we also verify that there is a tailoring gap between each two CEs. | |
770 | CollationWeights cw1c; // compressible primary weights | |
771 | CollationWeights cw1u; // uncompressible primary weights | |
772 | CollationWeights cw2; | |
773 | CollationWeights cw3; | |
774 | ||
775 | cw1c.initForPrimary(TRUE); | |
776 | cw1u.initForPrimary(FALSE); | |
777 | cw2.initForSecondary(); | |
778 | cw3.initForTertiary(); | |
779 | ||
780 | // Note: The root elements do not include Han-implicit or unassigned-implicit CEs, | |
781 | // nor the special merge-separator CE for U+FFFE. | |
782 | uint32_t prevPri = 0; | |
783 | uint32_t prevSec = 0; | |
784 | uint32_t prevTer = 0; | |
785 | while(iter.next()) { | |
786 | uint32_t pri = iter.getPrimary(); | |
787 | uint32_t secTer = iter.getSecTer(); | |
788 | // CollationRootElements CEs must have 0 case and quaternary bits. | |
789 | if((secTer & Collation::CASE_AND_QUATERNARY_MASK) != 0) { | |
790 | errln("CollationRootElements CE has non-zero case and/or quaternary bits: %08lx %08lx", | |
791 | (long)pri, (long)secTer); | |
792 | } | |
793 | uint32_t sec = secTer >> 16; | |
794 | uint32_t ter = secTer & Collation::ONLY_TERTIARY_MASK; | |
795 | uint32_t ctq = ter; | |
796 | if(pri == 0 && sec == 0 && ter != 0) { | |
797 | // Tertiary CEs must have uppercase bits, | |
798 | // but they are not stored in the CollationRootElements. | |
799 | ctq |= 0x8000; | |
800 | } | |
801 | if(!isValidCE(rootElements, *root, pri, sec, ctq)) { | |
802 | errln("invalid root CE %08lx %08lx", (long)pri, (long)secTer); | |
803 | } else { | |
804 | if(pri != prevPri) { | |
805 | uint32_t newWeight = 0; | |
806 | if(prevPri == 0 || prevPri >= Collation::FFFD_PRIMARY) { | |
807 | // There is currently no tailoring gap after primary ignorables, | |
808 | // and we forbid tailoring after U+FFFD and U+FFFF. | |
809 | } else if(root->isCompressiblePrimary(prevPri)) { | |
810 | if(!cw1c.allocWeights(prevPri, pri, 1)) { | |
811 | errln("no primary/compressible tailoring gap between %08lx and %08lx", | |
812 | (long)prevPri, (long)pri); | |
813 | } else { | |
814 | newWeight = cw1c.nextWeight(); | |
815 | } | |
816 | } else { | |
817 | if(!cw1u.allocWeights(prevPri, pri, 1)) { | |
818 | errln("no primary/uncompressible tailoring gap between %08lx and %08lx", | |
819 | (long)prevPri, (long)pri); | |
820 | } else { | |
821 | newWeight = cw1u.nextWeight(); | |
822 | } | |
823 | } | |
824 | if(newWeight != 0 && !(prevPri < newWeight && newWeight < pri)) { | |
825 | errln("mis-allocated primary weight, should get %08lx < %08lx < %08lx", | |
826 | (long)prevPri, (long)newWeight, (long)pri); | |
827 | } | |
828 | } else if(sec != prevSec) { | |
829 | uint32_t lowerLimit = | |
830 | prevSec == 0 ? rootElements.getSecondaryBoundary() - 0x100 : prevSec; | |
831 | if(!cw2.allocWeights(lowerLimit, sec, 1)) { | |
832 | errln("no secondary tailoring gap between %04x and %04x", lowerLimit, sec); | |
833 | } else { | |
834 | uint32_t newWeight = cw2.nextWeight(); | |
835 | if(!(prevSec < newWeight && newWeight < sec)) { | |
836 | errln("mis-allocated secondary weight, should get %04x < %04x < %04x", | |
837 | (long)lowerLimit, (long)newWeight, (long)sec); | |
838 | } | |
839 | } | |
840 | } else if(ter != prevTer) { | |
841 | uint32_t lowerLimit = | |
842 | prevTer == 0 ? rootElements.getTertiaryBoundary() - 0x100 : prevTer; | |
843 | if(!cw3.allocWeights(lowerLimit, ter, 1)) { | |
844 | errln("no teriary tailoring gap between %04x and %04x", lowerLimit, ter); | |
845 | } else { | |
846 | uint32_t newWeight = cw3.nextWeight(); | |
847 | if(!(prevTer < newWeight && newWeight < ter)) { | |
848 | errln("mis-allocated secondary weight, should get %04x < %04x < %04x", | |
849 | (long)lowerLimit, (long)newWeight, (long)ter); | |
850 | } | |
851 | } | |
852 | } else { | |
853 | errln("duplicate root CE %08lx %08lx", (long)pri, (long)secTer); | |
854 | } | |
855 | } | |
856 | prevPri = pri; | |
857 | prevSec = sec; | |
858 | prevTer = ter; | |
859 | } | |
860 | } | |
861 | ||
862 | void CollationTest::TestTailoredElements() { | |
863 | IcuTestErrorCode errorCode(*this, "TestTailoredElements"); | |
864 | const CollationData *root = CollationRoot::getData(errorCode); | |
0f5d89e8 | 865 | if(errorCode.errDataIfFailureAndReset("CollationRoot::getData()")) { |
57a6839d A |
866 | return; |
867 | } | |
868 | CollationRootElements rootElements(root->rootElements, root->rootElementsLength); | |
869 | ||
870 | UHashtable *prevLocales = uhash_open(uhash_hashChars, uhash_compareChars, NULL, errorCode); | |
0f5d89e8 | 871 | if(errorCode.errIfFailureAndReset("failed to create a hash table")) { |
57a6839d A |
872 | return; |
873 | } | |
874 | uhash_setKeyDeleter(prevLocales, uprv_free); | |
875 | // TestRootElements() tests the root collator which does not have tailorings. | |
876 | uhash_puti(prevLocales, uprv_strdup(""), 1, errorCode); | |
877 | uhash_puti(prevLocales, uprv_strdup("root"), 1, errorCode); | |
878 | uhash_puti(prevLocales, uprv_strdup("root@collation=standard"), 1, errorCode); | |
879 | ||
880 | UVector64 ces(errorCode); | |
881 | LocalPointer<StringEnumeration> locales(Collator::getAvailableLocales()); | |
882 | U_ASSERT(locales.isValid()); | |
883 | const char *localeID = "root"; | |
884 | do { | |
885 | Locale locale(localeID); | |
886 | LocalPointer<StringEnumeration> types( | |
887 | Collator::getKeywordValuesForLocale("collation", locale, FALSE, errorCode)); | |
888 | errorCode.assertSuccess(); | |
b331163b A |
889 | const char *type; // first: default type |
890 | while((type = types->next(NULL, errorCode)) != NULL) { | |
891 | if(strncmp(type, "private-", 8) == 0) { | |
892 | errln("Collator::getKeywordValuesForLocale(%s) returns private collation keyword: %s", | |
893 | localeID, type); | |
57a6839d | 894 | } |
b331163b A |
895 | Locale localeWithType(locale); |
896 | localeWithType.setKeywordValue("collation", type, errorCode); | |
57a6839d A |
897 | errorCode.assertSuccess(); |
898 | LocalPointer<Collator> coll(Collator::createInstance(localeWithType, errorCode)); | |
0f5d89e8 | 899 | if(errorCode.errIfFailureAndReset("Collator::createInstance(%s)", |
57a6839d A |
900 | localeWithType.getName())) { |
901 | continue; | |
902 | } | |
903 | Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, errorCode); | |
904 | if(uhash_geti(prevLocales, actual.getName()) != 0) { | |
905 | continue; | |
906 | } | |
907 | uhash_puti(prevLocales, uprv_strdup(actual.getName()), 1, errorCode); | |
908 | errorCode.assertSuccess(); | |
909 | logln("TestTailoredElements(): requested %s -> actual %s", | |
910 | localeWithType.getName(), actual.getName()); | |
911 | RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias()); | |
912 | if(rbc == NULL) { | |
913 | continue; | |
914 | } | |
915 | // Note: It would be better to get tailored strings such that we can | |
916 | // identify the prefix, and only get the CEs for the prefix+string, | |
917 | // not also for the prefix. | |
918 | // There is currently no API for that. | |
919 | // It would help in an unusual case where a contraction starting in the prefix | |
920 | // extends past its end, and we do not see the intended mapping. | |
921 | // For example, for a mapping p|st, if there is also a contraction ps, | |
922 | // then we get CEs(ps)+CEs(t), rather than CEs(p|st). | |
923 | LocalPointer<UnicodeSet> tailored(coll->getTailoredSet(errorCode)); | |
924 | errorCode.assertSuccess(); | |
925 | UnicodeSetIterator iter(*tailored); | |
926 | while(iter.next()) { | |
927 | const UnicodeString &s = iter.getString(); | |
928 | ces.removeAllElements(); | |
929 | rbc->internalGetCEs(s, ces, errorCode); | |
930 | errorCode.assertSuccess(); | |
931 | for(int32_t i = 0; i < ces.size(); ++i) { | |
932 | int64_t ce = ces.elementAti(i); | |
933 | if(!isValidCE(rootElements, *root, ce)) { | |
934 | errln("invalid tailored CE %016llx at CE index %d from string:", | |
935 | (long long)ce, (int)i); | |
936 | infoln(prettify(s)); | |
937 | } | |
938 | } | |
939 | } | |
b331163b | 940 | } |
57a6839d A |
941 | } while((localeID = locales->next(NULL, errorCode)) != NULL); |
942 | uhash_close(prevLocales); | |
943 | } | |
944 | ||
945 | UnicodeString CollationTest::printSortKey(const uint8_t *p, int32_t length) { | |
946 | UnicodeString s; | |
947 | for(int32_t i = 0; i < length; ++i) { | |
948 | if(i > 0) { s.append((UChar)0x20); } | |
949 | uint8_t b = p[i]; | |
950 | if(b == 0) { | |
951 | s.append((UChar)0x2e); // period | |
952 | } else if(b == 1) { | |
953 | s.append((UChar)0x7c); // vertical bar | |
954 | } else { | |
955 | appendHex(b, 2, s); | |
956 | } | |
957 | } | |
958 | return s; | |
959 | } | |
960 | ||
961 | UnicodeString CollationTest::printCollationKey(const CollationKey &key) { | |
962 | int32_t length; | |
963 | const uint8_t *p = key.getByteArray(length); | |
964 | return printSortKey(p, length); | |
965 | } | |
966 | ||
b331163b A |
967 | UBool CollationTest::readNonEmptyLine(UCHARBUF *f, IcuTestErrorCode &errorCode) { |
968 | for(;;) { | |
969 | int32_t lineLength; | |
970 | const UChar *line = ucbuf_readline(f, &lineLength, errorCode); | |
971 | if(line == NULL || errorCode.isFailure()) { | |
972 | fileLine.remove(); | |
973 | return FALSE; | |
974 | } | |
975 | ++fileLineNumber; | |
976 | // Strip trailing CR/LF, comments, and spaces. | |
977 | const UChar *comment = u_memchr(line, 0x23, lineLength); // '#' | |
978 | if(comment != NULL) { | |
979 | lineLength = (int32_t)(comment - line); | |
980 | } else { | |
981 | while(lineLength > 0 && isCROrLF(line[lineLength - 1])) { --lineLength; } | |
982 | } | |
983 | while(lineLength > 0 && isSpace(line[lineLength - 1])) { --lineLength; } | |
984 | if(lineLength != 0) { | |
985 | fileLine.setTo(FALSE, line, lineLength); | |
986 | return TRUE; | |
987 | } | |
988 | // Empty line, continue. | |
57a6839d | 989 | } |
57a6839d A |
990 | } |
991 | ||
992 | void CollationTest::parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, | |
993 | UErrorCode &errorCode) { | |
994 | int32_t length = fileLine.length(); | |
995 | int32_t i; | |
996 | for(i = start; i < length && !isSpace(fileLine[i]); ++i) {} | |
997 | int32_t pipeIndex = fileLine.indexOf((UChar)0x7c, start, i - start); // '|' | |
998 | if(pipeIndex >= 0) { | |
999 | prefix = fileLine.tempSubStringBetween(start, pipeIndex).unescape(); | |
1000 | if(prefix.isEmpty()) { | |
1001 | errln("empty prefix on line %d", (int)fileLineNumber); | |
1002 | infoln(fileLine); | |
1003 | errorCode = U_PARSE_ERROR; | |
1004 | return; | |
1005 | } | |
1006 | start = pipeIndex + 1; | |
1007 | } else { | |
1008 | prefix.remove(); | |
1009 | } | |
1010 | s = fileLine.tempSubStringBetween(start, i).unescape(); | |
1011 | if(s.isEmpty()) { | |
1012 | errln("empty string on line %d", (int)fileLineNumber); | |
1013 | infoln(fileLine); | |
1014 | errorCode = U_PARSE_ERROR; | |
1015 | return; | |
1016 | } | |
1017 | start = i; | |
1018 | } | |
1019 | ||
1020 | Collation::Level CollationTest::parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode) { | |
1021 | Collation::Level relation; | |
1022 | int32_t start; | |
1023 | if(fileLine[0] == 0x3c) { // < | |
1024 | UChar second = fileLine[1]; | |
1025 | start = 2; | |
1026 | switch(second) { | |
1027 | case 0x31: // <1 | |
1028 | relation = Collation::PRIMARY_LEVEL; | |
1029 | break; | |
1030 | case 0x32: // <2 | |
1031 | relation = Collation::SECONDARY_LEVEL; | |
1032 | break; | |
1033 | case 0x33: // <3 | |
1034 | relation = Collation::TERTIARY_LEVEL; | |
1035 | break; | |
1036 | case 0x34: // <4 | |
1037 | relation = Collation::QUATERNARY_LEVEL; | |
1038 | break; | |
1039 | case 0x63: // <c | |
1040 | relation = Collation::CASE_LEVEL; | |
1041 | break; | |
1042 | case 0x69: // <i | |
1043 | relation = Collation::IDENTICAL_LEVEL; | |
1044 | break; | |
1045 | default: // just < | |
1046 | relation = Collation::NO_LEVEL; | |
1047 | start = 1; | |
1048 | break; | |
1049 | } | |
1050 | } else if(fileLine[0] == 0x3d) { // = | |
1051 | relation = Collation::ZERO_LEVEL; | |
1052 | start = 1; | |
1053 | } else { | |
1054 | start = 0; | |
1055 | } | |
1056 | if(start == 0 || !isSpace(fileLine[start])) { | |
1057 | errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line %d", (int)fileLineNumber); | |
1058 | infoln(fileLine); | |
1059 | errorCode.set(U_PARSE_ERROR); | |
1060 | return Collation::NO_LEVEL; | |
1061 | } | |
1062 | start = skipSpaces(start); | |
1063 | UnicodeString prefix; | |
1064 | parseString(start, prefix, s, errorCode); | |
1065 | if(errorCode.isSuccess() && !prefix.isEmpty()) { | |
1066 | errln("prefix string not allowed for test string: on line %d", (int)fileLineNumber); | |
1067 | infoln(fileLine); | |
1068 | errorCode.set(U_PARSE_ERROR); | |
1069 | return Collation::NO_LEVEL; | |
1070 | } | |
1071 | if(start < fileLine.length()) { | |
1072 | errln("unexpected line contents after test string on line %d", (int)fileLineNumber); | |
1073 | infoln(fileLine); | |
1074 | errorCode.set(U_PARSE_ERROR); | |
1075 | return Collation::NO_LEVEL; | |
1076 | } | |
1077 | return relation; | |
1078 | } | |
1079 | ||
1080 | static const struct { | |
1081 | const char *name; | |
1082 | UColAttribute attr; | |
1083 | } attributes[] = { | |
1084 | { "backwards", UCOL_FRENCH_COLLATION }, | |
1085 | { "alternate", UCOL_ALTERNATE_HANDLING }, | |
1086 | { "caseFirst", UCOL_CASE_FIRST }, | |
1087 | { "caseLevel", UCOL_CASE_LEVEL }, | |
1088 | // UCOL_NORMALIZATION_MODE is turned on and off automatically. | |
1089 | { "strength", UCOL_STRENGTH }, | |
1090 | // UCOL_HIRAGANA_QUATERNARY_MODE is deprecated. | |
1091 | { "numeric", UCOL_NUMERIC_COLLATION } | |
1092 | }; | |
1093 | ||
1094 | static const struct { | |
1095 | const char *name; | |
1096 | UColAttributeValue value; | |
1097 | } attributeValues[] = { | |
1098 | { "default", UCOL_DEFAULT }, | |
1099 | { "primary", UCOL_PRIMARY }, | |
1100 | { "secondary", UCOL_SECONDARY }, | |
1101 | { "tertiary", UCOL_TERTIARY }, | |
1102 | { "quaternary", UCOL_QUATERNARY }, | |
1103 | { "identical", UCOL_IDENTICAL }, | |
1104 | { "off", UCOL_OFF }, | |
1105 | { "on", UCOL_ON }, | |
1106 | { "shifted", UCOL_SHIFTED }, | |
1107 | { "non-ignorable", UCOL_NON_IGNORABLE }, | |
1108 | { "lower", UCOL_LOWER_FIRST }, | |
1109 | { "upper", UCOL_UPPER_FIRST } | |
1110 | }; | |
1111 | ||
1112 | void CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) { | |
b331163b A |
1113 | // Parse attributes even if the Collator could not be created, |
1114 | // in order to report syntax errors. | |
57a6839d | 1115 | int32_t start = skipSpaces(1); |
f3c0d7a5 | 1116 | int32_t equalPos = fileLine.indexOf((UChar)0x3d); |
57a6839d A |
1117 | if(equalPos < 0) { |
1118 | if(fileLine.compare(start, 7, UNICODE_STRING("reorder", 7)) == 0) { | |
1119 | parseAndSetReorderCodes(start + 7, errorCode); | |
1120 | return; | |
1121 | } | |
1122 | errln("missing '=' on line %d", (int)fileLineNumber); | |
1123 | infoln(fileLine); | |
1124 | errorCode.set(U_PARSE_ERROR); | |
1125 | return; | |
1126 | } | |
1127 | ||
1128 | UnicodeString attrString = fileLine.tempSubStringBetween(start, equalPos); | |
1129 | UnicodeString valueString = fileLine.tempSubString(equalPos+1); | |
1130 | if(attrString == UNICODE_STRING("maxVariable", 11)) { | |
1131 | UColReorderCode max; | |
1132 | if(valueString == UNICODE_STRING("space", 5)) { | |
1133 | max = UCOL_REORDER_CODE_SPACE; | |
1134 | } else if(valueString == UNICODE_STRING("punct", 5)) { | |
1135 | max = UCOL_REORDER_CODE_PUNCTUATION; | |
1136 | } else if(valueString == UNICODE_STRING("symbol", 6)) { | |
1137 | max = UCOL_REORDER_CODE_SYMBOL; | |
1138 | } else if(valueString == UNICODE_STRING("currency", 8)) { | |
1139 | max = UCOL_REORDER_CODE_CURRENCY; | |
1140 | } else { | |
1141 | errln("invalid attribute value name on line %d", (int)fileLineNumber); | |
1142 | infoln(fileLine); | |
1143 | errorCode.set(U_PARSE_ERROR); | |
1144 | return; | |
1145 | } | |
b331163b A |
1146 | if(coll != NULL) { |
1147 | coll->setMaxVariable(max, errorCode); | |
1148 | if(errorCode.isFailure()) { | |
1149 | errln("setMaxVariable() failed on line %d: %s", | |
1150 | (int)fileLineNumber, errorCode.errorName()); | |
1151 | infoln(fileLine); | |
1152 | return; | |
1153 | } | |
57a6839d A |
1154 | } |
1155 | fileLine.remove(); | |
1156 | return; | |
1157 | } | |
1158 | ||
1159 | UColAttribute attr; | |
1160 | for(int32_t i = 0;; ++i) { | |
b331163b | 1161 | if(i == UPRV_LENGTHOF(attributes)) { |
57a6839d A |
1162 | errln("invalid attribute name on line %d", (int)fileLineNumber); |
1163 | infoln(fileLine); | |
1164 | errorCode.set(U_PARSE_ERROR); | |
1165 | return; | |
1166 | } | |
1167 | if(attrString == UnicodeString(attributes[i].name, -1, US_INV)) { | |
1168 | attr = attributes[i].attr; | |
1169 | break; | |
1170 | } | |
1171 | } | |
1172 | ||
1173 | UColAttributeValue value; | |
1174 | for(int32_t i = 0;; ++i) { | |
b331163b | 1175 | if(i == UPRV_LENGTHOF(attributeValues)) { |
57a6839d A |
1176 | errln("invalid attribute value name on line %d", (int)fileLineNumber); |
1177 | infoln(fileLine); | |
1178 | errorCode.set(U_PARSE_ERROR); | |
1179 | return; | |
1180 | } | |
1181 | if(valueString == UnicodeString(attributeValues[i].name, -1, US_INV)) { | |
1182 | value = attributeValues[i].value; | |
1183 | break; | |
1184 | } | |
1185 | } | |
1186 | ||
b331163b A |
1187 | if(coll != NULL) { |
1188 | coll->setAttribute(attr, value, errorCode); | |
1189 | if(errorCode.isFailure()) { | |
1190 | errln("illegal attribute=value combination on line %d: %s", | |
1191 | (int)fileLineNumber, errorCode.errorName()); | |
1192 | infoln(fileLine); | |
1193 | return; | |
1194 | } | |
57a6839d A |
1195 | } |
1196 | fileLine.remove(); | |
1197 | } | |
1198 | ||
1199 | void CollationTest::parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode) { | |
1200 | UVector32 reorderCodes(errorCode); | |
1201 | while(start < fileLine.length()) { | |
1202 | start = skipSpaces(start); | |
1203 | int32_t limit = start; | |
1204 | while(limit < fileLine.length() && !isSpace(fileLine[limit])) { ++limit; } | |
1205 | CharString name; | |
1206 | name.appendInvariantChars(fileLine.tempSubStringBetween(start, limit), errorCode); | |
1207 | int32_t code = CollationRuleParser::getReorderCode(name.data()); | |
b331163b A |
1208 | if(code < 0) { |
1209 | if(uprv_stricmp(name.data(), "default") == 0) { | |
1210 | code = UCOL_REORDER_CODE_DEFAULT; // -1 | |
1211 | } else { | |
1212 | errln("invalid reorder code '%s' on line %d", name.data(), (int)fileLineNumber); | |
1213 | infoln(fileLine); | |
1214 | errorCode.set(U_PARSE_ERROR); | |
1215 | return; | |
1216 | } | |
57a6839d A |
1217 | } |
1218 | reorderCodes.addElement(code, errorCode); | |
1219 | start = limit; | |
1220 | } | |
b331163b A |
1221 | if(coll != NULL) { |
1222 | coll->setReorderCodes(reorderCodes.getBuffer(), reorderCodes.size(), errorCode); | |
1223 | if(errorCode.isFailure()) { | |
1224 | errln("setReorderCodes() failed on line %d: %s", | |
1225 | (int)fileLineNumber, errorCode.errorName()); | |
1226 | infoln(fileLine); | |
1227 | return; | |
1228 | } | |
57a6839d A |
1229 | } |
1230 | fileLine.remove(); | |
1231 | } | |
1232 | ||
1233 | void CollationTest::buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode) { | |
1234 | UnicodeString rules; | |
b331163b | 1235 | while(readNonEmptyLine(f, errorCode) && !isSectionStarter(fileLine[0])) { |
57a6839d A |
1236 | rules.append(fileLine.unescape()); |
1237 | } | |
1238 | if(errorCode.isFailure()) { return; } | |
1239 | logln(rules); | |
1240 | ||
1241 | UParseError parseError; | |
1242 | UnicodeString reason; | |
1243 | delete coll; | |
1244 | coll = new RuleBasedCollator(rules, parseError, reason, errorCode); | |
1245 | if(coll == NULL) { | |
1246 | errln("unable to allocate a new collator"); | |
1247 | errorCode.set(U_MEMORY_ALLOCATION_ERROR); | |
1248 | return; | |
1249 | } | |
1250 | if(errorCode.isFailure()) { | |
b331163b | 1251 | dataerrln("RuleBasedCollator(rules) failed - %s", errorCode.errorName()); |
57a6839d A |
1252 | infoln(UnicodeString(" reason: ") + reason); |
1253 | if(parseError.offset >= 0) { infoln(" rules offset: %d", (int)parseError.offset); } | |
1254 | if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { | |
1255 | infoln(UnicodeString(" snippet: ...") + | |
1256 | parseError.preContext + "(!)" + parseError.postContext + "..."); | |
1257 | } | |
b331163b A |
1258 | delete coll; |
1259 | coll = NULL; | |
1260 | errorCode.reset(); | |
57a6839d A |
1261 | } else { |
1262 | assertEquals("no error reason when RuleBasedCollator(rules) succeeds", | |
1263 | UnicodeString(), reason); | |
1264 | } | |
1265 | } | |
1266 | ||
1267 | void CollationTest::setRootCollator(IcuTestErrorCode &errorCode) { | |
1268 | if(errorCode.isFailure()) { return; } | |
1269 | delete coll; | |
1270 | coll = Collator::createInstance(Locale::getRoot(), errorCode); | |
1271 | if(errorCode.isFailure()) { | |
1272 | dataerrln("unable to create a root collator"); | |
1273 | return; | |
1274 | } | |
1275 | } | |
1276 | ||
1277 | void CollationTest::setLocaleCollator(IcuTestErrorCode &errorCode) { | |
1278 | if(errorCode.isFailure()) { return; } | |
b331163b A |
1279 | delete coll; |
1280 | coll = NULL; | |
1281 | int32_t at = fileLine.indexOf((UChar)0x40, 9); // @ is not invariant | |
1282 | if(at >= 0) { | |
1283 | fileLine.setCharAt(at, (UChar)0x2a); // * | |
1284 | } | |
1285 | CharString localeID; | |
1286 | localeID.appendInvariantChars(fileLine.tempSubString(9), errorCode); | |
1287 | if(at >= 0) { | |
1288 | localeID.data()[at - 9] = '@'; | |
1289 | } | |
1290 | Locale locale(localeID.data()); | |
1291 | if(fileLine.length() == 9 || errorCode.isFailure() || locale.isBogus()) { | |
57a6839d A |
1292 | errln("invalid language tag on line %d", (int)fileLineNumber); |
1293 | infoln(fileLine); | |
1294 | if(errorCode.isSuccess()) { errorCode.set(U_PARSE_ERROR); } | |
1295 | return; | |
1296 | } | |
1297 | ||
1298 | logln("creating a collator for locale ID %s", locale.getName()); | |
b331163b | 1299 | coll = Collator::createInstance(locale, errorCode); |
57a6839d A |
1300 | if(errorCode.isFailure()) { |
1301 | dataerrln("unable to create a collator for locale %s on line %d", | |
1302 | locale.getName(), (int)fileLineNumber); | |
1303 | infoln(fileLine); | |
b331163b A |
1304 | delete coll; |
1305 | coll = NULL; | |
1306 | errorCode.reset(); | |
57a6839d | 1307 | } |
57a6839d A |
1308 | } |
1309 | ||
1310 | UBool CollationTest::needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const { | |
1311 | if(U_FAILURE(errorCode) || !fcd->isNormalized(s, errorCode)) { return TRUE; } | |
1312 | // In some sequences with Tibetan composite vowel signs, | |
1313 | // even if the string passes the FCD check, | |
1314 | // those composites must be decomposed. | |
1315 | // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81. | |
1316 | int32_t index = 0; | |
1317 | while((index = s.indexOf((UChar)0xf71, index)) >= 0) { | |
1318 | if(++index < s.length()) { | |
1319 | UChar c = s[index]; | |
1320 | if(c == 0xf73 || c == 0xf75 || c == 0xf81) { return TRUE; } | |
1321 | } | |
1322 | } | |
1323 | return FALSE; | |
1324 | } | |
1325 | ||
1326 | UBool CollationTest::getSortKeyParts(const UChar *s, int32_t length, | |
1327 | CharString &dest, int32_t partSize, | |
1328 | IcuTestErrorCode &errorCode) { | |
1329 | if(errorCode.isFailure()) { return FALSE; } | |
1330 | uint8_t part[32]; | |
b331163b | 1331 | U_ASSERT(partSize <= UPRV_LENGTHOF(part)); |
57a6839d A |
1332 | UCharIterator iter; |
1333 | uiter_setString(&iter, s, length); | |
1334 | uint32_t state[2] = { 0, 0 }; | |
1335 | for(;;) { | |
1336 | int32_t partLength = coll->internalNextSortKeyPart(&iter, state, part, partSize, errorCode); | |
1337 | UBool done = partLength < partSize; | |
1338 | if(done) { | |
1339 | // At the end, append the next byte as well which should be 00. | |
1340 | ++partLength; | |
1341 | } | |
1342 | dest.append(reinterpret_cast<char *>(part), partLength, errorCode); | |
1343 | if(done) { | |
1344 | return errorCode.isSuccess(); | |
1345 | } | |
1346 | } | |
1347 | } | |
1348 | ||
1349 | UBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line, | |
1350 | const UChar *s, int32_t length, | |
1351 | CollationKey &key, IcuTestErrorCode &errorCode) { | |
1352 | if(errorCode.isFailure()) { return FALSE; } | |
1353 | coll->getCollationKey(s, length, key, errorCode); | |
1354 | if(errorCode.isFailure()) { | |
1355 | infoln(fileTestName); | |
1356 | errln("Collator(%s).getCollationKey() failed: %s", | |
1357 | norm, errorCode.errorName()); | |
1358 | infoln(line); | |
1359 | return FALSE; | |
1360 | } | |
1361 | int32_t keyLength; | |
1362 | const uint8_t *keyBytes = key.getByteArray(keyLength); | |
1363 | if(keyLength == 0 || keyBytes[keyLength - 1] != 0) { | |
1364 | infoln(fileTestName); | |
1365 | errln("Collator(%s).getCollationKey() wrote an empty or unterminated key", | |
1366 | norm); | |
1367 | infoln(line); | |
1368 | infoln(printCollationKey(key)); | |
1369 | return FALSE; | |
1370 | } | |
1371 | ||
1372 | int32_t numLevels = coll->getAttribute(UCOL_STRENGTH, errorCode); | |
1373 | if(numLevels < UCOL_IDENTICAL) { | |
1374 | ++numLevels; | |
1375 | } else { | |
1376 | numLevels = 5; | |
1377 | } | |
1378 | if(coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_ON) { | |
1379 | ++numLevels; | |
1380 | } | |
1381 | errorCode.assertSuccess(); | |
1382 | int32_t numLevelSeparators = 0; | |
1383 | for(int32_t i = 0; i < (keyLength - 1); ++i) { | |
1384 | uint8_t b = keyBytes[i]; | |
1385 | if(b == 0) { | |
1386 | infoln(fileTestName); | |
1387 | errln("Collator(%s).getCollationKey() contains a 00 byte", norm); | |
1388 | infoln(line); | |
1389 | infoln(printCollationKey(key)); | |
1390 | return FALSE; | |
1391 | } | |
1392 | if(b == 1) { ++numLevelSeparators; } | |
1393 | } | |
1394 | if(numLevelSeparators != (numLevels - 1)) { | |
1395 | infoln(fileTestName); | |
1396 | errln("Collator(%s).getCollationKey() has %d level separators for %d levels", | |
1397 | norm, (int)numLevelSeparators, (int)numLevels); | |
1398 | infoln(line); | |
1399 | infoln(printCollationKey(key)); | |
1400 | return FALSE; | |
1401 | } | |
1402 | ||
b331163b A |
1403 | // Check that internalNextSortKeyPart() makes the same key, with several part sizes. |
1404 | static const int32_t partSizes[] = { 32, 3, 1 }; | |
1405 | for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) { | |
1406 | int32_t partSize = partSizes[psi]; | |
1407 | CharString parts; | |
1408 | if(!getSortKeyParts(s, length, parts, 32, errorCode)) { | |
1409 | infoln(fileTestName); | |
1410 | errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s", | |
1411 | norm, (int)partSize, errorCode.errorName()); | |
1412 | infoln(line); | |
1413 | return FALSE; | |
1414 | } | |
1415 | if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) { | |
1416 | infoln(fileTestName); | |
1417 | errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)", | |
1418 | norm, (int)partSize); | |
1419 | infoln(line); | |
1420 | infoln(printCollationKey(key)); | |
1421 | infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length())); | |
1422 | return FALSE; | |
1423 | } | |
1424 | } | |
1425 | return TRUE; | |
1426 | } | |
1427 | ||
1428 | /** | |
1429 | * Changes the key to the merged segments of the U+FFFE-separated substrings of s. | |
1430 | * Leaves key unchanged if s does not contain U+FFFE. | |
1431 | * @return TRUE if the key was successfully changed | |
1432 | */ | |
1433 | UBool CollationTest::getMergedCollationKey(const UChar *s, int32_t length, | |
1434 | CollationKey &key, IcuTestErrorCode &errorCode) { | |
1435 | if(errorCode.isFailure()) { return FALSE; } | |
57a6839d A |
1436 | LocalMemory<uint8_t> mergedKey; |
1437 | int32_t mergedKeyLength = 0; | |
1438 | int32_t mergedKeyCapacity = 0; | |
1439 | int32_t sLength = (length >= 0) ? length : u_strlen(s); | |
1440 | int32_t segmentStart = 0; | |
1441 | for(int32_t i = 0;;) { | |
1442 | if(i == sLength) { | |
1443 | if(segmentStart == 0) { | |
1444 | // s does not contain any U+FFFE. | |
b331163b | 1445 | return FALSE; |
57a6839d A |
1446 | } |
1447 | } else if(s[i] != 0xfffe) { | |
1448 | ++i; | |
1449 | continue; | |
1450 | } | |
1451 | // Get the sort key for another segment and merge it into mergedKey. | |
1452 | CollationKey key1(mergedKey.getAlias(), mergedKeyLength); // copies the bytes | |
1453 | CollationKey key2; | |
1454 | coll->getCollationKey(s + segmentStart, i - segmentStart, key2, errorCode); | |
1455 | int32_t key1Length, key2Length; | |
1456 | const uint8_t *key1Bytes = key1.getByteArray(key1Length); | |
1457 | const uint8_t *key2Bytes = key2.getByteArray(key2Length); | |
1458 | uint8_t *dest; | |
1459 | int32_t minCapacity = key1Length + key2Length; | |
1460 | if(key1Length > 0) { --minCapacity; } | |
1461 | if(minCapacity <= mergedKeyCapacity) { | |
1462 | dest = mergedKey.getAlias(); | |
1463 | } else { | |
1464 | if(minCapacity <= 200) { | |
1465 | mergedKeyCapacity = 200; | |
1466 | } else if(minCapacity <= 2 * mergedKeyCapacity) { | |
1467 | mergedKeyCapacity *= 2; | |
1468 | } else { | |
1469 | mergedKeyCapacity = minCapacity; | |
1470 | } | |
1471 | dest = mergedKey.allocateInsteadAndReset(mergedKeyCapacity); | |
1472 | } | |
1473 | U_ASSERT(dest != NULL || mergedKeyCapacity == 0); | |
1474 | if(key1Length == 0) { | |
1475 | // key2 is the sort key for the first segment. | |
1476 | uprv_memcpy(dest, key2Bytes, key2Length); | |
1477 | mergedKeyLength = key2Length; | |
1478 | } else { | |
1479 | mergedKeyLength = | |
1480 | ucol_mergeSortkeys(key1Bytes, key1Length, key2Bytes, key2Length, | |
1481 | dest, mergedKeyCapacity); | |
1482 | } | |
1483 | if(i == sLength) { break; } | |
1484 | segmentStart = ++i; | |
1485 | } | |
b331163b | 1486 | key = CollationKey(mergedKey.getAlias(), mergedKeyLength); |
57a6839d A |
1487 | return TRUE; |
1488 | } | |
1489 | ||
1490 | namespace { | |
1491 | ||
1492 | /** | |
1493 | * Replaces unpaired surrogates with U+FFFD. | |
1494 | * Returns s if no replacement was made, otherwise buffer. | |
1495 | */ | |
1496 | const UnicodeString &surrogatesToFFFD(const UnicodeString &s, UnicodeString &buffer) { | |
1497 | int32_t i = 0; | |
1498 | while(i < s.length()) { | |
1499 | UChar32 c = s.char32At(i); | |
1500 | if(U_IS_SURROGATE(c)) { | |
1501 | if(buffer.length() < i) { | |
1502 | buffer.append(s, buffer.length(), i - buffer.length()); | |
1503 | } | |
1504 | buffer.append((UChar)0xfffd); | |
1505 | } | |
1506 | i += U16_LENGTH(c); | |
1507 | } | |
1508 | if(buffer.isEmpty()) { | |
1509 | return s; | |
1510 | } | |
1511 | if(buffer.length() < i) { | |
1512 | buffer.append(s, buffer.length(), i - buffer.length()); | |
1513 | } | |
1514 | return buffer; | |
1515 | } | |
1516 | ||
b331163b A |
1517 | int32_t getDifferenceLevel(const CollationKey &prevKey, const CollationKey &key, |
1518 | UCollationResult order, UBool collHasCaseLevel) { | |
1519 | if(order == UCOL_EQUAL) { | |
1520 | return Collation::NO_LEVEL; | |
1521 | } | |
1522 | int32_t prevKeyLength; | |
1523 | const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength); | |
1524 | int32_t keyLength; | |
1525 | const uint8_t *bytes = key.getByteArray(keyLength); | |
1526 | int32_t level = Collation::PRIMARY_LEVEL; | |
1527 | for(int32_t i = 0;; ++i) { | |
1528 | uint8_t b = prevBytes[i]; | |
1529 | if(b != bytes[i]) { break; } | |
1530 | if(b == Collation::LEVEL_SEPARATOR_BYTE) { | |
1531 | ++level; | |
1532 | if(level == Collation::CASE_LEVEL && !collHasCaseLevel) { | |
1533 | ++level; | |
1534 | } | |
1535 | } | |
1536 | } | |
1537 | return level; | |
1538 | } | |
1539 | ||
57a6839d A |
1540 | } |
1541 | ||
1542 | UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prevFileLine, | |
1543 | const UnicodeString &prevString, const UnicodeString &s, | |
1544 | UCollationResult expectedOrder, Collation::Level expectedLevel, | |
1545 | IcuTestErrorCode &errorCode) { | |
1546 | if(errorCode.isFailure()) { return FALSE; } | |
1547 | ||
1548 | // Get the sort keys first, for error debug output. | |
1549 | CollationKey prevKey; | |
1550 | if(!getCollationKey(norm, prevFileLine, prevString.getBuffer(), prevString.length(), | |
1551 | prevKey, errorCode)) { | |
1552 | return FALSE; | |
1553 | } | |
1554 | CollationKey key; | |
1555 | if(!getCollationKey(norm, fileLine, s.getBuffer(), s.length(), key, errorCode)) { return FALSE; } | |
1556 | ||
1557 | UCollationResult order = coll->compare(prevString, s, errorCode); | |
1558 | if(order != expectedOrder || errorCode.isFailure()) { | |
1559 | infoln(fileTestName); | |
1560 | errln("line %d Collator(%s).compare(previous, current) wrong order: %d != %d (%s)", | |
1561 | (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); | |
1562 | infoln(prevFileLine); | |
1563 | infoln(fileLine); | |
1564 | infoln(printCollationKey(prevKey)); | |
1565 | infoln(printCollationKey(key)); | |
1566 | return FALSE; | |
1567 | } | |
1568 | order = coll->compare(s, prevString, errorCode); | |
1569 | if(order != -expectedOrder || errorCode.isFailure()) { | |
1570 | infoln(fileTestName); | |
1571 | errln("line %d Collator(%s).compare(current, previous) wrong order: %d != %d (%s)", | |
1572 | (int)fileLineNumber, norm, order, -expectedOrder, errorCode.errorName()); | |
1573 | infoln(prevFileLine); | |
1574 | infoln(fileLine); | |
1575 | infoln(printCollationKey(prevKey)); | |
1576 | infoln(printCollationKey(key)); | |
1577 | return FALSE; | |
1578 | } | |
1579 | // Test NUL-termination if the strings do not contain NUL characters. | |
1580 | UBool containNUL = prevString.indexOf((UChar)0) >= 0 || s.indexOf((UChar)0) >= 0; | |
1581 | if(!containNUL) { | |
1582 | order = coll->compare(prevString.getBuffer(), -1, s.getBuffer(), -1, errorCode); | |
1583 | if(order != expectedOrder || errorCode.isFailure()) { | |
1584 | infoln(fileTestName); | |
1585 | errln("line %d Collator(%s).compare(previous-NUL, current-NUL) wrong order: %d != %d (%s)", | |
1586 | (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); | |
1587 | infoln(prevFileLine); | |
1588 | infoln(fileLine); | |
1589 | infoln(printCollationKey(prevKey)); | |
1590 | infoln(printCollationKey(key)); | |
1591 | return FALSE; | |
1592 | } | |
1593 | order = coll->compare(s.getBuffer(), -1, prevString.getBuffer(), -1, errorCode); | |
1594 | if(order != -expectedOrder || errorCode.isFailure()) { | |
1595 | infoln(fileTestName); | |
1596 | errln("line %d Collator(%s).compare(current-NUL, previous-NUL) wrong order: %d != %d (%s)", | |
1597 | (int)fileLineNumber, norm, order, -expectedOrder, errorCode.errorName()); | |
1598 | infoln(prevFileLine); | |
1599 | infoln(fileLine); | |
1600 | infoln(printCollationKey(prevKey)); | |
1601 | infoln(printCollationKey(key)); | |
1602 | return FALSE; | |
1603 | } | |
1604 | } | |
1605 | ||
57a6839d A |
1606 | // compare(UTF-16) treats unpaired surrogates like unassigned code points. |
1607 | // Unpaired surrogates cannot be converted to UTF-8. | |
1608 | // Create valid UTF-16 strings if necessary, and use those for | |
1609 | // both the expected compare() result and for the input to compare(UTF-8). | |
1610 | UnicodeString prevBuffer, sBuffer; | |
1611 | const UnicodeString &prevValid = surrogatesToFFFD(prevString, prevBuffer); | |
1612 | const UnicodeString &sValid = surrogatesToFFFD(s, sBuffer); | |
1613 | std::string prevUTF8, sUTF8; | |
1614 | UnicodeString(prevValid).toUTF8String(prevUTF8); | |
1615 | UnicodeString(sValid).toUTF8String(sUTF8); | |
1616 | UCollationResult expectedUTF8Order; | |
1617 | if(&prevValid == &prevString && &sValid == &s) { | |
1618 | expectedUTF8Order = expectedOrder; | |
1619 | } else { | |
1620 | expectedUTF8Order = coll->compare(prevValid, sValid, errorCode); | |
1621 | } | |
1622 | ||
1623 | order = coll->compareUTF8(prevUTF8, sUTF8, errorCode); | |
1624 | if(order != expectedUTF8Order || errorCode.isFailure()) { | |
1625 | infoln(fileTestName); | |
1626 | errln("line %d Collator(%s).compareUTF8(previous, current) wrong order: %d != %d (%s)", | |
1627 | (int)fileLineNumber, norm, order, expectedUTF8Order, errorCode.errorName()); | |
1628 | infoln(prevFileLine); | |
1629 | infoln(fileLine); | |
1630 | infoln(printCollationKey(prevKey)); | |
1631 | infoln(printCollationKey(key)); | |
1632 | return FALSE; | |
1633 | } | |
1634 | order = coll->compareUTF8(sUTF8, prevUTF8, errorCode); | |
1635 | if(order != -expectedUTF8Order || errorCode.isFailure()) { | |
1636 | infoln(fileTestName); | |
1637 | errln("line %d Collator(%s).compareUTF8(current, previous) wrong order: %d != %d (%s)", | |
1638 | (int)fileLineNumber, norm, order, -expectedUTF8Order, errorCode.errorName()); | |
1639 | infoln(prevFileLine); | |
1640 | infoln(fileLine); | |
1641 | infoln(printCollationKey(prevKey)); | |
1642 | infoln(printCollationKey(key)); | |
1643 | return FALSE; | |
1644 | } | |
1645 | // Test NUL-termination if the strings do not contain NUL characters. | |
1646 | if(!containNUL) { | |
1647 | order = coll->internalCompareUTF8(prevUTF8.c_str(), -1, sUTF8.c_str(), -1, errorCode); | |
1648 | if(order != expectedUTF8Order || errorCode.isFailure()) { | |
1649 | infoln(fileTestName); | |
1650 | errln("line %d Collator(%s).internalCompareUTF8(previous-NUL, current-NUL) wrong order: %d != %d (%s)", | |
1651 | (int)fileLineNumber, norm, order, expectedUTF8Order, errorCode.errorName()); | |
1652 | infoln(prevFileLine); | |
1653 | infoln(fileLine); | |
1654 | infoln(printCollationKey(prevKey)); | |
1655 | infoln(printCollationKey(key)); | |
1656 | return FALSE; | |
1657 | } | |
1658 | order = coll->internalCompareUTF8(sUTF8.c_str(), -1, prevUTF8.c_str(), -1, errorCode); | |
1659 | if(order != -expectedUTF8Order || errorCode.isFailure()) { | |
1660 | infoln(fileTestName); | |
1661 | errln("line %d Collator(%s).internalCompareUTF8(current-NUL, previous-NUL) wrong order: %d != %d (%s)", | |
1662 | (int)fileLineNumber, norm, order, -expectedUTF8Order, errorCode.errorName()); | |
1663 | infoln(prevFileLine); | |
1664 | infoln(fileLine); | |
1665 | infoln(printCollationKey(prevKey)); | |
1666 | infoln(printCollationKey(key)); | |
1667 | return FALSE; | |
1668 | } | |
1669 | } | |
57a6839d A |
1670 | |
1671 | UCharIterator leftIter; | |
1672 | UCharIterator rightIter; | |
1673 | uiter_setString(&leftIter, prevString.getBuffer(), prevString.length()); | |
1674 | uiter_setString(&rightIter, s.getBuffer(), s.length()); | |
1675 | order = coll->compare(leftIter, rightIter, errorCode); | |
1676 | if(order != expectedOrder || errorCode.isFailure()) { | |
1677 | infoln(fileTestName); | |
1678 | errln("line %d Collator(%s).compare(UCharIterator: previous, current) " | |
1679 | "wrong order: %d != %d (%s)", | |
1680 | (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); | |
1681 | infoln(prevFileLine); | |
1682 | infoln(fileLine); | |
1683 | infoln(printCollationKey(prevKey)); | |
1684 | infoln(printCollationKey(key)); | |
1685 | return FALSE; | |
1686 | } | |
1687 | ||
1688 | order = prevKey.compareTo(key, errorCode); | |
1689 | if(order != expectedOrder || errorCode.isFailure()) { | |
1690 | infoln(fileTestName); | |
1691 | errln("line %d Collator(%s).getCollationKey(previous, current).compareTo() wrong order: %d != %d (%s)", | |
1692 | (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); | |
1693 | infoln(prevFileLine); | |
1694 | infoln(fileLine); | |
1695 | infoln(printCollationKey(prevKey)); | |
1696 | infoln(printCollationKey(key)); | |
1697 | return FALSE; | |
1698 | } | |
b331163b A |
1699 | UBool collHasCaseLevel = coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_ON; |
1700 | int32_t level = getDifferenceLevel(prevKey, key, order, collHasCaseLevel); | |
57a6839d | 1701 | if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) { |
57a6839d A |
1702 | if(level != expectedLevel) { |
1703 | infoln(fileTestName); | |
1704 | errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d", | |
1705 | (int)fileLineNumber, norm, order, level, expectedLevel); | |
1706 | infoln(prevFileLine); | |
1707 | infoln(fileLine); | |
1708 | infoln(printCollationKey(prevKey)); | |
1709 | infoln(printCollationKey(key)); | |
1710 | return FALSE; | |
1711 | } | |
1712 | } | |
b331163b A |
1713 | |
1714 | // If either string contains U+FFFE, then their sort keys must compare the same as | |
1715 | // the merged sort keys of each string's between-FFFE segments. | |
1716 | // | |
1717 | // It is not required that | |
1718 | // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2)) | |
1719 | // only that those two methods yield the same order. | |
1720 | // | |
1721 | // Use bit-wise OR so that getMergedCollationKey() is always called for both strings. | |
1722 | if((getMergedCollationKey(prevString.getBuffer(), prevString.length(), prevKey, errorCode) | | |
1723 | getMergedCollationKey(s.getBuffer(), s.length(), key, errorCode)) || | |
1724 | errorCode.isFailure()) { | |
1725 | order = prevKey.compareTo(key, errorCode); | |
1726 | if(order != expectedOrder || errorCode.isFailure()) { | |
1727 | infoln(fileTestName); | |
1728 | errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey" | |
1729 | "(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)", | |
1730 | (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); | |
1731 | infoln(prevFileLine); | |
1732 | infoln(fileLine); | |
1733 | infoln(printCollationKey(prevKey)); | |
1734 | infoln(printCollationKey(key)); | |
1735 | return FALSE; | |
1736 | } | |
1737 | int32_t mergedLevel = getDifferenceLevel(prevKey, key, order, collHasCaseLevel); | |
1738 | if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) { | |
1739 | if(mergedLevel != level) { | |
1740 | infoln(fileTestName); | |
1741 | errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey" | |
1742 | "(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d", | |
1743 | (int)fileLineNumber, norm, order, mergedLevel, level); | |
1744 | infoln(prevFileLine); | |
1745 | infoln(fileLine); | |
1746 | infoln(printCollationKey(prevKey)); | |
1747 | infoln(printCollationKey(key)); | |
1748 | return FALSE; | |
1749 | } | |
1750 | } | |
1751 | } | |
57a6839d A |
1752 | return TRUE; |
1753 | } | |
1754 | ||
1755 | void CollationTest::checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode) { | |
1756 | if(errorCode.isFailure()) { return; } | |
1757 | UnicodeString prevFileLine = UNICODE_STRING("(none)", 6); | |
1758 | UnicodeString prevString, s; | |
1759 | prevString.getTerminatedBuffer(); // Ensure NUL-termination. | |
b331163b A |
1760 | while(readNonEmptyLine(f, errorCode) && !isSectionStarter(fileLine[0])) { |
1761 | // Parse the line even if it will be ignored (when we do not have a Collator) | |
1762 | // in order to report syntax issues. | |
57a6839d A |
1763 | Collation::Level relation = parseRelationAndString(s, errorCode); |
1764 | if(errorCode.isFailure()) { | |
1765 | errorCode.reset(); | |
1766 | break; | |
1767 | } | |
b331163b A |
1768 | if(coll == NULL) { |
1769 | // We were unable to create the Collator but continue with tests. | |
1770 | // Ignore test data for this Collator. | |
1771 | // The next Collator creation might work. | |
1772 | continue; | |
1773 | } | |
57a6839d A |
1774 | UCollationResult expectedOrder = (relation == Collation::ZERO_LEVEL) ? UCOL_EQUAL : UCOL_LESS; |
1775 | Collation::Level expectedLevel = relation; | |
1776 | s.getTerminatedBuffer(); // Ensure NUL-termination. | |
1777 | UBool isOk = TRUE; | |
1778 | if(!needsNormalization(prevString, errorCode) && !needsNormalization(s, errorCode)) { | |
1779 | coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, errorCode); | |
1780 | isOk = checkCompareTwo("normalization=on", prevFileLine, prevString, s, | |
1781 | expectedOrder, expectedLevel, errorCode); | |
1782 | } | |
1783 | if(isOk) { | |
1784 | coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, errorCode); | |
1785 | isOk = checkCompareTwo("normalization=off", prevFileLine, prevString, s, | |
1786 | expectedOrder, expectedLevel, errorCode); | |
1787 | } | |
1788 | if(isOk && (!nfd->isNormalized(prevString, errorCode) || !nfd->isNormalized(s, errorCode))) { | |
1789 | UnicodeString pn = nfd->normalize(prevString, errorCode); | |
1790 | UnicodeString n = nfd->normalize(s, errorCode); | |
1791 | pn.getTerminatedBuffer(); | |
1792 | n.getTerminatedBuffer(); | |
1793 | errorCode.assertSuccess(); | |
1794 | isOk = checkCompareTwo("NFD input", prevFileLine, pn, n, | |
1795 | expectedOrder, expectedLevel, errorCode); | |
1796 | } | |
1797 | if(!isOk) { | |
1798 | errorCode.reset(); // already reported | |
1799 | } | |
1800 | prevFileLine = fileLine; | |
1801 | prevString = s; | |
1802 | prevString.getTerminatedBuffer(); // Ensure NUL-termination. | |
1803 | } | |
1804 | } | |
1805 | ||
1806 | void CollationTest::TestDataDriven() { | |
1807 | IcuTestErrorCode errorCode(*this, "TestDataDriven"); | |
1808 | ||
1809 | fcd = Normalizer2Factory::getFCDInstance(errorCode); | |
b331163b | 1810 | nfd = Normalizer2::getNFDInstance(errorCode); |
0f5d89e8 | 1811 | if(errorCode.errDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) { |
57a6839d A |
1812 | return; |
1813 | } | |
1814 | ||
1815 | CharString path(getSourceTestData(errorCode), errorCode); | |
1816 | path.appendPathPart("collationtest.txt", errorCode); | |
1817 | const char *codePage = "UTF-8"; | |
1818 | LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, errorCode)); | |
0f5d89e8 | 1819 | if(errorCode.errIfFailureAndReset("ucbuf_open(collationtest.txt)")) { |
57a6839d A |
1820 | return; |
1821 | } | |
b331163b A |
1822 | // Read a new line if necessary. |
1823 | // Sub-parsers leave the first line set that they do not handle. | |
1824 | while(errorCode.isSuccess() && (!fileLine.isEmpty() || readNonEmptyLine(f.getAlias(), errorCode))) { | |
57a6839d A |
1825 | if(!isSectionStarter(fileLine[0])) { |
1826 | errln("syntax error on line %d", (int)fileLineNumber); | |
1827 | infoln(fileLine); | |
1828 | return; | |
1829 | } | |
1830 | if(fileLine.startsWith(UNICODE_STRING("** test: ", 9))) { | |
1831 | fileTestName = fileLine; | |
1832 | logln(fileLine); | |
1833 | fileLine.remove(); | |
1834 | } else if(fileLine == UNICODE_STRING("@ root", 6)) { | |
1835 | setRootCollator(errorCode); | |
1836 | fileLine.remove(); | |
1837 | } else if(fileLine.startsWith(UNICODE_STRING("@ locale ", 9))) { | |
1838 | setLocaleCollator(errorCode); | |
1839 | fileLine.remove(); | |
1840 | } else if(fileLine == UNICODE_STRING("@ rules", 7)) { | |
1841 | buildTailoring(f.getAlias(), errorCode); | |
1842 | } else if(fileLine[0] == 0x25 && isSpace(fileLine[1])) { // % | |
1843 | parseAndSetAttribute(errorCode); | |
1844 | } else if(fileLine == UNICODE_STRING("* compare", 9)) { | |
1845 | checkCompareStrings(f.getAlias(), errorCode); | |
1846 | } else { | |
1847 | errln("syntax error on line %d", (int)fileLineNumber); | |
1848 | infoln(fileLine); | |
1849 | return; | |
1850 | } | |
1851 | } | |
1852 | } | |
1853 | ||
1854 | #endif // !UCONFIG_NO_COLLATION |