]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f | 3 | /******************************************************************** |
51004dcb | 4 | * COPYRIGHT: |
57a6839d | 5 | * Copyright (c) 2002-2014, International Business Machines Corporation and |
b75a7d8f A |
6 | * others. All Rights Reserved. |
7 | ********************************************************************/ | |
8 | ||
/**
 * UCAConformanceTest performs the conformance tests defined in the data
 * files. ICU ships with stub data files only, as the full test files are
 * too long. To run the whole test, download the full test files.
 */
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_COLLATION | |
18 | ||
19 | #include "ucaconf.h" | |
57a6839d A |
20 | #include "unicode/sortkey.h" |
21 | #include "unicode/tblcoll.h" | |
374ca955 | 22 | #include "unicode/ustring.h" |
51004dcb | 23 | #include "cmemory.h" |
374ca955 A |
24 | #include "cstring.h" |
25 | #include "uparse.h" | |
26 | ||
b75a7d8f A |
27 | UCAConformanceTest::UCAConformanceTest() : |
28 | rbUCA(NULL), | |
29 | testFile(NULL), | |
30 | status(U_ZERO_ERROR) | |
31 | { | |
51004dcb | 32 | UCA = (RuleBasedCollator *)Collator::createInstance(Locale::getRoot(), status); |
46f4442e | 33 | if(U_FAILURE(status)) { |
51004dcb | 34 | dataerrln("Error - UCAConformanceTest: Unable to open UCA collator! - %s", u_errorName(status)); |
46f4442e A |
35 | } |
36 | ||
37 | const char *srcDir = IntlTest::getSourceTestData(status); | |
38 | if (U_FAILURE(status)) { | |
729e4ab9 | 39 | dataerrln("Could not open test data %s", u_errorName(status)); |
46f4442e A |
40 | return; |
41 | } | |
42 | uprv_strcpy(testDataPath, srcDir); | |
43 | uprv_strcat(testDataPath, "CollationTest_"); | |
51004dcb A |
44 | |
45 | UVersionInfo uniVersion; | |
46 | static const UVersionInfo v62 = { 6, 2, 0, 0 }; | |
47 | u_getUnicodeVersion(uniVersion); | |
48 | isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0; | |
b75a7d8f A |
49 | } |
50 | ||
51 | UCAConformanceTest::~UCAConformanceTest() | |
52 | { | |
51004dcb A |
53 | delete UCA; |
54 | delete rbUCA; | |
55 | if (testFile) { | |
46f4442e A |
56 | fclose(testFile); |
57 | } | |
b75a7d8f A |
58 | } |
59 | ||
60 | void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) | |
61 | { | |
51004dcb A |
62 | if(exec) { |
63 | logln("TestSuite UCAConformanceTest: "); | |
b75a7d8f | 64 | } |
51004dcb A |
65 | TESTCASE_AUTO_BEGIN; |
66 | TESTCASE_AUTO(TestTableNonIgnorable); | |
67 | TESTCASE_AUTO(TestTableShifted); | |
68 | TESTCASE_AUTO(TestRulesNonIgnorable); | |
69 | TESTCASE_AUTO(TestRulesShifted); | |
70 | TESTCASE_AUTO_END; | |
b75a7d8f A |
71 | } |
72 | ||
b75a7d8f A |
73 | void UCAConformanceTest::initRbUCA() |
74 | { | |
46f4442e | 75 | if(!rbUCA) { |
51004dcb | 76 | if (UCA) { |
57a6839d | 77 | UnicodeString ucarules; |
51004dcb A |
78 | UCA->getRules(UCOL_FULL_RULES, ucarules); |
79 | rbUCA = new RuleBasedCollator(ucarules, status); | |
80 | if (U_FAILURE(status)) { | |
81 | dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); | |
82 | return; | |
83 | } | |
84 | } else { | |
85 | dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); | |
46f4442e A |
86 | return; |
87 | } | |
b75a7d8f | 88 | } |
b75a7d8f A |
89 | } |
90 | ||
51004dcb | 91 | void UCAConformanceTest::setCollNonIgnorable(Collator *coll) |
b75a7d8f | 92 | { |
51004dcb A |
93 | coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
94 | coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); | |
95 | coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); | |
96 | coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status); | |
97 | coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status); | |
b75a7d8f A |
98 | } |
99 | ||
51004dcb | 100 | void UCAConformanceTest::setCollShifted(Collator *coll) |
b75a7d8f | 101 | { |
51004dcb A |
102 | coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
103 | coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); | |
104 | coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); | |
105 | coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_QUATERNARY, status); | |
106 | coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); | |
b75a7d8f A |
107 | } |
108 | ||
109 | void UCAConformanceTest::openTestFile(const char *type) | |
110 | { | |
46f4442e A |
111 | const char *ext = ".txt"; |
112 | if(testFile) { | |
113 | fclose(testFile); | |
114 | } | |
115 | char buffer[1024]; | |
116 | uprv_strcpy(buffer, testDataPath); | |
117 | uprv_strcat(buffer, type); | |
118 | int32_t bufLen = (int32_t)uprv_strlen(buffer); | |
119 | ||
120 | // we try to open 3 files: | |
121 | // path/CollationTest_type.txt | |
122 | // path/CollationTest_type_SHORT.txt | |
123 | // path/CollationTest_type_STUB.txt | |
124 | // we are going to test with the first one that we manage to open. | |
125 | ||
126 | uprv_strcpy(buffer+bufLen, ext); | |
127 | ||
b75a7d8f A |
128 | testFile = fopen(buffer, "rb"); |
129 | ||
130 | if(testFile == 0) { | |
46f4442e A |
131 | uprv_strcpy(buffer+bufLen, "_SHORT"); |
132 | uprv_strcat(buffer, ext); | |
133 | testFile = fopen(buffer, "rb"); | |
134 | ||
135 | if(testFile == 0) { | |
136 | uprv_strcpy(buffer+bufLen, "_STUB"); | |
137 | uprv_strcat(buffer, ext); | |
138 | testFile = fopen(buffer, "rb"); | |
139 | ||
140 | if (testFile == 0) { | |
141 | *(buffer+bufLen) = 0; | |
729e4ab9 | 142 | dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer); |
46f4442e A |
143 | return; |
144 | } else { | |
145 | infoln( | |
146 | "INFO: Working with the stub file.\n" | |
147 | "If you need the full conformance test, please\n" | |
148 | "download the appropriate data files from:\n" | |
57a6839d | 149 | "http://unicode.org/cldr/trac/browser/trunk/common/uca"); |
46f4442e A |
150 | } |
151 | } | |
b75a7d8f | 152 | } |
b75a7d8f A |
153 | } |
154 | ||
// Bit flags describing the collator under test; they are combined into the
// flags argument passed to skipLineBecauseOfBug().
static const uint32_t IS_SHIFTED = 1;   // alternate handling is "shifted"
static const uint32_t FROM_RULES = 2;   // collator is the rule-built rbUCA
157 | ||
158 | static UBool | |
159 | skipLineBecauseOfBug(const UChar *s, int32_t length, uint32_t flags) { | |
57a6839d A |
160 | // Add temporary exceptions here if there are ICU bugs, until we can fix them. |
161 | // For examples see the ICU 52 version of this file. | |
162 | (void)s; | |
163 | (void)length; | |
164 | (void)flags; | |
51004dcb A |
165 | return FALSE; |
166 | } | |
167 | ||
168 | static UCollationResult | |
169 | normalizeResult(int32_t result) { | |
170 | return result<0 ? UCOL_LESS : result==0 ? UCOL_EQUAL : UCOL_GREATER; | |
171 | } | |
172 | ||
173 | void UCAConformanceTest::testConformance(const Collator *coll) | |
b75a7d8f | 174 | { |
46f4442e A |
175 | if(testFile == 0) { |
176 | return; | |
b75a7d8f | 177 | } |
51004dcb A |
178 | uint32_t skipFlags = 0; |
179 | if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) { | |
180 | skipFlags |= IS_SHIFTED; | |
181 | } | |
182 | if(coll == rbUCA) { | |
183 | skipFlags |= FROM_RULES; | |
184 | } | |
b75a7d8f | 185 | |
57a6839d A |
186 | logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest"); |
187 | UBool withSortKeys = getProperty("ucaconfnosortkeys") == NULL; | |
188 | ||
46f4442e | 189 | int32_t line = 0; |
b75a7d8f | 190 | |
46f4442e | 191 | UChar b1[1024], b2[1024]; |
46f4442e | 192 | UChar *buffer = b1, *oldB = NULL; |
729e4ab9 A |
193 | |
194 | char lineB1[1024], lineB2[1024]; | |
195 | char *lineB = lineB1, *oldLineB = lineB2; | |
196 | ||
46f4442e A |
197 | uint8_t sk1[1024], sk2[1024]; |
198 | uint8_t *oldSk = NULL, *newSk = sk1; | |
729e4ab9 | 199 | |
51004dcb A |
200 | int32_t oldLen = 0; |
201 | int32_t oldBlen = 0; | |
46f4442e | 202 | uint32_t first = 0; |
b75a7d8f | 203 | |
46f4442e | 204 | while (fgets(lineB, 1024, testFile) != NULL) { |
729e4ab9 A |
205 | // remove trailing whitespace |
206 | u_rtrim(lineB); | |
b75a7d8f | 207 | |
46f4442e | 208 | line++; |
51004dcb | 209 | if(*lineB == 0 || lineB[0] == '#') { |
46f4442e A |
210 | continue; |
211 | } | |
51004dcb | 212 | int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status); |
729e4ab9 A |
213 | if(U_FAILURE(status)) { |
214 | errln("Error parsing line %ld (%s): %s\n", | |
215 | (long)line, u_errorName(status), lineB); | |
216 | status = U_ZERO_ERROR; | |
217 | } | |
51004dcb | 218 | buffer[buflen] = 0; |
46f4442e | 219 | |
51004dcb A |
220 | if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) { |
221 | logln("Skipping line %i because of a known bug", line); | |
222 | continue; | |
223 | } | |
46f4442e | 224 | |
57a6839d | 225 | int32_t resLen = withSortKeys ? coll->getSortKey(buffer, buflen, newSk, 1024) : 0; |
46f4442e A |
226 | |
227 | if(oldSk != NULL) { | |
57a6839d A |
228 | UBool ok=TRUE; |
229 | int32_t skres = withSortKeys ? strcmp((char *)oldSk, (char *)newSk) : 0; | |
51004dcb A |
230 | int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status); |
231 | int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status); | |
46f4442e A |
232 | |
233 | if(cmpres != -cmpres2) { | |
57a6839d A |
234 | errln("Compare result not symmetrical on line %i: " |
235 | "previous vs. current (%d) / current vs. previous (%d)", | |
236 | line, cmpres, cmpres2); | |
237 | ok = FALSE; | |
46f4442e A |
238 | } |
239 | ||
57a6839d A |
240 | // TODO: Compare with normalization turned off if the input passes the FCD test. |
241 | ||
242 | if(withSortKeys && cmpres != normalizeResult(skres)) { | |
51004dcb A |
243 | errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i", |
244 | cmpres, skres, line); | |
57a6839d | 245 | ok = FALSE; |
46f4442e A |
246 | } |
247 | ||
51004dcb A |
248 | int32_t res = cmpres; |
249 | if(res == 0 && !isAtLeastUCA62) { | |
250 | // Up to UCA 6.1, the collation test files use a custom tie-breaker, | |
251 | // comparing the raw input strings. | |
252 | res = u_strcmpCodePointOrder(oldB, buffer); | |
253 | // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker, | |
254 | // comparing the NFD versions of the input strings, | |
255 | // which we do via setting strength=identical. | |
256 | } | |
46f4442e A |
257 | if(res > 0) { |
258 | errln("Line %i is not greater or equal than previous line", line); | |
57a6839d A |
259 | ok = FALSE; |
260 | } | |
261 | ||
262 | if(!ok) { | |
729e4ab9 A |
263 | errln(" Previous data line %s", oldLineB); |
264 | errln(" Current data line %s", lineB); | |
57a6839d A |
265 | if(withSortKeys) { |
266 | UnicodeString oldS, newS; | |
267 | prettify(CollationKey(oldSk, oldLen), oldS); | |
268 | prettify(CollationKey(newSk, resLen), newS); | |
269 | errln(" Previous key: "+oldS); | |
270 | errln(" Current key: "+newS); | |
271 | } | |
46f4442e | 272 | } |
b75a7d8f | 273 | } |
b75a7d8f | 274 | |
729e4ab9 A |
275 | // swap buffers |
276 | oldLineB = lineB; | |
277 | oldB = buffer; | |
46f4442e | 278 | oldSk = newSk; |
729e4ab9 A |
279 | if(lineB == lineB1) { |
280 | lineB = lineB2; | |
281 | buffer = b2; | |
282 | newSk = sk2; | |
283 | } else { | |
284 | lineB = lineB1; | |
285 | buffer = b1; | |
286 | newSk = sk1; | |
287 | } | |
46f4442e | 288 | oldLen = resLen; |
46f4442e | 289 | oldBlen = buflen; |
46f4442e | 290 | } |
b75a7d8f A |
291 | } |
292 | ||
293 | void UCAConformanceTest::TestTableNonIgnorable(/* par */) { | |
51004dcb A |
294 | if (U_FAILURE(status)) { |
295 | dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); | |
296 | return; | |
297 | } | |
46f4442e A |
298 | setCollNonIgnorable(UCA); |
299 | openTestFile("NON_IGNORABLE"); | |
300 | testConformance(UCA); | |
b75a7d8f A |
301 | } |
302 | ||
303 | void UCAConformanceTest::TestTableShifted(/* par */) { | |
51004dcb A |
304 | if (U_FAILURE(status)) { |
305 | dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); | |
306 | return; | |
307 | } | |
46f4442e A |
308 | setCollShifted(UCA); |
309 | openTestFile("SHIFTED"); | |
310 | testConformance(UCA); | |
b75a7d8f A |
311 | } |
312 | ||
313 | void UCAConformanceTest::TestRulesNonIgnorable(/* par */) { | |
57a6839d | 314 | if(logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) { return; } |
46f4442e | 315 | initRbUCA(); |
b75a7d8f | 316 | |
46f4442e A |
317 | if(U_SUCCESS(status)) { |
318 | setCollNonIgnorable(rbUCA); | |
319 | openTestFile("NON_IGNORABLE"); | |
320 | testConformance(rbUCA); | |
321 | } | |
b75a7d8f A |
322 | } |
323 | ||
324 | void UCAConformanceTest::TestRulesShifted(/* par */) { | |
46f4442e A |
325 | logln("This test is currently disabled, as it is impossible to " |
326 | "wholly represent fractional UCA using tailoring rules."); | |
327 | return; | |
b75a7d8f | 328 | |
46f4442e | 329 | initRbUCA(); |
b75a7d8f | 330 | |
46f4442e A |
331 | if(U_SUCCESS(status)) { |
332 | setCollShifted(rbUCA); | |
333 | openTestFile("SHIFTED"); | |
334 | testConformance(rbUCA); | |
335 | } | |
b75a7d8f A |
336 | } |
337 | ||
338 | #endif /* #if !UCONFIG_NO_COLLATION */ |