]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /******************************************************************** |
51004dcb A |
2 | * COPYRIGHT: |
3 | * Copyright (c) 2002-2012, International Business Machines Corporation and | |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ********************************************************************/ | |
6 | ||
7 | /** | |
8 | * UCAConformanceTest performs conformance tests defined in the data | |
9 | * files. ICU ships with stub data files, as the full test files are too | |
10 | * long. To run the full test, download the complete test files. | |
11 | */ | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_COLLATION | |
16 | ||
17 | #include "ucaconf.h" | |
374ca955 | 18 | #include "unicode/ustring.h" |
51004dcb | 19 | #include "cmemory.h" |
374ca955 A |
20 | #include "cstring.h" |
21 | #include "uparse.h" | |
22 | ||
b75a7d8f A |
23 | UCAConformanceTest::UCAConformanceTest() : |
24 | rbUCA(NULL), | |
25 | testFile(NULL), | |
26 | status(U_ZERO_ERROR) | |
27 | { | |
51004dcb | 28 | UCA = (RuleBasedCollator *)Collator::createInstance(Locale::getRoot(), status); |
46f4442e | 29 | if(U_FAILURE(status)) { |
51004dcb | 30 | dataerrln("Error - UCAConformanceTest: Unable to open UCA collator! - %s", u_errorName(status)); |
46f4442e A |
31 | } |
32 | ||
33 | const char *srcDir = IntlTest::getSourceTestData(status); | |
34 | if (U_FAILURE(status)) { | |
729e4ab9 | 35 | dataerrln("Could not open test data %s", u_errorName(status)); |
46f4442e A |
36 | return; |
37 | } | |
38 | uprv_strcpy(testDataPath, srcDir); | |
39 | uprv_strcat(testDataPath, "CollationTest_"); | |
51004dcb A |
40 | |
41 | UVersionInfo uniVersion; | |
42 | static const UVersionInfo v62 = { 6, 2, 0, 0 }; | |
43 | u_getUnicodeVersion(uniVersion); | |
44 | isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0; | |
b75a7d8f A |
45 | } |
46 | ||
47 | UCAConformanceTest::~UCAConformanceTest() | |
48 | { | |
51004dcb A |
49 | delete UCA; |
50 | delete rbUCA; | |
51 | if (testFile) { | |
46f4442e A |
52 | fclose(testFile); |
53 | } | |
b75a7d8f A |
54 | } |
55 | ||
56 | void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) | |
57 | { | |
51004dcb A |
58 | if(exec) { |
59 | logln("TestSuite UCAConformanceTest: "); | |
b75a7d8f | 60 | } |
51004dcb A |
61 | TESTCASE_AUTO_BEGIN; |
62 | TESTCASE_AUTO(TestTableNonIgnorable); | |
63 | TESTCASE_AUTO(TestTableShifted); | |
64 | TESTCASE_AUTO(TestRulesNonIgnorable); | |
65 | TESTCASE_AUTO(TestRulesShifted); | |
66 | TESTCASE_AUTO_END; | |
b75a7d8f A |
67 | } |
68 | ||
b75a7d8f A |
69 | void UCAConformanceTest::initRbUCA() |
70 | { | |
46f4442e | 71 | if(!rbUCA) { |
51004dcb A |
72 | UnicodeString ucarules; |
73 | if (UCA) { | |
74 | UCA->getRules(UCOL_FULL_RULES, ucarules); | |
75 | rbUCA = new RuleBasedCollator(ucarules, status); | |
76 | if (U_FAILURE(status)) { | |
77 | dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); | |
78 | return; | |
79 | } | |
80 | } else { | |
81 | dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); | |
46f4442e A |
82 | return; |
83 | } | |
b75a7d8f | 84 | } |
b75a7d8f A |
85 | } |
86 | ||
51004dcb | 87 | void UCAConformanceTest::setCollNonIgnorable(Collator *coll) |
b75a7d8f | 88 | { |
51004dcb A |
89 | coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
90 | coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); | |
91 | coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); | |
92 | coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status); | |
93 | coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status); | |
b75a7d8f A |
94 | } |
95 | ||
51004dcb | 96 | void UCAConformanceTest::setCollShifted(Collator *coll) |
b75a7d8f | 97 | { |
51004dcb A |
98 | coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
99 | coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); | |
100 | coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); | |
101 | coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_QUATERNARY, status); | |
102 | coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); | |
b75a7d8f A |
103 | } |
104 | ||
105 | void UCAConformanceTest::openTestFile(const char *type) | |
106 | { | |
46f4442e A |
107 | const char *ext = ".txt"; |
108 | if(testFile) { | |
109 | fclose(testFile); | |
110 | } | |
111 | char buffer[1024]; | |
112 | uprv_strcpy(buffer, testDataPath); | |
113 | uprv_strcat(buffer, type); | |
114 | int32_t bufLen = (int32_t)uprv_strlen(buffer); | |
115 | ||
116 | // we try to open 3 files: | |
117 | // path/CollationTest_type.txt | |
118 | // path/CollationTest_type_SHORT.txt | |
119 | // path/CollationTest_type_STUB.txt | |
120 | // we are going to test with the first one that we manage to open. | |
121 | ||
122 | uprv_strcpy(buffer+bufLen, ext); | |
123 | ||
b75a7d8f A |
124 | testFile = fopen(buffer, "rb"); |
125 | ||
126 | if(testFile == 0) { | |
46f4442e A |
127 | uprv_strcpy(buffer+bufLen, "_SHORT"); |
128 | uprv_strcat(buffer, ext); | |
129 | testFile = fopen(buffer, "rb"); | |
130 | ||
131 | if(testFile == 0) { | |
132 | uprv_strcpy(buffer+bufLen, "_STUB"); | |
133 | uprv_strcat(buffer, ext); | |
134 | testFile = fopen(buffer, "rb"); | |
135 | ||
136 | if (testFile == 0) { | |
137 | *(buffer+bufLen) = 0; | |
729e4ab9 | 138 | dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer); |
46f4442e A |
139 | return; |
140 | } else { | |
141 | infoln( | |
142 | "INFO: Working with the stub file.\n" | |
143 | "If you need the full conformance test, please\n" | |
144 | "download the appropriate data files from:\n" | |
145 | "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/"); | |
146 | } | |
147 | } | |
b75a7d8f | 148 | } |
b75a7d8f A |
149 | } |
150 | ||
51004dcb A |
// Bit flags passed to skipLineBecauseOfBug() in its "flags" argument.
// IS_SHIFTED: the collator under test uses UCOL_SHIFTED alternate handling.
static const uint32_t IS_SHIFTED = 0x1;
// FROM_RULES: the collator under test is the one built from rules (rbUCA).
static const uint32_t FROM_RULES = 0x2;
153 | ||
154 | static UBool | |
155 | skipLineBecauseOfBug(const UChar *s, int32_t length, uint32_t flags) { | |
156 | // TODO: Fix ICU ticket #8052 | |
157 | if(length >= 3 && | |
158 | (s[0] == 0xfb2 || s[0] == 0xfb3) && | |
159 | s[1] == 0x334 && | |
160 | (s[2] == 0xf73 || s[2] == 0xf75 || s[2] == 0xf81)) { | |
161 | return TRUE; | |
162 | } | |
163 | // TODO: Fix ICU ticket #9361 | |
164 | if((flags & IS_SHIFTED) != 0 && length >= 2 && s[0] == 0xfffe) { | |
165 | return TRUE; | |
166 | } | |
167 | // TODO: Fix tailoring builder, ICU ticket #9593. | |
168 | UChar c; | |
169 | if((flags & FROM_RULES) != 0 && length >= 2 && ((c = s[1]) == 0xedc || c == 0xedd)) { | |
170 | return TRUE; | |
171 | } | |
172 | return FALSE; | |
173 | } | |
174 | ||
175 | static UCollationResult | |
176 | normalizeResult(int32_t result) { | |
177 | return result<0 ? UCOL_LESS : result==0 ? UCOL_EQUAL : UCOL_GREATER; | |
178 | } | |
179 | ||
180 | void UCAConformanceTest::testConformance(const Collator *coll) | |
b75a7d8f | 181 | { |
46f4442e A |
182 | if(testFile == 0) { |
183 | return; | |
b75a7d8f | 184 | } |
51004dcb A |
185 | uint32_t skipFlags = 0; |
186 | if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) { | |
187 | skipFlags |= IS_SHIFTED; | |
188 | } | |
189 | if(coll == rbUCA) { | |
190 | skipFlags |= FROM_RULES; | |
191 | } | |
b75a7d8f | 192 | |
46f4442e | 193 | int32_t line = 0; |
b75a7d8f | 194 | |
46f4442e | 195 | UChar b1[1024], b2[1024]; |
46f4442e | 196 | UChar *buffer = b1, *oldB = NULL; |
729e4ab9 A |
197 | |
198 | char lineB1[1024], lineB2[1024]; | |
199 | char *lineB = lineB1, *oldLineB = lineB2; | |
200 | ||
46f4442e A |
201 | uint8_t sk1[1024], sk2[1024]; |
202 | uint8_t *oldSk = NULL, *newSk = sk1; | |
729e4ab9 | 203 | |
51004dcb A |
204 | int32_t oldLen = 0; |
205 | int32_t oldBlen = 0; | |
46f4442e | 206 | uint32_t first = 0; |
b75a7d8f | 207 | |
46f4442e | 208 | while (fgets(lineB, 1024, testFile) != NULL) { |
729e4ab9 A |
209 | // remove trailing whitespace |
210 | u_rtrim(lineB); | |
b75a7d8f | 211 | |
46f4442e | 212 | line++; |
51004dcb | 213 | if(*lineB == 0 || lineB[0] == '#') { |
46f4442e A |
214 | continue; |
215 | } | |
51004dcb | 216 | int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status); |
729e4ab9 A |
217 | if(U_FAILURE(status)) { |
218 | errln("Error parsing line %ld (%s): %s\n", | |
219 | (long)line, u_errorName(status), lineB); | |
220 | status = U_ZERO_ERROR; | |
221 | } | |
51004dcb | 222 | buffer[buflen] = 0; |
46f4442e | 223 | |
51004dcb A |
224 | if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) { |
225 | logln("Skipping line %i because of a known bug", line); | |
226 | continue; | |
227 | } | |
46f4442e | 228 | |
51004dcb | 229 | int32_t resLen = coll->getSortKey(buffer, buflen, newSk, 1024); |
46f4442e A |
230 | |
231 | if(oldSk != NULL) { | |
51004dcb A |
232 | int32_t skres = strcmp((char *)oldSk, (char *)newSk); |
233 | int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status); | |
234 | int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status); | |
46f4442e A |
235 | |
236 | if(cmpres != -cmpres2) { | |
237 | errln("Compare result not symmetrical on line %i", line); | |
238 | } | |
239 | ||
51004dcb A |
240 | if(cmpres != normalizeResult(skres)) { |
241 | errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i", | |
242 | cmpres, skres, line); | |
729e4ab9 A |
243 | errln(" Previous data line %s", oldLineB); |
244 | errln(" Current data line %s", lineB); | |
46f4442e A |
245 | } |
246 | ||
51004dcb A |
247 | int32_t res = cmpres; |
248 | if(res == 0 && !isAtLeastUCA62) { | |
249 | // Up to UCA 6.1, the collation test files use a custom tie-breaker, | |
250 | // comparing the raw input strings. | |
251 | res = u_strcmpCodePointOrder(oldB, buffer); | |
252 | // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker, | |
253 | // comparing the NFD versions of the input strings, | |
254 | // which we do via setting strength=identical. | |
255 | } | |
46f4442e A |
256 | if(res > 0) { |
257 | errln("Line %i is not greater or equal than previous line", line); | |
729e4ab9 A |
258 | errln(" Previous data line %s", oldLineB); |
259 | errln(" Current data line %s", lineB); | |
51004dcb | 260 | UnicodeString oldS, newS; |
46f4442e A |
261 | prettify(CollationKey(oldSk, oldLen), oldS); |
262 | prettify(CollationKey(newSk, resLen), newS); | |
729e4ab9 A |
263 | errln(" Previous key: "+oldS); |
264 | errln(" Current key: "+newS); | |
46f4442e | 265 | } |
b75a7d8f | 266 | } |
b75a7d8f | 267 | |
729e4ab9 A |
268 | // swap buffers |
269 | oldLineB = lineB; | |
270 | oldB = buffer; | |
46f4442e | 271 | oldSk = newSk; |
729e4ab9 A |
272 | if(lineB == lineB1) { |
273 | lineB = lineB2; | |
274 | buffer = b2; | |
275 | newSk = sk2; | |
276 | } else { | |
277 | lineB = lineB1; | |
278 | buffer = b1; | |
279 | newSk = sk1; | |
280 | } | |
46f4442e | 281 | oldLen = resLen; |
46f4442e | 282 | oldBlen = buflen; |
46f4442e | 283 | } |
b75a7d8f A |
284 | } |
285 | ||
286 | void UCAConformanceTest::TestTableNonIgnorable(/* par */) { | |
51004dcb A |
287 | if (U_FAILURE(status)) { |
288 | dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); | |
289 | return; | |
290 | } | |
46f4442e A |
291 | setCollNonIgnorable(UCA); |
292 | openTestFile("NON_IGNORABLE"); | |
293 | testConformance(UCA); | |
b75a7d8f A |
294 | } |
295 | ||
296 | void UCAConformanceTest::TestTableShifted(/* par */) { | |
51004dcb A |
297 | if (U_FAILURE(status)) { |
298 | dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); | |
299 | return; | |
300 | } | |
46f4442e A |
301 | setCollShifted(UCA); |
302 | openTestFile("SHIFTED"); | |
303 | testConformance(UCA); | |
b75a7d8f A |
304 | } |
305 | ||
306 | void UCAConformanceTest::TestRulesNonIgnorable(/* par */) { | |
46f4442e | 307 | initRbUCA(); |
b75a7d8f | 308 | |
46f4442e A |
309 | if(U_SUCCESS(status)) { |
310 | setCollNonIgnorable(rbUCA); | |
311 | openTestFile("NON_IGNORABLE"); | |
312 | testConformance(rbUCA); | |
313 | } | |
b75a7d8f A |
314 | } |
315 | ||
316 | void UCAConformanceTest::TestRulesShifted(/* par */) { | |
46f4442e A |
317 | logln("This test is currently disabled, as it is impossible to " |
318 | "wholly represent fractional UCA using tailoring rules."); | |
319 | return; | |
b75a7d8f | 320 | |
46f4442e | 321 | initRbUCA(); |
b75a7d8f | 322 | |
46f4442e A |
323 | if(U_SUCCESS(status)) { |
324 | setCollShifted(rbUCA); | |
325 | openTestFile("SHIFTED"); | |
326 | testConformance(rbUCA); | |
327 | } | |
b75a7d8f A |
328 | } |
329 | ||
330 | #endif /* #if !UCONFIG_NO_COLLATION */ |