]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
729e4ab9 | 3 | * Copyright (c) 2002-2010, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ********************************************************************/ | |
6 | ||
7 | /** | |
8 | * UCAConformanceTest performs conformance tests defined in the data | |
9 | * files. ICU ships with stub data files, as the full test files are too | |
10 | * large. To run the whole test, download the full test files. | |
11 | */ | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_COLLATION | |
16 | ||
17 | #include "ucaconf.h" | |
374ca955 A |
18 | #include "unicode/ustring.h" |
19 | #include "cstring.h" | |
20 | #include "uparse.h" | |
21 | ||
b75a7d8f A |
22 | UCAConformanceTest::UCAConformanceTest() : |
23 | rbUCA(NULL), | |
24 | testFile(NULL), | |
25 | status(U_ZERO_ERROR) | |
26 | { | |
46f4442e A |
27 | UCA = ucol_open("root", &status); |
28 | if(U_FAILURE(status)) { | |
29 | errln("ERROR - UCAConformanceTest: Unable to open UCA collator!"); | |
30 | } | |
31 | ||
32 | const char *srcDir = IntlTest::getSourceTestData(status); | |
33 | if (U_FAILURE(status)) { | |
729e4ab9 | 34 | dataerrln("Could not open test data %s", u_errorName(status)); |
46f4442e A |
35 | return; |
36 | } | |
37 | uprv_strcpy(testDataPath, srcDir); | |
38 | uprv_strcat(testDataPath, "CollationTest_"); | |
b75a7d8f A |
39 | } |
40 | ||
41 | UCAConformanceTest::~UCAConformanceTest() | |
42 | { | |
46f4442e A |
43 | ucol_close(UCA); |
44 | if(rbUCA) { | |
45 | ucol_close(rbUCA); | |
46 | } | |
47 | if(testFile) { | |
48 | fclose(testFile); | |
49 | } | |
b75a7d8f A |
50 | } |
51 | ||
52 | void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) | |
53 | { | |
54 | if (exec) logln("TestSuite UCAConformanceTest: "); | |
55 | if(U_SUCCESS(status)) { | |
56 | switch (index) { | |
57 | case 0: name = "TestTableNonIgnorable"; if (exec) TestTableNonIgnorable(/* par */); break; | |
58 | case 1: name = "TestTableShifted"; if (exec) TestTableShifted(/* par */); break; | |
59 | case 2: name = "TestRulesNonIgnorable"; if (exec) TestRulesNonIgnorable(/* par */); break; | |
60 | case 3: name = "TestRulesShifted"; if (exec) TestRulesShifted(/* par */); break; | |
61 | default: name = ""; break; | |
62 | } | |
63 | } else { | |
64 | name = ""; | |
65 | } | |
66 | } | |
67 | ||
b75a7d8f A |
68 | void UCAConformanceTest::initRbUCA() |
69 | { | |
46f4442e A |
70 | if(!rbUCA) { |
71 | UParseError parseError; | |
72 | UChar *ucarules; | |
73 | // preflight rules | |
74 | int32_t size = ucol_getRulesEx(UCA, UCOL_FULL_RULES, NULL, 0); | |
b75a7d8f | 75 | ucarules = (UChar *)malloc(size * sizeof(UChar)); |
46f4442e | 76 | |
b75a7d8f | 77 | size = ucol_getRulesEx(UCA, UCOL_FULL_RULES, ucarules, size); |
46f4442e A |
78 | rbUCA = ucol_openRules(ucarules, size, UCOL_DEFAULT, UCOL_TERTIARY, |
79 | &parseError, &status); | |
80 | free(ucarules); | |
81 | if (U_FAILURE(status)) { | |
82 | errln("Failure creating UCA rule-based collator: %s", u_errorName(status)); | |
83 | return; | |
84 | } | |
b75a7d8f | 85 | } |
b75a7d8f A |
86 | } |
87 | ||
88 | void UCAConformanceTest::setCollNonIgnorable(UCollator *coll) | |
89 | { | |
90 | ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
91 | ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); | |
92 | ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); | |
93 | ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); | |
94 | ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); | |
95 | } | |
96 | ||
97 | void UCAConformanceTest::setCollShifted(UCollator *coll) | |
98 | { | |
46f4442e A |
99 | ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
100 | ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); | |
101 | ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); | |
102 | ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); | |
103 | ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | |
b75a7d8f A |
104 | } |
105 | ||
106 | void UCAConformanceTest::openTestFile(const char *type) | |
107 | { | |
46f4442e A |
108 | const char *ext = ".txt"; |
109 | if(testFile) { | |
110 | fclose(testFile); | |
111 | } | |
112 | char buffer[1024]; | |
113 | uprv_strcpy(buffer, testDataPath); | |
114 | uprv_strcat(buffer, type); | |
115 | int32_t bufLen = (int32_t)uprv_strlen(buffer); | |
116 | ||
117 | // we try to open 3 files: | |
118 | // path/CollationTest_type.txt | |
119 | // path/CollationTest_type_SHORT.txt | |
120 | // path/CollationTest_type_STUB.txt | |
121 | // we are going to test with the first one that we manage to open. | |
122 | ||
123 | uprv_strcpy(buffer+bufLen, ext); | |
124 | ||
b75a7d8f A |
125 | testFile = fopen(buffer, "rb"); |
126 | ||
127 | if(testFile == 0) { | |
46f4442e A |
128 | uprv_strcpy(buffer+bufLen, "_SHORT"); |
129 | uprv_strcat(buffer, ext); | |
130 | testFile = fopen(buffer, "rb"); | |
131 | ||
132 | if(testFile == 0) { | |
133 | uprv_strcpy(buffer+bufLen, "_STUB"); | |
134 | uprv_strcat(buffer, ext); | |
135 | testFile = fopen(buffer, "rb"); | |
136 | ||
137 | if (testFile == 0) { | |
138 | *(buffer+bufLen) = 0; | |
729e4ab9 | 139 | dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer); |
46f4442e A |
140 | return; |
141 | } else { | |
142 | infoln( | |
143 | "INFO: Working with the stub file.\n" | |
144 | "If you need the full conformance test, please\n" | |
145 | "download the appropriate data files from:\n" | |
146 | "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/"); | |
147 | } | |
148 | } | |
b75a7d8f | 149 | } |
b75a7d8f A |
150 | } |
151 | ||
152 | void UCAConformanceTest::testConformance(UCollator *coll) | |
153 | { | |
46f4442e A |
154 | if(testFile == 0) { |
155 | return; | |
b75a7d8f | 156 | } |
b75a7d8f | 157 | |
46f4442e | 158 | int32_t line = 0; |
b75a7d8f | 159 | |
46f4442e | 160 | UChar b1[1024], b2[1024]; |
46f4442e | 161 | UChar *buffer = b1, *oldB = NULL; |
729e4ab9 A |
162 | |
163 | char lineB1[1024], lineB2[1024]; | |
164 | char *lineB = lineB1, *oldLineB = lineB2; | |
165 | ||
46f4442e A |
166 | uint8_t sk1[1024], sk2[1024]; |
167 | uint8_t *oldSk = NULL, *newSk = sk1; | |
729e4ab9 | 168 | |
46f4442e A |
169 | int32_t resLen = 0, oldLen = 0; |
170 | int32_t buflen = 0, oldBlen = 0; | |
171 | uint32_t first = 0; | |
172 | uint32_t offset = 0; | |
173 | UnicodeString oldS, newS; | |
b75a7d8f | 174 | |
b75a7d8f | 175 | |
46f4442e | 176 | while (fgets(lineB, 1024, testFile) != NULL) { |
729e4ab9 A |
177 | // remove trailing whitespace |
178 | u_rtrim(lineB); | |
46f4442e | 179 | offset = 0; |
b75a7d8f | 180 | |
46f4442e A |
181 | line++; |
182 | if(*lineB == 0 || strlen(lineB) < 3 || lineB[0] == '#') { | |
183 | continue; | |
184 | } | |
185 | offset = u_parseString(lineB, buffer, 1024, &first, &status); | |
729e4ab9 A |
186 | if(U_FAILURE(status)) { |
187 | errln("Error parsing line %ld (%s): %s\n", | |
188 | (long)line, u_errorName(status), lineB); | |
189 | status = U_ZERO_ERROR; | |
190 | } | |
46f4442e A |
191 | buflen = offset; |
192 | buffer[offset++] = 0; | |
193 | ||
194 | resLen = ucol_getSortKey(coll, buffer, buflen, newSk, 1024); | |
195 | ||
196 | int32_t res = 0, cmpres = 0, cmpres2 = 0; | |
197 | ||
198 | if(oldSk != NULL) { | |
199 | res = strcmp((char *)oldSk, (char *)newSk); | |
200 | cmpres = ucol_strcoll(coll, oldB, oldBlen, buffer, buflen); | |
201 | cmpres2 = ucol_strcoll(coll, buffer, buflen, oldB, oldBlen); | |
202 | ||
203 | if(cmpres != -cmpres2) { | |
204 | errln("Compare result not symmetrical on line %i", line); | |
205 | } | |
206 | ||
207 | if(((res&0x80000000) != (cmpres&0x80000000)) || (res == 0 && cmpres != 0) || (res != 0 && cmpres == 0)) { | |
208 | errln("Difference between ucol_strcoll and sortkey compare on line %i", line); | |
729e4ab9 A |
209 | errln(" Previous data line %s", oldLineB); |
210 | errln(" Current data line %s", lineB); | |
46f4442e A |
211 | } |
212 | ||
213 | if(res > 0) { | |
214 | errln("Line %i is not greater or equal than previous line", line); | |
729e4ab9 A |
215 | errln(" Previous data line %s", oldLineB); |
216 | errln(" Current data line %s", lineB); | |
46f4442e A |
217 | prettify(CollationKey(oldSk, oldLen), oldS); |
218 | prettify(CollationKey(newSk, resLen), newS); | |
729e4ab9 A |
219 | errln(" Previous key: "+oldS); |
220 | errln(" Current key: "+newS); | |
46f4442e A |
221 | } else if(res == 0) { /* equal */ |
222 | res = u_strcmpCodePointOrder(oldB, buffer); | |
223 | if (res == 0) { | |
224 | errln("Probable error in test file on line %i (comparing identical strings)", line); | |
729e4ab9 A |
225 | errln(" Data line %s", lineB); |
226 | } | |
227 | /* | |
228 | * UCA 6.0 test files can have lines that compare == if they are | |
229 | * different strings but canonically equivalent. | |
230 | else if (res > 0) { | |
231 | errln("Sortkeys are identical, but code point compare gives >0 on line %i", line); | |
232 | errln(" Previous data line %s", oldLineB); | |
233 | errln(" Current data line %s", lineB); | |
46f4442e | 234 | } |
729e4ab9 | 235 | */ |
46f4442e | 236 | } |
b75a7d8f | 237 | } |
b75a7d8f | 238 | |
729e4ab9 A |
239 | // swap buffers |
240 | oldLineB = lineB; | |
241 | oldB = buffer; | |
46f4442e | 242 | oldSk = newSk; |
729e4ab9 A |
243 | if(lineB == lineB1) { |
244 | lineB = lineB2; | |
245 | buffer = b2; | |
246 | newSk = sk2; | |
247 | } else { | |
248 | lineB = lineB1; | |
249 | buffer = b1; | |
250 | newSk = sk1; | |
251 | } | |
46f4442e | 252 | oldLen = resLen; |
46f4442e | 253 | oldBlen = buflen; |
46f4442e | 254 | } |
b75a7d8f A |
255 | } |
256 | ||
257 | void UCAConformanceTest::TestTableNonIgnorable(/* par */) { | |
46f4442e A |
258 | setCollNonIgnorable(UCA); |
259 | openTestFile("NON_IGNORABLE"); | |
260 | testConformance(UCA); | |
b75a7d8f A |
261 | } |
262 | ||
263 | void UCAConformanceTest::TestTableShifted(/* par */) { | |
46f4442e A |
264 | setCollShifted(UCA); |
265 | openTestFile("SHIFTED"); | |
266 | testConformance(UCA); | |
b75a7d8f A |
267 | } |
268 | ||
269 | void UCAConformanceTest::TestRulesNonIgnorable(/* par */) { | |
46f4442e | 270 | initRbUCA(); |
b75a7d8f | 271 | |
46f4442e A |
272 | if(U_SUCCESS(status)) { |
273 | setCollNonIgnorable(rbUCA); | |
274 | openTestFile("NON_IGNORABLE"); | |
275 | testConformance(rbUCA); | |
276 | } | |
b75a7d8f A |
277 | } |
278 | ||
279 | void UCAConformanceTest::TestRulesShifted(/* par */) { | |
46f4442e A |
280 | logln("This test is currently disabled, as it is impossible to " |
281 | "wholly represent fractional UCA using tailoring rules."); | |
282 | return; | |
b75a7d8f | 283 | |
46f4442e | 284 | initRbUCA(); |
b75a7d8f | 285 | |
46f4442e A |
286 | if(U_SUCCESS(status)) { |
287 | setCollShifted(rbUCA); | |
288 | openTestFile("SHIFTED"); | |
289 | testConformance(rbUCA); | |
290 | } | |
b75a7d8f A |
291 | } |
292 | ||
293 | #endif /* #if !UCONFIG_NO_COLLATION */ |