]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
73c04bcf | 3 | * Copyright (c) 2002-2005, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ********************************************************************/ | |
6 | ||
7 | /** | |
8 | * UCAConformanceTest performs conformance tests defined in the data | |
9 | * files. ICU ships with stub data files, as the full test files are too | |
10 | * large. To run the whole test, download the full test files. | |
11 | */ | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_COLLATION | |
16 | ||
17 | #include "ucaconf.h" | |
374ca955 A |
18 | #include "unicode/ustring.h" |
19 | #include "cstring.h" | |
20 | #include "uparse.h" | |
21 | ||
b75a7d8f A |
22 | UCAConformanceTest::UCAConformanceTest() : |
23 | rbUCA(NULL), | |
24 | testFile(NULL), | |
25 | status(U_ZERO_ERROR) | |
26 | { | |
27 | UCA = ucol_open("root", &status); | |
28 | if(U_FAILURE(status)) { | |
29 | errln("ERROR - UCAConformanceTest: Unable to open UCA collator!"); | |
30 | } | |
31 | ||
374ca955 | 32 | const char *srcDir = IntlTest::getSourceTestData(status); |
b75a7d8f A |
33 | if (U_FAILURE(status)) { |
34 | errln("ERROR: could not open test data %s", u_errorName(status)); | |
35 | return; | |
36 | } | |
374ca955 | 37 | uprv_strcpy(testDataPath, srcDir); |
b75a7d8f A |
38 | uprv_strcat(testDataPath, "CollationTest_"); |
39 | } | |
40 | ||
41 | UCAConformanceTest::~UCAConformanceTest() | |
42 | { | |
43 | ucol_close(UCA); | |
44 | if(rbUCA) { | |
45 | ucol_close(rbUCA); | |
46 | } | |
47 | if(testFile) { | |
48 | fclose(testFile); | |
49 | } | |
50 | } | |
51 | ||
52 | void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) | |
53 | { | |
54 | if (exec) logln("TestSuite UCAConformanceTest: "); | |
55 | if(U_SUCCESS(status)) { | |
56 | switch (index) { | |
57 | case 0: name = "TestTableNonIgnorable"; if (exec) TestTableNonIgnorable(/* par */); break; | |
58 | case 1: name = "TestTableShifted"; if (exec) TestTableShifted(/* par */); break; | |
59 | case 2: name = "TestRulesNonIgnorable"; if (exec) TestRulesNonIgnorable(/* par */); break; | |
60 | case 3: name = "TestRulesShifted"; if (exec) TestRulesShifted(/* par */); break; | |
61 | default: name = ""; break; | |
62 | } | |
63 | } else { | |
64 | name = ""; | |
65 | } | |
66 | } | |
67 | ||
68 | static const int BUFFER_SIZE_ = 330000; | |
69 | static UChar buffer[BUFFER_SIZE_]; | |
70 | ||
71 | void UCAConformanceTest::initRbUCA() | |
72 | { | |
73 | if(!rbUCA) { | |
74 | UParseError parseError; | |
75 | UChar *ucarules = buffer; | |
76 | int32_t size = ucol_getRulesEx(UCA, UCOL_FULL_RULES, ucarules, | |
77 | BUFFER_SIZE_); | |
78 | if (size > BUFFER_SIZE_) { | |
79 | ucarules = (UChar *)malloc(size * sizeof(UChar)); | |
80 | size = ucol_getRulesEx(UCA, UCOL_FULL_RULES, ucarules, size); | |
81 | } | |
82 | rbUCA = ucol_openRules(ucarules, size, UCOL_DEFAULT, UCOL_TERTIARY, | |
83 | &parseError, &status); | |
84 | if (U_FAILURE(status)) { | |
374ca955 | 85 | errln("Failure creating UCA rule-based collator: %s", u_errorName(status)); |
b75a7d8f A |
86 | return; |
87 | } | |
88 | } | |
89 | } | |
90 | ||
91 | void UCAConformanceTest::setCollNonIgnorable(UCollator *coll) | |
92 | { | |
93 | ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
94 | ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); | |
95 | ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); | |
96 | ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); | |
97 | ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); | |
98 | } | |
99 | ||
100 | void UCAConformanceTest::setCollShifted(UCollator *coll) | |
101 | { | |
102 | ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
103 | ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); | |
104 | ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); | |
105 | ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); | |
106 | ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | |
107 | } | |
108 | ||
109 | void UCAConformanceTest::openTestFile(const char *type) | |
110 | { | |
111 | const char *ext = ".txt"; | |
112 | if(testFile) { | |
113 | fclose(testFile); | |
114 | } | |
115 | char buffer[1024]; | |
116 | uprv_strcpy(buffer, testDataPath); | |
117 | uprv_strcat(buffer, type); | |
374ca955 | 118 | int32_t bufLen = (int32_t)uprv_strlen(buffer); |
b75a7d8f A |
119 | |
120 | // we try to open 3 files: | |
121 | // path/CollationTest_type.txt | |
122 | // path/CollationTest_type_SHORT.txt | |
123 | // path/CollationTest_type_STUB.txt | |
124 | // we are going to test with the first one that we manage to open. | |
125 | ||
126 | uprv_strcpy(buffer+bufLen, ext); | |
127 | ||
128 | testFile = fopen(buffer, "rb"); | |
129 | ||
130 | if(testFile == 0) { | |
131 | uprv_strcpy(buffer+bufLen, "_SHORT"); | |
132 | uprv_strcat(buffer, ext); | |
133 | testFile = fopen(buffer, "rb"); | |
134 | ||
135 | if(testFile == 0) { | |
136 | uprv_strcpy(buffer+bufLen, "_STUB"); | |
137 | uprv_strcat(buffer, ext); | |
138 | testFile = fopen(buffer, "rb"); | |
139 | ||
140 | if (testFile == 0) { | |
141 | *(buffer+bufLen) = 0; | |
142 | errln("ERROR: could not open any of the conformance test files, tried opening base %s\n", buffer); | |
143 | return; | |
144 | } else { | |
145 | infoln( | |
146 | "INFO: Working with the stub file.\n" | |
147 | "If you need the full conformance test, please\n" | |
148 | "download the appropriate data files from:\n" | |
73c04bcf | 149 | "http://dev.icu-project.org/cgi-bin/viewcvs.cgi/unicodetools/com/ibm/text/data/"); |
b75a7d8f A |
150 | } |
151 | } | |
152 | } | |
153 | } | |
154 | ||
155 | void UCAConformanceTest::testConformance(UCollator *coll) | |
156 | { | |
157 | if(testFile == 0) { | |
158 | return; | |
159 | } | |
160 | ||
161 | int32_t line = 0; | |
162 | ||
163 | UChar b1[1024], b2[1024]; | |
164 | char lineB[1024]; | |
165 | UChar *buffer = b1, *oldB = NULL; | |
166 | uint8_t sk1[1024], sk2[1024]; | |
167 | uint8_t *oldSk = NULL, *newSk = sk1; | |
168 | int32_t resLen = 0, oldLen = 0; | |
169 | int32_t buflen = 0, oldBlen = 0; | |
170 | uint32_t first = 0; | |
171 | uint32_t offset = 0; | |
73c04bcf | 172 | UnicodeString oldS, newS; |
b75a7d8f A |
173 | |
174 | ||
175 | while (fgets(lineB, 1024, testFile) != NULL) { | |
176 | offset = 0; | |
177 | ||
178 | line++; | |
73c04bcf | 179 | if(*lineB == 0 || strlen(lineB) < 3 || lineB[0] == '#') { |
b75a7d8f A |
180 | continue; |
181 | } | |
182 | offset = u_parseString(lineB, buffer, 1024, &first, &status); | |
183 | buflen = offset; | |
184 | buffer[offset++] = 0; | |
185 | ||
186 | resLen = ucol_getSortKey(coll, buffer, buflen, newSk, 1024); | |
187 | ||
188 | int32_t res = 0, cmpres = 0, cmpres2 = 0; | |
189 | ||
190 | if(oldSk != NULL) { | |
191 | res = strcmp((char *)oldSk, (char *)newSk); | |
192 | cmpres = ucol_strcoll(coll, oldB, oldBlen, buffer, buflen); | |
193 | cmpres2 = ucol_strcoll(coll, buffer, buflen, oldB, oldBlen); | |
194 | ||
195 | if(cmpres != -cmpres2) { | |
196 | errln("Compare result not symmetrical on line %i", line); | |
197 | } | |
198 | ||
199 | if(((res&0x80000000) != (cmpres&0x80000000)) || (res == 0 && cmpres != 0) || (res != 0 && cmpres == 0)) { | |
200 | errln("Difference between ucol_strcoll and sortkey compare on line %i", line); | |
201 | logln("Data line %s", lineB); | |
202 | } | |
203 | ||
204 | if(res > 0) { | |
205 | errln("Line %i is not greater or equal than previous line", line); | |
206 | logln("Data line %s", lineB); | |
73c04bcf A |
207 | prettify(CollationKey(oldSk, oldLen), oldS); |
208 | prettify(CollationKey(newSk, resLen), newS); | |
209 | logln("Keys: "+oldS+" and "+newS); | |
b75a7d8f A |
210 | } else if(res == 0) { /* equal */ |
211 | res = u_strcmpCodePointOrder(oldB, buffer); | |
212 | if (res == 0) { | |
213 | errln("Probable error in test file on line %i (comparing identical strings)", line); | |
214 | logln("Data line %s", lineB); | |
215 | } else if (res > 0) { | |
216 | errln("Sortkeys are identical, but code point comapare gives >0 on line %i", line); | |
217 | logln("Data line %s", lineB); | |
218 | } | |
219 | } | |
220 | } | |
221 | ||
222 | oldSk = newSk; | |
223 | oldLen = resLen; | |
224 | ||
225 | newSk = (newSk == sk1)?sk2:sk1; | |
226 | oldB = buffer; | |
227 | oldBlen = buflen; | |
228 | buffer = (buffer == b1)?b2:b1; | |
229 | } | |
230 | } | |
231 | ||
232 | void UCAConformanceTest::TestTableNonIgnorable(/* par */) { | |
233 | setCollNonIgnorable(UCA); | |
234 | openTestFile("NON_IGNORABLE"); | |
235 | testConformance(UCA); | |
236 | } | |
237 | ||
238 | void UCAConformanceTest::TestTableShifted(/* par */) { | |
239 | setCollShifted(UCA); | |
240 | openTestFile("SHIFTED"); | |
241 | testConformance(UCA); | |
242 | } | |
243 | ||
244 | void UCAConformanceTest::TestRulesNonIgnorable(/* par */) { | |
245 | initRbUCA(); | |
246 | ||
247 | if(U_SUCCESS(status)) { | |
248 | setCollNonIgnorable(rbUCA); | |
249 | openTestFile("NON_IGNORABLE"); | |
250 | testConformance(rbUCA); | |
251 | } | |
252 | } | |
253 | ||
254 | void UCAConformanceTest::TestRulesShifted(/* par */) { | |
255 | logln("This test is currently disabled, as it is impossible to " | |
256 | "wholly represent fractional UCA using tailoring rules."); | |
257 | return; | |
258 | ||
259 | initRbUCA(); | |
260 | ||
261 | if(U_SUCCESS(status)) { | |
262 | setCollShifted(rbUCA); | |
263 | openTestFile("SHIFTED"); | |
264 | testConformance(rbUCA); | |
265 | } | |
266 | } | |
267 | ||
268 | #endif /* #if !UCONFIG_NO_COLLATION */ |