]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/tscoll.cpp
ICU-6.2.6.tar.gz
[apple/icu.git] / icuSources / test / intltest / tscoll.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 /**
8 * IntlTestCollator is the medium level test class for everything in the directory "collate".
9 */
10
11 /***********************************************************************
12 * Modification history
13 * Date Name Description
14 * 02/14/2001 synwee Compare with cintltst and commented away tests
15 * that are not run.
16 ***********************************************************************/
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_COLLATION
21
22 #include "unicode/uchar.h"
23 #include "unicode/ustring.h"
24
25 #include "dadrcoll.h"
26
27 #include "encoll.h"
28 #include "frcoll.h"
29 #include "decoll.h"
30 #include "dacoll.h"
31 #include "escoll.h"
32 #include "ficoll.h"
33 #include "jacoll.h"
34 #include "trcoll.h"
35 #include "allcoll.h"
36 #include "g7coll.h"
37 #include "mnkytst.h"
38 #include "apicoll.h"
39 #include "regcoll.h"
40 #include "currcoll.h"
41 #include "itercoll.h"
42 //#include "capicoll.h" // CollationCAPITest
43 #include "tstnorm.h"
44 #include "normconf.h"
45 #include "thcoll.h"
46 #include "srchtest.h"
47 #include "cntabcol.h"
48 #include "lcukocol.h"
49 #include "ucaconf.h"
50 #include "svccoll.h"
51 #include "cmemory.h"
52
// Expands to one complete `case` arm of the switch in runIndexedTest().
// The arm always publishes the suite's class name through `name`; when `exec`
// is set it additionally logs a banner, default-constructs the suite and runs
// it through callTest().  `name`, `exec` and `par` must be in scope at the
// point of expansion.
#define TESTCLASS(n, classname)          \
    case n:                              \
        name = #classname;               \
        if (exec) {                      \
            logln(#classname "---");     \
            logln("");                   \
            classname testSuite;         \
            callTest(testSuite, par);    \
        }                                \
        break
63
64 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
65 {
66 if (exec) {
67 logln("TestSuite Collator: ");
68 }
69
70 switch (index) {
71 TESTCLASS(0, CollationEnglishTest);
72 TESTCLASS(1, CollationFrenchTest);
73 TESTCLASS(2, CollationGermanTest);
74 TESTCLASS(3, CollationSpanishTest);
75 TESTCLASS(4, CollationKanaTest);
76 TESTCLASS(5, CollationTurkishTest);
77 TESTCLASS(6, CollationDummyTest);
78 TESTCLASS(7, G7CollationTest);
79 TESTCLASS(8, CollationMonkeyTest);
80 TESTCLASS(9, CollationAPITest);
81 TESTCLASS(10, CollationRegressionTest);
82 TESTCLASS(11, CollationCurrencyTest);
83 TESTCLASS(12, CollationIteratorTest);
84 TESTCLASS(13, CollationThaiTest);
85 TESTCLASS(14, LotusCollationKoreanTest);
86 TESTCLASS(15, StringSearchTest);
87 TESTCLASS(16, ContractionTableTest);
88 TESTCLASS(17, DataDrivenCollatorTest);
89 TESTCLASS(18, UCAConformanceTest);
90 TESTCLASS(19, CollationServiceTest);
91
92 //TESTCLASS(n, CollationDanishTest); // removed by weiv - we have changed Danish collation
93 TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation
94 //TESTCLASS(n, CollationCAPITest);
95
96 default: name = ""; break;
97 }
98 }
99
100 UCollationResult
101 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
102 int32_t partialSKResult = 0;
103 uint8_t sBuf[512], tBuf[512];
104 UCharIterator sIter, tIter;
105 uint32_t sState[2], tState[2];
106 int32_t sSize = pieceSize, tSize = pieceSize;
107 int32_t i = 0;
108 status = U_ZERO_ERROR;
109 sState[0] = 0; sState[1] = 0;
110 tState[0] = 0; tState[1] = 0;
111 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
112 uiter_setString(&sIter, source, sLen);
113 uiter_setString(&tIter, target, tLen);
114 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
115 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
116
117 if(sState[0] != 0 || tState[0] != 0) {
118 log("State != 0 : %08X %08X\n", sState[0], tState[0]);
119 }
120 log("%i ", i++);
121
122 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
123 }
124
125 if(partialSKResult < 0) {
126 return UCOL_LESS;
127 } else if(partialSKResult > 0) {
128 return UCOL_GREATER;
129 } else {
130 return UCOL_EQUAL;
131 }
132 }
133
134 void
135 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
136 {
137 UErrorCode status = U_ZERO_ERROR;
138
139 UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator();
140
141 Collator::EComparisonResult compareResult = col->compare(source, target);
142
143 CollationKey srckey, tgtkey;
144 col->getCollationKey(source, srckey, status);
145 col->getCollationKey(target, tgtkey, status);
146 if (U_FAILURE(status)){
147 errln("Creation of collation keys failed\n");
148 }
149 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
150
151 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
152
153 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
154
155 int32_t sLen = source.length(), tLen = target.length();
156 const UChar* src = source.getBuffer();
157 const UChar* trg = target.getBuffer();
158 UCollationResult compareResultIter = (UCollationResult)result;
159
160 if(1) {
161 UCharIterator sIter, tIter;
162 uiter_setString(&sIter, src, sLen);
163 uiter_setString(&tIter, trg, tLen);
164 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
165 if(compareResultIter != (UCollationResult)result) {
166 errln("Different result for iterative comparison "+source+" "+target);
167 }
168 }
169 /* convert the strings to UTF-8 and do try comparing with char iterator */
170 if(!quick) { /*!QUICK*/
171 char utf8Source[256], utf8Target[256];
172 int32_t utf8SourceLen = 0, utf8TargetLen = 0;
173 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
174 if(U_FAILURE(status)) { /* probably buffer is not big enough */
175 log("Src UTF-8 buffer too small! Will not compare!\n");
176 } else {
177 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
178 if(U_SUCCESS(status)) { /* probably buffer is not big enough */
179 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
180 UCharIterator sIter, tIter;
181 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
182 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
183 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
184 /*uiter_setString(&sIter, source, sLen);
185 uiter_setString(&tIter, target, tLen);*/
186 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
187 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
188 sIter.move(&sIter, 0, UITER_START);
189 tIter.move(&tIter, 0, UITER_START);
190 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
191 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
192 if(compareResultUTF8 != compareResultIter) {
193 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
194 }
195 if(compareResultUTF8 != compareResultUTF8Norm) {
196 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
197 }
198 } else {
199 log("Target UTF-8 buffer too small! Did not compare!\n");
200 }
201 if(U_FAILURE(status)) {
202 log("UTF-8 strcoll failed! Ignoring result\n");
203 }
204 }
205 }
206
207 /* testing the partial sortkeys */
208 if(1) { /*!QUICK*/
209 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
210 int32_t partialSizesSize = 1;
211 if(!quick) {
212 partialSizesSize = 7;
213 }
214 int32_t i = 0;
215 log("partial sortkey test piecesize=");
216 for(i = 0; i < partialSizesSize; i++) {
217 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
218 log("%i ", partialSizes[i]);
219
220 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
221 if(partialSKResult != (UCollationResult)result) {
222 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
223 }
224
225 if(norm != UCOL_ON && !quick) {
226 log("N ");
227 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
228 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
229 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
230 if(partialSKResult != partialNormalizedSKResult) {
231 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
232 }
233 }
234 }
235 log("\n");
236 }
237 /*
238 if (compareResult != result) {
239 errln("String comparison failed in variant test\n");
240 }
241 if (keyResult != result) {
242 errln("Collation key comparison failed in variant test\n");
243 }
244 */
245 }
246
247 void
248 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
249 doTest(col, UnicodeString(source), UnicodeString(target), result);
250 }
251
252 void
253 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
254 {
255 if(col) {
256 doTestVariant(col, source, target, result);
257 if(result == Collator::LESS) {
258 doTestVariant(col, target, source, Collator::GREATER);
259 } else if (result == Collator::GREATER) {
260 doTestVariant(col, target, source, Collator::LESS);
261 }
262
263 UErrorCode status = U_ZERO_ERROR;
264 CollationElementIterator* c = ((RuleBasedCollator *)col)->createCollationElementIterator( source );
265 logln("Testing iterating source: "+source);
266 backAndForth(*c);
267 c->setText(target, status);
268 logln("Testing iterating target: "+target);
269 backAndForth(*c);
270 delete c;
271 }
272 }
273
274
275 // used for collation result reporting, defined here for convenience
276 // (maybe moved later)
277 void
278 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
279 CollationKey &sourceKey, CollationKey &targetKey,
280 Collator::EComparisonResult compareResult,
281 Collator::EComparisonResult keyResult,
282 Collator::EComparisonResult incResult,
283 Collator::EComparisonResult expectedResult )
284 {
285 if (expectedResult < -1 || expectedResult > 1)
286 {
287 errln("***** invalid call to reportCResult ****");
288 return;
289 }
290
291 UBool ok1 = (compareResult == expectedResult);
292 UBool ok2 = (keyResult == expectedResult);
293 UBool ok3 = (incResult == expectedResult);
294
295
296 if (ok1 && ok2 && ok3 && !verbose) {
297 // Keep non-verbose, passing tests fast
298 return;
299 } else {
300 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
301 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
302 UnicodeString prettySource, prettyTarget, sExpect, sResult;
303
304 IntlTest::prettify(source, prettySource);
305 IntlTest::prettify(target, prettyTarget);
306 appendCompareResult(compareResult, sResult);
307 appendCompareResult(expectedResult, sExpect);
308
309 if (ok1) {
310 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
311 } else {
312 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
313 }
314
315 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
316 msg2 = ").compareTo(key(";
317 msg3 = ")) returned ";
318
319 appendCompareResult(keyResult, sResult);
320
321 if (ok2) {
322 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
323 } else {
324 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
325
326 msg1 = " ";
327 msg2 = " vs. ";
328
329 prettify(sourceKey, prettySource);
330 prettify(targetKey, prettyTarget);
331
332 errln(msg1 + prettySource + msg2 + prettyTarget);
333 }
334 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
335 msg2 = ", ";
336 msg3 = ") returned ";
337
338 appendCompareResult(incResult, sResult);
339
340 if (ok3) {
341 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
342 } else {
343 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
344 }
345 }
346 }
347
348 UnicodeString&
349 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
350 UnicodeString& target)
351 {
352 if (result == Collator::LESS)
353 {
354 target += "LESS";
355 }
356 else if (result == Collator::EQUAL)
357 {
358 target += "EQUAL";
359 }
360 else if (result == Collator::GREATER)
361 {
362 target += "GREATER";
363 }
364 else
365 {
366 UnicodeString huh = "?";
367
368 target += (huh + (int32_t)result);
369 }
370
371 return target;
372 }
373
374 // Produce a printable representation of a CollationKey
375 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
376 {
377 int32_t i, byteCount;
378 const uint8_t *bytes = source.getByteArray(byteCount);
379
380 target.remove();
381 target += "[";
382
383 for (i = 0; i < byteCount; i += 1)
384 {
385 appendHex(bytes[i], 2, target);
386 target += " ";
387 }
388
389 target += "]";
390
391 return target;
392 }
393
394 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
395 {
396 // Run through the iterator forwards and stick it into an array
397 int32_t orderLength = 0;
398 int32_t *orders = getOrders(iter, orderLength);
399 UErrorCode status = U_ZERO_ERROR;
400
401 // Now go through it backwards and make sure we get the same values
402 int32_t index = orderLength;
403 int32_t o;
404
405 // reset the iterator
406 iter.reset();
407
408 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
409 {
410 if (index == 0) {
411 if(o == 0) {
412 continue;
413 } else { // this is an error, orders exhausted but there are non-ignorable CEs from
414 // going backwards
415 errln("Backward iteration returned a non ignorable after orders are exhausted");
416 break;
417 }
418 }
419 if (o != orders[--index])
420 {
421 if (o == 0)
422 index ++;
423 else
424 {
425 while (index > 0 && orders[--index] == 0)
426 {
427 }
428 if (o != orders[index])
429 {
430 errln("Mismatch at index %d: 0x%X vs 0x%X", index,
431 orders[index], o);
432 break;
433 }
434 }
435 }
436 }
437
438 while (index != 0 && orders[index - 1] == 0)
439 {
440 index --;
441 }
442
443 if (index != 0)
444 {
445 UnicodeString msg("Didn't get back to beginning - index is ");
446 errln(msg + index);
447
448 iter.reset();
449 err("next: ");
450 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
451 {
452 UnicodeString hexString("0x");
453
454 appendHex(o, 8, hexString);
455 hexString += " ";
456 err(hexString);
457 }
458 errln("");
459
460 err("prev: ");
461 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
462 {
463 UnicodeString hexString("0x");
464
465 appendHex(o, 8, hexString);
466 hexString += " ";
467 err(hexString);
468 }
469 errln("");
470 }
471
472 delete[] orders;
473 }
474
475
476 /**
477 * Return an integer array containing all of the collation orders
478 * returned by calls to next on the specified iterator
479 */
480 int32_t *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
481 {
482 int32_t maxSize = 100;
483 int32_t size = 0;
484 int32_t *orders = new int32_t[maxSize];
485 UErrorCode status = U_ZERO_ERROR;
486
487 int32_t order;
488 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
489 {
490 if (size == maxSize)
491 {
492 maxSize *= 2;
493 int32_t *temp = new int32_t[maxSize];
494
495 uprv_memcpy(temp, orders, size * sizeof(int32_t));
496 delete[] orders;
497 orders = temp;
498 }
499
500 orders[size++] = order;
501 }
502
503 if (maxSize > size)
504 {
505 int32_t *temp = new int32_t[size];
506
507 uprv_memcpy(temp, orders, size * sizeof(int32_t));
508 delete[] orders;
509 orders = temp;
510 }
511
512 orderLength = size;
513 return orders;
514 }
515
516 #endif /* #if !UCONFIG_NO_COLLATION */