]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/tscoll.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / test / intltest / tscoll.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 /**
8 * IntlTestCollator is the medium level test class for everything in the directory "collate".
9 */
10
11 /***********************************************************************
12 * Modification history
13 * Date Name Description
14 * 02/14/2001 synwee Compare with cintltst and commented away tests
15 * that are not run.
16 ***********************************************************************/
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_COLLATION
21
22 #include "unicode/localpointer.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25
26 #include "dadrcoll.h"
27
28 #include "encoll.h"
29 #include "frcoll.h"
30 #include "decoll.h"
31 #include "escoll.h"
32 #include "ficoll.h"
33 #include "jacoll.h"
34 #include "trcoll.h"
35 #include "allcoll.h"
36 #include "g7coll.h"
37 #include "mnkytst.h"
38 #include "apicoll.h"
39 #include "regcoll.h"
40 #include "currcoll.h"
41 #include "itercoll.h"
42 #include "tstnorm.h"
43 #include "normconf.h"
44 #include "thcoll.h"
45 #include "srchtest.h"
46 #include "ssearch.h"
47 #include "cntabcol.h"
48 #include "lcukocol.h"
49 #include "ucaconf.h"
50 #include "svccoll.h"
51 #include "cmemory.h"
52 #include "alphaindextst.h"
53 //#include "rndmcoll.h"
54
55 // Set to 1 to test offsets in backAndForth()
56 #define TEST_OFFSETS 0
57
58 #define TESTCLASS(n,classname) \
59 case n: \
60 name = #classname; \
61 if (exec) { \
62 logln(#classname "---"); \
63 logln(""); \
64 classname t; \
65 callTest(t, par); \
66 } \
67 break
68
69 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
70 {
71 if (exec) {
72 logln("TestSuite Collator: ");
73 }
74
75 switch (index) {
76 TESTCLASS(0, CollationEnglishTest);
77 TESTCLASS(1, CollationFrenchTest);
78 TESTCLASS(2, CollationGermanTest);
79 TESTCLASS(3, CollationSpanishTest);
80 TESTCLASS(4, CollationKanaTest);
81 TESTCLASS(5, CollationTurkishTest);
82 TESTCLASS(6, CollationDummyTest);
83 TESTCLASS(7, G7CollationTest);
84 TESTCLASS(8, CollationMonkeyTest);
85 TESTCLASS(9, CollationAPITest);
86 TESTCLASS(10, CollationRegressionTest);
87 TESTCLASS(11, CollationCurrencyTest);
88 TESTCLASS(12, CollationIteratorTest);
89 TESTCLASS(13, CollationThaiTest);
90 TESTCLASS(14, LotusCollationKoreanTest);
91 TESTCLASS(15, StringSearchTest);
92 TESTCLASS(16, ContractionTableTest);
93 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
94 TESTCLASS(17, DataDrivenCollatorTest);
95 #endif
96 TESTCLASS(18, UCAConformanceTest);
97 TESTCLASS(19, CollationServiceTest);
98 TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation
99 //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test.
100 TESTCLASS(21, SSearchTest);
101 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
102 TESTCLASS(22, AlphabeticIndexTest);
103 #endif
104
105 default: name = ""; break;
106 }
107 }
108
109 UCollationResult
110 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
111 int32_t partialSKResult = 0;
112 uint8_t sBuf[512], tBuf[512];
113 UCharIterator sIter, tIter;
114 uint32_t sState[2], tState[2];
115 int32_t sSize = pieceSize, tSize = pieceSize;
116 int32_t i = 0;
117 status = U_ZERO_ERROR;
118 sState[0] = 0; sState[1] = 0;
119 tState[0] = 0; tState[1] = 0;
120 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
121 uiter_setString(&sIter, source, sLen);
122 uiter_setString(&tIter, target, tLen);
123 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
124 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
125
126 if(sState[0] != 0 || tState[0] != 0) {
127 log("State != 0 : %08X %08X\n", sState[0], tState[0]);
128 }
129 log("%i ", i++);
130
131 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
132 }
133
134 if(partialSKResult < 0) {
135 return UCOL_LESS;
136 } else if(partialSKResult > 0) {
137 return UCOL_GREATER;
138 } else {
139 return UCOL_EQUAL;
140 }
141 }
142
143 void
144 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
145 {
146 UErrorCode status = U_ZERO_ERROR;
147
148 UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator();
149
150 Collator::EComparisonResult compareResult = col->compare(source, target);
151
152 CollationKey srckey, tgtkey;
153 col->getCollationKey(source, srckey, status);
154 col->getCollationKey(target, tgtkey, status);
155 if (U_FAILURE(status)){
156 errln("Creation of collation keys failed\n");
157 }
158 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
159
160 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
161
162 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
163
164 int32_t sLen = source.length(), tLen = target.length();
165 const UChar* src = source.getBuffer();
166 const UChar* trg = target.getBuffer();
167 UCollationResult compareResultIter = (UCollationResult)result;
168
169 {
170 UCharIterator sIter, tIter;
171 uiter_setString(&sIter, src, sLen);
172 uiter_setString(&tIter, trg, tLen);
173 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
174 if(compareResultIter != (UCollationResult)result) {
175 errln("Different result for iterative comparison "+source+" "+target);
176 }
177 }
178 /* convert the strings to UTF-8 and do try comparing with char iterator */
179 if(!quick) { /*!QUICK*/
180 char utf8Source[256], utf8Target[256];
181 int32_t utf8SourceLen = 0, utf8TargetLen = 0;
182 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
183 if(U_FAILURE(status)) { /* probably buffer is not big enough */
184 log("Src UTF-8 buffer too small! Will not compare!\n");
185 } else {
186 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
187 if(U_SUCCESS(status)) { /* probably buffer is not big enough */
188 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
189 UCharIterator sIter, tIter;
190 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
191 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
192 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
193 /*uiter_setString(&sIter, source, sLen);
194 uiter_setString(&tIter, target, tLen);*/
195 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
196 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
197 sIter.move(&sIter, 0, UITER_START);
198 tIter.move(&tIter, 0, UITER_START);
199 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
200 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
201 if(compareResultUTF8 != compareResultIter) {
202 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
203 }
204 if(compareResultUTF8 != compareResultUTF8Norm) {
205 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
206 }
207 } else {
208 log("Target UTF-8 buffer too small! Did not compare!\n");
209 }
210 if(U_FAILURE(status)) {
211 log("UTF-8 strcoll failed! Ignoring result\n");
212 }
213 }
214 }
215
216 /* testing the partial sortkeys */
217 { /*!QUICK*/
218 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
219 int32_t partialSizesSize = 1;
220 if(!quick) {
221 partialSizesSize = 7;
222 }
223 int32_t i = 0;
224 log("partial sortkey test piecesize=");
225 for(i = 0; i < partialSizesSize; i++) {
226 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
227 log("%i ", partialSizes[i]);
228
229 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
230 if(partialSKResult != (UCollationResult)result) {
231 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
232 }
233
234 if(norm != UCOL_ON && !quick) {
235 log("N ");
236 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
237 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
238 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
239 if(partialSKResult != partialNormalizedSKResult) {
240 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
241 }
242 }
243 }
244 log("\n");
245 }
246 /*
247 if (compareResult != result) {
248 errln("String comparison failed in variant test\n");
249 }
250 if (keyResult != result) {
251 errln("Collation key comparison failed in variant test\n");
252 }
253 */
254 }
255
256 void
257 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
258 doTest(col, UnicodeString(source), UnicodeString(target), result);
259 }
260
261 void
262 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
263 {
264 if(col) {
265 doTestVariant(col, source, target, result);
266 if(result == Collator::LESS) {
267 doTestVariant(col, target, source, Collator::GREATER);
268 } else if (result == Collator::GREATER) {
269 doTestVariant(col, target, source, Collator::LESS);
270 }
271
272 UErrorCode status = U_ZERO_ERROR;
273 LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
274 logln("Testing iterating source: "+source);
275 backAndForth(*c);
276 c->setText(target, status);
277 logln("Testing iterating target: "+target);
278 backAndForth(*c);
279 }
280 }
281
282
283 // used for collation result reporting, defined here for convenience
284 // (maybe moved later)
285 void
286 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
287 CollationKey &sourceKey, CollationKey &targetKey,
288 Collator::EComparisonResult compareResult,
289 Collator::EComparisonResult keyResult,
290 Collator::EComparisonResult incResult,
291 Collator::EComparisonResult expectedResult )
292 {
293 if (expectedResult < -1 || expectedResult > 1)
294 {
295 errln("***** invalid call to reportCResult ****");
296 return;
297 }
298
299 UBool ok1 = (compareResult == expectedResult);
300 UBool ok2 = (keyResult == expectedResult);
301 UBool ok3 = (incResult == expectedResult);
302
303
304 if (ok1 && ok2 && ok3 && !verbose) {
305 // Keep non-verbose, passing tests fast
306 return;
307 } else {
308 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
309 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
310 UnicodeString prettySource, prettyTarget, sExpect, sResult;
311
312 IntlTest::prettify(source, prettySource);
313 IntlTest::prettify(target, prettyTarget);
314 appendCompareResult(compareResult, sResult);
315 appendCompareResult(expectedResult, sExpect);
316
317 if (ok1) {
318 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
319 } else {
320 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
321 }
322
323 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
324 msg2 = ").compareTo(key(";
325 msg3 = ")) returned ";
326
327 appendCompareResult(keyResult, sResult);
328
329 if (ok2) {
330 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
331 } else {
332 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
333
334 msg1 = " ";
335 msg2 = " vs. ";
336
337 prettify(sourceKey, prettySource);
338 prettify(targetKey, prettyTarget);
339
340 errln(msg1 + prettySource + msg2 + prettyTarget);
341 }
342 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
343 msg2 = ", ";
344 msg3 = ") returned ";
345
346 appendCompareResult(incResult, sResult);
347
348 if (ok3) {
349 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
350 } else {
351 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
352 }
353 }
354 }
355
356 UnicodeString&
357 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
358 UnicodeString& target)
359 {
360 if (result == Collator::LESS)
361 {
362 target += "LESS";
363 }
364 else if (result == Collator::EQUAL)
365 {
366 target += "EQUAL";
367 }
368 else if (result == Collator::GREATER)
369 {
370 target += "GREATER";
371 }
372 else
373 {
374 UnicodeString huh = "?";
375
376 target += (huh + (int32_t)result);
377 }
378
379 return target;
380 }
381
382 // Produce a printable representation of a CollationKey
383 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
384 {
385 int32_t i, byteCount;
386 const uint8_t *bytes = source.getByteArray(byteCount);
387
388 target.remove();
389 target += "[";
390
391 for (i = 0; i < byteCount; i += 1)
392 {
393 if (i != 0) {
394 target += " ";
395 }
396 appendHex(bytes[i], 2, target);
397 }
398
399 target += "]";
400
401 return target;
402 }
403
404 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
405 {
406 // Run through the iterator forwards and stick it into an array
407 int32_t orderLength = 0;
408 LocalArray<Order> orders(getOrders(iter, orderLength));
409 UErrorCode status = U_ZERO_ERROR;
410
411 // Now go through it backwards and make sure we get the same values
412 int32_t index = orderLength;
413 int32_t o;
414
415 // reset the iterator
416 iter.reset();
417
418 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
419 {
420 /*int32_t offset = */iter.getOffset();
421
422 if (index == 0) {
423 if(o == 0) {
424 continue;
425 } else { // this is an error, orders exhausted but there are non-ignorable CEs from
426 // going backwards
427 errln("Backward iteration returned a non ignorable after orders are exhausted");
428 break;
429 }
430 }
431
432 index -= 1;
433 if (o != orders[index].order) {
434 if (o == 0)
435 index += 1;
436 else {
437 while (index > 0 && orders[--index].order == 0) {
438 // nothing...
439 }
440
441 if (o != orders[index].order) {
442 errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
443 orders[index].order, o);
444 //break;
445 return;
446 }
447 }
448 }
449
450 #if TEST_OFFSETS
451 if (offset != orders[index].offset) {
452 errln("Mismatched offset at index %d: %d vs. %d", index,
453 orders[index].offset, offset);
454 //break;
455 return;
456 }
457 #endif
458
459 }
460
461 while (index != 0 && orders[index - 1].order == 0)
462 {
463 index --;
464 }
465
466 if (index != 0)
467 {
468 UnicodeString msg("Didn't get back to beginning - index is ");
469 errln(msg + index);
470
471 iter.reset();
472 err("next: ");
473 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
474 {
475 UnicodeString hexString("0x");
476
477 appendHex(o, 8, hexString);
478 hexString += " ";
479 err(hexString);
480 }
481 errln("");
482
483 err("prev: ");
484 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
485 {
486 UnicodeString hexString("0x");
487
488 appendHex(o, 8, hexString);
489 hexString += " ";
490 err(hexString);
491 }
492 errln("");
493 }
494 }
495
496
497 /**
498 * Return an integer array containing all of the collation orders
499 * returned by calls to next on the specified iterator
500 */
501 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
502 {
503 int32_t maxSize = 100;
504 int32_t size = 0;
505 LocalArray<Order> orders(new Order[maxSize]);
506 UErrorCode status = U_ZERO_ERROR;
507 int32_t offset = iter.getOffset();
508
509 int32_t order;
510 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
511 {
512 if (size == maxSize)
513 {
514 maxSize *= 2;
515 Order *temp = new Order[maxSize];
516
517 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
518 orders.adoptInstead(temp);
519 }
520
521 orders[size].order = order;
522 orders[size].offset = offset;
523
524 offset = iter.getOffset();
525 size += 1;
526 }
527
528 if (maxSize > size)
529 {
530 Order *temp = new Order[size];
531
532 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
533 orders.adoptInstead(temp);
534 }
535
536 orderLength = size;
537 return orders.orphan();
538 }
539
540 #endif /* #if !UCONFIG_NO_COLLATION */