]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/callcoll.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / cintltst / callcoll.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CALLCOLL.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda Ported for C API
13 *********************************************************************************
14 */
15
16 /*
17 * Important: This file is included into intltest/allcoll.cpp so that the
18 * test data is shared. This makes it easier to maintain the test data,
19 * especially since the Unicode data must be portable and quoted character
20 * literals will not work.
21 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
22 * that must prevent the actual code in here from being part of the
23 * allcoll.cpp compilation.
24 */
25
/**
 * CollationDummyTest is a third level test class. This tests creation of
 * a customized collator object. For example, number 1 to be sorted
 * equivalent to word 'one'.
 */
31
32 #include <string.h>
33 #include <stdlib.h>
34
35 #include "unicode/utypes.h"
36
37 #if !UCONFIG_NO_COLLATION
38
39 #include "unicode/ucol.h"
40 #include "unicode/uloc.h"
41 #include "unicode/ucoleitr.h"
42 #include "unicode/ustring.h"
43 #include "unicode/uclean.h"
44 #include "unicode/putil.h"
45
46 #include "cintltst.h"
47 #include "ccolltst.h"
48 #include "callcoll.h"
49 #include "calldata.h"
50 #include "cstring.h"
51 #include "cmemory.h"
52 #include "ucol_imp.h"
53
/*
 * Forward declarations of the test entry points that addAllCollTest()
 * registers with the test framework.
 */

/* Strength-specific tests: PRIMARY, SECONDARY, TERTIARY and IDENTICAL. */
static void TestPrimary(void);
static void TestSecondary(void);
static void TestTertiary(void);
static void TestIdentical(void);

/* Extra tests. */
static void TestExtra(void);

/* Jitterbug regression tests (581, 1401, 1098). */
static void TestJB581(void);
static void TestJB1401(void);
static void TestJitterbug1098(void);

/* [variable top] in the rule syntax. */
static void TestVariableTop(void);

/* Surrogates. */
static void TestSurrogates(void);

static void TestInvalidRules(void);

static void TestFCDCrash(void);
86
87 const UCollationResult results[] = {
88 UCOL_LESS,
89 UCOL_LESS, /*UCOL_GREATER,*/
90 UCOL_LESS,
91 UCOL_LESS,
92 UCOL_LESS,
93 UCOL_LESS,
94 UCOL_LESS,
95 UCOL_GREATER,
96 UCOL_GREATER,
97 UCOL_LESS, /* 10 */
98 UCOL_GREATER,
99 UCOL_LESS,
100 UCOL_GREATER,
101 UCOL_GREATER,
102 UCOL_LESS,
103 UCOL_LESS,
104 UCOL_LESS,
105 /* test primary > 17 */
106 UCOL_EQUAL,
107 UCOL_EQUAL,
108 UCOL_EQUAL, /* 20 */
109 UCOL_LESS,
110 UCOL_LESS,
111 UCOL_EQUAL,
112 UCOL_EQUAL,
113 UCOL_EQUAL,
114 UCOL_LESS,
115 /* test secondary > 26 */
116 UCOL_EQUAL,
117 UCOL_EQUAL,
118 UCOL_EQUAL,
119 UCOL_EQUAL,
120 UCOL_EQUAL, /* 30 */
121 UCOL_EQUAL,
122 UCOL_LESS,
123 UCOL_EQUAL, /* 34 */
124 UCOL_EQUAL,
125 UCOL_EQUAL,
126 UCOL_LESS /* 37 */
127 };
128
129
130 void addAllCollTest(TestNode** root)
131 {
132
133
134 addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary");
135 addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary");
136 addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary");
137 addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical");
138 addTest(root, &TestExtra, "tscoll/callcoll/TestExtra");
139 addTest(root, &TestJB581, "tscoll/callcoll/TestJB581");
140 addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop");
141 addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates");
142 addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules");
143 addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401");
144 addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098");
145 addTest(root, &TestFCDCrash, "tscoll/callcoll/TestFCDCrash");
146
147 }
148
149 UBool hasCollationElements(const char *locName) {
150
151 UErrorCode status = U_ZERO_ERROR;
152 UResourceBundle *ColEl = NULL;
153
154 UResourceBundle *loc = ures_open(NULL, locName, &status);;
155
156 if(U_SUCCESS(status)) {
157 status = U_ZERO_ERROR;
158 ColEl = ures_getByKey(loc, "collations", ColEl, &status);
159 if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
160 ures_close(ColEl);
161 ures_close(loc);
162 return TRUE;
163 }
164 ures_close(ColEl);
165 ures_close(loc);
166 }
167 return FALSE;
168 }
169
170 static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
171 int32_t partialSKResult = 0;
172 UCharIterator sIter, tIter;
173 uint32_t sState[2], tState[2];
174 int32_t sSize = pieceSize, tSize = pieceSize;
175 /*int32_t i = 0;*/
176 uint8_t sBuf[16384], tBuf[16384];
177 if(pieceSize > 16384) {
178 log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
179 *status = U_BUFFER_OVERFLOW_ERROR;
180 return UCOL_EQUAL;
181 }
182 *status = U_ZERO_ERROR;
183 sState[0] = 0; sState[1] = 0;
184 tState[0] = 0; tState[1] = 0;
185 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
186 uiter_setString(&sIter, source, sLen);
187 uiter_setString(&tIter, target, tLen);
188 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
189 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
190
191 if(sState[0] != 0 || tState[0] != 0) {
192 /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
193 }
194 /*log_verbose("%i ", i++);*/
195
196 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
197 }
198
199 if(partialSKResult < 0) {
200 return UCOL_LESS;
201 } else if(partialSKResult > 0) {
202 return UCOL_GREATER;
203 } else {
204 return UCOL_EQUAL;
205 }
206 }
207
208 static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
209 {
210 int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
211 int temp=0, gSortklen1=0,gSortklen2=0;
212 UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
213 uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
214 uint32_t sLen = u_strlen(source);
215 uint32_t tLen = u_strlen(target);
216 char buffer[256];
217 uint32_t len;
218 UErrorCode status = U_ZERO_ERROR;
219 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
220
221 UCharIterator sIter, tIter;
222 uiter_setString(&sIter, source, sLen);
223 uiter_setString(&tIter, target, tLen);
224 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
225 if(compareResultIter != result) {
226 log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
227 }
228
229 /* convert the strings to UTF-8 and do try comparing with char iterator */
230 if(QUICK <= 0) { /*!QUICK*/
231 char utf8Source[256], utf8Target[256];
232 int32_t utf8SourceLen = 0, utf8TargetLen = 0;
233 u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
234 if(U_FAILURE(status)) { /* probably buffer is not big enough */
235 log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
236 } else {
237 u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
238 if(U_SUCCESS(status)) { /* probably buffer is not big enough */
239 UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;
240 /*UCharIterator sIter, tIter;*/
241 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
242 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
243 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
244 /*uiter_setString(&sIter, source, sLen);
245 uiter_setString(&tIter, target, tLen);*/
246 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
247 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
248 sIter.move(&sIter, 0, UITER_START);
249 tIter.move(&tIter, 0, UITER_START);
250 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
251 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
252 if(compareResultUTF8 != compareResultIter) {
253 log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
254 }
255 if(compareResultUTF8 != compareResultUTF8Norm) {
256 log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
257 }
258 } else {
259 log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
260 }
261 if(U_FAILURE(status)) {
262 log_verbose("UTF-8 strcoll failed! Ignoring result\n");
263 }
264 }
265 }
266
267 /* testing the partial sortkeys */
268 if(1) { /*!QUICK*/
269 int32_t i = 0;
270 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
271 int32_t partialSizesSize = 1;
272 if(QUICK <= 0) {
273 partialSizesSize = 7;
274 }
275 /*log_verbose("partial sortkey test piecesize=");*/
276 for(i = 0; i < partialSizesSize; i++) {
277 UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
278 /*log_verbose("%i ", partialSizes[i]);*/
279
280 partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
281 if(partialSKResult != result) {
282 log_err("Partial sortkey comparison returned wrong result: %s, %s (size %i)\n",
283 aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
284 }
285
286 if(QUICK <= 0 && norm != UCOL_ON) {
287 /*log_verbose("N ");*/
288 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
289 partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
290 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
291 if(partialSKResult != partialNormalizedSKResult) {
292 log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
293 aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
294 }
295 }
296 }
297 /*log_verbose("\n");*/
298 }
299
300
301 compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
302 compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
303 if (compareResult != compareResulta) {
304 log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
305 }
306
307 sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0);
308 sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);
309
310 sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
311 sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);
312
313 sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
314 sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
315 ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
316 ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);
317
318 sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
319 sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
320 ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
321 ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);
322
323 /* Check that sort key generated with null terminated string is identical */
324 /* to that generted with a length specified. */
325 if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
326 uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
327 log_err("Sort Keys from null terminated and explicit length strings differ.\n");
328 }
329
330 /*memcmp(sortKey1, sortKey2,sortklenmax);*/
331 temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
332 gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
333 gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
334 if(sortklen1 != gSortklen1){
335 log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
336 log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey1, buffer, &len));
337 }
338 if(sortklen2!= gSortklen2){
339 log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
340 log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey2, buffer, &len));
341 }
342
343 if(temp < 0) {
344 keyResult=UCOL_LESS;
345 }
346 else if(temp > 0) {
347 keyResult= UCOL_GREATER;
348 }
349 else {
350 keyResult = UCOL_EQUAL;
351 }
352 reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
353 free(sortKey1);
354 free(sortKey2);
355 free(sortKey1a);
356 free(sortKey2a);
357
358 }
359
360 void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
361 {
362 if(myCollation) {
363 doTestVariant(myCollation, source, target, result);
364 if(result == UCOL_LESS) {
365 doTestVariant(myCollation, target, source, UCOL_GREATER);
366 } else if(result == UCOL_GREATER) {
367 doTestVariant(myCollation, target, source, UCOL_LESS);
368 } else {
369 doTestVariant(myCollation, target, source, UCOL_EQUAL);
370 }
371 } else {
372 log_data_err("No collator! Any data around?\n");
373 }
374 }
375
376
377 /**
378 * Return an integer array containing all of the collation orders
379 * returned by calls to next on the specified iterator
380 */
381 int32_t* getOrders(UCollationElements *iter, int32_t *orderLength)
382 {
383 UErrorCode status;
384 int32_t order;
385 int32_t maxSize = 100;
386 int32_t size = 0;
387 int32_t *temp;
388 int32_t *orders =(int32_t*)malloc(sizeof(int32_t) * maxSize);
389 status= U_ZERO_ERROR;
390
391
392 while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
393 {
394 if (size == maxSize)
395 {
396 maxSize *= 2;
397 temp = (int32_t*)malloc(sizeof(int32_t) * maxSize);
398
399 memcpy(temp, orders, size * sizeof(int32_t));
400 free(orders);
401 orders = temp;
402
403 }
404
405 orders[size++] = order;
406 }
407
408 if (maxSize > size && size > 0)
409 {
410 temp = (int32_t*)malloc(sizeof(int32_t) * size);
411
412 memcpy(temp, orders, size * sizeof(int32_t));
413 free(orders);
414 orders = temp;
415
416
417 }
418
419 *orderLength = size;
420 return orders;
421 }
422
423
424 void
425 backAndForth(UCollationElements *iter)
426 {
427 /* Run through the iterator forwards and stick it into an array */
428 int32_t index, o;
429 UErrorCode status = U_ZERO_ERROR;
430 int32_t orderLength = 0;
431 int32_t *orders;
432 orders= getOrders(iter, &orderLength);
433
434
435 /* Now go through it backwards and make sure we get the same values */
436 index = orderLength;
437 ucol_reset(iter);
438
439 /* synwee : changed */
440 while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
441 {
442 if (o != orders[-- index])
443 {
444 if (o == 0)
445 index ++;
446 else
447 {
448 while (index > 0 && orders[-- index] == 0)
449 {
450 }
451 if (o != orders[index])
452 {
453 log_err("Mismatch at index : 0x%x\n", index);
454 return;
455 }
456
457 }
458 }
459 }
460
461 while (index != 0 && orders[index - 1] == 0) {
462 index --;
463 }
464
465 if (index != 0)
466 {
467 log_err("Didn't get back to beginning - index is %d\n", index);
468
469 ucol_reset(iter);
470 log_err("\nnext: ");
471 if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER)
472 {
473 log_err("Error at %x\n", o);
474 }
475 log_err("\nprev: ");
476 if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
477 {
478 log_err("Error at %x\n", o);
479 }
480 log_verbose("\n");
481 }
482
483 free(orders);
484 }
485
486 void genericOrderingTestWithResult(UCollator *coll, const char *s[], uint32_t size, UCollationResult result) {
487 UChar t1[2048] = {0};
488 UChar t2[2048] = {0};
489 UCollationElements *iter;
490 UErrorCode status = U_ZERO_ERROR;
491
492 uint32_t i = 0, j = 0;
493 log_verbose("testing sequence:\n");
494 for(i = 0; i < size; i++) {
495 log_verbose("%s\n", s[i]);
496 }
497
498 iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
499 if (U_FAILURE(status)) {
500 log_err("Creation of iterator failed\n");
501 }
502 for(i = 0; i < size-1; i++) {
503 for(j = i+1; j < size; j++) {
504 u_unescape(s[i], t1, 2048);
505 u_unescape(s[j], t2, 2048);
506 doTest(coll, t1, t2, result);
507 /* synwee : added collation element iterator test */
508 ucol_setText(iter, t1, u_strlen(t1), &status);
509 backAndForth(iter);
510 ucol_setText(iter, t2, u_strlen(t2), &status);
511 backAndForth(iter);
512 }
513 }
514 ucol_closeElements(iter);
515 }
516
517 void genericOrderingTest(UCollator *coll, const char *s[], uint32_t size) {
518 genericOrderingTestWithResult(coll, s, size, UCOL_LESS);
519 }
520
521 void genericLocaleStarter(const char *locale, const char *s[], uint32_t size) {
522 UErrorCode status = U_ZERO_ERROR;
523 UCollator *coll = ucol_open(locale, &status);
524
525 log_verbose("Locale starter for %s\n", locale);
526
527 if(U_SUCCESS(status)) {
528 genericOrderingTest(coll, s, size);
529 } else if(status == U_FILE_ACCESS_ERROR) {
530 log_data_err("Is your data around?\n");
531 return;
532 } else {
533 log_err("Unable to open collator for locale %s\n", locale);
534 }
535 ucol_close(coll);
536 }
537
538 void genericLocaleStarterWithResult(const char *locale, const char *s[], uint32_t size, UCollationResult result) {
539 UErrorCode status = U_ZERO_ERROR;
540 UCollator *coll = ucol_open(locale, &status);
541
542 log_verbose("Locale starter for %s\n", locale);
543
544 if(U_SUCCESS(status)) {
545 genericOrderingTestWithResult(coll, s, size, result);
546 } else if(status == U_FILE_ACCESS_ERROR) {
547 log_data_err("Is your data around?\n");
548 return;
549 } else {
550 log_err("Unable to open collator for locale %s\n", locale);
551 }
552 ucol_close(coll);
553 }
554
#if 0
/* currently not used with options */
/*
 * Builds a collator from escaped `rules`, applies attsize attribute/value
 * pairs to it and runs genericOrderingTest() on s.
 */
void genericRulesStarterWithOptions(const char *rules, const char *s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
    UErrorCode status = U_ZERO_ERROR;
    UChar unescaped[RULE_BUFFER_LEN] = { 0 };
    uint32_t unescapedLen = u_unescape(rules, unescaped, RULE_BUFFER_LEN);
    uint32_t attrIdx;

    UCollator *coll = ucol_openRules(unescaped, unescapedLen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

    log_verbose("Rules starter for %s\n", rules);

    if(U_FAILURE(status)) {
        log_err("Unable to open collator with rules %s\n", rules);
    } else {
        log_verbose("Setting attributes\n");
        for(attrIdx = 0; attrIdx < attsize; attrIdx++) {
            ucol_setAttribute(coll, attrs[attrIdx], values[attrIdx], &status);
        }

        genericOrderingTest(coll, s, size);
    }
    ucol_close(coll);
}
#endif
580
581 void genericLocaleStarterWithOptions(const char *locale, const char *s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
582 UErrorCode status = U_ZERO_ERROR;
583 uint32_t i;
584
585 UCollator *coll = ucol_open(locale, &status);
586
587 log_verbose("Locale starter for %s\n", locale);
588
589 if(U_SUCCESS(status)) {
590
591 log_verbose("Setting attributes\n");
592 for(i = 0; i < attsize; i++) {
593 ucol_setAttribute(coll, attrs[i], values[i], &status);
594 }
595
596 genericOrderingTest(coll, s, size);
597 } else {
598 log_err("Unable to open collator for locale %s\n", locale);
599 }
600 ucol_close(coll);
601 }
602
603 void genericRulesTestWithResult(const char *rules, const char *s[], uint32_t size, UCollationResult result) {
604 UErrorCode status = U_ZERO_ERROR;
605 UChar rlz[RULE_BUFFER_LEN] = { 0 };
606 uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
607
608 UCollator *coll = NULL;
609 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
610 log_verbose("Rules starter for %s\n", rules);
611
612 if(U_SUCCESS(status)) {
613 genericOrderingTestWithResult(coll, s, size, result);
614 ucol_close(coll);
615 } else if(status == U_FILE_ACCESS_ERROR) {
616 log_data_err("Is your data around?\n");
617 } else {
618 log_err("Unable to open collator with rules %s\n", rules);
619 }
620 }
621
622 void genericRulesStarter(const char *rules, const char *s[], uint32_t size) {
623 genericRulesTestWithResult(rules, s, size, UCOL_LESS);
624 }
625
626 static void TestTertiary()
627 {
628 int32_t len,i;
629 UChar *rules;
630 UCollator *myCollation;
631 UErrorCode status=U_ZERO_ERROR;
632 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
633 len = strlen(str);
634 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
635 u_uastrcpy(rules, str);
636
637 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
638 if(U_FAILURE(status)){
639 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
640 }
641
642 ucol_setStrength(myCollation, UCOL_TERTIARY);
643 for (i = 0; i < 17 ; i++)
644 {
645 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
646 }
647 free(rules);
648 ucol_close(myCollation);
649 myCollation = 0;
650 }
651
652 static void TestPrimary( )
653 {
654 int32_t len,i;
655 UChar *rules;
656 UCollator *myCollation;
657 UErrorCode status=U_ZERO_ERROR;
658 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
659 len = strlen(str);
660 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
661 u_uastrcpy(rules, str);
662
663 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
664 if(U_FAILURE(status)){
665 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
666 }
667 ucol_setStrength(myCollation, UCOL_PRIMARY);
668
669 for (i = 17; i < 26 ; i++)
670 {
671
672 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
673 }
674 free(rules);
675 ucol_close(myCollation);
676 myCollation = 0;
677 }
678
679 static void TestSecondary()
680 {
681 int32_t i;
682 int32_t len;
683 UChar *rules;
684 UCollator *myCollation;
685 UErrorCode status=U_ZERO_ERROR;
686 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
687 len = strlen(str);
688 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
689 u_uastrcpy(rules, str);
690
691 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
692 if(U_FAILURE(status)){
693 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
694 }
695 ucol_setStrength(myCollation, UCOL_SECONDARY);
696 for (i = 26; i < 34 ; i++)
697 {
698 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
699 }
700 free(rules);
701 ucol_close(myCollation);
702 myCollation = 0;
703 }
704
705 static void TestIdentical()
706 {
707 int32_t i;
708 int32_t len;
709 UChar *rules = 0;
710 UCollator *myCollation;
711 UErrorCode status=U_ZERO_ERROR;
712 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
713 len = strlen(str);
714 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
715 u_uastrcpy(rules, str);
716
717 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
718 if(U_FAILURE(status)){
719 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
720 }
721 for(i= 34; i<37; i++)
722 {
723 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
724 }
725 free(rules);
726 ucol_close(myCollation);
727 myCollation = 0;
728 }
729
730 static void TestExtra()
731 {
732 int32_t i, j;
733 int32_t len;
734 UChar *rules;
735 UCollator *myCollation;
736 UErrorCode status = U_ZERO_ERROR;
737 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
738 len = strlen(str);
739 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
740 u_uastrcpy(rules, str);
741
742 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
743 if(U_FAILURE(status)){
744 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
745 }
746 ucol_setStrength(myCollation, UCOL_TERTIARY);
747 for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
748 {
749 for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
750 {
751
752 doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
753 }
754 }
755 free(rules);
756 ucol_close(myCollation);
757 myCollation = 0;
758 }
759
760 static void TestJB581(void)
761 {
762 UChar dispName [100];
763 int32_t bufferLen = 0;
764 UChar source [100];
765 UChar target [100];
766 UCollationResult result = UCOL_EQUAL;
767 uint8_t sourceKeyArray [100];
768 uint8_t targetKeyArray [100];
769 int32_t sourceKeyOut = 0,
770 targetKeyOut = 0;
771 UCollator *myCollator = 0;
772 UErrorCode status = U_ZERO_ERROR;
773
774 /*u_uastrcpy(source, "This is a test.");*/
775 /*u_uastrcpy(target, "THISISATEST.");*/
776 u_uastrcpy(source, "THISISATEST.");
777 u_uastrcpy(target, "Thisisatest.");
778
779 myCollator = ucol_open("en_US", &status);
780 if (U_FAILURE(status)){
781 bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
782 /*Report the error with display name... */
783 log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
784 return;
785 }
786 result = ucol_strcoll(myCollator, source, -1, target, -1);
787 /* result is 1, secondary differences only for ignorable space characters*/
788 if (result != 1)
789 {
790 log_err("Comparing two strings with only secondary differences in C failed.\n");
791 }
792 /* To compare them with just primary differences */
793 ucol_setStrength(myCollator, UCOL_PRIMARY);
794 result = ucol_strcoll(myCollator, source, -1, target, -1);
795 /* result is 0 */
796 if (result != 0)
797 {
798 log_err("Comparing two strings with no differences in C failed.\n");
799 }
800 /* Now, do the same comparison with keys */
801 sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
802 targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
803 result = 0;
804 bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
805 result = memcmp(sourceKeyArray, targetKeyArray, bufferLen);
806 if (result != 0)
807 {
808 log_err("Comparing two strings with sort keys in C failed.\n");
809 }
810 ucol_close(myCollator);
811 }
812
813 static void TestJB1401(void)
814 {
815 UCollator *myCollator = 0;
816 UErrorCode status = U_ZERO_ERROR;
817 static UChar NFD_UnsafeStartChars[] = {
818 0x0f73, /* Tibetan Vowel Sign II */
819 0x0f75, /* Tibetan Vowel Sign UU */
820 0x0f81, /* Tibetan Vowel Sign Reversed II */
821 0
822 };
823 int i;
824
825
826 myCollator = ucol_open("en_US", &status);
827 if (U_FAILURE(status)){
828 int32_t bufferLen = 0;
829 UChar dispName [100];
830 bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
831 /*Report the error with display name... */
832 log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
833 return;
834 }
835 ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
836 if (U_FAILURE(status)){
837 log_err("ERROR: Failed to set normalization mode ON for collator.\n");
838 return;
839 }
840
841 for (i=0; ; i++) {
842 UChar c;
843 UChar X[4];
844 UChar Y[20];
845 UChar Z[20];
846
847 /* Get the next funny character to be tested, and set up the
848 * three test strings X, Y, Z, consisting of an A-grave + test char,
849 * in original form, NFD, and then NFC form.
850 */
851 c = NFD_UnsafeStartChars[i];
852 if (c==0) {break;}
853
854 X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/
855
856 unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
857 unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
858 if (U_FAILURE(status)){
859 log_err("ERROR: Failed to normalize test of character %x\n", c);
860 return;
861 }
862
863 /* Collation test. All three strings should be equal.
864 * doTest does both strcoll and sort keys, with params in both orders.
865 */
866 doTest(myCollator, X, Y, UCOL_EQUAL);
867 doTest(myCollator, X, Z, UCOL_EQUAL);
868 doTest(myCollator, Y, Z, UCOL_EQUAL);
869
870 /* Run collation element iterators over the three strings. Results should be same for each.
871 */
872 {
873 UCollationElements *ceiX, *ceiY, *ceiZ;
874 int32_t ceX, ceY, ceZ;
875 int j;
876
877 ceiX = ucol_openElements(myCollator, X, -1, &status);
878 ceiY = ucol_openElements(myCollator, Y, -1, &status);
879 ceiZ = ucol_openElements(myCollator, Z, -1, &status);
880 if (U_FAILURE(status)) {
881 log_err("ERROR: uucol_openElements failed.\n");
882 return;
883 }
884
885 for (j=0;; j++) {
886 ceX = ucol_next(ceiX, &status);
887 ceY = ucol_next(ceiY, &status);
888 ceZ = ucol_next(ceiZ, &status);
889 if (U_FAILURE(status)) {
890 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
891 break;
892 }
893 if (ceX != ceY || ceY != ceZ) {
894 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
895 break;
896 }
897 if (ceX == UCOL_NULLORDER) {
898 break;
899 }
900 }
901 ucol_closeElements(ceiX);
902 ucol_closeElements(ceiY);
903 ucol_closeElements(ceiZ);
904 }
905 }
906 ucol_close(myCollator);
907 }
908
909
910
911 /**
912 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
913 * tag has the value shifted, any codepoints before [variable top] should give
914 * a primary ce of 0.
915 */
916 static void TestVariableTop(void)
917 {
918 const char *str = "&z = [variable top]";
919 int len = strlen(str);
920 UChar *rules;
921 UCollator *myCollation;
922 UCollator *enCollation;
923 UErrorCode status = U_ZERO_ERROR;
924 UChar source[1];
925 UChar ch;
926 uint8_t result[20];
927 uint8_t expected[20];
928
929 rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
930 u_uastrcpy(rules, str);
931
932 enCollation = ucol_open("en_US", &status);
933 myCollation = ucol_openRules(rules, len, UCOL_OFF,
934 UCOL_PRIMARY,NULL, &status);
935 if (U_FAILURE(status)) {
936 log_err("ERROR: in creation of rule based collator :%s\n",
937 myErrorName(status));
938 return;
939 }
940
941 ucol_setStrength(enCollation, UCOL_PRIMARY);
942 ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
943 &status);
944 ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
945 &status);
946
947 if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
948 UCOL_SHIFTED || U_FAILURE(status)) {
949 log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
950 }
951
952 uprv_memset(expected, 0, 20);
953
954 /* space is supposed to be a variable */
955 source[0] = ' ';
956 len = ucol_getSortKey(enCollation, source, 1, result,
957 sizeof(result));
958
959 if (uprv_memcmp(expected, result, len) != 0) {
960 log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
961 }
962
963 ch = 'a';
964 while (ch < 'z') {
965 source[0] = ch;
966 len = ucol_getSortKey(myCollation, source, 1, result,
967 sizeof(result));
968 if (uprv_memcmp(expected, result, len) != 0) {
969 log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
970 ch);
971 }
972 ch ++;
973 }
974
975 free(rules);
976 ucol_close(enCollation);
977 ucol_close(myCollation);
978 enCollation = NULL;
979 myCollation = NULL;
980 }
981
982 /**
983 * Tests surrogate support.
984 * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
985 * Therefore, another (unassigned) code point was used for this test.
986 */
987 static void TestSurrogates(void)
988 {
989 const char *str =
990 "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
991 int len = strlen(str);
992 int rlen = 0;
993 UChar *rules;
994 UCollator *myCollation;
995 UCollator *enCollation;
996 UErrorCode status = U_ZERO_ERROR;
997 UChar source[][4] =
998 {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
999 UChar target[][4] =
1000 {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1001 int count = 0;
1002 uint8_t enresult[20], myresult[20];
1003 int enlen, mylen;
1004
1005 /* tests for open rules with surrogate rules */
1006 rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
1007 rlen = u_unescape(str, rules, len);
1008
1009 enCollation = ucol_open("en_US", &status);
1010 myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
1011 UCOL_TERTIARY,NULL, &status);
1012 if (U_FAILURE(status)) {
1013 log_err("ERROR: in creation of rule based collator :%s\n",
1014 myErrorName(status));
1015 return;
1016 }
1017
1018 /*
1019 this test is to verify the supplementary sort key order in the english
1020 collator
1021 */
1022 log_verbose("start of english collation supplementary characters test\n");
1023 while (count < 2) {
1024 doTest(enCollation, source[count], target[count], UCOL_LESS);
1025 count ++;
1026 }
1027 doTest(enCollation, source[count], target[count], UCOL_GREATER);
1028
1029 log_verbose("start of tailored collation supplementary characters test\n");
1030 count = 0;
1031 /* tests getting collation elements for surrogates for tailored rules */
1032 while (count < 4) {
1033 doTest(myCollation, source[count], target[count], UCOL_LESS);
1034 count ++;
1035 }
1036
1037 /* tests that \uD800\uDC02 still has the same value, not changed */
1038 enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
1039 mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
1040 if (enlen != mylen ||
1041 uprv_memcmp(enresult, myresult, enlen) != 0) {
1042 log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1043 }
1044
1045 free(rules);
1046 ucol_close(enCollation);
1047 ucol_close(myCollation);
1048 enCollation = NULL;
1049 myCollation = NULL;
1050 }
1051
1052 /*
1053 *### TODO: Add more invalid rules to test all different scenarios.
1054 *
1055 */
1056 static void
1057 TestInvalidRules(){
1058 #define MAX_ERROR_STATES 2
1059
1060 static const char* rulesArr[MAX_ERROR_STATES] = {
1061 "& C < ch, cH, Ch[this should fail]<d",
1062 "& C < ch, cH, & Ch[variable top]"
1063 };
1064 static const char* preContextArr[MAX_ERROR_STATES] = {
1065 "his should fail",
1066 "& C < ch, cH, ",
1067
1068 };
1069 static const char* postContextArr[MAX_ERROR_STATES] = {
1070 "<d",
1071 " Ch[variable t"
1072 };
1073 int i;
1074
1075 for(i = 0;i<MAX_ERROR_STATES;i++){
1076 UChar rules[1000] = { '\0' };
1077 UChar preContextExp[1000] = { '\0' };
1078 UChar postContextExp[1000] = { '\0' };
1079 UParseError parseError;
1080 UErrorCode status = U_ZERO_ERROR;
1081 UCollator* coll=0;
1082 u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1);
1083 u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1);
1084 u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1);
1085 /* clean up stuff in parseError */
1086 u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
1087 u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
1088 /* open the rules and test */
1089 coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status);
1090 if(u_strcmp(parseError.preContext,preContextExp)!=0){
1091 log_err("preContext in UParseError for ucol_openRules does not match\n");
1092 }
1093 if(u_strcmp(parseError.postContext,postContextExp)!=0){
1094 log_err("postContext in UParseError for ucol_openRules does not match\n");
1095 }
1096 }
1097 }
1098
1099 static void
1100 TestJitterbug1098(){
1101 UChar rule[1000];
1102 UCollator* c1 = NULL;
1103 UErrorCode status = U_ZERO_ERROR;
1104 UParseError parseError;
1105 char preContext[200]={0};
1106 char postContext[200]={0};
1107 int i=0;
1108 const char* rules[] = {
1109 "&''<\\\\",
1110 "&\\'<\\\\",
1111 "&\\\"<'\\'",
1112 "&'\"'<\\'",
1113 '\0'
1114
1115 };
1116 const UCollationResult results1098[] = {
1117 UCOL_LESS,
1118 UCOL_LESS,
1119 UCOL_LESS,
1120 UCOL_LESS,
1121 };
1122 const UChar input[][2]= {
1123 {0x0027,0x005c},
1124 {0x0027,0x005c},
1125 {0x0022,0x005c},
1126 {0x0022,0x0027},
1127 };
1128 UChar X[2] ={0};
1129 UChar Y[2] ={0};
1130 u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
1131 u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
1132 for(;rules[i]!=0;i++){
1133 u_uastrcpy(rule, rules[i]);
1134 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
1135 if(U_FAILURE(status)){
1136 log_err("Could not parse the rules syntax. Error: %s ", u_errorName(status));
1137
1138 if (status == U_PARSE_ERROR) {
1139 u_UCharsToChars(parseError.preContext,preContext,20);
1140 u_UCharsToChars(parseError.postContext,postContext,20);
1141 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
1142 }
1143
1144 return;
1145 }
1146 X[0] = input[i][0];
1147 Y[0] = input[i][1];
1148 doTest(c1,X,Y,results1098[i]);
1149 ucol_close(c1);
1150 }
1151 }
1152
/* These tests do cleanup and reinitialize ICU in the course of their operation.
 * The ICU data directory must be preserved across these operations.
 * Here is a helper function to assist with that.
 *
 * Returns a malloc'ed copy of the string returned by u_getDataDirectory(),
 * which the caller must free(), or NULL when there is no data directory
 * string or the copy could not be allocated.
 */
static char *safeGetICUDataDirectory(void) {
    const char *dataDir = u_getDataDirectory();  /* Returned string vanishes with u_cleanup */
    char *retStr = NULL;
    if (dataDir != NULL) {
        retStr = (char *)malloc(strlen(dataDir)+1);
        /* only copy when the allocation succeeded */
        if (retStr != NULL) {
            strcpy(retStr, dataDir);
        }
    }
    return retStr;
}
1166
1167
1168 static void
1169 TestFCDCrash(void) {
1170 static const char *test[] = {
1171 "Gr\\u00F6\\u00DFe",
1172 "Grossist"
1173 };
1174
1175 char *icuDataDir = safeGetICUDataDirectory();
1176 UErrorCode status = U_ZERO_ERROR;
1177 UCollator *coll = ucol_open("es", &status);
1178 if(U_FAILURE(status)) {
1179 log_err("Couldn't open collator\n");
1180 return;
1181 }
1182 ucol_close(coll);
1183 coll = NULL;
1184 u_cleanup();
1185 u_setDataDirectory(icuDataDir);
1186 coll = ucol_open("de_DE", &status);
1187 if(U_FAILURE(status)) {
1188 log_err("Couldn't open collator\n");
1189 return;
1190 }
1191 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1192 genericOrderingTest(coll, test, 2);
1193 ucol_close(coll);
1194 free(icuDataDir);
1195
1196 }
1197
1198 #endif /* #if !UCONFIG_NO_COLLATION */