]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cmsccoll.c
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cmsccoll.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2001-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
7 *
8 * File cmsccoll.C
9 *
10 *******************************************************************************/
11 /**
12 * These are the tests specific to ICU 1.8 and above, that I didn't know where
13 * to fit.
14 */
15
16 #include <stdio.h>
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_COLLATION
21
22 #include "unicode/ucol.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/uloc.h"
25 #include "cintltst.h"
26 #include "ccolltst.h"
27 #include "callcoll.h"
28 #include "unicode/ustring.h"
29 #include "string.h"
30 #include "ucol_imp.h"
31 #include "ucol_tok.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "uassert.h"
35 #include "unicode/parseerr.h"
36 #include "unicode/ucnv.h"
37 #include "uparse.h"
38
/*
 * Number of elements in a true array.  The argument must be an array, not a
 * pointer.  The argument is parenthesized before indexing so that any
 * expression can be passed safely.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Second dimension of the UChar test-case tables declared in this file. */
#define MAX_TOKEN_LEN 16
42
43 typedef int tst_strcoll(void *collator, const int object,
44 const UChar *source, const int sLen,
45 const UChar *target, const int tLen);
46
47
48
/*
 * Inputs for IncompleteCntTest.  Each table is listed in expected sort
 * order: every entry is compared against every later entry and must come
 * out as "less".
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
75
76 static void IncompleteCntTest(void)
77 {
78 UErrorCode status = U_ZERO_ERROR;
79 UChar temp[90];
80 UChar t1[90];
81 UChar t2[90];
82
83 UCollator *coll = NULL;
84 uint32_t i = 0, j = 0;
85 uint32_t size = 0;
86
87 u_uastrcpy(temp, " & Z < ABC < Q < B");
88
89 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
90
91 if(U_SUCCESS(status)) {
92 size = sizeof(cnt1)/sizeof(cnt1[0]);
93 for(i = 0; i < size-1; i++) {
94 for(j = i+1; j < size; j++) {
95 UCollationElements *iter;
96 u_uastrcpy(t1, cnt1[i]);
97 u_uastrcpy(t2, cnt1[j]);
98 doTest(coll, t1, t2, UCOL_LESS);
99 /* synwee : added collation element iterator test */
100 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
101 if (U_FAILURE(status)) {
102 log_err("Creation of iterator failed\n");
103 break;
104 }
105 backAndForth(iter);
106 ucol_closeElements(iter);
107 }
108 }
109 }
110
111 ucol_close(coll);
112
113
114 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
115 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
116
117 if(U_SUCCESS(status)) {
118 size = sizeof(cnt2)/sizeof(cnt2[0]);
119 for(i = 0; i < size-1; i++) {
120 for(j = i+1; j < size; j++) {
121 UCollationElements *iter;
122 u_uastrcpy(t1, cnt2[i]);
123 u_uastrcpy(t2, cnt2[j]);
124 doTest(coll, t1, t2, UCOL_LESS);
125
126 /* synwee : added collation element iterator test */
127 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
128 if (U_FAILURE(status)) {
129 log_err("Creation of iterator failed\n");
130 break;
131 }
132 backAndForth(iter);
133 ucol_closeElements(iter);
134 }
135 }
136 }
137
138 ucol_close(coll);
139
140
141 }
142
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_SHIFTED.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
154
155 const static UCollationResult shiftedTert[] = {
156 0,
157 UCOL_EQUAL,
158 UCOL_EQUAL,
159 UCOL_LESS,
160 UCOL_EQUAL,
161 UCOL_EQUAL,
162 UCOL_LESS,
163 UCOL_EQUAL,
164 UCOL_EQUAL
165 };
166
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_NON_IGNORABLE.
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
178
179 static void BlackBirdTest(void) {
180 UErrorCode status = U_ZERO_ERROR;
181 UChar t1[90];
182 UChar t2[90];
183
184 uint32_t i = 0, j = 0;
185 uint32_t size = 0;
186 UCollator *coll = ucol_open("en_US", &status);
187
188 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
189 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
190
191 if(U_SUCCESS(status)) {
192 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
193 for(i = 0; i < size-1; i++) {
194 for(j = i+1; j < size; j++) {
195 u_uastrcpy(t1, nonignorable[i]);
196 u_uastrcpy(t2, nonignorable[j]);
197 doTest(coll, t1, t2, UCOL_LESS);
198 }
199 }
200 }
201
202 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
203 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
204
205 if(U_SUCCESS(status)) {
206 size = sizeof(shifted)/sizeof(shifted[0]);
207 for(i = 0; i < size-1; i++) {
208 for(j = i+1; j < size; j++) {
209 u_uastrcpy(t1, shifted[i]);
210 u_uastrcpy(t2, shifted[j]);
211 doTest(coll, t1, t2, UCOL_LESS);
212 }
213 }
214 }
215
216 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
217 if(U_SUCCESS(status)) {
218 size = sizeof(shifted)/sizeof(shifted[0]);
219 for(i = 1; i < size; i++) {
220 u_uastrcpy(t1, shifted[i-1]);
221 u_uastrcpy(t2, shifted[i]);
222 doTest(coll, t1, t2, shiftedTert[i]);
223 }
224 }
225
226 ucol_close(coll);
227 }
228
229 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
230 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
231 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
232 {0x0041/*'A'*/, 0x0300, 0x0000},
233 {0x00C0, 0x0301, 0x0000},
234 /* this would work with forced normalization */
235 {0x00C0, 0x0316, 0x0000}
236 };
237
238 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
239 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
240 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
241 {0x00C0, 0},
242 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
243 /* this would work with forced normalization */
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
245 };
246
247 const static UCollationResult results[] = {
248 UCOL_GREATER,
249 UCOL_EQUAL,
250 UCOL_EQUAL,
251 UCOL_GREATER,
252 UCOL_EQUAL
253 };
254
255 static void FunkyATest(void)
256 {
257
258 int32_t i;
259 UErrorCode status = U_ZERO_ERROR;
260 UCollator *myCollation;
261 myCollation = ucol_open("en_US", &status);
262 if(U_FAILURE(status)){
263 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
264 return;
265 }
266 log_verbose("Testing some A letters, for some reason\n");
267 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
268 ucol_setStrength(myCollation, UCOL_TERTIARY);
269 for (i = 0; i < 4 ; i++)
270 {
271 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
272 }
273 ucol_close(myCollation);
274 }
275
276 UColAttributeValue caseFirst[] = {
277 UCOL_OFF,
278 UCOL_LOWER_FIRST,
279 UCOL_UPPER_FIRST
280 };
281
282
283 UColAttributeValue alternateHandling[] = {
284 UCOL_NON_IGNORABLE,
285 UCOL_SHIFTED
286 };
287
288 UColAttributeValue caseLevel[] = {
289 UCOL_OFF,
290 UCOL_ON
291 };
292
293 UColAttributeValue strengths[] = {
294 UCOL_PRIMARY,
295 UCOL_SECONDARY,
296 UCOL_TERTIARY,
297 UCOL_QUATERNARY,
298 UCOL_IDENTICAL
299 };
300
#if 0
/* Printable names, index-aligned with strengths[], caseFirst[],
 * alternateHandling[] and caseLevel[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "Mark Davis" (with its second code unit
 * replaced by 0xE4) for every combination of case-first, alternate
 * handling, case level and strength.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);


    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /* Index with h, the loop variable of this level.  i still holds
         * sizem from the dump loop above and would read past caseFirst[]. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }
}
#endif
378
379 static void BillFairmanTest(void) {
380 /*
381 ** check for actual locale via ICU resource bundles
382 **
383 ** lp points to the original locale ("fr_FR_....")
384 */
385
386 UResourceBundle *lr,*cr;
387 UErrorCode lec = U_ZERO_ERROR;
388 const char *lp = "fr_FR_you_ll_never_find_this_locale";
389
390 log_verbose("BillFairmanTest\n");
391
392 lr = ures_open(NULL,lp,&lec);
393 if (lr) {
394 cr = ures_getByKey(lr,"collations",0,&lec);
395 if (cr) {
396 lp = ures_getLocale(cr,&lec);
397 if (lp) {
398 if (U_SUCCESS(lec)) {
399 if(strcmp(lp, "fr") != 0) {
400 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
401 }
402 }
403 }
404 ures_close(cr);
405 }
406 ures_close(lr);
407 }
408 }
409
410 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
411 UChar source[256] = { '\0'};
412 UChar target[256] = { '\0'};
413 UChar preP = 0x31a3;
414 UChar preQ = 0x310d;
415 /*
416 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
417 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
418 */
419 /*log_verbose("Testing primary\n");*/
420
421 doTest(col, p, q, UCOL_LESS);
422 /*
423 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
424
425 if(result!=UCOL_LESS){
426 aescstrdup(p,utfSource,256);
427 aescstrdup(q,utfTarget,256);
428 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
429 }
430 */
431 source[0] = preP;
432 u_strcpy(source+1,p);
433 target[0] = preQ;
434 u_strcpy(target+1,q);
435 doTest(col, source, target, UCOL_LESS);
436 /*
437 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
438 */
439 }
440
441 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
442 UChar source[256] = { '\0'};
443 UChar target[256] = { '\0'};
444
445 /*log_verbose("Testing secondary\n");*/
446
447 doTest(col, p, q, UCOL_LESS);
448 /*
449 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
450 */
451 source[0] = 0x0053;
452 u_strcpy(source+1,p);
453 target[0]= 0x0073;
454 u_strcpy(target+1,q);
455
456 doTest(col, source, target, UCOL_LESS);
457 /*
458 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
459 */
460
461
462 u_strcpy(source,p);
463 source[u_strlen(p)] = 0x62;
464 source[u_strlen(p)+1] = 0;
465
466
467 u_strcpy(target,q);
468 target[u_strlen(q)] = 0x61;
469 target[u_strlen(q)+1] = 0;
470
471 doTest(col, source, target, UCOL_GREATER);
472
473 /*
474 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
475 */
476 }
477
478 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
479 UChar source[256] = { '\0'};
480 UChar target[256] = { '\0'};
481
482 /*log_verbose("Testing tertiary\n");*/
483
484 doTest(col, p, q, UCOL_LESS);
485 /*
486 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
487 */
488 source[0] = 0x0020;
489 u_strcpy(source+1,p);
490 target[0]= 0x002D;
491 u_strcpy(target+1,q);
492
493 doTest(col, source, target, UCOL_LESS);
494 /*
495 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
496 */
497
498 u_strcpy(source,p);
499 source[u_strlen(p)] = 0xE0;
500 source[u_strlen(p)+1] = 0;
501
502 u_strcpy(target,q);
503 target[u_strlen(q)] = 0x61;
504 target[u_strlen(q)+1] = 0;
505
506 doTest(col, source, target, UCOL_GREATER);
507
508 /*
509 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
510 */
511 }
512
513 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
514 /*
515 UChar source[256] = { '\0'};
516 UChar target[256] = { '\0'};
517 */
518
519 doTest(col, p, q, UCOL_EQUAL);
520 /*
521 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
522 */
523 }
524
525 static void testCollator(UCollator *coll, UErrorCode *status) {
526 const UChar *rules = NULL, *current = NULL;
527 int32_t ruleLen = 0;
528 uint32_t strength = 0;
529 uint32_t chOffset = 0; uint32_t chLen = 0;
530 uint32_t exOffset = 0; uint32_t exLen = 0;
531 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
532 uint32_t firstEx = 0;
533 /* uint32_t rExpsLen = 0; */
534 uint32_t firstLen = 0;
535 UBool varT = FALSE; UBool top_ = TRUE;
536 uint16_t specs = 0;
537 UBool startOfRules = TRUE;
538 UBool lastReset = FALSE;
539 UBool before = FALSE;
540 uint32_t beforeStrength = 0;
541 UColTokenParser src;
542 UColOptionSet opts;
543
544 UChar first[256];
545 UChar second[256];
546 UChar tempB[256];
547 uint32_t tempLen;
548 UChar *rulesCopy = NULL;
549 UParseError parseError;
550 src.opts = &opts;
551
552 rules = ucol_getRules(coll, &ruleLen);
553 if(U_SUCCESS(*status) && ruleLen > 0) {
554 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
555 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
556 src.current = src.source = rulesCopy;
557 src.end = rulesCopy+ruleLen;
558 src.extraCurrent = src.end;
559 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
560 *first = *second = 0;
561
562 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
563 strength = src.parsedToken.strength;
564 chOffset = src.parsedToken.charsOffset;
565 chLen = src.parsedToken.charsLen;
566 exOffset = src.parsedToken.extensionOffset;
567 exLen = src.parsedToken.extensionLen;
568 prefixOffset = src.parsedToken.prefixOffset;
569 prefixLen = src.parsedToken.prefixLen;
570 specs = src.parsedToken.flags;
571
572 startOfRules = FALSE;
573 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
574 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
575 if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
576 second[0] = 0;
577 } else {
578 u_strncpy(second,rulesCopy+chOffset, chLen);
579 second[chLen] = 0;
580
581 if(exLen > 0 && firstEx == 0) {
582 u_strncat(first, rulesCopy+exOffset, exLen);
583 first[firstLen+exLen] = 0;
584 }
585
586 if(lastReset == TRUE && prefixLen != 0) {
587 u_strncpy(first+prefixLen, first, firstLen);
588 u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
589 first[firstLen+prefixLen] = 0;
590 firstLen = firstLen+prefixLen;
591 }
592
593 if(before == TRUE) { /* swap first and second */
594 u_strcpy(tempB, first);
595 u_strcpy(first, second);
596 u_strcpy(second, tempB);
597
598 tempLen = firstLen;
599 firstLen = chLen;
600 chLen = tempLen;
601
602 tempLen = firstEx;
603 firstEx = exLen;
604 exLen = tempLen;
605 if(beforeStrength < strength) {
606 strength = beforeStrength;
607 }
608 }
609 }
610 lastReset = FALSE;
611
612 switch(strength){
613 case UCOL_IDENTICAL:
614 testEquality(coll,first,second);
615 break;
616 case UCOL_PRIMARY:
617 testPrimary(coll,first,second);
618 break;
619 case UCOL_SECONDARY:
620 testSecondary(coll,first,second);
621 break;
622 case UCOL_TERTIARY:
623 testTertiary(coll,first,second);
624 break;
625 case UCOL_TOK_RESET:
626 lastReset = TRUE;
627 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
628 if(before) {
629 beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
630 }
631 break;
632 default:
633 break;
634 }
635
636 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
637 before = FALSE;
638 } else {
639 firstLen = chLen;
640 firstEx = exLen;
641 u_strcpy(first, second);
642 }
643 }
644 free(rulesCopy);
645 }
646 }
647
648 static int ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
649 UCollator *UCA = (UCollator *)collator;
650 return ucol_strcoll(UCA, source, sLen, target, tLen);
651 }
652
653 /*
654 static int winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
655 #ifdef U_WINDOWS
656 LCID lcid = (LCID)collator;
657 return CompareString(lcid, 0, source, sLen, target, tLen);
658 #else
659 return 0;
660 #endif
661 }
662 */
663
664 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
665 UChar s1, UChar s2,
666 const UChar *s, const uint32_t sLen,
667 const UChar *t, const uint32_t tLen) {
668 UChar source[256] = {0};
669 UChar target[256] = {0};
670
671 source[0] = s1;
672 u_strcpy(source+1, s);
673 target[0] = s2;
674 u_strcpy(target+1, t);
675
676 return func(collator, opts, source, sLen+1, target, tLen+1);
677 }
678
679 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
680 UChar s1, UChar s2,
681 const UChar *s, const uint32_t sLen,
682 const UChar *t, const uint32_t tLen) {
683 UChar source[256] = {0};
684 UChar target[256] = {0};
685
686 u_strcpy(source, s);
687 source[sLen] = s1;
688 u_strcpy(target, t);
689 target[tLen] = s2;
690
691 return func(collator, opts, source, sLen+1, target, tLen+1);
692 }
693
694 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
695 const UChar *s, const uint32_t sLen,
696 const UChar *t, const uint32_t tLen,
697 UCollationResult result) {
698 /*UChar fPrimary = 0x6d;*/
699 /*UChar sPrimary = 0x6e;*/
700 UChar fSecondary = 0x310d;
701 UChar sSecondary = 0x31a3;
702 UChar fTertiary = 0x310f;
703 UChar sTertiary = 0x31b7;
704
705 UCollationResult oposite;
706 if(result == UCOL_EQUAL) {
707 return UCOL_IDENTICAL;
708 } else if(result == UCOL_GREATER) {
709 oposite = UCOL_LESS;
710 } else {
711 oposite = UCOL_GREATER;
712 }
713
714 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
715 return UCOL_PRIMARY;
716 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
717 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
718 return UCOL_SECONDARY;
719 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
720 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
721 return UCOL_TERTIARY;
722 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
723 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
724 return UCOL_QUATERNARY;
725 } else {
726 return UCOL_IDENTICAL;
727 }
728 }
729
730 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
731 uint32_t i = 0;
732
733 if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
734 buffer[0] = '=';
735 buffer[1] = '=';
736 buffer[2] = '\0';
737 } else if(res == UCOL_GREATER) {
738 for(i = 0; i<strength+1; i++) {
739 buffer[i] = '>';
740 }
741 buffer[strength+1] = '\0';
742 } else {
743 for(i = 0; i<strength+1; i++) {
744 buffer[i] = '<';
745 }
746 buffer[strength+1] = '\0';
747 }
748
749 return buffer;
750 }
751
752
753
754 static void logFailure (const char *platform, const char *test,
755 const UChar *source, const uint32_t sLen,
756 const UChar *target, const uint32_t tLen,
757 UCollationResult realRes, uint32_t realStrength,
758 UCollationResult expRes, uint32_t expStrength, UBool error) {
759
760 uint32_t i = 0;
761
762 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
763 static int32_t maxOutputLength = 0;
764 int32_t outputLength;
765
766 *sEsc = *tEsc = *s = *t = 0;
767 if(error == TRUE) {
768 log_err("Difference between expected and generated order. Run test with -v for more info\n");
769 } else if(VERBOSITY == 0) {
770 return;
771 }
772 for(i = 0; i<sLen; i++) {
773 sprintf(b, "%04X", source[i]);
774 strcat(sEsc, "\\u");
775 strcat(sEsc, b);
776 strcat(s, b);
777 strcat(s, " ");
778 if(source[i] < 0x80) {
779 sprintf(b, "(%c)", source[i]);
780 strcat(sEsc, b);
781 }
782 }
783 for(i = 0; i<tLen; i++) {
784 sprintf(b, "%04X", target[i]);
785 strcat(tEsc, "\\u");
786 strcat(tEsc, b);
787 strcat(t, b);
788 strcat(t, " ");
789 if(target[i] < 0x80) {
790 sprintf(b, "(%c)", target[i]);
791 strcat(tEsc, b);
792 }
793 }
794 /*
795 strcpy(output, "[[ ");
796 strcat(output, sEsc);
797 strcat(output, getRelationSymbol(expRes, expStrength, relation));
798 strcat(output, tEsc);
799
800 strcat(output, " : ");
801
802 strcat(output, sEsc);
803 strcat(output, getRelationSymbol(realRes, realStrength, relation));
804 strcat(output, tEsc);
805 strcat(output, " ]] ");
806
807 log_verbose("%s", output);
808 */
809
810
811 strcpy(output, "DIFF: ");
812
813 strcat(output, s);
814 strcat(output, " : ");
815 strcat(output, t);
816
817 strcat(output, test);
818 strcat(output, ": ");
819
820 strcat(output, sEsc);
821 strcat(output, getRelationSymbol(expRes, expStrength, relation));
822 strcat(output, tEsc);
823
824 strcat(output, " ");
825
826 strcat(output, platform);
827 strcat(output, ": ");
828
829 strcat(output, sEsc);
830 strcat(output, getRelationSymbol(realRes, realStrength, relation));
831 strcat(output, tEsc);
832
833 outputLength = (int32_t)strlen(output);
834 if(outputLength > maxOutputLength) {
835 maxOutputLength = outputLength;
836 U_ASSERT(outputLength < sizeof(output));
837 }
838
839 log_verbose("%s\n", output);
840
841 }
842
843 /*
844 static void printOutRules(const UChar *rules) {
845 uint32_t len = u_strlen(rules);
846 uint32_t i = 0;
847 char toPrint;
848 uint32_t line = 0;
849
850 fprintf(stdout, "Rules:");
851
852 for(i = 0; i<len; i++) {
853 if(rules[i]<0x7f && rules[i]>=0x20) {
854 toPrint = (char)rules[i];
855 if(toPrint == '&') {
856 line = 1;
857 fprintf(stdout, "\n&");
858 } else if(toPrint == ';') {
859 fprintf(stdout, "<<");
860 line+=2;
861 } else if(toPrint == ',') {
862 fprintf(stdout, "<<<");
863 line+=3;
864 } else {
865 fprintf(stdout, "%c", toPrint);
866 line++;
867 }
868 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
869 fprintf(stdout, "\\u%04X", rules[i]);
870 line+=6;
871 }
872 if(line>72) {
873 fprintf(stdout, "\n");
874 line = 0;
875 }
876 }
877
878 log_verbose("\n");
879
880 }
881 */
882
883 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
884 uint32_t diffs = 0;
885 UCollationResult realResult;
886 uint32_t realStrength;
887
888 uint32_t sLen = u_strlen(first);
889 uint32_t tLen = u_strlen(second);
890
891 realResult = func(collator, opts, first, sLen, second, tLen);
892 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
893
894 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
895 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
896 diffs++;
897 } else if(realResult != UCOL_LESS || realStrength != strength) {
898 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
899 diffs++;
900 }
901 return diffs;
902 }
903
904
905 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
906 const UChar *rules = NULL, *current = NULL;
907 int32_t ruleLen = 0;
908 uint32_t strength = 0;
909 uint32_t chOffset = 0; uint32_t chLen = 0;
910 uint32_t exOffset = 0; uint32_t exLen = 0;
911 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
912 /* uint32_t rExpsLen = 0; */
913 uint32_t firstLen = 0, secondLen = 0;
914 UBool varT = FALSE; UBool top_ = TRUE;
915 uint16_t specs = 0;
916 UBool startOfRules = TRUE;
917 UColTokenParser src;
918 UColOptionSet opts;
919
920 UChar first[256];
921 UChar second[256];
922 UChar *rulesCopy = NULL;
923
924 uint32_t UCAdiff = 0;
925 uint32_t Windiff = 1;
926 UParseError parseError;
927
928 src.opts = &opts;
929
930 rules = ucol_getRules(coll, &ruleLen);
931
932 /*printOutRules(rules);*/
933
934 if(U_SUCCESS(*status) && ruleLen > 0) {
935 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
936 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
937 src.current = src.source = rulesCopy;
938 src.end = rulesCopy+ruleLen;
939 src.extraCurrent = src.end;
940 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
941 *first = *second = 0;
942
943 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
944 strength = src.parsedToken.strength;
945 chOffset = src.parsedToken.charsOffset;
946 chLen = src.parsedToken.charsLen;
947 exOffset = src.parsedToken.extensionOffset;
948 exLen = src.parsedToken.extensionLen;
949 prefixOffset = src.parsedToken.prefixOffset;
950 prefixLen = src.parsedToken.prefixLen;
951 specs = src.parsedToken.flags;
952
953 startOfRules = FALSE;
954 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
955 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
956
957 u_strncpy(second,rulesCopy+chOffset, chLen);
958 second[chLen] = 0;
959 secondLen = chLen;
960
961 if(exLen > 0) {
962 u_strncat(first, rulesCopy+exOffset, exLen);
963 first[firstLen+exLen] = 0;
964 firstLen += exLen;
965 }
966
967 if(strength != UCOL_TOK_RESET) {
968 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
969 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
970 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
971 }
972 }
973
974
975 firstLen = chLen;
976 u_strcpy(first, second);
977
978 }
979 if(UCAdiff != 0 && Windiff != 0) {
980 log_verbose("\n");
981 }
982 if(UCAdiff == 0) {
983 log_verbose("No immediate difference with %s!\n", refName);
984 }
985 if(Windiff == 0) {
986 log_verbose("No immediate difference with Win32!\n");
987 }
988 free(rulesCopy);
989 }
990 }
991
992 /*
993 * Takes two CEs (lead and continuation) and
994 * compares them as CEs should be compared:
995 * primary vs. primary, secondary vs. secondary
996 * tertiary vs. tertiary
997 */
/*
 * Orders the CE pair (s1, s2) against the CE pair (t1, t2), where the
 * second member of each pair is the continuation.  The pairs are compared
 * level by level: bits 31..16 of both members first, then bits 15..8 of
 * both, then bits 7..0 of both.
 *
 * Returns 0 if both pairs are bit-for-bit equal, -1 if the s pair sorts
 * first, 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t sKey, tKey;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* bits 31..16: lead in the high half, continuation in the low half */
    sKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    tKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* bits 15..8 */
    sKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    tKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* bits 7..0; the pairs are known to differ, so these cannot tie */
    sKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    tKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (sKey < tKey) ? -1 : 1;
}
1028
/*
 * CE boundaries recorded for one indirect position: a start CE and a limit
 * CE, each stored together with its continuation CE.
 */
typedef struct {
    uint32_t startCE;      /* start CE */
    uint32_t startContCE;  /* continuation of the start CE */
    uint32_t limitCE;      /* limit CE */
    uint32_t limitContCE;  /* continuation of the limit CE */
} indirectBoundaries;
1035
1036 /* these values are used for finding CE values for indirect positioning. */
1037 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1038 /* values. It only works for resets and you cannot tailor indirect names */
1039 /* An indirect name can define either an anchor point or a range. An */
1040 /* anchor point behaves in exactly the same way as a code point in reset */
1041 /* would, except that it cannot be tailored. A range (we currently only */
1042 /* know for the [top] range will explicitly set the upper bound for */
1043 /* generated CEs, thus allowing for better control over how many CEs can */
1044 /* be squeezed between in the range without performance penalty. */
1045 /* In that respect, we use [top] for tailoring of locales that use CJK */
1046 /* characters. Other indirect values are currently a pure convenience, */
1047 /* they can be used to assure that the CEs will be always positioned in */
1048 /* the same place relative to a point with known properties (e.g. first */
1049 /* primary ignorable). */
1050 static indirectBoundaries ucolIndirectBoundaries[15];
1051 static UBool indirectBoundariesSet = FALSE;
1052 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1053
1054 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1055 /* to initalize here. */
1056 ucolIndirectBoundaries[indexR].startCE = start[0];
1057 ucolIndirectBoundaries[indexR].startContCE = start[1];
1058 if(end) {
1059 ucolIndirectBoundaries[indexR].limitCE = end[0];
1060 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1061 } else {
1062 ucolIndirectBoundaries[indexR].limitCE = 0;
1063 ucolIndirectBoundaries[indexR].limitContCE = 0;
1064 }
1065 }
1066
1067 static void testCEs(UCollator *coll, UErrorCode *status) {
1068
1069 const UChar *rules = NULL, *current = NULL;
1070 int32_t ruleLen = 0;
1071
1072 uint32_t strength = 0;
1073 uint32_t maxStrength = UCOL_IDENTICAL;
1074 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1075 uint32_t lastCE;
1076 uint32_t lastContCE;
1077
1078 int32_t result = 0;
1079 uint32_t chOffset = 0; uint32_t chLen = 0;
1080 uint32_t exOffset = 0; uint32_t exLen = 0;
1081 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1082 uint32_t oldOffset = 0;
1083
1084 /* uint32_t rExpsLen = 0; */
1085 /* uint32_t firstLen = 0; */
1086 uint16_t specs = 0;
1087 UBool varT = FALSE; UBool top_ = TRUE;
1088 UBool startOfRules = TRUE;
1089 UBool before = FALSE;
1090 UColTokenParser src;
1091 UColOptionSet opts;
1092 UParseError parseError;
1093 UChar *rulesCopy = NULL;
1094 collIterate c;
1095 UCollator *UCA = ucol_open("root", status);
1096 UCAConstants *consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1097 uint32_t UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0], /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1], */
1098 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0], UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1099
1100 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1101
1102 src.opts = &opts;
1103
1104 rules = ucol_getRules(coll, &ruleLen);
1105
1106 src.invUCA = ucol_initInverseUCA(status);
1107
1108 if(indirectBoundariesSet == FALSE) {
1109 /* UCOL_RESET_TOP_VALUE */
1110 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1111 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1112 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1113 /* UCOL_LAST_PRIMARY_IGNORABLE */
1114 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1115 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1116 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1117 /* UCOL_LAST_SECONDARY_IGNORABLE */
1118 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1119 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1120 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1121 /* UCOL_LAST_TERTIARY_IGNORABLE */
1122 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1123 /* UCOL_FIRST_VARIABLE */
1124 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1125 /* UCOL_LAST_VARIABLE */
1126 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1127 /* UCOL_FIRST_NON_VARIABLE */
1128 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1129 /* UCOL_LAST_NON_VARIABLE */
1130 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1131 /* UCOL_FIRST_IMPLICIT */
1132 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1133 /* UCOL_LAST_IMPLICIT */
1134 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1135 /* UCOL_FIRST_TRAILING */
1136 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1137 /* UCOL_LAST_TRAILING */
1138 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1139 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1140 indirectBoundariesSet = TRUE;
1141 }
1142
1143
1144 if(U_SUCCESS(*status) && ruleLen > 0) {
1145 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1146 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1147 src.current = src.source = rulesCopy;
1148 src.end = rulesCopy+ruleLen;
1149 src.extraCurrent = src.end;
1150 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1151
1152 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1153 strength = src.parsedToken.strength;
1154 chOffset = src.parsedToken.charsOffset;
1155 chLen = src.parsedToken.charsLen;
1156 exOffset = src.parsedToken.extensionOffset;
1157 exLen = src.parsedToken.extensionLen;
1158 prefixOffset = src.parsedToken.prefixOffset;
1159 prefixLen = src.parsedToken.prefixLen;
1160 specs = src.parsedToken.flags;
1161
1162 startOfRules = FALSE;
1163 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1164 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1165
1166 uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
1167
1168 currCE = ucol_getNextCE(coll, &c, status);
1169 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
1170 log_verbose("Thai prevowel detected. Will pick next CE\n");
1171 currCE = ucol_getNextCE(coll, &c, status);
1172 }
1173
1174 currContCE = ucol_getNextCE(coll, &c, status);
1175 if(!isContinuation(currContCE)) {
1176 currContCE = 0;
1177 }
1178
1179 /* we need to repack CEs here */
1180
1181 if(strength == UCOL_TOK_RESET) {
1182 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1183 if(top_ == TRUE) {
1184 int32_t index = src.parsedToken.indirectIndex;
1185
1186 nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
1187 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
1188 } else {
1189 nextCE = baseCE = currCE;
1190 nextContCE = baseContCE = currContCE;
1191 }
1192 maxStrength = UCOL_IDENTICAL;
1193 } else {
1194 if(strength < maxStrength) {
1195 maxStrength = strength;
1196 if(baseCE == UCOL_RESET_TOP_VALUE) {
1197 log_verbose("Resetting to [top]\n");
1198 nextCE = UCOL_NEXT_TOP_VALUE;
1199 nextContCE = UCOL_NEXT_TOP_CONT;
1200 } else {
1201 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1202 }
1203 if(result < 0) {
1204 if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
1205 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
1206 return;
1207 } else {
1208 log_err("couldn't find the CE\n");
1209 return;
1210 }
1211 }
1212 }
1213
1214 currCE &= 0xFFFFFF3F;
1215 currContCE &= 0xFFFFFFBF;
1216
1217 if(maxStrength == UCOL_IDENTICAL) {
1218 if(baseCE != currCE || baseContCE != currContCE) {
1219 log_err("current CE (initial strength UCOL_EQUAL)\n");
1220 }
1221 } else {
1222 if(strength == UCOL_IDENTICAL) {
1223 if(lastCE != currCE || lastContCE != currContCE) {
1224 log_err("current CE (initial strength UCOL_EQUAL)\n");
1225 }
1226 } else {
1227 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1228 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1229 log_err("current CE is not less than base CE\n");
1230 }
1231 if(!before) {
1232 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1233 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1234 log_err("sequence of generated CEs is broken\n");
1235 }
1236 } else {
1237 before = FALSE;
1238 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1239 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1240 log_err("sequence of generated CEs is broken\n");
1241 }
1242 }
1243 }
1244 }
1245
1246 }
1247
1248 oldOffset = chOffset;
1249 lastCE = currCE & 0xFFFFFF3F;
1250 lastContCE = currContCE & 0xFFFFFFBF;
1251 }
1252 free(rulesCopy);
1253 }
1254 ucol_close(UCA);
1255 }
1256
1257 #if 0
1258 /* these locales are now picked from index RB */
/* Hard-coded locale list, one group per line; compiled out by the enclosing #if 0. */
static const char *localesToTest[] = {
    "ar", "bg", "ca", "cs", "da", "el",
    "en_BE", "en_US_POSIX",
    "es", "et", "fi", "fr", "hi", "hr",
    "hu", "is", "iw", "ja", "ko", "lt",
    "lv", "mk", "mt", "nb", "nn", "nn_NO",
    "pl", "ro", "ru", "sh", "sk", "sl",
    "sq", "sr", "sv", "th", "tr", "uk",
    "vi", "zh", "zh_TW"
};
1270 #endif
1271
/*
 * Hand-written tailorings that RamsRulesTest runs in addition to the
 * per-locale rules. The strings are still escaped; they are passed through
 * u_unescape() before being handed to ucol_openRules().
 */
static const char *rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

    /* disabled: "& Z < p, P" */

    /* Cui Mins rules (the comment before each entry is the older spelling) */
    /* "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
    /* "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
    /* "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
    /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
    /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
    /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
    /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
};
1285
1286
1287 static void TestCollations(void) {
1288 int32_t noOfLoc = uloc_countAvailable();
1289 int32_t i = 0, j = 0;
1290
1291 UErrorCode status = U_ZERO_ERROR;
1292 char cName[256];
1293 UChar name[256];
1294 int32_t nameSize;
1295
1296
1297 const char *locName = NULL;
1298 UCollator *coll = NULL;
1299 UCollator *UCA = ucol_open("", &status);
1300 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1301 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1302
1303 for(i = 0; i<noOfLoc; i++) {
1304 status = U_ZERO_ERROR;
1305 locName = uloc_getAvailable(i);
1306 if(uprv_strcmp("ja", locName) == 0) {
1307 log_verbose("Don't know how to test prefixes\n");
1308 continue;
1309 }
1310 if(hasCollationElements(locName)) {
1311 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1312 for(j = 0; j<nameSize; j++) {
1313 cName[j] = (char)name[j];
1314 }
1315 cName[nameSize] = 0;
1316 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1317 coll = ucol_open(locName, &status);
1318 if(U_SUCCESS(status)) {
1319 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1320 ucol_close(coll);
1321 } else {
1322 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1323 status = U_ZERO_ERROR;
1324 }
1325 }
1326 }
1327 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1328 ucol_close(UCA);
1329 }
1330
1331 static void RamsRulesTest(void) {
1332 UErrorCode status = U_ZERO_ERROR;
1333 int32_t i = 0;
1334 UCollator *coll = NULL;
1335 UChar rule[2048];
1336 uint32_t ruleLen;
1337 int32_t noOfLoc = uloc_countAvailable();
1338 const char *locName = NULL;
1339
1340 log_verbose("RamsRulesTest\n");
1341
1342 for(i = 0; i<noOfLoc; i++) {
1343 status = U_ZERO_ERROR;
1344 locName = uloc_getAvailable(i);
1345 if(hasCollationElements(locName)) {
1346 if (uprv_strcmp("ja", locName)==0) {
1347 log_verbose("Don't know how to test Japanese because of prefixes\n");
1348 continue;
1349 }
1350 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1351 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1352 continue;
1353 }
1354 log_verbose("Testing locale %s\n", locName);
1355 coll = ucol_open(locName, &status);
1356 if(U_SUCCESS(status)) {
1357 if(coll->image->jamoSpecial == TRUE) {
1358 log_err("%s has special JAMOs\n", locName);
1359 }
1360 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1361 testCollator(coll, &status);
1362 testCEs(coll, &status);
1363 ucol_close(coll);
1364 }
1365 }
1366 }
1367
1368 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1369 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1370 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1371 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1372 if(U_SUCCESS(status)) {
1373 testCollator(coll, &status);
1374 testCEs(coll, &status);
1375 ucol_close(coll);
1376 }
1377 }
1378
1379 }
1380
1381 static void IsTailoredTest(void) {
1382 UErrorCode status = U_ZERO_ERROR;
1383 uint32_t i = 0;
1384 UCollator *coll = NULL;
1385 UChar rule[2048];
1386 UChar tailored[2048];
1387 UChar notTailored[2048];
1388 uint32_t ruleLen, tailoredLen, notTailoredLen;
1389
1390 log_verbose("IsTailoredTest\n");
1391
1392 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1393 ruleLen = u_strlen(rule);
1394
1395 u_uastrcpy(tailored, "ABCcd");
1396 tailoredLen = u_strlen(tailored);
1397
1398 u_uastrcpy(notTailored, "ZabD");
1399 notTailoredLen = u_strlen(notTailored);
1400
1401 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1402 if(U_SUCCESS(status)) {
1403 for(i = 0; i<tailoredLen; i++) {
1404 if(!ucol_isTailored(coll, tailored[i], &status)) {
1405 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1406 }
1407 }
1408 for(i = 0; i<notTailoredLen; i++) {
1409 if(ucol_isTailored(coll, notTailored[i], &status)) {
1410 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1411 }
1412 }
1413 ucol_close(coll);
1414 }
1415 }
1416
1417
/*
 * Escaped test strings for TestChMove, listed in the order that test expects
 * from the "cs" collator (every entry must sort before all later entries).
 */
static const char chTest[][20] = {
    "c", "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h", "H",
    "ha", "Ha", "harly",
    "hb", "HB",
    "hx", "HX",
    "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s", "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
1437
1438 static void TestChMove(void) {
1439 UChar t1[256] = {0};
1440 UChar t2[256] = {0};
1441
1442 uint32_t i = 0, j = 0;
1443 uint32_t size = 0;
1444 UErrorCode status = U_ZERO_ERROR;
1445
1446 UCollator *coll = ucol_open("cs", &status);
1447
1448 if(U_SUCCESS(status)) {
1449 size = sizeof(chTest)/sizeof(chTest[0]);
1450 for(i = 0; i < size-1; i++) {
1451 for(j = i+1; j < size; j++) {
1452 u_unescape(chTest[i], t1, 256);
1453 u_unescape(chTest[j], t2, 256);
1454 doTest(coll, t1, t2, UCOL_LESS);
1455 }
1456 }
1457 }
1458 else {
1459 log_err("Can't open collator");
1460 }
1461 ucol_close(coll);
1462 }
1463
1464
1465
1466
/*
 * Escaped strings in the order produced by the rule
 * "&\u4e00 < a <<< A < b <<< B"; only referenced from the disabled code at
 * the end of TestImplicitTailoring.
 */
static const char impTest[][20] = {
    "\\u4e00",
    "a", "A",
    "b", "B",
    "\\u4e01"
};
1475
1476
1477 static void TestImplicitTailoring(void) {
1478 static struct {
1479 const char *rules;
1480 const char *data[50];
1481 const uint32_t len;
1482 } tests[] = {
1483 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1484 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1485 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1486 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1487 };
1488
1489 int32_t i = 0;
1490
1491 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
1492 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
1493 }
1494
1495 /*
1496 UChar t1[256] = {0};
1497 UChar t2[256] = {0};
1498
1499 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1500
1501 uint32_t i = 0, j = 0;
1502 uint32_t size = 0;
1503 uint32_t ruleLen = 0;
1504 UErrorCode status = U_ZERO_ERROR;
1505 UCollator *coll = NULL;
1506 ruleLen = u_unescape(rule, t1, 256);
1507
1508 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1509
1510 if(U_SUCCESS(status)) {
1511 size = sizeof(impTest)/sizeof(impTest[0]);
1512 for(i = 0; i < size-1; i++) {
1513 for(j = i+1; j < size; j++) {
1514 u_unescape(impTest[i], t1, 256);
1515 u_unescape(impTest[j], t2, 256);
1516 doTest(coll, t1, t2, UCOL_LESS);
1517 }
1518 }
1519 }
1520 else {
1521 log_err("Can't open collator");
1522 }
1523 ucol_close(coll);
1524 */
1525 }
1526
1527 static void TestFCDProblem(void) {
1528 UChar t1[256] = {0};
1529 UChar t2[256] = {0};
1530
1531 const char *s1 = "\\u0430\\u0306\\u0325";
1532 const char *s2 = "\\u04D1\\u0325";
1533
1534 UErrorCode status = U_ZERO_ERROR;
1535 UCollator *coll = ucol_open("", &status);
1536 u_unescape(s1, t1, 256);
1537 u_unescape(s2, t2, 256);
1538
1539 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1540 doTest(coll, t1, t2, UCOL_EQUAL);
1541
1542 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1543 doTest(coll, t1, t2, UCOL_EQUAL);
1544
1545 ucol_close(coll);
1546 }
1547
1548 #define NORM_BUFFER_TEST_LEN 32
1549 typedef struct {
1550 UChar32 u;
1551 UChar NFC[NORM_BUFFER_TEST_LEN];
1552 UChar NFD[NORM_BUFFER_TEST_LEN];
1553 } tester;
1554
1555 static void TestComposeDecompose(void) {
1556 int32_t noOfLoc;
1557 int32_t i = 0, j = 0;
1558
1559 UErrorCode status = U_ZERO_ERROR;
1560
1561 const char *locName = NULL;
1562
1563 uint32_t nfcSize;
1564 uint32_t nfdSize;
1565 tester **t;
1566 uint32_t noCases = 0;
1567 UCollator *coll = NULL;
1568 UChar32 u = 0;
1569 UChar comp[NORM_BUFFER_TEST_LEN];
1570 uint32_t len = 0;
1571 UCollationElements *iter;
1572
1573 noOfLoc = uloc_countAvailable();
1574
1575 t = malloc(0x30000 * sizeof(tester *));
1576 t[0] = (tester *)malloc(sizeof(tester));
1577 log_verbose("Testing UCA extensively\n");
1578 coll = ucol_open("", &status);
1579 if(status == U_FILE_ACCESS_ERROR) {
1580 log_data_err("Is your data around?\n");
1581 return;
1582 } else if(U_FAILURE(status)) {
1583 log_err("Error opening collator\n");
1584 return;
1585 }
1586
1587
1588 for(u = 0; u < 0x30000; u++) {
1589 len = 0;
1590 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1591 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1592 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1593
1594 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1595 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1596 t[noCases]->u = u;
1597 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1598 u_strncpy(t[noCases]->NFC, comp, len);
1599 t[noCases]->NFC[len] = 0;
1600 }
1601 noCases++;
1602 t[noCases] = (tester *)malloc(sizeof(tester));
1603 uprv_memset(t[noCases], 0, sizeof(tester));
1604 }
1605 }
1606
1607 for(u=0; u<(UChar32)noCases; u++) {
1608 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1609 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1610 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1611 }
1612 }
1613 /*
1614 for(u = 0; u < 0x30000; u++) {
1615 if(!(u&0xFFFF)) {
1616 log_verbose("%08X ", u);
1617 }
1618 uprv_memset(t[noCases], 0, sizeof(tester));
1619 t[noCases]->u = u;
1620 len = 0;
1621 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1622 comp[len] = 0;
1623 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1624 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1625 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1626 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1627 }
1628 */
1629
1630 ucol_close(coll);
1631
1632 log_verbose("Testing locales, number of cases = %i\n", noCases);
1633 for(i = 0; i<noOfLoc; i++) {
1634 status = U_ZERO_ERROR;
1635 locName = uloc_getAvailable(i);
1636 if(hasCollationElements(locName)) {
1637 char cName[256];
1638 UChar name[256];
1639 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1640
1641 for(j = 0; j<nameSize; j++) {
1642 cName[j] = (char)name[j];
1643 }
1644 cName[nameSize] = 0;
1645 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1646
1647 coll = ucol_open(locName, &status);
1648 ucol_setStrength(coll, UCOL_IDENTICAL);
1649 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1650
1651 for(u=0; u<(UChar32)noCases; u++) {
1652 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1653 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1654 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1655 log_verbose("Testing NFC\n");
1656 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1657 backAndForth(iter);
1658 log_verbose("Testing NFD\n");
1659 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1660 backAndForth(iter);
1661 }
1662 }
1663 ucol_closeElements(iter);
1664 ucol_close(coll);
1665 }
1666 }
1667 for(u = 0; u <= (UChar32)noCases; u++) {
1668 free(t[u]);
1669 }
1670 free(t);
1671 }
1672
1673 static void TestEmptyRule(void) {
1674 UErrorCode status = U_ZERO_ERROR;
1675 UChar rulez[] = { 0 };
1676 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1677
1678 ucol_close(coll);
1679 }
1680
1681 static void TestUCARules(void) {
1682 UErrorCode status = U_ZERO_ERROR;
1683 UChar b[256];
1684 UChar *rules = b;
1685 uint32_t ruleLen = 0;
1686 UCollator *UCAfromRules = NULL;
1687 UCollator *coll = ucol_open("", &status);
1688 if(status == U_FILE_ACCESS_ERROR) {
1689 log_data_err("Is your data around?\n");
1690 return;
1691 } else if(U_FAILURE(status)) {
1692 log_err("Error opening collator\n");
1693 return;
1694 }
1695 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1696
1697 log_verbose("TestUCARules\n");
1698 if(ruleLen > 256) {
1699 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1700 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1701 }
1702 log_verbose("Rules length is %d\n", ruleLen);
1703 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1704 if(U_SUCCESS(status)) {
1705 ucol_close(UCAfromRules);
1706 } else {
1707 log_verbose("Unable to create a collator from UCARules!\n");
1708 }
1709 /*
1710 u_unescape(blah, b, 256);
1711 ucol_getSortKey(coll, b, 1, res, 256);
1712 */
1713 ucol_close(coll);
1714 if(rules != b) {
1715 free(rules);
1716 }
1717 }
1718
1719
1720 /* Pinyin tonal order */
1721 /*
1722 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1723 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1724 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1725 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1726 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1727 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1728 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1729 .. (\u00fc)
1730
1731 However, in testing we got the following order:
1732 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1733 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1734 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1735 .. (\u0113)
1736 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1737 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1738 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1739 .. (\u01d8)
1740 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1741 */
1742
/*
 * Pinyin tonal order via [before 1]: for each of a, e, i, o, u the four
 * accented forms are placed before the base letter, and the u-diaeresis
 * variants are placed after u. The data lists the strings in expected order.
 */
static void TestBefore(void) {
    static const char pinyinRules[] =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    static const char *data[] = {
        /* a */ "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        /* e */ "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        /* i */ "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        /* o */ "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        /* u */ "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        /* after u */ "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };

    genericRulesStarter(pinyinRules, data, sizeof(data)/sizeof(data[0]));
}
1761
1762 #if 0
/* superseded by TestBeforePinyin */
static void TestJ784(void) {
    static const char *data[] = {
        /* a */ "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        /* e */ "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        /* i */ "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        /* o */ "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        /* u */ "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        /* u with diaeresis */
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };

    /* Expected order of the strings above under the "zh" locale collator. */
    genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
1776 #endif
1777
1778 #if 0
/* superseded by the changes to the lv locale */
static void TestJ831(void) {
    static const char *data[] = { "I", "i", "Y", "y" };

    /* Expected order of the strings above under the "lv" locale collator. */
    genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
1789 #endif
1790
1791 static void TestJ815(void) {
1792 const static char *data[] = {
1793 "aa",
1794 "Aa",
1795 "ab",
1796 "Ab",
1797 "ad",
1798 "Ad",
1799 "ae",
1800 "Ae",
1801 "\\u00e6",
1802 "\\u00c6",
1803 "af",
1804 "Af",
1805 "b",
1806 "B"
1807 };
1808 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
1809 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
1810 }
1811
1812
1813 /*
1814 "& a < b < c < d& r < c", "& a < b < d& r < c",
1815 "& a < b < c < d& c < m", "& a < b < c < m < d",
1816 "& a < b < c < d& a < m", "& a < m < b < c < d",
1817 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1818 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1819 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1820 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1821 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1822 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1823 */
1824 static void TestRedundantRules(void) {
1825 int32_t i;
1826
1827 struct {
1828 const char *rules;
1829 const char *expectedRules;
1830 const char *testdata[8];
1831 uint32_t testdatalen;
1832 } tests[] = {
1833 /* this test conflicts with positioning of CODAN placeholder */
1834 /*{
1835 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1836 "&\\u2089<<<x",
1837 {"\\u2089", "x"}, 2
1838 }, */
1839 /* this test conflicts with the [before x] syntax tightening */
1840 /*{
1841 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1842 "&\\u0252<<<x",
1843 {"\\u0252", "x"}, 2
1844 }, */
1845 /* this test conflicts with the [before x] syntax tightening */
1846 /*{
1847 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1848 "& a <<< x < b <<< c << d <<< e",
1849 {"a", "x", "b", "c", "d", "e"}, 6
1850 }, */
1851 {
1852 "& a < b < c < d& [before 1] c < m",
1853 "& a < b < m < c < d",
1854 {"a", "b", "m", "c", "d"}, 5
1855 },
1856 {
1857 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1858 "& a < b <<< c << d <<< x <<< e",
1859 {"a", "b", "c", "d", "x", "e"}, 6
1860 },
1861 /* this test conflicts with the [before x] syntax tightening */
1862 /* {
1863 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1864 "& a < b <<< c <<< x << d <<< e",
1865 {"a", "b", "c", "x", "d", "e"},, 6
1866 }, */
1867 {
1868 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1869 "& a < b <<< c << d <<< e <<< f < x < g",
1870 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1871 },
1872 {
1873 "& a <<< b << c < d& a < m",
1874 "& a <<< b << c < m < d",
1875 {"a", "b", "c", "m", "d"}, 5
1876 },
1877 {
1878 "&a<b<<b\\u0301 &z<b",
1879 "&a<b\\u0301 &z<b",
1880 {"a", "b\\u0301", "z", "b"}, 4
1881 },
1882 {
1883 "&z<m<<<q<<<m",
1884 "&z<q<<<m",
1885 {"z", "q", "m"},3
1886 },
1887 {
1888 "&z<<<m<q<<<m",
1889 "&z<q<<<m",
1890 {"z", "q", "m"}, 3
1891 },
1892 {
1893 "& a < b < c < d& r < c",
1894 "& a < b < d& r < c",
1895 {"a", "b", "d"}, 3
1896 },
1897 {
1898 "& a < b < c < d& r < c",
1899 "& a < b < d& r < c",
1900 {"r", "c"}, 2
1901 },
1902 {
1903 "& a < b < c < d& c < m",
1904 "& a < b < c < m < d",
1905 {"a", "b", "c", "m", "d"}, 5
1906 },
1907 {
1908 "& a < b < c < d& a < m",
1909 "& a < m < b < c < d",
1910 {"a", "m", "b", "c", "d"}, 5
1911 }
1912 };
1913
1914
1915 UCollator *credundant = NULL;
1916 UCollator *cresulting = NULL;
1917 UErrorCode status = U_ZERO_ERROR;
1918 UChar rlz[2048] = { 0 };
1919 uint32_t rlen = 0;
1920
1921 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
1922 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
1923 rlen = u_unescape(tests[i].rules, rlz, 2048);
1924
1925 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1926 if(status == U_FILE_ACCESS_ERROR) {
1927 log_data_err("Is your data around?\n");
1928 return;
1929 } else if(U_FAILURE(status)) {
1930 log_err("Error opening collator\n");
1931 return;
1932 }
1933
1934 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
1935 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1936
1937 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
1938
1939 ucol_close(credundant);
1940 ucol_close(cresulting);
1941
1942 log_verbose("testing using data\n");
1943
1944 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
1945 }
1946
1947 }
1948
1949 static void TestExpansionSyntax(void) {
1950 int32_t i;
1951
1952 const static char *rules[] = {
1953 "&AE <<< a << b <<< c &d <<< f",
1954 "&AE <<< a <<< b << c << d < e < f <<< g",
1955 "&AE <<< B <<< C / D <<< F"
1956 };
1957
1958 const static char *expectedRules[] = {
1959 "&A <<< a / E << b / E <<< c /E &d <<< f",
1960 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
1961 "&A <<< B / E <<< C / ED <<< F / E"
1962 };
1963
1964 const static char *testdata[][8] = {
1965 {"AE", "a", "b", "c"},
1966 {"AE", "a", "b", "c", "d", "e", "f", "g"},
1967 {"AE", "B", "C"} /* / ED <<< F / E"},*/
1968 };
1969
1970 const static uint32_t testdatalen[] = {
1971 4,
1972 8,
1973 3
1974 };
1975
1976
1977
1978 UCollator *credundant = NULL;
1979 UCollator *cresulting = NULL;
1980 UErrorCode status = U_ZERO_ERROR;
1981 UChar rlz[2048] = { 0 };
1982 uint32_t rlen = 0;
1983
1984 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
1985 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
1986 rlen = u_unescape(rules[i], rlz, 2048);
1987
1988 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
1989 if(status == U_FILE_ACCESS_ERROR) {
1990 log_data_err("Is your data around?\n");
1991 return;
1992 } else if(U_FAILURE(status)) {
1993 log_err("Error opening collator\n");
1994 return;
1995 }
1996 rlen = u_unescape(expectedRules[i], rlz, 2048);
1997 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1998
1999 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2000 /* as a hard error test, but only in information mode */
2001 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2002
2003 ucol_close(credundant);
2004 ucol_close(cresulting);
2005
2006 log_verbose("testing using data\n");
2007
2008 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2009 }
2010 }
2011
2012 static void TestCase(void)
2013 {
2014 const static UChar gRules[MAX_TOKEN_LEN] =
2015 /*" & 0 < 1,\u2461<a,A"*/
2016 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2017
2018 const static UChar testCase[][MAX_TOKEN_LEN] =
2019 {
2020 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2021 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2022 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2023 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2024 };
2025
2026 const static UCollationResult caseTestResults[][9] =
2027 {
2028 { UCOL_LESS, UCOL_LESS, UCOL_LESS, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_LESS },
2029 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_GREATER },
2030 { UCOL_LESS, UCOL_LESS, UCOL_LESS, 0, UCOL_GREATER, UCOL_LESS, 0, 0, UCOL_LESS },
2031 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_GREATER }
2032
2033 };
2034
2035 const static UColAttributeValue caseTestAttributes[][2] =
2036 {
2037 { UCOL_LOWER_FIRST, UCOL_OFF},
2038 { UCOL_UPPER_FIRST, UCOL_OFF},
2039 { UCOL_LOWER_FIRST, UCOL_ON},
2040 { UCOL_UPPER_FIRST, UCOL_ON}
2041
2042 };
2043 int32_t i,j,k;
2044 UErrorCode status = U_ZERO_ERROR;
2045 UCollationElements *iter;
2046 UCollator *myCollation;
2047 myCollation = ucol_open("en_US", &status);
2048
2049 if(U_FAILURE(status)){
2050 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2051 return;
2052 }
2053 log_verbose("Testing different case settings\n");
2054 ucol_setStrength(myCollation, UCOL_TERTIARY);
2055
2056 for(k = 0; k<4; k++) {
2057 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2058 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2059 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2060 for (i = 0; i < 3 ; i++) {
2061 for(j = i+1; j<4; j++) {
2062 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2063 }
2064 }
2065 }
2066 ucol_close(myCollation);
2067
2068 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2069 if(U_FAILURE(status)){
2070 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2071 return;
2072 }
2073 log_verbose("Testing different case settings with custom rules\n");
2074 ucol_setStrength(myCollation, UCOL_TERTIARY);
2075
2076 for(k = 0; k<4; k++) {
2077 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2078 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2079 for (i = 0; i < 3 ; i++) {
2080 for(j = i+1; j<4; j++) {
2081 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2082 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2083 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2084 backAndForth(iter);
2085 ucol_closeElements(iter);
2086 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2087 backAndForth(iter);
2088 ucol_closeElements(iter);
2089 }
2090 }
2091 }
2092 ucol_close(myCollation);
2093 {
2094 const static char *lowerFirst[] = {
2095 "h",
2096 "H",
2097 "ch",
2098 "Ch",
2099 "CH",
2100 "cha",
2101 "chA",
2102 "Cha",
2103 "ChA",
2104 "CHa",
2105 "CHA",
2106 "i",
2107 "I"
2108 };
2109
2110 const static char *upperFirst[] = {
2111 "H",
2112 "h",
2113 "CH",
2114 "Ch",
2115 "ch",
2116 "CHA",
2117 "CHa",
2118 "ChA",
2119 "Cha",
2120 "chA",
2121 "cha",
2122 "I",
2123 "i"
2124 };
2125 log_verbose("mixed case test\n");
2126 log_verbose("lower first, case level off\n");
2127 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2128 log_verbose("upper first, case level off\n");
2129 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2130 log_verbose("lower first, case level on\n");
2131 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2132 log_verbose("upper first, case level on\n");
2133 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2134 }
2135
2136 }
2137
2138 static void TestIncrementalNormalize(void) {
2139
2140 /*UChar baseA =0x61;*/
2141 UChar baseA =0x41;
2142 /* UChar baseB = 0x42;*/
2143 UChar ccMix[] = {0x316, 0x321, 0x300};
2144 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2145 /*
2146 0x316 is combining grave accent below, cc=220
2147 0x321 is combining palatalized hook below, cc=202
2148 0x300 is combining grave accent, cc=230
2149 */
2150
2151 /*int maxSLen = 2000;*/
2152 int maxSLen = 64000;
2153 int sLen;
2154 int i;
2155
2156 UCollator *coll;
2157 UErrorCode status = U_ZERO_ERROR;
2158 UCollationResult result;
2159
2160 int32_t myQ = QUICK;
2161
2162 if(QUICK < 0) {
2163 QUICK = 1;
2164 }
2165
2166 {
2167 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2168 /* most buffers along the way.*/
2169 UChar *strA;
2170 UChar *strB;
2171
2172 strA = malloc((maxSLen+1) * sizeof(UChar));
2173 strB = malloc((maxSLen+1) * sizeof(UChar));
2174
2175 coll = ucol_open("en_US", &status);
2176 if(status == U_FILE_ACCESS_ERROR) {
2177 log_data_err("Is your data around?\n");
2178 return;
2179 } else if(U_FAILURE(status)) {
2180 log_err("Error opening collator\n");
2181 return;
2182 }
2183 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2184
2185 /*for (sLen = 257; sLen<maxSLen; sLen++) {*/
2186 /*for (sLen = 4; sLen<maxSLen; sLen++) {*/
2187 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2188 for (sLen = 500; sLen<501; sLen++) {
2189 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2190 strA[0] = baseA;
2191 strB[0] = baseA;
2192 for (i=1; i<=sLen-1; i++) {
2193 strA[i] = ccMix[i % 3];
2194 strB[sLen-i] = ccMix[i % 3];
2195 }
2196 strA[sLen] = 0;
2197 strB[sLen] = 0;
2198
2199 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2200 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2201 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2202 doTest(coll, strA, strB, UCOL_EQUAL);
2203 }
2204 free(strA);
2205 free(strB);
2206 }
2207
2208 QUICK = myQ;
2209
2210
2211 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2212 /* of the string. Checks a couple of edge cases.*/
2213
2214 {
2215 UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2216 UChar strB[] = {0x41, 0xc0, 0x316, 0};
2217 ucol_setStrength(coll, UCOL_TERTIARY);
2218 doTest(coll, strA, strB, UCOL_EQUAL);
2219 }
2220
2221 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2222
2223 {
2224 /* New UCA 3.1.1.
2225 * test below used a code point from Desseret, which sorts differently
2226 * than d800 dc00
2227 */
2228 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2229 UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2230 UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2231 ucol_setStrength(coll, UCOL_TERTIARY);
2232 doTest(coll, strA, strB, UCOL_GREATER);
2233 }
2234
2235 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2236
2237 {
2238 UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2239 UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2240 char sortKeyA[50];
2241 char sortKeyAz[50];
2242 char sortKeyB[50];
2243 char sortKeyBz[50];
2244 int r;
2245
2246 /* there used to be -3 here. Hmmmm.... */
2247 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2248 result = ucol_strcoll(coll, strA, 3, strB, 3);
2249 if (result != UCOL_GREATER) {
2250 log_err("ERROR 1 in test 4\n");
2251 }
2252 result = ucol_strcoll(coll, strA, -1, strB, -1);
2253 if (result != UCOL_EQUAL) {
2254 log_err("ERROR 2 in test 4\n");
2255 }
2256
2257 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2258 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2259 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2260 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2261
2262 r = strcmp(sortKeyA, sortKeyAz);
2263 if (r <= 0) {
2264 log_err("Error 3 in test 4\n");
2265 }
2266 r = strcmp(sortKeyA, sortKeyB);
2267 if (r <= 0) {
2268 log_err("Error 4 in test 4\n");
2269 }
2270 r = strcmp(sortKeyAz, sortKeyBz);
2271 if (r != 0) {
2272 log_err("Error 5 in test 4\n");
2273 }
2274
2275 ucol_setStrength(coll, UCOL_IDENTICAL);
2276 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2277 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2278 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2279 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2280
2281 r = strcmp(sortKeyA, sortKeyAz);
2282 if (r <= 0) {
2283 log_err("Error 6 in test 4\n");
2284 }
2285 r = strcmp(sortKeyA, sortKeyB);
2286 if (r <= 0) {
2287 log_err("Error 7 in test 4\n");
2288 }
2289 r = strcmp(sortKeyAz, sortKeyBz);
2290 if (r != 0) {
2291 log_err("Error 8 in test 4\n");
2292 }
2293 ucol_setStrength(coll, UCOL_TERTIARY);
2294 }
2295
2296
2297 /* Test 5: Null characters in non-normal source strings.*/
2298
2299 {
2300 UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2301 UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2302 char sortKeyA[50];
2303 char sortKeyAz[50];
2304 char sortKeyB[50];
2305 char sortKeyBz[50];
2306 int r;
2307
2308 result = ucol_strcoll(coll, strA, 6, strB, 6);
2309 if (result != UCOL_GREATER) {
2310 log_err("ERROR 1 in test 5\n");
2311 }
2312 result = ucol_strcoll(coll, strA, -1, strB, -1);
2313 if (result != UCOL_EQUAL) {
2314 log_err("ERROR 2 in test 5\n");
2315 }
2316
2317 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2318 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2319 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2320 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2321
2322 r = strcmp(sortKeyA, sortKeyAz);
2323 if (r <= 0) {
2324 log_err("Error 3 in test 5\n");
2325 }
2326 r = strcmp(sortKeyA, sortKeyB);
2327 if (r <= 0) {
2328 log_err("Error 4 in test 5\n");
2329 }
2330 r = strcmp(sortKeyAz, sortKeyBz);
2331 if (r != 0) {
2332 log_err("Error 5 in test 5\n");
2333 }
2334
2335 ucol_setStrength(coll, UCOL_IDENTICAL);
2336 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2337 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2338 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2339 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2340
2341 r = strcmp(sortKeyA, sortKeyAz);
2342 if (r <= 0) {
2343 log_err("Error 6 in test 5\n");
2344 }
2345 r = strcmp(sortKeyA, sortKeyB);
2346 if (r <= 0) {
2347 log_err("Error 7 in test 5\n");
2348 }
2349 r = strcmp(sortKeyAz, sortKeyBz);
2350 if (r != 0) {
2351 log_err("Error 8 in test 5\n");
2352 }
2353 ucol_setStrength(coll, UCOL_TERTIARY);
2354 }
2355
2356
2357 /* Test 6: Null character as base of a non-normal combining sequence.*/
2358
2359 {
2360 UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2361 UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2362
2363 result = ucol_strcoll(coll, strA, 5, strB, 5);
2364 if (result != UCOL_LESS) {
2365 log_err("Error 1 in test 6\n");
2366 }
2367 result = ucol_strcoll(coll, strA, -1, strB, -1);
2368 if (result != UCOL_EQUAL) {
2369 log_err("Error 2 in test 6\n");
2370 }
2371 }
2372
2373 ucol_close(coll);
2374 }
2375
2376
2377
#if 0
/*
 * Disabled test: for each escaped string in caseBitData, asks
 * ucol_uprv_getCaseBits() for its case bits using the root ("") collator and
 * compares the answer with the matching entry of results[].
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    /* Without a collator there is nothing to query. */
    if(U_FAILURE(status)) {
        log_data_err("Couldn't open UCA\n");
        return;
    }

    /* results[] and caseBitData[] are parallel arrays of the same length. */
    for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* The collator opened above was previously never released. */
    ucol_close(UCA);
}
#endif
2405
2406 static void TestHangulTailoring(void) {
2407 static const char *koreanData[] = {
2408 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2409 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2410 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2411 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2412 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2413 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2414 };
2415
2416 const char *rules =
2417 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2418 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2419 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2420 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2421 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2422 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2423
2424
2425 UErrorCode status = U_ZERO_ERROR;
2426 UChar rlz[2048] = { 0 };
2427 uint32_t rlen = u_unescape(rules, rlz, 2048);
2428
2429 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2430 if(status == U_FILE_ACCESS_ERROR) {
2431 log_data_err("Is your data around?\n");
2432 return;
2433 } else if(U_FAILURE(status)) {
2434 log_err("Error opening collator\n");
2435 return;
2436 }
2437
2438 log_verbose("Using start of korean rules\n");
2439
2440 if(U_SUCCESS(status)) {
2441 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2442 } else {
2443 log_err("Unable to open collator with rules %s\n", rules);
2444 }
2445
2446 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2447 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2448 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2449
2450 ucol_close(coll);
2451
2452 log_verbose("Using ko__LOTUS locale\n");
2453 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2454 }
2455
2456 static void TestCompressOverlap(void) {
2457 UChar secstr[150];
2458 UChar tertstr[150];
2459 UErrorCode status = U_ZERO_ERROR;
2460 UCollator *coll;
2461 char result[200];
2462 uint32_t resultlen;
2463 int count = 0;
2464 char *tempptr;
2465
2466 coll = ucol_open("", &status);
2467
2468 if (U_FAILURE(status)) {
2469 log_err("Collator can't be created\n");
2470 return;
2471 }
2472 while (count < 149) {
2473 secstr[count] = 0x0020; /* [06, 05, 05] */
2474 tertstr[count] = 0x0020;
2475 count ++;
2476 }
2477
2478 /* top down compression ----------------------------------- */
2479 secstr[count] = 0x0332; /* [, 87, 05] */
2480 tertstr[count] = 0x3000; /* [06, 05, 07] */
2481
2482 /* no compression secstr should have 150 secondary bytes, tertstr should
2483 have 150 tertiary bytes.
2484 with correct overlapping compression, secstr should have 4 secondary
2485 bytes, tertstr should have > 2 tertiary bytes */
2486 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2487 tempptr = uprv_strchr(result, 1) + 1;
2488 while (*(tempptr + 1) != 1) {
2489 /* the last secondary collation element is not checked since it is not
2490 part of the compression */
2491 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2492 log_err("Secondary compression overlapped\n");
2493 }
2494 tempptr ++;
2495 }
2496
2497 /* tertiary top/bottom/common for en_US is similar to the secondary
2498 top/bottom/common */
2499 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2500 tempptr = uprv_strrchr(result, 1) + 1;
2501 while (*(tempptr + 1) != 0) {
2502 /* the last secondary collation element is not checked since it is not
2503 part of the compression */
2504 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2505 log_err("Tertiary compression overlapped\n");
2506 }
2507 tempptr ++;
2508 }
2509
2510 /* bottom up compression ------------------------------------- */
2511 secstr[count] = 0;
2512 tertstr[count] = 0;
2513 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2514 tempptr = uprv_strchr(result, 1) + 1;
2515 while (*(tempptr + 1) != 1) {
2516 /* the last secondary collation element is not checked since it is not
2517 part of the compression */
2518 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2519 log_err("Secondary compression overlapped\n");
2520 }
2521 tempptr ++;
2522 }
2523
2524 /* tertiary top/bottom/common for en_US is similar to the secondary
2525 top/bottom/common */
2526 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2527 tempptr = uprv_strrchr(result, 1) + 1;
2528 while (*(tempptr + 1) != 0) {
2529 /* the last secondary collation element is not checked since it is not
2530 part of the compression */
2531 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2532 log_err("Tertiary compression overlapped\n");
2533 }
2534 tempptr ++;
2535 }
2536
2537 ucol_close(coll);
2538 }
2539
/**
 * Runs the same three-string ordering check against the "root" locale and
 * then against each of a handful of small Cyrillic tailorings.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* Tailorings applied one after another, in this order. */
    static const char *const tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };
    const int wordCount = (int)(sizeof(test) / sizeof(test[0]));
    const int ruleCount = (int)(sizeof(tailorings) / sizeof(tailorings[0]));
    int r;

    /* Russian overrides contractions, so the "ru" locale is no longer a
       valid subject for this test; only "root" is exercised. */
    genericLocaleStarter("root", test, wordCount);

    for (r = 0; r < ruleCount; ++r) {
        genericRulesStarter(tailorings[r], test, wordCount);
    }
}
2558
/**
 * Runs two ordering checks with a tailoring that consists solely of a
 * [suppressContractions] directive for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {

    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* The same directive is used for both data sets. */
    const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    genericRulesStarter(suppressRule, testNoCont,
                        (int)(sizeof(testNoCont) / sizeof(testNoCont[0])));
    genericRulesStarter(suppressRule, testNoCont2,
                        (int)(sizeof(testNoCont2) / sizeof(testNoCont2[0])));
}
2575
2576 static void TestContraction(void) {
2577 const static char *testrules[] = {
2578 "&A = AB / B",
2579 "&A = A\\u0306/\\u0306",
2580 "&c = ch / h"
2581 };
2582 const static UChar testdata[][2] = {
2583 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2584 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2585 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2586 };
2587 const static UChar testdata2[][2] = {
2588 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2589 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2590 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2591 };
2592 const static char *testrules3[] = {
2593 "&z < xyz &xyzw << B",
2594 "&z < xyz &xyz << B / w",
2595 "&z < ch &achm << B",
2596 "&z < ch &a << B / chm",
2597 "&\\ud800\\udc00w << B",
2598 "&\\ud800\\udc00 << B / w",
2599 "&a\\ud800\\udc00m << B",
2600 "&a << B / \\ud800\\udc00m",
2601 };
2602
2603 UErrorCode status = U_ZERO_ERROR;
2604 UCollator *coll;
2605 UChar rule[256] = {0};
2606 uint32_t rlen = 0;
2607 int i;
2608
2609 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2610 UCollationElements *iter1;
2611 int j = 0;
2612 log_verbose("Rule %s for testing\n", testrules[i]);
2613 rlen = u_unescape(testrules[i], rule, 32);
2614 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2615 if (U_FAILURE(status)) {
2616 log_err("Collator creation failed %s\n", testrules[i]);
2617 return;
2618 }
2619 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2620 if (U_FAILURE(status)) {
2621 log_err("Collation iterator creation failed\n");
2622 return;
2623 }
2624 while (j < 2) {
2625 UCollationElements *iter2 = ucol_openElements(coll,
2626 &(testdata[i][j]),
2627 1, &status);
2628 uint32_t ce;
2629 if (U_FAILURE(status)) {
2630 log_err("Collation iterator creation failed\n");
2631 return;
2632 }
2633 ce = ucol_next(iter2, &status);
2634 while (ce != UCOL_NULLORDER) {
2635 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2636 log_err("Collation elements in contraction split does not match\n");
2637 return;
2638 }
2639 ce = ucol_next(iter2, &status);
2640 }
2641 j ++;
2642 ucol_closeElements(iter2);
2643 }
2644 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2645 log_err("Collation elements not exhausted\n");
2646 return;
2647 }
2648 ucol_closeElements(iter1);
2649 ucol_close(coll);
2650 }
2651
2652 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2653 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2654 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2655 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2656 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2657 testdata2[1][1]);
2658 return;
2659 }
2660 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2661 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2662 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2663 testdata2[2][1]);
2664 return;
2665 }
2666 ucol_close(coll);
2667
2668 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2669 UCollator *coll1,
2670 *coll2;
2671 UCollationElements *iter1,
2672 *iter2;
2673 UChar ch = 0x0042 /* 'B' */;
2674 uint32_t ce;
2675 rlen = u_unescape(testrules3[i], rule, 32);
2676 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2677 rlen = u_unescape(testrules3[i + 1], rule, 32);
2678 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2679 if (U_FAILURE(status)) {
2680 log_err("Collator creation failed %s\n", testrules[i]);
2681 return;
2682 }
2683 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2684 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2685 if (U_FAILURE(status)) {
2686 log_err("Collation iterator creation failed\n");
2687 return;
2688 }
2689 ce = ucol_next(iter1, &status);
2690 if (U_FAILURE(status)) {
2691 log_err("Retrieving ces failed\n");
2692 return;
2693 }
2694 while (ce != UCOL_NULLORDER) {
2695 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2696 log_err("CEs does not match\n");
2697 return;
2698 }
2699 ce = ucol_next(iter1, &status);
2700 if (U_FAILURE(status)) {
2701 log_err("Retrieving ces failed\n");
2702 return;
2703 }
2704 }
2705 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2706 log_err("CEs not exhausted\n");
2707 return;
2708 }
2709 ucol_closeElements(iter1);
2710 ucol_closeElements(iter2);
2711 ucol_close(coll1);
2712 ucol_close(coll2);
2713 }
2714 }
2715
2716 static void TestExpansion(void) {
2717 const static char *testrules[] = {
2718 "&J << K / B & K << M",
2719 "&J << K / B << M"
2720 };
2721 const static UChar testdata[][3] = {
2722 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2723 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2724 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2725 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2726 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2727 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2728 };
2729
2730 UErrorCode status = U_ZERO_ERROR;
2731 UCollator *coll;
2732 UChar rule[256] = {0};
2733 uint32_t rlen = 0;
2734 int i;
2735
2736 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2737 int j = 0;
2738 log_verbose("Rule %s for testing\n", testrules[i]);
2739 rlen = u_unescape(testrules[i], rule, 32);
2740 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2741 if (U_FAILURE(status)) {
2742 log_err("Collator creation failed %s\n", testrules[i]);
2743 return;
2744 }
2745
2746 for (j = 0; j < 5; j ++) {
2747 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2748 }
2749 ucol_close(coll);
2750 }
2751 }
2752
#if 0
/* This test probes the current limitations of the engine.   */
/* It always fails, so it is disabled by default.            */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *tailoring = "&a=b/c&d=c/e";
        static const char *orderA[] = {"add","b","adf"};
        static const char *orderB[] = {"aa","b","af"};
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

        log_verbose("recursive expansions\n");
        genericRulesStarter(tailoring, orderA, countA);
        genericRulesStarter(tailoring, orderB, countB);
    }
    /* contractions spanning expansions */
    {
        static const char *tailoring = "&a<<<c/e&g<<<eh";
        static const char *orderA[] = {"ad","c","af","f","ch","h"};
        static const char *orderB[] = {"ad","c","ch","af","f","h"};
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(tailoring, orderA, countA);
        genericRulesStarter(tailoring, orderB, countB);
    }
    /* normalization: nulls in contractions */
    {
        static const char *tailoring = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue normOn[] = { UCOL_ON };
        static const UColAttributeValue normOff[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        /* Both orders are tried, first with decomposition on, then off. */
        genericRulesStarterWithOptions(tailoring, orderA, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(tailoring, orderB, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(tailoring, orderA, 2, attrs, normOff, 1);
        genericRulesStarterWithOptions(tailoring, orderB, 2, attrs, normOff, 1);
    }
    /* normalization: contractions spanning normalization */
    {
        static const char *tailoring = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue normOn[] = { UCOL_ON };
        static const UColAttributeValue normOff[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(tailoring, orderA, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(tailoring, orderB, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(tailoring, orderA, 2, attrs, normOff, 1);
        genericRulesStarterWithOptions(tailoring, orderB, 2, attrs, normOff, 1);
    }
    /* variable top:  */
    {
        /* Alternative rules that were tried here at some point:
         *   "&\\u2010<x=[variable top]<z"
         *   "&' '<x<[variable top]=z"
         */
        static const char *tailoring = "&\\u2010<x<[variable top]=z";
        static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue shiftedOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue shiftedOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
        const uint32_t countC = sizeof(orderC)/sizeof(orderC[0]);
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(tailoring, orderC, countC, attrs, shiftedOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, shiftedOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, shiftedOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, shiftedOff, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, shiftedOff, attrCount);
    }
    /* case level */
    {
        static const char *tailoring = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *orderA[] = {"c","CH","Ch","cH","ch"};
        static const char *orderB[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);

        log_verbose("case level\n");
        /* Only the upper-first setting is run; the UCOL_OFF variant is not. */
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, upperFirst, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, upperFirst, attrCount);
    }

}
#endif
2844
2845 static void TestBocsuCoverage(void) {
2846 UErrorCode status = U_ZERO_ERROR;
2847 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2848 UChar test[256] = {0};
2849 uint32_t tlen = u_unescape(testString, test, 32);
2850 uint8_t key[256] = {0};
2851 uint32_t klen = 0;
2852
2853 UCollator *coll = ucol_open("", &status);
2854 if(U_SUCCESS(status)) {
2855 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2856
2857 klen = ucol_getSortKey(coll, test, tlen, key, 256);
2858
2859 ucol_close(coll);
2860 } else {
2861 log_data_err("Couldn't open UCA\n");
2862 }
2863 }
2864
2865 static void TestVariableTopSetting(void) {
2866 UErrorCode status = U_ZERO_ERROR;
2867 const UChar *current = NULL;
2868 uint32_t varTopOriginal = 0, varTop1, varTop2;
2869 UCollator *coll = ucol_open("", &status);
2870 if(U_SUCCESS(status)) {
2871
2872 uint32_t strength = 0;
2873 uint16_t specs = 0;
2874 uint32_t chOffset = 0;
2875 uint32_t chLen = 0;
2876 uint32_t exOffset = 0;
2877 uint32_t exLen = 0;
2878 uint32_t oldChOffset = 0;
2879 uint32_t oldChLen = 0;
2880 uint32_t oldExOffset = 0;
2881 uint32_t oldExLen = 0;
2882 uint32_t prefixOffset = 0;
2883 uint32_t prefixLen = 0;
2884
2885 UBool startOfRules = TRUE;
2886 UColTokenParser src;
2887 UColOptionSet opts;
2888
2889 UChar *rulesCopy = NULL;
2890 uint32_t rulesLen;
2891
2892 UCollationResult result;
2893
2894 UChar first[256] = { 0 };
2895 UChar second[256] = { 0 };
2896 UParseError parseError;
2897 int32_t myQ = QUICK;
2898
2899 src.opts = &opts;
2900
2901 if(QUICK <= 0) {
2902 QUICK = 1;
2903 }
2904
2905 /* this test will fail when normalization is turned on */
2906 /* therefore we always turn off exhaustive mode for it */
2907 if(1) { /* QUICK > 0*/
2908 log_verbose("Slide variable top over UCARules\n");
2909 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2910 rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
2911 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
2912
2913 if(U_SUCCESS(status) && rulesLen > 0) {
2914 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2915 src.current = src.source = rulesCopy;
2916 src.end = rulesCopy+rulesLen;
2917 src.extraCurrent = src.end;
2918 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2919
2920 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
2921 strength = src.parsedToken.strength;
2922 chOffset = src.parsedToken.charsOffset;
2923 chLen = src.parsedToken.charsLen;
2924 exOffset = src.parsedToken.extensionOffset;
2925 exLen = src.parsedToken.extensionLen;
2926 prefixOffset = src.parsedToken.prefixOffset;
2927 prefixLen = src.parsedToken.prefixLen;
2928 specs = src.parsedToken.flags;
2929
2930 startOfRules = FALSE;
2931 if(0) {
2932 log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
2933 }
2934 if(strength == UCOL_PRIMARY) {
2935 status = U_ZERO_ERROR;
2936 varTopOriginal = ucol_getVariableTop(coll, &status);
2937 varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
2938 if(U_FAILURE(status)) {
2939 char buffer[256];
2940 char *buf = buffer;
2941 uint32_t i = 0, j;
2942 uint32_t CE = UCOL_NO_MORE_CES;
2943
2944 /* before we start screaming, let's see if there is a problem with the rules */
2945 collIterate s;
2946 uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
2947
2948 CE = ucol_getNextCE(coll, &s, &status);
2949
2950 for(i = 0; i < oldChLen; i++) {
2951 j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
2952 buf += j;
2953 }
2954 if(status == U_PRIMARY_TOO_LONG_ERROR) {
2955 log_verbose("= Expected failure for %s =", buffer);
2956 } else {
2957 if(s.pos == s.endp) {
2958 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
2959 oldChOffset, u_errorName(status), buffer);
2960 } else {
2961 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
2962 buffer);
2963 }
2964 }
2965 }
2966 varTop2 = ucol_getVariableTop(coll, &status);
2967 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
2968 log_err("cannot retrieve set varTop value!\n");
2969 continue;
2970 }
2971
2972 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
2973
2974 u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
2975 u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
2976 u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
2977 first[2*oldChLen+chLen] = 0;
2978
2979 if(oldExLen == 0) {
2980 u_strncpy(second, rulesCopy+chOffset, chLen);
2981 second[chLen] = 0;
2982 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
2983 u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
2984 u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
2985 u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
2986 second[2*oldExLen+chLen] = 0;
2987 }
2988 result = ucol_strcoll(coll, first, -1, second, -1);
2989 if(result == UCOL_EQUAL) {
2990 doTest(coll, first, second, UCOL_EQUAL);
2991 } else {
2992 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
2993 }
2994 }
2995 }
2996 if(strength != UCOL_TOK_RESET) {
2997 oldChOffset = chOffset;
2998 oldChLen = chLen;
2999 oldExOffset = exOffset;
3000 oldExLen = exLen;
3001 }
3002 }
3003 status = U_ZERO_ERROR;
3004 }
3005 else {
3006 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3007 return;
3008 }
3009 if (U_FAILURE(status)) {
3010 log_err("Error parsing rules %s\n", u_errorName(status));
3011 return;
3012 }
3013 status = U_ZERO_ERROR;
3014 }
3015
3016 QUICK = myQ;
3017
3018 log_verbose("Testing setting variable top to contractions\n");
3019 {
3020 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3021 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3022 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3023 while(*conts != 0) {
3024 if(*(conts+2) == 0) {
3025 varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3026 } else {
3027 varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3028 }
3029 if(U_FAILURE(status)) {
3030 log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3031 *conts, *(conts+1), *(conts+2));
3032 status = U_ZERO_ERROR;
3033 }
3034 conts+=3;
3035 }
3036
3037 status = U_ZERO_ERROR;
3038
3039 first[0] = 0x0040;
3040 first[1] = 0x0050;
3041 first[2] = 0x0000;
3042
3043 ucol_setVariableTop(coll, first, -1, &status);
3044
3045 if(U_SUCCESS(status)) {
3046 log_err("Invalid contraction succeded in setting variable top!\n");
3047 }
3048
3049 }
3050
3051 log_verbose("Test restoring variable top\n");
3052
3053 status = U_ZERO_ERROR;
3054 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3055 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3056 log_err("Couldn't restore old variable top\n");
3057 }
3058
3059 log_verbose("Testing calling with error set\n");
3060
3061 status = U_INTERNAL_PROGRAM_ERROR;
3062 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3063 varTop2 = ucol_getVariableTop(coll, &status);
3064 ucol_restoreVariableTop(coll, varTop2, &status);
3065 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3066 varTop2 = ucol_getVariableTop(NULL, &status);
3067 ucol_restoreVariableTop(NULL, varTop2, &status);
3068 if(status != U_INTERNAL_PROGRAM_ERROR) {
3069 log_err("Bad reaction to passed error!\n");
3070 }
3071 free(rulesCopy);
3072 ucol_close(coll);
3073 } else {
3074 log_data_err("Couldn't open UCA collator\n");
3075 }
3076
3077 }
3078
3079 static void TestNonChars(void) {
3080 static const char *test[] = {
3081 "\\u0000",
3082 "\\uFFFE", "\\uFFFF",
3083 "\\U0001FFFE", "\\U0001FFFF",
3084 "\\U0002FFFE", "\\U0002FFFF",
3085 "\\U0003FFFE", "\\U0003FFFF",
3086 "\\U0004FFFE", "\\U0004FFFF",
3087 "\\U0005FFFE", "\\U0005FFFF",
3088 "\\U0006FFFE", "\\U0006FFFF",
3089 "\\U0007FFFE", "\\U0007FFFF",
3090 "\\U0008FFFE", "\\U0008FFFF",
3091 "\\U0009FFFE", "\\U0009FFFF",
3092 "\\U000AFFFE", "\\U000AFFFF",
3093 "\\U000BFFFE", "\\U000BFFFF",
3094 "\\U000CFFFE", "\\U000CFFFF",
3095 "\\U000DFFFE", "\\U000DFFFF",
3096 "\\U000EFFFE", "\\U000EFFFF",
3097 "\\U000FFFFE", "\\U000FFFFF",
3098 "\\U0010FFFE", "\\U0010FFFF"
3099 };
3100 UErrorCode status = U_ZERO_ERROR;
3101 UCollator *coll = ucol_open("en_US", &status);
3102
3103 log_verbose("Test non characters\n");
3104
3105 if(U_SUCCESS(status)) {
3106 genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
3107 } else {
3108 log_err("Unable to open collator\n");
3109 }
3110
3111 ucol_close(coll);
3112 }
3113
3114 static void TestExtremeCompression(void) {
3115 static char *test[4];
3116 int32_t j = 0, i = 0;
3117
3118 for(i = 0; i<4; i++) {
3119 test[i] = (char *)malloc(2048*sizeof(char));
3120 }
3121
3122 for(j = 20; j < 500; j++) {
3123 for(i = 0; i<4; i++) {
3124 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3125 test[i][j-1] = (char)('a'+i);
3126 test[i][j] = 0;
3127 }
3128 genericLocaleStarter("en_US", (const char **)test, 4);
3129 }
3130
3131
3132 for(i = 0; i<4; i++) {
3133 free(test[i]);
3134 }
3135 }
3136
/*
 * Older variant of TestExtremeCompression, excluded from the build by the
 * #if 0 below. It fills the four buffers for lengths 10..2047 and only
 * calls genericLocaleStarter once, after the fill loop has finished.
 * NOTE(review): as written it passes `status` (not `&status`) to ucol_open
 * and never uses or closes `coll`; both would need fixing before this
 * code could be re-enabled.
 */
#if 0
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("en_US", status);
    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    genericLocaleStarter("en_US", (const char **)test, 4);

    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
}
#endif
3166
/*
 * Builds a tailoring whose rule chain contains supplementary code points
 * written as surrogate pairs (alone and followed by ASCII letters) and
 * passes the matching list of strings to genericRulesStarter.
 */
static void TestSurrogates(void) {
    static const char *tailoring =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
        "< \\ud800\\udc00y  < \\ud800\\udc00r"
        "< \\ud800\\udc00f  << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a  < c < b" ;

    static const char *ordered[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    /* 14 entries */
    genericRulesStarter(tailoring, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
3187
3188 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3189 static void TestPrefix(void) {
3190 uint32_t i;
3191
3192 static struct {
3193 const char *rules;
3194 const char *data[50];
3195 const uint32_t len;
3196 } tests[] = {
3197 { "&z <<< z|a",
3198 {"zz", "za"}, 2 },
3199
3200 { "&z <<< z| a",
3201 {"zz", "za"}, 2 },
3202 { "[strength I]"
3203 "&a=\\ud900\\udc25"
3204 "&z<<<\\ud900\\udc25|a",
3205 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3206 };
3207
3208
3209 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3210 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3211 }
3212 }
3213
3214 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3215 /* JIS X 4061 collation order implementation */
3216 static void TestNewJapanese(void) {
3217
3218 static const char *test1[] = {
3219 "\\u30b7\\u30e3\\u30fc\\u30ec",
3220 "\\u30b7\\u30e3\\u30a4",
3221 "\\u30b7\\u30e4\\u30a3",
3222 "\\u30b7\\u30e3\\u30ec",
3223 "\\u3061\\u3087\\u3053",
3224 "\\u3061\\u3088\\u3053",
3225 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3226 "\\u3066\\u30fc\\u305f",
3227 "\\u30c6\\u30fc\\u30bf",
3228 "\\u30c6\\u30a7\\u30bf",
3229 "\\u3066\\u3048\\u305f",
3230 "\\u3067\\u30fc\\u305f",
3231 "\\u30c7\\u30fc\\u30bf",
3232 "\\u30c7\\u30a7\\u30bf",
3233 "\\u3067\\u3048\\u305f",
3234 "\\u3066\\u30fc\\u305f\\u30fc",
3235 "\\u30c6\\u30fc\\u30bf\\u30a1",
3236 "\\u30c6\\u30a7\\u30bf\\u30fc",
3237 "\\u3066\\u3047\\u305f\\u3041",
3238 "\\u3066\\u3048\\u305f\\u30fc",
3239 "\\u3067\\u30fc\\u305f\\u30fc",
3240 "\\u30c7\\u30fc\\u30bf\\u30a1",
3241 "\\u3067\\u30a7\\u305f\\u30a1",
3242 "\\u30c7\\u3047\\u30bf\\u3041",
3243 "\\u30c7\\u30a8\\u30bf\\u30a2",
3244 "\\u3072\\u3086",
3245 "\\u3073\\u3085\\u3042",
3246 "\\u3074\\u3085\\u3042",
3247 "\\u3073\\u3085\\u3042\\u30fc",
3248 "\\u30d3\\u30e5\\u30a2\\u30fc",
3249 "\\u3074\\u3085\\u3042\\u30fc",
3250 "\\u30d4\\u30e5\\u30a2\\u30fc",
3251 "\\u30d2\\u30e5\\u30a6",
3252 "\\u30d2\\u30e6\\u30a6",
3253 "\\u30d4\\u30e5\\u30a6\\u30a2",
3254 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3255 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3256 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3257 "\\u3072\\u3085\\u3093",
3258 "\\u3074\\u3085\\u3093",
3259 "\\u3075\\u30fc\\u308a",
3260 "\\u30d5\\u30fc\\u30ea",
3261 "\\u3075\\u3045\\u308a",
3262 "\\u3075\\u30a5\\u308a",
3263 "\\u3075\\u30a5\\u30ea",
3264 "\\u30d5\\u30a6\\u30ea",
3265 "\\u3076\\u30fc\\u308a",
3266 "\\u30d6\\u30fc\\u30ea",
3267 "\\u3076\\u3045\\u308a",
3268 "\\u30d6\\u30a5\\u308a",
3269 "\\u3077\\u3046\\u308a",
3270 "\\u30d7\\u30a6\\u30ea",
3271 "\\u3075\\u30fc\\u308a\\u30fc",
3272 "\\u30d5\\u30a5\\u30ea\\u30fc",
3273 "\\u3075\\u30a5\\u308a\\u30a3",
3274 "\\u30d5\\u3045\\u308a\\u3043",
3275 "\\u30d5\\u30a6\\u30ea\\u30fc",
3276 "\\u3075\\u3046\\u308a\\u3043",
3277 "\\u30d6\\u30a6\\u30ea\\u30a4",
3278 "\\u3077\\u30fc\\u308a\\u30fc",
3279 "\\u3077\\u30a5\\u308a\\u30a4",
3280 "\\u3077\\u3046\\u308a\\u30fc",
3281 "\\u30d7\\u30a6\\u30ea\\u30a4",
3282 "\\u30d5\\u30fd",
3283 "\\u3075\\u309e",
3284 "\\u3076\\u309d",
3285 "\\u3076\\u3075",
3286 "\\u3076\\u30d5",
3287 "\\u30d6\\u3075",
3288 "\\u30d6\\u30d5",
3289 "\\u3076\\u309e",
3290 "\\u3076\\u3077",
3291 "\\u30d6\\u3077",
3292 "\\u3077\\u309d",
3293 "\\u30d7\\u30fd",
3294 "\\u3077\\u3075",
3295 };
3296
3297 static const char *test2[] = {
3298 "\\u306f\\u309d", /* H\\u309d */
3299 "\\u30cf\\u30fd", /* K\\u30fd */
3300 "\\u306f\\u306f", /* HH */
3301 "\\u306f\\u30cf", /* HK */
3302 "\\u30cf\\u30cf", /* KK */
3303 "\\u306f\\u309e", /* H\\u309e */
3304 "\\u30cf\\u30fe", /* K\\u30fe */
3305 "\\u306f\\u3070", /* HH\\u309b */
3306 "\\u30cf\\u30d0", /* KK\\u309b */
3307 "\\u306f\\u3071", /* HH\\u309c */
3308 "\\u30cf\\u3071", /* KH\\u309c */
3309 "\\u30cf\\u30d1", /* KK\\u309c */
3310 "\\u3070\\u309d", /* H\\u309b\\u309d */
3311 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3312 "\\u3070\\u306f", /* H\\u309bH */
3313 "\\u30d0\\u30cf", /* K\\u309bK */
3314 "\\u3070\\u309e", /* H\\u309b\\u309e */
3315 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3316 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3317 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3318 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3319 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3320 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3321 "\\u3071\\u309d", /* H\\u309c\\u309d */
3322 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3323 "\\u3071\\u306f", /* H\\u309cH */
3324 "\\u30d1\\u30cf", /* K\\u309cK */
3325 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3326 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3327 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3328 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3329 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3330 };
3331 /*
3332 static const char *test3[] = {
3333 "\\u221er\\u221e",
3334 "\\u221eR#",
3335 "\\u221et\\u221e",
3336 "#r\\u221e",
3337 "#R#",
3338 "#t%",
3339 "#T%",
3340 "8t\\u221e",
3341 "8T\\u221e",
3342 "8t#",
3343 "8T#",
3344 "8t%",
3345 "8T%",
3346 "8t8",
3347 "8T8",
3348 "\\u03c9r\\u221e",
3349 "\\u03a9R%",
3350 "rr\\u221e",
3351 "rR\\u221e",
3352 "Rr\\u221e",
3353 "RR\\u221e",
3354 "RT%",
3355 "rt8",
3356 "tr\\u221e",
3357 "tr8",
3358 "TR8",
3359 "tt8",
3360 "\\u30b7\\u30e3\\u30fc\\u30ec",
3361 };
3362 */
3363 static const UColAttribute att[] = { UCOL_STRENGTH };
3364 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3365
3366 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3367 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3368
3369 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3370 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3371 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3372 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3373 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3374 }
3375
3376 static void TestStrCollIdenticalPrefix(void) {
3377 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3378 const char* test[] = {
3379 "ab\\ud9b0\\udc70",
3380 "ab\\ud9b0\\udc71"
3381 };
3382 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3383 }
3384 /* Contractions should have all their canonically equivalent */
3385 /* strings included */
3386 static void TestContractionClosure(void) {
3387 static struct {
3388 const char *rules;
3389 const char *data[50];
3390 const uint32_t len;
3391 } tests[] = {
3392 { "&b=\\u00e4\\u00e4",
3393 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3394 { "&b=\\u00C5",
3395 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3396 };
3397 uint32_t i;
3398
3399
3400 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3401 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3402 }
3403 }
3404
3405 /* This tests also fails*/
3406 static void TestBeforePrefixFailure(void) {
3407 static struct {
3408 const char *rules;
3409 const char *data[50];
3410 const uint32_t len;
3411 } tests[] = {
3412 { "&g <<< a"
3413 "&[before 3]\\uff41 <<< x",
3414 {"x", "\\uff41"}, 2 },
3415 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3416 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3417 "&[before 3]\\u30a7<<<\\u30a9",
3418 {"\\u30a9", "\\u30a7"}, 2 },
3419 { "&[before 3]\\u30a7<<<\\u30a9"
3420 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3421 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3422 {"\\u30a9", "\\u30a7"}, 2 },
3423 };
3424 uint32_t i;
3425
3426
3427 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3428 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3429 }
3430
3431 #if 0
3432 const char* rule1 =
3433 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3434 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3435 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3436 const char* rule2 =
3437 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3438 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3439 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3440 const char* test[] = {
3441 "\\u30c6\\u30fc\\u30bf",
3442 "\\u30c6\\u30a7\\u30bf",
3443 };
3444 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3445 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3446 /* this piece of code should be in some sort of verbose mode */
3447 /* it gets the collation elements for elements and prints them */
3448 /* This is useful when trying to see whether the problem is */
3449 {
3450 UErrorCode status = U_ZERO_ERROR;
3451 uint32_t i = 0;
3452 UCollationElements *it = NULL;
3453 uint32_t CE;
3454 UChar string[256];
3455 uint32_t uStringLen;
3456 UCollator *coll = NULL;
3457
3458 uStringLen = u_unescape(rule1, string, 256);
3459
3460 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3461
3462 /*coll = ucol_open("ja_JP_JIS", &status);*/
3463 it = ucol_openElements(coll, string, 0, &status);
3464
3465 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3466 log_verbose("%s\n", test[i]);
3467 uStringLen = u_unescape(test[i], string, 256);
3468 ucol_setText(it, string, uStringLen, &status);
3469
3470 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3471 log_verbose("%08X\n", CE);
3472 }
3473 log_verbose("\n");
3474
3475 }
3476
3477 ucol_closeElements(it);
3478 ucol_close(coll);
3479 }
3480 #endif
3481 }
3482
3483 static void TestPrefixCompose(void) {
3484 const char* rule1 =
3485 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3486 /*
3487 const char* test[] = {
3488 "\\u30c6\\u30fc\\u30bf",
3489 "\\u30c6\\u30a7\\u30bf",
3490 };
3491 */
3492 {
3493 UErrorCode status = U_ZERO_ERROR;
3494 /*uint32_t i = 0;*/
3495 /*UCollationElements *it = NULL;*/
3496 /* uint32_t CE;*/
3497 UChar string[256];
3498 uint32_t uStringLen;
3499 UCollator *coll = NULL;
3500
3501 uStringLen = u_unescape(rule1, string, 256);
3502
3503 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3504 ucol_close(coll);
3505 }
3506
3507
3508 }
3509
3510 /*
3511 [last variable] last variable value
3512 [last primary ignorable] largest CE for primary ignorable
3513 [last secondary ignorable] largest CE for secondary ignorable
3514 [last tertiary ignorable] largest CE for tertiary ignorable
3515 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3516 */
3517
3518 static void TestRuleOptions(void) {
3519 /* values here are hardcoded and are correct for the current UCA
3520 * when the UCA changes, one might be forced to change these
3521 * values. (\\u02d0, \\U00010FFFC etc...)
3522 */
3523 static struct {
3524 const char *rules;
3525 const char *data[50];
3526 const uint32_t len;
3527 } tests[] = {
3528 /* - all befores here amount to zero */
3529 { "&[before 3][first tertiary ignorable]<<<a",
3530 { "\\u0000", "a"}, 2
3531 }, /* you cannot go before first tertiary ignorable */
3532
3533 { "&[before 3][last tertiary ignorable]<<<a",
3534 { "\\u0000", "a"}, 2
3535 }, /* you cannot go before last tertiary ignorable */
3536
3537 { "&[before 3][first secondary ignorable]<<<a",
3538 { "\\u0000", "a"}, 2
3539 }, /* you cannot go before first secondary ignorable */
3540
3541 { "&[before 3][last secondary ignorable]<<<a",
3542 { "\\u0000", "a"}, 2
3543 }, /* you cannot go before first secondary ignorable */
3544
3545 /* 'normal' befores */
3546
3547 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3548 { "c", "b", "\\u0332", "a" }, 4
3549 },
3550
3551 /* we don't have a code point that corresponds to
3552 * the last primary ignorable
3553 */
3554 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3555 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3556 },
3557
3558 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3559 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3560 },
3561
3562 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3563 { "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
3564 },
3565
3566 { "&[first regular]<a"
3567 "&[before 1][first regular]<b",
3568 { "b", "\\u02d0", "a", "\\u02d1"}, 4
3569 },
3570
3571 { "&[before 1][last regular]<b"
3572 "&[last regular]<a",
3573 { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
3574 },
3575
3576 { "&[before 1][first implicit]<b"
3577 "&[first implicit]<a",
3578 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3579 },
3580
3581 { "&[before 1][last implicit]<b"
3582 "&[last implicit]<a",
3583 { "b", "\\U0010FFFD", "a" }, 3
3584 },
3585
3586 { "&[last variable]<z"
3587 "&[last primary ignorable]<x"
3588 "&[last secondary ignorable]<<y"
3589 "&[last tertiary ignorable]<<<w"
3590 "&[top]<u",
3591 {"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
3592 }
3593
3594 };
3595 uint32_t i;
3596
3597
3598 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3599 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3600 }
3601 }
3602
3603
3604 static void TestOptimize(void) {
3605 /* this is not really a test - just trying out
3606 * whether copying of UCA contents will fail
3607 * Cannot really test, since the functionality
3608 * remains the same.
3609 */
3610 static struct {
3611 const char *rules;
3612 const char *data[50];
3613 const uint32_t len;
3614 } tests[] = {
3615 /* - all befores here amount to zero */
3616 { "[optimize [\\uAC00-\\uD7FF]]",
3617 { "a", "b"}, 2}
3618 };
3619 uint32_t i;
3620
3621 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3622 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3623 }
3624 }
3625
3626 /*
3627 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3628 weiv ucol_strcollIter?
3629 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3630 weiv these are the input strings?
3631 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3632 weiv will check - could be a problem with utf-8 iterator
3633 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3634 weiv hmmm
3635 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3636 weiv that doesn't sound right
3637 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3638 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3639 cycheng@ca.ibm.c... yes
3640 weiv and then do the comparison
3641 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3642 weiv utf-16 strings look like a little endian ones in the example you sent me
3643 weiv It could be a bug - let me try to test it out
3644 cycheng@ca.ibm.c... ok
3645 cycheng@ca.ibm.c... we can wait till the conf. call
3646 cycheng@ca.ibm.c... next week
3647 weiv that would be great
3648 weiv hmmm
3649 weiv I might be wrong
3650 weiv let me play with it some more
3651 cycheng@ca.ibm.c... ok
3652 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3653 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3654 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3655 weiv ok
3656 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3657 weiv thanks
3658 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3659 */
/*
 * Disabled (#if 0) reproduction of the report quoted in the comment above:
 * the same two strings are supplied once as UTF-16BE bytes and once as
 * UTF-8 bytes, compared through ucol_strcollIter with normalization
 * switched on, and an error is logged when the two results differ.
 */
#if 0
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);


    /* the two strings as UTF-16BE byte sequences */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    /* the same two strings as UTF-8 byte sequences */
    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


    if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif
3700
3701 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3702 static void Alexis2(void) {
3703 UErrorCode status = U_ZERO_ERROR;
3704 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3705 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3706 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3707 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3708
3709 UConverter *conv = NULL;
3710
3711 UCharIterator U16BEItS, U16BEItT;
3712 UCharIterator U8ItS, U8ItT;
3713
3714 UCollationResult resU16, resU16BE, resU8;
3715
3716 const char* pairs[][2] = {
3717 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3718 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3719 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3720 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3721 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3722 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3723 { "\\u0020", "\\u0020\\u0000"}
3724 /*
3725 5F20 (my result here)
3726 5F204E008E3F
3727 5F20 (your result here)
3728 */
3729 };
3730
3731 int32_t i = 0;
3732
3733 UCollator *coll = ucol_open("", &status);
3734 if(status == U_FILE_ACCESS_ERROR) {
3735 log_data_err("Is your data around?\n");
3736 return;
3737 } else if(U_FAILURE(status)) {
3738 log_err("Error opening collator\n");
3739 return;
3740 }
3741 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3742 conv = ucnv_open("UTF16BE", &status);
3743 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3744 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3745 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3746
3747 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3748
3749 log_verbose("Result of strcoll is %i\n", resU16);
3750
3751 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3752 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3753
3754 /* use the original sizes, as the result from converter is in bytes */
3755 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3756 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3757
3758 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3759
3760 log_verbose("Result of U16BE is %i\n", resU16BE);
3761
3762 if(resU16 != resU16BE) {
3763 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3764 }
3765
3766 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3767 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3768
3769 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3770 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3771
3772 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3773
3774 if(resU16 != resU8) {
3775 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3776 }
3777
3778 }
3779
3780 ucol_close(coll);
3781 ucnv_close(conv);
3782 }
3783
3784 static void TestHebrewUCA(void) {
3785 UErrorCode status = U_ZERO_ERROR;
3786 const char *first[] = {
3787 "d790d6b8d79cd795d6bcd7a9",
3788 "d790d79cd79ed7a7d799d799d7a1",
3789 "d790d6b4d79ed795d6bcd7a9",
3790 };
3791
3792 char utf8String[3][256];
3793 UChar utf16String[3][256];
3794
3795 int32_t i = 0, j = 0;
3796 int32_t sizeUTF8[3];
3797 int32_t sizeUTF16[3];
3798
3799 UCollator *coll = ucol_open("", &status);
3800 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3801
3802 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3803 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3804 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3805 log_verbose("%i: ");
3806 for(j = 0; j < sizeUTF16[i]; j++) {
3807 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3808 log_verbose("%04X", utf16String[i][j]);
3809 }
3810 log_verbose("\n");
3811 }
3812 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3813 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3814 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3815 }
3816 }
3817
3818 ucol_close(coll);
3819
3820 }
3821
3822 static void TestPartialSortKeyTermination(void) {
3823 const char* cases[] = {
3824 "\\u1234\\u1234\\udc00",
3825 "\\udc00\\ud800\\ud800"
3826 };
3827
3828 int32_t i = sizeof(UCollator);
3829
3830 UErrorCode status = U_ZERO_ERROR;
3831
3832 UCollator *coll = ucol_open("", &status);
3833
3834 UCharIterator iter;
3835
3836 UChar currCase[256];
3837 int32_t length = 0;
3838 int32_t pKeyLen = 0;
3839
3840 uint8_t key[256];
3841
3842 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3843 uint32_t state[2] = {0, 0};
3844 length = u_unescape(cases[i], currCase, 256);
3845 uiter_setString(&iter, currCase, length);
3846 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3847
3848 log_verbose("Done\n");
3849
3850 }
3851 ucol_close(coll);
3852 }
3853
3854 static void TestSettings(void) {
3855 const char* cases[] = {
3856 "apple",
3857 "Apple"
3858 };
3859
3860 const char* locales[] = {
3861 "",
3862 "en"
3863 };
3864
3865 UErrorCode status = U_ZERO_ERROR;
3866
3867 int32_t i = 0, j = 0;
3868
3869 UChar source[256], target[256];
3870 int32_t sLen = 0, tLen = 0;
3871
3872 UCollator *collateObject = NULL;
3873 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
3874 collateObject = ucol_open(locales[i], &status);
3875 ucol_setStrength(collateObject, UCOL_PRIMARY);
3876 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
3877 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
3878 sLen = u_unescape(cases[j-1], source, 256);
3879 source[sLen] = 0;
3880 tLen = u_unescape(cases[j], target, 256);
3881 source[tLen] = 0;
3882 doTest(collateObject, source, target, UCOL_EQUAL);
3883 }
3884 ucol_close(collateObject);
3885 }
3886 }
3887
3888 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
3889 UErrorCode status = U_ZERO_ERROR;
3890 int32_t errorNo = 0;
3891 /*const UChar *sourceRules = NULL;*/
3892 /*int32_t sourceRulesLen = 0;*/
3893 UColAttributeValue french = UCOL_OFF;
3894 int32_t cloneSize = 0;
3895
3896 if(!ucol_equals(source, target)) {
3897 log_err("Same collators, different address not equal\n");
3898 errorNo++;
3899 }
3900 ucol_close(target);
3901 if(uprv_strcmp(ucol_getLocale(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocale(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
3902 /* currently, safeClone is implemented through getRules/openRules
3903 * so it is the same as the test below - I will comment that test out.
3904 */
3905 /* real thing */
3906 target = ucol_safeClone(source, NULL, &cloneSize, &status);
3907 if(U_FAILURE(status)) {
3908 log_err("Error creating clone\n");
3909 errorNo++;
3910 return errorNo;
3911 }
3912 if(!ucol_equals(source, target)) {
3913 log_err("Collator different from it's clone\n");
3914 errorNo++;
3915 }
3916 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
3917 if(french == UCOL_ON) {
3918 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
3919 } else {
3920 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
3921 }
3922 if(U_FAILURE(status)) {
3923 log_err("Error setting attributes\n");
3924 errorNo++;
3925 return errorNo;
3926 }
3927 if(ucol_equals(source, target)) {
3928 log_err("Collators same even when options changed\n");
3929 errorNo++;
3930 }
3931 ucol_close(target);
3932 /* commented out since safeClone uses exactly the same technique */
3933 /*
3934 sourceRules = ucol_getRules(source, &sourceRulesLen);
3935 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3936 if(U_FAILURE(status)) {
3937 log_err("Error instantiating target from rules\n");
3938 errorNo++;
3939 return errorNo;
3940 }
3941 if(!ucol_equals(source, target)) {
3942 log_err("Collator different from collator that was created from the same rules\n");
3943 errorNo++;
3944 }
3945 ucol_close(target);
3946 */
3947 }
3948 return errorNo;
3949 }
3950
3951
3952 static void TestEquals(void) {
3953 /* ucol_equals is not currently a public API. There is a chance that it will become
3954 * something like this, but currently it is only used by RuleBasedCollator::operator==
3955 */
3956 /* test whether the two collators instantiated from the same locale are equal */
3957 UErrorCode status = U_ZERO_ERROR;
3958 UParseError parseError;
3959 int32_t noOfLoc = uloc_countAvailable();
3960 const char *locName = NULL;
3961 UCollator *source = NULL, *target = NULL;
3962 int32_t i = 0;
3963
3964 const char* rules[] = {
3965 "&l < lj <<< Lj <<< LJ",
3966 "&n < nj <<< Nj <<< NJ",
3967 "&ae <<< \\u00e4",
3968 "&AE <<< \\u00c4"
3969 };
3970 /*
3971 const char* badRules[] = {
3972 "&l <<< Lj",
3973 "&n < nj <<< nJ <<< NJ",
3974 "&a <<< \\u00e4",
3975 "&AE <<< \\u00c4 <<< x"
3976 };
3977 */
3978
3979 UChar sourceRules[1024], targetRules[1024];
3980 int32_t sourceRulesSize = 0, targetRulesSize = 0;
3981 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
3982
3983 for(i = 0; i < rulesSize; i++) {
3984 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
3985 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
3986 }
3987
3988 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3989 if(status == U_FILE_ACCESS_ERROR) {
3990 log_data_err("Is your data around?\n");
3991 return;
3992 } else if(U_FAILURE(status)) {
3993 log_err("Error opening collator\n");
3994 return;
3995 }
3996 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3997 if(!ucol_equals(source, target)) {
3998 log_err("Equivalent collators not equal!\n");
3999 }
4000 ucol_close(source);
4001 ucol_close(target);
4002
4003 source = ucol_open("root", &status);
4004 target = ucol_open("root", &status);
4005 log_verbose("Testing root\n");
4006 if(!ucol_equals(source, source)) {
4007 log_err("Same collator not equal\n");
4008 }
4009 if(TestEqualsForCollator(locName, source, target)) {
4010 log_err("Errors for root\n", locName);
4011 }
4012 ucol_close(source);
4013
4014 for(i = 0; i<noOfLoc; i++) {
4015 status = U_ZERO_ERROR;
4016 locName = uloc_getAvailable(i);
4017 /*if(hasCollationElements(locName)) {*/
4018 log_verbose("Testing equality for locale %s\n", locName);
4019 source = ucol_open(locName, &status);
4020 target = ucol_open(locName, &status);
4021 if(TestEqualsForCollator(locName, source, target)) {
4022 log_err("Errors for locale %s\n", locName);
4023 }
4024 ucol_close(source);
4025 /*}*/
4026 }
4027 }
4028
4029 static void TestJ2726(void) {
4030 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4031 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4032 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4033 UErrorCode status = U_ZERO_ERROR;
4034 UCollator *coll = ucol_open("en", &status);
4035 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4036 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4037 doTest(coll, a, aSpace, UCOL_EQUAL);
4038 doTest(coll, aSpace, a, UCOL_EQUAL);
4039 doTest(coll, a, spaceA, UCOL_EQUAL);
4040 doTest(coll, spaceA, a, UCOL_EQUAL);
4041 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4042 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4043 ucol_close(coll);
4044 }
4045
4046 static void NullRule(void) {
4047 UChar r[3] = {0};
4048 UErrorCode status = U_ZERO_ERROR;
4049 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4050 if(U_SUCCESS(status)) {
4051 log_err("This should have been an error!\n");
4052 ucol_close(coll);
4053 } else {
4054 status = U_ZERO_ERROR;
4055 }
4056 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4057 if(U_FAILURE(status)) {
4058 log_err("Empty rules should have produced a valid collator\n");
4059 } else {
4060 ucol_close(coll);
4061 }
4062 }
4063
4064 /**
4065 * Test for CollationElementIterator previous and next for the whole set of
4066 * unicode characters with normalization on.
4067 */
4068 static void TestNumericCollation(void)
4069 {
4070 UErrorCode status = U_ZERO_ERROR;
4071
4072 const static char *basicTestStrings[]={
4073 "hello1",
4074 "hello2",
4075 "hello2002",
4076 "hello2003",
4077 "hello123456",
4078 "hello1234567",
4079 "hello10000000",
4080 "hello100000000",
4081 "hello1000000000",
4082 "hello10000000000",
4083 };
4084
4085 const static char *preZeroTestStrings[]={
4086 "avery10000",
4087 "avery010000",
4088 "avery0010000",
4089 "avery00010000",
4090 "avery000010000",
4091 "avery0000010000",
4092 "avery00000010000",
4093 "avery000000010000",
4094 };
4095
4096 const static char *thirtyTwoBitNumericStrings[]={
4097 "avery42949672960",
4098 "avery42949672961",
4099 "avery42949672962",
4100 "avery429496729610"
4101 };
4102
4103 const static char *supplementaryDigits[] = {
4104 "\\uD835\\uDFCE", /* 0 */
4105 "\\uD835\\uDFCF", /* 1 */
4106 "\\uD835\\uDFD0", /* 2 */
4107 "\\uD835\\uDFD1", /* 3 */
4108 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4109 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4110 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4111 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4112 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4113 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4114 };
4115
4116 const static char *foreignDigits[] = {
4117 "\\u0661",
4118 "\\u0662",
4119 "\\u0663",
4120 "\\u0661\\u0660",
4121 "\\u0661\\u0662",
4122 "\\u0661\\u0663",
4123 "\\u0662\\u0660",
4124 "\\u0662\\u0662",
4125 "\\u0662\\u0663",
4126 "\\u0663\\u0660",
4127 "\\u0663\\u0662",
4128 "\\u0663\\u0663"
4129 };
4130
4131 const static char *evenZeroes[] = {
4132 "2000",
4133 "2001",
4134 "2002",
4135 "2003"
4136 };
4137
4138 UColAttribute att = UCOL_NUMERIC_COLLATION;
4139 UColAttributeValue val = UCOL_ON;
4140
4141 /* Open our collator. */
4142 UCollator* coll = ucol_open("root", &status);
4143 if (U_FAILURE(status)){
4144 log_err("ERROR: in using ucol_open()\n %s\n",
4145 myErrorName(status));
4146 return;
4147 }
4148 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4149 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4150 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4151 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4152 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4153
4154 /* Setting up our collator to do digits. */
4155 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4156 if (U_FAILURE(status)){
4157 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4158 myErrorName(status));
4159 return;
4160 }
4161
4162 /*
4163 Testing that prepended zeroes still yield the correct collation behavior.
4164 We expect that every element in our strings array will be equal.
4165 */
4166 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4167
4168 ucol_close(coll);
4169 }
4170
4171 static void TestTibetanConformance(void)
4172 {
4173 const char* test[] = {
4174 "\\u0FB2\\u0591\\u0F71\\u0061",
4175 "\\u0FB2\\u0F71\\u0061"
4176 };
4177
4178 UErrorCode status = U_ZERO_ERROR;
4179 UCollator *coll = ucol_open("", &status);
4180 UChar source[100];
4181 UChar target[100];
4182 int result;
4183 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4184 if (U_SUCCESS(status)) {
4185 u_unescape(test[0], source, 100);
4186 u_unescape(test[1], target, 100);
4187 doTest(coll, source, target, UCOL_EQUAL);
4188 result = ucol_strcoll(coll, source, -1, target, -1);
4189 log_verbose("result %d\n", result);
4190 if (UCOL_EQUAL != result) {
4191 log_err("Tibetan comparison error\n");
4192 }
4193 }
4194 ucol_close(coll);
4195
4196 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4197 }
4198
/** Runs genericLocaleStarter() on a two-string Han list under the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *pinyinPair[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", pinyinPair, sizeof(pinyinPair)/sizeof(pinyinPair[0]));
}
4203
/* Largest raw value (inclusive) fed through the implicit-CE checks below. */
#define TST_UCOL_MAX_INPUT 0x220001

/*
 * Bit masks applied to implicit collation elements.  They expand to plain
 * constants with no trailing semicolon, so they work inside larger
 * expressions as well as at the end of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4208
4209
4210 static void showImplicit(UChar32 i) {
4211 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4212 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4213 }
4214 }
4215
4216 static void TestImplicitGeneration(void) {
4217 UErrorCode status = U_ZERO_ERROR;
4218 UChar32 last = 0;
4219 UChar32 current;
4220 UChar32 i = 0, j = 0;
4221 UChar32 roundtrip = 0;
4222 UChar32 lastBottom = 0;
4223 UChar32 currentBottom = 0;
4224 UChar32 lastTop = 0;
4225 UChar32 currentTop = 0;
4226
4227 UCollator *coll = ucol_open("root", &status);
4228 if(U_FAILURE(status)) {
4229 log_err("Couldn't open UCA\n");
4230 return;
4231 }
4232
4233 uprv_uca_getRawFromImplicit(0xE20303E7);
4234
4235 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4236 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4237
4238 /* check that it round-trips AND that all intervening ones are illegal*/
4239 roundtrip = uprv_uca_getRawFromImplicit(current);
4240 if (roundtrip != i) {
4241 log_err("No roundtrip %08X\n", i);
4242 }
4243 if (last != 0) {
4244 for (j = last + 1; j < current; ++j) {
4245 roundtrip = uprv_uca_getRawFromImplicit(j);
4246 /* raise an error if it *doesn't* find an error*/
4247 if (roundtrip != -1) {
4248 log_err("Fails to recognize illegal %08X\n", j);
4249 }
4250 }
4251 }
4252 /* now do other consistency checks*/
4253 lastBottom = last & bottomByte;
4254 currentBottom = current & bottomByte;
4255 lastTop = last & topByte;
4256 currentTop = current & topByte;
4257
4258 /* print out some values for spot-checking*/
4259 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4260 showImplicit(i-3);
4261 showImplicit(i-2);
4262 showImplicit(i-1);
4263 showImplicit(i);
4264 showImplicit(i+1);
4265 showImplicit(i+2);
4266 }
4267 last = current;
4268
4269 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4270 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4271 }
4272 }
4273 showImplicit(TST_UCOL_MAX_INPUT-2);
4274 showImplicit(TST_UCOL_MAX_INPUT-1);
4275 showImplicit(TST_UCOL_MAX_INPUT);
4276 ucol_close(coll);
4277 }
4278
4279 /**
4280 * Iterate through the given iterator, checking to see that all the strings
4281 * in the expected array are present.
4282 * @param expected array of strings we expect to see, or NULL
4283 * @param expectedCount number of elements of expected, or 0
4284 */
4285 static int32_t checkUEnumeration(const char* msg,
4286 UEnumeration* iter,
4287 const char** expected,
4288 int32_t expectedCount) {
4289 UErrorCode ec = U_ZERO_ERROR;
4290 int32_t i = 0, n, j, bit;
4291 int32_t seenMask = 0;
4292
4293 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4294 n = uenum_count(iter, &ec);
4295 if (!assertSuccess("count", &ec)) return -1;
4296 log_verbose("%s = [", msg);
4297 for (;; ++i) {
4298 const char* s = uenum_next(iter, NULL, &ec);
4299 if (!assertSuccess("snext", &ec) || s == NULL) break;
4300 if (i != 0) log_verbose(",");
4301 log_verbose("%s", s);
4302 /* check expected list */
4303 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4304 if ((seenMask&bit) == 0 &&
4305 uprv_strcmp(s, expected[j]) == 0) {
4306 seenMask |= bit;
4307 break;
4308 }
4309 }
4310 }
4311 log_verbose("] (%d)\n", i);
4312 assertTrue("count verified", i==n);
4313 /* did we see all expected strings? */
4314 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4315 if ((seenMask&bit)!=0) {
4316 log_verbose("Ok: \"%s\" seen\n", expected[j]);
4317 } else {
4318 log_err("FAIL: \"%s\" not seen\n", expected[j]);
4319 }
4320 }
4321 return n;
4322 }
4323
4324 /**
4325 * Test new API added for separate collation tree.
4326 */
4327 static void TestSeparateTrees(void) {
4328 UErrorCode ec = U_ZERO_ERROR;
4329 UEnumeration *e = NULL;
4330 int32_t n = -1;
4331 UBool isAvailable;
4332 char loc[256];
4333
4334 static const char* AVAIL[] = { "en", "de" };
4335
4336 static const char* KW[] = { "collation" };
4337
4338 static const char* KWVAL[] = { "phonebook", "stroke" };
4339
4340 #if !UCONFIG_NO_SERVICE
4341 e = ucol_openAvailableLocales(&ec);
4342 assertSuccess("ucol_openAvailableLocales", &ec);
4343 assertTrue("ucol_openAvailableLocales!=0", e!=0);
4344 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4345 /* Don't need to check n because we check list */
4346 uenum_close(e);
4347 #endif
4348
4349 e = ucol_getKeywords(&ec);
4350 assertSuccess("ucol_getKeywords", &ec);
4351 assertTrue("ucol_getKeywords!=0", e!=0);
4352 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4353 /* Don't need to check n because we check list */
4354 uenum_close(e);
4355
4356 e = ucol_getKeywordValues(KW[0], &ec);
4357 assertSuccess("ucol_getKeywordValues", &ec);
4358 assertTrue("ucol_getKeywordValues!=0", e!=0);
4359 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4360 /* Don't need to check n because we check list */
4361 uenum_close(e);
4362
4363 /* Try setting a warning before calling ucol_getKeywordValues */
4364 ec = U_USING_FALLBACK_WARNING;
4365 e = ucol_getKeywordValues(KW[0], &ec);
4366 assertSuccess("ucol_getKeywordValues [with warning code set]", &ec);
4367 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4368 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4369 /* Don't need to check n because we check list */
4370 uenum_close(e);
4371
4372 /*
4373 U_DRAFT int32_t U_EXPORT2
4374 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4375 const char* locale, UBool* isAvailable,
4376 UErrorCode* status);
4377 }
4378 */
4379 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr",
4380 &isAvailable, &ec);
4381 assertSuccess("getFunctionalEquivalent", &ec);
4382 assertEquals("getFunctionalEquivalent(fr)", "fr", loc);
4383 assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
4384 isAvailable == TRUE);
4385
4386 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR",
4387 &isAvailable, &ec);
4388 assertSuccess("getFunctionalEquivalent", &ec);
4389 assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc);
4390 assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
4391 isAvailable == TRUE);
4392 }
4393
/**
 * Supersedes TestJ784.
 *
 * Feeds two string lists to the generic test starters twice each: once with
 * a custom "[before 2]" tailoring of the accented vowels and once with the
 * "zh" locale.
 */
static void TestBeforePinyin(void) {
    /* adjacent literals are concatenated into one rule string */
    static const char pinyinRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    static const char *caseAndTone[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(pinyinRules, syllables, sizeof(syllables)/sizeof(syllables[0]));
    genericLocaleStarter("zh", syllables, sizeof(syllables)/sizeof(syllables[0]));
    genericRulesStarter(pinyinRules, caseAndTone, sizeof(caseAndTone)/sizeof(caseAndTone[0]));
    genericLocaleStarter("zh", caseAndTone, sizeof(caseAndTone)/sizeof(caseAndTone[0]));
}
4454
4455 static void TestBeforeTightening(void) {
4456 struct {
4457 const char *rules;
4458 UErrorCode expectedStatus;
4459 } tests[] = {
4460 { "&[before 1]a<x", U_ZERO_ERROR },
4461 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4462 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4463 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4464 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4465 { "&[before 2]a<<x",U_ZERO_ERROR },
4466 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4467 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4468 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
4469 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
4470 { "&[before 3]a<<<x",U_ZERO_ERROR },
4471 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
4472 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4473 };
4474
4475 int32_t i = 0;
4476
4477 UErrorCode status = U_ZERO_ERROR;
4478 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4479 uint32_t rlen = 0;
4480
4481 UCollator *coll = NULL;
4482
4483
4484 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4485 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4486 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4487 if(status != tests[i].expectedStatus) {
4488 log_err("Opening a collator with rules %s returned error code %s, expected %s\n",
4489 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4490 }
4491 ucol_close(coll);
4492 status = U_ZERO_ERROR;
4493 }
4494
4495 }
4496
4497 #if 0
4498 &m < a
4499 &[before 1] a < x <<< X << q <<< Q < z
4500 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4501
4502 &m < a
4503 &[before 2] a << x <<< X << q <<< Q < z
4504 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4505
4506 &m < a
4507 &[before 3] a <<< x <<< X << q <<< Q < z
4508 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4509
4510
4511 &m << a
4512 &[before 1] a < x <<< X << q <<< Q < z
4513 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4514
4515 &m << a
4516 &[before 2] a << x <<< X << q <<< Q < z
4517 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4518
4519 &m << a
4520 &[before 3] a <<< x <<< X << q <<< Q < z
4521 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4522
4523
4524 &m <<< a
4525 &[before 1] a < x <<< X << q <<< Q < z
4526 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4527
4528 &m <<< a
4529 &[before 2] a << x <<< X << q <<< Q < z
4530 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4531
4532 &m <<< a
4533 &[before 3] a <<< x <<< X << q <<< Q < z
4534 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4535
4536
4537 &[before 1] s < x <<< X << q <<< Q < z
4538 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4539
4540 &[before 2] s << x <<< X << q <<< Q < z
4541 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4542
4543 &[before 3] s <<< x <<< X << q <<< Q < z
4544 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4545
4546
4547 &[before 1] \u24DC < x <<< X << q <<< Q < z
4548 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4549
4550 &[before 2] \u24DC << x <<< X << q <<< Q < z
4551 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4552
4553 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4554 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4555 #endif
4556
4557
4558 #if 0
4559 /* requires features not yet supported */
4560 static void TestMoreBefore(void) {
4561 struct {
4562 const char* rules;
4563 const char* order[20];
4564 int32_t size;
4565 } tests[] = {
4566 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4567 { "m","M","x","X","q","Q","z","a","n" }, 9},
4568 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4569 { "m","M","x","X","q","Q","a","z","n" }, 9},
4570 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4571 { "m","M","x","X","a","q","Q","z","n" }, 9},
4572 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4573 { "x","X","q","Q","z","m","M","a","n" }, 9},
4574 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4575 { "m","M","x","X","q","Q","a","z","n" }, 9},
4576 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4577 { "m","M","x","X","a","q","Q","z","n" }, 9},
4578 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4579 { "x","X","q","Q","z","n","m","a","M" }, 9},
4580 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4581 { "x","X","q","Q","m","a","M","z","n" }, 9},
4582 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4583 { "m","x","X","a","M","q","Q","z","n" }, 9},
4584 { "&[before 1] s < x <<< X << q <<< Q < z",
4585 { "r","R","x","X","q","Q","z","s","n" }, 9},
4586 { "&[before 2] s << x <<< X << q <<< Q < z",
4587 { "r","R","x","X","q","Q","s","z","n" }, 9},
4588 { "&[before 3] s <<< x <<< X << q <<< Q < z",
4589 { "r","R","x","X","s","q","Q","z","n" }, 9},
4590 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4591 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4592 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4593 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4594 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4595 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4596 };
4597
4598 int32_t i = 0;
4599
4600 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4601 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
4602 }
4603 }
4604 #endif
4605
4606 static void TestTailorNULL( void ) {
4607 const static char* rule = "&a <<< '\\u0000'";
4608 UErrorCode status = U_ZERO_ERROR;
4609 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4610 uint32_t rlen = 0;
4611 UChar a = 1, null = 0;
4612 UCollationResult res = UCOL_EQUAL;
4613
4614 UCollator *coll = NULL;
4615
4616
4617 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4618 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4619
4620 if(U_FAILURE(status)) {
4621 log_err("Could not open default collator!\n");
4622 } else {
4623 res = ucol_strcoll(coll, &a, 1, &null, 1);
4624
4625 if(res != UCOL_LESS) {
4626 log_err("NULL was not tailored properly!\n");
4627 }
4628 }
4629
4630 ucol_close(coll);
4631 }
4632
4633 static void
4634 TestThaiSortKey(void)
4635 {
4636 UChar yamakan = 0x0E4E;
4637 UErrorCode status = U_ZERO_ERROR;
4638 uint8_t key[256];
4639 int32_t keyLen = 0;
4640 /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
4641 /* since it stays in the same relative position. This should be addressed in CLDR */
4642 /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
4643 /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
4644 /* UCA 5.0 moves Yammakan */
4645 uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 };
4646 UCollator *coll = ucol_open("th", &status);
4647 if(U_FAILURE(status)) {
4648 log_err("Could not open a collator, exiting (%s)\n", u_errorName(status));
4649 return;
4650 }
4651
4652 keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
4653 if(strcmp((char *)key, (char *)expectedKey)) {
4654 log_err("Yammakan key is different from ICU 34!\n");
4655 }
4656
4657 ucol_close(coll);
4658 }
4659
4660 static void
4661 TestUpperFirstQuaternary(void)
4662 {
4663 const char* tests[] = { "B", "b", "Bb", "bB" };
4664 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4665 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4666 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4667 }
4668
4669 static void
4670 TestJ4960(void)
4671 {
4672 const char* tests[] = { "\\u00e2T", "aT" };
4673 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4674 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4675 const char* tests2[] = { "a", "A" };
4676 const char* rule = "&[first tertiary ignorable]=A=a";
4677 UColAttribute att2[] = { UCOL_CASE_LEVEL };
4678 UColAttributeValue attVals2[] = { UCOL_ON };
4679 /* Test whether we correctly ignore primary ignorables on case level when */
4680 /* we have only primary & case level */
4681 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4682 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4683 /* and case level */
4684 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4685 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4686 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4687 }
4688
4689 static void
4690 TestJ5223(void)
4691 {
4692 static const char *test = "this is a test string";
4693 UChar ustr[256];
4694 int32_t ustr_length = u_unescape(test, ustr, 256);
4695 unsigned char sortkey[256];
4696 int32_t sortkey_length;
4697 UErrorCode status = U_ZERO_ERROR;
4698 static UCollator *coll = NULL;
4699 coll = ucol_open("root", &status);
4700 if(U_FAILURE(status)) {
4701 log_err("Couldn't open UCA\n");
4702 return;
4703 }
4704 ucol_setStrength(coll, UCOL_PRIMARY);
4705 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4706 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4707 if (U_FAILURE(status)) {
4708 log_err("Failed setting atributes\n");
4709 return;
4710 }
4711 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4712 if (sortkey_length > 256) return;
4713
4714 /* we mark the position where the null byte should be written in advance */
4715 sortkey[sortkey_length-1] = 0xAA;
4716
4717 /* we set the buffer size one byte higher than needed */
4718 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4719 sortkey_length+1);
4720
4721 /* no error occurs (for me) */
4722 if (sortkey[sortkey_length-1] == 0xAA) {
4723 log_err("Hit bug at first try\n");
4724 }
4725
4726 /* we mark the position where the null byte should be written again */
4727 sortkey[sortkey_length-1] = 0xAA;
4728
4729 /* this time we set the buffer size to the exact amount needed */
4730 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4731 sortkey_length);
4732
4733 /* now the trailing null byte is not written */
4734 if (sortkey[sortkey_length-1] == 0xAA) {
4735 log_err("Hit bug at second try\n");
4736 }
4737
4738 ucol_close(coll);
4739 }
4740
/* Regression test for Thai partial sort key problem */
/** Hands two Thai strings to genericLocaleStarter() under the "th" locale. */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, sizeof(thaiPair)/sizeof(thaiPair[0]));
}
4752
4753
4754
4755 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
4756
4757 void addMiscCollTest(TestNode** root)
4758 {
4759 TEST(TestRuleOptions);
4760 TEST(TestBeforePrefixFailure);
4761 TEST(TestContractionClosure);
4762 TEST(TestPrefixCompose);
4763 TEST(TestStrCollIdenticalPrefix);
4764 TEST(TestPrefix);
4765 TEST(TestNewJapanese);
4766 /*TEST(TestLimitations);*/
4767 TEST(TestNonChars);
4768 TEST(TestExtremeCompression);
4769 TEST(TestSurrogates);
4770 TEST(TestVariableTopSetting);
4771 TEST(TestBocsuCoverage);
4772 TEST(TestCyrillicTailoring);
4773 TEST(TestCase);
4774 TEST(IncompleteCntTest);
4775 TEST(BlackBirdTest);
4776 TEST(FunkyATest);
4777 TEST(BillFairmanTest);
4778 TEST(RamsRulesTest);
4779 TEST(IsTailoredTest);
4780 TEST(TestCollations);
4781 TEST(TestChMove);
4782 TEST(TestImplicitTailoring);
4783 TEST(TestFCDProblem);
4784 TEST(TestEmptyRule);
4785 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
4786 TEST(TestJ815);
4787 /*TEST(TestJ831);*/ /* we changed lv locale */
4788 TEST(TestBefore);
4789 TEST(TestRedundantRules);
4790 TEST(TestExpansionSyntax);
4791 TEST(TestHangulTailoring);
4792 TEST(TestUCARules);
4793 TEST(TestIncrementalNormalize);
4794 TEST(TestComposeDecompose);
4795 TEST(TestCompressOverlap);
4796 TEST(TestContraction);
4797 TEST(TestExpansion);
4798 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
4799 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
4800 TEST(TestOptimize);
4801 TEST(TestSuppressContractions);
4802 TEST(Alexis2);
4803 TEST(TestHebrewUCA);
4804 TEST(TestPartialSortKeyTermination);
4805 TEST(TestSettings);
4806 TEST(TestEquals);
4807 TEST(TestJ2726);
4808 TEST(NullRule);
4809 TEST(TestNumericCollation);
4810 TEST(TestTibetanConformance);
4811 TEST(TestPinyinProblem);
4812 TEST(TestImplicitGeneration);
4813 TEST(TestSeparateTrees);
4814 TEST(TestBeforePinyin);
4815 TEST(TestBeforeTightening);
4816 /*TEST(TestMoreBefore);*/
4817 TEST(TestTailorNULL);
4818 TEST(TestThaiSortKey);
4819 TEST(TestUpperFirstQuaternary);
4820 TEST(TestJ4960);
4821 TEST(TestJ5223);
4822 TEST(TestJ5232);
4823 }
4824
4825 #endif /* #if !UCONFIG_NO_COLLATION */
4826