]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cmsccoll.c
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cmsccoll.c
1
2 /********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2011, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7 /*******************************************************************************
8 *
9 * File cmsccoll.C
10 *
11 *******************************************************************************/
12 /**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17 #include <stdio.h>
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_COLLATION
22
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
26 #include "cintltst.h"
27 #include "ccolltst.h"
28 #include "callcoll.h"
29 #include "unicode/ustring.h"
30 #include "string.h"
31 #include "ucol_imp.h"
32 #include "ucol_tok.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "uparse.h"
41 #include "putilimp.h"
42
43
/*
 * Number of elements in a true array (never a pointer). The argument is
 * parenthesized before indexing so that any array-valued expression can be
 * passed without operator-precedence surprises.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-width UChar test tables in this file. */
#define MAX_TOKEN_LEN 16
47
48 typedef UCollationResult tst_strcoll(void *collator, const int object,
49 const UChar *source, const int sLen,
50 const UChar *target, const int tLen);
51
52
53
/*
 * Used by IncompleteCntTest with the rules " & Z < ABC < Q < B": every entry
 * is expected to compare UCOL_LESS than every entry listed after it.
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

/*
 * Used by IncompleteCntTest with the rules " & Z < DAVIS < MARK <DAV": every
 * entry is expected to compare UCOL_LESS than every entry listed after it.
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
80
81 static void IncompleteCntTest(void)
82 {
83 UErrorCode status = U_ZERO_ERROR;
84 UChar temp[90];
85 UChar t1[90];
86 UChar t2[90];
87
88 UCollator *coll = NULL;
89 uint32_t i = 0, j = 0;
90 uint32_t size = 0;
91
92 u_uastrcpy(temp, " & Z < ABC < Q < B");
93
94 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
95
96 if(U_SUCCESS(status)) {
97 size = sizeof(cnt1)/sizeof(cnt1[0]);
98 for(i = 0; i < size-1; i++) {
99 for(j = i+1; j < size; j++) {
100 UCollationElements *iter;
101 u_uastrcpy(t1, cnt1[i]);
102 u_uastrcpy(t2, cnt1[j]);
103 doTest(coll, t1, t2, UCOL_LESS);
104 /* synwee : added collation element iterator test */
105 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
106 if (U_FAILURE(status)) {
107 log_err("Creation of iterator failed\n");
108 break;
109 }
110 backAndForth(iter);
111 ucol_closeElements(iter);
112 }
113 }
114 }
115
116 ucol_close(coll);
117
118
119 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
120 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
121
122 if(U_SUCCESS(status)) {
123 size = sizeof(cnt2)/sizeof(cnt2[0]);
124 for(i = 0; i < size-1; i++) {
125 for(j = i+1; j < size; j++) {
126 UCollationElements *iter;
127 u_uastrcpy(t1, cnt2[i]);
128 u_uastrcpy(t2, cnt2[j]);
129 doTest(coll, t1, t2, UCOL_LESS);
130
131 /* synwee : added collation element iterator test */
132 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
133 if (U_FAILURE(status)) {
134 log_err("Creation of iterator failed\n");
135 break;
136 }
137 backAndForth(iter);
138 ucol_closeElements(iter);
139 }
140 }
141 }
142
143 ucol_close(coll);
144
145
146 }
147
/*
 * Input for the UCOL_SHIFTED passes of BlackBirdTest. At quaternary strength
 * each entry is expected to compare UCOL_LESS than every later entry.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
159
160 const static UCollationResult shiftedTert[] = {
161 UCOL_EQUAL,
162 UCOL_EQUAL,
163 UCOL_EQUAL,
164 UCOL_LESS,
165 UCOL_EQUAL,
166 UCOL_EQUAL,
167 UCOL_LESS,
168 UCOL_EQUAL,
169 UCOL_EQUAL
170 };
171
/*
 * Input for the UCOL_NON_IGNORABLE pass of BlackBirdTest: each entry is
 * expected to compare UCOL_LESS than every later entry.
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
183
184 static void BlackBirdTest(void) {
185 UErrorCode status = U_ZERO_ERROR;
186 UChar t1[90];
187 UChar t2[90];
188
189 uint32_t i = 0, j = 0;
190 uint32_t size = 0;
191 UCollator *coll = ucol_open("en_US", &status);
192
193 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
195
196 if(U_SUCCESS(status)) {
197 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
198 for(i = 0; i < size-1; i++) {
199 for(j = i+1; j < size; j++) {
200 u_uastrcpy(t1, nonignorable[i]);
201 u_uastrcpy(t2, nonignorable[j]);
202 doTest(coll, t1, t2, UCOL_LESS);
203 }
204 }
205 }
206
207 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
208 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
209
210 if(U_SUCCESS(status)) {
211 size = sizeof(shifted)/sizeof(shifted[0]);
212 for(i = 0; i < size-1; i++) {
213 for(j = i+1; j < size; j++) {
214 u_uastrcpy(t1, shifted[i]);
215 u_uastrcpy(t2, shifted[j]);
216 doTest(coll, t1, t2, UCOL_LESS);
217 }
218 }
219 }
220
221 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
222 if(U_SUCCESS(status)) {
223 size = sizeof(shifted)/sizeof(shifted[0]);
224 for(i = 1; i < size; i++) {
225 u_uastrcpy(t1, shifted[i-1]);
226 u_uastrcpy(t2, shifted[i]);
227 doTest(coll, t1, t2, shiftedTert[i]);
228 }
229 }
230
231 ucol_close(coll);
232 }
233
234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
235 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
236 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
237 {0x0041/*'A'*/, 0x0300, 0x0000},
238 {0x00C0, 0x0301, 0x0000},
239 /* this would work with forced normalization */
240 {0x00C0, 0x0316, 0x0000}
241 };
242
243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
244 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
246 {0x00C0, 0},
247 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
248 /* this would work with forced normalization */
249 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
250 };
251
252 const static UCollationResult results[] = {
253 UCOL_GREATER,
254 UCOL_EQUAL,
255 UCOL_EQUAL,
256 UCOL_GREATER,
257 UCOL_EQUAL
258 };
259
260 static void FunkyATest(void)
261 {
262
263 int32_t i;
264 UErrorCode status = U_ZERO_ERROR;
265 UCollator *myCollation;
266 myCollation = ucol_open("en_US", &status);
267 if(U_FAILURE(status)){
268 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
269 return;
270 }
271 log_verbose("Testing some A letters, for some reason\n");
272 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
273 ucol_setStrength(myCollation, UCOL_TERTIARY);
274 for (i = 0; i < 4 ; i++)
275 {
276 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
277 }
278 ucol_close(myCollation);
279 }
280
281 UColAttributeValue caseFirst[] = {
282 UCOL_OFF,
283 UCOL_LOWER_FIRST,
284 UCOL_UPPER_FIRST
285 };
286
287
288 UColAttributeValue alternateHandling[] = {
289 UCOL_NON_IGNORABLE,
290 UCOL_SHIFTED
291 };
292
293 UColAttributeValue caseLevel[] = {
294 UCOL_OFF,
295 UCOL_ON
296 };
297
298 UColAttributeValue strengths[] = {
299 UCOL_PRIMARY,
300 UCOL_SECONDARY,
301 UCOL_TERTIARY,
302 UCOL_QUATERNARY,
303 UCOL_IDENTICAL
304 };
305
#if 0
/* Printable names, index-aligned with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names, index-aligned with alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "Mark Davis" (with its second code unit
 * replaced by U+00E4) for every combination of caseFirst, alternateHandling,
 * caseLevel and strengths on an "en_US" collator.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);

    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /* Index with h, the loop variable of this level. The previous code
           used i, which at this point is a leftover from another loop and
           can lie outside caseFirst[]. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }

    /* release the collator opened above; it was previously leaked */
    ucol_close(coll);
}
#endif
383
384 static void BillFairmanTest(void) {
385 /*
386 ** check for actual locale via ICU resource bundles
387 **
388 ** lp points to the original locale ("fr_FR_....")
389 */
390
391 UResourceBundle *lr,*cr;
392 UErrorCode lec = U_ZERO_ERROR;
393 const char *lp = "fr_FR_you_ll_never_find_this_locale";
394
395 log_verbose("BillFairmanTest\n");
396
397 lr = ures_open(NULL,lp,&lec);
398 if (lr) {
399 cr = ures_getByKey(lr,"collations",0,&lec);
400 if (cr) {
401 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
402 if (lp) {
403 if (U_SUCCESS(lec)) {
404 if(strcmp(lp, "fr") != 0) {
405 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
406 }
407 }
408 }
409 ures_close(cr);
410 }
411 ures_close(lr);
412 }
413 }
414
415 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
416 UChar source[256] = { '\0'};
417 UChar target[256] = { '\0'};
418 UChar preP = 0x31a3;
419 UChar preQ = 0x310d;
420 /*
421 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
422 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
423 */
424 /*log_verbose("Testing primary\n");*/
425
426 doTest(col, p, q, UCOL_LESS);
427 /*
428 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
429
430 if(result!=UCOL_LESS){
431 aescstrdup(p,utfSource,256);
432 aescstrdup(q,utfTarget,256);
433 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
434 }
435 */
436 source[0] = preP;
437 u_strcpy(source+1,p);
438 target[0] = preQ;
439 u_strcpy(target+1,q);
440 doTest(col, source, target, UCOL_LESS);
441 /*
442 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
443 */
444 }
445
446 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
447 UChar source[256] = { '\0'};
448 UChar target[256] = { '\0'};
449
450 /*log_verbose("Testing secondary\n");*/
451
452 doTest(col, p, q, UCOL_LESS);
453 /*
454 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
455 */
456 source[0] = 0x0053;
457 u_strcpy(source+1,p);
458 target[0]= 0x0073;
459 u_strcpy(target+1,q);
460
461 doTest(col, source, target, UCOL_LESS);
462 /*
463 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
464 */
465
466
467 u_strcpy(source,p);
468 source[u_strlen(p)] = 0x62;
469 source[u_strlen(p)+1] = 0;
470
471
472 u_strcpy(target,q);
473 target[u_strlen(q)] = 0x61;
474 target[u_strlen(q)+1] = 0;
475
476 doTest(col, source, target, UCOL_GREATER);
477
478 /*
479 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
480 */
481 }
482
483 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
484 UChar source[256] = { '\0'};
485 UChar target[256] = { '\0'};
486
487 /*log_verbose("Testing tertiary\n");*/
488
489 doTest(col, p, q, UCOL_LESS);
490 /*
491 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
492 */
493 source[0] = 0x0020;
494 u_strcpy(source+1,p);
495 target[0]= 0x002D;
496 u_strcpy(target+1,q);
497
498 doTest(col, source, target, UCOL_LESS);
499 /*
500 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
501 */
502
503 u_strcpy(source,p);
504 source[u_strlen(p)] = 0xE0;
505 source[u_strlen(p)+1] = 0;
506
507 u_strcpy(target,q);
508 target[u_strlen(q)] = 0x61;
509 target[u_strlen(q)+1] = 0;
510
511 doTest(col, source, target, UCOL_GREATER);
512
513 /*
514 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
515 */
516 }
517
518 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
519 /*
520 UChar source[256] = { '\0'};
521 UChar target[256] = { '\0'};
522 */
523
524 doTest(col, p, q, UCOL_EQUAL);
525 /*
526 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
527 */
528 }
529
530 static void testCollator(UCollator *coll, UErrorCode *status) {
531 const UChar *rules = NULL, *current = NULL;
532 int32_t ruleLen = 0;
533 uint32_t strength = 0;
534 uint32_t chOffset = 0; uint32_t chLen = 0;
535 uint32_t exOffset = 0; uint32_t exLen = 0;
536 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
537 uint32_t firstEx = 0;
538 /* uint32_t rExpsLen = 0; */
539 uint32_t firstLen = 0;
540 UBool varT = FALSE; UBool top_ = TRUE;
541 uint16_t specs = 0;
542 UBool startOfRules = TRUE;
543 UBool lastReset = FALSE;
544 UBool before = FALSE;
545 uint32_t beforeStrength = 0;
546 UColTokenParser src;
547 UColOptionSet opts;
548
549 UChar first[256];
550 UChar second[256];
551 UChar tempB[256];
552 uint32_t tempLen;
553 UChar *rulesCopy = NULL;
554 UParseError parseError;
555
556 uprv_memset(&src, 0, sizeof(UColTokenParser));
557
558 src.opts = &opts;
559
560 rules = ucol_getRules(coll, &ruleLen);
561 if(U_SUCCESS(*status) && ruleLen > 0) {
562 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
563 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
564 src.current = src.source = rulesCopy;
565 src.end = rulesCopy+ruleLen;
566 src.extraCurrent = src.end;
567 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
568 *first = *second = 0;
569
570 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
571 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
572 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
573 strength = src.parsedToken.strength;
574 chOffset = src.parsedToken.charsOffset;
575 chLen = src.parsedToken.charsLen;
576 exOffset = src.parsedToken.extensionOffset;
577 exLen = src.parsedToken.extensionLen;
578 prefixOffset = src.parsedToken.prefixOffset;
579 prefixLen = src.parsedToken.prefixLen;
580 specs = src.parsedToken.flags;
581
582 startOfRules = FALSE;
583 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
584 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
585 if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
586 second[0] = 0;
587 } else {
588 u_strncpy(second,src.source+chOffset, chLen);
589 second[chLen] = 0;
590
591 if(exLen > 0 && firstEx == 0) {
592 u_strncat(first, src.source+exOffset, exLen);
593 first[firstLen+exLen] = 0;
594 }
595
596 if(lastReset == TRUE && prefixLen != 0) {
597 u_strncpy(first+prefixLen, first, firstLen);
598 u_strncpy(first, src.source+prefixOffset, prefixLen);
599 first[firstLen+prefixLen] = 0;
600 firstLen = firstLen+prefixLen;
601 }
602
603 if(before == TRUE) { /* swap first and second */
604 u_strcpy(tempB, first);
605 u_strcpy(first, second);
606 u_strcpy(second, tempB);
607
608 tempLen = firstLen;
609 firstLen = chLen;
610 chLen = tempLen;
611
612 tempLen = firstEx;
613 firstEx = exLen;
614 exLen = tempLen;
615 if(beforeStrength < strength) {
616 strength = beforeStrength;
617 }
618 }
619 }
620 lastReset = FALSE;
621
622 switch(strength){
623 case UCOL_IDENTICAL:
624 testEquality(coll,first,second);
625 break;
626 case UCOL_PRIMARY:
627 testPrimary(coll,first,second);
628 break;
629 case UCOL_SECONDARY:
630 testSecondary(coll,first,second);
631 break;
632 case UCOL_TERTIARY:
633 testTertiary(coll,first,second);
634 break;
635 case UCOL_TOK_RESET:
636 lastReset = TRUE;
637 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
638 if(before) {
639 beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
640 }
641 break;
642 default:
643 break;
644 }
645
646 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
647 before = FALSE;
648 } else {
649 firstLen = chLen;
650 firstEx = exLen;
651 u_strcpy(first, second);
652 }
653 }
654 uprv_free(src.source);
655 }
656 }
657
658 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
659 UCollator *UCA = (UCollator *)collator;
660 return ucol_strcoll(UCA, source, sLen, target, tLen);
661 }
662
663 /*
664 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
665 #ifdef U_WINDOWS
666 LCID lcid = (LCID)collator;
667 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
668 #else
669 return 0;
670 #endif
671 }
672 */
673
674 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
675 UChar s1, UChar s2,
676 const UChar *s, const uint32_t sLen,
677 const UChar *t, const uint32_t tLen) {
678 UChar source[256] = {0};
679 UChar target[256] = {0};
680
681 source[0] = s1;
682 u_strcpy(source+1, s);
683 target[0] = s2;
684 u_strcpy(target+1, t);
685
686 return func(collator, opts, source, sLen+1, target, tLen+1);
687 }
688
689 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
690 UChar s1, UChar s2,
691 const UChar *s, const uint32_t sLen,
692 const UChar *t, const uint32_t tLen) {
693 UChar source[256] = {0};
694 UChar target[256] = {0};
695
696 u_strcpy(source, s);
697 source[sLen] = s1;
698 u_strcpy(target, t);
699 target[tLen] = s2;
700
701 return func(collator, opts, source, sLen+1, target, tLen+1);
702 }
703
704 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
705 const UChar *s, const uint32_t sLen,
706 const UChar *t, const uint32_t tLen,
707 UCollationResult result) {
708 /*UChar fPrimary = 0x6d;*/
709 /*UChar sPrimary = 0x6e;*/
710 UChar fSecondary = 0x310d;
711 UChar sSecondary = 0x31a3;
712 UChar fTertiary = 0x310f;
713 UChar sTertiary = 0x31b7;
714
715 UCollationResult oposite;
716 if(result == UCOL_EQUAL) {
717 return UCOL_IDENTICAL;
718 } else if(result == UCOL_GREATER) {
719 oposite = UCOL_LESS;
720 } else {
721 oposite = UCOL_GREATER;
722 }
723
724 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
725 return UCOL_PRIMARY;
726 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
727 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
728 return UCOL_SECONDARY;
729 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
730 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
731 return UCOL_TERTIARY;
732 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
733 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
734 return UCOL_QUATERNARY;
735 } else {
736 return UCOL_IDENTICAL;
737 }
738 }
739
740 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
741 uint32_t i = 0;
742
743 if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
744 buffer[0] = '=';
745 buffer[1] = '=';
746 buffer[2] = '\0';
747 } else if(res == UCOL_GREATER) {
748 for(i = 0; i<strength+1; i++) {
749 buffer[i] = '>';
750 }
751 buffer[strength+1] = '\0';
752 } else {
753 for(i = 0; i<strength+1; i++) {
754 buffer[i] = '<';
755 }
756 buffer[strength+1] = '\0';
757 }
758
759 return buffer;
760 }
761
762
763
764 static void logFailure (const char *platform, const char *test,
765 const UChar *source, const uint32_t sLen,
766 const UChar *target, const uint32_t tLen,
767 UCollationResult realRes, uint32_t realStrength,
768 UCollationResult expRes, uint32_t expStrength, UBool error) {
769
770 uint32_t i = 0;
771
772 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
773 static int32_t maxOutputLength = 0;
774 int32_t outputLength;
775
776 *sEsc = *tEsc = *s = *t = 0;
777 if(error == TRUE) {
778 log_err("Difference between expected and generated order. Run test with -v for more info\n");
779 } else if(getTestOption(VERBOSITY_OPTION) == 0) {
780 return;
781 }
782 for(i = 0; i<sLen; i++) {
783 sprintf(b, "%04X", source[i]);
784 strcat(sEsc, "\\u");
785 strcat(sEsc, b);
786 strcat(s, b);
787 strcat(s, " ");
788 if(source[i] < 0x80) {
789 sprintf(b, "(%c)", source[i]);
790 strcat(sEsc, b);
791 }
792 }
793 for(i = 0; i<tLen; i++) {
794 sprintf(b, "%04X", target[i]);
795 strcat(tEsc, "\\u");
796 strcat(tEsc, b);
797 strcat(t, b);
798 strcat(t, " ");
799 if(target[i] < 0x80) {
800 sprintf(b, "(%c)", target[i]);
801 strcat(tEsc, b);
802 }
803 }
804 /*
805 strcpy(output, "[[ ");
806 strcat(output, sEsc);
807 strcat(output, getRelationSymbol(expRes, expStrength, relation));
808 strcat(output, tEsc);
809
810 strcat(output, " : ");
811
812 strcat(output, sEsc);
813 strcat(output, getRelationSymbol(realRes, realStrength, relation));
814 strcat(output, tEsc);
815 strcat(output, " ]] ");
816
817 log_verbose("%s", output);
818 */
819
820
821 strcpy(output, "DIFF: ");
822
823 strcat(output, s);
824 strcat(output, " : ");
825 strcat(output, t);
826
827 strcat(output, test);
828 strcat(output, ": ");
829
830 strcat(output, sEsc);
831 strcat(output, getRelationSymbol(expRes, expStrength, relation));
832 strcat(output, tEsc);
833
834 strcat(output, " ");
835
836 strcat(output, platform);
837 strcat(output, ": ");
838
839 strcat(output, sEsc);
840 strcat(output, getRelationSymbol(realRes, realStrength, relation));
841 strcat(output, tEsc);
842
843 outputLength = (int32_t)strlen(output);
844 if(outputLength > maxOutputLength) {
845 maxOutputLength = outputLength;
846 U_ASSERT(outputLength < sizeof(output));
847 }
848
849 log_verbose("%s\n", output);
850
851 }
852
853 /*
854 static void printOutRules(const UChar *rules) {
855 uint32_t len = u_strlen(rules);
856 uint32_t i = 0;
857 char toPrint;
858 uint32_t line = 0;
859
860 fprintf(stdout, "Rules:");
861
862 for(i = 0; i<len; i++) {
863 if(rules[i]<0x7f && rules[i]>=0x20) {
864 toPrint = (char)rules[i];
865 if(toPrint == '&') {
866 line = 1;
867 fprintf(stdout, "\n&");
868 } else if(toPrint == ';') {
869 fprintf(stdout, "<<");
870 line+=2;
871 } else if(toPrint == ',') {
872 fprintf(stdout, "<<<");
873 line+=3;
874 } else {
875 fprintf(stdout, "%c", toPrint);
876 line++;
877 }
878 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
879 fprintf(stdout, "\\u%04X", rules[i]);
880 line+=6;
881 }
882 if(line>72) {
883 fprintf(stdout, "\n");
884 line = 0;
885 }
886 }
887
888 log_verbose("\n");
889
890 }
891 */
892
893 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
894 uint32_t diffs = 0;
895 UCollationResult realResult;
896 uint32_t realStrength;
897
898 uint32_t sLen = u_strlen(first);
899 uint32_t tLen = u_strlen(second);
900
901 realResult = func(collator, opts, first, sLen, second, tLen);
902 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
903
904 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
905 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
906 diffs++;
907 } else if(realResult != UCOL_LESS || realStrength != strength) {
908 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
909 diffs++;
910 }
911 return diffs;
912 }
913
914
915 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
916 const UChar *rules = NULL, *current = NULL;
917 int32_t ruleLen = 0;
918 uint32_t strength = 0;
919 uint32_t chOffset = 0; uint32_t chLen = 0;
920 uint32_t exOffset = 0; uint32_t exLen = 0;
921 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
922 /* uint32_t rExpsLen = 0; */
923 uint32_t firstLen = 0, secondLen = 0;
924 UBool varT = FALSE; UBool top_ = TRUE;
925 uint16_t specs = 0;
926 UBool startOfRules = TRUE;
927 UColTokenParser src;
928 UColOptionSet opts;
929
930 UChar first[256];
931 UChar second[256];
932 UChar *rulesCopy = NULL;
933
934 uint32_t UCAdiff = 0;
935 uint32_t Windiff = 1;
936 UParseError parseError;
937
938 uprv_memset(&src, 0, sizeof(UColTokenParser));
939 src.opts = &opts;
940
941 rules = ucol_getRules(coll, &ruleLen);
942
943 /*printOutRules(rules);*/
944
945 if(U_SUCCESS(*status) && ruleLen > 0) {
946 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
947 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
948 src.current = src.source = rulesCopy;
949 src.end = rulesCopy+ruleLen;
950 src.extraCurrent = src.end;
951 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
952 *first = *second = 0;
953
954 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
955 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
956 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
957 strength = src.parsedToken.strength;
958 chOffset = src.parsedToken.charsOffset;
959 chLen = src.parsedToken.charsLen;
960 exOffset = src.parsedToken.extensionOffset;
961 exLen = src.parsedToken.extensionLen;
962 prefixOffset = src.parsedToken.prefixOffset;
963 prefixLen = src.parsedToken.prefixLen;
964 specs = src.parsedToken.flags;
965
966 startOfRules = FALSE;
967 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
968 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
969
970 u_strncpy(second,src.source+chOffset, chLen);
971 second[chLen] = 0;
972 secondLen = chLen;
973
974 if(exLen > 0) {
975 u_strncat(first, src.source+exOffset, exLen);
976 first[firstLen+exLen] = 0;
977 firstLen += exLen;
978 }
979
980 if(strength != UCOL_TOK_RESET) {
981 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
982 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
983 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
984 }
985 }
986
987
988 firstLen = chLen;
989 u_strcpy(first, second);
990
991 }
992 if(UCAdiff != 0 && Windiff != 0) {
993 log_verbose("\n");
994 }
995 if(UCAdiff == 0) {
996 log_verbose("No immediate difference with %s!\n", refName);
997 }
998 if(Windiff == 0) {
999 log_verbose("No immediate difference with Win32!\n");
1000 }
1001 uprv_free(src.source);
1002 }
1003 }
1004
/*
 * Orders two collation elements, each given as a (lead, continuation) pair:
 * (s1, s2) against (t1, t2). The comparison proceeds level by level and
 * stops at the first level that differs:
 *   primary    bits 31..16 of the lead, then bits 31..16 of the continuation
 *   secondary  bits 15..8  of the lead, then bits 15..8  of the continuation
 *   tertiary   bits  7..0  of the lead, then bits  7..0  of the continuation
 *
 * Returns 0 when both pairs are identical, -1 when (s1, s2) sorts first and
 * 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t sKey = 0, tKey = 0;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary weights */
    sKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    tKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* secondary weights */
    sKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    tKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* tertiary weights - the pairs are known to differ, so this decides */
    sKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    tKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (sKey < tKey) ? -1 : 1;
}
1041
/*
 * One entry of the indirect-positioning table filled in by
 * setIndirectBoundaries(): a start CE and a limit CE, each stored as a
 * (lead, continuation) pair. Both limit fields are 0 when no limit was given.
 */
typedef struct {
    uint32_t startCE;      /* start[0] */
    uint32_t startContCE;  /* start[1] */
    uint32_t limitCE;      /* end[0], or 0 */
    uint32_t limitContCE;  /* end[1], or 0 */
} indirectBoundaries;
1048
1049 /* these values are used for finding CE values for indirect positioning. */
1050 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1051 /* values. It only works for resets and you cannot tailor indirect names */
1052 /* An indirect name can define either an anchor point or a range. An */
1053 /* anchor point behaves in exactly the same way as a code point in reset */
1054 /* would, except that it cannot be tailored. A range (we currently only */
1055 /* know for the [top] range will explicitly set the upper bound for */
1056 /* generated CEs, thus allowing for better control over how many CEs can */
1057 /* be squeezed between in the range without performance penalty. */
1058 /* In that respect, we use [top] for tailoring of locales that use CJK */
1059 /* characters. Other indirect values are currently a pure convenience, */
1060 /* they can be used to assure that the CEs will be always positioned in */
1061 /* the same place relative to a point with known properties (e.g. first */
1062 /* primary ignorable). */
1063 static indirectBoundaries ucolIndirectBoundaries[15];
1064 static UBool indirectBoundariesSet = FALSE;
1065 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1066 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1067 /* to initalize here. */
1068 ucolIndirectBoundaries[indexR].startCE = start[0];
1069 ucolIndirectBoundaries[indexR].startContCE = start[1];
1070 if(end) {
1071 ucolIndirectBoundaries[indexR].limitCE = end[0];
1072 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1073 } else {
1074 ucolIndirectBoundaries[indexR].limitCE = 0;
1075 ucolIndirectBoundaries[indexR].limitContCE = 0;
1076 }
1077 }
1078
1079 static void testCEs(UCollator *coll, UErrorCode *status) {
1080 const UChar *rules = NULL, *current = NULL;
1081 int32_t ruleLen = 0;
1082
1083 uint32_t strength = 0;
1084 uint32_t maxStrength = UCOL_IDENTICAL;
1085 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1086 uint32_t lastCE;
1087 uint32_t lastContCE;
1088
1089 int32_t result = 0;
1090 uint32_t chOffset = 0; uint32_t chLen = 0;
1091 uint32_t exOffset = 0; uint32_t exLen = 0;
1092 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1093 uint32_t oldOffset = 0;
1094
1095 /* uint32_t rExpsLen = 0; */
1096 /* uint32_t firstLen = 0; */
1097 uint16_t specs = 0;
1098 UBool varT = FALSE; UBool top_ = TRUE;
1099 UBool startOfRules = TRUE;
1100 UBool before = FALSE;
1101 UColTokenParser src;
1102 UColOptionSet opts;
1103 UParseError parseError;
1104 UChar *rulesCopy = NULL;
1105 collIterate *c = uprv_new_collIterate(status);
1106 UCAConstants *consts = NULL;
1107 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1108 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1109 const char *colLoc;
1110 UCollator *UCA = ucol_open("root", status);
1111
1112 if (U_FAILURE(*status)) {
1113 log_err("Could not open root collator %s\n", u_errorName(*status));
1114 uprv_delete_collIterate(c);
1115 return;
1116 }
1117
1118 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1119 if (U_FAILURE(*status)) {
1120 log_err("Could not get collator name: %s\n", u_errorName(*status));
1121 ucol_close(UCA);
1122 uprv_delete_collIterate(c);
1123 return;
1124 }
1125
1126 uprv_memset(&src, 0, sizeof(UColTokenParser));
1127
1128 consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1129 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1130 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1131 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1132 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1133
1134 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1135
1136 src.opts = &opts;
1137
1138 rules = ucol_getRules(coll, &ruleLen);
1139
1140 src.invUCA = ucol_initInverseUCA(status);
1141
1142 if(indirectBoundariesSet == FALSE) {
1143 /* UCOL_RESET_TOP_VALUE */
1144 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1145 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1146 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1147 /* UCOL_LAST_PRIMARY_IGNORABLE */
1148 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1149 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1150 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1151 /* UCOL_LAST_SECONDARY_IGNORABLE */
1152 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1153 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1154 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1155 /* UCOL_LAST_TERTIARY_IGNORABLE */
1156 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1157 /* UCOL_FIRST_VARIABLE */
1158 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1159 /* UCOL_LAST_VARIABLE */
1160 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1161 /* UCOL_FIRST_NON_VARIABLE */
1162 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1163 /* UCOL_LAST_NON_VARIABLE */
1164 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1165 /* UCOL_FIRST_IMPLICIT */
1166 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1167 /* UCOL_LAST_IMPLICIT */
1168 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1169 /* UCOL_FIRST_TRAILING */
1170 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1171 /* UCOL_LAST_TRAILING */
1172 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1173 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1174 indirectBoundariesSet = TRUE;
1175 }
1176
1177
1178 if(U_SUCCESS(*status) && ruleLen > 0) {
1179 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1180 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1181 src.current = src.source = rulesCopy;
1182 src.end = rulesCopy+ruleLen;
1183 src.extraCurrent = src.end;
1184 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1185
1186 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1187 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1188 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1189 strength = src.parsedToken.strength;
1190 chOffset = src.parsedToken.charsOffset;
1191 chLen = src.parsedToken.charsLen;
1192 exOffset = src.parsedToken.extensionOffset;
1193 exLen = src.parsedToken.extensionLen;
1194 prefixOffset = src.parsedToken.prefixOffset;
1195 prefixLen = src.parsedToken.prefixLen;
1196 specs = src.parsedToken.flags;
1197
1198 startOfRules = FALSE;
1199 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1200 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1201
1202 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1203
1204 currCE = ucol_getNextCE(coll, c, status);
1205 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1206 log_verbose("Thai prevowel detected. Will pick next CE\n");
1207 currCE = ucol_getNextCE(coll, c, status);
1208 }
1209
1210 currContCE = ucol_getNextCE(coll, c, status);
1211 if(!isContinuation(currContCE)) {
1212 currContCE = 0;
1213 }
1214
1215 /* we need to repack CEs here */
1216
1217 if(strength == UCOL_TOK_RESET) {
1218 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1219 if(top_ == TRUE) {
1220 int32_t tokenIndex = src.parsedToken.indirectIndex;
1221
1222 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1223 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1224 } else {
1225 nextCE = baseCE = currCE;
1226 nextContCE = baseContCE = currContCE;
1227 }
1228 maxStrength = UCOL_IDENTICAL;
1229 } else {
1230 if(strength < maxStrength) {
1231 maxStrength = strength;
1232 if(baseCE == UCOL_RESET_TOP_VALUE) {
1233 log_verbose("Resetting to [top]\n");
1234 nextCE = UCOL_NEXT_TOP_VALUE;
1235 nextContCE = UCOL_NEXT_TOP_CONT;
1236 } else {
1237 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1238 }
1239 if(result < 0) {
1240 if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1241 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1242 return;
1243 } else {
1244 log_err("%s: couldn't find the CE\n", colLoc);
1245 return;
1246 }
1247 }
1248 }
1249
1250 currCE &= 0xFFFFFF3F;
1251 currContCE &= 0xFFFFFFBF;
1252
1253 if(maxStrength == UCOL_IDENTICAL) {
1254 if(baseCE != currCE || baseContCE != currContCE) {
1255 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
1256 }
1257 } else {
1258 if(strength == UCOL_IDENTICAL) {
1259 if(lastCE != currCE || lastContCE != currContCE) {
1260 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
1261 }
1262 } else {
1263 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1264 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1265 log_err("%s: current CE is not less than base CE\n", colLoc);
1266 }
1267 if(!before) {
1268 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1269 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1270 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1271 }
1272 } else {
1273 before = FALSE;
1274 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1275 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1276 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1277 }
1278 }
1279 }
1280 }
1281
1282 }
1283
1284 oldOffset = chOffset;
1285 lastCE = currCE & 0xFFFFFF3F;
1286 lastContCE = currContCE & 0xFFFFFFBF;
1287 }
1288 uprv_free(src.source);
1289 }
1290 ucol_close(UCA);
1291 uprv_delete_collIterate(c);
1292 }
1293
#if 0
/* Disabled: the locales are now picked from the index resource bundle. */
static const char* localesToTest[] = {
    "ar",
    "bg",
    "ca", "cs",
    "da",
    "el", "en_BE", "en_US_POSIX", "es", "et",
    "fi", "fr",
    "hi", "hr", "hu",
    "is", "iw",
    "ja",
    "ko",
    "lt", "lv",
    "mk", "mt",
    "nb", "nn", "nn_NO",
    "pl",
    "ro", "ru",
    "sh", "sk", "sl", "sq", "sr", "sv",
    "th", "tr",
    "uk",
    "vi",
    "zh", "zh_TW"
};
#endif
1308
/*
 * Hand-written tailorings exercised by RamsRulesTest in addition to the
 * locale rules. Each "disabled alternative" comment keeps the rule string
 * that was commented out next to the active one.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

    /* disabled alternative: "& Z < p, P" */

    /* Cui Mins rules */

    /* disabled alternative: "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",

    /* disabled alternative: "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",

    /* disabled alternative: "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",

    /* disabled alternative: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",

    /* disabled alternative: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U &  '?';Qu" */
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",

    /* disabled alternative: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",

    /* disabled alternative: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
};
1322
1323
1324 static void TestCollations(void) {
1325 int32_t noOfLoc = uloc_countAvailable();
1326 int32_t i = 0, j = 0;
1327
1328 UErrorCode status = U_ZERO_ERROR;
1329 char cName[256];
1330 UChar name[256];
1331 int32_t nameSize;
1332
1333
1334 const char *locName = NULL;
1335 UCollator *coll = NULL;
1336 UCollator *UCA = ucol_open("", &status);
1337 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1338 if (U_FAILURE(status)) {
1339 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1340 return;
1341 }
1342 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1343
1344 for(i = 0; i<noOfLoc; i++) {
1345 status = U_ZERO_ERROR;
1346 locName = uloc_getAvailable(i);
1347 if(uprv_strcmp("ja", locName) == 0) {
1348 log_verbose("Don't know how to test prefixes\n");
1349 continue;
1350 }
1351 if(hasCollationElements(locName)) {
1352 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1353 for(j = 0; j<nameSize; j++) {
1354 cName[j] = (char)name[j];
1355 }
1356 cName[nameSize] = 0;
1357 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1358 coll = ucol_open(locName, &status);
1359 if(U_SUCCESS(status)) {
1360 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1361 ucol_close(coll);
1362 } else {
1363 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1364 status = U_ZERO_ERROR;
1365 }
1366 }
1367 }
1368 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1369 ucol_close(UCA);
1370 }
1371
1372 static void RamsRulesTest(void) {
1373 UErrorCode status = U_ZERO_ERROR;
1374 int32_t i = 0;
1375 UCollator *coll = NULL;
1376 UChar rule[2048];
1377 uint32_t ruleLen;
1378 int32_t noOfLoc = uloc_countAvailable();
1379 const char *locName = NULL;
1380
1381 log_verbose("RamsRulesTest\n");
1382
1383 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1384 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1385 return;
1386 }
1387
1388 for(i = 0; i<noOfLoc; i++) {
1389 locName = uloc_getAvailable(i);
1390 if(hasCollationElements(locName)) {
1391 if (uprv_strcmp("ja", locName)==0) {
1392 log_verbose("Don't know how to test Japanese because of prefixes\n");
1393 continue;
1394 }
1395 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1396 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1397 continue;
1398 }
1399 if (uprv_strcmp("bn", locName)==0 ||
1400 uprv_strcmp("en_US_POSIX", locName)==0 ||
1401 uprv_strcmp("km", locName)==0 ||
1402 uprv_strcmp("km_KH", locName)==0 ||
1403 uprv_strcmp("my", locName)==0 ||
1404 uprv_strcmp("si", locName)==0 ||
1405 uprv_strcmp("si_LK", locName)==0 ||
1406 uprv_strcmp("zh", locName)==0 ||
1407 uprv_strcmp("zh_Hant", locName)==0
1408 ) {
1409 log_verbose("Don't know how to test %s. "
1410 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1411 continue;
1412 }
1413 log_verbose("Testing locale %s\n", locName);
1414 status = U_ZERO_ERROR;
1415 coll = ucol_open(locName, &status);
1416 if(U_SUCCESS(status)) {
1417 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1418 if(coll->image->jamoSpecial == TRUE) {
1419 log_err("%s has special JAMOs\n", locName);
1420 }
1421 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1422 testCollator(coll, &status);
1423 testCEs(coll, &status);
1424 } else {
1425 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1426 }
1427 ucol_close(coll);
1428 } else {
1429 log_err("Could not open %s: %s\n", locName, u_errorName(status));
1430 }
1431 }
1432 }
1433
1434 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1435 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1436 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1437 status = U_ZERO_ERROR;
1438 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1439 if(U_SUCCESS(status)) {
1440 testCollator(coll, &status);
1441 testCEs(coll, &status);
1442 ucol_close(coll);
1443 } else {
1444 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1445 }
1446 }
1447
1448 }
1449
1450 static void IsTailoredTest(void) {
1451 UErrorCode status = U_ZERO_ERROR;
1452 uint32_t i = 0;
1453 UCollator *coll = NULL;
1454 UChar rule[2048];
1455 UChar tailored[2048];
1456 UChar notTailored[2048];
1457 uint32_t ruleLen, tailoredLen, notTailoredLen;
1458
1459 log_verbose("IsTailoredTest\n");
1460
1461 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1462 ruleLen = u_strlen(rule);
1463
1464 u_uastrcpy(tailored, "ABCcd");
1465 tailoredLen = u_strlen(tailored);
1466
1467 u_uastrcpy(notTailored, "ZabD");
1468 notTailoredLen = u_strlen(notTailored);
1469
1470 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1471 if(U_SUCCESS(status)) {
1472 for(i = 0; i<tailoredLen; i++) {
1473 if(!ucol_isTailored(coll, tailored[i], &status)) {
1474 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1475 }
1476 }
1477 for(i = 0; i<notTailoredLen; i++) {
1478 if(ucol_isTailored(coll, notTailored[i], &status)) {
1479 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1480 }
1481 }
1482 ucol_close(coll);
1483 }
1484 else {
1485 log_err_status(status, "Can't tailor rules\n");
1486 }
1487 /* Code coverage */
1488 status = U_ZERO_ERROR;
1489 coll = ucol_open("ja", &status);
1490 if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1491 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1492 }
1493 ucol_close(coll);
1494 }
1495
1496
/*
 * Strings for TestChMove, listed in the order the "cs" collator is expected
 * to sort them (each entry must compare less than every later entry).
 * Entries are u_unescape() input, hence the doubled backslashes.
 */
static const char chTest[][20] = {
    "c",
    "C",
    "ca",
    "cb",
    "cx",
    "cy",
    "CZ",
    "c\\u030C",
    "C\\u030C",
    "h",
    "H",
    "ha",
    "Ha",
    "harly",
    "hb",
    "HB",
    "hx",
    "HX",
    "hy",
    "HY",
    "ch",
    "cH",
    "Ch",
    "CH",
    "cha",
    "charly",
    "che",
    "chh",
    "chch",
    "chr",
    "i",
    "I",
    "iarly",
    "r",
    "R",
    "r\\u030C",
    "R\\u030C",
    "s",
    "S",
    "s\\u030C",
    "S\\u030C",
    "z",
    "Z",
    "z\\u030C",
    "Z\\u030C"
};
1516
1517 static void TestChMove(void) {
1518 UChar t1[256] = {0};
1519 UChar t2[256] = {0};
1520
1521 uint32_t i = 0, j = 0;
1522 uint32_t size = 0;
1523 UErrorCode status = U_ZERO_ERROR;
1524
1525 UCollator *coll = ucol_open("cs", &status);
1526
1527 if(U_SUCCESS(status)) {
1528 size = sizeof(chTest)/sizeof(chTest[0]);
1529 for(i = 0; i < size-1; i++) {
1530 for(j = i+1; j < size; j++) {
1531 u_unescape(chTest[i], t1, 256);
1532 u_unescape(chTest[j], t2, 256);
1533 doTest(coll, t1, t2, UCOL_LESS);
1534 }
1535 }
1536 }
1537 else {
1538 log_data_err("Can't open collator");
1539 }
1540 ucol_close(coll);
1541 }
1542
1543
1544
1545
/*
 * Expected order for the rule "&\u4e00 < a <<< A < b <<< B"; entries are
 * u_unescape() input. Only referenced from disabled code in
 * TestImplicitTailoring.
 */
static const char impTest[][20] = {
    "\\u4e00",
    "a", "A",
    "b", "B",
    "\\u4e01"
};
1554
1555
/*
 * Feeds each rule string below, together with the strings expected to sort
 * in the listed order, to genericRulesStarter(). All rules tailor around
 * the characters U+4E00 / U+4E01.
 */
static void TestImplicitTailoring(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        {
            "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e",
            { "d", "e", "b", "c", "\\u4e00"},
            5
        },
        {
            "&\\u4e00 < a <<< A < b <<< B",
            { "\\u4e00", "a", "A", "b", "B", "\\u4e01"},
            6
        },
        {
            "&[before 1]\\u4e00 < \\u4e01 < \\u4e02",
            { "\\u4e01", "\\u4e02", "\\u4e00"},
            3
        },
        {
            "&[before 1]\\u4e01 < \\u4e02 < \\u4e03",
            { "\\u4e02", "\\u4e03", "\\u4e01"},
            3
        }
    };

    const uint32_t testCount = sizeof(tests)/sizeof(tests[0]);
    uint32_t n = 0;

    while(n < testCount) {
        genericRulesStarter(tests[n].rules, tests[n].data, tests[n].len);
        n++;
    }

    /*
     * An older, disabled variant of this test opened a collator from
     * "&\\u4e00 < a <<< A < b <<< B" and ran doTest(..., UCOL_LESS) on every
     * pair of impTest entries; the second table row above uses the same rule
     * and the same strings.
     */
}
1605
1606 static void TestFCDProblem(void) {
1607 UChar t1[256] = {0};
1608 UChar t2[256] = {0};
1609
1610 const char *s1 = "\\u0430\\u0306\\u0325";
1611 const char *s2 = "\\u04D1\\u0325";
1612
1613 UErrorCode status = U_ZERO_ERROR;
1614 UCollator *coll = ucol_open("", &status);
1615 u_unescape(s1, t1, 256);
1616 u_unescape(s2, t2, 256);
1617
1618 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1619 doTest(coll, t1, t2, UCOL_EQUAL);
1620
1621 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1622 doTest(coll, t1, t2, UCOL_EQUAL);
1623
1624 ucol_close(coll);
1625 }
1626
1627 /*
1628 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1629 We're only using NFC/NFD in this test.
1630 */
1631 #define NORM_BUFFER_TEST_LEN 18
1632 typedef struct {
1633 UChar32 u;
1634 UChar NFC[NORM_BUFFER_TEST_LEN];
1635 UChar NFD[NORM_BUFFER_TEST_LEN];
1636 } tester;
1637
1638 static void TestComposeDecompose(void) {
1639 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1640 static const UChar UNICODESET_STR[] = {
1641 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1642 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1643 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1644 };
1645 int32_t noOfLoc;
1646 int32_t i = 0, j = 0;
1647
1648 UErrorCode status = U_ZERO_ERROR;
1649 const char *locName = NULL;
1650 uint32_t nfcSize;
1651 uint32_t nfdSize;
1652 tester **t;
1653 uint32_t noCases = 0;
1654 UCollator *coll = NULL;
1655 UChar32 u = 0;
1656 UChar comp[NORM_BUFFER_TEST_LEN];
1657 uint32_t len = 0;
1658 UCollationElements *iter;
1659 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1660 int32_t charsToTestSize;
1661
1662 noOfLoc = uloc_countAvailable();
1663
1664 coll = ucol_open("", &status);
1665 if (U_FAILURE(status)) {
1666 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1667 return;
1668 }
1669 charsToTestSize = uset_size(charsToTest);
1670 if (charsToTestSize <= 0) {
1671 log_err("Set was zero. Missing data?\n");
1672 return;
1673 }
1674 t = malloc(charsToTestSize * sizeof(tester *));
1675 t[0] = (tester *)malloc(sizeof(tester));
1676 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1677
1678 for(u = 0; u < charsToTestSize; u++) {
1679 UChar32 ch = uset_charAt(charsToTest, u);
1680 len = 0;
1681 UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
1682 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1683 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1684
1685 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1686 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1687 t[noCases]->u = ch;
1688 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1689 u_strncpy(t[noCases]->NFC, comp, len);
1690 t[noCases]->NFC[len] = 0;
1691 }
1692 noCases++;
1693 t[noCases] = (tester *)malloc(sizeof(tester));
1694 uprv_memset(t[noCases], 0, sizeof(tester));
1695 }
1696 }
1697 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1698 uset_close(charsToTest);
1699 charsToTest = NULL;
1700
1701 for(u=0; u<(UChar32)noCases; u++) {
1702 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1703 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1704 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1705 }
1706 }
1707 /*
1708 for(u = 0; u < charsToTestSize; u++) {
1709 if(!(u&0xFFFF)) {
1710 log_verbose("%08X ", u);
1711 }
1712 uprv_memset(t[noCases], 0, sizeof(tester));
1713 t[noCases]->u = u;
1714 len = 0;
1715 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1716 comp[len] = 0;
1717 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1718 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1719 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1720 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1721 }
1722 */
1723
1724 ucol_close(coll);
1725
1726 log_verbose("Testing locales, number of cases = %i\n", noCases);
1727 for(i = 0; i<noOfLoc; i++) {
1728 status = U_ZERO_ERROR;
1729 locName = uloc_getAvailable(i);
1730 if(hasCollationElements(locName)) {
1731 char cName[256];
1732 UChar name[256];
1733 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1734
1735 for(j = 0; j<nameSize; j++) {
1736 cName[j] = (char)name[j];
1737 }
1738 cName[nameSize] = 0;
1739 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1740
1741 coll = ucol_open(locName, &status);
1742 ucol_setStrength(coll, UCOL_IDENTICAL);
1743 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1744
1745 for(u=0; u<(UChar32)noCases; u++) {
1746 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1747 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1748 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1749 log_verbose("Testing NFC\n");
1750 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1751 backAndForth(iter);
1752 log_verbose("Testing NFD\n");
1753 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1754 backAndForth(iter);
1755 }
1756 }
1757 ucol_closeElements(iter);
1758 ucol_close(coll);
1759 }
1760 }
1761 for(u = 0; u <= (UChar32)noCases; u++) {
1762 free(t[u]);
1763 }
1764 free(t);
1765 }
1766
1767 static void TestEmptyRule(void) {
1768 UErrorCode status = U_ZERO_ERROR;
1769 UChar rulez[] = { 0 };
1770 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1771
1772 ucol_close(coll);
1773 }
1774
1775 static void TestUCARules(void) {
1776 UErrorCode status = U_ZERO_ERROR;
1777 UChar b[256];
1778 UChar *rules = b;
1779 uint32_t ruleLen = 0;
1780 UCollator *UCAfromRules = NULL;
1781 UCollator *coll = ucol_open("", &status);
1782 if(status == U_FILE_ACCESS_ERROR) {
1783 log_data_err("Is your data around?\n");
1784 return;
1785 } else if(U_FAILURE(status)) {
1786 log_err("Error opening collator\n");
1787 return;
1788 }
1789 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1790
1791 log_verbose("TestUCARules\n");
1792 if(ruleLen > 256) {
1793 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1794 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1795 }
1796 log_verbose("Rules length is %d\n", ruleLen);
1797 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1798 if(U_SUCCESS(status)) {
1799 ucol_close(UCAfromRules);
1800 } else {
1801 log_verbose("Unable to create a collator from UCARules!\n");
1802 }
1803 /*
1804 u_unescape(blah, b, 256);
1805 ucol_getSortKey(coll, b, 1, res, 256);
1806 */
1807 ucol_close(coll);
1808 if(rules != b) {
1809 free(rules);
1810 }
1811 }
1812
1813
1814 /* Pinyin tonal order */
1815 /*
1816 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1817 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1818 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1819 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1820 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1821 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1822 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1823 .. (\u00fc)
1824
1825 However, in testing we got the following order:
1826 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1827 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1828 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1829 .. (\u0113)
1830 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1831 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1832 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1833 .. (\u01d8)
1834 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1835 */
1836
/*
 * Tailors the tonal vowels with [before 1] resets so that each group sorts
 * ahead of its base letter, then lets genericRulesStarter() verify that the
 * strings in `data` come out in the listed order.
 */
static void TestBefore(void) {
    static const char *data[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    /* One reset per vowel, plus the u-diaeresis group placed after u. */
    static const char tonalRules[] =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(tonalRules, data, sizeof(data)/sizeof(data[0]));
}
1855
#if 0
/* Disabled: superceded by TestBeforePinyin */
static void TestJ784(void) {
    static const char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };

    genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1871
#if 0
/* Disabled: superceded by the changes to the lv locale */
static void TestJ831(void) {
    static const char *data[] = { "I", "i", "Y", "y" };

    genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1884
/*
 * Verifies the expected order of the ae / AE ligature strings twice: with
 * the "fr" locale collator and with an explicit rule string that uses
 * [backwards 2] and expansions.
 */
static void TestJ815(void) {
    static const char *data[] = {
        "aa", "Aa",
        "ab", "Ab",
        "ad", "Ad",
        "ae", "Ae",
        "\\u00e6", "\\u00c6",
        "af", "Af",
        "b", "B"
    };
    const uint32_t count = sizeof(data)/sizeof(data[0]);

    genericLocaleStarter("fr", data, count);
    genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, count);
}
1905
1906
1907 /*
1908 "& a < b < c < d& r < c", "& a < b < d& r < c",
1909 "& a < b < c < d& c < m", "& a < b < c < m < d",
1910 "& a < b < c < d& a < m", "& a < m < b < c < d",
1911 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1912 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1913 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1914 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1915 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1916 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1917 */
1918 static void TestRedundantRules(void) {
1919 int32_t i;
1920
1921 static const struct {
1922 const char *rules;
1923 const char *expectedRules;
1924 const char *testdata[8];
1925 uint32_t testdatalen;
1926 } tests[] = {
1927 /* this test conflicts with positioning of CODAN placeholder */
1928 /*{
1929 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1930 "&\\u2089<<<x",
1931 {"\\u2089", "x"}, 2
1932 }, */
1933 /* this test conflicts with the [before x] syntax tightening */
1934 /*{
1935 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1936 "&\\u0252<<<x",
1937 {"\\u0252", "x"}, 2
1938 }, */
1939 /* this test conflicts with the [before x] syntax tightening */
1940 /*{
1941 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1942 "& a <<< x < b <<< c << d <<< e",
1943 {"a", "x", "b", "c", "d", "e"}, 6
1944 }, */
1945 {
1946 "& a < b < c < d& [before 1] c < m",
1947 "& a < b < m < c < d",
1948 {"a", "b", "m", "c", "d"}, 5
1949 },
1950 {
1951 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1952 "& a < b <<< c << d <<< x <<< e",
1953 {"a", "b", "c", "d", "x", "e"}, 6
1954 },
1955 /* this test conflicts with the [before x] syntax tightening */
1956 /* {
1957 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1958 "& a < b <<< c <<< x << d <<< e",
1959 {"a", "b", "c", "x", "d", "e"},, 6
1960 }, */
1961 {
1962 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1963 "& a < b <<< c << d <<< e <<< f < x < g",
1964 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1965 },
1966 {
1967 "& a <<< b << c < d& a < m",
1968 "& a <<< b << c < m < d",
1969 {"a", "b", "c", "m", "d"}, 5
1970 },
1971 {
1972 "&a<b<<b\\u0301 &z<b",
1973 "&a<b\\u0301 &z<b",
1974 {"a", "b\\u0301", "z", "b"}, 4
1975 },
1976 {
1977 "&z<m<<<q<<<m",
1978 "&z<q<<<m",
1979 {"z", "q", "m"},3
1980 },
1981 {
1982 "&z<<<m<q<<<m",
1983 "&z<q<<<m",
1984 {"z", "q", "m"}, 3
1985 },
1986 {
1987 "& a < b < c < d& r < c",
1988 "& a < b < d& r < c",
1989 {"a", "b", "d"}, 3
1990 },
1991 {
1992 "& a < b < c < d& r < c",
1993 "& a < b < d& r < c",
1994 {"r", "c"}, 2
1995 },
1996 {
1997 "& a < b < c < d& c < m",
1998 "& a < b < c < m < d",
1999 {"a", "b", "c", "m", "d"}, 5
2000 },
2001 {
2002 "& a < b < c < d& a < m",
2003 "& a < m < b < c < d",
2004 {"a", "m", "b", "c", "d"}, 5
2005 }
2006 };
2007
2008
2009 UCollator *credundant = NULL;
2010 UCollator *cresulting = NULL;
2011 UErrorCode status = U_ZERO_ERROR;
2012 UChar rlz[2048] = { 0 };
2013 uint32_t rlen = 0;
2014
2015 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2016 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2017 rlen = u_unescape(tests[i].rules, rlz, 2048);
2018
2019 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2020 if(status == U_FILE_ACCESS_ERROR) {
2021 log_data_err("Is your data around?\n");
2022 return;
2023 } else if(U_FAILURE(status)) {
2024 log_err("Error opening collator\n");
2025 return;
2026 }
2027
2028 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2029 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2030
2031 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2032
2033 ucol_close(credundant);
2034 ucol_close(cresulting);
2035
2036 log_verbose("testing using data\n");
2037
2038 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2039 }
2040
2041 }
2042
2043 static void TestExpansionSyntax(void) {
2044 int32_t i;
2045
2046 const static char *rules[] = {
2047 "&AE <<< a << b <<< c &d <<< f",
2048 "&AE <<< a <<< b << c << d < e < f <<< g",
2049 "&AE <<< B <<< C / D <<< F"
2050 };
2051
2052 const static char *expectedRules[] = {
2053 "&A <<< a / E << b / E <<< c /E &d <<< f",
2054 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2055 "&A <<< B / E <<< C / ED <<< F / E"
2056 };
2057
2058 const static char *testdata[][8] = {
2059 {"AE", "a", "b", "c"},
2060 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2061 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2062 };
2063
2064 const static uint32_t testdatalen[] = {
2065 4,
2066 8,
2067 3
2068 };
2069
2070
2071
2072 UCollator *credundant = NULL;
2073 UCollator *cresulting = NULL;
2074 UErrorCode status = U_ZERO_ERROR;
2075 UChar rlz[2048] = { 0 };
2076 uint32_t rlen = 0;
2077
2078 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2079 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2080 rlen = u_unescape(rules[i], rlz, 2048);
2081
2082 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2083 if(status == U_FILE_ACCESS_ERROR) {
2084 log_data_err("Is your data around?\n");
2085 return;
2086 } else if(U_FAILURE(status)) {
2087 log_err("Error opening collator\n");
2088 return;
2089 }
2090 rlen = u_unescape(expectedRules[i], rlz, 2048);
2091 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2092
2093 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2094 /* as a hard error test, but only in information mode */
2095 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2096
2097 ucol_close(credundant);
2098 ucol_close(cresulting);
2099
2100 log_verbose("testing using data\n");
2101
2102 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2103 }
2104 }
2105
2106 static void TestCase(void)
2107 {
2108 const static UChar gRules[MAX_TOKEN_LEN] =
2109 /*" & 0 < 1,\u2461<a,A"*/
2110 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2111
2112 const static UChar testCase[][MAX_TOKEN_LEN] =
2113 {
2114 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2115 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2116 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2117 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2118 };
2119
2120 const static UCollationResult caseTestResults[][9] =
2121 {
2122 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2123 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2124 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2125 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2126 };
2127
2128 const static UColAttributeValue caseTestAttributes[][2] =
2129 {
2130 { UCOL_LOWER_FIRST, UCOL_OFF},
2131 { UCOL_UPPER_FIRST, UCOL_OFF},
2132 { UCOL_LOWER_FIRST, UCOL_ON},
2133 { UCOL_UPPER_FIRST, UCOL_ON}
2134 };
2135 int32_t i,j,k;
2136 UErrorCode status = U_ZERO_ERROR;
2137 UCollationElements *iter;
2138 UCollator *myCollation;
2139 myCollation = ucol_open("en_US", &status);
2140
2141 if(U_FAILURE(status)){
2142 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2143 return;
2144 }
2145 log_verbose("Testing different case settings\n");
2146 ucol_setStrength(myCollation, UCOL_TERTIARY);
2147
2148 for(k = 0; k<4; k++) {
2149 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2150 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2151 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2152 for (i = 0; i < 3 ; i++) {
2153 for(j = i+1; j<4; j++) {
2154 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2155 }
2156 }
2157 }
2158 ucol_close(myCollation);
2159
2160 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2161 if(U_FAILURE(status)){
2162 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2163 return;
2164 }
2165 log_verbose("Testing different case settings with custom rules\n");
2166 ucol_setStrength(myCollation, UCOL_TERTIARY);
2167
2168 for(k = 0; k<4; k++) {
2169 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2170 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2171 for (i = 0; i < 3 ; i++) {
2172 for(j = i+1; j<4; j++) {
2173 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2174 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2175 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2176 backAndForth(iter);
2177 ucol_closeElements(iter);
2178 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2179 backAndForth(iter);
2180 ucol_closeElements(iter);
2181 }
2182 }
2183 }
2184 ucol_close(myCollation);
2185 {
2186 const static char *lowerFirst[] = {
2187 "h",
2188 "H",
2189 "ch",
2190 "Ch",
2191 "CH",
2192 "cha",
2193 "chA",
2194 "Cha",
2195 "ChA",
2196 "CHa",
2197 "CHA",
2198 "i",
2199 "I"
2200 };
2201
2202 const static char *upperFirst[] = {
2203 "H",
2204 "h",
2205 "CH",
2206 "Ch",
2207 "ch",
2208 "CHA",
2209 "CHa",
2210 "ChA",
2211 "Cha",
2212 "chA",
2213 "cha",
2214 "I",
2215 "i"
2216 };
2217 log_verbose("mixed case test\n");
2218 log_verbose("lower first, case level off\n");
2219 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2220 log_verbose("upper first, case level off\n");
2221 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2222 log_verbose("lower first, case level on\n");
2223 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2224 log_verbose("upper first, case level on\n");
2225 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2226 }
2227
2228 }
2229
2230 static void TestIncrementalNormalize(void) {
2231
2232 /*UChar baseA =0x61;*/
2233 UChar baseA =0x41;
2234 /* UChar baseB = 0x42;*/
2235 static const UChar ccMix[] = {0x316, 0x321, 0x300};
2236 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2237 /*
2238 0x316 is combining grave accent below, cc=220
2239 0x321 is combining palatalized hook below, cc=202
2240 0x300 is combining grave accent, cc=230
2241 */
2242
2243 #define MAXSLEN 2000
2244 /*int maxSLen = 64000;*/
2245 int sLen;
2246 int i;
2247
2248 UCollator *coll;
2249 UErrorCode status = U_ZERO_ERROR;
2250 UCollationResult result;
2251
2252 int32_t myQ = getTestOption(QUICK_OPTION);
2253
2254 if(getTestOption(QUICK_OPTION) < 0) {
2255 setTestOption(QUICK_OPTION, 1);
2256 }
2257
2258 {
2259 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2260 /* most buffers along the way.*/
2261 UChar strA[MAXSLEN+1];
2262 UChar strB[MAXSLEN+1];
2263
2264 coll = ucol_open("en_US", &status);
2265 if(status == U_FILE_ACCESS_ERROR) {
2266 log_data_err("Is your data around?\n");
2267 return;
2268 } else if(U_FAILURE(status)) {
2269 log_err("Error opening collator\n");
2270 return;
2271 }
2272 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2273
2274 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2275 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2276 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2277 for (sLen = 500; sLen<501; sLen++) {
2278 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2279 strA[0] = baseA;
2280 strB[0] = baseA;
2281 for (i=1; i<=sLen-1; i++) {
2282 strA[i] = ccMix[i % 3];
2283 strB[sLen-i] = ccMix[i % 3];
2284 }
2285 strA[sLen] = 0;
2286 strB[sLen] = 0;
2287
2288 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2289 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2290 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2291 doTest(coll, strA, strB, UCOL_EQUAL);
2292 }
2293 }
2294
2295 setTestOption(QUICK_OPTION, myQ);
2296
2297
2298 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2299 /* of the string. Checks a couple of edge cases.*/
2300
2301 {
2302 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2303 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2304 ucol_setStrength(coll, UCOL_TERTIARY);
2305 doTest(coll, strA, strB, UCOL_EQUAL);
2306 }
2307
2308 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2309
2310 {
2311 /* New UCA 3.1.1.
2312 * test below used a code point from Desseret, which sorts differently
2313 * than d800 dc00
2314 */
2315 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2316 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2317 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2318 ucol_setStrength(coll, UCOL_TERTIARY);
2319 doTest(coll, strA, strB, UCOL_GREATER);
2320 }
2321
2322 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2323
2324 {
2325 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2326 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2327 char sortKeyA[50];
2328 char sortKeyAz[50];
2329 char sortKeyB[50];
2330 char sortKeyBz[50];
2331 int r;
2332
2333 /* there used to be -3 here. Hmmmm.... */
2334 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2335 result = ucol_strcoll(coll, strA, 3, strB, 3);
2336 if (result != UCOL_GREATER) {
2337 log_err("ERROR 1 in test 4\n");
2338 }
2339 result = ucol_strcoll(coll, strA, -1, strB, -1);
2340 if (result != UCOL_EQUAL) {
2341 log_err("ERROR 2 in test 4\n");
2342 }
2343
2344 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2345 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2346 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2347 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2348
2349 r = strcmp(sortKeyA, sortKeyAz);
2350 if (r <= 0) {
2351 log_err("Error 3 in test 4\n");
2352 }
2353 r = strcmp(sortKeyA, sortKeyB);
2354 if (r <= 0) {
2355 log_err("Error 4 in test 4\n");
2356 }
2357 r = strcmp(sortKeyAz, sortKeyBz);
2358 if (r != 0) {
2359 log_err("Error 5 in test 4\n");
2360 }
2361
2362 ucol_setStrength(coll, UCOL_IDENTICAL);
2363 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2364 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2365 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2366 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2367
2368 r = strcmp(sortKeyA, sortKeyAz);
2369 if (r <= 0) {
2370 log_err("Error 6 in test 4\n");
2371 }
2372 r = strcmp(sortKeyA, sortKeyB);
2373 if (r <= 0) {
2374 log_err("Error 7 in test 4\n");
2375 }
2376 r = strcmp(sortKeyAz, sortKeyBz);
2377 if (r != 0) {
2378 log_err("Error 8 in test 4\n");
2379 }
2380 ucol_setStrength(coll, UCOL_TERTIARY);
2381 }
2382
2383
2384 /* Test 5: Null characters in non-normal source strings.*/
2385
2386 {
2387 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2388 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2389 char sortKeyA[50];
2390 char sortKeyAz[50];
2391 char sortKeyB[50];
2392 char sortKeyBz[50];
2393 int r;
2394
2395 result = ucol_strcoll(coll, strA, 6, strB, 6);
2396 if (result != UCOL_GREATER) {
2397 log_err("ERROR 1 in test 5\n");
2398 }
2399 result = ucol_strcoll(coll, strA, -1, strB, -1);
2400 if (result != UCOL_EQUAL) {
2401 log_err("ERROR 2 in test 5\n");
2402 }
2403
2404 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2405 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2406 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2407 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2408
2409 r = strcmp(sortKeyA, sortKeyAz);
2410 if (r <= 0) {
2411 log_err("Error 3 in test 5\n");
2412 }
2413 r = strcmp(sortKeyA, sortKeyB);
2414 if (r <= 0) {
2415 log_err("Error 4 in test 5\n");
2416 }
2417 r = strcmp(sortKeyAz, sortKeyBz);
2418 if (r != 0) {
2419 log_err("Error 5 in test 5\n");
2420 }
2421
2422 ucol_setStrength(coll, UCOL_IDENTICAL);
2423 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2424 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2425 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2426 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2427
2428 r = strcmp(sortKeyA, sortKeyAz);
2429 if (r <= 0) {
2430 log_err("Error 6 in test 5\n");
2431 }
2432 r = strcmp(sortKeyA, sortKeyB);
2433 if (r <= 0) {
2434 log_err("Error 7 in test 5\n");
2435 }
2436 r = strcmp(sortKeyAz, sortKeyBz);
2437 if (r != 0) {
2438 log_err("Error 8 in test 5\n");
2439 }
2440 ucol_setStrength(coll, UCOL_TERTIARY);
2441 }
2442
2443
2444 /* Test 6: Null character as base of a non-normal combining sequence.*/
2445
2446 {
2447 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2448 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2449
2450 result = ucol_strcoll(coll, strA, 5, strB, 5);
2451 if (result != UCOL_LESS) {
2452 log_err("Error 1 in test 6\n");
2453 }
2454 result = ucol_strcoll(coll, strA, -1, strB, -1);
2455 if (result != UCOL_EQUAL) {
2456 log_err("Error 2 in test 6\n");
2457 }
2458 }
2459
2460 ucol_close(coll);
2461 }
2462
2463
2464
#if 0
/**
 * Disabled test: unescapes each string of caseBitData[], passes it to
 * ucol_uprv_getCaseBits() on the collator opened for the "" locale and
 * compares the answer with the matching entry of results[].
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    /* Expected case bits, index for index with caseBitData[]. */
    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    if(U_FAILURE(status)) {
        log_err_status(status, "Unable to open UCA collator -> %s\n", u_errorName(status));
        return;
    }

    for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* The collator was previously never released. */
    ucol_close(UCA);
}
#endif
2492
2493 static void TestHangulTailoring(void) {
2494 static const char *koreanData[] = {
2495 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2496 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2497 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2498 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2499 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2500 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2501 };
2502
2503 const char *rules =
2504 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2505 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2506 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2507 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2508 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2509 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2510
2511
2512 UErrorCode status = U_ZERO_ERROR;
2513 UChar rlz[2048] = { 0 };
2514 uint32_t rlen = u_unescape(rules, rlz, 2048);
2515
2516 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2517 if(status == U_FILE_ACCESS_ERROR) {
2518 log_data_err("Is your data around?\n");
2519 return;
2520 } else if(U_FAILURE(status)) {
2521 log_err("Error opening collator\n");
2522 return;
2523 }
2524
2525 log_verbose("Using start of korean rules\n");
2526
2527 if(U_SUCCESS(status)) {
2528 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2529 } else {
2530 log_err("Unable to open collator with rules %s\n", rules);
2531 }
2532
2533 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2534 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2535 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2536
2537 ucol_close(coll);
2538
2539 log_verbose("Using ko__LOTUS locale\n");
2540 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2541 }
2542
2543 static void TestCompressOverlap(void) {
2544 UChar secstr[150];
2545 UChar tertstr[150];
2546 UErrorCode status = U_ZERO_ERROR;
2547 UCollator *coll;
2548 char result[200];
2549 uint32_t resultlen;
2550 int count = 0;
2551 char *tempptr;
2552
2553 coll = ucol_open("", &status);
2554
2555 if (U_FAILURE(status)) {
2556 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2557 return;
2558 }
2559 while (count < 149) {
2560 secstr[count] = 0x0020; /* [06, 05, 05] */
2561 tertstr[count] = 0x0020;
2562 count ++;
2563 }
2564
2565 /* top down compression ----------------------------------- */
2566 secstr[count] = 0x0332; /* [, 87, 05] */
2567 tertstr[count] = 0x3000; /* [06, 05, 07] */
2568
2569 /* no compression secstr should have 150 secondary bytes, tertstr should
2570 have 150 tertiary bytes.
2571 with correct overlapping compression, secstr should have 4 secondary
2572 bytes, tertstr should have > 2 tertiary bytes */
2573 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2574 tempptr = uprv_strchr(result, 1) + 1;
2575 while (*(tempptr + 1) != 1) {
2576 /* the last secondary collation element is not checked since it is not
2577 part of the compression */
2578 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2579 log_err("Secondary compression overlapped\n");
2580 }
2581 tempptr ++;
2582 }
2583
2584 /* tertiary top/bottom/common for en_US is similar to the secondary
2585 top/bottom/common */
2586 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2587 tempptr = uprv_strrchr(result, 1) + 1;
2588 while (*(tempptr + 1) != 0) {
2589 /* the last secondary collation element is not checked since it is not
2590 part of the compression */
2591 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2592 log_err("Tertiary compression overlapped\n");
2593 }
2594 tempptr ++;
2595 }
2596
2597 /* bottom up compression ------------------------------------- */
2598 secstr[count] = 0;
2599 tertstr[count] = 0;
2600 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2601 tempptr = uprv_strchr(result, 1) + 1;
2602 while (*(tempptr + 1) != 1) {
2603 /* the last secondary collation element is not checked since it is not
2604 part of the compression */
2605 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2606 log_err("Secondary compression overlapped\n");
2607 }
2608 tempptr ++;
2609 }
2610
2611 /* tertiary top/bottom/common for en_US is similar to the secondary
2612 top/bottom/common */
2613 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2614 tempptr = uprv_strrchr(result, 1) + 1;
2615 while (*(tempptr + 1) != 0) {
2616 /* the last secondary collation element is not checked since it is not
2617 part of the compression */
2618 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2619 log_err("Tertiary compression overlapped\n");
2620 }
2621 tempptr ++;
2622 }
2623
2624 ucol_close(coll);
2625 }
2626
/**
 * Checks that the three strings of test[] keep their listed order with the
 * "root" locale collator and with each of several tailorings that reset to,
 * or insert, U+0410.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /* Tailorings tried one after another, in this order. */
    static const char *tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };

    const uint32_t testCount = sizeof(test)/sizeof(test[0]);
    const uint32_t tailoringCount = sizeof(tailorings)/sizeof(tailorings[0]);
    uint32_t t;

    /* Russian overrides contractions, so this test is not valid anymore */
    /*genericLocaleStarter("ru", test, 3);*/

    genericLocaleStarter("root", test, testCount);

    for (t = 0; t < tailoringCount; ++t) {
        genericRulesStarter(tailorings[t], test, testCount);
    }
}
2645
/**
 * Runs two three-string ordering checks against a collator built from a
 * rule that suppresses contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };

    const uint32_t noContCount  = sizeof(testNoCont)/sizeof(testNoCont[0]);
    const uint32_t noCont2Count = sizeof(testNoCont2)/sizeof(testNoCont2[0]);

    genericRulesStarter(suppressRule, testNoCont, noContCount);
    genericRulesStarter(suppressRule, testNoCont2, noCont2Count);
}
2662
2663 static void TestContraction(void) {
2664 const static char *testrules[] = {
2665 "&A = AB / B",
2666 "&A = A\\u0306/\\u0306",
2667 "&c = ch / h"
2668 };
2669 const static UChar testdata[][2] = {
2670 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2671 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2672 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2673 };
2674 const static UChar testdata2[][2] = {
2675 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2676 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2677 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2678 };
2679 const static char *testrules3[] = {
2680 "&z < xyz &xyzw << B",
2681 "&z < xyz &xyz << B / w",
2682 "&z < ch &achm << B",
2683 "&z < ch &a << B / chm",
2684 "&\\ud800\\udc00w << B",
2685 "&\\ud800\\udc00 << B / w",
2686 "&a\\ud800\\udc00m << B",
2687 "&a << B / \\ud800\\udc00m",
2688 };
2689
2690 UErrorCode status = U_ZERO_ERROR;
2691 UCollator *coll;
2692 UChar rule[256] = {0};
2693 uint32_t rlen = 0;
2694 int i;
2695
2696 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2697 UCollationElements *iter1;
2698 int j = 0;
2699 log_verbose("Rule %s for testing\n", testrules[i]);
2700 rlen = u_unescape(testrules[i], rule, 32);
2701 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2702 if (U_FAILURE(status)) {
2703 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2704 return;
2705 }
2706 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2707 if (U_FAILURE(status)) {
2708 log_err("Collation iterator creation failed\n");
2709 return;
2710 }
2711 while (j < 2) {
2712 UCollationElements *iter2 = ucol_openElements(coll,
2713 &(testdata[i][j]),
2714 1, &status);
2715 uint32_t ce;
2716 if (U_FAILURE(status)) {
2717 log_err("Collation iterator creation failed\n");
2718 return;
2719 }
2720 ce = ucol_next(iter2, &status);
2721 while (ce != UCOL_NULLORDER) {
2722 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2723 log_err("Collation elements in contraction split does not match\n");
2724 return;
2725 }
2726 ce = ucol_next(iter2, &status);
2727 }
2728 j ++;
2729 ucol_closeElements(iter2);
2730 }
2731 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2732 log_err("Collation elements not exhausted\n");
2733 return;
2734 }
2735 ucol_closeElements(iter1);
2736 ucol_close(coll);
2737 }
2738
2739 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2740 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2741 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2742 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2743 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2744 testdata2[1][1]);
2745 return;
2746 }
2747 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2748 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2749 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2750 testdata2[2][1]);
2751 return;
2752 }
2753 ucol_close(coll);
2754
2755 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2756 UCollator *coll1,
2757 *coll2;
2758 UCollationElements *iter1,
2759 *iter2;
2760 UChar ch = 0x0042 /* 'B' */;
2761 uint32_t ce;
2762 rlen = u_unescape(testrules3[i], rule, 32);
2763 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2764 rlen = u_unescape(testrules3[i + 1], rule, 32);
2765 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2766 if (U_FAILURE(status)) {
2767 log_err("Collator creation failed %s\n", testrules[i]);
2768 return;
2769 }
2770 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2771 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2772 if (U_FAILURE(status)) {
2773 log_err("Collation iterator creation failed\n");
2774 return;
2775 }
2776 ce = ucol_next(iter1, &status);
2777 if (U_FAILURE(status)) {
2778 log_err("Retrieving ces failed\n");
2779 return;
2780 }
2781 while (ce != UCOL_NULLORDER) {
2782 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2783 log_err("CEs does not match\n");
2784 return;
2785 }
2786 ce = ucol_next(iter1, &status);
2787 if (U_FAILURE(status)) {
2788 log_err("Retrieving ces failed\n");
2789 return;
2790 }
2791 }
2792 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2793 log_err("CEs not exhausted\n");
2794 return;
2795 }
2796 ucol_closeElements(iter1);
2797 ucol_closeElements(iter2);
2798 ucol_close(coll1);
2799 ucol_close(coll2);
2800 }
2801 }
2802
2803 static void TestExpansion(void) {
2804 const static char *testrules[] = {
2805 "&J << K / B & K << M",
2806 "&J << K / B << M"
2807 };
2808 const static UChar testdata[][3] = {
2809 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2810 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2811 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2812 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2813 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2814 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2815 };
2816
2817 UErrorCode status = U_ZERO_ERROR;
2818 UCollator *coll;
2819 UChar rule[256] = {0};
2820 uint32_t rlen = 0;
2821 int i;
2822
2823 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2824 int j = 0;
2825 log_verbose("Rule %s for testing\n", testrules[i]);
2826 rlen = u_unescape(testrules[i], rule, 32);
2827 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2828 if (U_FAILURE(status)) {
2829 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2830 return;
2831 }
2832
2833 for (j = 0; j < 5; j ++) {
2834 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2835 }
2836 ucol_close(coll);
2837 }
2838 }
2839
#if 0
/* This test probes the current limitations of the engine. */
/* It always fails, so it is disabled by default. */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *tailoring = "&a=b/c&d=c/e";
        static const char *orderA[] = {"add","b","adf"};
        static const char *orderB[] = {"aa","b","af"};
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

        log_verbose("recursive expansions\n");
        genericRulesStarter(tailoring, orderA, countA);
        genericRulesStarter(tailoring, orderB, countB);
    }
    /* contractions spanning expansions */
    {
        static const char *tailoring = "&a<<<c/e&g<<<eh";
        static const char *orderA[] = {"ad","c","af","f","ch","h"};
        static const char *orderB[] = {"ad","c","ch","af","f","h"};
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);

        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(tailoring, orderA, countA);
        genericRulesStarter(tailoring, orderB, countB);
    }
    /* normalization: nulls in contractions */
    {
        static const char *tailoring = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue decompOn[] = { UCOL_ON };
        static const UColAttributeValue decompOff[] = { UCOL_OFF };
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);

        log_verbose("NULL in contractions\n");
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, decompOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, decompOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, decompOff, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, decompOff, attrCount);
    }
    /* normalization: contractions spanning normalization */
    {
        static const char *tailoring = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue decompOn[] = { UCOL_ON };
        static const UColAttributeValue decompOff[] = { UCOL_OFF };
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, decompOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, decompOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, decompOff, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, decompOff, attrCount);
    }
    /* variable top: */
    {
        /* Alternative rules that were tried: */
        /*   "&\\u2010<x=[variable top]<z"    */
        /*   "&' '<x<[variable top]=z"        */
        static const char *tailoring = "&\\u2010<x<[variable top]=z";
        static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue shifted[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue nonIgnorable[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
        const uint32_t countC = sizeof(orderC)/sizeof(orderC[0]);
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(tailoring, orderC, countC, attrs, shifted, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, shifted, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, shifted, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, nonIgnorable, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, nonIgnorable, attrCount);
    }
    /* case level */
    {
        static const char *tailoring = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *orderA[] = {"c","CH","Ch","cH","ch"};
        static const char *orderB[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);

        /* Only the upper-first setting is run; a UCOL_OFF variant is not. */
        log_verbose("case level\n");
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, upperFirst, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, upperFirst, attrCount);
    }

}
#endif
2931
2932 static void TestBocsuCoverage(void) {
2933 UErrorCode status = U_ZERO_ERROR;
2934 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2935 UChar test[256] = {0};
2936 uint32_t tlen = u_unescape(testString, test, 32);
2937 uint8_t key[256] = {0};
2938 uint32_t klen = 0;
2939
2940 UCollator *coll = ucol_open("", &status);
2941 if(U_SUCCESS(status)) {
2942 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2943
2944 klen = ucol_getSortKey(coll, test, tlen, key, 256);
2945
2946 ucol_close(coll);
2947 } else {
2948 log_data_err("Couldn't open UCA\n");
2949 }
2950 }
2951
2952 static void TestVariableTopSetting(void) {
2953 UErrorCode status = U_ZERO_ERROR;
2954 const UChar *current = NULL;
2955 uint32_t varTopOriginal = 0, varTop1, varTop2;
2956 UCollator *coll = ucol_open("", &status);
2957 if(U_SUCCESS(status)) {
2958
2959 uint32_t strength = 0;
2960 uint16_t specs = 0;
2961 uint32_t chOffset = 0;
2962 uint32_t chLen = 0;
2963 uint32_t exOffset = 0;
2964 uint32_t exLen = 0;
2965 uint32_t oldChOffset = 0;
2966 uint32_t oldChLen = 0;
2967 uint32_t oldExOffset = 0;
2968 uint32_t oldExLen = 0;
2969 uint32_t prefixOffset = 0;
2970 uint32_t prefixLen = 0;
2971
2972 UBool startOfRules = TRUE;
2973 UColTokenParser src;
2974 UColOptionSet opts;
2975
2976 UChar *rulesCopy = NULL;
2977 uint32_t rulesLen;
2978
2979 UCollationResult result;
2980
2981 UChar first[256] = { 0 };
2982 UChar second[256] = { 0 };
2983 UParseError parseError;
2984 int32_t myQ = getTestOption(QUICK_OPTION);
2985
2986 uprv_memset(&src, 0, sizeof(UColTokenParser));
2987
2988 src.opts = &opts;
2989
2990 if(getTestOption(QUICK_OPTION) <= 0) {
2991 setTestOption(QUICK_OPTION, 1);
2992 }
2993
2994 /* this test will fail when normalization is turned on */
2995 /* therefore we always turn off exhaustive mode for it */
2996 { /* QUICK > 0*/
2997 log_verbose("Slide variable top over UCARules\n");
2998 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2999 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3000 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3001
3002 if(U_SUCCESS(status) && rulesLen > 0) {
3003 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3004 src.current = src.source = rulesCopy;
3005 src.end = rulesCopy+rulesLen;
3006 src.extraCurrent = src.end;
3007 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3008
3009 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3010 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3011 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3012 strength = src.parsedToken.strength;
3013 chOffset = src.parsedToken.charsOffset;
3014 chLen = src.parsedToken.charsLen;
3015 exOffset = src.parsedToken.extensionOffset;
3016 exLen = src.parsedToken.extensionLen;
3017 prefixOffset = src.parsedToken.prefixOffset;
3018 prefixLen = src.parsedToken.prefixLen;
3019 specs = src.parsedToken.flags;
3020
3021 startOfRules = FALSE;
3022 {
3023 log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3024 }
3025 if(strength == UCOL_PRIMARY) {
3026 status = U_ZERO_ERROR;
3027 varTopOriginal = ucol_getVariableTop(coll, &status);
3028 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3029 if(U_FAILURE(status)) {
3030 char buffer[256];
3031 char *buf = buffer;
3032 uint32_t i = 0, j;
3033 uint32_t CE = UCOL_NO_MORE_CES;
3034
3035 /* before we start screaming, let's see if there is a problem with the rules */
3036 UErrorCode collIterateStatus = U_ZERO_ERROR;
3037 collIterate *s = uprv_new_collIterate(&collIterateStatus);
3038 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3039
3040 CE = ucol_getNextCE(coll, s, &status);
3041
3042 for(i = 0; i < oldChLen; i++) {
3043 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3044 buf += j;
3045 }
3046 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3047 log_verbose("= Expected failure for %s =", buffer);
3048 } else {
3049 if(uprv_collIterateAtEnd(s)) {
3050 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3051 oldChOffset, u_errorName(status), buffer);
3052 } else {
3053 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3054 buffer);
3055 }
3056 }
3057 uprv_delete_collIterate(s);
3058 }
3059 varTop2 = ucol_getVariableTop(coll, &status);
3060 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3061 log_err("cannot retrieve set varTop value!\n");
3062 continue;
3063 }
3064
3065 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3066
3067 u_strncpy(first, src.source+oldChOffset, oldChLen);
3068 u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3069 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3070 first[2*oldChLen+chLen] = 0;
3071
3072 if(oldExLen == 0) {
3073 u_strncpy(second, src.source+chOffset, chLen);
3074 second[chLen] = 0;
3075 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3076 u_strncpy(second, src.source+oldExOffset, oldExLen);
3077 u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3078 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3079 second[2*oldExLen+chLen] = 0;
3080 }
3081 result = ucol_strcoll(coll, first, -1, second, -1);
3082 if(result == UCOL_EQUAL) {
3083 doTest(coll, first, second, UCOL_EQUAL);
3084 } else {
3085 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3086 }
3087 }
3088 }
3089 if(strength != UCOL_TOK_RESET) {
3090 oldChOffset = chOffset;
3091 oldChLen = chLen;
3092 oldExOffset = exOffset;
3093 oldExLen = exLen;
3094 }
3095 }
3096 status = U_ZERO_ERROR;
3097 }
3098 else {
3099 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3100 return;
3101 }
3102 if (U_FAILURE(status)) {
3103 log_err("Error parsing rules %s\n", u_errorName(status));
3104 return;
3105 }
3106 status = U_ZERO_ERROR;
3107 }
3108
3109 setTestOption(QUICK_OPTION, myQ);
3110
3111 log_verbose("Testing setting variable top to contractions\n");
3112 {
3113 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3114 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3115 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3116 while(*conts != 0) {
3117 if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
3118 varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3119 } else {
3120 varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3121 }
3122 if(U_FAILURE(status)) {
3123 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3124 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3125 * therefore it is not an error when it complains about them. */
3126 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3127 *conts, *(conts+1), *(conts+2));
3128 } else {
3129 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3130 *conts, *(conts+1), *(conts+2), u_errorName(status));
3131 }
3132 status = U_ZERO_ERROR;
3133 }
3134 conts+=3;
3135 }
3136
3137 status = U_ZERO_ERROR;
3138
3139 first[0] = 0x0040;
3140 first[1] = 0x0050;
3141 first[2] = 0x0000;
3142
3143 ucol_setVariableTop(coll, first, -1, &status);
3144
3145 if(U_SUCCESS(status)) {
3146 log_err("Invalid contraction succeded in setting variable top!\n");
3147 }
3148
3149 }
3150
3151 log_verbose("Test restoring variable top\n");
3152
3153 status = U_ZERO_ERROR;
3154 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3155 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3156 log_err("Couldn't restore old variable top\n");
3157 }
3158
3159 log_verbose("Testing calling with error set\n");
3160
3161 status = U_INTERNAL_PROGRAM_ERROR;
3162 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3163 varTop2 = ucol_getVariableTop(coll, &status);
3164 ucol_restoreVariableTop(coll, varTop2, &status);
3165 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3166 varTop2 = ucol_getVariableTop(NULL, &status);
3167 ucol_restoreVariableTop(NULL, varTop2, &status);
3168 if(status != U_INTERNAL_PROGRAM_ERROR) {
3169 log_err("Bad reaction to passed error!\n");
3170 }
3171 uprv_free(src.source);
3172 ucol_close(coll);
3173 } else {
3174 log_data_err("Couldn't open UCA collator\n");
3175 }
3176
3177 }
3178
3179 static void TestNonChars(void) {
3180 static const char *test[] = {
3181 "\\u0000", /* ignorable */
3182 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
3183 "\\uFDD0", "\\uFDEF",
3184 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3185 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3186 "\\U0003FFFE", "\\U0003FFFF",
3187 "\\U0004FFFE", "\\U0004FFFF",
3188 "\\U0005FFFE", "\\U0005FFFF",
3189 "\\U0006FFFE", "\\U0006FFFF",
3190 "\\U0007FFFE", "\\U0007FFFF",
3191 "\\U0008FFFE", "\\U0008FFFF",
3192 "\\U0009FFFE", "\\U0009FFFF",
3193 "\\U000AFFFE", "\\U000AFFFF",
3194 "\\U000BFFFE", "\\U000BFFFF",
3195 "\\U000CFFFE", "\\U000CFFFF",
3196 "\\U000DFFFE", "\\U000DFFFF",
3197 "\\U000EFFFE", "\\U000EFFFF",
3198 "\\U000FFFFE", "\\U000FFFFF",
3199 "\\U0010FFFE", "\\U0010FFFF",
3200 "\\uFFFF" /* special character with maximum primary weight */
3201 };
3202 UErrorCode status = U_ZERO_ERROR;
3203 UCollator *coll = ucol_open("en_US", &status);
3204
3205 log_verbose("Test non characters\n");
3206
3207 if(U_SUCCESS(status)) {
3208 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3209 } else {
3210 log_err_status(status, "Unable to open collator\n");
3211 }
3212
3213 ucol_close(coll);
3214 }
3215
3216 static void TestExtremeCompression(void) {
3217 static char *test[4];
3218 int32_t j = 0, i = 0;
3219
3220 for(i = 0; i<4; i++) {
3221 test[i] = (char *)malloc(2048*sizeof(char));
3222 }
3223
3224 for(j = 20; j < 500; j++) {
3225 for(i = 0; i<4; i++) {
3226 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3227 test[i][j-1] = (char)('a'+i);
3228 test[i][j] = 0;
3229 }
3230 genericLocaleStarter("en_US", (const char **)test, 4);
3231 }
3232
3233
3234 for(i = 0; i<4; i++) {
3235 free(test[i]);
3236 }
3237 }
3238
#if 0
/*
 * Disabled alternative version of TestExtremeCompression(). Kept compilable
 * should it ever be re-enabled: the error code is passed by address to
 * ucol_open() and the collator is closed again.
 *
 * NOTE(review): the first fill loop sets j-2 bytes and then writes index j-1,
 * so index j-2 is not written in that iteration - confirm the intent before
 * enabling this code.
 */
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("en_US", &status);
    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    genericLocaleStarter("en_US", (const char **)test, 4);

    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
    ucol_close(coll);
}
#endif
3268
/*
 * Tailors a chain of strings containing supplementary code points (written
 * as surrogate pairs) after "z" and checks the resulting order through
 * genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *tailoring =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
        "< \\ud800\\udc00y  < \\ud800\\udc00r"
        "< \\ud800\\udc00f  << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a  < c < b" ;

    /* expected order, first to last */
    static const char *expected[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    /* all 14 entries are checked */
    genericRulesStarter(tailoring, expected, sizeof(expected)/sizeof(expected[0]));
}
3289
3290 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3291 static void TestPrefix(void) {
3292 uint32_t i;
3293
3294 static const struct {
3295 const char *rules;
3296 const char *data[50];
3297 const uint32_t len;
3298 } tests[] = {
3299 { "&z <<< z|a",
3300 {"zz", "za"}, 2 },
3301
3302 { "&z <<< z| a",
3303 {"zz", "za"}, 2 },
3304 { "[strength I]"
3305 "&a=\\ud900\\udc25"
3306 "&z<<<\\ud900\\udc25|a",
3307 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3308 };
3309
3310
3311 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3312 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3313 }
3314 }
3315
/* This test uses data supplied by Masashiko Maedera to test the implementation */
3317 /* JIS X 4061 collation order implementation */
3318 static void TestNewJapanese(void) {
3319
3320 static const char * const test1[] = {
3321 "\\u30b7\\u30e3\\u30fc\\u30ec",
3322 "\\u30b7\\u30e3\\u30a4",
3323 "\\u30b7\\u30e4\\u30a3",
3324 "\\u30b7\\u30e3\\u30ec",
3325 "\\u3061\\u3087\\u3053",
3326 "\\u3061\\u3088\\u3053",
3327 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3328 "\\u3066\\u30fc\\u305f",
3329 "\\u30c6\\u30fc\\u30bf",
3330 "\\u30c6\\u30a7\\u30bf",
3331 "\\u3066\\u3048\\u305f",
3332 "\\u3067\\u30fc\\u305f",
3333 "\\u30c7\\u30fc\\u30bf",
3334 "\\u30c7\\u30a7\\u30bf",
3335 "\\u3067\\u3048\\u305f",
3336 "\\u3066\\u30fc\\u305f\\u30fc",
3337 "\\u30c6\\u30fc\\u30bf\\u30a1",
3338 "\\u30c6\\u30a7\\u30bf\\u30fc",
3339 "\\u3066\\u3047\\u305f\\u3041",
3340 "\\u3066\\u3048\\u305f\\u30fc",
3341 "\\u3067\\u30fc\\u305f\\u30fc",
3342 "\\u30c7\\u30fc\\u30bf\\u30a1",
3343 "\\u3067\\u30a7\\u305f\\u30a1",
3344 "\\u30c7\\u3047\\u30bf\\u3041",
3345 "\\u30c7\\u30a8\\u30bf\\u30a2",
3346 "\\u3072\\u3086",
3347 "\\u3073\\u3085\\u3042",
3348 "\\u3074\\u3085\\u3042",
3349 "\\u3073\\u3085\\u3042\\u30fc",
3350 "\\u30d3\\u30e5\\u30a2\\u30fc",
3351 "\\u3074\\u3085\\u3042\\u30fc",
3352 "\\u30d4\\u30e5\\u30a2\\u30fc",
3353 "\\u30d2\\u30e5\\u30a6",
3354 "\\u30d2\\u30e6\\u30a6",
3355 "\\u30d4\\u30e5\\u30a6\\u30a2",
3356 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3357 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3358 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3359 "\\u3072\\u3085\\u3093",
3360 "\\u3074\\u3085\\u3093",
3361 "\\u3075\\u30fc\\u308a",
3362 "\\u30d5\\u30fc\\u30ea",
3363 "\\u3075\\u3045\\u308a",
3364 "\\u3075\\u30a5\\u308a",
3365 "\\u3075\\u30a5\\u30ea",
3366 "\\u30d5\\u30a6\\u30ea",
3367 "\\u3076\\u30fc\\u308a",
3368 "\\u30d6\\u30fc\\u30ea",
3369 "\\u3076\\u3045\\u308a",
3370 "\\u30d6\\u30a5\\u308a",
3371 "\\u3077\\u3046\\u308a",
3372 "\\u30d7\\u30a6\\u30ea",
3373 "\\u3075\\u30fc\\u308a\\u30fc",
3374 "\\u30d5\\u30a5\\u30ea\\u30fc",
3375 "\\u3075\\u30a5\\u308a\\u30a3",
3376 "\\u30d5\\u3045\\u308a\\u3043",
3377 "\\u30d5\\u30a6\\u30ea\\u30fc",
3378 "\\u3075\\u3046\\u308a\\u3043",
3379 "\\u30d6\\u30a6\\u30ea\\u30a4",
3380 "\\u3077\\u30fc\\u308a\\u30fc",
3381 "\\u3077\\u30a5\\u308a\\u30a4",
3382 "\\u3077\\u3046\\u308a\\u30fc",
3383 "\\u30d7\\u30a6\\u30ea\\u30a4",
3384 "\\u30d5\\u30fd",
3385 "\\u3075\\u309e",
3386 "\\u3076\\u309d",
3387 "\\u3076\\u3075",
3388 "\\u3076\\u30d5",
3389 "\\u30d6\\u3075",
3390 "\\u30d6\\u30d5",
3391 "\\u3076\\u309e",
3392 "\\u3076\\u3077",
3393 "\\u30d6\\u3077",
3394 "\\u3077\\u309d",
3395 "\\u30d7\\u30fd",
3396 "\\u3077\\u3075",
3397 };
3398
3399 static const char *test2[] = {
3400 "\\u306f\\u309d", /* H\\u309d */
3401 "\\u30cf\\u30fd", /* K\\u30fd */
3402 "\\u306f\\u306f", /* HH */
3403 "\\u306f\\u30cf", /* HK */
3404 "\\u30cf\\u30cf", /* KK */
3405 "\\u306f\\u309e", /* H\\u309e */
3406 "\\u30cf\\u30fe", /* K\\u30fe */
3407 "\\u306f\\u3070", /* HH\\u309b */
3408 "\\u30cf\\u30d0", /* KK\\u309b */
3409 "\\u306f\\u3071", /* HH\\u309c */
3410 "\\u30cf\\u3071", /* KH\\u309c */
3411 "\\u30cf\\u30d1", /* KK\\u309c */
3412 "\\u3070\\u309d", /* H\\u309b\\u309d */
3413 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3414 "\\u3070\\u306f", /* H\\u309bH */
3415 "\\u30d0\\u30cf", /* K\\u309bK */
3416 "\\u3070\\u309e", /* H\\u309b\\u309e */
3417 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3418 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3419 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3420 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3421 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3422 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3423 "\\u3071\\u309d", /* H\\u309c\\u309d */
3424 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3425 "\\u3071\\u306f", /* H\\u309cH */
3426 "\\u30d1\\u30cf", /* K\\u309cK */
3427 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3428 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3429 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3430 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3431 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3432 };
3433 /*
3434 static const char *test3[] = {
3435 "\\u221er\\u221e",
3436 "\\u221eR#",
3437 "\\u221et\\u221e",
3438 "#r\\u221e",
3439 "#R#",
3440 "#t%",
3441 "#T%",
3442 "8t\\u221e",
3443 "8T\\u221e",
3444 "8t#",
3445 "8T#",
3446 "8t%",
3447 "8T%",
3448 "8t8",
3449 "8T8",
3450 "\\u03c9r\\u221e",
3451 "\\u03a9R%",
3452 "rr\\u221e",
3453 "rR\\u221e",
3454 "Rr\\u221e",
3455 "RR\\u221e",
3456 "RT%",
3457 "rt8",
3458 "tr\\u221e",
3459 "tr8",
3460 "TR8",
3461 "tt8",
3462 "\\u30b7\\u30e3\\u30fc\\u30ec",
3463 };
3464 */
3465 static const UColAttribute att[] = { UCOL_STRENGTH };
3466 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3467
3468 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3469 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3470
3471 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3472 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3473 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3474 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3475 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3476 }
3477
3478 static void TestStrCollIdenticalPrefix(void) {
3479 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3480 const char* test[] = {
3481 "ab\\ud9b0\\udc70",
3482 "ab\\ud9b0\\udc71"
3483 };
3484 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3485 }
3486 /* Contractions should have all their canonically equivalent */
3487 /* strings included */
3488 static void TestContractionClosure(void) {
3489 static const struct {
3490 const char *rules;
3491 const char *data[10];
3492 const uint32_t len;
3493 } tests[] = {
3494 { "&b=\\u00e4\\u00e4",
3495 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3496 { "&b=\\u00C5",
3497 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3498 };
3499 uint32_t i;
3500
3501
3502 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3503 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3504 }
3505 }
3506
/* This test also fails */
3508 static void TestBeforePrefixFailure(void) {
3509 static const struct {
3510 const char *rules;
3511 const char *data[10];
3512 const uint32_t len;
3513 } tests[] = {
3514 { "&g <<< a"
3515 "&[before 3]\\uff41 <<< x",
3516 {"x", "\\uff41"}, 2 },
3517 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3518 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3519 "&[before 3]\\u30a7<<<\\u30a9",
3520 {"\\u30a9", "\\u30a7"}, 2 },
3521 { "&[before 3]\\u30a7<<<\\u30a9"
3522 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3523 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3524 {"\\u30a9", "\\u30a7"}, 2 },
3525 };
3526 uint32_t i;
3527
3528
3529 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3530 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3531 }
3532
3533 #if 0
3534 const char* rule1 =
3535 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3536 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3537 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3538 const char* rule2 =
3539 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3540 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3541 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3542 const char* test[] = {
3543 "\\u30c6\\u30fc\\u30bf",
3544 "\\u30c6\\u30a7\\u30bf",
3545 };
3546 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3547 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3548 /* this piece of code should be in some sort of verbose mode */
3549 /* it gets the collation elements for elements and prints them */
3550 /* This is useful when trying to see whether the problem is */
3551 {
3552 UErrorCode status = U_ZERO_ERROR;
3553 uint32_t i = 0;
3554 UCollationElements *it = NULL;
3555 uint32_t CE;
3556 UChar string[256];
3557 uint32_t uStringLen;
3558 UCollator *coll = NULL;
3559
3560 uStringLen = u_unescape(rule1, string, 256);
3561
3562 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3563
3564 /*coll = ucol_open("ja_JP_JIS", &status);*/
3565 it = ucol_openElements(coll, string, 0, &status);
3566
3567 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3568 log_verbose("%s\n", test[i]);
3569 uStringLen = u_unescape(test[i], string, 256);
3570 ucol_setText(it, string, uStringLen, &status);
3571
3572 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3573 log_verbose("%08X\n", CE);
3574 }
3575 log_verbose("\n");
3576
3577 }
3578
3579 ucol_closeElements(it);
3580 ucol_close(coll);
3581 }
3582 #endif
3583 }
3584
3585 static void TestPrefixCompose(void) {
3586 const char* rule1 =
3587 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3588 /*
3589 const char* test[] = {
3590 "\\u30c6\\u30fc\\u30bf",
3591 "\\u30c6\\u30a7\\u30bf",
3592 };
3593 */
3594 {
3595 UErrorCode status = U_ZERO_ERROR;
3596 /*uint32_t i = 0;*/
3597 /*UCollationElements *it = NULL;*/
3598 /* uint32_t CE;*/
3599 UChar string[256];
3600 uint32_t uStringLen;
3601 UCollator *coll = NULL;
3602
3603 uStringLen = u_unescape(rule1, string, 256);
3604
3605 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3606 ucol_close(coll);
3607 }
3608
3609
3610 }
3611
3612 /*
3613 [last variable] last variable value
3614 [last primary ignorable] largest CE for primary ignorable
3615 [last secondary ignorable] largest CE for secondary ignorable
3616 [last tertiary ignorable] largest CE for tertiary ignorable
3617 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3618 */
3619
3620 static void TestRuleOptions(void) {
3621 /* values here are hardcoded and are correct for the current UCA
3622 * when the UCA changes, one might be forced to change these
3623 * values.
3624 */
3625
3626 /*
3627 * These strings contain the last character before [variable top]
3628 * and the first and second characters (by primary weights) after it.
3629 * See FractionalUCA.txt. For example:
3630 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3631 [variable top = 0C FE]
3632 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3633 and
3634 00B4; [0D 0C, 05, 05]
3635 *
3636 * Note: Starting with UCA 6.0, the [variable top] collation element
3637 * is not the weight of any character or string,
3638 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3639 */
3640 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3641 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
3642 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3643
3644 /*
3645 * This string has to match the character that has the [last regular] weight
3646 * which changes with each UCA version.
3647 * See the bottom of FractionalUCA.txt which says something like
3648 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3649 *
3650 * Note: Starting with UCA 6.0, the [last regular] collation element
3651 * is not the weight of any character or string,
3652 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3653 */
3654 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3655
3656 static const struct {
3657 const char *rules;
3658 const char *data[10];
3659 const uint32_t len;
3660 } tests[] = {
3661 /* - all befores here amount to zero */
3662 { "&[before 3][first tertiary ignorable]<<<a",
3663 { "\\u0000", "a"}, 2
3664 }, /* you cannot go before first tertiary ignorable */
3665
3666 { "&[before 3][last tertiary ignorable]<<<a",
3667 { "\\u0000", "a"}, 2
3668 }, /* you cannot go before last tertiary ignorable */
3669
3670 { "&[before 3][first secondary ignorable]<<<a",
3671 { "\\u0000", "a"}, 2
3672 }, /* you cannot go before first secondary ignorable */
3673
3674 { "&[before 3][last secondary ignorable]<<<a",
3675 { "\\u0000", "a"}, 2
3676 }, /* you cannot go before first secondary ignorable */
3677
3678 /* 'normal' befores */
3679
3680 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3681 { "c", "b", "\\u0332", "a" }, 4
3682 },
3683
3684 /* we don't have a code point that corresponds to
3685 * the last primary ignorable
3686 */
3687 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3688 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3689 },
3690
3691 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3692 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3693 },
3694
3695 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3696 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
3697 },
3698
3699 { "&[first regular]<a"
3700 "&[before 1][first regular]<b",
3701 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
3702 },
3703
3704 { "&[before 1][last regular]<b"
3705 "&[last regular]<a",
3706 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3707 },
3708
3709 { "&[before 1][first implicit]<b"
3710 "&[first implicit]<a",
3711 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3712 },
3713
3714 { "&[before 1][last implicit]<b"
3715 "&[last implicit]<a",
3716 { "b", "\\U0010FFFD", "a" }, 3
3717 },
3718
3719 { "&[last variable]<z"
3720 "&[last primary ignorable]<x"
3721 "&[last secondary ignorable]<<y"
3722 "&[last tertiary ignorable]<<<w"
3723 "&[top]<u",
3724 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
3725 }
3726
3727 };
3728 uint32_t i;
3729
3730 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3731 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3732 }
3733 }
3734
3735
3736 static void TestOptimize(void) {
3737 /* this is not really a test - just trying out
3738 * whether copying of UCA contents will fail
3739 * Cannot really test, since the functionality
3740 * remains the same.
3741 */
3742 static const struct {
3743 const char *rules;
3744 const char *data[10];
3745 const uint32_t len;
3746 } tests[] = {
3747 /* - all befores here amount to zero */
3748 { "[optimize [\\uAC00-\\uD7FF]]",
3749 { "a", "b"}, 2}
3750 };
3751 uint32_t i;
3752
3753 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3754 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3755 }
3756 }
3757
3758 /*
3759 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3760 weiv ucol_strcollIter?
3761 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3762 weiv these are the input strings?
3763 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3764 weiv will check - could be a problem with utf-8 iterator
3765 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3766 weiv hmmm
3767 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3768 weiv that doesn't sound right
3769 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3770 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3771 cycheng@ca.ibm.c... yes
3772 weiv and then do the comparison
3773 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3774 weiv utf-16 strings look like a little endian ones in the example you sent me
3775 weiv It could be a bug - let me try to test it out
3776 cycheng@ca.ibm.c... ok
3777 cycheng@ca.ibm.c... we can wait till the conf. call
3778 cycheng@ca.ibm.c... next weke
3779 weiv that would be great
3780 weiv hmmm
3781 weiv I might be wrong
3782 weiv let me play with it some more
3783 cycheng@ca.ibm.c... ok
3784 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3785 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3786 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3787 weiv ok
3788 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3789 weiv thanks
3790 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3791 */
#if 0
/*
 * Disabled. Compares one pair of strings twice with ucol_strcollIter() -
 * once through UTF-16BE iterators and once through UTF-8 iterators - with
 * normalization switched on, and reports when the two results differ.
 */
static void Alexis(void) {
    UErrorCode errorCode = U_ZERO_ERROR;
    UCollator *collator = ucol_open("", &errorCode);

    /* the same two strings, first as UTF-16BE bytes ... */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    /* ... then as UTF-8 bytes */
    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator utf16Left, utf16Right;
    UCharIterator utf8Left, utf8Right;

    UCollationResult utf16Result, utf8Result;

    uiter_setUTF16BE(&utf16Left, utf16be[0], 4);
    uiter_setUTF16BE(&utf16Right, utf16be[1], 4);

    uiter_setUTF8(&utf8Left, utf8[0], 4);
    uiter_setUTF8(&utf8Right, utf8[1], 4);

    ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &errorCode);

    utf16Result = ucol_strcollIter(collator, &utf16Left, &utf16Right, &errorCode);
    utf8Result = ucol_strcollIter(collator, &utf8Left, &utf8Right, &errorCode);

    if(utf16Result != utf8Result) {
        log_err("different results\n");
    }

    ucol_close(collator);
}
#endif
3832
3833 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3834 static void Alexis2(void) {
3835 UErrorCode status = U_ZERO_ERROR;
3836 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3837 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3838 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3839 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3840
3841 UConverter *conv = NULL;
3842
3843 UCharIterator U16BEItS, U16BEItT;
3844 UCharIterator U8ItS, U8ItT;
3845
3846 UCollationResult resU16, resU16BE, resU8;
3847
3848 static const char* const pairs[][2] = {
3849 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3850 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3851 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3852 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3853 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3854 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3855 { "\\u0020", "\\u0020\\u0000"}
3856 /*
3857 5F20 (my result here)
3858 5F204E008E3F
3859 5F20 (your result here)
3860 */
3861 };
3862
3863 int32_t i = 0;
3864
3865 UCollator *coll = ucol_open("", &status);
3866 if(status == U_FILE_ACCESS_ERROR) {
3867 log_data_err("Is your data around?\n");
3868 return;
3869 } else if(U_FAILURE(status)) {
3870 log_err("Error opening collator\n");
3871 return;
3872 }
3873 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3874 conv = ucnv_open("UTF16BE", &status);
3875 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3876 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3877 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3878
3879 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3880
3881 log_verbose("Result of strcoll is %i\n", resU16);
3882
3883 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3884 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3885
3886 /* use the original sizes, as the result from converter is in bytes */
3887 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3888 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3889
3890 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3891
3892 log_verbose("Result of U16BE is %i\n", resU16BE);
3893
3894 if(resU16 != resU16BE) {
3895 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3896 }
3897
3898 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3899 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3900
3901 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3902 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3903
3904 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3905
3906 if(resU16 != resU8) {
3907 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3908 }
3909
3910 }
3911
3912 ucol_close(coll);
3913 ucnv_close(conv);
3914 }
3915
3916 static void TestHebrewUCA(void) {
3917 UErrorCode status = U_ZERO_ERROR;
3918 static const char *first[] = {
3919 "d790d6b8d79cd795d6bcd7a9",
3920 "d790d79cd79ed7a7d799d799d7a1",
3921 "d790d6b4d79ed795d6bcd7a9",
3922 };
3923
3924 char utf8String[3][256];
3925 UChar utf16String[3][256];
3926
3927 int32_t i = 0, j = 0;
3928 int32_t sizeUTF8[3];
3929 int32_t sizeUTF16[3];
3930
3931 UCollator *coll = ucol_open("", &status);
3932 if (U_FAILURE(status)) {
3933 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3934 return;
3935 }
3936 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3937
3938 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3939 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3940 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3941 log_verbose("%i: ");
3942 for(j = 0; j < sizeUTF16[i]; j++) {
3943 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3944 log_verbose("%04X", utf16String[i][j]);
3945 }
3946 log_verbose("\n");
3947 }
3948 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3949 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3950 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3951 }
3952 }
3953
3954 ucol_close(coll);
3955
3956 }
3957
3958 static void TestPartialSortKeyTermination(void) {
3959 static const char* cases[] = {
3960 "\\u1234\\u1234\\udc00",
3961 "\\udc00\\ud800\\ud800"
3962 };
3963
3964 int32_t i = sizeof(UCollator);
3965
3966 UErrorCode status = U_ZERO_ERROR;
3967
3968 UCollator *coll = ucol_open("", &status);
3969
3970 UCharIterator iter;
3971
3972 UChar currCase[256];
3973 int32_t length = 0;
3974 int32_t pKeyLen = 0;
3975
3976 uint8_t key[256];
3977
3978 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3979 uint32_t state[2] = {0, 0};
3980 length = u_unescape(cases[i], currCase, 256);
3981 uiter_setString(&iter, currCase, length);
3982 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3983
3984 log_verbose("Done\n");
3985
3986 }
3987 ucol_close(coll);
3988 }
3989
3990 static void TestSettings(void) {
3991 static const char* cases[] = {
3992 "apple",
3993 "Apple"
3994 };
3995
3996 static const char* locales[] = {
3997 "",
3998 "en"
3999 };
4000
4001 UErrorCode status = U_ZERO_ERROR;
4002
4003 int32_t i = 0, j = 0;
4004
4005 UChar source[256], target[256];
4006 int32_t sLen = 0, tLen = 0;
4007
4008 UCollator *collateObject = NULL;
4009 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4010 collateObject = ucol_open(locales[i], &status);
4011 ucol_setStrength(collateObject, UCOL_PRIMARY);
4012 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4013 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4014 sLen = u_unescape(cases[j-1], source, 256);
4015 source[sLen] = 0;
4016 tLen = u_unescape(cases[j], target, 256);
4017 source[tLen] = 0;
4018 doTest(collateObject, source, target, UCOL_EQUAL);
4019 }
4020 ucol_close(collateObject);
4021 }
4022 }
4023
4024 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4025 UErrorCode status = U_ZERO_ERROR;
4026 int32_t errorNo = 0;
4027 /*const UChar *sourceRules = NULL;*/
4028 /*int32_t sourceRulesLen = 0;*/
4029 UColAttributeValue french = UCOL_OFF;
4030 int32_t cloneSize = 0;
4031
4032 if(!ucol_equals(source, target)) {
4033 log_err("Same collators, different address not equal\n");
4034 errorNo++;
4035 }
4036 ucol_close(target);
4037 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4038 /* currently, safeClone is implemented through getRules/openRules
4039 * so it is the same as the test below - I will comment that test out.
4040 */
4041 /* real thing */
4042 target = ucol_safeClone(source, NULL, &cloneSize, &status);
4043 if(U_FAILURE(status)) {
4044 log_err("Error creating clone\n");
4045 errorNo++;
4046 return errorNo;
4047 }
4048 if(!ucol_equals(source, target)) {
4049 log_err("Collator different from it's clone\n");
4050 errorNo++;
4051 }
4052 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4053 if(french == UCOL_ON) {
4054 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4055 } else {
4056 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4057 }
4058 if(U_FAILURE(status)) {
4059 log_err("Error setting attributes\n");
4060 errorNo++;
4061 return errorNo;
4062 }
4063 if(ucol_equals(source, target)) {
4064 log_err("Collators same even when options changed\n");
4065 errorNo++;
4066 }
4067 ucol_close(target);
4068 /* commented out since safeClone uses exactly the same technique */
4069 /*
4070 sourceRules = ucol_getRules(source, &sourceRulesLen);
4071 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4072 if(U_FAILURE(status)) {
4073 log_err("Error instantiating target from rules\n");
4074 errorNo++;
4075 return errorNo;
4076 }
4077 if(!ucol_equals(source, target)) {
4078 log_err("Collator different from collator that was created from the same rules\n");
4079 errorNo++;
4080 }
4081 ucol_close(target);
4082 */
4083 }
4084 return errorNo;
4085 }
4086
4087
4088 static void TestEquals(void) {
4089 /* ucol_equals is not currently a public API. There is a chance that it will become
4090 * something like this, but currently it is only used by RuleBasedCollator::operator==
4091 */
4092 /* test whether the two collators instantiated from the same locale are equal */
4093 UErrorCode status = U_ZERO_ERROR;
4094 UParseError parseError;
4095 int32_t noOfLoc = uloc_countAvailable();
4096 const char *locName = NULL;
4097 UCollator *source = NULL, *target = NULL;
4098 int32_t i = 0;
4099
4100 const char* rules[] = {
4101 "&l < lj <<< Lj <<< LJ",
4102 "&n < nj <<< Nj <<< NJ",
4103 "&ae <<< \\u00e4",
4104 "&AE <<< \\u00c4"
4105 };
4106 /*
4107 const char* badRules[] = {
4108 "&l <<< Lj",
4109 "&n < nj <<< nJ <<< NJ",
4110 "&a <<< \\u00e4",
4111 "&AE <<< \\u00c4 <<< x"
4112 };
4113 */
4114
4115 UChar sourceRules[1024], targetRules[1024];
4116 int32_t sourceRulesSize = 0, targetRulesSize = 0;
4117 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4118
4119 for(i = 0; i < rulesSize; i++) {
4120 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4121 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4122 }
4123
4124 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4125 if(status == U_FILE_ACCESS_ERROR) {
4126 log_data_err("Is your data around?\n");
4127 return;
4128 } else if(U_FAILURE(status)) {
4129 log_err("Error opening collator\n");
4130 return;
4131 }
4132 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4133 if(!ucol_equals(source, target)) {
4134 log_err("Equivalent collators not equal!\n");
4135 }
4136 ucol_close(source);
4137 ucol_close(target);
4138
4139 source = ucol_open("root", &status);
4140 target = ucol_open("root", &status);
4141 log_verbose("Testing root\n");
4142 if(!ucol_equals(source, source)) {
4143 log_err("Same collator not equal\n");
4144 }
4145 if(TestEqualsForCollator(locName, source, target)) {
4146 log_err("Errors for root\n", locName);
4147 }
4148 ucol_close(source);
4149
4150 for(i = 0; i<noOfLoc; i++) {
4151 status = U_ZERO_ERROR;
4152 locName = uloc_getAvailable(i);
4153 /*if(hasCollationElements(locName)) {*/
4154 log_verbose("Testing equality for locale %s\n", locName);
4155 source = ucol_open(locName, &status);
4156 target = ucol_open(locName, &status);
4157 if (U_FAILURE(status)) {
4158 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
4159 continue;
4160 }
4161 if(TestEqualsForCollator(locName, source, target)) {
4162 log_err("Errors for locale %s\n", locName);
4163 }
4164 ucol_close(source);
4165 /*}*/
4166 }
4167 }
4168
4169 static void TestJ2726(void) {
4170 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4171 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4172 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4173 UErrorCode status = U_ZERO_ERROR;
4174 UCollator *coll = ucol_open("en", &status);
4175 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4176 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4177 doTest(coll, a, aSpace, UCOL_EQUAL);
4178 doTest(coll, aSpace, a, UCOL_EQUAL);
4179 doTest(coll, a, spaceA, UCOL_EQUAL);
4180 doTest(coll, spaceA, a, UCOL_EQUAL);
4181 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4182 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4183 ucol_close(coll);
4184 }
4185
4186 static void NullRule(void) {
4187 UChar r[3] = {0};
4188 UErrorCode status = U_ZERO_ERROR;
4189 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4190 if(U_SUCCESS(status)) {
4191 log_err("This should have been an error!\n");
4192 ucol_close(coll);
4193 } else {
4194 status = U_ZERO_ERROR;
4195 }
4196 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4197 if(U_FAILURE(status)) {
4198 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4199 } else {
4200 ucol_close(coll);
4201 }
4202 }
4203
4204 /**
4205 * Test for CollationElementIterator previous and next for the whole set of
4206 * unicode characters with normalization on.
4207 */
4208 static void TestNumericCollation(void)
4209 {
4210 UErrorCode status = U_ZERO_ERROR;
4211
4212 const static char *basicTestStrings[]={
4213 "hello1",
4214 "hello2",
4215 "hello2002",
4216 "hello2003",
4217 "hello123456",
4218 "hello1234567",
4219 "hello10000000",
4220 "hello100000000",
4221 "hello1000000000",
4222 "hello10000000000",
4223 };
4224
4225 const static char *preZeroTestStrings[]={
4226 "avery10000",
4227 "avery010000",
4228 "avery0010000",
4229 "avery00010000",
4230 "avery000010000",
4231 "avery0000010000",
4232 "avery00000010000",
4233 "avery000000010000",
4234 };
4235
4236 const static char *thirtyTwoBitNumericStrings[]={
4237 "avery42949672960",
4238 "avery42949672961",
4239 "avery42949672962",
4240 "avery429496729610"
4241 };
4242
4243 const static char *longNumericStrings[]={
4244 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4245 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4246 are treated as multiple collation elements. */
4247 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4248 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4249 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4250 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4251 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4252 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4253 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4254 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4255 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4256 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4257 };
4258
4259 const static char *supplementaryDigits[] = {
4260 "\\uD835\\uDFCE", /* 0 */
4261 "\\uD835\\uDFCF", /* 1 */
4262 "\\uD835\\uDFD0", /* 2 */
4263 "\\uD835\\uDFD1", /* 3 */
4264 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4265 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4266 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4267 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4268 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4269 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4270 };
4271
4272 const static char *foreignDigits[] = {
4273 "\\u0661",
4274 "\\u0662",
4275 "\\u0663",
4276 "\\u0661\\u0660",
4277 "\\u0661\\u0662",
4278 "\\u0661\\u0663",
4279 "\\u0662\\u0660",
4280 "\\u0662\\u0662",
4281 "\\u0662\\u0663",
4282 "\\u0663\\u0660",
4283 "\\u0663\\u0662",
4284 "\\u0663\\u0663"
4285 };
4286
4287 const static char *evenZeroes[] = {
4288 "2000",
4289 "2001",
4290 "2002",
4291 "2003"
4292 };
4293
4294 UColAttribute att = UCOL_NUMERIC_COLLATION;
4295 UColAttributeValue val = UCOL_ON;
4296
4297 /* Open our collator. */
4298 UCollator* coll = ucol_open("root", &status);
4299 if (U_FAILURE(status)){
4300 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4301 myErrorName(status));
4302 return;
4303 }
4304 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4305 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4306 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4307 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4308 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4309 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4310
4311 /* Setting up our collator to do digits. */
4312 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4313 if (U_FAILURE(status)){
4314 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4315 myErrorName(status));
4316 return;
4317 }
4318
4319 /*
4320 Testing that prepended zeroes still yield the correct collation behavior.
4321 We expect that every element in our strings array will be equal.
4322 */
4323 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4324
4325 ucol_close(coll);
4326 }
4327
4328 static void TestTibetanConformance(void)
4329 {
4330 const char* test[] = {
4331 "\\u0FB2\\u0591\\u0F71\\u0061",
4332 "\\u0FB2\\u0F71\\u0061"
4333 };
4334
4335 UErrorCode status = U_ZERO_ERROR;
4336 UCollator *coll = ucol_open("", &status);
4337 UChar source[100];
4338 UChar target[100];
4339 int result;
4340 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4341 if (U_SUCCESS(status)) {
4342 u_unescape(test[0], source, 100);
4343 u_unescape(test[1], target, 100);
4344 doTest(coll, source, target, UCOL_EQUAL);
4345 result = ucol_strcoll(coll, source, -1, target, -1);
4346 log_verbose("result %d\n", result);
4347 if (UCOL_EQUAL != result) {
4348 log_err("Tibetan comparison error\n");
4349 }
4350 }
4351 ucol_close(coll);
4352
4353 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4354 }
4355
/*
 * Passes a two-string Han sequence to genericLocaleStarter for the
 * "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *ordered[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", ordered, LEN(ordered));
}
4360
/* Largest raw value (inclusive) walked by the implicit-weight tests below. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks for 32-bit values.  The definitions carry no trailing semicolon
 * so that they can be used inside larger expressions.
 */
#define topByte 0xFF000000     /* most significant byte  */
#define bottomByte 0xFF        /* least significant byte */
#define fourBytes 0xFFFFFFFF   /* all 32 bits            */
4365
4366
4367 static void showImplicit(UChar32 i) {
4368 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4369 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4370 }
4371 }
4372
4373 static void TestImplicitGeneration(void) {
4374 UErrorCode status = U_ZERO_ERROR;
4375 UChar32 last = 0;
4376 UChar32 current;
4377 UChar32 i = 0, j = 0;
4378 UChar32 roundtrip = 0;
4379 UChar32 lastBottom = 0;
4380 UChar32 currentBottom = 0;
4381 UChar32 lastTop = 0;
4382 UChar32 currentTop = 0;
4383
4384 UCollator *coll = ucol_open("root", &status);
4385 if(U_FAILURE(status)) {
4386 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4387 return;
4388 }
4389
4390 uprv_uca_getRawFromImplicit(0xE20303E7);
4391
4392 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4393 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4394
4395 /* check that it round-trips AND that all intervening ones are illegal*/
4396 roundtrip = uprv_uca_getRawFromImplicit(current);
4397 if (roundtrip != i) {
4398 log_err("No roundtrip %08X\n", i);
4399 }
4400 if (last != 0) {
4401 for (j = last + 1; j < current; ++j) {
4402 roundtrip = uprv_uca_getRawFromImplicit(j);
4403 /* raise an error if it *doesn't* find an error*/
4404 if (roundtrip != -1) {
4405 log_err("Fails to recognize illegal %08X\n", j);
4406 }
4407 }
4408 }
4409 /* now do other consistency checks*/
4410 lastBottom = last & bottomByte;
4411 currentBottom = current & bottomByte;
4412 lastTop = last & topByte;
4413 currentTop = current & topByte;
4414
4415 /* print out some values for spot-checking*/
4416 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4417 showImplicit(i-3);
4418 showImplicit(i-2);
4419 showImplicit(i-1);
4420 showImplicit(i);
4421 showImplicit(i+1);
4422 showImplicit(i+2);
4423 }
4424 last = current;
4425
4426 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4427 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4428 }
4429 }
4430 showImplicit(TST_UCOL_MAX_INPUT-2);
4431 showImplicit(TST_UCOL_MAX_INPUT-1);
4432 showImplicit(TST_UCOL_MAX_INPUT);
4433 ucol_close(coll);
4434 }
4435
4436 /**
4437 * Iterate through the given iterator, checking to see that all the strings
4438 * in the expected array are present.
4439 * @param expected array of strings we expect to see, or NULL
4440 * @param expectedCount number of elements of expected, or 0
4441 */
4442 static int32_t checkUEnumeration(const char* msg,
4443 UEnumeration* iter,
4444 const char** expected,
4445 int32_t expectedCount) {
4446 UErrorCode ec = U_ZERO_ERROR;
4447 int32_t i = 0, n, j, bit;
4448 int32_t seenMask = 0;
4449
4450 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4451 n = uenum_count(iter, &ec);
4452 if (!assertSuccess("count", &ec)) return -1;
4453 log_verbose("%s = [", msg);
4454 for (;; ++i) {
4455 const char* s = uenum_next(iter, NULL, &ec);
4456 if (!assertSuccess("snext", &ec) || s == NULL) break;
4457 if (i != 0) log_verbose(",");
4458 log_verbose("%s", s);
4459 /* check expected list */
4460 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4461 if ((seenMask&bit) == 0 &&
4462 uprv_strcmp(s, expected[j]) == 0) {
4463 seenMask |= bit;
4464 break;
4465 }
4466 }
4467 }
4468 log_verbose("] (%d)\n", i);
4469 assertTrue("count verified", i==n);
4470 /* did we see all expected strings? */
4471 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4472 if ((seenMask&bit)!=0) {
4473 log_verbose("Ok: \"%s\" seen\n", expected[j]);
4474 } else {
4475 log_err("FAIL: \"%s\" not seen\n", expected[j]);
4476 }
4477 }
4478 return n;
4479 }
4480
4481 /**
4482 * Test new API added for separate collation tree.
4483 */
4484 static void TestSeparateTrees(void) {
4485 UErrorCode ec = U_ZERO_ERROR;
4486 UEnumeration *e = NULL;
4487 int32_t n = -1;
4488 UBool isAvailable;
4489 char loc[256];
4490
4491 static const char* AVAIL[] = { "en", "de" };
4492
4493 static const char* KW[] = { "collation" };
4494
4495 static const char* KWVAL[] = { "phonebook", "stroke" };
4496
4497 #if !UCONFIG_NO_SERVICE
4498 e = ucol_openAvailableLocales(&ec);
4499 if (e != NULL) {
4500 assertSuccess("ucol_openAvailableLocales", &ec);
4501 assertTrue("ucol_openAvailableLocales!=0", e!=0);
4502 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4503 /* Don't need to check n because we check list */
4504 uenum_close(e);
4505 } else {
4506 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4507 }
4508 #endif
4509
4510 e = ucol_getKeywords(&ec);
4511 if (e != NULL) {
4512 assertSuccess("ucol_getKeywords", &ec);
4513 assertTrue("ucol_getKeywords!=0", e!=0);
4514 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4515 /* Don't need to check n because we check list */
4516 uenum_close(e);
4517 } else {
4518 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4519 }
4520
4521 e = ucol_getKeywordValues(KW[0], &ec);
4522 if (e != NULL) {
4523 assertSuccess("ucol_getKeywordValues", &ec);
4524 assertTrue("ucol_getKeywordValues!=0", e!=0);
4525 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4526 /* Don't need to check n because we check list */
4527 uenum_close(e);
4528 } else {
4529 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4530 }
4531
4532 /* Try setting a warning before calling ucol_getKeywordValues */
4533 ec = U_USING_FALLBACK_WARNING;
4534 e = ucol_getKeywordValues(KW[0], &ec);
4535 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4536 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4537 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4538 /* Don't need to check n because we check list */
4539 uenum_close(e);
4540 }
4541
4542 /*
4543 U_DRAFT int32_t U_EXPORT2
4544 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4545 const char* locale, UBool* isAvailable,
4546 UErrorCode* status);
4547 }
4548 */
4549 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4550 &isAvailable, &ec);
4551 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4552 assertEquals("getFunctionalEquivalent(de)", "de", loc);
4553 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4554 isAvailable == TRUE);
4555 }
4556
4557 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4558 &isAvailable, &ec);
4559 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4560 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4561 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4562 isAvailable == TRUE);
4563 }
4564 }
4565
/*
 * Supersedes TestJ784.  Runs two ordered string lists both through a custom
 * "[before 2]" rule set for accented vowels and through the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* syllables starting with 'l' */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* accent and case variants of 'a' between 'x' characters */
    static const char *variants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(rules, syllables, LEN(syllables));
    genericLocaleStarter("zh", syllables, LEN(syllables));
    genericRulesStarter(rules, variants, LEN(variants));
    genericLocaleStarter("zh", variants, LEN(variants));
}
4626
4627 static void TestBeforeTightening(void) {
4628 static const struct {
4629 const char *rules;
4630 UErrorCode expectedStatus;
4631 } tests[] = {
4632 { "&[before 1]a<x", U_ZERO_ERROR },
4633 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4634 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4635 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4636 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4637 { "&[before 2]a<<x",U_ZERO_ERROR },
4638 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4639 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4640 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
4641 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
4642 { "&[before 3]a<<<x",U_ZERO_ERROR },
4643 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
4644 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4645 };
4646
4647 int32_t i = 0;
4648
4649 UErrorCode status = U_ZERO_ERROR;
4650 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4651 uint32_t rlen = 0;
4652
4653 UCollator *coll = NULL;
4654
4655
4656 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4657 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4658 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4659 if(status != tests[i].expectedStatus) {
4660 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4661 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4662 }
4663 ucol_close(coll);
4664 status = U_ZERO_ERROR;
4665 }
4666
4667 }
4668
4669 #if 0
4670 &m < a
4671 &[before 1] a < x <<< X << q <<< Q < z
4672 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4673
4674 &m < a
4675 &[before 2] a << x <<< X << q <<< Q < z
4676 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4677
4678 &m < a
4679 &[before 3] a <<< x <<< X << q <<< Q < z
4680 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4681
4682
4683 &m << a
4684 &[before 1] a < x <<< X << q <<< Q < z
4685 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4686
4687 &m << a
4688 &[before 2] a << x <<< X << q <<< Q < z
4689 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4690
4691 &m << a
4692 &[before 3] a <<< x <<< X << q <<< Q < z
4693 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4694
4695
4696 &m <<< a
4697 &[before 1] a < x <<< X << q <<< Q < z
4698 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4699
4700 &m <<< a
4701 &[before 2] a << x <<< X << q <<< Q < z
4702 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4703
4704 &m <<< a
4705 &[before 3] a <<< x <<< X << q <<< Q < z
4706 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4707
4708
4709 &[before 1] s < x <<< X << q <<< Q < z
4710 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4711
4712 &[before 2] s << x <<< X << q <<< Q < z
4713 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4714
4715 &[before 3] s <<< x <<< X << q <<< Q < z
4716 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4717
4718
4719 &[before 1] \u24DC < x <<< X << q <<< Q < z
4720 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4721
4722 &[before 2] \u24DC << x <<< X << q <<< Q < z
4723 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4724
4725 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4726 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4727 #endif
4728
4729
#if 0
/* requires features not yet supported */
/*
 * Disabled test: for each rule string, the listed strings are handed to
 * genericRulesStarter in their expected order.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        /* reset on a primary difference (&m < a) */
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
            { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
            { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
            { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset on a secondary difference (&m << a) */
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
            { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
            { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
            { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset on a tertiary difference (&m <<< a) */
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
            { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
            { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
            { "m","x","X","a","M","q","Q","z","n" }, 9},
        /* reset directly on 's' */
        { "&[before 1] s < x <<< X << q <<< Q < z",
            { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
            { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
            { "r","R","x","X","s","q","Q","z","n" }, 9},
        /* reset directly on U+24DC */
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
            { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
            { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
            { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t idx;

    for(idx = 0; idx < LEN(tests); idx++) {
        genericRulesStarter(tests[idx].rules, tests[idx].order, tests[idx].size);
    }
}
#endif
4777
4778 static void TestTailorNULL( void ) {
4779 const static char* rule = "&a <<< '\\u0000'";
4780 UErrorCode status = U_ZERO_ERROR;
4781 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4782 uint32_t rlen = 0;
4783 UChar a = 1, null = 0;
4784 UCollationResult res = UCOL_EQUAL;
4785
4786 UCollator *coll = NULL;
4787
4788
4789 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4790 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4791
4792 if(U_FAILURE(status)) {
4793 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4794 } else {
4795 res = ucol_strcoll(coll, &a, 1, &null, 1);
4796
4797 if(res != UCOL_LESS) {
4798 log_err("NULL was not tailored properly!\n");
4799 }
4800 }
4801
4802 ucol_close(coll);
4803 }
4804
4805 static void
4806 TestUpperFirstQuaternary(void)
4807 {
4808 const char* tests[] = { "B", "b", "Bb", "bB" };
4809 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4810 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4811 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4812 }
4813
4814 static void
4815 TestJ4960(void)
4816 {
4817 const char* tests[] = { "\\u00e2T", "aT" };
4818 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4819 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4820 const char* tests2[] = { "a", "A" };
4821 const char* rule = "&[first tertiary ignorable]=A=a";
4822 UColAttribute att2[] = { UCOL_CASE_LEVEL };
4823 UColAttributeValue attVals2[] = { UCOL_ON };
4824 /* Test whether we correctly ignore primary ignorables on case level when */
4825 /* we have only primary & case level */
4826 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4827 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4828 /* and case level */
4829 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4830 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4831 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4832 }
4833
4834 static void
4835 TestJ5223(void)
4836 {
4837 static const char *test = "this is a test string";
4838 UChar ustr[256];
4839 int32_t ustr_length = u_unescape(test, ustr, 256);
4840 unsigned char sortkey[256];
4841 int32_t sortkey_length;
4842 UErrorCode status = U_ZERO_ERROR;
4843 static UCollator *coll = NULL;
4844 coll = ucol_open("root", &status);
4845 if(U_FAILURE(status)) {
4846 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4847 return;
4848 }
4849 ucol_setStrength(coll, UCOL_PRIMARY);
4850 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4851 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4852 if (U_FAILURE(status)) {
4853 log_err("Failed setting atributes\n");
4854 return;
4855 }
4856 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4857 if (sortkey_length > 256) return;
4858
4859 /* we mark the position where the null byte should be written in advance */
4860 sortkey[sortkey_length-1] = 0xAA;
4861
4862 /* we set the buffer size one byte higher than needed */
4863 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4864 sortkey_length+1);
4865
4866 /* no error occurs (for me) */
4867 if (sortkey[sortkey_length-1] == 0xAA) {
4868 log_err("Hit bug at first try\n");
4869 }
4870
4871 /* we mark the position where the null byte should be written again */
4872 sortkey[sortkey_length-1] = 0xAA;
4873
4874 /* this time we set the buffer size to the exact amount needed */
4875 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4876 sortkey_length);
4877
4878 /* now the trailing null byte is not written */
4879 if (sortkey[sortkey_length-1] == 0xAA) {
4880 log_err("Hit bug at second try\n");
4881 }
4882
4883 ucol_close(coll);
4884 }
4885
/*
 * Regression test for the Thai partial sort key problem: runs two Thai
 * strings that differ only in their last combining mark (U+0E47 vs U+0E48)
 * through the generic driver for locale "th".
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
}
4897
/*
 * Runs "a", "y" through the generic rule driver with a tailoring that
 * resets on [first secondary ignorable] after an unrelated "&Ny << Y" rule.
 */
static void
TestJ5367(void)
{
    static const char *pair[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, pair, LEN(pair));
}
4905
4906 static void
4907 TestVI5913(void)
4908 {
4909 UErrorCode status = U_ZERO_ERROR;
4910 int32_t i, j;
4911 UCollator *coll =NULL;
4912 uint8_t resColl[100], expColl[100];
4913 int32_t rLen, tLen, ruleLen, sLen, kLen;
4914 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
4915 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4916 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4917 static const UChar tData[][20]={
4918 {0x1EAC, 0},
4919 {0x0041, 0x0323, 0x0302, 0},
4920 {0x1EA0, 0x0302, 0},
4921 {0x00C2, 0x0323, 0},
4922 {0x1ED8, 0}, /* O with dot and circumflex */
4923 {0x1ECC, 0x0302, 0},
4924 {0x1EB7, 0},
4925 {0x1EA1, 0x0306, 0},
4926 };
4927 static const UChar tailorData[][20]={
4928 {0x1FA2, 0}, /* Omega with 3 combining marks */
4929 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4930 {0x1FF3, 0x0313, 0x0300, 0},
4931 {0x1F60, 0x0300, 0x0345, 0},
4932 {0x1F62, 0x0345, 0},
4933 {0x1FA0, 0x0300, 0},
4934 };
4935 static const UChar tailorData2[][20]={
4936 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4937 {0x0073, 0x0323, 0x030C, 0},
4938 {0x0073, 0x030C, 0x0323, 0},
4939 };
4940 static const UChar tailorData3[][20]={
4941 {0x007a, 0}, /* z */
4942 {0x0061, 0x0065, 0}, /* a + e */
4943 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4944 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4945 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4946 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4947 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4948 {0x00EA, 0}, /* e with circumflex */
4949 };
4950
4951 /* Test Vietnamese sort. */
4952 coll = ucol_open("vi", &status);
4953 if(U_FAILURE(status)) {
4954 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4955 return;
4956 }
4957 log_verbose("\n\nVI collation:");
4958 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4959 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4960 }
4961 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4962 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4963 }
4964 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4965 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4966 }
4967 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4968 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4969 }
4970
4971 for (j=0; j<8; j++) {
4972 tLen = u_strlen(tData[j]);
4973 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4974 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4975 for(i = 0; i<rLen; i++) {
4976 log_verbose(" %02X", resColl[i]);
4977 }
4978 }
4979
4980 ucol_close(coll);
4981
4982 /* Test Romanian sort. */
4983 coll = ucol_open("ro", &status);
4984 log_verbose("\n\nRO collation:");
4985 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4986 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4987 }
4988 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4989 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4990 }
4991 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4992 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4993 }
4994
4995 for (j=4; j<8; j++) {
4996 tLen = u_strlen(tData[j]);
4997 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4998 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4999 for(i = 0; i<rLen; i++) {
5000 log_verbose(" %02X", resColl[i]);
5001 }
5002 }
5003 ucol_close(coll);
5004
5005 /* Test the precomposed Greek character with 3 combining marks. */
5006 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5007 ruleLen = u_strlen(rule);
5008 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5009 if (U_FAILURE(status)) {
5010 log_err("ucol_openRules failed with %s\n", u_errorName(status));
5011 return;
5012 }
5013 sLen = u_strlen(tailorData[0]);
5014 for (j=1; j<6; j++) {
5015 tLen = u_strlen(tailorData[j]);
5016 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
5017 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5018 }
5019 }
5020 /* Test getSortKey. */
5021 tLen = u_strlen(tailorData[0]);
5022 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5023 for (j=0; j<6; j++) {
5024 tLen = u_strlen(tailorData[j]);
5025 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5026 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5027 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5028 for(i = 0; i<rLen; i++) {
5029 log_err(" %02X", resColl[i]);
5030 }
5031 }
5032 }
5033 ucol_close(coll);
5034
5035 log_verbose("\n\nTailoring test for s with caron:");
5036 ruleLen = u_strlen(rule2);
5037 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5038 tLen = u_strlen(tailorData2[0]);
5039 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5040 for (j=1; j<3; j++) {
5041 tLen = u_strlen(tailorData2[j]);
5042 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5043 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5044 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5045 for(i = 0; i<rLen; i++) {
5046 log_err(" %02X", resColl[i]);
5047 }
5048 }
5049 }
5050 ucol_close(coll);
5051
5052 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5053 ruleLen = u_strlen(rule3);
5054 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5055 tLen = u_strlen(tailorData3[3]);
5056 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5057 for (j=4; j<6; j++) {
5058 tLen = u_strlen(tailorData3[j]);
5059 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5060
5061 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5062 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5063 for(i = 0; i<rLen; i++) {
5064 log_err(" %02X", resColl[i]);
5065 }
5066 }
5067
5068 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5069 for(i = 0; i<rLen; i++) {
5070 log_verbose(" %02X", resColl[i]);
5071 }
5072 }
5073 ucol_close(coll);
5074 }
5075
5076 static void
5077 TestTailor6179(void)
5078 {
5079 UErrorCode status = U_ZERO_ERROR;
5080 int32_t i;
5081 UCollator *coll =NULL;
5082 uint8_t resColl[100];
5083 int32_t rLen, tLen, ruleLen;
5084 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5085 UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5086 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5087 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5088 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5089 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5090 UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5091 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5092 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5093 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5094 0x3C,0x3C,0x20,0x62,0};
5095
5096 UChar tData1[][20]={
5097 {0x61, 0},
5098 {0x62, 0},
5099 { 0xFDD0,0x009E, 0}
5100 };
5101 UChar tData2[][20]={
5102 {0x61, 0},
5103 {0x62, 0},
5104 { 0xFDD0,0x009E, 0}
5105 };
5106
5107 /*
5108 * These values from FractionalUCA.txt will change,
5109 * and need to be updated here.
5110 */
5111 uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
5112 uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0};
5113 uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5114 uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5115
5116 /* Test [Last Primary ignorable] */
5117
5118 log_verbose("\n\nTailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b ");
5119 ruleLen = u_strlen(rule1);
5120 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5121 if (U_FAILURE(status)) {
5122 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5123 return;
5124 }
5125 tLen = u_strlen(tData1[0]);
5126 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5127 if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
5128 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
5129 for(i = 0; i<rLen; i++) {
5130 log_err(" %02X", resColl[i]);
5131 }
5132 }
5133 tLen = u_strlen(tData1[1]);
5134 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5135 if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
5136 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
5137 for(i = 0; i<rLen; i++) {
5138 log_err(" %02X", resColl[i]);
5139 }
5140 }
5141 ucol_close(coll);
5142
5143
5144 /* Test [Last Secondary ignorable] */
5145 log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b ");
5146 ruleLen = u_strlen(rule1);
5147 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5148 if (U_FAILURE(status)) {
5149 log_err("Tailoring test: &[last primary ignorable] failed!");
5150 return;
5151 }
5152 tLen = u_strlen(tData2[0]);
5153 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5154 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
5155 for(i = 0; i<rLen; i++) {
5156 log_verbose(" %02X", resColl[i]);
5157 }
5158 if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
5159 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
5160 for(i = 0; i<rLen; i++) {
5161 log_err(" %02X", resColl[i]);
5162 }
5163 }
5164 tLen = u_strlen(tData2[1]);
5165 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5166 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5167 for(i = 0; i<rLen; i++) {
5168 log_verbose(" %02X", resColl[i]);
5169 }
5170 if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
5171 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5172 for(i = 0; i<rLen; i++) {
5173 log_err(" %02X", resColl[i]);
5174 }
5175 }
5176 ucol_close(coll);
5177 }
5178
5179 static void
5180 TestUCAPrecontext(void)
5181 {
5182 UErrorCode status = U_ZERO_ERROR;
5183 int32_t i, j;
5184 UCollator *coll =NULL;
5185 uint8_t resColl[100], prevColl[100];
5186 int32_t rLen, tLen, ruleLen;
5187 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5188 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5189 /* & l middle-dot << a a is an expansion. */
5190
5191 UChar tData1[][20]={
5192 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5193 { 0x387, 0}, /* standalone middle dot(0x387) */
5194 { 0x61, 0}, /* a */
5195 { 0x6C, 0}, /* l */
5196 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
5197 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5198 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5199 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5200 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5201 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5202 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
5203 };
5204
5205 log_verbose("\n\nEN collation:");
5206 coll = ucol_open("en", &status);
5207 if (U_FAILURE(status)) {
5208 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5209 return;
5210 }
5211 for (j=0; j<11; j++) {
5212 tLen = u_strlen(tData1[j]);
5213 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5214 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5215 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5216 j, tData1[j]);
5217 }
5218 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5219 for(i = 0; i<rLen; i++) {
5220 log_verbose(" %02X", resColl[i]);
5221 }
5222 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5223 }
5224 ucol_close(coll);
5225
5226
5227 log_verbose("\n\nJA collation:");
5228 coll = ucol_open("ja", &status);
5229 if (U_FAILURE(status)) {
5230 log_err("Tailoring test: &z <<a|- failed!");
5231 return;
5232 }
5233 for (j=0; j<11; j++) {
5234 tLen = u_strlen(tData1[j]);
5235 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5236 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5237 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5238 j, tData1[j]);
5239 }
5240 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5241 for(i = 0; i<rLen; i++) {
5242 log_verbose(" %02X", resColl[i]);
5243 }
5244 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5245 }
5246 ucol_close(coll);
5247
5248
5249 log_verbose("\n\nTailoring test: & middle dot < a ");
5250 ruleLen = u_strlen(rule1);
5251 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5252 if (U_FAILURE(status)) {
5253 log_err("Tailoring test: & middle dot < a failed!");
5254 return;
5255 }
5256 for (j=0; j<11; j++) {
5257 tLen = u_strlen(tData1[j]);
5258 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5259 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5260 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5261 j, tData1[j]);
5262 }
5263 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5264 for(i = 0; i<rLen; i++) {
5265 log_verbose(" %02X", resColl[i]);
5266 }
5267 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5268 }
5269 ucol_close(coll);
5270
5271
5272 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5273 ruleLen = u_strlen(rule2);
5274 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5275 if (U_FAILURE(status)) {
5276 log_err("Tailoring test: & l middle-dot << a failed!");
5277 return;
5278 }
5279 for (j=0; j<11; j++) {
5280 tLen = u_strlen(tData1[j]);
5281 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5282 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5283 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5284 j, tData1[j]);
5285 }
5286 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5287 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5288 j, tData1[j]);
5289 }
5290 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5291 for(i = 0; i<rLen; i++) {
5292 log_verbose(" %02X", resColl[i]);
5293 }
5294 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5295 }
5296 ucol_close(coll);
5297 }
5298
5299 static void
5300 TestOutOfBuffer5468(void)
5301 {
5302 static const char *test = "\\u4e00";
5303 UChar ustr[256];
5304 int32_t ustr_length = u_unescape(test, ustr, 256);
5305 unsigned char shortKeyBuf[1];
5306 int32_t sortkey_length;
5307 UErrorCode status = U_ZERO_ERROR;
5308 static UCollator *coll = NULL;
5309
5310 coll = ucol_open("root", &status);
5311 if(U_FAILURE(status)) {
5312 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5313 return;
5314 }
5315 ucol_setStrength(coll, UCOL_PRIMARY);
5316 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5317 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5318 if (U_FAILURE(status)) {
5319 log_err("Failed setting atributes\n");
5320 return;
5321 }
5322
5323 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5324 if (sortkey_length != 4) {
5325 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
5326 }
5327 log_verbose("length of sortKey is %d", sortkey_length);
5328 ucol_close(coll);
5329 }
5330
5331 #define TSKC_DATA_SIZE 5
5332 #define TSKC_BUF_SIZE 50
5333 static void
5334 TestSortKeyConsistency(void)
5335 {
5336 UErrorCode icuRC = U_ZERO_ERROR;
5337 UCollator* ucol;
5338 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5339
5340 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5341 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5342 int32_t i, j, i2;
5343
5344 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5345 if (U_FAILURE(icuRC))
5346 {
5347 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5348 return;
5349 }
5350
5351 for (i = 0; i < TSKC_DATA_SIZE; i++)
5352 {
5353 UCharIterator uiter;
5354 uint32_t state[2] = { 0, 0 };
5355 int32_t dataLen = i+1;
5356 for (j=0; j<TSKC_BUF_SIZE; j++)
5357 bufFull[i][j] = bufPart[i][j] = 0;
5358
5359 /* Full sort key */
5360 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5361
5362 /* Partial sort key */
5363 uiter_setString(&uiter, data, dataLen);
5364 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5365 if (U_FAILURE(icuRC))
5366 {
5367 log_err("ucol_nextSortKeyPart failed\n");
5368 ucol_close(ucol);
5369 return;
5370 }
5371
5372 for (i2=0; i2<i; i2++)
5373 {
5374 UBool fullMatch = TRUE;
5375 UBool partMatch = TRUE;
5376 for (j=0; j<TSKC_BUF_SIZE; j++)
5377 {
5378 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5379 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5380 }
5381 if (fullMatch != partMatch) {
5382 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5383 : "partial key was consistent, but full key changed\n");
5384 ucol_close(ucol);
5385 return;
5386 }
5387 }
5388 }
5389
5390 /*=============================================*/
5391 ucol_close(ucol);
5392 }
5393
5394 /* ticket: 6101 */
5395 static void TestCroatianSortKey(void) {
5396 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5397 UErrorCode status = U_ZERO_ERROR;
5398 UCollator *ucol;
5399 UCharIterator iter;
5400
5401 static const UChar text[] = { 0x0044, 0xD81A };
5402
5403 size_t length = sizeof(text)/sizeof(*text);
5404
5405 uint8_t textSortKey[32];
5406 size_t lenSortKey = 32;
5407 size_t actualSortKeyLen;
5408 uint32_t uStateInfo[2] = { 0, 0 };
5409
5410 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5411 if (U_FAILURE(status)) {
5412 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5413 return;
5414 }
5415
5416 uiter_setString(&iter, text, length);
5417
5418 actualSortKeyLen = ucol_nextSortKeyPart(
5419 ucol, &iter, (uint32_t*)uStateInfo,
5420 textSortKey, lenSortKey, &status
5421 );
5422
5423 if (actualSortKeyLen == lenSortKey) {
5424 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5425 }
5426
5427 ucol_close(ucol);
5428 }
5429
5430 /* ticket: 6140 */
5431 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5432 * they are both Hiragana and Katakana
5433 */
5434 #define SORTKEYLEN 50
5435 static void TestHiragana(void) {
5436 UErrorCode status = U_ZERO_ERROR;
5437 UCollator* ucol;
5438 UCollationResult strcollresult;
5439 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5440 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5441 int32_t data1Len = sizeof(data1)/sizeof(*data1);
5442 int32_t data2Len = sizeof(data2)/sizeof(*data2);
5443 int32_t i, j;
5444 uint8_t sortKey1[SORTKEYLEN];
5445 uint8_t sortKey2[SORTKEYLEN];
5446
5447 UCharIterator uiter1;
5448 UCharIterator uiter2;
5449 uint32_t state1[2] = { 0, 0 };
5450 uint32_t state2[2] = { 0, 0 };
5451 int32_t keySize1;
5452 int32_t keySize2;
5453
5454 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5455 &status);
5456 if (U_FAILURE(status)) {
5457 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5458 return;
5459 }
5460
5461 /* Start of full sort keys */
5462 /* Full sort key1 */
5463 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5464 /* Full sort key2 */
5465 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5466 if (keySize1 == keySize2) {
5467 for (i = 0; i < keySize1; i++) {
5468 if (sortKey1[i] != sortKey2[i]) {
5469 log_err("Full sort keys are different. Should be equal.");
5470 }
5471 }
5472 } else {
5473 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5474 }
5475 /* End of full sort keys */
5476
5477 /* Start of partial sort keys */
5478 /* Partial sort key1 */
5479 uiter_setString(&uiter1, data1, data1Len);
5480 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5481 /* Partial sort key2 */
5482 uiter_setString(&uiter2, data2, data2Len);
5483 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5484 if (U_SUCCESS(status) && keySize1 == keySize2) {
5485 for (j = 0; j < keySize1; j++) {
5486 if (sortKey1[j] != sortKey2[j]) {
5487 log_err("Partial sort keys are different. Should be equal");
5488 }
5489 }
5490 } else {
5491 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5492 }
5493 /* End of partial sort keys */
5494
5495 /* Start of strcoll */
5496 /* Use ucol_strcoll() to determine ordering */
5497 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5498 if (strcollresult != UCOL_EQUAL) {
5499 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5500 }
5501
5502 ucol_close(ucol);
5503 }
5504
5505 /* Convenient struct for running collation tests */
5506 typedef struct {
5507 const UChar source[MAX_TOKEN_LEN]; /* String on left */
5508 const UChar target[MAX_TOKEN_LEN]; /* String on right */
5509 UCollationResult result; /* -1, 0 or +1, depending on collation */
5510 } OneTestCase;
5511
5512 /*
5513 * Utility function to test one collation test case.
5514 * @param testcases Array of test cases.
5515 * @param n_testcases Size of the array testcases.
5516 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
5517 * @param n_rules Size of the array str_rules.
5518 */
5519 static void doTestOneTestCase(const OneTestCase testcases[],
5520 int n_testcases,
5521 const char* str_rules[],
5522 int n_rules)
5523 {
5524 int rule_no, testcase_no;
5525 UChar rule[500];
5526 int32_t length = 0;
5527 UErrorCode status = U_ZERO_ERROR;
5528 UParseError parse_error;
5529 UCollator *myCollation;
5530
5531 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5532
5533 length = u_unescape(str_rules[rule_no], rule, 500);
5534 if (length == 0) {
5535 log_err("ERROR: The rule cannot be unescaped: %s\n");
5536 return;
5537 }
5538 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5539 if(U_FAILURE(status)){
5540 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5541 return;
5542 }
5543 log_verbose("Testing the <<* syntax\n");
5544 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5545 ucol_setStrength(myCollation, UCOL_TERTIARY);
5546 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5547 doTest(myCollation,
5548 testcases[testcase_no].source,
5549 testcases[testcase_no].target,
5550 testcases[testcase_no].result
5551 );
5552 }
5553 ucol_close(myCollation);
5554 }
5555 }
5556
5557 const static OneTestCase rangeTestcases[] = {
5558 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
5559 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
5560 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */
5561
5562 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
5563 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
5564 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
5565 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
5566 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */
5567
5568 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */
5569 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
5570 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */
5571 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */
5572
5573 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
5574 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
5575 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
5576 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
5577 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */
5578 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
5579 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
5580 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */
5581 };
5582
5583 static int nRangeTestcases = LEN(rangeTestcases);
5584
5585 const static OneTestCase rangeTestcasesSupplemental[] = {
5586 { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */
5587 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */
5588 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
5589 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */
5590 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5591 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5592 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */
5593 };
5594
5595 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5596
5597 const static OneTestCase rangeTestcasesQwerty[] = {
5598 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
5599 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */
5600
5601 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */
5602 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */
5603
5604 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */
5605 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */
5606
5607 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
5608 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */
5609
5610 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5611 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
5612 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5613 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
5614 };
5615
5616 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5617
5618 static void TestSameStrengthList(void)
5619 {
5620 const char* strRules[] = {
5621 /* Normal */
5622 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
5623
5624 /* Lists */
5625 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5626 };
5627 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5628 }
5629
5630 static void TestSameStrengthListQuoted(void)
5631 {
5632 const char* strRules[] = {
5633 /* Lists with quoted characters */
5634 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5635 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5636
5637 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5638 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5639
5640 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
5641 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5642 };
5643 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5644 }
5645
5646 static void TestSameStrengthListSupplemental(void)
5647 {
5648 const char* strRules[] = {
5649 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5650 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5651 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5652 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5653 };
5654 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5655 }
5656
5657 static void TestSameStrengthListQwerty(void)
5658 {
5659 const char* strRules[] = {
5660 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5661 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5662 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5663 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5664 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5665
5666 /* Quoted characters also will work if two quoted characters are not consecutive. */
5667 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5668
5669 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5670 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5671
5672 };
5673 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5674 }
5675
5676 static void TestSameStrengthListQuotedQwerty(void)
5677 {
5678 const char* strRules[] = {
5679 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5680 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5681 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
5682
5683 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5684 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5685 };
5686 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5687 }
5688
5689 static void TestSameStrengthListRanges(void)
5690 {
5691 const char* strRules[] = {
5692 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5693 };
5694 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5695 }
5696
5697 static void TestSameStrengthListSupplementalRanges(void)
5698 {
5699 const char* strRules[] = {
5700 "&\\ufffe<*\\uffff-\\U00010002",
5701 };
5702 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5703 }
5704
5705 static void TestSpecialCharacters(void)
5706 {
5707 const char* strRules[] = {
5708 /* Normal */
5709 "&';'<'+'<','<'-'<'&'<'*'",
5710
5711 /* List */
5712 "&';'<*'+,-&*'",
5713
5714 /* Range */
5715 "&';'<*'+'-'-&*'",
5716 };
5717
5718 const static OneTestCase specialCharacterStrings[] = {
5719 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
5720 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
5721 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
5722 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
5723 };
5724 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5725 }
5726
5727 static void TestPrivateUseCharacters(void)
5728 {
5729 const char* strRules[] = {
5730 /* Normal */
5731 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5732 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5733 };
5734
5735 const static OneTestCase privateUseCharacterStrings[] = {
5736 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5737 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5738 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5739 { {0xe2da}, {0xe2db}, UCOL_LESS },
5740 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5741 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5742 };
5743 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5744 }
5745
5746 static void TestPrivateUseCharactersInList(void)
5747 {
5748 const char* strRules[] = {
5749 /* List */
5750 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5751 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5752 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5753 };
5754
5755 const static OneTestCase privateUseCharacterStrings[] = {
5756 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5757 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5758 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5759 { {0xe2da}, {0xe2db}, UCOL_LESS },
5760 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5761 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5762 };
5763 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5764 }
5765
5766 static void TestPrivateUseCharactersInRange(void)
5767 {
5768 const char* strRules[] = {
5769 /* Range */
5770 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5771 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5772 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5773 };
5774
5775 const static OneTestCase privateUseCharacterStrings[] = {
5776 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5777 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5778 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5779 { {0xe2da}, {0xe2db}, UCOL_LESS },
5780 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5781 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5782 };
5783 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5784 }
5785
5786 static void TestInvalidListsAndRanges(void)
5787 {
5788 const char* invalidRules[] = {
5789 /* Range not in starred expression */
5790 "&\\ufffe<\\uffff-\\U00010002",
5791
5792 /* Range without start */
5793 "&a<*-c",
5794
5795 /* Range without end */
5796 "&a<*b-",
5797
5798 /* More than one hyphen */
5799 "&a<*b-g-l",
5800
5801 /* Range in the wrong order */
5802 "&a<*k-b",
5803
5804 };
5805
5806 UChar rule[500];
5807 UErrorCode status = U_ZERO_ERROR;
5808 UParseError parse_error;
5809 int n_rules = LEN(invalidRules);
5810 int rule_no;
5811 int length;
5812 UCollator *myCollation;
5813
5814 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5815
5816 length = u_unescape(invalidRules[rule_no], rule, 500);
5817 if (length == 0) {
5818 log_err("ERROR: The rule cannot be unescaped: %s\n");
5819 return;
5820 }
5821 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5822 if(!U_FAILURE(status)){
5823 log_err("ERROR: Could not cause a failure as expected: \n");
5824 }
5825 status = U_ZERO_ERROR;
5826 }
5827 }
5828
5829 /*
5830 * This test ensures that characters placed before a character in a different script have the same lead byte
5831 * in their collation key before and after script reordering.
5832 */
5833 static void TestBeforeRuleWithScriptReordering(void)
5834 {
5835 UParseError error;
5836 UErrorCode status = U_ZERO_ERROR;
5837 UCollator *myCollation;
5838 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5839 UChar rules[500];
5840 uint32_t rulesLength = 0;
5841 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5842 UCollationResult collResult;
5843
5844 uint8_t baseKey[256];
5845 uint32_t baseKeyLength;
5846 uint8_t beforeKey[256];
5847 uint32_t beforeKeyLength;
5848
5849 UChar base[] = { 0x03b1 }; /* base */
5850 int32_t baseLen = sizeof(base)/sizeof(*base);
5851
5852 UChar before[] = { 0x0e01 }; /* ko kai */
5853 int32_t beforeLen = sizeof(before)/sizeof(*before);
5854
5855 /*UChar *data[] = { before, base };
5856 genericRulesStarter(srules, data, 2);*/
5857
5858 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5859
5860
5861 /* build collator */
5862 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5863
5864 rulesLength = u_unescape(srules, rules, LEN(rules));
5865 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5866 if(U_FAILURE(status)) {
5867 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5868 return;
5869 }
5870
5871 /* check collation results - before rule applied but not script reordering */
5872 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5873 if (collResult != UCOL_GREATER) {
5874 log_err("Collation result not correct before script reordering = %d\n", collResult);
5875 }
5876
5877 /* check the lead byte of the collation keys before script reordering */
5878 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5879 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5880 if (baseKey[0] != beforeKey[0]) {
5881 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5882 }
5883
5884 /* reorder the scripts */
5885 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5886 if(U_FAILURE(status)) {
5887 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5888 return;
5889 }
5890
5891 /* check collation results - before rule applied and after script reordering */
5892 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5893 if (collResult != UCOL_GREATER) {
5894 log_err("Collation result not correct after script reordering = %d\n", collResult);
5895 }
5896
5897 /* check the lead byte of the collation keys after script reordering */
5898 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5899 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5900 if (baseKey[0] != beforeKey[0]) {
5901 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5902 }
5903
5904 ucol_close(myCollation);
5905 }
5906
5907 /*
5908 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5909 */
5910 static void TestNonLeadBytesDuringCollationReordering(void)
5911 {
5912 UErrorCode status = U_ZERO_ERROR;
5913 UCollator *myCollation;
5914 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5915 UCollationResult collResult;
5916
5917 uint8_t baseKey[256];
5918 uint32_t baseKeyLength;
5919 uint8_t reorderKey[256];
5920 uint32_t reorderKeyLength;
5921
5922 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5923
5924 int i;
5925
5926
5927 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5928
5929 /* build collator tertiary */
5930 myCollation = ucol_open("", &status);
5931 ucol_setStrength(myCollation, UCOL_TERTIARY);
5932 if(U_FAILURE(status)) {
5933 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5934 return;
5935 }
5936 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5937
5938 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5939 if(U_FAILURE(status)) {
5940 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5941 return;
5942 }
5943 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5944
5945 if (baseKeyLength != reorderKeyLength) {
5946 log_err("Key lengths not the same during reordering.\n", collResult);
5947 return;
5948 }
5949
5950 for (i = 1; i < baseKeyLength; i++) {
5951 if (baseKey[i] != reorderKey[i]) {
5952 log_err("Collation key bytes not the same at position %d.\n", i);
5953 return;
5954 }
5955 }
5956 ucol_close(myCollation);
5957
5958 /* build collator quaternary */
5959 myCollation = ucol_open("", &status);
5960 ucol_setStrength(myCollation, UCOL_QUATERNARY);
5961 if(U_FAILURE(status)) {
5962 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5963 return;
5964 }
5965 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5966
5967 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5968 if(U_FAILURE(status)) {
5969 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5970 return;
5971 }
5972 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5973
5974 if (baseKeyLength != reorderKeyLength) {
5975 log_err("Key lengths not the same during reordering.\n", collResult);
5976 return;
5977 }
5978
5979 for (i = 1; i < baseKeyLength; i++) {
5980 if (baseKey[i] != reorderKey[i]) {
5981 log_err("Collation key bytes not the same at position %d.\n", i);
5982 return;
5983 }
5984 }
5985 ucol_close(myCollation);
5986 }
5987
5988 /*
5989 * Test reordering API.
5990 */
5991 static void TestReorderingAPI(void)
5992 {
5993 UErrorCode status = U_ZERO_ERROR;
5994 UCollator *myCollation;
5995 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5996 UCollationResult collResult;
5997 int32_t retrievedReorderCodesLength;
5998 UChar greekString[] = { 0x03b1 };
5999 UChar punctuationString[] = { 0x203e };
6000
6001 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6002
6003 /* build collator tertiary */
6004 myCollation = ucol_open("", &status);
6005 ucol_setStrength(myCollation, UCOL_TERTIARY);
6006 if(U_FAILURE(status)) {
6007 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6008 return;
6009 }
6010
6011 /* set the reorderding */
6012 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6013 if (U_FAILURE(status)) {
6014 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6015 return;
6016 }
6017
6018 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6019 if (status != U_BUFFER_OVERFLOW_ERROR) {
6020 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6021 return;
6022 }
6023 status = U_ZERO_ERROR;
6024 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6025 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6026 return;
6027 }
6028 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6029 if (collResult != UCOL_LESS) {
6030 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6031 return;
6032 }
6033
6034 /* clear the reordering */
6035 ucol_setReorderCodes(myCollation, NULL, 0, &status);
6036 if (U_FAILURE(status)) {
6037 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6038 return;
6039 }
6040
6041 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6042 if (retrievedReorderCodesLength != 0) {
6043 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6044 return;
6045 }
6046
6047 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6048 if (collResult != UCOL_GREATER) {
6049 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6050 return;
6051 }
6052
6053 ucol_close(myCollation);
6054 }
6055
6056 /*
6057 * Utility function to test one collation reordering test case.
6058 * @param testcases Array of test cases.
6059 * @param n_testcases Size of the array testcases.
6060 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
6061 * @param n_rules Size of the array str_rules.
6062 */
6063 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6064 {
6065 int testCaseNum;
6066 UErrorCode status = U_ZERO_ERROR;
6067 UCollator *myCollation;
6068
6069 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6070 myCollation = ucol_open("", &status);
6071 if (U_FAILURE(status)) {
6072 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6073 return;
6074 }
6075 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6076 if(U_FAILURE(status)) {
6077 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6078 return;
6079 }
6080
6081 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6082 doTest(myCollation,
6083 testCases[testCaseNum].source,
6084 testCases[testCaseNum].target,
6085 testCases[testCaseNum].result
6086 );
6087 }
6088 ucol_close(myCollation);
6089 }
6090 }
6091
6092 static void TestGreekFirstReorder(void)
6093 {
6094 const char* strRules[] = {
6095 "[reorder Grek]"
6096 };
6097
6098 const int32_t apiRules[] = {
6099 USCRIPT_GREEK
6100 };
6101
6102 const static OneTestCase privateUseCharacterStrings[] = {
6103 { {0x0391}, {0x0391}, UCOL_EQUAL },
6104 { {0x0041}, {0x0391}, UCOL_GREATER },
6105 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6106 { {0x0060}, {0x0391}, UCOL_LESS },
6107 { {0x0391}, {0xe2dc}, UCOL_LESS },
6108 { {0x0391}, {0x0060}, UCOL_GREATER },
6109 };
6110
6111 /* Test rules creation */
6112 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6113
6114 /* Test collation reordering API */
6115 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6116 }
6117
6118 static void TestGreekLastReorder(void)
6119 {
6120 const char* strRules[] = {
6121 "[reorder Zzzz Grek]"
6122 };
6123
6124 const int32_t apiRules[] = {
6125 USCRIPT_UNKNOWN, USCRIPT_GREEK
6126 };
6127
6128 const static OneTestCase privateUseCharacterStrings[] = {
6129 { {0x0391}, {0x0391}, UCOL_EQUAL },
6130 { {0x0041}, {0x0391}, UCOL_LESS },
6131 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6132 { {0x0060}, {0x0391}, UCOL_LESS },
6133 { {0x0391}, {0xe2dc}, UCOL_GREATER },
6134 };
6135
6136 /* Test rules creation */
6137 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6138
6139 /* Test collation reordering API */
6140 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6141 }
6142
6143 static void TestNonScriptReorder(void)
6144 {
6145 const char* strRules[] = {
6146 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6147 };
6148
6149 const int32_t apiRules[] = {
6150 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6151 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6152 UCOL_REORDER_CODE_CURRENCY
6153 };
6154
6155 const static OneTestCase privateUseCharacterStrings[] = {
6156 { {0x0391}, {0x0041}, UCOL_LESS },
6157 { {0x0041}, {0x0391}, UCOL_GREATER },
6158 { {0x0060}, {0x0041}, UCOL_LESS },
6159 { {0x0060}, {0x0391}, UCOL_GREATER },
6160 { {0x0024}, {0x0041}, UCOL_GREATER },
6161 };
6162
6163 /* Test rules creation */
6164 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6165
6166 /* Test collation reordering API */
6167 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6168 }
6169
6170 static void TestHaniReorder(void)
6171 {
6172 const char* strRules[] = {
6173 "[reorder Hani]"
6174 };
6175 const int32_t apiRules[] = {
6176 USCRIPT_HAN
6177 };
6178
6179 const static OneTestCase privateUseCharacterStrings[] = {
6180 { {0x4e00}, {0x0041}, UCOL_LESS },
6181 { {0x4e00}, {0x0060}, UCOL_GREATER },
6182 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6183 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6184 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6185 { {0xfa27}, {0x0041}, UCOL_LESS },
6186 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6187 };
6188
6189 /* Test rules creation */
6190 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6191
6192 /* Test collation reordering API */
6193 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6194 }
6195
6196 static void TestMultipleReorder()
6197 {
6198 const char* strRules[] = {
6199 "[reorder Grek Zzzz DIGIT Latn Hani]"
6200 };
6201
6202 const int32_t apiRules[] = {
6203 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6204 };
6205
6206 const static OneTestCase collationTestCases[] = {
6207 { {0x0391}, {0x0041}, UCOL_LESS},
6208 { {0x0031}, {0x0041}, UCOL_LESS},
6209 { {0x0041}, {0x4e00}, UCOL_LESS},
6210 };
6211
6212 /* Test rules creation */
6213 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6214
6215 /* Test collation reordering API */
6216 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6217 }
6218
/*
 * Compares two zero-terminated byte sequences byte by byte.
 *
 * @param a First sequence; must end with a 0 byte.
 * @param b Second sequence; must end with a 0 byte.
 * @return 0 when both sequences match up to and including the terminating
 *         0 byte, -1 when the first differing byte of a is smaller than that
 *         of b, and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    const uint8_t* lhs = a;
    const uint8_t* rhs = b;

    while (*lhs == *rhs) {
        if (*lhs == 0) {
            /* Both reached their terminator together. */
            return 0;
        }
        ++lhs;
        ++rhs;
    }

    if (*lhs < *rhs) {
        return -1;
    }
    return 1;
}
6228
6229 static void TestImport(void)
6230 {
6231 UCollator* vicoll;
6232 UCollator* escoll;
6233 UCollator* viescoll;
6234 UCollator* importviescoll;
6235 UParseError error;
6236 UErrorCode status = U_ZERO_ERROR;
6237 UChar* virules;
6238 int32_t viruleslength;
6239 UChar* esrules;
6240 int32_t esruleslength;
6241 UChar* viesrules;
6242 int32_t viesruleslength;
6243 char srules[500] = "[import vi][import es]";
6244 UChar rules[500];
6245 uint32_t length = 0;
6246 int32_t itemCount;
6247 int32_t i, k;
6248 UChar32 start;
6249 UChar32 end;
6250 UChar str[500];
6251 int32_t strLength;
6252
6253 uint8_t sk1[500];
6254 uint8_t sk2[500];
6255
6256 UBool b;
6257 USet* tailoredSet;
6258 USet* importTailoredSet;
6259
6260
6261 vicoll = ucol_open("vi", &status);
6262 if(U_FAILURE(status)){
6263 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6264 return;
6265 }
6266
6267 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6268 escoll = ucol_open("es", &status);
6269 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6270 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6271 viesrules[0] = 0;
6272 u_strcat(viesrules, virules);
6273 u_strcat(viesrules, esrules);
6274 viesruleslength = viruleslength + esruleslength;
6275 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6276
6277 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6278 length = u_unescape(srules, rules, 500);
6279 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6280 if(U_FAILURE(status)){
6281 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6282 return;
6283 }
6284
6285 tailoredSet = ucol_getTailoredSet(viescoll, &status);
6286 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6287
6288 if(!uset_equals(tailoredSet, importTailoredSet)){
6289 log_err("Tailored sets not equal");
6290 }
6291
6292 uset_close(importTailoredSet);
6293
6294 itemCount = uset_getItemCount(tailoredSet);
6295
6296 for( i = 0; i < itemCount; i++){
6297 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6298 if(strLength < 2){
6299 for (; start <= end; start++){
6300 k = 0;
6301 U16_APPEND(str, k, 500, start, b);
6302 ucol_getSortKey(viescoll, str, 1, sk1, 500);
6303 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6304 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6305 log_err("Sort key for %s not equal\n", str);
6306 break;
6307 }
6308 }
6309 }else{
6310 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6311 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6312 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6313 log_err("ZZSort key for %s not equal\n", str);
6314 break;
6315 }
6316
6317 }
6318 }
6319
6320 uset_close(tailoredSet);
6321
6322 uprv_free(viesrules);
6323
6324 ucol_close(vicoll);
6325 ucol_close(escoll);
6326 ucol_close(viescoll);
6327 ucol_close(importviescoll);
6328 }
6329
6330 static void TestImportWithType(void)
6331 {
6332 UCollator* vicoll;
6333 UCollator* decoll;
6334 UCollator* videcoll;
6335 UCollator* importvidecoll;
6336 UParseError error;
6337 UErrorCode status = U_ZERO_ERROR;
6338 const UChar* virules;
6339 int32_t viruleslength;
6340 const UChar* derules;
6341 int32_t deruleslength;
6342 UChar* viderules;
6343 int32_t videruleslength;
6344 const char srules[500] = "[import vi][import de-u-co-phonebk]";
6345 UChar rules[500];
6346 uint32_t length = 0;
6347 int32_t itemCount;
6348 int32_t i, k;
6349 UChar32 start;
6350 UChar32 end;
6351 UChar str[500];
6352 int32_t strLength;
6353
6354 uint8_t sk1[500];
6355 uint8_t sk2[500];
6356
6357 USet* tailoredSet;
6358 USet* importTailoredSet;
6359
6360 vicoll = ucol_open("vi", &status);
6361 if(U_FAILURE(status)){
6362 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6363 return;
6364 }
6365 virules = ucol_getRules(vicoll, &viruleslength);
6366 /* decoll = ucol_open("de@collation=phonebook", &status); */
6367 decoll = ucol_open("de-u-co-phonebk", &status);
6368 if(U_FAILURE(status)){
6369 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6370 return;
6371 }
6372
6373
6374 derules = ucol_getRules(decoll, &deruleslength);
6375 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6376 viderules[0] = 0;
6377 u_strcat(viderules, virules);
6378 u_strcat(viderules, derules);
6379 videruleslength = viruleslength + deruleslength;
6380 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6381
6382 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6383 length = u_unescape(srules, rules, 500);
6384 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6385 if(U_FAILURE(status)){
6386 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6387 return;
6388 }
6389
6390 tailoredSet = ucol_getTailoredSet(videcoll, &status);
6391 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6392
6393 if(!uset_equals(tailoredSet, importTailoredSet)){
6394 log_err("Tailored sets not equal");
6395 }
6396
6397 uset_close(importTailoredSet);
6398
6399 itemCount = uset_getItemCount(tailoredSet);
6400
6401 for( i = 0; i < itemCount; i++){
6402 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6403 if(strLength < 2){
6404 for (; start <= end; start++){
6405 k = 0;
6406 U16_APPEND_UNSAFE(str, k, start);
6407 ucol_getSortKey(videcoll, str, 1, sk1, 500);
6408 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6409 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6410 log_err("Sort key for %s not equal\n", str);
6411 break;
6412 }
6413 }
6414 }else{
6415 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6416 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6417 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6418 log_err("Sort key for %s not equal\n", str);
6419 break;
6420 }
6421
6422 }
6423 }
6424
6425 uset_close(tailoredSet);
6426
6427 uprv_free(viderules);
6428
6429 ucol_close(videcoll);
6430 ucol_close(importvidecoll);
6431 ucol_close(vicoll);
6432 ucol_close(decoll);
6433
6434 }
6435
6436
/*
 * Registers test function x with addTest() under the path
 * "tscoll/cmsccoll/<function name>"; the name is produced by stringizing x.
 * Expects a variable named root to be in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Registers the tests of this file with the test tree.
 * Entries that are commented out are deliberately disabled; where a reason
 * is known it is given next to the entry.
 *
 * @param root Test tree root, forwarded to addTest() by the TEST macro.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913); /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    /* Starred-list / range rule syntax and rule import */
    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Script / reorder-code reordering */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestMultipleReorder);
}
6537
6538 #endif /* #if !UCONFIG_NO_COLLATION */