]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cmsccoll.c
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cmsccoll.c
1
2 /********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2012, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7 /*******************************************************************************
8 *
9 * File cmsccoll.C
10 *
11 *******************************************************************************/
12 /**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17 #include <stdio.h>
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_COLLATION
22
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
26 #include "cintltst.h"
27 #include "ccolltst.h"
28 #include "callcoll.h"
29 #include "unicode/ustring.h"
30 #include "string.h"
31 #include "ucol_imp.h"
32 #include "ucol_tok.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43
44
/* Number of elements in a true array (never a pointer). The argument is fully
   parenthesized so that expressions such as a cast-and-dereference expand correctly. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-size test string tables in this file. */
#define MAX_TOKEN_LEN 16
48
49 typedef UCollationResult tst_strcoll(void *collator, const int object,
50 const UChar *source, const int sLen,
51 const UChar *target, const int tLen);
52
53
54
/* Strings listed in the order expected under the rules " & Z < ABC < Q < B"
   (see IncompleteCntTest): every entry must sort before all later entries. */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ",
    "AB", "ABZ", "ABQ",
    "Z", "ABC", "Q", "B"
};

/* Strings listed in the order expected under the rules " & Z < DAVIS < MARK <DAV"
   (see IncompleteCntTest). */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR",
    "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
81
82 static void IncompleteCntTest(void)
83 {
84 UErrorCode status = U_ZERO_ERROR;
85 UChar temp[90];
86 UChar t1[90];
87 UChar t2[90];
88
89 UCollator *coll = NULL;
90 uint32_t i = 0, j = 0;
91 uint32_t size = 0;
92
93 u_uastrcpy(temp, " & Z < ABC < Q < B");
94
95 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
96
97 if(U_SUCCESS(status)) {
98 size = sizeof(cnt1)/sizeof(cnt1[0]);
99 for(i = 0; i < size-1; i++) {
100 for(j = i+1; j < size; j++) {
101 UCollationElements *iter;
102 u_uastrcpy(t1, cnt1[i]);
103 u_uastrcpy(t2, cnt1[j]);
104 doTest(coll, t1, t2, UCOL_LESS);
105 /* synwee : added collation element iterator test */
106 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
107 if (U_FAILURE(status)) {
108 log_err("Creation of iterator failed\n");
109 break;
110 }
111 backAndForth(iter);
112 ucol_closeElements(iter);
113 }
114 }
115 }
116
117 ucol_close(coll);
118
119
120 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
121 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
122
123 if(U_SUCCESS(status)) {
124 size = sizeof(cnt2)/sizeof(cnt2[0]);
125 for(i = 0; i < size-1; i++) {
126 for(j = i+1; j < size; j++) {
127 UCollationElements *iter;
128 u_uastrcpy(t1, cnt2[i]);
129 u_uastrcpy(t2, cnt2[j]);
130 doTest(coll, t1, t2, UCOL_LESS);
131
132 /* synwee : added collation element iterator test */
133 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
134 if (U_FAILURE(status)) {
135 log_err("Creation of iterator failed\n");
136 break;
137 }
138 backAndForth(iter);
139 ucol_closeElements(iter);
140 }
141 }
142 }
143
144 ucol_close(coll);
145
146
147 }
148
/* "black bird" variants in the order BlackBirdTest expects once alternate handling
   is set to UCOL_SHIFTED. */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
160
161 const static UCollationResult shiftedTert[] = {
162 UCOL_EQUAL,
163 UCOL_EQUAL,
164 UCOL_EQUAL,
165 UCOL_LESS,
166 UCOL_EQUAL,
167 UCOL_EQUAL,
168 UCOL_LESS,
169 UCOL_EQUAL,
170 UCOL_EQUAL
171 };
172
/* "black bird" variants in the order BlackBirdTest expects with alternate handling
   set to UCOL_NON_IGNORABLE. */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
184
185 static void BlackBirdTest(void) {
186 UErrorCode status = U_ZERO_ERROR;
187 UChar t1[90];
188 UChar t2[90];
189
190 uint32_t i = 0, j = 0;
191 uint32_t size = 0;
192 UCollator *coll = ucol_open("en_US", &status);
193
194 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
195 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
196
197 if(U_SUCCESS(status)) {
198 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
199 for(i = 0; i < size-1; i++) {
200 for(j = i+1; j < size; j++) {
201 u_uastrcpy(t1, nonignorable[i]);
202 u_uastrcpy(t2, nonignorable[j]);
203 doTest(coll, t1, t2, UCOL_LESS);
204 }
205 }
206 }
207
208 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
209 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
210
211 if(U_SUCCESS(status)) {
212 size = sizeof(shifted)/sizeof(shifted[0]);
213 for(i = 0; i < size-1; i++) {
214 for(j = i+1; j < size; j++) {
215 u_uastrcpy(t1, shifted[i]);
216 u_uastrcpy(t2, shifted[j]);
217 doTest(coll, t1, t2, UCOL_LESS);
218 }
219 }
220 }
221
222 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
223 if(U_SUCCESS(status)) {
224 size = sizeof(shifted)/sizeof(shifted[0]);
225 for(i = 1; i < size; i++) {
226 u_uastrcpy(t1, shifted[i-1]);
227 u_uastrcpy(t2, shifted[i]);
228 doTest(coll, t1, t2, shiftedTert[i]);
229 }
230 }
231
232 ucol_close(coll);
233 }
234
235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
236 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238 {0x0041/*'A'*/, 0x0300, 0x0000},
239 {0x00C0, 0x0301, 0x0000},
240 /* this would work with forced normalization */
241 {0x00C0, 0x0316, 0x0000}
242 };
243
244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
247 {0x00C0, 0},
248 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249 /* this would work with forced normalization */
250 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
251 };
252
253 const static UCollationResult results[] = {
254 UCOL_GREATER,
255 UCOL_EQUAL,
256 UCOL_EQUAL,
257 UCOL_GREATER,
258 UCOL_EQUAL
259 };
260
261 static void FunkyATest(void)
262 {
263
264 int32_t i;
265 UErrorCode status = U_ZERO_ERROR;
266 UCollator *myCollation;
267 myCollation = ucol_open("en_US", &status);
268 if(U_FAILURE(status)){
269 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
270 return;
271 }
272 log_verbose("Testing some A letters, for some reason\n");
273 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
274 ucol_setStrength(myCollation, UCOL_TERTIARY);
275 for (i = 0; i < 4 ; i++)
276 {
277 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
278 }
279 ucol_close(myCollation);
280 }
281
282 UColAttributeValue caseFirst[] = {
283 UCOL_OFF,
284 UCOL_LOWER_FIRST,
285 UCOL_UPPER_FIRST
286 };
287
288
289 UColAttributeValue alternateHandling[] = {
290 UCOL_NON_IGNORABLE,
291 UCOL_SHIFTED
292 };
293
294 UColAttributeValue caseLevel[] = {
295 UCOL_OFF,
296 UCOL_ON
297 };
298
299 UColAttributeValue strengths[] = {
300 UCOL_PRIMARY,
301 UCOL_SECONDARY,
302 UCOL_TERTIARY,
303 UCOL_QUATERNARY,
304 UCOL_IDENTICAL
305 };
306
307 #if 0
/* Printable names, index-aligned with the strengths[] table. */
static const char *strengthsC[] = {
    "UCOL_PRIMARY", "UCOL_SECONDARY", "UCOL_TERTIARY", "UCOL_QUATERNARY", "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with the caseFirst[] table. */
static const char *caseFirstC[] = { "UCOL_OFF", "UCOL_LOWER_FIRST", "UCOL_UPPER_FIRST" };

/* Printable names, index-aligned with the alternateHandling[] table. */
static const char *alternateHandlingC[] = { "UCOL_NON_IGNORABLE", "UCOL_SHIFTED" };

/* Printable names, index-aligned with the caseLevel[] table. */
static const char *caseLevelC[] = { "UCOL_OFF", "UCOL_ON" };
332
333 /* not used currently - does not test only prints */
334 static void PrintMarkDavis(void)
335 {
336 UErrorCode status = U_ZERO_ERROR;
337 UChar m[256];
338 uint8_t sortkey[256];
339 UCollator *coll = ucol_open("en_US", &status);
340 uint32_t h,i,j,k, sortkeysize;
341 uint32_t sizem = 0;
342 char buffer[512];
343 uint32_t len = 512;
344
345 log_verbose("PrintMarkDavis");
346
347 u_uastrcpy(m, "Mark Davis");
348 sizem = u_strlen(m);
349
350
351 m[1] = 0xe4;
352
353 for(i = 0; i<sizem; i++) {
354 fprintf(stderr, "\\u%04X ", m[i]);
355 }
356 fprintf(stderr, "\n");
357
358 for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
359 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
360 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
361
362 for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
363 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
364 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);
365
366 for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
367 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
368 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);
369
370 for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
371 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
372 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
373 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
374 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
375 }
376
377 }
378
379 }
380
381 }
382 }
383 #endif
384
385 static void BillFairmanTest(void) {
386 /*
387 ** check for actual locale via ICU resource bundles
388 **
389 ** lp points to the original locale ("fr_FR_....")
390 */
391
392 UResourceBundle *lr,*cr;
393 UErrorCode lec = U_ZERO_ERROR;
394 const char *lp = "fr_FR_you_ll_never_find_this_locale";
395
396 log_verbose("BillFairmanTest\n");
397
398 lr = ures_open(NULL,lp,&lec);
399 if (lr) {
400 cr = ures_getByKey(lr,"collations",0,&lec);
401 if (cr) {
402 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
403 if (lp) {
404 if (U_SUCCESS(lec)) {
405 if(strcmp(lp, "fr") != 0) {
406 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
407 }
408 }
409 }
410 ures_close(cr);
411 }
412 ures_close(lr);
413 }
414 }
415
416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
417 UChar source[256] = { '\0'};
418 UChar target[256] = { '\0'};
419 UChar preP = 0x31a3;
420 UChar preQ = 0x310d;
421 /*
422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
424 */
425 /*log_verbose("Testing primary\n");*/
426
427 doTest(col, p, q, UCOL_LESS);
428 /*
429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
430
431 if(result!=UCOL_LESS){
432 aescstrdup(p,utfSource,256);
433 aescstrdup(q,utfTarget,256);
434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
435 }
436 */
437 source[0] = preP;
438 u_strcpy(source+1,p);
439 target[0] = preQ;
440 u_strcpy(target+1,q);
441 doTest(col, source, target, UCOL_LESS);
442 /*
443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
444 */
445 }
446
447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
448 UChar source[256] = { '\0'};
449 UChar target[256] = { '\0'};
450
451 /*log_verbose("Testing secondary\n");*/
452
453 doTest(col, p, q, UCOL_LESS);
454 /*
455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
456 */
457 source[0] = 0x0053;
458 u_strcpy(source+1,p);
459 target[0]= 0x0073;
460 u_strcpy(target+1,q);
461
462 doTest(col, source, target, UCOL_LESS);
463 /*
464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
465 */
466
467
468 u_strcpy(source,p);
469 source[u_strlen(p)] = 0x62;
470 source[u_strlen(p)+1] = 0;
471
472
473 u_strcpy(target,q);
474 target[u_strlen(q)] = 0x61;
475 target[u_strlen(q)+1] = 0;
476
477 doTest(col, source, target, UCOL_GREATER);
478
479 /*
480 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
481 */
482 }
483
484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
485 UChar source[256] = { '\0'};
486 UChar target[256] = { '\0'};
487
488 /*log_verbose("Testing tertiary\n");*/
489
490 doTest(col, p, q, UCOL_LESS);
491 /*
492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
493 */
494 source[0] = 0x0020;
495 u_strcpy(source+1,p);
496 target[0]= 0x002D;
497 u_strcpy(target+1,q);
498
499 doTest(col, source, target, UCOL_LESS);
500 /*
501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
502 */
503
504 u_strcpy(source,p);
505 source[u_strlen(p)] = 0xE0;
506 source[u_strlen(p)+1] = 0;
507
508 u_strcpy(target,q);
509 target[u_strlen(q)] = 0x61;
510 target[u_strlen(q)+1] = 0;
511
512 doTest(col, source, target, UCOL_GREATER);
513
514 /*
515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
516 */
517 }
518
519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
520 /*
521 UChar source[256] = { '\0'};
522 UChar target[256] = { '\0'};
523 */
524
525 doTest(col, p, q, UCOL_EQUAL);
526 /*
527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
528 */
529 }
530
531 static void testCollator(UCollator *coll, UErrorCode *status) {
532 const UChar *rules = NULL, *current = NULL;
533 int32_t ruleLen = 0;
534 uint32_t strength = 0;
535 uint32_t chOffset = 0; uint32_t chLen = 0;
536 uint32_t exOffset = 0; uint32_t exLen = 0;
537 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
538 uint32_t firstEx = 0;
539 /* uint32_t rExpsLen = 0; */
540 uint32_t firstLen = 0;
541 UBool varT = FALSE; UBool top_ = TRUE;
542 uint16_t specs = 0;
543 UBool startOfRules = TRUE;
544 UBool lastReset = FALSE;
545 UBool before = FALSE;
546 uint32_t beforeStrength = 0;
547 UColTokenParser src;
548 UColOptionSet opts;
549
550 UChar first[256];
551 UChar second[256];
552 UChar tempB[256];
553 uint32_t tempLen;
554 UChar *rulesCopy = NULL;
555 UParseError parseError;
556
557 uprv_memset(&src, 0, sizeof(UColTokenParser));
558
559 src.opts = &opts;
560
561 rules = ucol_getRules(coll, &ruleLen);
562 if(U_SUCCESS(*status) && ruleLen > 0) {
563 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
564 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
565 src.current = src.source = rulesCopy;
566 src.end = rulesCopy+ruleLen;
567 src.extraCurrent = src.end;
568 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
569 *first = *second = 0;
570
571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
572 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
574 strength = src.parsedToken.strength;
575 chOffset = src.parsedToken.charsOffset;
576 chLen = src.parsedToken.charsLen;
577 exOffset = src.parsedToken.extensionOffset;
578 exLen = src.parsedToken.extensionLen;
579 prefixOffset = src.parsedToken.prefixOffset;
580 prefixLen = src.parsedToken.prefixLen;
581 specs = src.parsedToken.flags;
582
583 startOfRules = FALSE;
584 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
585 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
586 if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
587 second[0] = 0;
588 } else {
589 u_strncpy(second,src.source+chOffset, chLen);
590 second[chLen] = 0;
591
592 if(exLen > 0 && firstEx == 0) {
593 u_strncat(first, src.source+exOffset, exLen);
594 first[firstLen+exLen] = 0;
595 }
596
597 if(lastReset == TRUE && prefixLen != 0) {
598 u_strncpy(first+prefixLen, first, firstLen);
599 u_strncpy(first, src.source+prefixOffset, prefixLen);
600 first[firstLen+prefixLen] = 0;
601 firstLen = firstLen+prefixLen;
602 }
603
604 if(before == TRUE) { /* swap first and second */
605 u_strcpy(tempB, first);
606 u_strcpy(first, second);
607 u_strcpy(second, tempB);
608
609 tempLen = firstLen;
610 firstLen = chLen;
611 chLen = tempLen;
612
613 tempLen = firstEx;
614 firstEx = exLen;
615 exLen = tempLen;
616 if(beforeStrength < strength) {
617 strength = beforeStrength;
618 }
619 }
620 }
621 lastReset = FALSE;
622
623 switch(strength){
624 case UCOL_IDENTICAL:
625 testEquality(coll,first,second);
626 break;
627 case UCOL_PRIMARY:
628 testPrimary(coll,first,second);
629 break;
630 case UCOL_SECONDARY:
631 testSecondary(coll,first,second);
632 break;
633 case UCOL_TERTIARY:
634 testTertiary(coll,first,second);
635 break;
636 case UCOL_TOK_RESET:
637 lastReset = TRUE;
638 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
639 if(before) {
640 beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
641 }
642 break;
643 default:
644 break;
645 }
646
647 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
648 before = FALSE;
649 } else {
650 firstLen = chLen;
651 firstEx = exLen;
652 u_strcpy(first, second);
653 }
654 }
655 uprv_free(src.source);
656 }
657 }
658
659 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
660 UCollator *UCA = (UCollator *)collator;
661 return ucol_strcoll(UCA, source, sLen, target, tLen);
662 }
663
664 /*
665 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
666 #if U_PLATFORM_HAS_WIN32_API
667 LCID lcid = (LCID)collator;
668 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
669 #else
670 return 0;
671 #endif
672 }
673 */
674
675 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
676 UChar s1, UChar s2,
677 const UChar *s, const uint32_t sLen,
678 const UChar *t, const uint32_t tLen) {
679 UChar source[256] = {0};
680 UChar target[256] = {0};
681
682 source[0] = s1;
683 u_strcpy(source+1, s);
684 target[0] = s2;
685 u_strcpy(target+1, t);
686
687 return func(collator, opts, source, sLen+1, target, tLen+1);
688 }
689
690 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
691 UChar s1, UChar s2,
692 const UChar *s, const uint32_t sLen,
693 const UChar *t, const uint32_t tLen) {
694 UChar source[256] = {0};
695 UChar target[256] = {0};
696
697 u_strcpy(source, s);
698 source[sLen] = s1;
699 u_strcpy(target, t);
700 target[tLen] = s2;
701
702 return func(collator, opts, source, sLen+1, target, tLen+1);
703 }
704
705 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
706 const UChar *s, const uint32_t sLen,
707 const UChar *t, const uint32_t tLen,
708 UCollationResult result) {
709 /*UChar fPrimary = 0x6d;*/
710 /*UChar sPrimary = 0x6e;*/
711 UChar fSecondary = 0x310d;
712 UChar sSecondary = 0x31a3;
713 UChar fTertiary = 0x310f;
714 UChar sTertiary = 0x31b7;
715
716 UCollationResult oposite;
717 if(result == UCOL_EQUAL) {
718 return UCOL_IDENTICAL;
719 } else if(result == UCOL_GREATER) {
720 oposite = UCOL_LESS;
721 } else {
722 oposite = UCOL_GREATER;
723 }
724
725 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
726 return UCOL_PRIMARY;
727 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
728 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
729 return UCOL_SECONDARY;
730 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
731 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
732 return UCOL_TERTIARY;
733 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
734 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
735 return UCOL_QUATERNARY;
736 } else {
737 return UCOL_IDENTICAL;
738 }
739 }
740
741 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
742 uint32_t i = 0;
743
744 if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
745 buffer[0] = '=';
746 buffer[1] = '=';
747 buffer[2] = '\0';
748 } else if(res == UCOL_GREATER) {
749 for(i = 0; i<strength+1; i++) {
750 buffer[i] = '>';
751 }
752 buffer[strength+1] = '\0';
753 } else {
754 for(i = 0; i<strength+1; i++) {
755 buffer[i] = '<';
756 }
757 buffer[strength+1] = '\0';
758 }
759
760 return buffer;
761 }
762
763
764
765 static void logFailure (const char *platform, const char *test,
766 const UChar *source, const uint32_t sLen,
767 const UChar *target, const uint32_t tLen,
768 UCollationResult realRes, uint32_t realStrength,
769 UCollationResult expRes, uint32_t expStrength, UBool error) {
770
771 uint32_t i = 0;
772
773 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
774 static int32_t maxOutputLength = 0;
775 int32_t outputLength;
776
777 *sEsc = *tEsc = *s = *t = 0;
778 if(error == TRUE) {
779 log_err("Difference between expected and generated order. Run test with -v for more info\n");
780 } else if(getTestOption(VERBOSITY_OPTION) == 0) {
781 return;
782 }
783 for(i = 0; i<sLen; i++) {
784 sprintf(b, "%04X", source[i]);
785 strcat(sEsc, "\\u");
786 strcat(sEsc, b);
787 strcat(s, b);
788 strcat(s, " ");
789 if(source[i] < 0x80) {
790 sprintf(b, "(%c)", source[i]);
791 strcat(sEsc, b);
792 }
793 }
794 for(i = 0; i<tLen; i++) {
795 sprintf(b, "%04X", target[i]);
796 strcat(tEsc, "\\u");
797 strcat(tEsc, b);
798 strcat(t, b);
799 strcat(t, " ");
800 if(target[i] < 0x80) {
801 sprintf(b, "(%c)", target[i]);
802 strcat(tEsc, b);
803 }
804 }
805 /*
806 strcpy(output, "[[ ");
807 strcat(output, sEsc);
808 strcat(output, getRelationSymbol(expRes, expStrength, relation));
809 strcat(output, tEsc);
810
811 strcat(output, " : ");
812
813 strcat(output, sEsc);
814 strcat(output, getRelationSymbol(realRes, realStrength, relation));
815 strcat(output, tEsc);
816 strcat(output, " ]] ");
817
818 log_verbose("%s", output);
819 */
820
821
822 strcpy(output, "DIFF: ");
823
824 strcat(output, s);
825 strcat(output, " : ");
826 strcat(output, t);
827
828 strcat(output, test);
829 strcat(output, ": ");
830
831 strcat(output, sEsc);
832 strcat(output, getRelationSymbol(expRes, expStrength, relation));
833 strcat(output, tEsc);
834
835 strcat(output, " ");
836
837 strcat(output, platform);
838 strcat(output, ": ");
839
840 strcat(output, sEsc);
841 strcat(output, getRelationSymbol(realRes, realStrength, relation));
842 strcat(output, tEsc);
843
844 outputLength = (int32_t)strlen(output);
845 if(outputLength > maxOutputLength) {
846 maxOutputLength = outputLength;
847 U_ASSERT(outputLength < sizeof(output));
848 }
849
850 log_verbose("%s\n", output);
851
852 }
853
854 /*
855 static void printOutRules(const UChar *rules) {
856 uint32_t len = u_strlen(rules);
857 uint32_t i = 0;
858 char toPrint;
859 uint32_t line = 0;
860
861 fprintf(stdout, "Rules:");
862
863 for(i = 0; i<len; i++) {
864 if(rules[i]<0x7f && rules[i]>=0x20) {
865 toPrint = (char)rules[i];
866 if(toPrint == '&') {
867 line = 1;
868 fprintf(stdout, "\n&");
869 } else if(toPrint == ';') {
870 fprintf(stdout, "<<");
871 line+=2;
872 } else if(toPrint == ',') {
873 fprintf(stdout, "<<<");
874 line+=3;
875 } else {
876 fprintf(stdout, "%c", toPrint);
877 line++;
878 }
879 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
880 fprintf(stdout, "\\u%04X", rules[i]);
881 line+=6;
882 }
883 if(line>72) {
884 fprintf(stdout, "\n");
885 line = 0;
886 }
887 }
888
889 log_verbose("\n");
890
891 }
892 */
893
894 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
895 uint32_t diffs = 0;
896 UCollationResult realResult;
897 uint32_t realStrength;
898
899 uint32_t sLen = u_strlen(first);
900 uint32_t tLen = u_strlen(second);
901
902 realResult = func(collator, opts, first, sLen, second, tLen);
903 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
904
905 if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
906 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
907 diffs++;
908 } else if(realResult != UCOL_LESS || realStrength != strength) {
909 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
910 diffs++;
911 }
912 return diffs;
913 }
914
915
916 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
917 const UChar *rules = NULL, *current = NULL;
918 int32_t ruleLen = 0;
919 uint32_t strength = 0;
920 uint32_t chOffset = 0; uint32_t chLen = 0;
921 uint32_t exOffset = 0; uint32_t exLen = 0;
922 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
923 /* uint32_t rExpsLen = 0; */
924 uint32_t firstLen = 0, secondLen = 0;
925 UBool varT = FALSE; UBool top_ = TRUE;
926 uint16_t specs = 0;
927 UBool startOfRules = TRUE;
928 UColTokenParser src;
929 UColOptionSet opts;
930
931 UChar first[256];
932 UChar second[256];
933 UChar *rulesCopy = NULL;
934
935 uint32_t UCAdiff = 0;
936 uint32_t Windiff = 1;
937 UParseError parseError;
938
939 uprv_memset(&src, 0, sizeof(UColTokenParser));
940 src.opts = &opts;
941
942 rules = ucol_getRules(coll, &ruleLen);
943
944 /*printOutRules(rules);*/
945
946 if(U_SUCCESS(*status) && ruleLen > 0) {
947 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
948 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
949 src.current = src.source = rulesCopy;
950 src.end = rulesCopy+ruleLen;
951 src.extraCurrent = src.end;
952 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
953 *first = *second = 0;
954
955 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
956 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
957 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
958 strength = src.parsedToken.strength;
959 chOffset = src.parsedToken.charsOffset;
960 chLen = src.parsedToken.charsLen;
961 exOffset = src.parsedToken.extensionOffset;
962 exLen = src.parsedToken.extensionLen;
963 prefixOffset = src.parsedToken.prefixOffset;
964 prefixLen = src.parsedToken.prefixLen;
965 specs = src.parsedToken.flags;
966
967 startOfRules = FALSE;
968 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
969 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
970
971 u_strncpy(second,src.source+chOffset, chLen);
972 second[chLen] = 0;
973 secondLen = chLen;
974
975 if(exLen > 0) {
976 u_strncat(first, src.source+exOffset, exLen);
977 first[firstLen+exLen] = 0;
978 firstLen += exLen;
979 }
980
981 if(strength != UCOL_TOK_RESET) {
982 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
983 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
984 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
985 }
986 }
987
988
989 firstLen = chLen;
990 u_strcpy(first, second);
991
992 }
993 if(UCAdiff != 0 && Windiff != 0) {
994 log_verbose("\n");
995 }
996 if(UCAdiff == 0) {
997 log_verbose("No immediate difference with %s!\n", refName);
998 }
999 if(Windiff == 0) {
1000 log_verbose("No immediate difference with Win32!\n");
1001 }
1002 uprv_free(src.source);
1003 }
1004 }
1005
/*
 * Compares two collation elements, each given as a lead CE plus a continuation CE
 * (s1/s2 against t1/t2), level by level:
 *   primary   - upper 16 bits of the lead, then upper 16 bits of the continuation;
 *   secondary - bits 8..15 of the lead, then bits 8..15 of the continuation;
 *   tertiary  - low 8 bits of the lead, then low 8 bits of the continuation.
 * Returns 0 when both pairs are identical, otherwise -1 or 1 according to the first
 * level at which they differ.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t lhs;
    uint32_t rhs;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary weights */
    lhs = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    rhs = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* secondary weights */
    lhs = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    rhs = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* tertiary weights: the last level, so anything not smaller counts as greater */
    lhs = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    rhs = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (lhs < rhs) ? -1 : 1;
}
1042
1043 typedef struct {
1044 uint32_t startCE;
1045 uint32_t startContCE;
1046 uint32_t limitCE;
1047 uint32_t limitContCE;
1048 } indirectBoundaries;
1049
1050 /* these values are used for finding CE values for indirect positioning. */
1051 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1052 /* values. It only works for resets and you cannot tailor indirect names */
1053 /* An indirect name can define either an anchor point or a range. An */
1054 /* anchor point behaves in exactly the same way as a code point in reset */
1055 /* would, except that it cannot be tailored. A range (we currently only */
1056 /* know for the [top] range will explicitly set the upper bound for */
1057 /* generated CEs, thus allowing for better control over how many CEs can */
1058 /* be squeezed between in the range without performance penalty. */
1059 /* In that respect, we use [top] for tailoring of locales that use CJK */
1060 /* characters. Other indirect values are currently a pure convenience, */
1061 /* they can be used to assure that the CEs will be always positioned in */
1062 /* the same place relative to a point with known properties (e.g. first */
1063 /* primary ignorable). */
1064 static indirectBoundaries ucolIndirectBoundaries[15];
1065 static UBool indirectBoundariesSet = FALSE;
1066 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1067 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1068 /* to initalize here. */
1069 ucolIndirectBoundaries[indexR].startCE = start[0];
1070 ucolIndirectBoundaries[indexR].startContCE = start[1];
1071 if(end) {
1072 ucolIndirectBoundaries[indexR].limitCE = end[0];
1073 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1074 } else {
1075 ucolIndirectBoundaries[indexR].limitCE = 0;
1076 ucolIndirectBoundaries[indexR].limitContCE = 0;
1077 }
1078 }
1079
1080 static void testCEs(UCollator *coll, UErrorCode *status) {
1081 const UChar *rules = NULL, *current = NULL;
1082 int32_t ruleLen = 0;
1083
1084 uint32_t strength = 0;
1085 uint32_t maxStrength = UCOL_IDENTICAL;
1086 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1087 uint32_t lastCE;
1088 uint32_t lastContCE;
1089
1090 int32_t result = 0;
1091 uint32_t chOffset = 0; uint32_t chLen = 0;
1092 uint32_t exOffset = 0; uint32_t exLen = 0;
1093 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1094 uint32_t oldOffset = 0;
1095
1096 /* uint32_t rExpsLen = 0; */
1097 /* uint32_t firstLen = 0; */
1098 uint16_t specs = 0;
1099 UBool varT = FALSE; UBool top_ = TRUE;
1100 UBool startOfRules = TRUE;
1101 UBool before = FALSE;
1102 UColTokenParser src;
1103 UColOptionSet opts;
1104 UParseError parseError;
1105 UChar *rulesCopy = NULL;
1106 collIterate *c = uprv_new_collIterate(status);
1107 UCAConstants *consts = NULL;
1108 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1109 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1110 const char *colLoc;
1111 UCollator *UCA = ucol_open("root", status);
1112
1113 if (U_FAILURE(*status)) {
1114 log_err("Could not open root collator %s\n", u_errorName(*status));
1115 uprv_delete_collIterate(c);
1116 return;
1117 }
1118
1119 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1120 if (U_FAILURE(*status)) {
1121 log_err("Could not get collator name: %s\n", u_errorName(*status));
1122 ucol_close(UCA);
1123 uprv_delete_collIterate(c);
1124 return;
1125 }
1126
1127 uprv_memset(&src, 0, sizeof(UColTokenParser));
1128
1129 consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1130 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1131 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1132 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1133 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1134
1135 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1136
1137 src.opts = &opts;
1138
1139 rules = ucol_getRules(coll, &ruleLen);
1140
1141 src.invUCA = ucol_initInverseUCA(status);
1142
1143 if(indirectBoundariesSet == FALSE) {
1144 /* UCOL_RESET_TOP_VALUE */
1145 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1146 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1147 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1148 /* UCOL_LAST_PRIMARY_IGNORABLE */
1149 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1150 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1151 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1152 /* UCOL_LAST_SECONDARY_IGNORABLE */
1153 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1154 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1155 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1156 /* UCOL_LAST_TERTIARY_IGNORABLE */
1157 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1158 /* UCOL_FIRST_VARIABLE */
1159 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1160 /* UCOL_LAST_VARIABLE */
1161 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1162 /* UCOL_FIRST_NON_VARIABLE */
1163 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1164 /* UCOL_LAST_NON_VARIABLE */
1165 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1166 /* UCOL_FIRST_IMPLICIT */
1167 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1168 /* UCOL_LAST_IMPLICIT */
1169 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1170 /* UCOL_FIRST_TRAILING */
1171 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1172 /* UCOL_LAST_TRAILING */
1173 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1174 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1175 indirectBoundariesSet = TRUE;
1176 }
1177
1178
1179 if(U_SUCCESS(*status) && ruleLen > 0) {
1180 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1181 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1182 src.current = src.source = rulesCopy;
1183 src.end = rulesCopy+ruleLen;
1184 src.extraCurrent = src.end;
1185 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1186
1187 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1188 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1189 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1190 strength = src.parsedToken.strength;
1191 chOffset = src.parsedToken.charsOffset;
1192 chLen = src.parsedToken.charsLen;
1193 exOffset = src.parsedToken.extensionOffset;
1194 exLen = src.parsedToken.extensionLen;
1195 prefixOffset = src.parsedToken.prefixOffset;
1196 prefixLen = src.parsedToken.prefixLen;
1197 specs = src.parsedToken.flags;
1198
1199 startOfRules = FALSE;
1200 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1201 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1202
1203 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1204
1205 currCE = ucol_getNextCE(coll, c, status);
1206 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1207 log_verbose("Thai prevowel detected. Will pick next CE\n");
1208 currCE = ucol_getNextCE(coll, c, status);
1209 }
1210
1211 currContCE = ucol_getNextCE(coll, c, status);
1212 if(!isContinuation(currContCE)) {
1213 currContCE = 0;
1214 }
1215
1216 /* we need to repack CEs here */
1217
1218 if(strength == UCOL_TOK_RESET) {
1219 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1220 if(top_ == TRUE) {
1221 int32_t tokenIndex = src.parsedToken.indirectIndex;
1222
1223 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1224 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1225 } else {
1226 nextCE = baseCE = currCE;
1227 nextContCE = baseContCE = currContCE;
1228 }
1229 maxStrength = UCOL_IDENTICAL;
1230 } else {
1231 if(strength < maxStrength) {
1232 maxStrength = strength;
1233 if(baseCE == UCOL_RESET_TOP_VALUE) {
1234 log_verbose("Resetting to [top]\n");
1235 nextCE = UCOL_NEXT_TOP_VALUE;
1236 nextContCE = UCOL_NEXT_TOP_CONT;
1237 } else {
1238 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1239 }
1240 if(result < 0) {
1241 if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1242 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1243 return;
1244 } else {
1245 log_err("%s: couldn't find the CE\n", colLoc);
1246 return;
1247 }
1248 }
1249 }
1250
1251 currCE &= 0xFFFFFF3F;
1252 currContCE &= 0xFFFFFFBF;
1253
1254 if(maxStrength == UCOL_IDENTICAL) {
1255 if(baseCE != currCE || baseContCE != currContCE) {
1256 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
1257 }
1258 } else {
1259 if(strength == UCOL_IDENTICAL) {
1260 if(lastCE != currCE || lastContCE != currContCE) {
1261 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
1262 }
1263 } else {
1264 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1265 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1266 log_err("%s: current CE is not less than base CE\n", colLoc);
1267 }
1268 if(!before) {
1269 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1270 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1271 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1272 }
1273 } else {
1274 before = FALSE;
1275 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1276 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1277 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1278 }
1279 }
1280 }
1281 }
1282
1283 }
1284
1285 oldOffset = chOffset;
1286 lastCE = currCE & 0xFFFFFF3F;
1287 lastContCE = currContCE & 0xFFFFFFBF;
1288 }
1289 uprv_free(src.source);
1290 }
1291 ucol_close(UCA);
1292 uprv_delete_collIterate(c);
1293 }
1294
#if 0
/* Disabled: this hard-coded locale list is no longer used; */
/* these locales are now picked from index RB */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1309
/*
 * Rule strings exercised by RamsRulesTest in addition to the locale
 * collators.  Each string is passed through u_unescape() before use, so the
 * \uXXXX sequences are spelled with a doubled backslash here.
 * The comment above each "Cui Min" rule shows an older spelling of it.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /*"& Z < p, P",*/

    /* Cui Mins rules */

    /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
    /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
    /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
    /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
    /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
    /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
    /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
};
1323
1324
1325 static void TestCollations(void) {
1326 int32_t noOfLoc = uloc_countAvailable();
1327 int32_t i = 0, j = 0;
1328
1329 UErrorCode status = U_ZERO_ERROR;
1330 char cName[256];
1331 UChar name[256];
1332 int32_t nameSize;
1333
1334
1335 const char *locName = NULL;
1336 UCollator *coll = NULL;
1337 UCollator *UCA = ucol_open("", &status);
1338 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1339 if (U_FAILURE(status)) {
1340 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1341 return;
1342 }
1343 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1344
1345 for(i = 0; i<noOfLoc; i++) {
1346 status = U_ZERO_ERROR;
1347 locName = uloc_getAvailable(i);
1348 if(uprv_strcmp("ja", locName) == 0) {
1349 log_verbose("Don't know how to test prefixes\n");
1350 continue;
1351 }
1352 if(hasCollationElements(locName)) {
1353 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1354 for(j = 0; j<nameSize; j++) {
1355 cName[j] = (char)name[j];
1356 }
1357 cName[nameSize] = 0;
1358 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1359 coll = ucol_open(locName, &status);
1360 if(U_SUCCESS(status)) {
1361 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1362 ucol_close(coll);
1363 } else {
1364 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1365 status = U_ZERO_ERROR;
1366 }
1367 }
1368 }
1369 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1370 ucol_close(UCA);
1371 }
1372
1373 static void RamsRulesTest(void) {
1374 UErrorCode status = U_ZERO_ERROR;
1375 int32_t i = 0;
1376 UCollator *coll = NULL;
1377 UChar rule[2048];
1378 uint32_t ruleLen;
1379 int32_t noOfLoc = uloc_countAvailable();
1380 const char *locName = NULL;
1381
1382 log_verbose("RamsRulesTest\n");
1383
1384 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1385 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1386 return;
1387 }
1388
1389 for(i = 0; i<noOfLoc; i++) {
1390 locName = uloc_getAvailable(i);
1391 if(hasCollationElements(locName)) {
1392 if (uprv_strcmp("ja", locName)==0) {
1393 log_verbose("Don't know how to test Japanese because of prefixes\n");
1394 continue;
1395 }
1396 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1397 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1398 continue;
1399 }
1400 if (uprv_strcmp("bn", locName)==0 ||
1401 uprv_strcmp("en_US_POSIX", locName)==0 ||
1402 uprv_strcmp("km", locName)==0 ||
1403 uprv_strcmp("km_KH", locName)==0 ||
1404 uprv_strcmp("my", locName)==0 ||
1405 uprv_strcmp("si", locName)==0 ||
1406 uprv_strcmp("si_LK", locName)==0 ||
1407 uprv_strcmp("zh", locName)==0 ||
1408 uprv_strcmp("zh_Hant", locName)==0
1409 ) {
1410 log_verbose("Don't know how to test %s. "
1411 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1412 continue;
1413 }
1414 log_verbose("Testing locale %s\n", locName);
1415 status = U_ZERO_ERROR;
1416 coll = ucol_open(locName, &status);
1417 if(U_SUCCESS(status)) {
1418 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1419 if(coll->image->jamoSpecial == TRUE) {
1420 log_err("%s has special JAMOs\n", locName);
1421 }
1422 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1423 testCollator(coll, &status);
1424 testCEs(coll, &status);
1425 } else {
1426 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1427 }
1428 ucol_close(coll);
1429 } else {
1430 log_err("Could not open %s: %s\n", locName, u_errorName(status));
1431 }
1432 }
1433 }
1434
1435 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1436 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1437 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1438 status = U_ZERO_ERROR;
1439 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1440 if(U_SUCCESS(status)) {
1441 testCollator(coll, &status);
1442 testCEs(coll, &status);
1443 ucol_close(coll);
1444 } else {
1445 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1446 }
1447 }
1448
1449 }
1450
1451 static void IsTailoredTest(void) {
1452 UErrorCode status = U_ZERO_ERROR;
1453 uint32_t i = 0;
1454 UCollator *coll = NULL;
1455 UChar rule[2048];
1456 UChar tailored[2048];
1457 UChar notTailored[2048];
1458 uint32_t ruleLen, tailoredLen, notTailoredLen;
1459
1460 log_verbose("IsTailoredTest\n");
1461
1462 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1463 ruleLen = u_strlen(rule);
1464
1465 u_uastrcpy(tailored, "ABCcd");
1466 tailoredLen = u_strlen(tailored);
1467
1468 u_uastrcpy(notTailored, "ZabD");
1469 notTailoredLen = u_strlen(notTailored);
1470
1471 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1472 if(U_SUCCESS(status)) {
1473 for(i = 0; i<tailoredLen; i++) {
1474 if(!ucol_isTailored(coll, tailored[i], &status)) {
1475 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1476 }
1477 }
1478 for(i = 0; i<notTailoredLen; i++) {
1479 if(ucol_isTailored(coll, notTailored[i], &status)) {
1480 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1481 }
1482 }
1483 ucol_close(coll);
1484 }
1485 else {
1486 log_err_status(status, "Can't tailor rules\n");
1487 }
1488 /* Code coverage */
1489 status = U_ZERO_ERROR;
1490 coll = ucol_open("ja", &status);
1491 if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1492 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1493 }
1494 ucol_close(coll);
1495 }
1496
1497
/*
 * Strings used by TestChMove, listed in the order the "cs" collator is
 * expected to sort them (each entry is tested as UCOL_LESS than every later
 * entry).  Entries are passed through u_unescape() before use.
 */
static const char chTest[][20] = {
    /* c */
    "c", "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    /* h */
    "h", "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    /* ch */
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    /* i */
    "i", "I", "iarly",
    /* r */
    "r", "R",
    "r\\u030C", "R\\u030C",
    /* s */
    "s", "S",
    "s\\u030C", "S\\u030C",
    /* z */
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
1517
1518 static void TestChMove(void) {
1519 UChar t1[256] = {0};
1520 UChar t2[256] = {0};
1521
1522 uint32_t i = 0, j = 0;
1523 uint32_t size = 0;
1524 UErrorCode status = U_ZERO_ERROR;
1525
1526 UCollator *coll = ucol_open("cs", &status);
1527
1528 if(U_SUCCESS(status)) {
1529 size = sizeof(chTest)/sizeof(chTest[0]);
1530 for(i = 0; i < size-1; i++) {
1531 for(j = i+1; j < size; j++) {
1532 u_unescape(chTest[i], t1, 256);
1533 u_unescape(chTest[j], t2, 256);
1534 doTest(coll, t1, t2, UCOL_LESS);
1535 }
1536 }
1537 }
1538 else {
1539 log_data_err("Can't open collator");
1540 }
1541 ucol_close(coll);
1542 }
1543
1544
1545
1546
/*
 * Expected sort order for the rule "&\\u4e00 < a <<< A < b <<< B".
 * Currently only referenced by the disabled legacy variant of
 * TestImplicitTailoring.
 */
static const char impTest[][20] = {
    "\\u4e00",
    "a", "A",
    "b", "B",
    "\\u4e01"
};
1555
1556
1557 static void TestImplicitTailoring(void) {
1558 static const struct {
1559 const char *rules;
1560 const char *data[10];
1561 const uint32_t len;
1562 } tests[] = {
1563 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1564 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1565 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1566 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1567 };
1568
1569 int32_t i = 0;
1570
1571 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
1572 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
1573 }
1574
1575 /*
1576 UChar t1[256] = {0};
1577 UChar t2[256] = {0};
1578
1579 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1580
1581 uint32_t i = 0, j = 0;
1582 uint32_t size = 0;
1583 uint32_t ruleLen = 0;
1584 UErrorCode status = U_ZERO_ERROR;
1585 UCollator *coll = NULL;
1586 ruleLen = u_unescape(rule, t1, 256);
1587
1588 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1589
1590 if(U_SUCCESS(status)) {
1591 size = sizeof(impTest)/sizeof(impTest[0]);
1592 for(i = 0; i < size-1; i++) {
1593 for(j = i+1; j < size; j++) {
1594 u_unescape(impTest[i], t1, 256);
1595 u_unescape(impTest[j], t2, 256);
1596 doTest(coll, t1, t2, UCOL_LESS);
1597 }
1598 }
1599 }
1600 else {
1601 log_err("Can't open collator");
1602 }
1603 ucol_close(coll);
1604 */
1605 }
1606
1607 static void TestFCDProblem(void) {
1608 UChar t1[256] = {0};
1609 UChar t2[256] = {0};
1610
1611 const char *s1 = "\\u0430\\u0306\\u0325";
1612 const char *s2 = "\\u04D1\\u0325";
1613
1614 UErrorCode status = U_ZERO_ERROR;
1615 UCollator *coll = ucol_open("", &status);
1616 u_unescape(s1, t1, 256);
1617 u_unescape(s2, t2, 256);
1618
1619 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1620 doTest(coll, t1, t2, UCOL_EQUAL);
1621
1622 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1623 doTest(coll, t1, t2, UCOL_EQUAL);
1624
1625 ucol_close(coll);
1626 }
1627
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
#define NORM_BUFFER_TEST_LEN 18
/* One TestComposeDecompose test case: a code point and the two strings that are compared for it. */
typedef struct {
    /* Code point under test. */
    UChar32 u;
    /* Result of unorm_normalize(UNORM_NFC); TestComposeDecompose replaces it with the
       unnormalized character when that differs from the NFD form. */
    UChar NFC[NORM_BUFFER_TEST_LEN];
    /* Result of unorm_normalize(UNORM_NFD). */
    UChar NFD[NORM_BUFFER_TEST_LEN];
} tester;
1638
1639 static void TestComposeDecompose(void) {
1640 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1641 static const UChar UNICODESET_STR[] = {
1642 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1643 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1644 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1645 };
1646 int32_t noOfLoc;
1647 int32_t i = 0, j = 0;
1648
1649 UErrorCode status = U_ZERO_ERROR;
1650 const char *locName = NULL;
1651 uint32_t nfcSize;
1652 uint32_t nfdSize;
1653 tester **t;
1654 uint32_t noCases = 0;
1655 UCollator *coll = NULL;
1656 UChar32 u = 0;
1657 UChar comp[NORM_BUFFER_TEST_LEN];
1658 uint32_t len = 0;
1659 UCollationElements *iter;
1660 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1661 int32_t charsToTestSize;
1662
1663 noOfLoc = uloc_countAvailable();
1664
1665 coll = ucol_open("", &status);
1666 if (U_FAILURE(status)) {
1667 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1668 return;
1669 }
1670 charsToTestSize = uset_size(charsToTest);
1671 if (charsToTestSize <= 0) {
1672 log_err("Set was zero. Missing data?\n");
1673 return;
1674 }
1675 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1676 t[0] = (tester *)malloc(sizeof(tester));
1677 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1678
1679 for(u = 0; u < charsToTestSize; u++) {
1680 UChar32 ch = uset_charAt(charsToTest, u);
1681 len = 0;
1682 U16_APPEND_UNSAFE(comp, len, ch);
1683 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1684 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1685
1686 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1687 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1688 t[noCases]->u = ch;
1689 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1690 u_strncpy(t[noCases]->NFC, comp, len);
1691 t[noCases]->NFC[len] = 0;
1692 }
1693 noCases++;
1694 t[noCases] = (tester *)malloc(sizeof(tester));
1695 uprv_memset(t[noCases], 0, sizeof(tester));
1696 }
1697 }
1698 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1699 uset_close(charsToTest);
1700 charsToTest = NULL;
1701
1702 for(u=0; u<(UChar32)noCases; u++) {
1703 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1704 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1705 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1706 }
1707 }
1708 /*
1709 for(u = 0; u < charsToTestSize; u++) {
1710 if(!(u&0xFFFF)) {
1711 log_verbose("%08X ", u);
1712 }
1713 uprv_memset(t[noCases], 0, sizeof(tester));
1714 t[noCases]->u = u;
1715 len = 0;
1716 U16_APPEND_UNSAFE(comp, len, u);
1717 comp[len] = 0;
1718 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1719 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1720 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1721 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1722 }
1723 */
1724
1725 ucol_close(coll);
1726
1727 log_verbose("Testing locales, number of cases = %i\n", noCases);
1728 for(i = 0; i<noOfLoc; i++) {
1729 status = U_ZERO_ERROR;
1730 locName = uloc_getAvailable(i);
1731 if(hasCollationElements(locName)) {
1732 char cName[256];
1733 UChar name[256];
1734 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1735
1736 for(j = 0; j<nameSize; j++) {
1737 cName[j] = (char)name[j];
1738 }
1739 cName[nameSize] = 0;
1740 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1741
1742 coll = ucol_open(locName, &status);
1743 ucol_setStrength(coll, UCOL_IDENTICAL);
1744 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1745
1746 for(u=0; u<(UChar32)noCases; u++) {
1747 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1748 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1749 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1750 log_verbose("Testing NFC\n");
1751 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1752 backAndForth(iter);
1753 log_verbose("Testing NFD\n");
1754 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1755 backAndForth(iter);
1756 }
1757 }
1758 ucol_closeElements(iter);
1759 ucol_close(coll);
1760 }
1761 }
1762 for(u = 0; u <= (UChar32)noCases; u++) {
1763 free(t[u]);
1764 }
1765 free(t);
1766 }
1767
1768 static void TestEmptyRule(void) {
1769 UErrorCode status = U_ZERO_ERROR;
1770 UChar rulez[] = { 0 };
1771 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1772
1773 ucol_close(coll);
1774 }
1775
1776 static void TestUCARules(void) {
1777 UErrorCode status = U_ZERO_ERROR;
1778 UChar b[256];
1779 UChar *rules = b;
1780 uint32_t ruleLen = 0;
1781 UCollator *UCAfromRules = NULL;
1782 UCollator *coll = ucol_open("", &status);
1783 if(status == U_FILE_ACCESS_ERROR) {
1784 log_data_err("Is your data around?\n");
1785 return;
1786 } else if(U_FAILURE(status)) {
1787 log_err("Error opening collator\n");
1788 return;
1789 }
1790 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1791
1792 log_verbose("TestUCARules\n");
1793 if(ruleLen > 256) {
1794 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1795 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1796 }
1797 log_verbose("Rules length is %d\n", ruleLen);
1798 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1799 if(U_SUCCESS(status)) {
1800 ucol_close(UCAfromRules);
1801 } else {
1802 log_verbose("Unable to create a collator from UCARules!\n");
1803 }
1804 /*
1805 u_unescape(blah, b, 256);
1806 ucol_getSortKey(coll, b, 1, res, 256);
1807 */
1808 ucol_close(coll);
1809 if(rules != b) {
1810 free(rules);
1811 }
1812 }
1813
1814
1815 /* Pinyin tonal order */
1816 /*
1817 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1818 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1819 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1820 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1821 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1822 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1823 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1824 .. (\u00fc)
1825
1826 However, in testing we got the following order:
1827 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1828 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1829 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1830 .. (\u0113)
1831 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1832 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1833 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1834 .. (\u01d8)
1835 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1836 */
1837
/*
 * Pinyin tonal order built with [before 1] resets: for each of a, e, i, o, u
 * the four toned vowels are placed before the base letter, and the
 * u-diaeresis forms are placed after u.  genericRulesStarter() is given the
 * rule and the expected order.
 */
static void TestBefore(void) {
    static const char *expectedOrder[] = {
        /* before A */
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        /* before E */
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        /* before I */
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        /* before O */
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        /* before U */
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        /* after U */
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    static const char *pinyinRules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(pinyinRules, expectedOrder, sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
1856
#if 0
/* superseded by TestBeforePinyin */
/* Disabled: ran genericLocaleStarter() on the "zh" locale with the order below. */
static void TestJ784(void) {
const static char *data[] = {
"A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
"E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
"I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
"O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
"U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
"\\u00fc",
"\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
};
genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1872
#if 0
/* superseded by the changes to the lv locale */
/* Disabled: ran genericLocaleStarter() on the "lv" locale with the order I, i, Y, y. */
static void TestJ831(void) {
const static char *data[] = {
"I",
"i",
"Y",
"y"
};
genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1885
/*
 * Checks one expected order (ae / U+00E6 sorting between "ad" and "af")
 * twice: against the "fr" locale via genericLocaleStarter(), and against the
 * rule "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E" via genericRulesStarter().
 */
static void TestJ815(void) {
    static const char *expectedOrder[] = {
        "aa", "Aa",
        "ab", "Ab",
        "ad", "Ad",
        "ae", "Ae",
        "\\u00e6", "\\u00c6",
        "af", "Af",
        "b", "B"
    };

    genericLocaleStarter("fr", expectedOrder, sizeof(expectedOrder)/sizeof(expectedOrder[0]));
    genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", expectedOrder, sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
1906
1907
1908 /*
1909 "& a < b < c < d& r < c", "& a < b < d& r < c",
1910 "& a < b < c < d& c < m", "& a < b < c < m < d",
1911 "& a < b < c < d& a < m", "& a < m < b < c < d",
1912 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1913 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1914 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1915 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1916 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1917 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1918 */
1919 static void TestRedundantRules(void) {
1920 int32_t i;
1921
1922 static const struct {
1923 const char *rules;
1924 const char *expectedRules;
1925 const char *testdata[8];
1926 uint32_t testdatalen;
1927 } tests[] = {
1928 /* this test conflicts with positioning of CODAN placeholder */
1929 /*{
1930 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1931 "&\\u2089<<<x",
1932 {"\\u2089", "x"}, 2
1933 }, */
1934 /* this test conflicts with the [before x] syntax tightening */
1935 /*{
1936 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1937 "&\\u0252<<<x",
1938 {"\\u0252", "x"}, 2
1939 }, */
1940 /* this test conflicts with the [before x] syntax tightening */
1941 /*{
1942 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1943 "& a <<< x < b <<< c << d <<< e",
1944 {"a", "x", "b", "c", "d", "e"}, 6
1945 }, */
1946 {
1947 "& a < b < c < d& [before 1] c < m",
1948 "& a < b < m < c < d",
1949 {"a", "b", "m", "c", "d"}, 5
1950 },
1951 {
1952 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1953 "& a < b <<< c << d <<< x <<< e",
1954 {"a", "b", "c", "d", "x", "e"}, 6
1955 },
1956 /* this test conflicts with the [before x] syntax tightening */
1957 /* {
1958 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1959 "& a < b <<< c <<< x << d <<< e",
1960 {"a", "b", "c", "x", "d", "e"},, 6
1961 }, */
1962 {
1963 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1964 "& a < b <<< c << d <<< e <<< f < x < g",
1965 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1966 },
1967 {
1968 "& a <<< b << c < d& a < m",
1969 "& a <<< b << c < m < d",
1970 {"a", "b", "c", "m", "d"}, 5
1971 },
1972 {
1973 "&a<b<<b\\u0301 &z<b",
1974 "&a<b\\u0301 &z<b",
1975 {"a", "b\\u0301", "z", "b"}, 4
1976 },
1977 {
1978 "&z<m<<<q<<<m",
1979 "&z<q<<<m",
1980 {"z", "q", "m"},3
1981 },
1982 {
1983 "&z<<<m<q<<<m",
1984 "&z<q<<<m",
1985 {"z", "q", "m"}, 3
1986 },
1987 {
1988 "& a < b < c < d& r < c",
1989 "& a < b < d& r < c",
1990 {"a", "b", "d"}, 3
1991 },
1992 {
1993 "& a < b < c < d& r < c",
1994 "& a < b < d& r < c",
1995 {"r", "c"}, 2
1996 },
1997 {
1998 "& a < b < c < d& c < m",
1999 "& a < b < c < m < d",
2000 {"a", "b", "c", "m", "d"}, 5
2001 },
2002 {
2003 "& a < b < c < d& a < m",
2004 "& a < m < b < c < d",
2005 {"a", "m", "b", "c", "d"}, 5
2006 }
2007 };
2008
2009
2010 UCollator *credundant = NULL;
2011 UCollator *cresulting = NULL;
2012 UErrorCode status = U_ZERO_ERROR;
2013 UChar rlz[2048] = { 0 };
2014 uint32_t rlen = 0;
2015
2016 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2017 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2018 rlen = u_unescape(tests[i].rules, rlz, 2048);
2019
2020 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2021 if(status == U_FILE_ACCESS_ERROR) {
2022 log_data_err("Is your data around?\n");
2023 return;
2024 } else if(U_FAILURE(status)) {
2025 log_err("Error opening collator\n");
2026 return;
2027 }
2028
2029 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2030 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2031
2032 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2033
2034 ucol_close(credundant);
2035 ucol_close(cresulting);
2036
2037 log_verbose("testing using data\n");
2038
2039 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2040 }
2041
2042 }
2043
2044 static void TestExpansionSyntax(void) {
2045 int32_t i;
2046
2047 const static char *rules[] = {
2048 "&AE <<< a << b <<< c &d <<< f",
2049 "&AE <<< a <<< b << c << d < e < f <<< g",
2050 "&AE <<< B <<< C / D <<< F"
2051 };
2052
2053 const static char *expectedRules[] = {
2054 "&A <<< a / E << b / E <<< c /E &d <<< f",
2055 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2056 "&A <<< B / E <<< C / ED <<< F / E"
2057 };
2058
2059 const static char *testdata[][8] = {
2060 {"AE", "a", "b", "c"},
2061 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2062 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2063 };
2064
2065 const static uint32_t testdatalen[] = {
2066 4,
2067 8,
2068 3
2069 };
2070
2071
2072
2073 UCollator *credundant = NULL;
2074 UCollator *cresulting = NULL;
2075 UErrorCode status = U_ZERO_ERROR;
2076 UChar rlz[2048] = { 0 };
2077 uint32_t rlen = 0;
2078
2079 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2080 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2081 rlen = u_unescape(rules[i], rlz, 2048);
2082
2083 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2084 if(status == U_FILE_ACCESS_ERROR) {
2085 log_data_err("Is your data around?\n");
2086 return;
2087 } else if(U_FAILURE(status)) {
2088 log_err("Error opening collator\n");
2089 return;
2090 }
2091 rlen = u_unescape(expectedRules[i], rlz, 2048);
2092 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2093
2094 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2095 /* as a hard error test, but only in information mode */
2096 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2097
2098 ucol_close(credundant);
2099 ucol_close(cresulting);
2100
2101 log_verbose("testing using data\n");
2102
2103 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2104 }
2105 }
2106
2107 static void TestCase(void)
2108 {
2109 const static UChar gRules[MAX_TOKEN_LEN] =
2110 /*" & 0 < 1,\u2461<a,A"*/
2111 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2112
2113 const static UChar testCase[][MAX_TOKEN_LEN] =
2114 {
2115 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2116 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2117 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2118 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2119 };
2120
2121 const static UCollationResult caseTestResults[][9] =
2122 {
2123 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2124 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2125 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2126 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2127 };
2128
2129 const static UColAttributeValue caseTestAttributes[][2] =
2130 {
2131 { UCOL_LOWER_FIRST, UCOL_OFF},
2132 { UCOL_UPPER_FIRST, UCOL_OFF},
2133 { UCOL_LOWER_FIRST, UCOL_ON},
2134 { UCOL_UPPER_FIRST, UCOL_ON}
2135 };
2136 int32_t i,j,k;
2137 UErrorCode status = U_ZERO_ERROR;
2138 UCollationElements *iter;
2139 UCollator *myCollation;
2140 myCollation = ucol_open("en_US", &status);
2141
2142 if(U_FAILURE(status)){
2143 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2144 return;
2145 }
2146 log_verbose("Testing different case settings\n");
2147 ucol_setStrength(myCollation, UCOL_TERTIARY);
2148
2149 for(k = 0; k<4; k++) {
2150 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2151 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2152 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2153 for (i = 0; i < 3 ; i++) {
2154 for(j = i+1; j<4; j++) {
2155 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2156 }
2157 }
2158 }
2159 ucol_close(myCollation);
2160
2161 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2162 if(U_FAILURE(status)){
2163 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2164 return;
2165 }
2166 log_verbose("Testing different case settings with custom rules\n");
2167 ucol_setStrength(myCollation, UCOL_TERTIARY);
2168
2169 for(k = 0; k<4; k++) {
2170 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2171 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2172 for (i = 0; i < 3 ; i++) {
2173 for(j = i+1; j<4; j++) {
2174 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2175 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2176 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2177 backAndForth(iter);
2178 ucol_closeElements(iter);
2179 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2180 backAndForth(iter);
2181 ucol_closeElements(iter);
2182 }
2183 }
2184 }
2185 ucol_close(myCollation);
2186 {
2187 const static char *lowerFirst[] = {
2188 "h",
2189 "H",
2190 "ch",
2191 "Ch",
2192 "CH",
2193 "cha",
2194 "chA",
2195 "Cha",
2196 "ChA",
2197 "CHa",
2198 "CHA",
2199 "i",
2200 "I"
2201 };
2202
2203 const static char *upperFirst[] = {
2204 "H",
2205 "h",
2206 "CH",
2207 "Ch",
2208 "ch",
2209 "CHA",
2210 "CHa",
2211 "ChA",
2212 "Cha",
2213 "chA",
2214 "cha",
2215 "I",
2216 "i"
2217 };
2218 log_verbose("mixed case test\n");
2219 log_verbose("lower first, case level off\n");
2220 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2221 log_verbose("upper first, case level off\n");
2222 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2223 log_verbose("lower first, case level on\n");
2224 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2225 log_verbose("upper first, case level on\n");
2226 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2227 }
2228
2229 }
2230
2231 static void TestIncrementalNormalize(void) {
2232
2233 /*UChar baseA =0x61;*/
2234 UChar baseA =0x41;
2235 /* UChar baseB = 0x42;*/
2236 static const UChar ccMix[] = {0x316, 0x321, 0x300};
2237 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2238 /*
2239 0x316 is combining grave accent below, cc=220
2240 0x321 is combining palatalized hook below, cc=202
2241 0x300 is combining grave accent, cc=230
2242 */
2243
2244 #define MAXSLEN 2000
2245 /*int maxSLen = 64000;*/
2246 int sLen;
2247 int i;
2248
2249 UCollator *coll;
2250 UErrorCode status = U_ZERO_ERROR;
2251 UCollationResult result;
2252
2253 int32_t myQ = getTestOption(QUICK_OPTION);
2254
2255 if(getTestOption(QUICK_OPTION) < 0) {
2256 setTestOption(QUICK_OPTION, 1);
2257 }
2258
2259 {
2260 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2261 /* most buffers along the way.*/
2262 UChar strA[MAXSLEN+1];
2263 UChar strB[MAXSLEN+1];
2264
2265 coll = ucol_open("en_US", &status);
2266 if(status == U_FILE_ACCESS_ERROR) {
2267 log_data_err("Is your data around?\n");
2268 return;
2269 } else if(U_FAILURE(status)) {
2270 log_err("Error opening collator\n");
2271 return;
2272 }
2273 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2274
2275 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2276 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2277 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2278 for (sLen = 500; sLen<501; sLen++) {
2279 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2280 strA[0] = baseA;
2281 strB[0] = baseA;
2282 for (i=1; i<=sLen-1; i++) {
2283 strA[i] = ccMix[i % 3];
2284 strB[sLen-i] = ccMix[i % 3];
2285 }
2286 strA[sLen] = 0;
2287 strB[sLen] = 0;
2288
2289 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2290 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2291 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2292 doTest(coll, strA, strB, UCOL_EQUAL);
2293 }
2294 }
2295
2296 setTestOption(QUICK_OPTION, myQ);
2297
2298
2299 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2300 /* of the string. Checks a couple of edge cases.*/
2301
2302 {
2303 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2304 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2305 ucol_setStrength(coll, UCOL_TERTIARY);
2306 doTest(coll, strA, strB, UCOL_EQUAL);
2307 }
2308
2309 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2310
2311 {
2312 /* New UCA 3.1.1.
2313 * test below used a code point from Desseret, which sorts differently
2314 * than d800 dc00
2315 */
2316 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2317 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2318 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2319 ucol_setStrength(coll, UCOL_TERTIARY);
2320 doTest(coll, strA, strB, UCOL_GREATER);
2321 }
2322
2323 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2324
2325 {
2326 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2327 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2328 char sortKeyA[50];
2329 char sortKeyAz[50];
2330 char sortKeyB[50];
2331 char sortKeyBz[50];
2332 int r;
2333
2334 /* there used to be -3 here. Hmmmm.... */
2335 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2336 result = ucol_strcoll(coll, strA, 3, strB, 3);
2337 if (result != UCOL_GREATER) {
2338 log_err("ERROR 1 in test 4\n");
2339 }
2340 result = ucol_strcoll(coll, strA, -1, strB, -1);
2341 if (result != UCOL_EQUAL) {
2342 log_err("ERROR 2 in test 4\n");
2343 }
2344
2345 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2346 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2347 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2348 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2349
2350 r = strcmp(sortKeyA, sortKeyAz);
2351 if (r <= 0) {
2352 log_err("Error 3 in test 4\n");
2353 }
2354 r = strcmp(sortKeyA, sortKeyB);
2355 if (r <= 0) {
2356 log_err("Error 4 in test 4\n");
2357 }
2358 r = strcmp(sortKeyAz, sortKeyBz);
2359 if (r != 0) {
2360 log_err("Error 5 in test 4\n");
2361 }
2362
2363 ucol_setStrength(coll, UCOL_IDENTICAL);
2364 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2365 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2366 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2367 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2368
2369 r = strcmp(sortKeyA, sortKeyAz);
2370 if (r <= 0) {
2371 log_err("Error 6 in test 4\n");
2372 }
2373 r = strcmp(sortKeyA, sortKeyB);
2374 if (r <= 0) {
2375 log_err("Error 7 in test 4\n");
2376 }
2377 r = strcmp(sortKeyAz, sortKeyBz);
2378 if (r != 0) {
2379 log_err("Error 8 in test 4\n");
2380 }
2381 ucol_setStrength(coll, UCOL_TERTIARY);
2382 }
2383
2384
2385 /* Test 5: Null characters in non-normal source strings.*/
2386
2387 {
2388 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2389 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2390 char sortKeyA[50];
2391 char sortKeyAz[50];
2392 char sortKeyB[50];
2393 char sortKeyBz[50];
2394 int r;
2395
2396 result = ucol_strcoll(coll, strA, 6, strB, 6);
2397 if (result != UCOL_GREATER) {
2398 log_err("ERROR 1 in test 5\n");
2399 }
2400 result = ucol_strcoll(coll, strA, -1, strB, -1);
2401 if (result != UCOL_EQUAL) {
2402 log_err("ERROR 2 in test 5\n");
2403 }
2404
2405 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2406 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2407 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2408 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2409
2410 r = strcmp(sortKeyA, sortKeyAz);
2411 if (r <= 0) {
2412 log_err("Error 3 in test 5\n");
2413 }
2414 r = strcmp(sortKeyA, sortKeyB);
2415 if (r <= 0) {
2416 log_err("Error 4 in test 5\n");
2417 }
2418 r = strcmp(sortKeyAz, sortKeyBz);
2419 if (r != 0) {
2420 log_err("Error 5 in test 5\n");
2421 }
2422
2423 ucol_setStrength(coll, UCOL_IDENTICAL);
2424 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2425 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2426 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2427 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2428
2429 r = strcmp(sortKeyA, sortKeyAz);
2430 if (r <= 0) {
2431 log_err("Error 6 in test 5\n");
2432 }
2433 r = strcmp(sortKeyA, sortKeyB);
2434 if (r <= 0) {
2435 log_err("Error 7 in test 5\n");
2436 }
2437 r = strcmp(sortKeyAz, sortKeyBz);
2438 if (r != 0) {
2439 log_err("Error 8 in test 5\n");
2440 }
2441 ucol_setStrength(coll, UCOL_TERTIARY);
2442 }
2443
2444
2445 /* Test 6: Null character as base of a non-normal combining sequence.*/
2446
2447 {
2448 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2449 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2450
2451 result = ucol_strcoll(coll, strA, 5, strB, 5);
2452 if (result != UCOL_LESS) {
2453 log_err("Error 1 in test 6\n");
2454 }
2455 result = ucol_strcoll(coll, strA, -1, strB, -1);
2456 if (result != UCOL_EQUAL) {
2457 log_err("Error 2 in test 6\n");
2458 }
2459 }
2460
2461 ucol_close(coll);
2462 }
2463
2464
2465
#if 0
/*
 * Disabled test: feeds each string of caseBitData[] (after unescaping) to
 * ucol_uprv_getCaseBits() on the root collator and compares the returned
 * case bits with the matching entry of results[].
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    if (U_FAILURE(status)) {
        log_err_status(status, "Unable to open the root collator -> %s\n", u_errorName(status));
        return;
    }

    for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* Release the collator opened above. */
    ucol_close(UCA);
}
#endif
2493
2494 static void TestHangulTailoring(void) {
2495 static const char *koreanData[] = {
2496 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2497 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2498 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2499 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2500 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2501 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2502 };
2503
2504 const char *rules =
2505 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2506 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2507 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2508 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2509 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2510 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2511
2512
2513 UErrorCode status = U_ZERO_ERROR;
2514 UChar rlz[2048] = { 0 };
2515 uint32_t rlen = u_unescape(rules, rlz, 2048);
2516
2517 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2518 if(status == U_FILE_ACCESS_ERROR) {
2519 log_data_err("Is your data around?\n");
2520 return;
2521 } else if(U_FAILURE(status)) {
2522 log_err("Error opening collator\n");
2523 return;
2524 }
2525
2526 log_verbose("Using start of korean rules\n");
2527
2528 if(U_SUCCESS(status)) {
2529 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2530 } else {
2531 log_err("Unable to open collator with rules %s\n", rules);
2532 }
2533
2534 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2535 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2536 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2537
2538 ucol_close(coll);
2539
2540 log_verbose("Using ko__LOTUS locale\n");
2541 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2542 }
2543
2544 static void TestCompressOverlap(void) {
2545 UChar secstr[150];
2546 UChar tertstr[150];
2547 UErrorCode status = U_ZERO_ERROR;
2548 UCollator *coll;
2549 char result[200];
2550 uint32_t resultlen;
2551 int count = 0;
2552 char *tempptr;
2553
2554 coll = ucol_open("", &status);
2555
2556 if (U_FAILURE(status)) {
2557 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2558 return;
2559 }
2560 while (count < 149) {
2561 secstr[count] = 0x0020; /* [06, 05, 05] */
2562 tertstr[count] = 0x0020;
2563 count ++;
2564 }
2565
2566 /* top down compression ----------------------------------- */
2567 secstr[count] = 0x0332; /* [, 87, 05] */
2568 tertstr[count] = 0x3000; /* [06, 05, 07] */
2569
2570 /* no compression secstr should have 150 secondary bytes, tertstr should
2571 have 150 tertiary bytes.
2572 with correct overlapping compression, secstr should have 4 secondary
2573 bytes, tertstr should have > 2 tertiary bytes */
2574 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2575 tempptr = uprv_strchr(result, 1) + 1;
2576 while (*(tempptr + 1) != 1) {
2577 /* the last secondary collation element is not checked since it is not
2578 part of the compression */
2579 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2580 log_err("Secondary compression overlapped\n");
2581 }
2582 tempptr ++;
2583 }
2584
2585 /* tertiary top/bottom/common for en_US is similar to the secondary
2586 top/bottom/common */
2587 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2588 tempptr = uprv_strrchr(result, 1) + 1;
2589 while (*(tempptr + 1) != 0) {
2590 /* the last secondary collation element is not checked since it is not
2591 part of the compression */
2592 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2593 log_err("Tertiary compression overlapped\n");
2594 }
2595 tempptr ++;
2596 }
2597
2598 /* bottom up compression ------------------------------------- */
2599 secstr[count] = 0;
2600 tertstr[count] = 0;
2601 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2602 tempptr = uprv_strchr(result, 1) + 1;
2603 while (*(tempptr + 1) != 1) {
2604 /* the last secondary collation element is not checked since it is not
2605 part of the compression */
2606 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2607 log_err("Secondary compression overlapped\n");
2608 }
2609 tempptr ++;
2610 }
2611
2612 /* tertiary top/bottom/common for en_US is similar to the secondary
2613 top/bottom/common */
2614 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2615 tempptr = uprv_strrchr(result, 1) + 1;
2616 while (*(tempptr + 1) != 0) {
2617 /* the last secondary collation element is not checked since it is not
2618 part of the compression */
2619 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2620 log_err("Tertiary compression overlapped\n");
2621 }
2622 tempptr ++;
2623 }
2624
2625 ucol_close(coll);
2626 }
2627
2628 static void TestCyrillicTailoring(void) {
2629 static const char *test[] = {
2630 "\\u0410b",
2631 "\\u0410\\u0306a",
2632 "\\u04d0A"
2633 };
2634
2635 /* Russian overrides contractions, so this test is not valid anymore */
2636 /*genericLocaleStarter("ru", test, 3);*/
2637
2638 genericLocaleStarter("root", test, 3);
2639 genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2640 genericRulesStarter("&Z < \\u0410", test, 3);
2641 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2642 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2643 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2644 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2645 }
2646
/**
 * Runs two three-string orderings through a rule set that consists only of
 * a [suppressContractions] option for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char * const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, LEN(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, LEN(testNoCont2));
}
2663
2664 static void TestContraction(void) {
2665 const static char *testrules[] = {
2666 "&A = AB / B",
2667 "&A = A\\u0306/\\u0306",
2668 "&c = ch / h"
2669 };
2670 const static UChar testdata[][2] = {
2671 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2672 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2673 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2674 };
2675 const static UChar testdata2[][2] = {
2676 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2677 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2678 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2679 };
2680 const static char *testrules3[] = {
2681 "&z < xyz &xyzw << B",
2682 "&z < xyz &xyz << B / w",
2683 "&z < ch &achm << B",
2684 "&z < ch &a << B / chm",
2685 "&\\ud800\\udc00w << B",
2686 "&\\ud800\\udc00 << B / w",
2687 "&a\\ud800\\udc00m << B",
2688 "&a << B / \\ud800\\udc00m",
2689 };
2690
2691 UErrorCode status = U_ZERO_ERROR;
2692 UCollator *coll;
2693 UChar rule[256] = {0};
2694 uint32_t rlen = 0;
2695 int i;
2696
2697 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2698 UCollationElements *iter1;
2699 int j = 0;
2700 log_verbose("Rule %s for testing\n", testrules[i]);
2701 rlen = u_unescape(testrules[i], rule, 32);
2702 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2703 if (U_FAILURE(status)) {
2704 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2705 return;
2706 }
2707 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2708 if (U_FAILURE(status)) {
2709 log_err("Collation iterator creation failed\n");
2710 return;
2711 }
2712 while (j < 2) {
2713 UCollationElements *iter2 = ucol_openElements(coll,
2714 &(testdata[i][j]),
2715 1, &status);
2716 uint32_t ce;
2717 if (U_FAILURE(status)) {
2718 log_err("Collation iterator creation failed\n");
2719 return;
2720 }
2721 ce = ucol_next(iter2, &status);
2722 while (ce != UCOL_NULLORDER) {
2723 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2724 log_err("Collation elements in contraction split does not match\n");
2725 return;
2726 }
2727 ce = ucol_next(iter2, &status);
2728 }
2729 j ++;
2730 ucol_closeElements(iter2);
2731 }
2732 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2733 log_err("Collation elements not exhausted\n");
2734 return;
2735 }
2736 ucol_closeElements(iter1);
2737 ucol_close(coll);
2738 }
2739
2740 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2741 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2742 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2743 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2744 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2745 testdata2[1][1]);
2746 return;
2747 }
2748 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2749 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2750 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2751 testdata2[2][1]);
2752 return;
2753 }
2754 ucol_close(coll);
2755
2756 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2757 UCollator *coll1,
2758 *coll2;
2759 UCollationElements *iter1,
2760 *iter2;
2761 UChar ch = 0x0042 /* 'B' */;
2762 uint32_t ce;
2763 rlen = u_unescape(testrules3[i], rule, 32);
2764 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2765 rlen = u_unescape(testrules3[i + 1], rule, 32);
2766 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2767 if (U_FAILURE(status)) {
2768 log_err("Collator creation failed %s\n", testrules[i]);
2769 return;
2770 }
2771 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2772 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2773 if (U_FAILURE(status)) {
2774 log_err("Collation iterator creation failed\n");
2775 return;
2776 }
2777 ce = ucol_next(iter1, &status);
2778 if (U_FAILURE(status)) {
2779 log_err("Retrieving ces failed\n");
2780 return;
2781 }
2782 while (ce != UCOL_NULLORDER) {
2783 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2784 log_err("CEs does not match\n");
2785 return;
2786 }
2787 ce = ucol_next(iter1, &status);
2788 if (U_FAILURE(status)) {
2789 log_err("Retrieving ces failed\n");
2790 return;
2791 }
2792 }
2793 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2794 log_err("CEs not exhausted\n");
2795 return;
2796 }
2797 ucol_closeElements(iter1);
2798 ucol_closeElements(iter2);
2799 ucol_close(coll1);
2800 ucol_close(coll2);
2801 }
2802 }
2803
2804 static void TestExpansion(void) {
2805 const static char *testrules[] = {
2806 "&J << K / B & K << M",
2807 "&J << K / B << M"
2808 };
2809 const static UChar testdata[][3] = {
2810 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2811 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2812 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2813 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2814 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2815 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2816 };
2817
2818 UErrorCode status = U_ZERO_ERROR;
2819 UCollator *coll;
2820 UChar rule[256] = {0};
2821 uint32_t rlen = 0;
2822 int i;
2823
2824 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2825 int j = 0;
2826 log_verbose("Rule %s for testing\n", testrules[i]);
2827 rlen = u_unescape(testrules[i], rule, 32);
2828 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2829 if (U_FAILURE(status)) {
2830 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2831 return;
2832 }
2833
2834 for (j = 0; j < 5; j ++) {
2835 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2836 }
2837 ucol_close(coll);
2838 }
2839 }
2840
#if 0
/*
 * Exercises known limitations of the collation engine.
 * The test always fails, which is why it is compiled out.
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *tlimit01[] = {"add","b","adf"};
        static const char *tlimit02[] = {"aa","b","af"};

        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, tlimit01, LEN(tlimit01));
        genericRulesStarter(rule, tlimit02, LEN(tlimit02));
    }

    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
        static const char *tlimit02[] = {"ad","c","ch","af","f","h"};

        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, tlimit01, LEN(tlimit01));
        genericRulesStarter(rule, tlimit02, LEN(tlimit02));
    }

    /* normalization: nulls in contractions */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };
        const UColAttributeValue *modes[2];
        int m;

        /* Both orderings are tried with decomposition on, then with it off. */
        modes[0] = valOn;
        modes[1] = valOff;

        log_verbose("NULL in contractions\n");
        for (m = 0; m < 2; ++m) {
            genericRulesStarterWithOptions(rule, tlimit01, 2, att, modes[m], 1);
            genericRulesStarterWithOptions(rule, tlimit02, 2, att, modes[m], 1);
        }
    }

    /* normalization: contractions spanning normalization */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };
        const UColAttributeValue *modes[2];
        int m;

        modes[0] = valOn;
        modes[1] = valOff;

        log_verbose("contractions spanning normalization\n");
        for (m = 0; m < 2; ++m) {
            genericRulesStarterWithOptions(rule, tlimit01, 2, att, modes[m], 1);
            genericRulesStarterWithOptions(rule, tlimit02, 2, att, modes[m], 1);
        }
    }

    /* variable top:  */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rule = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        /* shifted + quaternary */
        genericRulesStarterWithOptions(rule, tlimit03, LEN(tlimit03), att, valOn, LEN(att));
        genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOn, LEN(att));
        genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOn, LEN(att));
        /* non-ignorable + tertiary */
        genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOff, LEN(att));
        genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOff, LEN(att));
    }

    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
        static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute att[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
        /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/

        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOn, LEN(att));
        genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOn, LEN(att));
        /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
        /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    }

}
#endif
2932
2933 static void TestBocsuCoverage(void) {
2934 UErrorCode status = U_ZERO_ERROR;
2935 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2936 UChar test[256] = {0};
2937 uint32_t tlen = u_unescape(testString, test, 32);
2938 uint8_t key[256] = {0};
2939 uint32_t klen = 0;
2940
2941 UCollator *coll = ucol_open("", &status);
2942 if(U_SUCCESS(status)) {
2943 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2944
2945 klen = ucol_getSortKey(coll, test, tlen, key, 256);
2946
2947 ucol_close(coll);
2948 } else {
2949 log_data_err("Couldn't open UCA\n");
2950 }
2951 }
2952
2953 static void TestVariableTopSetting(void) {
2954 UErrorCode status = U_ZERO_ERROR;
2955 const UChar *current = NULL;
2956 uint32_t varTopOriginal = 0, varTop1, varTop2;
2957 UCollator *coll = ucol_open("", &status);
2958 if(U_SUCCESS(status)) {
2959
2960 uint32_t strength = 0;
2961 uint16_t specs = 0;
2962 uint32_t chOffset = 0;
2963 uint32_t chLen = 0;
2964 uint32_t exOffset = 0;
2965 uint32_t exLen = 0;
2966 uint32_t oldChOffset = 0;
2967 uint32_t oldChLen = 0;
2968 uint32_t oldExOffset = 0;
2969 uint32_t oldExLen = 0;
2970 uint32_t prefixOffset = 0;
2971 uint32_t prefixLen = 0;
2972
2973 UBool startOfRules = TRUE;
2974 UColTokenParser src;
2975 UColOptionSet opts;
2976
2977 UChar *rulesCopy = NULL;
2978 uint32_t rulesLen;
2979
2980 UCollationResult result;
2981
2982 UChar first[256] = { 0 };
2983 UChar second[256] = { 0 };
2984 UParseError parseError;
2985 int32_t myQ = getTestOption(QUICK_OPTION);
2986
2987 uprv_memset(&src, 0, sizeof(UColTokenParser));
2988
2989 src.opts = &opts;
2990
2991 if(getTestOption(QUICK_OPTION) <= 0) {
2992 setTestOption(QUICK_OPTION, 1);
2993 }
2994
2995 /* this test will fail when normalization is turned on */
2996 /* therefore we always turn off exhaustive mode for it */
2997 { /* QUICK > 0*/
2998 log_verbose("Slide variable top over UCARules\n");
2999 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3000 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3001 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3002
3003 if(U_SUCCESS(status) && rulesLen > 0) {
3004 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3005 src.current = src.source = rulesCopy;
3006 src.end = rulesCopy+rulesLen;
3007 src.extraCurrent = src.end;
3008 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3009
3010 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3011 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3012 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3013 strength = src.parsedToken.strength;
3014 chOffset = src.parsedToken.charsOffset;
3015 chLen = src.parsedToken.charsLen;
3016 exOffset = src.parsedToken.extensionOffset;
3017 exLen = src.parsedToken.extensionLen;
3018 prefixOffset = src.parsedToken.prefixOffset;
3019 prefixLen = src.parsedToken.prefixLen;
3020 specs = src.parsedToken.flags;
3021
3022 startOfRules = FALSE;
3023 {
3024 log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3025 }
3026 if(strength == UCOL_PRIMARY) {
3027 status = U_ZERO_ERROR;
3028 varTopOriginal = ucol_getVariableTop(coll, &status);
3029 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3030 if(U_FAILURE(status)) {
3031 char buffer[256];
3032 char *buf = buffer;
3033 uint32_t i = 0, j;
3034 uint32_t CE = UCOL_NO_MORE_CES;
3035
3036 /* before we start screaming, let's see if there is a problem with the rules */
3037 UErrorCode collIterateStatus = U_ZERO_ERROR;
3038 collIterate *s = uprv_new_collIterate(&collIterateStatus);
3039 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3040
3041 CE = ucol_getNextCE(coll, s, &status);
3042
3043 for(i = 0; i < oldChLen; i++) {
3044 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3045 buf += j;
3046 }
3047 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3048 log_verbose("= Expected failure for %s =", buffer);
3049 } else {
3050 if(uprv_collIterateAtEnd(s)) {
3051 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3052 oldChOffset, u_errorName(status), buffer);
3053 } else {
3054 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3055 buffer);
3056 }
3057 }
3058 uprv_delete_collIterate(s);
3059 }
3060 varTop2 = ucol_getVariableTop(coll, &status);
3061 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3062 log_err("cannot retrieve set varTop value!\n");
3063 continue;
3064 }
3065
3066 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3067
3068 u_strncpy(first, src.source+oldChOffset, oldChLen);
3069 u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3070 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3071 first[2*oldChLen+chLen] = 0;
3072
3073 if(oldExLen == 0) {
3074 u_strncpy(second, src.source+chOffset, chLen);
3075 second[chLen] = 0;
3076 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3077 u_strncpy(second, src.source+oldExOffset, oldExLen);
3078 u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3079 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3080 second[2*oldExLen+chLen] = 0;
3081 }
3082 result = ucol_strcoll(coll, first, -1, second, -1);
3083 if(result == UCOL_EQUAL) {
3084 doTest(coll, first, second, UCOL_EQUAL);
3085 } else {
3086 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3087 }
3088 }
3089 }
3090 if(strength != UCOL_TOK_RESET) {
3091 oldChOffset = chOffset;
3092 oldChLen = chLen;
3093 oldExOffset = exOffset;
3094 oldExLen = exLen;
3095 }
3096 }
3097 status = U_ZERO_ERROR;
3098 }
3099 else {
3100 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3101 return;
3102 }
3103 if (U_FAILURE(status)) {
3104 log_err("Error parsing rules %s\n", u_errorName(status));
3105 return;
3106 }
3107 status = U_ZERO_ERROR;
3108 }
3109
3110 setTestOption(QUICK_OPTION, myQ);
3111
3112 log_verbose("Testing setting variable top to contractions\n");
3113 {
3114 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3115 int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3116 while(*conts != 0) {
3117 /*
3118 * A continuation is NUL-terminated and NUL-padded
3119 * except if it has the maximum length.
3120 */
3121 int32_t contractionLength = maxUCAContractionLength;
3122 while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3123 --contractionLength;
3124 }
3125 if(*(conts+1)==0) { /* pre-context */
3126 varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3127 } else {
3128 varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3129 }
3130 if(U_FAILURE(status)) {
3131 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3132 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3133 * therefore it is not an error when it complains about them. */
3134 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3135 *conts, *(conts+1), *(conts+2));
3136 } else {
3137 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3138 *conts, *(conts+1), *(conts+2), u_errorName(status));
3139 }
3140 status = U_ZERO_ERROR;
3141 }
3142 conts+=maxUCAContractionLength;
3143 }
3144
3145 status = U_ZERO_ERROR;
3146
3147 first[0] = 0x0040;
3148 first[1] = 0x0050;
3149 first[2] = 0x0000;
3150
3151 ucol_setVariableTop(coll, first, -1, &status);
3152
3153 if(U_SUCCESS(status)) {
3154 log_err("Invalid contraction succeded in setting variable top!\n");
3155 }
3156
3157 }
3158
3159 log_verbose("Test restoring variable top\n");
3160
3161 status = U_ZERO_ERROR;
3162 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3163 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3164 log_err("Couldn't restore old variable top\n");
3165 }
3166
3167 log_verbose("Testing calling with error set\n");
3168
3169 status = U_INTERNAL_PROGRAM_ERROR;
3170 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3171 varTop2 = ucol_getVariableTop(coll, &status);
3172 ucol_restoreVariableTop(coll, varTop2, &status);
3173 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3174 varTop2 = ucol_getVariableTop(NULL, &status);
3175 ucol_restoreVariableTop(NULL, varTop2, &status);
3176 if(status != U_INTERNAL_PROGRAM_ERROR) {
3177 log_err("Bad reaction to passed error!\n");
3178 }
3179 uprv_free(src.source);
3180 ucol_close(coll);
3181 } else {
3182 log_data_err("Couldn't open UCA collator\n");
3183 }
3184
3185 }
3186
3187 static void TestNonChars(void) {
3188 static const char *test[] = {
3189 "\\u0000", /* ignorable */
3190 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
3191 "\\uFDD0", "\\uFDEF",
3192 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3193 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3194 "\\U0003FFFE", "\\U0003FFFF",
3195 "\\U0004FFFE", "\\U0004FFFF",
3196 "\\U0005FFFE", "\\U0005FFFF",
3197 "\\U0006FFFE", "\\U0006FFFF",
3198 "\\U0007FFFE", "\\U0007FFFF",
3199 "\\U0008FFFE", "\\U0008FFFF",
3200 "\\U0009FFFE", "\\U0009FFFF",
3201 "\\U000AFFFE", "\\U000AFFFF",
3202 "\\U000BFFFE", "\\U000BFFFF",
3203 "\\U000CFFFE", "\\U000CFFFF",
3204 "\\U000DFFFE", "\\U000DFFFF",
3205 "\\U000EFFFE", "\\U000EFFFF",
3206 "\\U000FFFFE", "\\U000FFFFF",
3207 "\\U0010FFFE", "\\U0010FFFF",
3208 "\\uFFFF" /* special character with maximum primary weight */
3209 };
3210 UErrorCode status = U_ZERO_ERROR;
3211 UCollator *coll = ucol_open("en_US", &status);
3212
3213 log_verbose("Test non characters\n");
3214
3215 if(U_SUCCESS(status)) {
3216 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3217 } else {
3218 log_err_status(status, "Unable to open collator\n");
3219 }
3220
3221 ucol_close(coll);
3222 }
3223
3224 static void TestExtremeCompression(void) {
3225 static char *test[4];
3226 int32_t j = 0, i = 0;
3227
3228 for(i = 0; i<4; i++) {
3229 test[i] = (char *)malloc(2048*sizeof(char));
3230 }
3231
3232 for(j = 20; j < 500; j++) {
3233 for(i = 0; i<4; i++) {
3234 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3235 test[i][j-1] = (char)('a'+i);
3236 test[i][j] = 0;
3237 }
3238 genericLocaleStarter("en_US", (const char **)test, 4);
3239 }
3240
3241
3242 for(i = 0; i<4; i++) {
3243 free(test[i]);
3244 }
3245 }
3246
3247 #if 0
3248 static void TestExtremeCompression(void) {
3249 static char *test[4];
3250 int32_t j = 0, i = 0;
3251 UErrorCode status = U_ZERO_ERROR;
3252 UCollator *coll = ucol_open("en_US", status);
3253 for(i = 0; i<4; i++) {
3254 test[i] = (char *)malloc(2048*sizeof(char));
3255 }
3256 for(j = 10; j < 2048; j++) {
3257 for(i = 0; i<4; i++) {
3258 uprv_memset(test[i], 'a', (j-2)*sizeof(char));
3259 test[i][j-1] = (char)('a'+i);
3260 test[i][j] = 0;
3261 }
3262 }
3263 genericLocaleStarter("en_US", (const char **)test, 4);
3264
3265 for(j = 10; j < 2048; j++) {
3266 for(i = 0; i<1; i++) {
3267 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3268 test[i][j] = 0;
3269 }
3270 }
3271 for(i = 0; i<4; i++) {
3272 free(test[i]);
3273 }
3274 }
3275 #endif
3276
3277 static void TestSurrogates(void) {
3278 static const char *test[] = {
3279 "z","\\ud900\\udc25", "\\ud805\\udc50",
3280 "\\ud800\\udc00y", "\\ud800\\udc00r",
3281 "\\ud800\\udc00f", "\\ud800\\udc00",
3282 "\\ud800\\udc00c", "\\ud800\\udc00b",
3283 "\\ud800\\udc00fa", "\\ud800\\udc00fb",
3284 "\\ud800\\udc00a",
3285 "c", "b"
3286 };
3287
3288 static const char *rule =
3289 "&z < \\ud900\\udc25 < \\ud805\\udc50"
3290 "< \\ud800\\udc00y < \\ud800\\udc00r"
3291 "< \\ud800\\udc00f << \\ud800\\udc00"
3292 "< \\ud800\\udc00fa << \\ud800\\udc00fb"
3293 "< \\ud800\\udc00a < c < b" ;
3294
3295 genericRulesStarter(rule, test, 14);
3296 }
3297
3298 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3299 static void TestPrefix(void) {
3300 uint32_t i;
3301
3302 static const struct {
3303 const char *rules;
3304 const char *data[50];
3305 const uint32_t len;
3306 } tests[] = {
3307 { "&z <<< z|a",
3308 {"zz", "za"}, 2 },
3309
3310 { "&z <<< z| a",
3311 {"zz", "za"}, 2 },
3312 { "[strength I]"
3313 "&a=\\ud900\\udc25"
3314 "&z<<<\\ud900\\udc25|a",
3315 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3316 };
3317
3318
3319 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3320 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3321 }
3322 }
3323
3324 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3325 /* JIS X 4061 collation order implementation */
3326 static void TestNewJapanese(void) {
3327
3328 static const char * const test1[] = {
3329 "\\u30b7\\u30e3\\u30fc\\u30ec",
3330 "\\u30b7\\u30e3\\u30a4",
3331 "\\u30b7\\u30e4\\u30a3",
3332 "\\u30b7\\u30e3\\u30ec",
3333 "\\u3061\\u3087\\u3053",
3334 "\\u3061\\u3088\\u3053",
3335 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3336 "\\u3066\\u30fc\\u305f",
3337 "\\u30c6\\u30fc\\u30bf",
3338 "\\u30c6\\u30a7\\u30bf",
3339 "\\u3066\\u3048\\u305f",
3340 "\\u3067\\u30fc\\u305f",
3341 "\\u30c7\\u30fc\\u30bf",
3342 "\\u30c7\\u30a7\\u30bf",
3343 "\\u3067\\u3048\\u305f",
3344 "\\u3066\\u30fc\\u305f\\u30fc",
3345 "\\u30c6\\u30fc\\u30bf\\u30a1",
3346 "\\u30c6\\u30a7\\u30bf\\u30fc",
3347 "\\u3066\\u3047\\u305f\\u3041",
3348 "\\u3066\\u3048\\u305f\\u30fc",
3349 "\\u3067\\u30fc\\u305f\\u30fc",
3350 "\\u30c7\\u30fc\\u30bf\\u30a1",
3351 "\\u3067\\u30a7\\u305f\\u30a1",
3352 "\\u30c7\\u3047\\u30bf\\u3041",
3353 "\\u30c7\\u30a8\\u30bf\\u30a2",
3354 "\\u3072\\u3086",
3355 "\\u3073\\u3085\\u3042",
3356 "\\u3074\\u3085\\u3042",
3357 "\\u3073\\u3085\\u3042\\u30fc",
3358 "\\u30d3\\u30e5\\u30a2\\u30fc",
3359 "\\u3074\\u3085\\u3042\\u30fc",
3360 "\\u30d4\\u30e5\\u30a2\\u30fc",
3361 "\\u30d2\\u30e5\\u30a6",
3362 "\\u30d2\\u30e6\\u30a6",
3363 "\\u30d4\\u30e5\\u30a6\\u30a2",
3364 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3365 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3366 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3367 "\\u3072\\u3085\\u3093",
3368 "\\u3074\\u3085\\u3093",
3369 "\\u3075\\u30fc\\u308a",
3370 "\\u30d5\\u30fc\\u30ea",
3371 "\\u3075\\u3045\\u308a",
3372 "\\u3075\\u30a5\\u308a",
3373 "\\u3075\\u30a5\\u30ea",
3374 "\\u30d5\\u30a6\\u30ea",
3375 "\\u3076\\u30fc\\u308a",
3376 "\\u30d6\\u30fc\\u30ea",
3377 "\\u3076\\u3045\\u308a",
3378 "\\u30d6\\u30a5\\u308a",
3379 "\\u3077\\u3046\\u308a",
3380 "\\u30d7\\u30a6\\u30ea",
3381 "\\u3075\\u30fc\\u308a\\u30fc",
3382 "\\u30d5\\u30a5\\u30ea\\u30fc",
3383 "\\u3075\\u30a5\\u308a\\u30a3",
3384 "\\u30d5\\u3045\\u308a\\u3043",
3385 "\\u30d5\\u30a6\\u30ea\\u30fc",
3386 "\\u3075\\u3046\\u308a\\u3043",
3387 "\\u30d6\\u30a6\\u30ea\\u30a4",
3388 "\\u3077\\u30fc\\u308a\\u30fc",
3389 "\\u3077\\u30a5\\u308a\\u30a4",
3390 "\\u3077\\u3046\\u308a\\u30fc",
3391 "\\u30d7\\u30a6\\u30ea\\u30a4",
3392 "\\u30d5\\u30fd",
3393 "\\u3075\\u309e",
3394 "\\u3076\\u309d",
3395 "\\u3076\\u3075",
3396 "\\u3076\\u30d5",
3397 "\\u30d6\\u3075",
3398 "\\u30d6\\u30d5",
3399 "\\u3076\\u309e",
3400 "\\u3076\\u3077",
3401 "\\u30d6\\u3077",
3402 "\\u3077\\u309d",
3403 "\\u30d7\\u30fd",
3404 "\\u3077\\u3075",
3405 };
3406
3407 static const char *test2[] = {
3408 "\\u306f\\u309d", /* H\\u309d */
3409 "\\u30cf\\u30fd", /* K\\u30fd */
3410 "\\u306f\\u306f", /* HH */
3411 "\\u306f\\u30cf", /* HK */
3412 "\\u30cf\\u30cf", /* KK */
3413 "\\u306f\\u309e", /* H\\u309e */
3414 "\\u30cf\\u30fe", /* K\\u30fe */
3415 "\\u306f\\u3070", /* HH\\u309b */
3416 "\\u30cf\\u30d0", /* KK\\u309b */
3417 "\\u306f\\u3071", /* HH\\u309c */
3418 "\\u30cf\\u3071", /* KH\\u309c */
3419 "\\u30cf\\u30d1", /* KK\\u309c */
3420 "\\u3070\\u309d", /* H\\u309b\\u309d */
3421 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3422 "\\u3070\\u306f", /* H\\u309bH */
3423 "\\u30d0\\u30cf", /* K\\u309bK */
3424 "\\u3070\\u309e", /* H\\u309b\\u309e */
3425 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3426 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3427 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3428 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3429 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3430 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3431 "\\u3071\\u309d", /* H\\u309c\\u309d */
3432 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3433 "\\u3071\\u306f", /* H\\u309cH */
3434 "\\u30d1\\u30cf", /* K\\u309cK */
3435 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3436 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3437 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3438 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3439 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3440 };
3441 /*
3442 static const char *test3[] = {
3443 "\\u221er\\u221e",
3444 "\\u221eR#",
3445 "\\u221et\\u221e",
3446 "#r\\u221e",
3447 "#R#",
3448 "#t%",
3449 "#T%",
3450 "8t\\u221e",
3451 "8T\\u221e",
3452 "8t#",
3453 "8T#",
3454 "8t%",
3455 "8T%",
3456 "8t8",
3457 "8T8",
3458 "\\u03c9r\\u221e",
3459 "\\u03a9R%",
3460 "rr\\u221e",
3461 "rR\\u221e",
3462 "Rr\\u221e",
3463 "RR\\u221e",
3464 "RT%",
3465 "rt8",
3466 "tr\\u221e",
3467 "tr8",
3468 "TR8",
3469 "tt8",
3470 "\\u30b7\\u30e3\\u30fc\\u30ec",
3471 };
3472 */
3473 static const UColAttribute att[] = { UCOL_STRENGTH };
3474 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3475
3476 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3477 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3478
3479 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3480 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3481 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3482 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3483 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3484 }
3485
3486 static void TestStrCollIdenticalPrefix(void) {
3487 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3488 const char* test[] = {
3489 "ab\\ud9b0\\udc70",
3490 "ab\\ud9b0\\udc71"
3491 };
3492 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3493 }
3494 /* Contractions should have all their canonically equivalent */
3495 /* strings included */
3496 static void TestContractionClosure(void) {
3497 static const struct {
3498 const char *rules;
3499 const char *data[10];
3500 const uint32_t len;
3501 } tests[] = {
3502 { "&b=\\u00e4\\u00e4",
3503 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3504 { "&b=\\u00C5",
3505 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3506 };
3507 uint32_t i;
3508
3509
3510 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3511 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3512 }
3513 }
3514
3515 /* This tests also fails*/
3516 static void TestBeforePrefixFailure(void) {
3517 static const struct {
3518 const char *rules;
3519 const char *data[10];
3520 const uint32_t len;
3521 } tests[] = {
3522 { "&g <<< a"
3523 "&[before 3]\\uff41 <<< x",
3524 {"x", "\\uff41"}, 2 },
3525 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3526 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3527 "&[before 3]\\u30a7<<<\\u30a9",
3528 {"\\u30a9", "\\u30a7"}, 2 },
3529 { "&[before 3]\\u30a7<<<\\u30a9"
3530 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3531 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3532 {"\\u30a9", "\\u30a7"}, 2 },
3533 };
3534 uint32_t i;
3535
3536
3537 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3538 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3539 }
3540
3541 #if 0
3542 const char* rule1 =
3543 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3544 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3545 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3546 const char* rule2 =
3547 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3548 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3549 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3550 const char* test[] = {
3551 "\\u30c6\\u30fc\\u30bf",
3552 "\\u30c6\\u30a7\\u30bf",
3553 };
3554 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3555 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3556 /* this piece of code should be in some sort of verbose mode */
3557 /* it gets the collation elements for elements and prints them */
3558 /* This is useful when trying to see whether the problem is */
3559 {
3560 UErrorCode status = U_ZERO_ERROR;
3561 uint32_t i = 0;
3562 UCollationElements *it = NULL;
3563 uint32_t CE;
3564 UChar string[256];
3565 uint32_t uStringLen;
3566 UCollator *coll = NULL;
3567
3568 uStringLen = u_unescape(rule1, string, 256);
3569
3570 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3571
3572 /*coll = ucol_open("ja_JP_JIS", &status);*/
3573 it = ucol_openElements(coll, string, 0, &status);
3574
3575 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3576 log_verbose("%s\n", test[i]);
3577 uStringLen = u_unescape(test[i], string, 256);
3578 ucol_setText(it, string, uStringLen, &status);
3579
3580 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3581 log_verbose("%08X\n", CE);
3582 }
3583 log_verbose("\n");
3584
3585 }
3586
3587 ucol_closeElements(it);
3588 ucol_close(coll);
3589 }
3590 #endif
3591 }
3592
3593 static void TestPrefixCompose(void) {
3594 const char* rule1 =
3595 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3596 /*
3597 const char* test[] = {
3598 "\\u30c6\\u30fc\\u30bf",
3599 "\\u30c6\\u30a7\\u30bf",
3600 };
3601 */
3602 {
3603 UErrorCode status = U_ZERO_ERROR;
3604 /*uint32_t i = 0;*/
3605 /*UCollationElements *it = NULL;*/
3606 /* uint32_t CE;*/
3607 UChar string[256];
3608 uint32_t uStringLen;
3609 UCollator *coll = NULL;
3610
3611 uStringLen = u_unescape(rule1, string, 256);
3612
3613 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3614 ucol_close(coll);
3615 }
3616
3617
3618 }
3619
3620 /*
3621 [last variable] last variable value
3622 [last primary ignorable] largest CE for primary ignorable
3623 [last secondary ignorable] largest CE for secondary ignorable
3624 [last tertiary ignorable] largest CE for tertiary ignorable
3625 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3626 */
3627
3628 static void TestRuleOptions(void) {
3629 /* values here are hardcoded and are correct for the current UCA
3630 * when the UCA changes, one might be forced to change these
3631 * values.
3632 */
3633
3634 /*
3635 * These strings contain the last character before [variable top]
3636 * and the first and second characters (by primary weights) after it.
3637 * See FractionalUCA.txt. For example:
3638 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3639 [variable top = 0C FE]
3640 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3641 and
3642 00B4; [0D 0C, 05, 05]
3643 *
3644 * Note: Starting with UCA 6.0, the [variable top] collation element
3645 * is not the weight of any character or string,
3646 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3647 */
3648 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3649 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
3650 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3651
3652 /*
3653 * This string has to match the character that has the [last regular] weight
3654 * which changes with each UCA version.
3655 * See the bottom of FractionalUCA.txt which says something like
3656 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3657 *
3658 * Note: Starting with UCA 6.0, the [last regular] collation element
3659 * is not the weight of any character or string,
3660 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3661 */
3662 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3663
3664 static const struct {
3665 const char *rules;
3666 const char *data[10];
3667 const uint32_t len;
3668 } tests[] = {
3669 /* - all befores here amount to zero */
3670 { "&[before 3][first tertiary ignorable]<<<a",
3671 { "\\u0000", "a"}, 2
3672 }, /* you cannot go before first tertiary ignorable */
3673
3674 { "&[before 3][last tertiary ignorable]<<<a",
3675 { "\\u0000", "a"}, 2
3676 }, /* you cannot go before last tertiary ignorable */
3677
3678 { "&[before 3][first secondary ignorable]<<<a",
3679 { "\\u0000", "a"}, 2
3680 }, /* you cannot go before first secondary ignorable */
3681
3682 { "&[before 3][last secondary ignorable]<<<a",
3683 { "\\u0000", "a"}, 2
3684 }, /* you cannot go before first secondary ignorable */
3685
3686 /* 'normal' befores */
3687
3688 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3689 { "c", "b", "\\u0332", "a" }, 4
3690 },
3691
3692 /* we don't have a code point that corresponds to
3693 * the last primary ignorable
3694 */
3695 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3696 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3697 },
3698
3699 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3700 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3701 },
3702
3703 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3704 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
3705 },
3706
3707 { "&[first regular]<a"
3708 "&[before 1][first regular]<b",
3709 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
3710 },
3711
3712 { "&[before 1][last regular]<b"
3713 "&[last regular]<a",
3714 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3715 },
3716
3717 { "&[before 1][first implicit]<b"
3718 "&[first implicit]<a",
3719 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3720 },
3721
3722 { "&[before 1][last implicit]<b"
3723 "&[last implicit]<a",
3724 { "b", "\\U0010FFFD", "a" }, 3
3725 },
3726
3727 { "&[last variable]<z"
3728 "&[last primary ignorable]<x"
3729 "&[last secondary ignorable]<<y"
3730 "&[last tertiary ignorable]<<<w"
3731 "&[top]<u",
3732 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
3733 }
3734
3735 };
3736 uint32_t i;
3737
3738 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3739 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3740 }
3741 }
3742
3743
3744 static void TestOptimize(void) {
3745 /* this is not really a test - just trying out
3746 * whether copying of UCA contents will fail
3747 * Cannot really test, since the functionality
3748 * remains the same.
3749 */
3750 static const struct {
3751 const char *rules;
3752 const char *data[10];
3753 const uint32_t len;
3754 } tests[] = {
3755 /* - all befores here amount to zero */
3756 { "[optimize [\\uAC00-\\uD7FF]]",
3757 { "a", "b"}, 2}
3758 };
3759 uint32_t i;
3760
3761 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3762 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3763 }
3764 }
3765
3766 /*
3767 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3768 weiv ucol_strcollIter?
3769 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3770 weiv these are the input strings?
3771 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3772 weiv will check - could be a problem with utf-8 iterator
3773 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3774 weiv hmmm
3775 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3776 weiv that doesn't sound right
3777 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3778 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3779 cycheng@ca.ibm.c... yes
3780 weiv and then do the comparison
3781 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3782 weiv utf-16 strings look like a little endian ones in the example you sent me
3783 weiv It could be a bug - let me try to test it out
3784 cycheng@ca.ibm.c... ok
3785 cycheng@ca.ibm.c... we can wait till the conf. call
3786 cycheng@ca.ibm.c... next weke
3787 weiv that would be great
3788 weiv hmmm
3789 weiv I might be wrong
3790 weiv let me play with it some more
3791 cycheng@ca.ibm.c... ok
3792 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3793 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3794 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3795 weiv ok
3796 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3797 weiv thanks
3798 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3799 */
3800 #if 0
3801 static void Alexis(void) {
3802 UErrorCode status = U_ZERO_ERROR;
3803 UCollator *coll = ucol_open("", &status);
3804
3805
3806 const char utf16be[2][4] = {
3807 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
3808 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
3809 };
3810
3811 const char utf8[2][4] = {
3812 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
3813 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
3814 };
3815
3816 UCharIterator iterU161, iterU162;
3817 UCharIterator iterU81, iterU82;
3818
3819 UCollationResult resU16, resU8;
3820
3821 uiter_setUTF16BE(&iterU161, utf16be[0], 4);
3822 uiter_setUTF16BE(&iterU162, utf16be[1], 4);
3823
3824 uiter_setUTF8(&iterU81, utf8[0], 4);
3825 uiter_setUTF8(&iterU82, utf8[1], 4);
3826
3827 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3828
3829 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
3830 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
3831
3832
3833 if(resU16 != resU8) {
3834 log_err("different results\n");
3835 }
3836
3837 ucol_close(coll);
3838 }
3839 #endif
3840
3841 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3842 static void Alexis2(void) {
3843 UErrorCode status = U_ZERO_ERROR;
3844 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3845 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3846 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3847 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3848
3849 UConverter *conv = NULL;
3850
3851 UCharIterator U16BEItS, U16BEItT;
3852 UCharIterator U8ItS, U8ItT;
3853
3854 UCollationResult resU16, resU16BE, resU8;
3855
3856 static const char* const pairs[][2] = {
3857 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3858 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3859 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3860 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3861 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3862 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3863 { "\\u0020", "\\u0020\\u0000"}
3864 /*
3865 5F20 (my result here)
3866 5F204E008E3F
3867 5F20 (your result here)
3868 */
3869 };
3870
3871 int32_t i = 0;
3872
3873 UCollator *coll = ucol_open("", &status);
3874 if(status == U_FILE_ACCESS_ERROR) {
3875 log_data_err("Is your data around?\n");
3876 return;
3877 } else if(U_FAILURE(status)) {
3878 log_err("Error opening collator\n");
3879 return;
3880 }
3881 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3882 conv = ucnv_open("UTF16BE", &status);
3883 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3884 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3885 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3886
3887 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3888
3889 log_verbose("Result of strcoll is %i\n", resU16);
3890
3891 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3892 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3893
3894 /* use the original sizes, as the result from converter is in bytes */
3895 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3896 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3897
3898 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3899
3900 log_verbose("Result of U16BE is %i\n", resU16BE);
3901
3902 if(resU16 != resU16BE) {
3903 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3904 }
3905
3906 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3907 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3908
3909 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3910 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3911
3912 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3913
3914 if(resU16 != resU8) {
3915 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3916 }
3917
3918 }
3919
3920 ucol_close(coll);
3921 ucnv_close(conv);
3922 }
3923
3924 static void TestHebrewUCA(void) {
3925 UErrorCode status = U_ZERO_ERROR;
3926 static const char *first[] = {
3927 "d790d6b8d79cd795d6bcd7a9",
3928 "d790d79cd79ed7a7d799d799d7a1",
3929 "d790d6b4d79ed795d6bcd7a9",
3930 };
3931
3932 char utf8String[3][256];
3933 UChar utf16String[3][256];
3934
3935 int32_t i = 0, j = 0;
3936 int32_t sizeUTF8[3];
3937 int32_t sizeUTF16[3];
3938
3939 UCollator *coll = ucol_open("", &status);
3940 if (U_FAILURE(status)) {
3941 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3942 return;
3943 }
3944 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3945
3946 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3947 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3948 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3949 log_verbose("%i: ");
3950 for(j = 0; j < sizeUTF16[i]; j++) {
3951 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3952 log_verbose("%04X", utf16String[i][j]);
3953 }
3954 log_verbose("\n");
3955 }
3956 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3957 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3958 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3959 }
3960 }
3961
3962 ucol_close(coll);
3963
3964 }
3965
3966 static void TestPartialSortKeyTermination(void) {
3967 static const char* cases[] = {
3968 "\\u1234\\u1234\\udc00",
3969 "\\udc00\\ud800\\ud800"
3970 };
3971
3972 int32_t i = sizeof(UCollator);
3973
3974 UErrorCode status = U_ZERO_ERROR;
3975
3976 UCollator *coll = ucol_open("", &status);
3977
3978 UCharIterator iter;
3979
3980 UChar currCase[256];
3981 int32_t length = 0;
3982 int32_t pKeyLen = 0;
3983
3984 uint8_t key[256];
3985
3986 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3987 uint32_t state[2] = {0, 0};
3988 length = u_unescape(cases[i], currCase, 256);
3989 uiter_setString(&iter, currCase, length);
3990 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3991
3992 log_verbose("Done\n");
3993
3994 }
3995 ucol_close(coll);
3996 }
3997
3998 static void TestSettings(void) {
3999 static const char* cases[] = {
4000 "apple",
4001 "Apple"
4002 };
4003
4004 static const char* locales[] = {
4005 "",
4006 "en"
4007 };
4008
4009 UErrorCode status = U_ZERO_ERROR;
4010
4011 int32_t i = 0, j = 0;
4012
4013 UChar source[256], target[256];
4014 int32_t sLen = 0, tLen = 0;
4015
4016 UCollator *collateObject = NULL;
4017 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4018 collateObject = ucol_open(locales[i], &status);
4019 ucol_setStrength(collateObject, UCOL_PRIMARY);
4020 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4021 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4022 sLen = u_unescape(cases[j-1], source, 256);
4023 source[sLen] = 0;
4024 tLen = u_unescape(cases[j], target, 256);
4025 source[tLen] = 0;
4026 doTest(collateObject, source, target, UCOL_EQUAL);
4027 }
4028 ucol_close(collateObject);
4029 }
4030 }
4031
4032 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4033 UErrorCode status = U_ZERO_ERROR;
4034 int32_t errorNo = 0;
4035 /*const UChar *sourceRules = NULL;*/
4036 /*int32_t sourceRulesLen = 0;*/
4037 UColAttributeValue french = UCOL_OFF;
4038 int32_t cloneSize = 0;
4039
4040 if(!ucol_equals(source, target)) {
4041 log_err("Same collators, different address not equal\n");
4042 errorNo++;
4043 }
4044 ucol_close(target);
4045 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4046 /* currently, safeClone is implemented through getRules/openRules
4047 * so it is the same as the test below - I will comment that test out.
4048 */
4049 /* real thing */
4050 target = ucol_safeClone(source, NULL, &cloneSize, &status);
4051 if(U_FAILURE(status)) {
4052 log_err("Error creating clone\n");
4053 errorNo++;
4054 return errorNo;
4055 }
4056 if(!ucol_equals(source, target)) {
4057 log_err("Collator different from it's clone\n");
4058 errorNo++;
4059 }
4060 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4061 if(french == UCOL_ON) {
4062 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4063 } else {
4064 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4065 }
4066 if(U_FAILURE(status)) {
4067 log_err("Error setting attributes\n");
4068 errorNo++;
4069 return errorNo;
4070 }
4071 if(ucol_equals(source, target)) {
4072 log_err("Collators same even when options changed\n");
4073 errorNo++;
4074 }
4075 ucol_close(target);
4076 /* commented out since safeClone uses exactly the same technique */
4077 /*
4078 sourceRules = ucol_getRules(source, &sourceRulesLen);
4079 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4080 if(U_FAILURE(status)) {
4081 log_err("Error instantiating target from rules\n");
4082 errorNo++;
4083 return errorNo;
4084 }
4085 if(!ucol_equals(source, target)) {
4086 log_err("Collator different from collator that was created from the same rules\n");
4087 errorNo++;
4088 }
4089 ucol_close(target);
4090 */
4091 }
4092 return errorNo;
4093 }
4094
4095
4096 static void TestEquals(void) {
4097 /* ucol_equals is not currently a public API. There is a chance that it will become
4098 * something like this, but currently it is only used by RuleBasedCollator::operator==
4099 */
4100 /* test whether the two collators instantiated from the same locale are equal */
4101 UErrorCode status = U_ZERO_ERROR;
4102 UParseError parseError;
4103 int32_t noOfLoc = uloc_countAvailable();
4104 const char *locName = NULL;
4105 UCollator *source = NULL, *target = NULL;
4106 int32_t i = 0;
4107
4108 const char* rules[] = {
4109 "&l < lj <<< Lj <<< LJ",
4110 "&n < nj <<< Nj <<< NJ",
4111 "&ae <<< \\u00e4",
4112 "&AE <<< \\u00c4"
4113 };
4114 /*
4115 const char* badRules[] = {
4116 "&l <<< Lj",
4117 "&n < nj <<< nJ <<< NJ",
4118 "&a <<< \\u00e4",
4119 "&AE <<< \\u00c4 <<< x"
4120 };
4121 */
4122
4123 UChar sourceRules[1024], targetRules[1024];
4124 int32_t sourceRulesSize = 0, targetRulesSize = 0;
4125 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4126
4127 for(i = 0; i < rulesSize; i++) {
4128 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4129 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4130 }
4131
4132 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4133 if(status == U_FILE_ACCESS_ERROR) {
4134 log_data_err("Is your data around?\n");
4135 return;
4136 } else if(U_FAILURE(status)) {
4137 log_err("Error opening collator\n");
4138 return;
4139 }
4140 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4141 if(!ucol_equals(source, target)) {
4142 log_err("Equivalent collators not equal!\n");
4143 }
4144 ucol_close(source);
4145 ucol_close(target);
4146
4147 source = ucol_open("root", &status);
4148 target = ucol_open("root", &status);
4149 log_verbose("Testing root\n");
4150 if(!ucol_equals(source, source)) {
4151 log_err("Same collator not equal\n");
4152 }
4153 if(TestEqualsForCollator(locName, source, target)) {
4154 log_err("Errors for root\n", locName);
4155 }
4156 ucol_close(source);
4157
4158 for(i = 0; i<noOfLoc; i++) {
4159 status = U_ZERO_ERROR;
4160 locName = uloc_getAvailable(i);
4161 /*if(hasCollationElements(locName)) {*/
4162 log_verbose("Testing equality for locale %s\n", locName);
4163 source = ucol_open(locName, &status);
4164 target = ucol_open(locName, &status);
4165 if (U_FAILURE(status)) {
4166 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
4167 continue;
4168 }
4169 if(TestEqualsForCollator(locName, source, target)) {
4170 log_err("Errors for locale %s\n", locName);
4171 }
4172 ucol_close(source);
4173 /*}*/
4174 }
4175 }
4176
4177 static void TestJ2726(void) {
4178 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4179 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4180 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4181 UErrorCode status = U_ZERO_ERROR;
4182 UCollator *coll = ucol_open("en", &status);
4183 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4184 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4185 doTest(coll, a, aSpace, UCOL_EQUAL);
4186 doTest(coll, aSpace, a, UCOL_EQUAL);
4187 doTest(coll, a, spaceA, UCOL_EQUAL);
4188 doTest(coll, spaceA, a, UCOL_EQUAL);
4189 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4190 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4191 ucol_close(coll);
4192 }
4193
4194 static void NullRule(void) {
4195 UChar r[3] = {0};
4196 UErrorCode status = U_ZERO_ERROR;
4197 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4198 if(U_SUCCESS(status)) {
4199 log_err("This should have been an error!\n");
4200 ucol_close(coll);
4201 } else {
4202 status = U_ZERO_ERROR;
4203 }
4204 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4205 if(U_FAILURE(status)) {
4206 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4207 } else {
4208 ucol_close(coll);
4209 }
4210 }
4211
4212 /**
4213 * Test for CollationElementIterator previous and next for the whole set of
4214 * unicode characters with normalization on.
4215 */
4216 static void TestNumericCollation(void)
4217 {
4218 UErrorCode status = U_ZERO_ERROR;
4219
4220 const static char *basicTestStrings[]={
4221 "hello1",
4222 "hello2",
4223 "hello2002",
4224 "hello2003",
4225 "hello123456",
4226 "hello1234567",
4227 "hello10000000",
4228 "hello100000000",
4229 "hello1000000000",
4230 "hello10000000000",
4231 };
4232
4233 const static char *preZeroTestStrings[]={
4234 "avery10000",
4235 "avery010000",
4236 "avery0010000",
4237 "avery00010000",
4238 "avery000010000",
4239 "avery0000010000",
4240 "avery00000010000",
4241 "avery000000010000",
4242 };
4243
4244 const static char *thirtyTwoBitNumericStrings[]={
4245 "avery42949672960",
4246 "avery42949672961",
4247 "avery42949672962",
4248 "avery429496729610"
4249 };
4250
4251 const static char *longNumericStrings[]={
4252 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4253 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4254 are treated as multiple collation elements. */
4255 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4256 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4257 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4258 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4259 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4260 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4261 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4262 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4263 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4264 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4265 };
4266
4267 const static char *supplementaryDigits[] = {
4268 "\\uD835\\uDFCE", /* 0 */
4269 "\\uD835\\uDFCF", /* 1 */
4270 "\\uD835\\uDFD0", /* 2 */
4271 "\\uD835\\uDFD1", /* 3 */
4272 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4273 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4274 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4275 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4276 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4277 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4278 };
4279
4280 const static char *foreignDigits[] = {
4281 "\\u0661",
4282 "\\u0662",
4283 "\\u0663",
4284 "\\u0661\\u0660",
4285 "\\u0661\\u0662",
4286 "\\u0661\\u0663",
4287 "\\u0662\\u0660",
4288 "\\u0662\\u0662",
4289 "\\u0662\\u0663",
4290 "\\u0663\\u0660",
4291 "\\u0663\\u0662",
4292 "\\u0663\\u0663"
4293 };
4294
4295 const static char *evenZeroes[] = {
4296 "2000",
4297 "2001",
4298 "2002",
4299 "2003"
4300 };
4301
4302 UColAttribute att = UCOL_NUMERIC_COLLATION;
4303 UColAttributeValue val = UCOL_ON;
4304
4305 /* Open our collator. */
4306 UCollator* coll = ucol_open("root", &status);
4307 if (U_FAILURE(status)){
4308 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4309 myErrorName(status));
4310 return;
4311 }
4312 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4313 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4314 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4315 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4316 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4317 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4318
4319 /* Setting up our collator to do digits. */
4320 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4321 if (U_FAILURE(status)){
4322 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4323 myErrorName(status));
4324 return;
4325 }
4326
4327 /*
4328 Testing that prepended zeroes still yield the correct collation behavior.
4329 We expect that every element in our strings array will be equal.
4330 */
4331 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4332
4333 ucol_close(coll);
4334 }
4335
4336 static void TestTibetanConformance(void)
4337 {
4338 const char* test[] = {
4339 "\\u0FB2\\u0591\\u0F71\\u0061",
4340 "\\u0FB2\\u0F71\\u0061"
4341 };
4342
4343 UErrorCode status = U_ZERO_ERROR;
4344 UCollator *coll = ucol_open("", &status);
4345 UChar source[100];
4346 UChar target[100];
4347 int result;
4348 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4349 if (U_SUCCESS(status)) {
4350 u_unescape(test[0], source, 100);
4351 u_unescape(test[1], target, 100);
4352 doTest(coll, source, target, UCOL_EQUAL);
4353 result = ucol_strcoll(coll, source, -1, target, -1);
4354 log_verbose("result %d\n", result);
4355 if (UCOL_EQUAL != result) {
4356 log_err("Tibetan comparison error\n");
4357 }
4358 }
4359 ucol_close(coll);
4360
4361 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4362 }
4363
/**
 * Hands a two-string Han list to genericLocaleStarter() for the
 * "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *hanPair[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", hanPair, sizeof(hanPair)/sizeof(hanPair[0]));
}
4368
/* Largest raw value (inclusive) fed to the implicit-weight tests below. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks applied to 32-bit implicit weights.  The definitions carry no
 * trailing semicolon, so they can be used inside larger expressions as well
 * as at the end of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4373
4374
4375 static void showImplicit(UChar32 i) {
4376 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4377 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4378 }
4379 }
4380
4381 static void TestImplicitGeneration(void) {
4382 UErrorCode status = U_ZERO_ERROR;
4383 UChar32 last = 0;
4384 UChar32 current;
4385 UChar32 i = 0, j = 0;
4386 UChar32 roundtrip = 0;
4387 UChar32 lastBottom = 0;
4388 UChar32 currentBottom = 0;
4389 UChar32 lastTop = 0;
4390 UChar32 currentTop = 0;
4391
4392 UCollator *coll = ucol_open("root", &status);
4393 if(U_FAILURE(status)) {
4394 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4395 return;
4396 }
4397
4398 uprv_uca_getRawFromImplicit(0xE20303E7);
4399
4400 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4401 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4402
4403 /* check that it round-trips AND that all intervening ones are illegal*/
4404 roundtrip = uprv_uca_getRawFromImplicit(current);
4405 if (roundtrip != i) {
4406 log_err("No roundtrip %08X\n", i);
4407 }
4408 if (last != 0) {
4409 for (j = last + 1; j < current; ++j) {
4410 roundtrip = uprv_uca_getRawFromImplicit(j);
4411 /* raise an error if it *doesn't* find an error*/
4412 if (roundtrip != -1) {
4413 log_err("Fails to recognize illegal %08X\n", j);
4414 }
4415 }
4416 }
4417 /* now do other consistency checks*/
4418 lastBottom = last & bottomByte;
4419 currentBottom = current & bottomByte;
4420 lastTop = last & topByte;
4421 currentTop = current & topByte;
4422
4423 /* print out some values for spot-checking*/
4424 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4425 showImplicit(i-3);
4426 showImplicit(i-2);
4427 showImplicit(i-1);
4428 showImplicit(i);
4429 showImplicit(i+1);
4430 showImplicit(i+2);
4431 }
4432 last = current;
4433
4434 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4435 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4436 }
4437 }
4438 showImplicit(TST_UCOL_MAX_INPUT-2);
4439 showImplicit(TST_UCOL_MAX_INPUT-1);
4440 showImplicit(TST_UCOL_MAX_INPUT);
4441 ucol_close(coll);
4442 }
4443
4444 /**
4445 * Iterate through the given iterator, checking to see that all the strings
4446 * in the expected array are present.
4447 * @param expected array of strings we expect to see, or NULL
4448 * @param expectedCount number of elements of expected, or 0
4449 */
4450 static int32_t checkUEnumeration(const char* msg,
4451 UEnumeration* iter,
4452 const char** expected,
4453 int32_t expectedCount) {
4454 UErrorCode ec = U_ZERO_ERROR;
4455 int32_t i = 0, n, j, bit;
4456 int32_t seenMask = 0;
4457
4458 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4459 n = uenum_count(iter, &ec);
4460 if (!assertSuccess("count", &ec)) return -1;
4461 log_verbose("%s = [", msg);
4462 for (;; ++i) {
4463 const char* s = uenum_next(iter, NULL, &ec);
4464 if (!assertSuccess("snext", &ec) || s == NULL) break;
4465 if (i != 0) log_verbose(",");
4466 log_verbose("%s", s);
4467 /* check expected list */
4468 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4469 if ((seenMask&bit) == 0 &&
4470 uprv_strcmp(s, expected[j]) == 0) {
4471 seenMask |= bit;
4472 break;
4473 }
4474 }
4475 }
4476 log_verbose("] (%d)\n", i);
4477 assertTrue("count verified", i==n);
4478 /* did we see all expected strings? */
4479 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4480 if ((seenMask&bit)!=0) {
4481 log_verbose("Ok: \"%s\" seen\n", expected[j]);
4482 } else {
4483 log_err("FAIL: \"%s\" not seen\n", expected[j]);
4484 }
4485 }
4486 return n;
4487 }
4488
4489 /**
4490 * Test new API added for separate collation tree.
4491 */
4492 static void TestSeparateTrees(void) {
4493 UErrorCode ec = U_ZERO_ERROR;
4494 UEnumeration *e = NULL;
4495 int32_t n = -1;
4496 UBool isAvailable;
4497 char loc[256];
4498
4499 static const char* AVAIL[] = { "en", "de" };
4500
4501 static const char* KW[] = { "collation" };
4502
4503 static const char* KWVAL[] = { "phonebook", "stroke" };
4504
4505 #if !UCONFIG_NO_SERVICE
4506 e = ucol_openAvailableLocales(&ec);
4507 if (e != NULL) {
4508 assertSuccess("ucol_openAvailableLocales", &ec);
4509 assertTrue("ucol_openAvailableLocales!=0", e!=0);
4510 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4511 /* Don't need to check n because we check list */
4512 uenum_close(e);
4513 } else {
4514 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4515 }
4516 #endif
4517
4518 e = ucol_getKeywords(&ec);
4519 if (e != NULL) {
4520 assertSuccess("ucol_getKeywords", &ec);
4521 assertTrue("ucol_getKeywords!=0", e!=0);
4522 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4523 /* Don't need to check n because we check list */
4524 uenum_close(e);
4525 } else {
4526 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4527 }
4528
4529 e = ucol_getKeywordValues(KW[0], &ec);
4530 if (e != NULL) {
4531 assertSuccess("ucol_getKeywordValues", &ec);
4532 assertTrue("ucol_getKeywordValues!=0", e!=0);
4533 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4534 /* Don't need to check n because we check list */
4535 uenum_close(e);
4536 } else {
4537 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4538 }
4539
4540 /* Try setting a warning before calling ucol_getKeywordValues */
4541 ec = U_USING_FALLBACK_WARNING;
4542 e = ucol_getKeywordValues(KW[0], &ec);
4543 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4544 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4545 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4546 /* Don't need to check n because we check list */
4547 uenum_close(e);
4548 }
4549
4550 /*
4551 U_DRAFT int32_t U_EXPORT2
4552 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4553 const char* locale, UBool* isAvailable,
4554 UErrorCode* status);
4555 }
4556 */
4557 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4558 &isAvailable, &ec);
4559 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4560 assertEquals("getFunctionalEquivalent(de)", "de", loc);
4561 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4562 isAvailable == TRUE);
4563 }
4564
4565 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4566 &isAvailable, &ec);
4567 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4568 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4569 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4570 isAvailable == TRUE);
4571 }
4572 }
4573
/*
 * Supercedes TestJ784.
 * Runs two ordered string lists against a "[before 2]" tailoring of the
 * vowels with tone marks, and against the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* syllables with and without a macron */
    static const char *syllables[] = {
        "l\\u0101", "la",
        "l\\u0101n", "lan ",
        "l\\u0113", "le",
        "l\\u0113n", "len"
    };

    /* tone-marked variants of "a" in both cases, after "x"/"X" */
    static const char *toneMarks[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa", "xA", "Xa", "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax", "xAx"
    };

    /* first list: custom rules, then the zh locale */
    genericRulesStarter(rules, syllables, sizeof(syllables)/sizeof(syllables[0]));
    genericLocaleStarter("zh", syllables, sizeof(syllables)/sizeof(syllables[0]));

    /* second list: custom rules, then the zh locale */
    genericRulesStarter(rules, toneMarks, sizeof(toneMarks)/sizeof(toneMarks[0]));
    genericLocaleStarter("zh", toneMarks, sizeof(toneMarks)/sizeof(toneMarks[0]));
}
4634
4635 static void TestBeforeTightening(void) {
4636 static const struct {
4637 const char *rules;
4638 UErrorCode expectedStatus;
4639 } tests[] = {
4640 { "&[before 1]a<x", U_ZERO_ERROR },
4641 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4642 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4643 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4644 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4645 { "&[before 2]a<<x",U_ZERO_ERROR },
4646 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4647 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4648 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
4649 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
4650 { "&[before 3]a<<<x",U_ZERO_ERROR },
4651 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
4652 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4653 };
4654
4655 int32_t i = 0;
4656
4657 UErrorCode status = U_ZERO_ERROR;
4658 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4659 uint32_t rlen = 0;
4660
4661 UCollator *coll = NULL;
4662
4663
4664 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4665 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4666 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4667 if(status != tests[i].expectedStatus) {
4668 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4669 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4670 }
4671 ucol_close(coll);
4672 status = U_ZERO_ERROR;
4673 }
4674
4675 }
4676
4677 #if 0
4678 &m < a
4679 &[before 1] a < x <<< X << q <<< Q < z
4680 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4681
4682 &m < a
4683 &[before 2] a << x <<< X << q <<< Q < z
4684 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4685
4686 &m < a
4687 &[before 3] a <<< x <<< X << q <<< Q < z
4688 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4689
4690
4691 &m << a
4692 &[before 1] a < x <<< X << q <<< Q < z
4693 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4694
4695 &m << a
4696 &[before 2] a << x <<< X << q <<< Q < z
4697 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4698
4699 &m << a
4700 &[before 3] a <<< x <<< X << q <<< Q < z
4701 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4702
4703
4704 &m <<< a
4705 &[before 1] a < x <<< X << q <<< Q < z
4706 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4707
4708 &m <<< a
4709 &[before 2] a << x <<< X << q <<< Q < z
4710 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4711
4712 &m <<< a
4713 &[before 3] a <<< x <<< X << q <<< Q < z
4714 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4715
4716
4717 &[before 1] s < x <<< X << q <<< Q < z
4718 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4719
4720 &[before 2] s << x <<< X << q <<< Q < z
4721 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4722
4723 &[before 3] s <<< x <<< X << q <<< Q < z
4724 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4725
4726
4727 &[before 1] \u24DC < x <<< X << q <<< Q < z
4728 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4729
4730 &[before 2] \u24DC << x <<< X << q <<< Q < z
4731 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4732
4733 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4734 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4735 #endif
4736
4737
4738 #if 0
4739 /* requires features not yet supported */
4740 static void TestMoreBefore(void) {
4741 static const struct {
4742 const char* rules;
4743 const char* order[16];
4744 int32_t size;
4745 } tests[] = {
4746 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4747 { "m","M","x","X","q","Q","z","a","n" }, 9},
4748 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4749 { "m","M","x","X","q","Q","a","z","n" }, 9},
4750 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4751 { "m","M","x","X","a","q","Q","z","n" }, 9},
4752 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4753 { "x","X","q","Q","z","m","M","a","n" }, 9},
4754 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4755 { "m","M","x","X","q","Q","a","z","n" }, 9},
4756 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4757 { "m","M","x","X","a","q","Q","z","n" }, 9},
4758 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4759 { "x","X","q","Q","z","n","m","a","M" }, 9},
4760 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4761 { "x","X","q","Q","m","a","M","z","n" }, 9},
4762 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4763 { "m","x","X","a","M","q","Q","z","n" }, 9},
4764 { "&[before 1] s < x <<< X << q <<< Q < z",
4765 { "r","R","x","X","q","Q","z","s","n" }, 9},
4766 { "&[before 2] s << x <<< X << q <<< Q < z",
4767 { "r","R","x","X","q","Q","s","z","n" }, 9},
4768 { "&[before 3] s <<< x <<< X << q <<< Q < z",
4769 { "r","R","x","X","s","q","Q","z","n" }, 9},
4770 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4771 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4772 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4773 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4774 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4775 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4776 };
4777
4778 int32_t i = 0;
4779
4780 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4781 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
4782 }
4783 }
4784 #endif
4785
4786 static void TestTailorNULL( void ) {
4787 const static char* rule = "&a <<< '\\u0000'";
4788 UErrorCode status = U_ZERO_ERROR;
4789 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4790 uint32_t rlen = 0;
4791 UChar a = 1, null = 0;
4792 UCollationResult res = UCOL_EQUAL;
4793
4794 UCollator *coll = NULL;
4795
4796
4797 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4798 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4799
4800 if(U_FAILURE(status)) {
4801 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4802 } else {
4803 res = ucol_strcoll(coll, &a, 1, &null, 1);
4804
4805 if(res != UCOL_LESS) {
4806 log_err("NULL was not tailored properly!\n");
4807 }
4808 }
4809
4810 ucol_close(coll);
4811 }
4812
4813 static void
4814 TestUpperFirstQuaternary(void)
4815 {
4816 const char* tests[] = { "B", "b", "Bb", "bB" };
4817 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4818 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4819 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4820 }
4821
4822 static void
4823 TestJ4960(void)
4824 {
4825 const char* tests[] = { "\\u00e2T", "aT" };
4826 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4827 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4828 const char* tests2[] = { "a", "A" };
4829 const char* rule = "&[first tertiary ignorable]=A=a";
4830 UColAttribute att2[] = { UCOL_CASE_LEVEL };
4831 UColAttributeValue attVals2[] = { UCOL_ON };
4832 /* Test whether we correctly ignore primary ignorables on case level when */
4833 /* we have only primary & case level */
4834 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4835 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4836 /* and case level */
4837 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4838 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4839 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4840 }
4841
4842 static void
4843 TestJ5223(void)
4844 {
4845 static const char *test = "this is a test string";
4846 UChar ustr[256];
4847 int32_t ustr_length = u_unescape(test, ustr, 256);
4848 unsigned char sortkey[256];
4849 int32_t sortkey_length;
4850 UErrorCode status = U_ZERO_ERROR;
4851 static UCollator *coll = NULL;
4852 coll = ucol_open("root", &status);
4853 if(U_FAILURE(status)) {
4854 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4855 return;
4856 }
4857 ucol_setStrength(coll, UCOL_PRIMARY);
4858 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4859 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4860 if (U_FAILURE(status)) {
4861 log_err("Failed setting atributes\n");
4862 return;
4863 }
4864 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4865 if (sortkey_length > 256) return;
4866
4867 /* we mark the position where the null byte should be written in advance */
4868 sortkey[sortkey_length-1] = 0xAA;
4869
4870 /* we set the buffer size one byte higher than needed */
4871 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4872 sortkey_length+1);
4873
4874 /* no error occurs (for me) */
4875 if (sortkey[sortkey_length-1] == 0xAA) {
4876 log_err("Hit bug at first try\n");
4877 }
4878
4879 /* we mark the position where the null byte should be written again */
4880 sortkey[sortkey_length-1] = 0xAA;
4881
4882 /* this time we set the buffer size to the exact amount needed */
4883 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4884 sortkey_length);
4885
4886 /* now the trailing null byte is not written */
4887 if (sortkey[sortkey_length-1] == 0xAA) {
4888 log_err("Hit bug at second try\n");
4889 }
4890
4891 ucol_close(coll);
4892 }
4893
/*
 * Regression test for Thai partial sort key problem: runs the generic
 * locale test for "th" on two Thai strings that differ only in their last
 * but one code unit (U+0E47 vs U+0E48).
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, sizeof(thaiPair)/sizeof(thaiPair[0]));
}
4905
/*
 * Runs the generic rules test on the strings "a", "y" using a tailoring that
 * resets on the two-letter sequence "Ny" and then places 'a' after
 * [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *orderedStrings[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, orderedStrings,
                        sizeof(orderedStrings)/sizeof(orderedStrings[0]));
}
4913
4914 static void
4915 TestVI5913(void)
4916 {
4917 UErrorCode status = U_ZERO_ERROR;
4918 int32_t i, j;
4919 UCollator *coll =NULL;
4920 uint8_t resColl[100], expColl[100];
4921 int32_t rLen, tLen, ruleLen, sLen, kLen;
4922 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
4923 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4924 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4925 static const UChar tData[][20]={
4926 {0x1EAC, 0},
4927 {0x0041, 0x0323, 0x0302, 0},
4928 {0x1EA0, 0x0302, 0},
4929 {0x00C2, 0x0323, 0},
4930 {0x1ED8, 0}, /* O with dot and circumflex */
4931 {0x1ECC, 0x0302, 0},
4932 {0x1EB7, 0},
4933 {0x1EA1, 0x0306, 0},
4934 };
4935 static const UChar tailorData[][20]={
4936 {0x1FA2, 0}, /* Omega with 3 combining marks */
4937 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4938 {0x1FF3, 0x0313, 0x0300, 0},
4939 {0x1F60, 0x0300, 0x0345, 0},
4940 {0x1F62, 0x0345, 0},
4941 {0x1FA0, 0x0300, 0},
4942 };
4943 static const UChar tailorData2[][20]={
4944 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4945 {0x0073, 0x0323, 0x030C, 0},
4946 {0x0073, 0x030C, 0x0323, 0},
4947 };
4948 static const UChar tailorData3[][20]={
4949 {0x007a, 0}, /* z */
4950 {0x0061, 0x0065, 0}, /* a + e */
4951 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4952 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4953 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4954 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4955 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4956 {0x00EA, 0}, /* e with circumflex */
4957 };
4958
4959 /* Test Vietnamese sort. */
4960 coll = ucol_open("vi", &status);
4961 if(U_FAILURE(status)) {
4962 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4963 return;
4964 }
4965 log_verbose("\n\nVI collation:");
4966 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4967 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4968 }
4969 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4970 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4971 }
4972 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4973 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4974 }
4975 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4976 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4977 }
4978
4979 for (j=0; j<8; j++) {
4980 tLen = u_strlen(tData[j]);
4981 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4982 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4983 for(i = 0; i<rLen; i++) {
4984 log_verbose(" %02X", resColl[i]);
4985 }
4986 }
4987
4988 ucol_close(coll);
4989
4990 /* Test Romanian sort. */
4991 coll = ucol_open("ro", &status);
4992 log_verbose("\n\nRO collation:");
4993 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4994 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4995 }
4996 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4997 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4998 }
4999 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
5000 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5001 }
5002
5003 for (j=4; j<8; j++) {
5004 tLen = u_strlen(tData[j]);
5005 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
5006 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5007 for(i = 0; i<rLen; i++) {
5008 log_verbose(" %02X", resColl[i]);
5009 }
5010 }
5011 ucol_close(coll);
5012
5013 /* Test the precomposed Greek character with 3 combining marks. */
5014 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5015 ruleLen = u_strlen(rule);
5016 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5017 if (U_FAILURE(status)) {
5018 log_err("ucol_openRules failed with %s\n", u_errorName(status));
5019 return;
5020 }
5021 sLen = u_strlen(tailorData[0]);
5022 for (j=1; j<6; j++) {
5023 tLen = u_strlen(tailorData[j]);
5024 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
5025 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5026 }
5027 }
5028 /* Test getSortKey. */
5029 tLen = u_strlen(tailorData[0]);
5030 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5031 for (j=0; j<6; j++) {
5032 tLen = u_strlen(tailorData[j]);
5033 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5034 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5035 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5036 for(i = 0; i<rLen; i++) {
5037 log_err(" %02X", resColl[i]);
5038 }
5039 }
5040 }
5041 ucol_close(coll);
5042
5043 log_verbose("\n\nTailoring test for s with caron:");
5044 ruleLen = u_strlen(rule2);
5045 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5046 tLen = u_strlen(tailorData2[0]);
5047 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5048 for (j=1; j<3; j++) {
5049 tLen = u_strlen(tailorData2[j]);
5050 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5051 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5052 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5053 for(i = 0; i<rLen; i++) {
5054 log_err(" %02X", resColl[i]);
5055 }
5056 }
5057 }
5058 ucol_close(coll);
5059
5060 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5061 ruleLen = u_strlen(rule3);
5062 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5063 tLen = u_strlen(tailorData3[3]);
5064 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5065 for (j=4; j<6; j++) {
5066 tLen = u_strlen(tailorData3[j]);
5067 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5068
5069 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5070 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5071 for(i = 0; i<rLen; i++) {
5072 log_err(" %02X", resColl[i]);
5073 }
5074 }
5075
5076 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5077 for(i = 0; i<rLen; i++) {
5078 log_verbose(" %02X", resColl[i]);
5079 }
5080 }
5081 ucol_close(coll);
5082 }
5083
5084 static void
5085 TestTailor6179(void)
5086 {
5087 UErrorCode status = U_ZERO_ERROR;
5088 int32_t i;
5089 UCollator *coll =NULL;
5090 uint8_t resColl[100];
5091 int32_t rLen, tLen, ruleLen;
5092 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5093 static const UChar rule1[]={
5094 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5095 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5096 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5097 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5098 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5099 static const UChar rule2[]={
5100 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5101 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5102 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5103 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5104 0x3C,0x3C,0x20,0x62,0};
5105
5106 static const UChar tData1[][4]={
5107 {0x61, 0},
5108 {0x62, 0},
5109 { 0xFDD0,0x009E, 0}
5110 };
5111 static const UChar tData2[][4]={
5112 {0x61, 0},
5113 {0x62, 0},
5114 { 0xFDD0,0x009E, 0}
5115 };
5116
5117 /*
5118 * These values from FractionalUCA.txt will change,
5119 * and need to be updated here.
5120 */
5121 static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
5122 static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
5123 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5124 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5125
5126 /* Test [Last Primary ignorable] */
5127
5128 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
5129 ruleLen = u_strlen(rule1);
5130 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5131 if (U_FAILURE(status)) {
5132 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5133 return;
5134 }
5135 tLen = u_strlen(tData1[0]);
5136 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5137 if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
5138 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
5139 for(i = 0; i<rLen; i++) {
5140 log_err(" %02X", resColl[i]);
5141 }
5142 log_err("\n");
5143 }
5144 tLen = u_strlen(tData1[1]);
5145 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5146 if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
5147 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
5148 for(i = 0; i<rLen; i++) {
5149 log_err(" %02X", resColl[i]);
5150 }
5151 log_err("\n");
5152 }
5153 ucol_close(coll);
5154
5155
5156 /* Test [Last Secondary ignorable] */
5157 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
5158 ruleLen = u_strlen(rule1);
5159 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5160 if (U_FAILURE(status)) {
5161 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
5162 return;
5163 }
5164 tLen = u_strlen(tData2[0]);
5165 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5166 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
5167 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
5168 for(i = 0; i<rLen; i++) {
5169 log_err(" %02X", resColl[i]);
5170 }
5171 log_err("\n");
5172 }
5173 if(isICUVersionAtLeast(50, 0, 0)) { /* TODO: debug & fix, see ticket #8982 */
5174 tLen = u_strlen(tData2[1]);
5175 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5176 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
5177 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5178 for(i = 0; i<rLen; i++) {
5179 log_err(" %02X", resColl[i]);
5180 }
5181 log_err("\n");
5182 }
5183 }
5184 ucol_close(coll);
5185 }
5186
5187 static void
5188 TestUCAPrecontext(void)
5189 {
5190 UErrorCode status = U_ZERO_ERROR;
5191 int32_t i, j;
5192 UCollator *coll =NULL;
5193 uint8_t resColl[100], prevColl[100];
5194 int32_t rLen, tLen, ruleLen;
5195 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5196 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5197 /* & l middle-dot << a a is an expansion. */
5198
5199 UChar tData1[][20]={
5200 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5201 { 0x387, 0}, /* standalone middle dot(0x387) */
5202 { 0x61, 0}, /* a */
5203 { 0x6C, 0}, /* l */
5204 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
5205 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5206 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5207 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5208 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5209 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5210 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
5211 };
5212
5213 log_verbose("\n\nEN collation:");
5214 coll = ucol_open("en", &status);
5215 if (U_FAILURE(status)) {
5216 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5217 return;
5218 }
5219 for (j=0; j<11; j++) {
5220 tLen = u_strlen(tData1[j]);
5221 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5222 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5223 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5224 j, tData1[j]);
5225 }
5226 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5227 for(i = 0; i<rLen; i++) {
5228 log_verbose(" %02X", resColl[i]);
5229 }
5230 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5231 }
5232 ucol_close(coll);
5233
5234
5235 log_verbose("\n\nJA collation:");
5236 coll = ucol_open("ja", &status);
5237 if (U_FAILURE(status)) {
5238 log_err("Tailoring test: &z <<a|- failed!");
5239 return;
5240 }
5241 for (j=0; j<11; j++) {
5242 tLen = u_strlen(tData1[j]);
5243 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5244 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5245 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5246 j, tData1[j]);
5247 }
5248 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5249 for(i = 0; i<rLen; i++) {
5250 log_verbose(" %02X", resColl[i]);
5251 }
5252 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5253 }
5254 ucol_close(coll);
5255
5256
5257 log_verbose("\n\nTailoring test: & middle dot < a ");
5258 ruleLen = u_strlen(rule1);
5259 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5260 if (U_FAILURE(status)) {
5261 log_err("Tailoring test: & middle dot < a failed!");
5262 return;
5263 }
5264 for (j=0; j<11; j++) {
5265 tLen = u_strlen(tData1[j]);
5266 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5267 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5268 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5269 j, tData1[j]);
5270 }
5271 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5272 for(i = 0; i<rLen; i++) {
5273 log_verbose(" %02X", resColl[i]);
5274 }
5275 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5276 }
5277 ucol_close(coll);
5278
5279
5280 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5281 ruleLen = u_strlen(rule2);
5282 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5283 if (U_FAILURE(status)) {
5284 log_err("Tailoring test: & l middle-dot << a failed!");
5285 return;
5286 }
5287 for (j=0; j<11; j++) {
5288 tLen = u_strlen(tData1[j]);
5289 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5290 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5291 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5292 j, tData1[j]);
5293 }
5294 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5295 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5296 j, tData1[j]);
5297 }
5298 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5299 for(i = 0; i<rLen; i++) {
5300 log_verbose(" %02X", resColl[i]);
5301 }
5302 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5303 }
5304 ucol_close(coll);
5305 }
5306
5307 static void
5308 TestOutOfBuffer5468(void)
5309 {
5310 static const char *test = "\\u4e00";
5311 UChar ustr[256];
5312 int32_t ustr_length = u_unescape(test, ustr, 256);
5313 unsigned char shortKeyBuf[1];
5314 int32_t sortkey_length;
5315 UErrorCode status = U_ZERO_ERROR;
5316 static UCollator *coll = NULL;
5317
5318 coll = ucol_open("root", &status);
5319 if(U_FAILURE(status)) {
5320 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5321 return;
5322 }
5323 ucol_setStrength(coll, UCOL_PRIMARY);
5324 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5325 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5326 if (U_FAILURE(status)) {
5327 log_err("Failed setting atributes\n");
5328 return;
5329 }
5330
5331 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5332 if (sortkey_length != 4) {
5333 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
5334 }
5335 log_verbose("length of sortKey is %d", sortkey_length);
5336 ucol_close(coll);
5337 }
5338
5339 #define TSKC_DATA_SIZE 5
5340 #define TSKC_BUF_SIZE 50
5341 static void
5342 TestSortKeyConsistency(void)
5343 {
5344 UErrorCode icuRC = U_ZERO_ERROR;
5345 UCollator* ucol;
5346 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5347
5348 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5349 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5350 int32_t i, j, i2;
5351
5352 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5353 if (U_FAILURE(icuRC))
5354 {
5355 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5356 return;
5357 }
5358
5359 for (i = 0; i < TSKC_DATA_SIZE; i++)
5360 {
5361 UCharIterator uiter;
5362 uint32_t state[2] = { 0, 0 };
5363 int32_t dataLen = i+1;
5364 for (j=0; j<TSKC_BUF_SIZE; j++)
5365 bufFull[i][j] = bufPart[i][j] = 0;
5366
5367 /* Full sort key */
5368 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5369
5370 /* Partial sort key */
5371 uiter_setString(&uiter, data, dataLen);
5372 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5373 if (U_FAILURE(icuRC))
5374 {
5375 log_err("ucol_nextSortKeyPart failed\n");
5376 ucol_close(ucol);
5377 return;
5378 }
5379
5380 for (i2=0; i2<i; i2++)
5381 {
5382 UBool fullMatch = TRUE;
5383 UBool partMatch = TRUE;
5384 for (j=0; j<TSKC_BUF_SIZE; j++)
5385 {
5386 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5387 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5388 }
5389 if (fullMatch != partMatch) {
5390 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5391 : "partial key was consistent, but full key changed\n");
5392 ucol_close(ucol);
5393 return;
5394 }
5395 }
5396 }
5397
5398 /*=============================================*/
5399 ucol_close(ucol);
5400 }
5401
5402 /* ticket: 6101 */
5403 static void TestCroatianSortKey(void) {
5404 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5405 UErrorCode status = U_ZERO_ERROR;
5406 UCollator *ucol;
5407 UCharIterator iter;
5408
5409 static const UChar text[] = { 0x0044, 0xD81A };
5410
5411 size_t length = sizeof(text)/sizeof(*text);
5412
5413 uint8_t textSortKey[32];
5414 size_t lenSortKey = 32;
5415 size_t actualSortKeyLen;
5416 uint32_t uStateInfo[2] = { 0, 0 };
5417
5418 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5419 if (U_FAILURE(status)) {
5420 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5421 return;
5422 }
5423
5424 uiter_setString(&iter, text, length);
5425
5426 actualSortKeyLen = ucol_nextSortKeyPart(
5427 ucol, &iter, (uint32_t*)uStateInfo,
5428 textSortKey, lenSortKey, &status
5429 );
5430
5431 if (actualSortKeyLen == lenSortKey) {
5432 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5433 }
5434
5435 ucol_close(ucol);
5436 }
5437
5438 /* ticket: 6140 */
5439 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5440 * they are both Hiragana and Katakana
5441 */
5442 #define SORTKEYLEN 50
5443 static void TestHiragana(void) {
5444 UErrorCode status = U_ZERO_ERROR;
5445 UCollator* ucol;
5446 UCollationResult strcollresult;
5447 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5448 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5449 int32_t data1Len = sizeof(data1)/sizeof(*data1);
5450 int32_t data2Len = sizeof(data2)/sizeof(*data2);
5451 int32_t i, j;
5452 uint8_t sortKey1[SORTKEYLEN];
5453 uint8_t sortKey2[SORTKEYLEN];
5454
5455 UCharIterator uiter1;
5456 UCharIterator uiter2;
5457 uint32_t state1[2] = { 0, 0 };
5458 uint32_t state2[2] = { 0, 0 };
5459 int32_t keySize1;
5460 int32_t keySize2;
5461
5462 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5463 &status);
5464 if (U_FAILURE(status)) {
5465 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5466 return;
5467 }
5468
5469 /* Start of full sort keys */
5470 /* Full sort key1 */
5471 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5472 /* Full sort key2 */
5473 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5474 if (keySize1 == keySize2) {
5475 for (i = 0; i < keySize1; i++) {
5476 if (sortKey1[i] != sortKey2[i]) {
5477 log_err("Full sort keys are different. Should be equal.");
5478 }
5479 }
5480 } else {
5481 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5482 }
5483 /* End of full sort keys */
5484
5485 /* Start of partial sort keys */
5486 /* Partial sort key1 */
5487 uiter_setString(&uiter1, data1, data1Len);
5488 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5489 /* Partial sort key2 */
5490 uiter_setString(&uiter2, data2, data2Len);
5491 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5492 if (U_SUCCESS(status) && keySize1 == keySize2) {
5493 for (j = 0; j < keySize1; j++) {
5494 if (sortKey1[j] != sortKey2[j]) {
5495 log_err("Partial sort keys are different. Should be equal");
5496 }
5497 }
5498 } else {
5499 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5500 }
5501 /* End of partial sort keys */
5502
5503 /* Start of strcoll */
5504 /* Use ucol_strcoll() to determine ordering */
5505 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5506 if (strcollresult != UCOL_EQUAL) {
5507 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5508 }
5509
5510 ucol_close(ucol);
5511 }
5512
/*
 * Convenient struct for running collation tests: one pair of strings plus
 * the collation result expected when comparing source against target.
 * Both strings are fixed-size arrays of MAX_TOKEN_LEN code units; the tables
 * in this file initialize only the leading elements, so the remaining
 * elements are zero.
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* -1, 0 or +1, depending on collation */
} OneTestCase;
5519
5520 /*
5521 * Utility function to test one collation test case.
5522 * @param testcases Array of test cases.
5523 * @param n_testcases Size of the array testcases.
5524 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
5525 * @param n_rules Size of the array str_rules.
5526 */
5527 static void doTestOneTestCase(const OneTestCase testcases[],
5528 int n_testcases,
5529 const char* str_rules[],
5530 int n_rules)
5531 {
5532 int rule_no, testcase_no;
5533 UChar rule[500];
5534 int32_t length = 0;
5535 UErrorCode status = U_ZERO_ERROR;
5536 UParseError parse_error;
5537 UCollator *myCollation;
5538
5539 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5540
5541 length = u_unescape(str_rules[rule_no], rule, 500);
5542 if (length == 0) {
5543 log_err("ERROR: The rule cannot be unescaped: %s\n");
5544 return;
5545 }
5546 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5547 if(U_FAILURE(status)){
5548 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5549 return;
5550 }
5551 log_verbose("Testing the <<* syntax\n");
5552 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5553 ucol_setStrength(myCollation, UCOL_TERTIARY);
5554 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5555 doTest(myCollation,
5556 testcases[testcase_no].source,
5557 testcases[testcase_no].target,
5558 testcases[testcase_no].result
5559 );
5560 }
5561 ucol_close(myCollation);
5562 }
5563 }
5564
5565 const static OneTestCase rangeTestcases[] = {
5566 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
5567 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
5568 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */
5569
5570 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
5571 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
5572 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
5573 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
5574 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */
5575
5576 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */
5577 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
5578 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */
5579 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */
5580
5581 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
5582 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
5583 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
5584 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
5585 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */
5586 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
5587 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
5588 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */
5589 };
5590
5591 static int nRangeTestcases = LEN(rangeTestcases);
5592
5593 const static OneTestCase rangeTestcasesSupplemental[] = {
5594 { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */
5595 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */
5596 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
5597 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */
5598 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5599 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5600 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */
5601 };
5602
5603 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5604
5605 const static OneTestCase rangeTestcasesQwerty[] = {
5606 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
5607 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */
5608
5609 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */
5610 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */
5611
5612 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */
5613 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */
5614
5615 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
5616 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */
5617
5618 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5619 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
5620 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5621 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
5622 };
5623
5624 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5625
5626 static void TestSameStrengthList(void)
5627 {
5628 const char* strRules[] = {
5629 /* Normal */
5630 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
5631
5632 /* Lists */
5633 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5634 };
5635 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5636 }
5637
5638 static void TestSameStrengthListQuoted(void)
5639 {
5640 const char* strRules[] = {
5641 /* Lists with quoted characters */
5642 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5643 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5644
5645 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5646 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5647
5648 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
5649 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5650 };
5651 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5652 }
5653
5654 static void TestSameStrengthListSupplemental(void)
5655 {
5656 const char* strRules[] = {
5657 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5658 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5659 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5660 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5661 };
5662 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5663 }
5664
5665 static void TestSameStrengthListQwerty(void)
5666 {
5667 const char* strRules[] = {
5668 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5669 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5670 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5671 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5672 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5673
5674 /* Quoted characters also will work if two quoted characters are not consecutive. */
5675 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5676
5677 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5678 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5679
5680 };
5681 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5682 }
5683
5684 static void TestSameStrengthListQuotedQwerty(void)
5685 {
5686 const char* strRules[] = {
5687 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5688 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5689 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
5690
5691 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5692 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5693 };
5694 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5695 }
5696
5697 static void TestSameStrengthListRanges(void)
5698 {
5699 const char* strRules[] = {
5700 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5701 };
5702 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5703 }
5704
5705 static void TestSameStrengthListSupplementalRanges(void)
5706 {
5707 const char* strRules[] = {
5708 "&\\ufffe<*\\uffff-\\U00010002",
5709 };
5710 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5711 }
5712
5713 static void TestSpecialCharacters(void)
5714 {
5715 const char* strRules[] = {
5716 /* Normal */
5717 "&';'<'+'<','<'-'<'&'<'*'",
5718
5719 /* List */
5720 "&';'<*'+,-&*'",
5721
5722 /* Range */
5723 "&';'<*'+'-'-&*'",
5724 };
5725
5726 const static OneTestCase specialCharacterStrings[] = {
5727 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
5728 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
5729 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
5730 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
5731 };
5732 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5733 }
5734
5735 static void TestPrivateUseCharacters(void)
5736 {
5737 const char* strRules[] = {
5738 /* Normal */
5739 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5740 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5741 };
5742
5743 const static OneTestCase privateUseCharacterStrings[] = {
5744 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5745 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5746 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5747 { {0xe2da}, {0xe2db}, UCOL_LESS },
5748 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5749 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5750 };
5751 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5752 }
5753
5754 static void TestPrivateUseCharactersInList(void)
5755 {
5756 const char* strRules[] = {
5757 /* List */
5758 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5759 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5760 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5761 };
5762
5763 const static OneTestCase privateUseCharacterStrings[] = {
5764 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5765 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5766 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5767 { {0xe2da}, {0xe2db}, UCOL_LESS },
5768 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5769 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5770 };
5771 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5772 }
5773
5774 static void TestPrivateUseCharactersInRange(void)
5775 {
5776 const char* strRules[] = {
5777 /* Range */
5778 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5779 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5780 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5781 };
5782
5783 const static OneTestCase privateUseCharacterStrings[] = {
5784 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5785 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5786 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5787 { {0xe2da}, {0xe2db}, UCOL_LESS },
5788 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5789 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5790 };
5791 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5792 }
5793
5794 static void TestInvalidListsAndRanges(void)
5795 {
5796 const char* invalidRules[] = {
5797 /* Range not in starred expression */
5798 "&\\ufffe<\\uffff-\\U00010002",
5799
5800 /* Range without start */
5801 "&a<*-c",
5802
5803 /* Range without end */
5804 "&a<*b-",
5805
5806 /* More than one hyphen */
5807 "&a<*b-g-l",
5808
5809 /* Range in the wrong order */
5810 "&a<*k-b",
5811
5812 };
5813
5814 UChar rule[500];
5815 UErrorCode status = U_ZERO_ERROR;
5816 UParseError parse_error;
5817 int n_rules = LEN(invalidRules);
5818 int rule_no;
5819 int length;
5820 UCollator *myCollation;
5821
5822 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5823
5824 length = u_unescape(invalidRules[rule_no], rule, 500);
5825 if (length == 0) {
5826 log_err("ERROR: The rule cannot be unescaped: %s\n");
5827 return;
5828 }
5829 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5830 if(!U_FAILURE(status)){
5831 log_err("ERROR: Could not cause a failure as expected: \n");
5832 }
5833 status = U_ZERO_ERROR;
5834 }
5835 }
5836
5837 /*
5838 * This test ensures that characters placed before a character in a different script have the same lead byte
5839 * in their collation key before and after script reordering.
5840 */
5841 static void TestBeforeRuleWithScriptReordering(void)
5842 {
5843 UParseError error;
5844 UErrorCode status = U_ZERO_ERROR;
5845 UCollator *myCollation;
5846 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5847 UChar rules[500];
5848 uint32_t rulesLength = 0;
5849 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5850 UCollationResult collResult;
5851
5852 uint8_t baseKey[256];
5853 uint32_t baseKeyLength;
5854 uint8_t beforeKey[256];
5855 uint32_t beforeKeyLength;
5856
5857 UChar base[] = { 0x03b1 }; /* base */
5858 int32_t baseLen = sizeof(base)/sizeof(*base);
5859
5860 UChar before[] = { 0x0e01 }; /* ko kai */
5861 int32_t beforeLen = sizeof(before)/sizeof(*before);
5862
5863 /*UChar *data[] = { before, base };
5864 genericRulesStarter(srules, data, 2);*/
5865
5866 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5867
5868
5869 /* build collator */
5870 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5871
5872 rulesLength = u_unescape(srules, rules, LEN(rules));
5873 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5874 if(U_FAILURE(status)) {
5875 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5876 return;
5877 }
5878
5879 /* check collation results - before rule applied but not script reordering */
5880 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5881 if (collResult != UCOL_GREATER) {
5882 log_err("Collation result not correct before script reordering = %d\n", collResult);
5883 }
5884
5885 /* check the lead byte of the collation keys before script reordering */
5886 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5887 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5888 if (baseKey[0] != beforeKey[0]) {
5889 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5890 }
5891
5892 /* reorder the scripts */
5893 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5894 if(U_FAILURE(status)) {
5895 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5896 return;
5897 }
5898
5899 /* check collation results - before rule applied and after script reordering */
5900 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5901 if (collResult != UCOL_GREATER) {
5902 log_err("Collation result not correct after script reordering = %d\n", collResult);
5903 }
5904
5905 /* check the lead byte of the collation keys after script reordering */
5906 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5907 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5908 if (baseKey[0] != beforeKey[0]) {
5909 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5910 }
5911
5912 ucol_close(myCollation);
5913 }
5914
5915 /*
5916 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5917 */
5918 static void TestNonLeadBytesDuringCollationReordering(void)
5919 {
5920 UErrorCode status = U_ZERO_ERROR;
5921 UCollator *myCollation;
5922 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5923
5924 uint8_t baseKey[256];
5925 uint32_t baseKeyLength;
5926 uint8_t reorderKey[256];
5927 uint32_t reorderKeyLength;
5928
5929 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5930
5931 uint32_t i;
5932
5933
5934 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5935
5936 /* build collator tertiary */
5937 myCollation = ucol_open("", &status);
5938 ucol_setStrength(myCollation, UCOL_TERTIARY);
5939 if(U_FAILURE(status)) {
5940 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5941 return;
5942 }
5943 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5944
5945 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5946 if(U_FAILURE(status)) {
5947 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5948 return;
5949 }
5950 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5951
5952 if (baseKeyLength != reorderKeyLength) {
5953 log_err("Key lengths not the same during reordering.\n");
5954 return;
5955 }
5956
5957 for (i = 1; i < baseKeyLength; i++) {
5958 if (baseKey[i] != reorderKey[i]) {
5959 log_err("Collation key bytes not the same at position %d.\n", i);
5960 return;
5961 }
5962 }
5963 ucol_close(myCollation);
5964
5965 /* build collator quaternary */
5966 myCollation = ucol_open("", &status);
5967 ucol_setStrength(myCollation, UCOL_QUATERNARY);
5968 if(U_FAILURE(status)) {
5969 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5970 return;
5971 }
5972 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5973
5974 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5975 if(U_FAILURE(status)) {
5976 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5977 return;
5978 }
5979 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5980
5981 if (baseKeyLength != reorderKeyLength) {
5982 log_err("Key lengths not the same during reordering.\n");
5983 return;
5984 }
5985
5986 for (i = 1; i < baseKeyLength; i++) {
5987 if (baseKey[i] != reorderKey[i]) {
5988 log_err("Collation key bytes not the same at position %d.\n", i);
5989 return;
5990 }
5991 }
5992 ucol_close(myCollation);
5993 }
5994
5995 /*
5996 * Test reordering API.
5997 */
5998 static void TestReorderingAPI(void)
5999 {
6000 UErrorCode status = U_ZERO_ERROR;
6001 UCollator *myCollation;
6002 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6003 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
6004 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6005 UCollationResult collResult;
6006 int32_t retrievedReorderCodesLength;
6007 int32_t retrievedReorderCodes[10];
6008 UChar greekString[] = { 0x03b1 };
6009 UChar punctuationString[] = { 0x203e };
6010 int loopIndex;
6011
6012 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6013
6014 /* build collator tertiary */
6015 myCollation = ucol_open("", &status);
6016 ucol_setStrength(myCollation, UCOL_TERTIARY);
6017 if(U_FAILURE(status)) {
6018 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6019 return;
6020 }
6021
6022 /* set the reorderding */
6023 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6024 if (U_FAILURE(status)) {
6025 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6026 return;
6027 }
6028
6029 /* get the reordering */
6030 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6031 if (status != U_BUFFER_OVERFLOW_ERROR) {
6032 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6033 return;
6034 }
6035 status = U_ZERO_ERROR;
6036 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6037 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6038 return;
6039 }
6040 /* now let's really get it */
6041 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6042 if (U_FAILURE(status)) {
6043 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6044 return;
6045 }
6046 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6047 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6048 return;
6049 }
6050 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6051 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6052 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6053 return;
6054 }
6055 }
6056 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6057 if (collResult != UCOL_LESS) {
6058 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6059 return;
6060 }
6061
6062 /* clear the reordering */
6063 ucol_setReorderCodes(myCollation, NULL, 0, &status);
6064 if (U_FAILURE(status)) {
6065 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6066 return;
6067 }
6068
6069 /* get the reordering again */
6070 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6071 if (retrievedReorderCodesLength != 0) {
6072 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6073 return;
6074 }
6075
6076 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6077 if (collResult != UCOL_GREATER) {
6078 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6079 return;
6080 }
6081
6082 /* test for error condition on duplicate reorder codes */
6083 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
6084 if (!U_FAILURE(status)) {
6085 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6086 return;
6087 }
6088
6089 status = U_ZERO_ERROR;
6090 /* test for reorder codes after a reset code */
6091 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
6092 if (!U_FAILURE(status)) {
6093 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6094 return;
6095 }
6096
6097 ucol_close(myCollation);
6098 }
6099
6100 /*
6101 * Test reordering API.
6102 */
6103 static void TestReorderingAPIWithRuleCreatedCollator(void)
6104 {
6105 UErrorCode status = U_ZERO_ERROR;
6106 UCollator *myCollation;
6107 UChar rules[90];
6108 int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
6109 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6110 UCollationResult collResult;
6111 int32_t retrievedReorderCodesLength;
6112 int32_t retrievedReorderCodes[10];
6113 UChar greekString[] = { 0x03b1 };
6114 UChar punctuationString[] = { 0x203e };
6115 UChar hanString[] = { 0x65E5, 0x672C };
6116 int loopIndex;
6117
6118 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6119
6120 /* build collator from rules */
6121 u_uastrcpy(rules, "[reorder Hani Grek]");
6122 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
6123 if(U_FAILURE(status)) {
6124 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6125 return;
6126 }
6127
6128 /* get the reordering */
6129 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6130 if (U_FAILURE(status)) {
6131 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6132 return;
6133 }
6134 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6135 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6136 return;
6137 }
6138 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6139 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6140 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6141 return;
6142 }
6143 }
6144 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
6145 if (collResult != UCOL_GREATER) {
6146 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6147 return;
6148 }
6149
6150
6151 /* set the reorderding */
6152 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6153 if (U_FAILURE(status)) {
6154 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6155 return;
6156 }
6157
6158 /* get the reordering */
6159 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6160 if (status != U_BUFFER_OVERFLOW_ERROR) {
6161 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6162 return;
6163 }
6164 status = U_ZERO_ERROR;
6165 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6166 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6167 return;
6168 }
6169 /* now let's really get it */
6170 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6171 if (U_FAILURE(status)) {
6172 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6173 return;
6174 }
6175 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6176 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6177 return;
6178 }
6179 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6180 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6181 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6182 return;
6183 }
6184 }
6185 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6186 if (collResult != UCOL_LESS) {
6187 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6188 return;
6189 }
6190
6191 /* clear the reordering */
6192 ucol_setReorderCodes(myCollation, NULL, 0, &status);
6193 if (U_FAILURE(status)) {
6194 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6195 return;
6196 }
6197
6198 /* get the reordering again */
6199 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6200 if (retrievedReorderCodesLength != 0) {
6201 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6202 return;
6203 }
6204
6205 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6206 if (collResult != UCOL_GREATER) {
6207 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6208 return;
6209 }
6210
6211 ucol_close(myCollation);
6212 }
6213
/*
 * qsort() comparator for arrays of int32_t codes, ascending order.
 *
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return -1, 0 or 1 when *a is less than, equal to, or greater than *b.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
    const int32_t lhs = *(const int32_t *)a;
    const int32_t rhs = *(const int32_t *)b;

    /* Compare instead of subtracting: a difference can overflow a signed int. */
    return (lhs > rhs) - (lhs < rhs);
}
6218
6219 static void TestEquivalentReorderingScripts(void) {
6220 UErrorCode status = U_ZERO_ERROR;
6221 int32_t equivalentScripts[50];
6222 int32_t equivalentScriptsLength;
6223 int loopIndex;
6224 int32_t equivalentScriptsResult[] = {
6225 USCRIPT_BOPOMOFO,
6226 USCRIPT_LISU,
6227 USCRIPT_LYCIAN,
6228 USCRIPT_CARIAN,
6229 USCRIPT_LYDIAN,
6230 USCRIPT_YI,
6231 USCRIPT_OLD_ITALIC,
6232 USCRIPT_GOTHIC,
6233 USCRIPT_DESERET,
6234 USCRIPT_SHAVIAN,
6235 USCRIPT_OSMANYA,
6236 USCRIPT_LINEAR_B,
6237 USCRIPT_CYPRIOT,
6238 USCRIPT_OLD_SOUTH_ARABIAN,
6239 USCRIPT_AVESTAN,
6240 USCRIPT_IMPERIAL_ARAMAIC,
6241 USCRIPT_INSCRIPTIONAL_PARTHIAN,
6242 USCRIPT_INSCRIPTIONAL_PAHLAVI,
6243 USCRIPT_UGARITIC,
6244 USCRIPT_OLD_PERSIAN,
6245 USCRIPT_CUNEIFORM,
6246 USCRIPT_EGYPTIAN_HIEROGLYPHS,
6247 USCRIPT_PHONETIC_POLLARD,
6248 USCRIPT_SORA_SOMPENG,
6249 USCRIPT_MEROITIC_CURSIVE,
6250 USCRIPT_MEROITIC_HIEROGLYPHS
6251 };
6252
6253 qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
6254
6255 /* UScript.GOTHIC */
6256 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6257 if (U_FAILURE(status)) {
6258 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6259 return;
6260 }
6261 /*
6262 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6263 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6264 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6265 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6266 }
6267 */
6268 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6269 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6270 return;
6271 }
6272 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6273 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6274 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6275 return;
6276 }
6277 }
6278
6279 /* UScript.SHAVIAN */
6280 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
6281 if (U_FAILURE(status)) {
6282 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6283 return;
6284 }
6285 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6286 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6287 return;
6288 }
6289 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6290 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6291 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6292 return;
6293 }
6294 }
6295 }
6296
6297 static void TestReorderingAcrossCloning(void)
6298 {
6299 UErrorCode status = U_ZERO_ERROR;
6300 UCollator *myCollation;
6301 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6302 UCollator *clonedCollation;
6303 int32_t bufferSize;
6304 int32_t retrievedReorderCodesLength;
6305 int32_t retrievedReorderCodes[10];
6306 int loopIndex;
6307
6308 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6309
6310 /* build collator tertiary */
6311 myCollation = ucol_open("", &status);
6312 ucol_setStrength(myCollation, UCOL_TERTIARY);
6313 if(U_FAILURE(status)) {
6314 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6315 return;
6316 }
6317
6318 /* set the reorderding */
6319 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6320 if (U_FAILURE(status)) {
6321 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6322 return;
6323 }
6324
6325 /* clone the collator */
6326 clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status);
6327 if (U_FAILURE(status)) {
6328 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
6329 return;
6330 }
6331
6332 /* get the reordering */
6333 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6334 if (U_FAILURE(status)) {
6335 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6336 return;
6337 }
6338 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6339 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6340 return;
6341 }
6342 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6343 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6344 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6345 return;
6346 }
6347 }
6348
6349 /*uprv_free(buffer);*/
6350 ucol_close(myCollation);
6351 ucol_close(clonedCollation);
6352 }
6353
6354 /*
6355 * Utility function to test one collation reordering test case set.
6356 * @param testcases Array of test cases.
6357 * @param n_testcases Size of the array testcases.
6358 * @param reorderTokens Array of reordering codes.
6359 * @param reorderTokensLen Size of the array reorderTokens.
6360 */
6361 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6362 {
6363 uint32_t testCaseNum;
6364 UErrorCode status = U_ZERO_ERROR;
6365 UCollator *myCollation;
6366
6367 myCollation = ucol_open("", &status);
6368 if (U_FAILURE(status)) {
6369 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6370 return;
6371 }
6372 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6373 if(U_FAILURE(status)) {
6374 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6375 return;
6376 }
6377
6378 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6379 doTest(myCollation,
6380 testCases[testCaseNum].source,
6381 testCases[testCaseNum].target,
6382 testCases[testCaseNum].result
6383 );
6384 }
6385 ucol_close(myCollation);
6386 }
6387
6388 static void TestGreekFirstReorder(void)
6389 {
6390 const char* strRules[] = {
6391 "[reorder Grek]"
6392 };
6393
6394 const int32_t apiRules[] = {
6395 USCRIPT_GREEK
6396 };
6397
6398 const static OneTestCase privateUseCharacterStrings[] = {
6399 { {0x0391}, {0x0391}, UCOL_EQUAL },
6400 { {0x0041}, {0x0391}, UCOL_GREATER },
6401 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6402 { {0x0060}, {0x0391}, UCOL_LESS },
6403 { {0x0391}, {0xe2dc}, UCOL_LESS },
6404 { {0x0391}, {0x0060}, UCOL_GREATER },
6405 };
6406
6407 /* Test rules creation */
6408 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6409
6410 /* Test collation reordering API */
6411 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6412 }
6413
6414 static void TestGreekLastReorder(void)
6415 {
6416 const char* strRules[] = {
6417 "[reorder Zzzz Grek]"
6418 };
6419
6420 const int32_t apiRules[] = {
6421 USCRIPT_UNKNOWN, USCRIPT_GREEK
6422 };
6423
6424 const static OneTestCase privateUseCharacterStrings[] = {
6425 { {0x0391}, {0x0391}, UCOL_EQUAL },
6426 { {0x0041}, {0x0391}, UCOL_LESS },
6427 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6428 { {0x0060}, {0x0391}, UCOL_LESS },
6429 { {0x0391}, {0xe2dc}, UCOL_GREATER },
6430 };
6431
6432 /* Test rules creation */
6433 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6434
6435 /* Test collation reordering API */
6436 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6437 }
6438
6439 static void TestNonScriptReorder(void)
6440 {
6441 const char* strRules[] = {
6442 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6443 };
6444
6445 const int32_t apiRules[] = {
6446 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6447 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6448 UCOL_REORDER_CODE_CURRENCY
6449 };
6450
6451 const static OneTestCase privateUseCharacterStrings[] = {
6452 { {0x0391}, {0x0041}, UCOL_LESS },
6453 { {0x0041}, {0x0391}, UCOL_GREATER },
6454 { {0x0060}, {0x0041}, UCOL_LESS },
6455 { {0x0060}, {0x0391}, UCOL_GREATER },
6456 { {0x0024}, {0x0041}, UCOL_GREATER },
6457 };
6458
6459 /* Test rules creation */
6460 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6461
6462 /* Test collation reordering API */
6463 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6464 }
6465
6466 static void TestHaniReorder(void)
6467 {
6468 const char* strRules[] = {
6469 "[reorder Hani]"
6470 };
6471 const int32_t apiRules[] = {
6472 USCRIPT_HAN
6473 };
6474
6475 const static OneTestCase privateUseCharacterStrings[] = {
6476 { {0x4e00}, {0x0041}, UCOL_LESS },
6477 { {0x4e00}, {0x0060}, UCOL_GREATER },
6478 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6479 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6480 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6481 { {0xfa27}, {0x0041}, UCOL_LESS },
6482 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6483 };
6484
6485 /* Test rules creation */
6486 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6487
6488 /* Test collation reordering API */
6489 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6490 }
6491
6492 static void TestHaniReorderWithOtherRules(void)
6493 {
6494 const char* strRules[] = {
6495 "[reorder Hani] &b<a"
6496 };
6497 const int32_t apiRules[] = {
6498 USCRIPT_HAN
6499 };
6500
6501 const static OneTestCase privateUseCharacterStrings[] = {
6502 { {0x4e00}, {0x0041}, UCOL_LESS },
6503 { {0x4e00}, {0x0060}, UCOL_GREATER },
6504 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6505 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6506 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6507 { {0xfa27}, {0x0041}, UCOL_LESS },
6508 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6509 { {0x0062}, {0x0061}, UCOL_LESS },
6510 };
6511
6512 /* Test rules creation */
6513 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6514 }
6515
6516 static void TestMultipleReorder(void)
6517 {
6518 const char* strRules[] = {
6519 "[reorder Grek Zzzz DIGIT Latn Hani]"
6520 };
6521
6522 const int32_t apiRules[] = {
6523 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6524 };
6525
6526 const static OneTestCase collationTestCases[] = {
6527 { {0x0391}, {0x0041}, UCOL_LESS},
6528 { {0x0031}, {0x0041}, UCOL_LESS},
6529 { {0x0041}, {0x4e00}, UCOL_LESS},
6530 };
6531
6532 /* Test rules creation */
6533 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6534
6535 /* Test collation reordering API */
6536 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6537 }
6538
6539 /*
6540 * Test that covers issue reported in ticket 8814
6541 */
6542 static void TestReorderWithNumericCollation()
6543 {
6544 UErrorCode status = U_ZERO_ERROR;
6545 UCollator *myCollation;
6546 UCollator *myReorderCollation;
6547 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
6548 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6549 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6550 UChar fortyS[] = { 0x0053 };
6551 UChar fortyThreeP[] = { 0x0050 };
6552 uint8_t fortyS_sortKey[128];
6553 int32_t fortyS_sortKey_Length;
6554 uint8_t fortyThreeP_sortKey[128];
6555 int32_t fortyThreeP_sortKey_Length;
6556 uint8_t fortyS_sortKey_reorder[128];
6557 int32_t fortyS_sortKey_reorder_Length;
6558 uint8_t fortyThreeP_sortKey_reorder[128];
6559 int32_t fortyThreeP_sortKey_reorder_Length;
6560 UCollationResult collResult;
6561 UCollationResult collResultReorder;
6562 int i;
6563
6564 log_verbose("Testing reordering with and without numeric collation\n");
6565
6566 /* build collator tertiary with numeric */
6567 myCollation = ucol_open("", &status);
6568 /*
6569 ucol_setStrength(myCollation, UCOL_TERTIARY);
6570 */
6571 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6572 if(U_FAILURE(status)) {
6573 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6574 return;
6575 }
6576
6577 /* build collator tertiary with numeric and reordering */
6578 myReorderCollation = ucol_open("", &status);
6579 /*
6580 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6581 */
6582 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6583 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
6584 if(U_FAILURE(status)) {
6585 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6586 return;
6587 }
6588
6589 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
6590 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
6591 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
6592 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
6593
6594 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
6595 log_err_status(status, "ERROR: couldn't generate sort keys\n");
6596 return;
6597 }
6598 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6599 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6600 /*
6601 fprintf(stderr, "\tcollResult = %x\n", collResult);
6602 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6603 fprintf(stderr, "\nfortyS\n");
6604 for (i = 0; i < fortyS_sortKey_Length; i++) {
6605 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6606 }
6607 fprintf(stderr, "\nfortyThreeP\n");
6608 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6609 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6610 }
6611 */
6612 if (collResult != collResultReorder) {
6613 log_err_status(status, "ERROR: collation results should have been the same.\n");
6614 return;
6615 }
6616
6617 ucol_close(myCollation);
6618 ucol_close(myReorderCollation);
6619 }
6620
/*
 * Compares two zero-terminated byte sequences (such as sort keys).
 *
 * Returns 0 when both sequences are identical up to and including the
 * terminating zero byte, -1 when the first differing byte of a is smaller
 * than the corresponding byte of b, and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator of both sequences at the same position. */
            return 0;
        }
        ++a;
        ++b;
    }
    if (*a < *b) {
        return -1;
    }
    return 1;
}
6630
6631 static void TestImportRulesDeWithPhonebook(void)
6632 {
6633 const char* normalRules[] = {
6634 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6635 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6636 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6637 };
6638 const OneTestCase normalTests[] = {
6639 { {0x00e6}, {0x00c6}, UCOL_LESS},
6640 { {0x00fc}, {0x00dc}, UCOL_GREATER},
6641 };
6642
6643 const char* importRules[] = {
6644 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6645 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6646 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6647 };
6648 const OneTestCase importTests[] = {
6649 { {0x00e6}, {0x00c6}, UCOL_LESS},
6650 { {0x00fc}, {0x00dc}, UCOL_LESS},
6651 };
6652
6653 doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
6654 doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
6655 }
6656
6657 static void TestImportRulesFiWithEor(void)
6658 {
6659 /* DUCET. */
6660 const char* defaultRules[] = {
6661 "&a<b", /* Dummy rule. */
6662 };
6663
6664 const OneTestCase defaultTests[] = {
6665 { {0x0110}, {0x00F0}, UCOL_LESS},
6666 { {0x00a3}, {0x00a5}, UCOL_LESS},
6667 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
6668 };
6669
6670 /* European Ordering rules: ignore currency characters. */
6671 const char* eorRules[] = {
6672 "[import root-u-co-eor]",
6673 };
6674
6675 const OneTestCase eorTests[] = {
6676 { {0x0110}, {0x00F0}, UCOL_LESS},
6677 { {0x00a3}, {0x00a5}, UCOL_EQUAL},
6678 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
6679 };
6680
6681 const char* fiStdRules[] = {
6682 "[import fi-u-co-standard]",
6683 };
6684
6685 const OneTestCase fiStdTests[] = {
6686 { {0x0110}, {0x00F0}, UCOL_GREATER},
6687 { {0x00a3}, {0x00a5}, UCOL_LESS},
6688 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
6689 };
6690
6691 /* Both European Ordering Rules and Fi Standard Rules. */
6692 const char* eorFiStdRules[] = {
6693 "[import root-u-co-eor][import fi-u-co-standard]",
6694 };
6695
6696 /* This is essentially same as the one before once fi.txt is updated with import. */
6697 const char* fiEorRules[] = {
6698 "[import fi-u-co-eor]",
6699 };
6700
6701 const OneTestCase fiEorTests[] = {
6702 { {0x0110}, {0x00F0}, UCOL_GREATER},
6703 { {0x00a3}, {0x00a5}, UCOL_EQUAL},
6704 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
6705 };
6706
6707 doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
6708 doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
6709 doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
6710 doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));
6711
6712 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
6713 eor{
6714 Sequence{
6715 "[import root-u-co-eor][import fi-u-co-standard]"
6716 }
6717 Version{"21.0"}
6718 }
6719 */
6720 /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
6721
6722 }
6723
#if 0
/*
 * Disabled test: checks rule import with the unihan collation type.  It cannot
 * be enabled unless the resource files are built with the -includeUnihanColl
 * option.
 * TODO: Enable this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* Baseline: DUCET, reached through a dummy rule. */
    const char* ducetRules[] = {
        "&a<b", /* Dummy rule. */
    };
    const OneTestCase ducetExpected[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER},
    };

    /* Import of the Korean unihan tailoring; the expected order of the pair flips. */
    const char* unihanImportRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase unihanExpected[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS},
    };

    doTestOneTestCase(ducetExpected, LEN(ducetExpected), ducetRules, LEN(ducetRules));
    doTestOneTestCase(unihanExpected, LEN(unihanExpected), unihanImportRules, LEN(unihanImportRules));

}
#endif
6755
6756 static void TestImport(void)
6757 {
6758 UCollator* vicoll;
6759 UCollator* escoll;
6760 UCollator* viescoll;
6761 UCollator* importviescoll;
6762 UParseError error;
6763 UErrorCode status = U_ZERO_ERROR;
6764 UChar* virules;
6765 int32_t viruleslength;
6766 UChar* esrules;
6767 int32_t esruleslength;
6768 UChar* viesrules;
6769 int32_t viesruleslength;
6770 char srules[500] = "[import vi][import es]";
6771 UChar rules[500];
6772 uint32_t length = 0;
6773 int32_t itemCount;
6774 int32_t i, k;
6775 UChar32 start;
6776 UChar32 end;
6777 UChar str[500];
6778 int32_t strLength;
6779
6780 uint8_t sk1[500];
6781 uint8_t sk2[500];
6782
6783 UBool b;
6784 USet* tailoredSet;
6785 USet* importTailoredSet;
6786
6787
6788 vicoll = ucol_open("vi", &status);
6789 if(U_FAILURE(status)){
6790 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6791 return;
6792 }
6793
6794 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6795 escoll = ucol_open("es", &status);
6796 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6797 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6798 viesrules[0] = 0;
6799 u_strcat(viesrules, virules);
6800 u_strcat(viesrules, esrules);
6801 viesruleslength = viruleslength + esruleslength;
6802 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6803
6804 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6805 length = u_unescape(srules, rules, 500);
6806 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6807 if(U_FAILURE(status)){
6808 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6809 return;
6810 }
6811
6812 tailoredSet = ucol_getTailoredSet(viescoll, &status);
6813 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6814
6815 if(!uset_equals(tailoredSet, importTailoredSet)){
6816 log_err("Tailored sets not equal");
6817 }
6818
6819 uset_close(importTailoredSet);
6820
6821 itemCount = uset_getItemCount(tailoredSet);
6822
6823 for( i = 0; i < itemCount; i++){
6824 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6825 if(strLength < 2){
6826 for (; start <= end; start++){
6827 k = 0;
6828 U16_APPEND(str, k, 500, start, b);
6829 ucol_getSortKey(viescoll, str, 1, sk1, 500);
6830 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6831 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6832 log_err("Sort key for %s not equal\n", str);
6833 break;
6834 }
6835 }
6836 }else{
6837 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6838 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6839 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6840 log_err("ZZSort key for %s not equal\n", str);
6841 break;
6842 }
6843
6844 }
6845 }
6846
6847 uset_close(tailoredSet);
6848
6849 uprv_free(viesrules);
6850
6851 ucol_close(vicoll);
6852 ucol_close(escoll);
6853 ucol_close(viescoll);
6854 ucol_close(importviescoll);
6855 }
6856
6857 static void TestImportWithType(void)
6858 {
6859 UCollator* vicoll;
6860 UCollator* decoll;
6861 UCollator* videcoll;
6862 UCollator* importvidecoll;
6863 UParseError error;
6864 UErrorCode status = U_ZERO_ERROR;
6865 const UChar* virules;
6866 int32_t viruleslength;
6867 const UChar* derules;
6868 int32_t deruleslength;
6869 UChar* viderules;
6870 int32_t videruleslength;
6871 const char srules[500] = "[import vi][import de-u-co-phonebk]";
6872 UChar rules[500];
6873 uint32_t length = 0;
6874 int32_t itemCount;
6875 int32_t i, k;
6876 UChar32 start;
6877 UChar32 end;
6878 UChar str[500];
6879 int32_t strLength;
6880
6881 uint8_t sk1[500];
6882 uint8_t sk2[500];
6883
6884 USet* tailoredSet;
6885 USet* importTailoredSet;
6886
6887 vicoll = ucol_open("vi", &status);
6888 if(U_FAILURE(status)){
6889 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6890 return;
6891 }
6892 virules = ucol_getRules(vicoll, &viruleslength);
6893 /* decoll = ucol_open("de@collation=phonebook", &status); */
6894 decoll = ucol_open("de-u-co-phonebk", &status);
6895 if(U_FAILURE(status)){
6896 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6897 return;
6898 }
6899
6900
6901 derules = ucol_getRules(decoll, &deruleslength);
6902 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6903 viderules[0] = 0;
6904 u_strcat(viderules, virules);
6905 u_strcat(viderules, derules);
6906 videruleslength = viruleslength + deruleslength;
6907 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6908
6909 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6910 length = u_unescape(srules, rules, 500);
6911 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6912 if(U_FAILURE(status)){
6913 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6914 return;
6915 }
6916
6917 tailoredSet = ucol_getTailoredSet(videcoll, &status);
6918 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6919
6920 if(!uset_equals(tailoredSet, importTailoredSet)){
6921 log_err("Tailored sets not equal");
6922 }
6923
6924 uset_close(importTailoredSet);
6925
6926 itemCount = uset_getItemCount(tailoredSet);
6927
6928 for( i = 0; i < itemCount; i++){
6929 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6930 if(strLength < 2){
6931 for (; start <= end; start++){
6932 k = 0;
6933 U16_APPEND_UNSAFE(str, k, start);
6934 ucol_getSortKey(videcoll, str, 1, sk1, 500);
6935 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6936 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6937 log_err("Sort key for %s not equal\n", str);
6938 break;
6939 }
6940 }
6941 }else{
6942 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6943 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6944 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6945 log_err("Sort key for %s not equal\n", str);
6946 break;
6947 }
6948
6949 }
6950 }
6951
6952 uset_close(tailoredSet);
6953
6954 uprv_free(viderules);
6955
6956 ucol_close(videcoll);
6957 ucol_close(importvidecoll);
6958 ucol_close(vicoll);
6959 ucol_close(decoll);
6960 }
6961
6962 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
6963 static const UChar longUpperStr1[]= { /* 155 chars */
6964 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
6965 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
6966 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
6967 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
6968 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
6969 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
6970 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
6971 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
6972 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
6973 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
6974 };
6975
6976 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
6977 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
6978 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6979 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6980 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6981 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6982 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
6983 };
6984
6985 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
6986 static const UChar longUpperStr3[]= { /* 324 chars */
6987 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6988 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6989 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6990 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6991 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6992 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6993 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6994 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6995 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6996 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6997 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6998 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
6999 };
7000
/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized so that an expression argument still indexes the whole
 * expression.  Same computation as the LEN() macro near the top of this file.
 */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof((array)[0]))
7002
7003 typedef struct {
7004 const UChar * longUpperStrPtr;
7005 int32_t longUpperStrLen;
7006 } LongUpperStrItem;
7007
7008 /* String pointers must be in reverse collation order of the corresponding strings */
7009 static const LongUpperStrItem longUpperStrItems[] = {
7010 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
7011 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
7012 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
7013 { NULL, 0 }
7014 };
7015
7016 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
7017
7018 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7019 static void TestCaseLevelBufferOverflow(void)
7020 {
7021 UErrorCode status = U_ZERO_ERROR;
7022 UCollator * ucol = ucol_open("root", &status);
7023 if ( U_SUCCESS(status) ) {
7024 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
7025 if ( U_SUCCESS(status) ) {
7026 const LongUpperStrItem * itemPtr;
7027 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
7028 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
7029 int32_t sortKeyLen;
7030 if (itemPtr > longUpperStrItems) {
7031 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
7032 }
7033 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
7034 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
7035 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
7036 break;
7037 }
7038 if ( itemPtr > longUpperStrItems ) {
7039 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
7040 if (compareResult >= 0) {
7041 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
7042 }
7043 }
7044 }
7045 } else {
7046 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
7047 }
7048 ucol_close(ucol);
7049 } else {
7050 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
7051 }
7052 }
7053
7054
7055 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
7056
7057 void addMiscCollTest(TestNode** root)
7058 {
7059 TEST(TestRuleOptions);
7060 TEST(TestBeforePrefixFailure);
7061 TEST(TestContractionClosure);
7062 TEST(TestPrefixCompose);
7063 TEST(TestStrCollIdenticalPrefix);
7064 TEST(TestPrefix);
7065 TEST(TestNewJapanese);
7066 /*TEST(TestLimitations);*/
7067 TEST(TestNonChars);
7068 TEST(TestExtremeCompression);
7069 TEST(TestSurrogates);
7070 TEST(TestVariableTopSetting);
7071 TEST(TestBocsuCoverage);
7072 TEST(TestCyrillicTailoring);
7073 TEST(TestCase);
7074 TEST(IncompleteCntTest);
7075 TEST(BlackBirdTest);
7076 TEST(FunkyATest);
7077 TEST(BillFairmanTest);
7078 TEST(RamsRulesTest);
7079 TEST(IsTailoredTest);
7080 TEST(TestCollations);
7081 TEST(TestChMove);
7082 TEST(TestImplicitTailoring);
7083 TEST(TestFCDProblem);
7084 TEST(TestEmptyRule);
7085 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
7086 TEST(TestJ815);
7087 /*TEST(TestJ831);*/ /* we changed lv locale */
7088 TEST(TestBefore);
7089 TEST(TestRedundantRules);
7090 TEST(TestExpansionSyntax);
7091 TEST(TestHangulTailoring);
7092 TEST(TestUCARules);
7093 TEST(TestIncrementalNormalize);
7094 TEST(TestComposeDecompose);
7095 TEST(TestCompressOverlap);
7096 TEST(TestContraction);
7097 TEST(TestExpansion);
7098 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
7099 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
7100 TEST(TestOptimize);
7101 TEST(TestSuppressContractions);
7102 TEST(Alexis2);
7103 TEST(TestHebrewUCA);
7104 TEST(TestPartialSortKeyTermination);
7105 TEST(TestSettings);
7106 TEST(TestEquals);
7107 TEST(TestJ2726);
7108 TEST(NullRule);
7109 TEST(TestNumericCollation);
7110 TEST(TestTibetanConformance);
7111 TEST(TestPinyinProblem);
7112 TEST(TestImplicitGeneration);
7113 TEST(TestSeparateTrees);
7114 TEST(TestBeforePinyin);
7115 TEST(TestBeforeTightening);
7116 /*TEST(TestMoreBefore);*/
7117 TEST(TestTailorNULL);
7118 TEST(TestUpperFirstQuaternary);
7119 TEST(TestJ4960);
7120 TEST(TestJ5223);
7121 TEST(TestJ5232);
7122 TEST(TestJ5367);
7123 TEST(TestHiragana);
7124 TEST(TestSortKeyConsistency);
7125 TEST(TestVI5913); /* VI, RO tailored rules */
7126 TEST(TestCroatianSortKey);
7127 TEST(TestTailor6179);
7128 TEST(TestUCAPrecontext);
7129 TEST(TestOutOfBuffer5468);
7130 TEST(TestSameStrengthList);
7131
7132 TEST(TestSameStrengthListQuoted);
7133 TEST(TestSameStrengthListSupplemental);
7134 TEST(TestSameStrengthListQwerty);
7135 TEST(TestSameStrengthListQuotedQwerty);
7136 TEST(TestSameStrengthListRanges);
7137 TEST(TestSameStrengthListSupplementalRanges);
7138 TEST(TestSpecialCharacters);
7139 TEST(TestPrivateUseCharacters);
7140 TEST(TestPrivateUseCharactersInList);
7141 TEST(TestPrivateUseCharactersInRange);
7142 TEST(TestInvalidListsAndRanges);
7143 TEST(TestImportRulesDeWithPhonebook);
7144 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
7145 /* TEST(TestImportRulesCJKWithUnihan); */
7146 TEST(TestImport);
7147 TEST(TestImportWithType);
7148
7149 TEST(TestBeforeRuleWithScriptReordering);
7150 TEST(TestNonLeadBytesDuringCollationReordering);
7151 TEST(TestReorderingAPI);
7152 TEST(TestReorderingAPIWithRuleCreatedCollator);
7153 TEST(TestEquivalentReorderingScripts);
7154 TEST(TestGreekFirstReorder);
7155 TEST(TestGreekLastReorder);
7156 TEST(TestNonScriptReorder);
7157 TEST(TestHaniReorder);
7158 TEST(TestHaniReorderWithOtherRules);
7159 TEST(TestMultipleReorder);
7160 TEST(TestReorderingAcrossCloning);
7161 /* test for ticket 8814 - disabled until resolved */
7162 /*TEST(TestReorderWithNumericCollation);*/
7163
7164 TEST(TestCaseLevelBufferOverflow);
7165 }
7166
7167 #endif /* #if !UCONFIG_NO_COLLATION */