]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cmsccoll.c
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cmsccoll.c
1
2 /********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2013, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7 /*******************************************************************************
8 *
9 * File cmsccoll.C
10 *
11 *******************************************************************************/
12 /**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17 #include <stdio.h>
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_COLLATION
22
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
26 #include "cintltst.h"
27 #include "ccolltst.h"
28 #include "callcoll.h"
29 #include "unicode/ustring.h"
30 #include "string.h"
31 #include "ucol_imp.h"
32 #include "ucol_tok.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43
44
45 #define LEN(a) (sizeof(a)/sizeof(a[0]))
46
47 #define MAX_TOKEN_LEN 16
48
49 typedef UCollationResult tst_strcoll(void *collator, const int object,
50 const UChar *source, const int sLen,
51 const UChar *target, const int tLen);
52
53
54
/*
 * Strings for the first pass of IncompleteCntTest.  The test expects every
 * entry to compare UCOL_LESS than every entry that follows it.
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ",
    "AB", "ABZ", "ABQ",
    "Z", "ABC", "Q", "B"
};
69
/*
 * Strings for the second pass of IncompleteCntTest.  The test expects every
 * entry to compare UCOL_LESS than every entry that follows it.
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ",
    "MAR", "Z",
    "DAVIS", "MARK", "DAV", "DAVI"
};
81
82 static void IncompleteCntTest(void)
83 {
84 UErrorCode status = U_ZERO_ERROR;
85 UChar temp[90];
86 UChar t1[90];
87 UChar t2[90];
88
89 UCollator *coll = NULL;
90 uint32_t i = 0, j = 0;
91 uint32_t size = 0;
92
93 u_uastrcpy(temp, " & Z < ABC < Q < B");
94
95 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
96
97 if(U_SUCCESS(status)) {
98 size = sizeof(cnt1)/sizeof(cnt1[0]);
99 for(i = 0; i < size-1; i++) {
100 for(j = i+1; j < size; j++) {
101 UCollationElements *iter;
102 u_uastrcpy(t1, cnt1[i]);
103 u_uastrcpy(t2, cnt1[j]);
104 doTest(coll, t1, t2, UCOL_LESS);
105 /* synwee : added collation element iterator test */
106 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
107 if (U_FAILURE(status)) {
108 log_err("Creation of iterator failed\n");
109 break;
110 }
111 backAndForth(iter);
112 ucol_closeElements(iter);
113 }
114 }
115 }
116
117 ucol_close(coll);
118
119
120 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
121 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
122
123 if(U_SUCCESS(status)) {
124 size = sizeof(cnt2)/sizeof(cnt2[0]);
125 for(i = 0; i < size-1; i++) {
126 for(j = i+1; j < size; j++) {
127 UCollationElements *iter;
128 u_uastrcpy(t1, cnt2[i]);
129 u_uastrcpy(t2, cnt2[j]);
130 doTest(coll, t1, t2, UCOL_LESS);
131
132 /* synwee : added collation element iterator test */
133 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
134 if (U_FAILURE(status)) {
135 log_err("Creation of iterator failed\n");
136 break;
137 }
138 backAndForth(iter);
139 ucol_closeElements(iter);
140 }
141 }
142 }
143
144 ucol_close(coll);
145
146
147 }
148
/*
 * "black bird" variants in the order BlackBirdTest expects them when
 * alternate handling is UCOL_SHIFTED.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
160
161 const static UCollationResult shiftedTert[] = {
162 UCOL_EQUAL,
163 UCOL_EQUAL,
164 UCOL_EQUAL,
165 UCOL_LESS,
166 UCOL_EQUAL,
167 UCOL_EQUAL,
168 UCOL_LESS,
169 UCOL_EQUAL,
170 UCOL_EQUAL
171 };
172
/*
 * "black bird" variants in the order BlackBirdTest expects them when
 * alternate handling is UCOL_NON_IGNORABLE.
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
184
185 static void BlackBirdTest(void) {
186 UErrorCode status = U_ZERO_ERROR;
187 UChar t1[90];
188 UChar t2[90];
189
190 uint32_t i = 0, j = 0;
191 uint32_t size = 0;
192 UCollator *coll = ucol_open("en_US", &status);
193
194 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
195 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
196
197 if(U_SUCCESS(status)) {
198 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
199 for(i = 0; i < size-1; i++) {
200 for(j = i+1; j < size; j++) {
201 u_uastrcpy(t1, nonignorable[i]);
202 u_uastrcpy(t2, nonignorable[j]);
203 doTest(coll, t1, t2, UCOL_LESS);
204 }
205 }
206 }
207
208 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
209 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
210
211 if(U_SUCCESS(status)) {
212 size = sizeof(shifted)/sizeof(shifted[0]);
213 for(i = 0; i < size-1; i++) {
214 for(j = i+1; j < size; j++) {
215 u_uastrcpy(t1, shifted[i]);
216 u_uastrcpy(t2, shifted[j]);
217 doTest(coll, t1, t2, UCOL_LESS);
218 }
219 }
220 }
221
222 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
223 if(U_SUCCESS(status)) {
224 size = sizeof(shifted)/sizeof(shifted[0]);
225 for(i = 1; i < size; i++) {
226 u_uastrcpy(t1, shifted[i-1]);
227 u_uastrcpy(t2, shifted[i]);
228 doTest(coll, t1, t2, shiftedTert[i]);
229 }
230 }
231
232 ucol_close(coll);
233 }
234
235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
236 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238 {0x0041/*'A'*/, 0x0300, 0x0000},
239 {0x00C0, 0x0301, 0x0000},
240 /* this would work with forced normalization */
241 {0x00C0, 0x0316, 0x0000}
242 };
243
244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
247 {0x00C0, 0},
248 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249 /* this would work with forced normalization */
250 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
251 };
252
253 const static UCollationResult results[] = {
254 UCOL_GREATER,
255 UCOL_EQUAL,
256 UCOL_EQUAL,
257 UCOL_GREATER,
258 UCOL_EQUAL
259 };
260
261 static void FunkyATest(void)
262 {
263
264 int32_t i;
265 UErrorCode status = U_ZERO_ERROR;
266 UCollator *myCollation;
267 myCollation = ucol_open("en_US", &status);
268 if(U_FAILURE(status)){
269 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
270 return;
271 }
272 log_verbose("Testing some A letters, for some reason\n");
273 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
274 ucol_setStrength(myCollation, UCOL_TERTIARY);
275 for (i = 0; i < 4 ; i++)
276 {
277 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
278 }
279 ucol_close(myCollation);
280 }
281
282 UColAttributeValue caseFirst[] = {
283 UCOL_OFF,
284 UCOL_LOWER_FIRST,
285 UCOL_UPPER_FIRST
286 };
287
288
289 UColAttributeValue alternateHandling[] = {
290 UCOL_NON_IGNORABLE,
291 UCOL_SHIFTED
292 };
293
294 UColAttributeValue caseLevel[] = {
295 UCOL_OFF,
296 UCOL_ON
297 };
298
299 UColAttributeValue strengths[] = {
300 UCOL_PRIMARY,
301 UCOL_SECONDARY,
302 UCOL_TERTIARY,
303 UCOL_QUATERNARY,
304 UCOL_IDENTICAL
305 };
306
#if 0
/* Printable names, index-aligned with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names, index-aligned with alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "Mark Davis" (with its second letter
 * replaced by U+00E4) for every combination of case-first, alternate
 * handling, case level and strength.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    /* nothing useful can be printed without a collator */
    if(U_FAILURE(status)) {
        return;
    }

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);


    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /* index with h: i still holds sizem from the loop above and would
           read past the end of caseFirst[] */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }

    /* release the collator opened above */
    ucol_close(coll);
}
#endif
384
385 static void BillFairmanTest(void) {
386 /*
387 ** check for actual locale via ICU resource bundles
388 **
389 ** lp points to the original locale ("fr_FR_....")
390 */
391
392 UResourceBundle *lr,*cr;
393 UErrorCode lec = U_ZERO_ERROR;
394 const char *lp = "fr_FR_you_ll_never_find_this_locale";
395
396 log_verbose("BillFairmanTest\n");
397
398 lr = ures_open(NULL,lp,&lec);
399 if (lr) {
400 cr = ures_getByKey(lr,"collations",0,&lec);
401 if (cr) {
402 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
403 if (lp) {
404 if (U_SUCCESS(lec)) {
405 if(strcmp(lp, "fr") != 0) {
406 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
407 }
408 }
409 }
410 ures_close(cr);
411 }
412 ures_close(lr);
413 }
414 }
415
416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
417 UChar source[256] = { '\0'};
418 UChar target[256] = { '\0'};
419 UChar preP = 0x31a3;
420 UChar preQ = 0x310d;
421 /*
422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
424 */
425 /*log_verbose("Testing primary\n");*/
426
427 doTest(col, p, q, UCOL_LESS);
428 /*
429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
430
431 if(result!=UCOL_LESS){
432 aescstrdup(p,utfSource,256);
433 aescstrdup(q,utfTarget,256);
434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
435 }
436 */
437 source[0] = preP;
438 u_strcpy(source+1,p);
439 target[0] = preQ;
440 u_strcpy(target+1,q);
441 doTest(col, source, target, UCOL_LESS);
442 /*
443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
444 */
445 }
446
447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
448 UChar source[256] = { '\0'};
449 UChar target[256] = { '\0'};
450
451 /*log_verbose("Testing secondary\n");*/
452
453 doTest(col, p, q, UCOL_LESS);
454 /*
455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
456 */
457 source[0] = 0x0053;
458 u_strcpy(source+1,p);
459 target[0]= 0x0073;
460 u_strcpy(target+1,q);
461
462 doTest(col, source, target, UCOL_LESS);
463 /*
464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
465 */
466
467
468 u_strcpy(source,p);
469 source[u_strlen(p)] = 0x62;
470 source[u_strlen(p)+1] = 0;
471
472
473 u_strcpy(target,q);
474 target[u_strlen(q)] = 0x61;
475 target[u_strlen(q)+1] = 0;
476
477 doTest(col, source, target, UCOL_GREATER);
478
479 /*
480 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
481 */
482 }
483
484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
485 UChar source[256] = { '\0'};
486 UChar target[256] = { '\0'};
487
488 /*log_verbose("Testing tertiary\n");*/
489
490 doTest(col, p, q, UCOL_LESS);
491 /*
492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
493 */
494 source[0] = 0x0020;
495 u_strcpy(source+1,p);
496 target[0]= 0x002D;
497 u_strcpy(target+1,q);
498
499 doTest(col, source, target, UCOL_LESS);
500 /*
501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
502 */
503
504 u_strcpy(source,p);
505 source[u_strlen(p)] = 0xE0;
506 source[u_strlen(p)+1] = 0;
507
508 u_strcpy(target,q);
509 target[u_strlen(q)] = 0x61;
510 target[u_strlen(q)+1] = 0;
511
512 doTest(col, source, target, UCOL_GREATER);
513
514 /*
515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
516 */
517 }
518
519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
520 /*
521 UChar source[256] = { '\0'};
522 UChar target[256] = { '\0'};
523 */
524
525 doTest(col, p, q, UCOL_EQUAL);
526 /*
527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
528 */
529 }
530
531 static void testCollator(UCollator *coll, UErrorCode *status) {
532 const UChar *rules = NULL, *current = NULL;
533 int32_t ruleLen = 0;
534 uint32_t strength = 0;
535 uint32_t chOffset = 0; uint32_t chLen = 0;
536 uint32_t exOffset = 0; uint32_t exLen = 0;
537 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
538 uint32_t firstEx = 0;
539 /* uint32_t rExpsLen = 0; */
540 uint32_t firstLen = 0;
541 UBool varT = FALSE; UBool top_ = TRUE;
542 uint16_t specs = 0;
543 UBool startOfRules = TRUE;
544 UBool lastReset = FALSE;
545 UBool before = FALSE;
546 uint32_t beforeStrength = 0;
547 UColTokenParser src;
548 UColOptionSet opts;
549
550 UChar first[256];
551 UChar second[256];
552 UChar tempB[256];
553 uint32_t tempLen;
554 UChar *rulesCopy = NULL;
555 UParseError parseError;
556
557 uprv_memset(&src, 0, sizeof(UColTokenParser));
558
559 src.opts = &opts;
560
561 rules = ucol_getRules(coll, &ruleLen);
562 if(U_SUCCESS(*status) && ruleLen > 0) {
563 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
564 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
565 src.current = src.source = rulesCopy;
566 src.end = rulesCopy+ruleLen;
567 src.extraCurrent = src.end;
568 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
569 *first = *second = 0;
570
571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
572 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
574 strength = src.parsedToken.strength;
575 chOffset = src.parsedToken.charsOffset;
576 chLen = src.parsedToken.charsLen;
577 exOffset = src.parsedToken.extensionOffset;
578 exLen = src.parsedToken.extensionLen;
579 prefixOffset = src.parsedToken.prefixOffset;
580 prefixLen = src.parsedToken.prefixLen;
581 specs = src.parsedToken.flags;
582
583 startOfRules = FALSE;
584 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
585 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
586 if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
587 second[0] = 0;
588 } else {
589 u_strncpy(second,src.source+chOffset, chLen);
590 second[chLen] = 0;
591
592 if(exLen > 0 && firstEx == 0) {
593 u_strncat(first, src.source+exOffset, exLen);
594 first[firstLen+exLen] = 0;
595 }
596
597 if(lastReset == TRUE && prefixLen != 0) {
598 u_strncpy(first+prefixLen, first, firstLen);
599 u_strncpy(first, src.source+prefixOffset, prefixLen);
600 first[firstLen+prefixLen] = 0;
601 firstLen = firstLen+prefixLen;
602 }
603
604 if(before == TRUE) { /* swap first and second */
605 u_strcpy(tempB, first);
606 u_strcpy(first, second);
607 u_strcpy(second, tempB);
608
609 tempLen = firstLen;
610 firstLen = chLen;
611 chLen = tempLen;
612
613 tempLen = firstEx;
614 firstEx = exLen;
615 exLen = tempLen;
616 if(beforeStrength < strength) {
617 strength = beforeStrength;
618 }
619 }
620 }
621 lastReset = FALSE;
622
623 switch(strength){
624 case UCOL_IDENTICAL:
625 testEquality(coll,first,second);
626 break;
627 case UCOL_PRIMARY:
628 testPrimary(coll,first,second);
629 break;
630 case UCOL_SECONDARY:
631 testSecondary(coll,first,second);
632 break;
633 case UCOL_TERTIARY:
634 testTertiary(coll,first,second);
635 break;
636 case UCOL_TOK_RESET:
637 lastReset = TRUE;
638 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
639 if(before) {
640 beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
641 }
642 break;
643 default:
644 break;
645 }
646
647 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
648 before = FALSE;
649 } else {
650 firstLen = chLen;
651 firstEx = exLen;
652 u_strcpy(first, second);
653 }
654 }
655 uprv_free(src.source);
656 uprv_free(src.reorderCodes);
657 }
658 }
659
660 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
661 UCollator *UCA = (UCollator *)collator;
662 return ucol_strcoll(UCA, source, sLen, target, tLen);
663 }
664
665 /*
666 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
667 #if U_PLATFORM_HAS_WIN32_API
668 LCID lcid = (LCID)collator;
669 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
670 #else
671 return 0;
672 #endif
673 }
674 */
675
676 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
677 UChar s1, UChar s2,
678 const UChar *s, const uint32_t sLen,
679 const UChar *t, const uint32_t tLen) {
680 UChar source[256] = {0};
681 UChar target[256] = {0};
682
683 source[0] = s1;
684 u_strcpy(source+1, s);
685 target[0] = s2;
686 u_strcpy(target+1, t);
687
688 return func(collator, opts, source, sLen+1, target, tLen+1);
689 }
690
691 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
692 UChar s1, UChar s2,
693 const UChar *s, const uint32_t sLen,
694 const UChar *t, const uint32_t tLen) {
695 UChar source[256] = {0};
696 UChar target[256] = {0};
697
698 u_strcpy(source, s);
699 source[sLen] = s1;
700 u_strcpy(target, t);
701 target[tLen] = s2;
702
703 return func(collator, opts, source, sLen+1, target, tLen+1);
704 }
705
706 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
707 const UChar *s, const uint32_t sLen,
708 const UChar *t, const uint32_t tLen,
709 UCollationResult result) {
710 /*UChar fPrimary = 0x6d;*/
711 /*UChar sPrimary = 0x6e;*/
712 UChar fSecondary = 0x310d;
713 UChar sSecondary = 0x31a3;
714 UChar fTertiary = 0x310f;
715 UChar sTertiary = 0x31b7;
716
717 UCollationResult oposite;
718 if(result == UCOL_EQUAL) {
719 return UCOL_IDENTICAL;
720 } else if(result == UCOL_GREATER) {
721 oposite = UCOL_LESS;
722 } else {
723 oposite = UCOL_GREATER;
724 }
725
726 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
727 return UCOL_PRIMARY;
728 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
729 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
730 return UCOL_SECONDARY;
731 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
732 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
733 return UCOL_TERTIARY;
734 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
735 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
736 return UCOL_QUATERNARY;
737 } else {
738 return UCOL_IDENTICAL;
739 }
740 }
741
742 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
743 uint32_t i = 0;
744
745 if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
746 buffer[0] = '=';
747 buffer[1] = '=';
748 buffer[2] = '\0';
749 } else if(res == UCOL_GREATER) {
750 for(i = 0; i<strength+1; i++) {
751 buffer[i] = '>';
752 }
753 buffer[strength+1] = '\0';
754 } else {
755 for(i = 0; i<strength+1; i++) {
756 buffer[i] = '<';
757 }
758 buffer[strength+1] = '\0';
759 }
760
761 return buffer;
762 }
763
764
765
766 static void logFailure (const char *platform, const char *test,
767 const UChar *source, const uint32_t sLen,
768 const UChar *target, const uint32_t tLen,
769 UCollationResult realRes, uint32_t realStrength,
770 UCollationResult expRes, uint32_t expStrength, UBool error) {
771
772 uint32_t i = 0;
773
774 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
775 static int32_t maxOutputLength = 0;
776 int32_t outputLength;
777
778 *sEsc = *tEsc = *s = *t = 0;
779 if(error == TRUE) {
780 log_err("Difference between expected and generated order. Run test with -v for more info\n");
781 } else if(getTestOption(VERBOSITY_OPTION) == 0) {
782 return;
783 }
784 for(i = 0; i<sLen; i++) {
785 sprintf(b, "%04X", source[i]);
786 strcat(sEsc, "\\u");
787 strcat(sEsc, b);
788 strcat(s, b);
789 strcat(s, " ");
790 if(source[i] < 0x80) {
791 sprintf(b, "(%c)", source[i]);
792 strcat(sEsc, b);
793 }
794 }
795 for(i = 0; i<tLen; i++) {
796 sprintf(b, "%04X", target[i]);
797 strcat(tEsc, "\\u");
798 strcat(tEsc, b);
799 strcat(t, b);
800 strcat(t, " ");
801 if(target[i] < 0x80) {
802 sprintf(b, "(%c)", target[i]);
803 strcat(tEsc, b);
804 }
805 }
806 /*
807 strcpy(output, "[[ ");
808 strcat(output, sEsc);
809 strcat(output, getRelationSymbol(expRes, expStrength, relation));
810 strcat(output, tEsc);
811
812 strcat(output, " : ");
813
814 strcat(output, sEsc);
815 strcat(output, getRelationSymbol(realRes, realStrength, relation));
816 strcat(output, tEsc);
817 strcat(output, " ]] ");
818
819 log_verbose("%s", output);
820 */
821
822
823 strcpy(output, "DIFF: ");
824
825 strcat(output, s);
826 strcat(output, " : ");
827 strcat(output, t);
828
829 strcat(output, test);
830 strcat(output, ": ");
831
832 strcat(output, sEsc);
833 strcat(output, getRelationSymbol(expRes, expStrength, relation));
834 strcat(output, tEsc);
835
836 strcat(output, " ");
837
838 strcat(output, platform);
839 strcat(output, ": ");
840
841 strcat(output, sEsc);
842 strcat(output, getRelationSymbol(realRes, realStrength, relation));
843 strcat(output, tEsc);
844
845 outputLength = (int32_t)strlen(output);
846 if(outputLength > maxOutputLength) {
847 maxOutputLength = outputLength;
848 U_ASSERT(outputLength < sizeof(output));
849 }
850
851 log_verbose("%s\n", output);
852
853 }
854
855 /*
856 static void printOutRules(const UChar *rules) {
857 uint32_t len = u_strlen(rules);
858 uint32_t i = 0;
859 char toPrint;
860 uint32_t line = 0;
861
862 fprintf(stdout, "Rules:");
863
864 for(i = 0; i<len; i++) {
865 if(rules[i]<0x7f && rules[i]>=0x20) {
866 toPrint = (char)rules[i];
867 if(toPrint == '&') {
868 line = 1;
869 fprintf(stdout, "\n&");
870 } else if(toPrint == ';') {
871 fprintf(stdout, "<<");
872 line+=2;
873 } else if(toPrint == ',') {
874 fprintf(stdout, "<<<");
875 line+=3;
876 } else {
877 fprintf(stdout, "%c", toPrint);
878 line++;
879 }
880 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
881 fprintf(stdout, "\\u%04X", rules[i]);
882 line+=6;
883 }
884 if(line>72) {
885 fprintf(stdout, "\n");
886 line = 0;
887 }
888 }
889
890 log_verbose("\n");
891
892 }
893 */
894
895 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
896 uint32_t diffs = 0;
897 UCollationResult realResult;
898 uint32_t realStrength;
899
900 uint32_t sLen = u_strlen(first);
901 uint32_t tLen = u_strlen(second);
902
903 realResult = func(collator, opts, first, sLen, second, tLen);
904 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
905
906 if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
907 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
908 diffs++;
909 } else if(realResult != UCOL_LESS || realStrength != strength) {
910 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
911 diffs++;
912 }
913 return diffs;
914 }
915
916
917 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
918 const UChar *rules = NULL, *current = NULL;
919 int32_t ruleLen = 0;
920 uint32_t strength = 0;
921 uint32_t chOffset = 0; uint32_t chLen = 0;
922 uint32_t exOffset = 0; uint32_t exLen = 0;
923 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
924 /* uint32_t rExpsLen = 0; */
925 uint32_t firstLen = 0, secondLen = 0;
926 UBool varT = FALSE; UBool top_ = TRUE;
927 uint16_t specs = 0;
928 UBool startOfRules = TRUE;
929 UColTokenParser src;
930 UColOptionSet opts;
931
932 UChar first[256];
933 UChar second[256];
934 UChar *rulesCopy = NULL;
935
936 uint32_t UCAdiff = 0;
937 uint32_t Windiff = 1;
938 UParseError parseError;
939
940 uprv_memset(&src, 0, sizeof(UColTokenParser));
941 src.opts = &opts;
942
943 rules = ucol_getRules(coll, &ruleLen);
944
945 /*printOutRules(rules);*/
946
947 if(U_SUCCESS(*status) && ruleLen > 0) {
948 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
949 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
950 src.current = src.source = rulesCopy;
951 src.end = rulesCopy+ruleLen;
952 src.extraCurrent = src.end;
953 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
954 *first = *second = 0;
955
956 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
957 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
958 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
959 strength = src.parsedToken.strength;
960 chOffset = src.parsedToken.charsOffset;
961 chLen = src.parsedToken.charsLen;
962 exOffset = src.parsedToken.extensionOffset;
963 exLen = src.parsedToken.extensionLen;
964 prefixOffset = src.parsedToken.prefixOffset;
965 prefixLen = src.parsedToken.prefixLen;
966 specs = src.parsedToken.flags;
967
968 startOfRules = FALSE;
969 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
970 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
971
972 u_strncpy(second,src.source+chOffset, chLen);
973 second[chLen] = 0;
974 secondLen = chLen;
975
976 if(exLen > 0) {
977 u_strncat(first, src.source+exOffset, exLen);
978 first[firstLen+exLen] = 0;
979 firstLen += exLen;
980 }
981
982 if(strength != UCOL_TOK_RESET) {
983 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
984 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
985 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
986 }
987 }
988
989
990 firstLen = chLen;
991 u_strcpy(first, second);
992
993 }
994 if(UCAdiff != 0 && Windiff != 0) {
995 log_verbose("\n");
996 }
997 if(UCAdiff == 0) {
998 log_verbose("No immediate difference with %s!\n", refName);
999 }
1000 if(Windiff == 0) {
1001 log_verbose("No immediate difference with Win32!\n");
1002 }
1003 uprv_free(src.source);
1004 uprv_free(src.reorderCodes);
1005 }
1006 }
1007
/*
 * Orders two collation elements, each given as a (lead, continuation) pair:
 * (s1, s2) is the source and (t1, t2) the target.
 *
 * The weights are compared level by level, the lead CE's weight taking
 * precedence over the continuation's within each level:
 *   primary   - upper 16 bits of each CE,
 *   secondary - bits 8..15,
 *   tertiary  - bits 0..7.
 *
 * Returns 0 when both pairs are identical, -1 when the source sorts first
 * and 1 when the target sorts first.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t srcKey = 0, tgtKey = 0;

    if (s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary: lead weight in the high half, continuation in the low half */
    srcKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    tgtKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if (srcKey != tgtKey) {
        return (srcKey < tgtKey) ? -1 : 1;
    }

    /* secondary */
    srcKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    tgtKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if (srcKey != tgtKey) {
        return (srcKey < tgtKey) ? -1 : 1;
    }

    /* tertiary: the pairs are known to differ, so anything not less is greater */
    srcKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    tgtKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (srcKey < tgtKey) ? -1 : 1;
}
1044
/*
 * CE range attached to one indirect (symbolic) reset position.  Each end of
 * the range is stored as two 32-bit values; a limit of 0/0 is used when no
 * upper bound was supplied.
 */
typedef struct {
    uint32_t startCE;      /* first value of the start position */
    uint32_t startContCE;  /* second value of the start position */
    uint32_t limitCE;      /* first value of the limit, or 0 */
    uint32_t limitContCE;  /* second value of the limit, or 0 */
} indirectBoundaries;
1051
1052 /* these values are used for finding CE values for indirect positioning. */
1053 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1054 /* values. It only works for resets and you cannot tailor indirect names */
1055 /* An indirect name can define either an anchor point or a range. An */
1056 /* anchor point behaves in exactly the same way as a code point in reset */
1057 /* would, except that it cannot be tailored. A range (we currently only */
/* know of the [top] range) will explicitly set the upper bound for */
1059 /* generated CEs, thus allowing for better control over how many CEs can */
1060 /* be squeezed between in the range without performance penalty. */
1061 /* In that respect, we use [top] for tailoring of locales that use CJK */
1062 /* characters. Other indirect values are currently a pure convenience, */
1063 /* they can be used to assure that the CEs will be always positioned in */
1064 /* the same place relative to a point with known properties (e.g. first */
1065 /* primary ignorable). */
1066 static indirectBoundaries ucolIndirectBoundaries[15];
1067 static UBool indirectBoundariesSet = FALSE;
1068 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1069 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1070 /* to initalize here. */
1071 ucolIndirectBoundaries[indexR].startCE = start[0];
1072 ucolIndirectBoundaries[indexR].startContCE = start[1];
1073 if(end) {
1074 ucolIndirectBoundaries[indexR].limitCE = end[0];
1075 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1076 } else {
1077 ucolIndirectBoundaries[indexR].limitCE = 0;
1078 ucolIndirectBoundaries[indexR].limitContCE = 0;
1079 }
1080 }
1081
1082 static void testCEs(UCollator *coll, UErrorCode *status) {
1083 const UChar *rules = NULL, *current = NULL;
1084 int32_t ruleLen = 0;
1085
1086 uint32_t strength = 0;
1087 uint32_t maxStrength = UCOL_IDENTICAL;
1088 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1089 uint32_t lastCE;
1090 uint32_t lastContCE;
1091
1092 int32_t result = 0;
1093 uint32_t chOffset = 0; uint32_t chLen = 0;
1094 uint32_t exOffset = 0; uint32_t exLen = 0;
1095 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1096 uint32_t oldOffset = 0;
1097
1098 /* uint32_t rExpsLen = 0; */
1099 /* uint32_t firstLen = 0; */
1100 uint16_t specs = 0;
1101 UBool varT = FALSE; UBool top_ = TRUE;
1102 UBool startOfRules = TRUE;
1103 UBool before = FALSE;
1104 UColTokenParser src;
1105 UColOptionSet opts;
1106 UParseError parseError;
1107 UChar *rulesCopy = NULL;
1108 collIterate *c = uprv_new_collIterate(status);
1109 UCAConstants *consts = NULL;
1110 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1111 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1112 const char *colLoc;
1113 UCollator *UCA = ucol_open("root", status);
1114
1115 if (U_FAILURE(*status)) {
1116 log_err("Could not open root collator %s\n", u_errorName(*status));
1117 uprv_delete_collIterate(c);
1118 return;
1119 }
1120
1121 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1122 if (U_FAILURE(*status)) {
1123 log_err("Could not get collator name: %s\n", u_errorName(*status));
1124 ucol_close(UCA);
1125 uprv_delete_collIterate(c);
1126 return;
1127 }
1128
1129 uprv_memset(&src, 0, sizeof(UColTokenParser));
1130
1131 consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1132 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1133 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1134 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1135 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1136
1137 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1138
1139 src.opts = &opts;
1140
1141 rules = ucol_getRules(coll, &ruleLen);
1142
1143 src.invUCA = ucol_initInverseUCA(status);
1144
1145 if(indirectBoundariesSet == FALSE) {
1146 /* UCOL_RESET_TOP_VALUE */
1147 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1148 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1149 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1150 /* UCOL_LAST_PRIMARY_IGNORABLE */
1151 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1152 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1153 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1154 /* UCOL_LAST_SECONDARY_IGNORABLE */
1155 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1156 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1157 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1158 /* UCOL_LAST_TERTIARY_IGNORABLE */
1159 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1160 /* UCOL_FIRST_VARIABLE */
1161 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1162 /* UCOL_LAST_VARIABLE */
1163 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1164 /* UCOL_FIRST_NON_VARIABLE */
1165 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1166 /* UCOL_LAST_NON_VARIABLE */
1167 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1168 /* UCOL_FIRST_IMPLICIT */
1169 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1170 /* UCOL_LAST_IMPLICIT */
1171 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1172 /* UCOL_FIRST_TRAILING */
1173 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1174 /* UCOL_LAST_TRAILING */
1175 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1176 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1177 indirectBoundariesSet = TRUE;
1178 }
1179
1180
1181 if(U_SUCCESS(*status) && ruleLen > 0) {
1182 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1183 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1184 src.current = src.source = rulesCopy;
1185 src.end = rulesCopy+ruleLen;
1186 src.extraCurrent = src.end;
1187 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1188
1189 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1190 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1191 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1192 strength = src.parsedToken.strength;
1193 chOffset = src.parsedToken.charsOffset;
1194 chLen = src.parsedToken.charsLen;
1195 exOffset = src.parsedToken.extensionOffset;
1196 exLen = src.parsedToken.extensionLen;
1197 prefixOffset = src.parsedToken.prefixOffset;
1198 prefixLen = src.parsedToken.prefixLen;
1199 specs = src.parsedToken.flags;
1200
1201 startOfRules = FALSE;
1202 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1203 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1204
1205 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1206
1207 currCE = ucol_getNextCE(coll, c, status);
1208 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1209 log_verbose("Thai prevowel detected. Will pick next CE\n");
1210 currCE = ucol_getNextCE(coll, c, status);
1211 }
1212
1213 currContCE = ucol_getNextCE(coll, c, status);
1214 if(!isContinuation(currContCE)) {
1215 currContCE = 0;
1216 }
1217
1218 /* we need to repack CEs here */
1219
1220 if(strength == UCOL_TOK_RESET) {
1221 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1222 if(top_ == TRUE) {
1223 int32_t tokenIndex = src.parsedToken.indirectIndex;
1224
1225 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1226 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1227 } else {
1228 nextCE = baseCE = currCE;
1229 nextContCE = baseContCE = currContCE;
1230 }
1231 maxStrength = UCOL_IDENTICAL;
1232 } else {
1233 if(strength < maxStrength) {
1234 maxStrength = strength;
1235 if(baseCE == UCOL_RESET_TOP_VALUE) {
1236 log_verbose("Resetting to [top]\n");
1237 nextCE = UCOL_NEXT_TOP_VALUE;
1238 nextContCE = UCOL_NEXT_TOP_CONT;
1239 } else {
1240 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1241 }
1242 if(result < 0) {
1243 if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1244 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1245 return;
1246 } else {
1247 log_err("%s: couldn't find the CE\n", colLoc);
1248 return;
1249 }
1250 }
1251 }
1252
1253 currCE &= 0xFFFFFF3F;
1254 currContCE &= 0xFFFFFFBF;
1255
1256 if(maxStrength == UCOL_IDENTICAL) {
1257 if(baseCE != currCE || baseContCE != currContCE) {
1258 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
1259 }
1260 } else {
1261 if(strength == UCOL_IDENTICAL) {
1262 if(lastCE != currCE || lastContCE != currContCE) {
1263 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
1264 }
1265 } else {
1266 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1267 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1268 log_err("%s: current CE is not less than base CE\n", colLoc);
1269 }
1270 if(!before) {
1271 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1272 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1273 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1274 }
1275 } else {
1276 before = FALSE;
1277 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1278 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1279 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1280 }
1281 }
1282 }
1283 }
1284
1285 }
1286
1287 oldOffset = chOffset;
1288 lastCE = currCE & 0xFFFFFF3F;
1289 lastContCE = currContCE & 0xFFFFFFBF;
1290 }
1291 uprv_free(src.source);
1292 uprv_free(src.reorderCodes);
1293 }
1294 ucol_close(UCA);
1295 uprv_delete_collIterate(c);
1296 }
1297
#if 0
/* Compiled out and kept for reference only: */
/* these locales are now picked from index RB */
static const char* localesToTest[] = {
    "ar", "bg", "ca", "cs", "da",
    "el", "en_BE", "en_US_POSIX",
    "es", "et", "fi", "fr", "hi",
    "hr", "hu", "is", "iw", "ja",
    "ko", "lt", "lv", "mk", "mt",
    "nb", "nn", "nn_NO", "pl", "ro",
    "ru", "sh", "sk", "sl", "sq",
    "sr", "sv", "th", "tr", "uk",
    "vi", "zh", "zh_TW"
};
#endif
1312
/*
 * Escaped tailoring rules used by RamsRulesTest: each one is unescaped,
 * opened with ucol_openRules() and passed to testCollator() and testCEs().
 * The commented-out strings next to the entries are alternative spellings
 * that are not tested.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1326
1327
1328 static void TestCollations(void) {
1329 int32_t noOfLoc = uloc_countAvailable();
1330 int32_t i = 0, j = 0;
1331
1332 UErrorCode status = U_ZERO_ERROR;
1333 char cName[256];
1334 UChar name[256];
1335 int32_t nameSize;
1336
1337
1338 const char *locName = NULL;
1339 UCollator *coll = NULL;
1340 UCollator *UCA = ucol_open("", &status);
1341 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1342 if (U_FAILURE(status)) {
1343 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1344 return;
1345 }
1346 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1347
1348 for(i = 0; i<noOfLoc; i++) {
1349 status = U_ZERO_ERROR;
1350 locName = uloc_getAvailable(i);
1351 if(uprv_strcmp("ja", locName) == 0) {
1352 log_verbose("Don't know how to test prefixes\n");
1353 continue;
1354 }
1355 if(hasCollationElements(locName)) {
1356 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1357 for(j = 0; j<nameSize; j++) {
1358 cName[j] = (char)name[j];
1359 }
1360 cName[nameSize] = 0;
1361 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1362 coll = ucol_open(locName, &status);
1363 if(U_SUCCESS(status)) {
1364 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1365 ucol_close(coll);
1366 } else {
1367 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1368 status = U_ZERO_ERROR;
1369 }
1370 }
1371 }
1372 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1373 ucol_close(UCA);
1374 }
1375
1376 static void RamsRulesTest(void) {
1377 UErrorCode status = U_ZERO_ERROR;
1378 int32_t i = 0;
1379 UCollator *coll = NULL;
1380 UChar rule[2048];
1381 uint32_t ruleLen;
1382 int32_t noOfLoc = uloc_countAvailable();
1383 const char *locName = NULL;
1384
1385 log_verbose("RamsRulesTest\n");
1386
1387 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1388 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1389 return;
1390 }
1391
1392 for(i = 0; i<noOfLoc; i++) {
1393 locName = uloc_getAvailable(i);
1394 if(hasCollationElements(locName)) {
1395 if (uprv_strcmp("ja", locName)==0) {
1396 log_verbose("Don't know how to test Japanese because of prefixes\n");
1397 continue;
1398 }
1399 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1400 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1401 continue;
1402 }
1403 if (uprv_strcmp("bn", locName)==0 ||
1404 uprv_strcmp("bs", locName)==0 || /* Add due to import per cldrbug 5647 */
1405 uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import per cldrbug 5647 */
1406 uprv_strcmp("en_US_POSIX", locName)==0 ||
1407 uprv_strcmp("fa_AF", locName)==0 || /* Add due to import per cldrbug 5647 */
1408 uprv_strcmp("he", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1409 uprv_strcmp("he_IL", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1410 uprv_strcmp("km", locName)==0 ||
1411 uprv_strcmp("km_KH", locName)==0 ||
1412 uprv_strcmp("my", locName)==0 ||
1413 uprv_strcmp("si", locName)==0 ||
1414 uprv_strcmp("si_LK", locName)==0 ||
1415 uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import per cldrbug 5647 */
1416 uprv_strcmp("th", locName)==0 ||
1417 uprv_strcmp("th_TH", locName)==0 ||
1418 uprv_strcmp("zh", locName)==0 ||
1419 uprv_strcmp("zh_Hant", locName)==0
1420 ) {
1421 log_verbose("Don't know how to test %s. "
1422 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1423 continue;
1424 }
1425 log_verbose("Testing locale %s\n", locName);
1426 status = U_ZERO_ERROR;
1427 coll = ucol_open(locName, &status);
1428 if(U_SUCCESS(status)) {
1429 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1430 if(coll->image->jamoSpecial == TRUE) {
1431 log_err("%s has special JAMOs\n", locName);
1432 }
1433 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1434 testCollator(coll, &status);
1435 testCEs(coll, &status);
1436 } else {
1437 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1438 }
1439 ucol_close(coll);
1440 } else {
1441 log_err("Could not open %s: %s\n", locName, u_errorName(status));
1442 }
1443 }
1444 }
1445
1446 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1447 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1448 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1449 status = U_ZERO_ERROR;
1450 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1451 if(U_SUCCESS(status)) {
1452 testCollator(coll, &status);
1453 testCEs(coll, &status);
1454 ucol_close(coll);
1455 } else {
1456 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1457 }
1458 }
1459
1460 }
1461
1462 static void IsTailoredTest(void) {
1463 UErrorCode status = U_ZERO_ERROR;
1464 uint32_t i = 0;
1465 UCollator *coll = NULL;
1466 UChar rule[2048];
1467 UChar tailored[2048];
1468 UChar notTailored[2048];
1469 uint32_t ruleLen, tailoredLen, notTailoredLen;
1470
1471 log_verbose("IsTailoredTest\n");
1472
1473 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1474 ruleLen = u_strlen(rule);
1475
1476 u_uastrcpy(tailored, "ABCcd");
1477 tailoredLen = u_strlen(tailored);
1478
1479 u_uastrcpy(notTailored, "ZabD");
1480 notTailoredLen = u_strlen(notTailored);
1481
1482 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1483 if(U_SUCCESS(status)) {
1484 for(i = 0; i<tailoredLen; i++) {
1485 if(!ucol_isTailored(coll, tailored[i], &status)) {
1486 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1487 }
1488 }
1489 for(i = 0; i<notTailoredLen; i++) {
1490 if(ucol_isTailored(coll, notTailored[i], &status)) {
1491 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1492 }
1493 }
1494 ucol_close(coll);
1495 }
1496 else {
1497 log_err_status(status, "Can't tailor rules\n");
1498 }
1499 /* Code coverage */
1500 status = U_ZERO_ERROR;
1501 coll = ucol_open("ja", &status);
1502 if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1503 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1504 }
1505 ucol_close(coll);
1506 }
1507
1508
/*
 * Escaped test strings for TestChMove, which opens the "cs" collator and
 * expects every entry to compare UCOL_LESS than every entry after it.
 */
const static char chTest[][20] = {
    "c",
    "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h",
    "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s",
    "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
1528
1529 static void TestChMove(void) {
1530 UChar t1[256] = {0};
1531 UChar t2[256] = {0};
1532
1533 uint32_t i = 0, j = 0;
1534 uint32_t size = 0;
1535 UErrorCode status = U_ZERO_ERROR;
1536
1537 UCollator *coll = ucol_open("cs", &status);
1538
1539 if(U_SUCCESS(status)) {
1540 size = sizeof(chTest)/sizeof(chTest[0]);
1541 for(i = 0; i < size-1; i++) {
1542 for(j = i+1; j < size; j++) {
1543 u_unescape(chTest[i], t1, 256);
1544 u_unescape(chTest[j], t2, 256);
1545 doTest(coll, t1, t2, UCOL_LESS);
1546 }
1547 }
1548 }
1549 else {
1550 log_data_err("Can't open collator");
1551 }
1552 ucol_close(coll);
1553 }
1554
1555
1556
1557
/*
 * Expected order for the rule "&\u4e00 < a <<< A < b <<< B". Only the
 * commented-out code at the end of TestImplicitTailoring refers to this table.
 */
const static char impTest[][20] = {
    "\\u4e00",
    "a",
    "A",
    "b",
    "B",
    "\\u4e01"
};
1566
1567
1568 static void TestImplicitTailoring(void) {
1569 static const struct {
1570 const char *rules;
1571 const char *data[10];
1572 const uint32_t len;
1573 } tests[] = {
1574 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1575 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1576 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1577 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1578 };
1579
1580 int32_t i = 0;
1581
1582 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
1583 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
1584 }
1585
1586 /*
1587 UChar t1[256] = {0};
1588 UChar t2[256] = {0};
1589
1590 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1591
1592 uint32_t i = 0, j = 0;
1593 uint32_t size = 0;
1594 uint32_t ruleLen = 0;
1595 UErrorCode status = U_ZERO_ERROR;
1596 UCollator *coll = NULL;
1597 ruleLen = u_unescape(rule, t1, 256);
1598
1599 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1600
1601 if(U_SUCCESS(status)) {
1602 size = sizeof(impTest)/sizeof(impTest[0]);
1603 for(i = 0; i < size-1; i++) {
1604 for(j = i+1; j < size; j++) {
1605 u_unescape(impTest[i], t1, 256);
1606 u_unescape(impTest[j], t2, 256);
1607 doTest(coll, t1, t2, UCOL_LESS);
1608 }
1609 }
1610 }
1611 else {
1612 log_err("Can't open collator");
1613 }
1614 ucol_close(coll);
1615 */
1616 }
1617
1618 static void TestFCDProblem(void) {
1619 UChar t1[256] = {0};
1620 UChar t2[256] = {0};
1621
1622 const char *s1 = "\\u0430\\u0306\\u0325";
1623 const char *s2 = "\\u04D1\\u0325";
1624
1625 UErrorCode status = U_ZERO_ERROR;
1626 UCollator *coll = ucol_open("", &status);
1627 u_unescape(s1, t1, 256);
1628 u_unescape(s2, t2, 256);
1629
1630 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1631 doTest(coll, t1, t2, UCOL_EQUAL);
1632
1633 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1634 doTest(coll, t1, t2, UCOL_EQUAL);
1635
1636 ucol_close(coll);
1637 }
1638
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
#define NORM_BUFFER_TEST_LEN 18
/* One test case of TestComposeDecompose. */
typedef struct {
    UChar32 u;                          /* the code point under test */
    UChar NFC[NORM_BUFFER_TEST_LEN];    /* NFC form; TestComposeDecompose may replace it with the original character */
    UChar NFD[NORM_BUFFER_TEST_LEN];    /* NFD form */
} tester;
1649
1650 static void TestComposeDecompose(void) {
1651 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1652 static const UChar UNICODESET_STR[] = {
1653 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1654 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1655 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1656 };
1657 int32_t noOfLoc;
1658 int32_t i = 0, j = 0;
1659
1660 UErrorCode status = U_ZERO_ERROR;
1661 const char *locName = NULL;
1662 uint32_t nfcSize;
1663 uint32_t nfdSize;
1664 tester **t;
1665 uint32_t noCases = 0;
1666 UCollator *coll = NULL;
1667 UChar32 u = 0;
1668 UChar comp[NORM_BUFFER_TEST_LEN];
1669 uint32_t len = 0;
1670 UCollationElements *iter;
1671 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1672 int32_t charsToTestSize;
1673
1674 noOfLoc = uloc_countAvailable();
1675
1676 coll = ucol_open("", &status);
1677 if (U_FAILURE(status)) {
1678 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1679 return;
1680 }
1681 charsToTestSize = uset_size(charsToTest);
1682 if (charsToTestSize <= 0) {
1683 log_err("Set was zero. Missing data?\n");
1684 return;
1685 }
1686 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1687 t[0] = (tester *)malloc(sizeof(tester));
1688 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1689
1690 for(u = 0; u < charsToTestSize; u++) {
1691 UChar32 ch = uset_charAt(charsToTest, u);
1692 len = 0;
1693 U16_APPEND_UNSAFE(comp, len, ch);
1694 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1695 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1696
1697 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1698 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1699 t[noCases]->u = ch;
1700 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1701 u_strncpy(t[noCases]->NFC, comp, len);
1702 t[noCases]->NFC[len] = 0;
1703 }
1704 noCases++;
1705 t[noCases] = (tester *)malloc(sizeof(tester));
1706 uprv_memset(t[noCases], 0, sizeof(tester));
1707 }
1708 }
1709 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1710 uset_close(charsToTest);
1711 charsToTest = NULL;
1712
1713 for(u=0; u<(UChar32)noCases; u++) {
1714 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1715 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1716 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1717 }
1718 }
1719 /*
1720 for(u = 0; u < charsToTestSize; u++) {
1721 if(!(u&0xFFFF)) {
1722 log_verbose("%08X ", u);
1723 }
1724 uprv_memset(t[noCases], 0, sizeof(tester));
1725 t[noCases]->u = u;
1726 len = 0;
1727 U16_APPEND_UNSAFE(comp, len, u);
1728 comp[len] = 0;
1729 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1730 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1731 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1732 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1733 }
1734 */
1735
1736 ucol_close(coll);
1737
1738 log_verbose("Testing locales, number of cases = %i\n", noCases);
1739 for(i = 0; i<noOfLoc; i++) {
1740 status = U_ZERO_ERROR;
1741 locName = uloc_getAvailable(i);
1742 if(hasCollationElements(locName)) {
1743 char cName[256];
1744 UChar name[256];
1745 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1746
1747 for(j = 0; j<nameSize; j++) {
1748 cName[j] = (char)name[j];
1749 }
1750 cName[nameSize] = 0;
1751 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1752
1753 coll = ucol_open(locName, &status);
1754 ucol_setStrength(coll, UCOL_IDENTICAL);
1755 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1756
1757 for(u=0; u<(UChar32)noCases; u++) {
1758 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1759 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1760 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1761 log_verbose("Testing NFC\n");
1762 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1763 backAndForth(iter);
1764 log_verbose("Testing NFD\n");
1765 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1766 backAndForth(iter);
1767 }
1768 }
1769 ucol_closeElements(iter);
1770 ucol_close(coll);
1771 }
1772 }
1773 for(u = 0; u <= (UChar32)noCases; u++) {
1774 free(t[u]);
1775 }
1776 free(t);
1777 }
1778
1779 static void TestEmptyRule(void) {
1780 UErrorCode status = U_ZERO_ERROR;
1781 UChar rulez[] = { 0 };
1782 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1783
1784 ucol_close(coll);
1785 }
1786
1787 static void TestUCARules(void) {
1788 UErrorCode status = U_ZERO_ERROR;
1789 UChar b[256];
1790 UChar *rules = b;
1791 uint32_t ruleLen = 0;
1792 UCollator *UCAfromRules = NULL;
1793 UCollator *coll = ucol_open("", &status);
1794 if(status == U_FILE_ACCESS_ERROR) {
1795 log_data_err("Is your data around?\n");
1796 return;
1797 } else if(U_FAILURE(status)) {
1798 log_err("Error opening collator\n");
1799 return;
1800 }
1801 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1802
1803 log_verbose("TestUCARules\n");
1804 if(ruleLen > 256) {
1805 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1806 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1807 }
1808 log_verbose("Rules length is %d\n", ruleLen);
1809 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1810 if(U_SUCCESS(status)) {
1811 ucol_close(UCAfromRules);
1812 } else {
1813 log_verbose("Unable to create a collator from UCARules!\n");
1814 }
1815 /*
1816 u_unescape(blah, b, 256);
1817 ucol_getSortKey(coll, b, 1, res, 256);
1818 */
1819 ucol_close(coll);
1820 if(rules != b) {
1821 free(rules);
1822 }
1823 }
1824
1825
1826 /* Pinyin tonal order */
1827 /*
1828 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1829 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1830 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1831 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1832 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1833 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1834 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1835 .. (\u00fc)
1836
1837 However, in testing we got the following order:
1838 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1839 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1840 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1841 .. (\u0113)
1842 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1843 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1844 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1845 .. (\u01d8)
1846 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1847 */
1848
/**
 * Tailors the accented Pinyin vowels before a, e, i, o and u with
 * [before 1] resets (and the u-diaeresis forms after u), then hands the rules
 * and the expected order to genericRulesStarter().
 */
static void TestBefore(void) {
    static const char *data[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    /* One reset per vowel; the last reset places the diaeresis forms after u. */
    const char *pinyinRules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(pinyinRules, data, sizeof(data)/sizeof(data[0]));
}
1867
#if 0
/* superseded by TestBeforePinyin */
static void TestJ784(void) {
    const static char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };
    genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1883
#if 0
/* superseded by the changes to the lv locale */
static void TestJ831(void) {
    const static char *data[] = {
        "I",
        "i",
        "Y",
        "y"
    };
    genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1896
/**
 * Checks one list of strings in its expected order twice: with the "fr"
 * locale collator and with a collator built from a [backwards 2] rule that
 * tailors \u00e6 and \u00c6 as expansions after A.
 */
static void TestJ815(void) {
    static const char *data[] = {
        "aa", "Aa",
        "ab", "Ab",
        "ad", "Ad",
        "ae", "Ae",
        "\\u00e6", "\\u00c6",
        "af", "Af",
        "b", "B"
    };
    const char *backwardsRules = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

    genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
    genericRulesStarter(backwardsRules, data, sizeof(data)/sizeof(data[0]));
}
1917
1918
1919 /*
1920 "& a < b < c < d& r < c", "& a < b < d& r < c",
1921 "& a < b < c < d& c < m", "& a < b < c < m < d",
1922 "& a < b < c < d& a < m", "& a < m < b < c < d",
1923 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1924 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1925 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1926 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1927 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1928 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1929 */
1930 static void TestRedundantRules(void) {
1931 int32_t i;
1932
1933 static const struct {
1934 const char *rules;
1935 const char *expectedRules;
1936 const char *testdata[8];
1937 uint32_t testdatalen;
1938 } tests[] = {
1939 /* this test conflicts with positioning of CODAN placeholder */
1940 /*{
1941 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1942 "&\\u2089<<<x",
1943 {"\\u2089", "x"}, 2
1944 }, */
1945 /* this test conflicts with the [before x] syntax tightening */
1946 /*{
1947 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1948 "&\\u0252<<<x",
1949 {"\\u0252", "x"}, 2
1950 }, */
1951 /* this test conflicts with the [before x] syntax tightening */
1952 /*{
1953 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1954 "& a <<< x < b <<< c << d <<< e",
1955 {"a", "x", "b", "c", "d", "e"}, 6
1956 }, */
1957 {
1958 "& a < b < c < d& [before 1] c < m",
1959 "& a < b < m < c < d",
1960 {"a", "b", "m", "c", "d"}, 5
1961 },
1962 {
1963 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1964 "& a < b <<< c << d <<< x <<< e",
1965 {"a", "b", "c", "d", "x", "e"}, 6
1966 },
1967 /* this test conflicts with the [before x] syntax tightening */
1968 /* {
1969 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1970 "& a < b <<< c <<< x << d <<< e",
1971 {"a", "b", "c", "x", "d", "e"},, 6
1972 }, */
1973 {
1974 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1975 "& a < b <<< c << d <<< e <<< f < x < g",
1976 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1977 },
1978 {
1979 "& a <<< b << c < d& a < m",
1980 "& a <<< b << c < m < d",
1981 {"a", "b", "c", "m", "d"}, 5
1982 },
1983 {
1984 "&a<b<<b\\u0301 &z<b",
1985 "&a<b\\u0301 &z<b",
1986 {"a", "b\\u0301", "z", "b"}, 4
1987 },
1988 {
1989 "&z<m<<<q<<<m",
1990 "&z<q<<<m",
1991 {"z", "q", "m"},3
1992 },
1993 {
1994 "&z<<<m<q<<<m",
1995 "&z<q<<<m",
1996 {"z", "q", "m"}, 3
1997 },
1998 {
1999 "& a < b < c < d& r < c",
2000 "& a < b < d& r < c",
2001 {"a", "b", "d"}, 3
2002 },
2003 {
2004 "& a < b < c < d& r < c",
2005 "& a < b < d& r < c",
2006 {"r", "c"}, 2
2007 },
2008 {
2009 "& a < b < c < d& c < m",
2010 "& a < b < c < m < d",
2011 {"a", "b", "c", "m", "d"}, 5
2012 },
2013 {
2014 "& a < b < c < d& a < m",
2015 "& a < m < b < c < d",
2016 {"a", "m", "b", "c", "d"}, 5
2017 }
2018 };
2019
2020
2021 UCollator *credundant = NULL;
2022 UCollator *cresulting = NULL;
2023 UErrorCode status = U_ZERO_ERROR;
2024 UChar rlz[2048] = { 0 };
2025 uint32_t rlen = 0;
2026
2027 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2028 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2029 rlen = u_unescape(tests[i].rules, rlz, 2048);
2030
2031 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2032 if(status == U_FILE_ACCESS_ERROR) {
2033 log_data_err("Is your data around?\n");
2034 return;
2035 } else if(U_FAILURE(status)) {
2036 log_err("Error opening collator\n");
2037 return;
2038 }
2039
2040 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2041 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2042
2043 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2044
2045 ucol_close(credundant);
2046 ucol_close(cresulting);
2047
2048 log_verbose("testing using data\n");
2049
2050 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2051 }
2052
2053 }
2054
2055 static void TestExpansionSyntax(void) {
2056 int32_t i;
2057
2058 const static char *rules[] = {
2059 "&AE <<< a << b <<< c &d <<< f",
2060 "&AE <<< a <<< b << c << d < e < f <<< g",
2061 "&AE <<< B <<< C / D <<< F"
2062 };
2063
2064 const static char *expectedRules[] = {
2065 "&A <<< a / E << b / E <<< c /E &d <<< f",
2066 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2067 "&A <<< B / E <<< C / ED <<< F / E"
2068 };
2069
2070 const static char *testdata[][8] = {
2071 {"AE", "a", "b", "c"},
2072 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2073 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2074 };
2075
2076 const static uint32_t testdatalen[] = {
2077 4,
2078 8,
2079 3
2080 };
2081
2082
2083
2084 UCollator *credundant = NULL;
2085 UCollator *cresulting = NULL;
2086 UErrorCode status = U_ZERO_ERROR;
2087 UChar rlz[2048] = { 0 };
2088 uint32_t rlen = 0;
2089
2090 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2091 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2092 rlen = u_unescape(rules[i], rlz, 2048);
2093
2094 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2095 if(status == U_FILE_ACCESS_ERROR) {
2096 log_data_err("Is your data around?\n");
2097 return;
2098 } else if(U_FAILURE(status)) {
2099 log_err("Error opening collator\n");
2100 return;
2101 }
2102 rlen = u_unescape(expectedRules[i], rlz, 2048);
2103 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2104
2105 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2106 /* as a hard error test, but only in information mode */
2107 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2108
2109 ucol_close(credundant);
2110 ucol_close(cresulting);
2111
2112 log_verbose("testing using data\n");
2113
2114 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2115 }
2116 }
2117
2118 static void TestCase(void)
2119 {
2120 const static UChar gRules[MAX_TOKEN_LEN] =
2121 /*" & 0 < 1,\u2461<a,A"*/
2122 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2123
2124 const static UChar testCase[][MAX_TOKEN_LEN] =
2125 {
2126 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2127 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2128 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2129 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2130 };
2131
2132 const static UCollationResult caseTestResults[][9] =
2133 {
2134 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2135 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2136 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2137 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2138 };
2139
2140 const static UColAttributeValue caseTestAttributes[][2] =
2141 {
2142 { UCOL_LOWER_FIRST, UCOL_OFF},
2143 { UCOL_UPPER_FIRST, UCOL_OFF},
2144 { UCOL_LOWER_FIRST, UCOL_ON},
2145 { UCOL_UPPER_FIRST, UCOL_ON}
2146 };
2147 int32_t i,j,k;
2148 UErrorCode status = U_ZERO_ERROR;
2149 UCollationElements *iter;
2150 UCollator *myCollation;
2151 myCollation = ucol_open("en_US", &status);
2152
2153 if(U_FAILURE(status)){
2154 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2155 return;
2156 }
2157 log_verbose("Testing different case settings\n");
2158 ucol_setStrength(myCollation, UCOL_TERTIARY);
2159
2160 for(k = 0; k<4; k++) {
2161 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2162 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2163 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2164 for (i = 0; i < 3 ; i++) {
2165 for(j = i+1; j<4; j++) {
2166 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2167 }
2168 }
2169 }
2170 ucol_close(myCollation);
2171
2172 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2173 if(U_FAILURE(status)){
2174 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2175 return;
2176 }
2177 log_verbose("Testing different case settings with custom rules\n");
2178 ucol_setStrength(myCollation, UCOL_TERTIARY);
2179
2180 for(k = 0; k<4; k++) {
2181 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2182 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2183 for (i = 0; i < 3 ; i++) {
2184 for(j = i+1; j<4; j++) {
2185 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2186 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2187 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2188 backAndForth(iter);
2189 ucol_closeElements(iter);
2190 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2191 backAndForth(iter);
2192 ucol_closeElements(iter);
2193 }
2194 }
2195 }
2196 ucol_close(myCollation);
2197 {
2198 const static char *lowerFirst[] = {
2199 "h",
2200 "H",
2201 "ch",
2202 "Ch",
2203 "CH",
2204 "cha",
2205 "chA",
2206 "Cha",
2207 "ChA",
2208 "CHa",
2209 "CHA",
2210 "i",
2211 "I"
2212 };
2213
2214 const static char *upperFirst[] = {
2215 "H",
2216 "h",
2217 "CH",
2218 "Ch",
2219 "ch",
2220 "CHA",
2221 "CHa",
2222 "ChA",
2223 "Cha",
2224 "chA",
2225 "cha",
2226 "I",
2227 "i"
2228 };
2229 log_verbose("mixed case test\n");
2230 log_verbose("lower first, case level off\n");
2231 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2232 log_verbose("upper first, case level off\n");
2233 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2234 log_verbose("lower first, case level on\n");
2235 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2236 log_verbose("upper first, case level on\n");
2237 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2238 }
2239
2240 }
2241
2242 static void TestIncrementalNormalize(void) {
2243
2244 /*UChar baseA =0x61;*/
2245 UChar baseA =0x41;
2246 /* UChar baseB = 0x42;*/
2247 static const UChar ccMix[] = {0x316, 0x321, 0x300};
2248 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2249 /*
2250 0x316 is combining grave accent below, cc=220
2251 0x321 is combining palatalized hook below, cc=202
2252 0x300 is combining grave accent, cc=230
2253 */
2254
2255 #define MAXSLEN 2000
2256 /*int maxSLen = 64000;*/
2257 int sLen;
2258 int i;
2259
2260 UCollator *coll;
2261 UErrorCode status = U_ZERO_ERROR;
2262 UCollationResult result;
2263
2264 int32_t myQ = getTestOption(QUICK_OPTION);
2265
2266 if(getTestOption(QUICK_OPTION) < 0) {
2267 setTestOption(QUICK_OPTION, 1);
2268 }
2269
2270 {
2271 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2272 /* most buffers along the way.*/
2273 UChar strA[MAXSLEN+1];
2274 UChar strB[MAXSLEN+1];
2275
2276 coll = ucol_open("en_US", &status);
2277 if(status == U_FILE_ACCESS_ERROR) {
2278 log_data_err("Is your data around?\n");
2279 return;
2280 } else if(U_FAILURE(status)) {
2281 log_err("Error opening collator\n");
2282 return;
2283 }
2284 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2285
2286 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2287 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2288 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2289 for (sLen = 500; sLen<501; sLen++) {
2290 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2291 strA[0] = baseA;
2292 strB[0] = baseA;
2293 for (i=1; i<=sLen-1; i++) {
2294 strA[i] = ccMix[i % 3];
2295 strB[sLen-i] = ccMix[i % 3];
2296 }
2297 strA[sLen] = 0;
2298 strB[sLen] = 0;
2299
2300 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2301 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2302 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2303 doTest(coll, strA, strB, UCOL_EQUAL);
2304 }
2305 }
2306
2307 setTestOption(QUICK_OPTION, myQ);
2308
2309
2310 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2311 /* of the string. Checks a couple of edge cases.*/
2312
2313 {
2314 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2315 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2316 ucol_setStrength(coll, UCOL_TERTIARY);
2317 doTest(coll, strA, strB, UCOL_EQUAL);
2318 }
2319
2320 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2321
2322 {
2323 /* New UCA 3.1.1.
2324 * test below used a code point from Desseret, which sorts differently
2325 * than d800 dc00
2326 */
2327 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2328 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2329 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2330 ucol_setStrength(coll, UCOL_TERTIARY);
2331 doTest(coll, strA, strB, UCOL_GREATER);
2332 }
2333
2334 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2335
2336 {
2337 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2338 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2339 char sortKeyA[50];
2340 char sortKeyAz[50];
2341 char sortKeyB[50];
2342 char sortKeyBz[50];
2343 int r;
2344
2345 /* there used to be -3 here. Hmmmm.... */
2346 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2347 result = ucol_strcoll(coll, strA, 3, strB, 3);
2348 if (result != UCOL_GREATER) {
2349 log_err("ERROR 1 in test 4\n");
2350 }
2351 result = ucol_strcoll(coll, strA, -1, strB, -1);
2352 if (result != UCOL_EQUAL) {
2353 log_err("ERROR 2 in test 4\n");
2354 }
2355
2356 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2357 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2358 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2359 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2360
2361 r = strcmp(sortKeyA, sortKeyAz);
2362 if (r <= 0) {
2363 log_err("Error 3 in test 4\n");
2364 }
2365 r = strcmp(sortKeyA, sortKeyB);
2366 if (r <= 0) {
2367 log_err("Error 4 in test 4\n");
2368 }
2369 r = strcmp(sortKeyAz, sortKeyBz);
2370 if (r != 0) {
2371 log_err("Error 5 in test 4\n");
2372 }
2373
2374 ucol_setStrength(coll, UCOL_IDENTICAL);
2375 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2376 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2377 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2378 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2379
2380 r = strcmp(sortKeyA, sortKeyAz);
2381 if (r <= 0) {
2382 log_err("Error 6 in test 4\n");
2383 }
2384 r = strcmp(sortKeyA, sortKeyB);
2385 if (r <= 0) {
2386 log_err("Error 7 in test 4\n");
2387 }
2388 r = strcmp(sortKeyAz, sortKeyBz);
2389 if (r != 0) {
2390 log_err("Error 8 in test 4\n");
2391 }
2392 ucol_setStrength(coll, UCOL_TERTIARY);
2393 }
2394
2395
2396 /* Test 5: Null characters in non-normal source strings.*/
2397
2398 {
2399 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2400 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2401 char sortKeyA[50];
2402 char sortKeyAz[50];
2403 char sortKeyB[50];
2404 char sortKeyBz[50];
2405 int r;
2406
2407 result = ucol_strcoll(coll, strA, 6, strB, 6);
2408 if (result != UCOL_GREATER) {
2409 log_err("ERROR 1 in test 5\n");
2410 }
2411 result = ucol_strcoll(coll, strA, -1, strB, -1);
2412 if (result != UCOL_EQUAL) {
2413 log_err("ERROR 2 in test 5\n");
2414 }
2415
2416 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2417 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2418 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2419 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2420
2421 r = strcmp(sortKeyA, sortKeyAz);
2422 if (r <= 0) {
2423 log_err("Error 3 in test 5\n");
2424 }
2425 r = strcmp(sortKeyA, sortKeyB);
2426 if (r <= 0) {
2427 log_err("Error 4 in test 5\n");
2428 }
2429 r = strcmp(sortKeyAz, sortKeyBz);
2430 if (r != 0) {
2431 log_err("Error 5 in test 5\n");
2432 }
2433
2434 ucol_setStrength(coll, UCOL_IDENTICAL);
2435 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2436 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2437 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2438 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2439
2440 r = strcmp(sortKeyA, sortKeyAz);
2441 if (r <= 0) {
2442 log_err("Error 6 in test 5\n");
2443 }
2444 r = strcmp(sortKeyA, sortKeyB);
2445 if (r <= 0) {
2446 log_err("Error 7 in test 5\n");
2447 }
2448 r = strcmp(sortKeyAz, sortKeyBz);
2449 if (r != 0) {
2450 log_err("Error 8 in test 5\n");
2451 }
2452 ucol_setStrength(coll, UCOL_TERTIARY);
2453 }
2454
2455
2456 /* Test 6: Null character as base of a non-normal combining sequence.*/
2457
2458 {
2459 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2460 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2461
2462 result = ucol_strcoll(coll, strA, 5, strB, 5);
2463 if (result != UCOL_LESS) {
2464 log_err("Error 1 in test 6\n");
2465 }
2466 result = ucol_strcoll(coll, strA, -1, strB, -1);
2467 if (result != UCOL_EQUAL) {
2468 log_err("Error 2 in test 6\n");
2469 }
2470 }
2471
2472 ucol_close(coll);
2473 }
2474
2475
2476
#if 0
/**
 * Disabled test: checks the value returned by ucol_uprv_getCaseBits() for
 * each string in caseBitData against the same-index entry of results, using
 * the collator opened for the "" locale.
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    if(U_FAILURE(status)) {
        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
        return;
    }

    for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* The collator was previously leaked. */
    ucol_close(UCA);
}
#endif
2504
2505 static void TestHangulTailoring(void) {
2506 static const char *koreanData[] = {
2507 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2508 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2509 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2510 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2511 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2512 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2513 };
2514
2515 const char *rules =
2516 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2517 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2518 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2519 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2520 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2521 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2522
2523
2524 UErrorCode status = U_ZERO_ERROR;
2525 UChar rlz[2048] = { 0 };
2526 uint32_t rlen = u_unescape(rules, rlz, 2048);
2527
2528 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2529 if(status == U_FILE_ACCESS_ERROR) {
2530 log_data_err("Is your data around?\n");
2531 return;
2532 } else if(U_FAILURE(status)) {
2533 log_err("Error opening collator\n");
2534 return;
2535 }
2536
2537 log_verbose("Using start of korean rules\n");
2538
2539 if(U_SUCCESS(status)) {
2540 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2541 } else {
2542 log_err("Unable to open collator with rules %s\n", rules);
2543 }
2544
2545 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2546 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2547 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2548
2549 ucol_close(coll);
2550
2551 log_verbose("Using ko__LOTUS locale\n");
2552 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2553 }
2554
2555 static void TestCompressOverlap(void) {
2556 UChar secstr[150];
2557 UChar tertstr[150];
2558 UErrorCode status = U_ZERO_ERROR;
2559 UCollator *coll;
2560 char result[200];
2561 uint32_t resultlen;
2562 int count = 0;
2563 char *tempptr;
2564
2565 coll = ucol_open("", &status);
2566
2567 if (U_FAILURE(status)) {
2568 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2569 return;
2570 }
2571 while (count < 149) {
2572 secstr[count] = 0x0020; /* [06, 05, 05] */
2573 tertstr[count] = 0x0020;
2574 count ++;
2575 }
2576
2577 /* top down compression ----------------------------------- */
2578 secstr[count] = 0x0332; /* [, 87, 05] */
2579 tertstr[count] = 0x3000; /* [06, 05, 07] */
2580
2581 /* no compression secstr should have 150 secondary bytes, tertstr should
2582 have 150 tertiary bytes.
2583 with correct overlapping compression, secstr should have 4 secondary
2584 bytes, tertstr should have > 2 tertiary bytes */
2585 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2586 tempptr = uprv_strchr(result, 1) + 1;
2587 while (*(tempptr + 1) != 1) {
2588 /* the last secondary collation element is not checked since it is not
2589 part of the compression */
2590 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2591 log_err("Secondary compression overlapped\n");
2592 }
2593 tempptr ++;
2594 }
2595
2596 /* tertiary top/bottom/common for en_US is similar to the secondary
2597 top/bottom/common */
2598 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2599 tempptr = uprv_strrchr(result, 1) + 1;
2600 while (*(tempptr + 1) != 0) {
2601 /* the last secondary collation element is not checked since it is not
2602 part of the compression */
2603 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2604 log_err("Tertiary compression overlapped\n");
2605 }
2606 tempptr ++;
2607 }
2608
2609 /* bottom up compression ------------------------------------- */
2610 secstr[count] = 0;
2611 tertstr[count] = 0;
2612 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2613 tempptr = uprv_strchr(result, 1) + 1;
2614 while (*(tempptr + 1) != 1) {
2615 /* the last secondary collation element is not checked since it is not
2616 part of the compression */
2617 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2618 log_err("Secondary compression overlapped\n");
2619 }
2620 tempptr ++;
2621 }
2622
2623 /* tertiary top/bottom/common for en_US is similar to the secondary
2624 top/bottom/common */
2625 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2626 tempptr = uprv_strrchr(result, 1) + 1;
2627 while (*(tempptr + 1) != 0) {
2628 /* the last secondary collation element is not checked since it is not
2629 part of the compression */
2630 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2631 log_err("Tertiary compression overlapped\n");
2632 }
2633 tempptr ++;
2634 }
2635
2636 ucol_close(coll);
2637 }
2638
2639 static void TestCyrillicTailoring(void) {
2640 static const char *test[] = {
2641 "\\u0410b",
2642 "\\u0410\\u0306a",
2643 "\\u04d0A"
2644 };
2645
2646 /* Russian overrides contractions, so this test is not valid anymore */
2647 /*genericLocaleStarter("ru", test, 3);*/
2648
2649 genericLocaleStarter("root", test, 3);
2650 genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2651 genericRulesStarter("&Z < \\u0410", test, 3);
2652 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2653 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2654 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2655 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2656 }
2657
/**
 * Runs two three-string orderings through genericRulesStarter() with a rule
 * that consists only of [suppressContractions] for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };

    genericRulesStarter(suppressRule, testNoCont, LEN(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, LEN(testNoCont2));
}
2674
2675 static void TestContraction(void) {
2676 const static char *testrules[] = {
2677 "&A = AB / B",
2678 "&A = A\\u0306/\\u0306",
2679 "&c = ch / h"
2680 };
2681 const static UChar testdata[][2] = {
2682 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2683 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2684 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2685 };
2686 const static UChar testdata2[][2] = {
2687 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2688 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2689 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2690 };
2691 const static char *testrules3[] = {
2692 "&z < xyz &xyzw << B",
2693 "&z < xyz &xyz << B / w",
2694 "&z < ch &achm << B",
2695 "&z < ch &a << B / chm",
2696 "&\\ud800\\udc00w << B",
2697 "&\\ud800\\udc00 << B / w",
2698 "&a\\ud800\\udc00m << B",
2699 "&a << B / \\ud800\\udc00m",
2700 };
2701
2702 UErrorCode status = U_ZERO_ERROR;
2703 UCollator *coll;
2704 UChar rule[256] = {0};
2705 uint32_t rlen = 0;
2706 int i;
2707
2708 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2709 UCollationElements *iter1;
2710 int j = 0;
2711 log_verbose("Rule %s for testing\n", testrules[i]);
2712 rlen = u_unescape(testrules[i], rule, 32);
2713 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2714 if (U_FAILURE(status)) {
2715 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2716 return;
2717 }
2718 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2719 if (U_FAILURE(status)) {
2720 log_err("Collation iterator creation failed\n");
2721 return;
2722 }
2723 while (j < 2) {
2724 UCollationElements *iter2 = ucol_openElements(coll,
2725 &(testdata[i][j]),
2726 1, &status);
2727 uint32_t ce;
2728 if (U_FAILURE(status)) {
2729 log_err("Collation iterator creation failed\n");
2730 return;
2731 }
2732 ce = ucol_next(iter2, &status);
2733 while (ce != UCOL_NULLORDER) {
2734 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2735 log_err("Collation elements in contraction split does not match\n");
2736 return;
2737 }
2738 ce = ucol_next(iter2, &status);
2739 }
2740 j ++;
2741 ucol_closeElements(iter2);
2742 }
2743 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2744 log_err("Collation elements not exhausted\n");
2745 return;
2746 }
2747 ucol_closeElements(iter1);
2748 ucol_close(coll);
2749 }
2750
2751 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2752 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2753 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2754 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2755 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2756 testdata2[1][1]);
2757 return;
2758 }
2759 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2760 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2761 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2762 testdata2[2][1]);
2763 return;
2764 }
2765 ucol_close(coll);
2766
2767 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2768 UCollator *coll1,
2769 *coll2;
2770 UCollationElements *iter1,
2771 *iter2;
2772 UChar ch = 0x0042 /* 'B' */;
2773 uint32_t ce;
2774 rlen = u_unescape(testrules3[i], rule, 32);
2775 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2776 rlen = u_unescape(testrules3[i + 1], rule, 32);
2777 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2778 if (U_FAILURE(status)) {
2779 log_err("Collator creation failed %s\n", testrules[i]);
2780 return;
2781 }
2782 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2783 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2784 if (U_FAILURE(status)) {
2785 log_err("Collation iterator creation failed\n");
2786 return;
2787 }
2788 ce = ucol_next(iter1, &status);
2789 if (U_FAILURE(status)) {
2790 log_err("Retrieving ces failed\n");
2791 return;
2792 }
2793 while (ce != UCOL_NULLORDER) {
2794 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2795 log_err("CEs does not match\n");
2796 return;
2797 }
2798 ce = ucol_next(iter1, &status);
2799 if (U_FAILURE(status)) {
2800 log_err("Retrieving ces failed\n");
2801 return;
2802 }
2803 }
2804 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2805 log_err("CEs not exhausted\n");
2806 return;
2807 }
2808 ucol_closeElements(iter1);
2809 ucol_closeElements(iter2);
2810 ucol_close(coll1);
2811 ucol_close(coll2);
2812 }
2813 }
2814
2815 static void TestExpansion(void) {
2816 const static char *testrules[] = {
2817 "&J << K / B & K << M",
2818 "&J << K / B << M"
2819 };
2820 const static UChar testdata[][3] = {
2821 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2822 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2823 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2824 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2825 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2826 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2827 };
2828
2829 UErrorCode status = U_ZERO_ERROR;
2830 UCollator *coll;
2831 UChar rule[256] = {0};
2832 uint32_t rlen = 0;
2833 int i;
2834
2835 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2836 int j = 0;
2837 log_verbose("Rule %s for testing\n", testrules[i]);
2838 rlen = u_unescape(testrules[i], rule, 32);
2839 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2840 if (U_FAILURE(status)) {
2841 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2842 return;
2843 }
2844
2845 for (j = 0; j < 5; j ++) {
2846 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2847 }
2848 ucol_close(coll);
2849 }
2850 }
2851
#if 0
/* this test tests the current limitations of the engine */
/* it always fail, so it is disabled by default */
/**
 * Runs a series of orderings that exercise known engine limitations:
 * recursive expansions, contractions spanning expansions, NUL characters in
 * contractions, variable top placement, and case-first handling.
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *orderA[] = {"add","b","adf"};
        static const char *orderB[] = {"aa","b","af"};

        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, orderA, LEN(orderA));
        genericRulesStarter(rule, orderB, LEN(orderB));
    }
    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *orderA[] = {"ad","c","af","f","ch","h"};
        static const char *orderB[] = {"ad","c","ch","af","f","h"};

        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, orderA, LEN(orderA));
        genericRulesStarter(rule, orderB, LEN(orderB));
    }
    /* normalization: nulls in contractions */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue normOn[] = { UCOL_ON };
        static const UColAttributeValue normOff[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOff, 1);
        genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOff, 1);
    }
    /* normalization: contractions spanning normalization */
    /* (uses the same rule and data as the preceding section) */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue normOn[] = { UCOL_ON };
        static const UColAttributeValue normOff[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOn, 1);
        genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOff, 1);
        genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOff, 1);
    }
    /* variable top:  */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rule = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue shiftedQuaternary[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue nonIgnorableTertiary[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rule, orderC, LEN(orderC), attrs, shiftedQuaternary, LEN(attrs));
        genericRulesStarterWithOptions(rule, orderA, LEN(orderA), attrs, shiftedQuaternary, LEN(attrs));
        genericRulesStarterWithOptions(rule, orderB, LEN(orderB), attrs, shiftedQuaternary, LEN(attrs));
        genericRulesStarterWithOptions(rule, orderA, LEN(orderA), attrs, nonIgnorableTertiary, LEN(attrs));
        genericRulesStarterWithOptions(rule, orderB, LEN(orderB), attrs, nonIgnorableTertiary, LEN(attrs));
    }
    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *orderA[] = {"c","CH","Ch","cH","ch"};
        static const char *orderB[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};

        /* Only the upper-first setting is run; the UCOL_OFF variant is disabled. */
        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, orderA, LEN(orderA), attrs, upperFirst, LEN(attrs));
        genericRulesStarterWithOptions(rule, orderB, LEN(orderB), attrs, upperFirst, LEN(attrs));
    }

}
#endif
2943
2944 static void TestBocsuCoverage(void) {
2945 UErrorCode status = U_ZERO_ERROR;
2946 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2947 UChar test[256] = {0};
2948 uint32_t tlen = u_unescape(testString, test, 32);
2949 uint8_t key[256] = {0};
2950 uint32_t klen = 0;
2951
2952 UCollator *coll = ucol_open("", &status);
2953 if(U_SUCCESS(status)) {
2954 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2955
2956 klen = ucol_getSortKey(coll, test, tlen, key, 256);
2957
2958 ucol_close(coll);
2959 } else {
2960 log_data_err("Couldn't open UCA\n");
2961 }
2962 }
2963
2964 static void TestVariableTopSetting(void) {
2965 UErrorCode status = U_ZERO_ERROR;
2966 const UChar *current = NULL;
2967 uint32_t varTopOriginal = 0, varTop1, varTop2;
2968 UCollator *coll = ucol_open("", &status);
2969 if(U_SUCCESS(status)) {
2970
2971 uint32_t strength = 0;
2972 uint16_t specs = 0;
2973 uint32_t chOffset = 0;
2974 uint32_t chLen = 0;
2975 uint32_t exOffset = 0;
2976 uint32_t exLen = 0;
2977 uint32_t oldChOffset = 0;
2978 uint32_t oldChLen = 0;
2979 uint32_t oldExOffset = 0;
2980 uint32_t oldExLen = 0;
2981 uint32_t prefixOffset = 0;
2982 uint32_t prefixLen = 0;
2983
2984 UBool startOfRules = TRUE;
2985 UColTokenParser src;
2986 UColOptionSet opts;
2987
2988 UChar *rulesCopy = NULL;
2989 uint32_t rulesLen;
2990
2991 UCollationResult result;
2992
2993 UChar first[256] = { 0 };
2994 UChar second[256] = { 0 };
2995 UParseError parseError;
2996 int32_t myQ = getTestOption(QUICK_OPTION);
2997
2998 uprv_memset(&src, 0, sizeof(UColTokenParser));
2999
3000 src.opts = &opts;
3001
3002 if(getTestOption(QUICK_OPTION) <= 0) {
3003 setTestOption(QUICK_OPTION, 1);
3004 }
3005
3006 /* this test will fail when normalization is turned on */
3007 /* therefore we always turn off exhaustive mode for it */
3008 { /* QUICK > 0*/
3009 log_verbose("Slide variable top over UCARules\n");
3010 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3011 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3012 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3013
3014 if(U_SUCCESS(status) && rulesLen > 0) {
3015 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3016 src.current = src.source = rulesCopy;
3017 src.end = rulesCopy+rulesLen;
3018 src.extraCurrent = src.end;
3019 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3020
3021 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3022 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3023 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3024 strength = src.parsedToken.strength;
3025 chOffset = src.parsedToken.charsOffset;
3026 chLen = src.parsedToken.charsLen;
3027 exOffset = src.parsedToken.extensionOffset;
3028 exLen = src.parsedToken.extensionLen;
3029 prefixOffset = src.parsedToken.prefixOffset;
3030 prefixLen = src.parsedToken.prefixLen;
3031 specs = src.parsedToken.flags;
3032
3033 startOfRules = FALSE;
3034 {
3035 log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3036 }
3037 if(strength == UCOL_PRIMARY) {
3038 status = U_ZERO_ERROR;
3039 varTopOriginal = ucol_getVariableTop(coll, &status);
3040 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3041 if(U_FAILURE(status)) {
3042 char buffer[256];
3043 char *buf = buffer;
3044 uint32_t i = 0, j;
3045 uint32_t CE = UCOL_NO_MORE_CES;
3046
3047 /* before we start screaming, let's see if there is a problem with the rules */
3048 UErrorCode collIterateStatus = U_ZERO_ERROR;
3049 collIterate *s = uprv_new_collIterate(&collIterateStatus);
3050 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3051
3052 CE = ucol_getNextCE(coll, s, &status);
3053
3054 for(i = 0; i < oldChLen; i++) {
3055 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3056 buf += j;
3057 }
3058 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3059 log_verbose("= Expected failure for %s =", buffer);
3060 } else {
3061 if(uprv_collIterateAtEnd(s)) {
3062 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3063 oldChOffset, u_errorName(status), buffer);
3064 } else {
3065 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3066 buffer);
3067 }
3068 }
3069 uprv_delete_collIterate(s);
3070 }
3071 varTop2 = ucol_getVariableTop(coll, &status);
3072 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3073 log_err("cannot retrieve set varTop value!\n");
3074 continue;
3075 }
3076
3077 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3078
3079 u_strncpy(first, src.source+oldChOffset, oldChLen);
3080 u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3081 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3082 first[2*oldChLen+chLen] = 0;
3083
3084 if(oldExLen == 0) {
3085 u_strncpy(second, src.source+chOffset, chLen);
3086 second[chLen] = 0;
3087 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3088 u_strncpy(second, src.source+oldExOffset, oldExLen);
3089 u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3090 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3091 second[2*oldExLen+chLen] = 0;
3092 }
3093 result = ucol_strcoll(coll, first, -1, second, -1);
3094 if(result == UCOL_EQUAL) {
3095 doTest(coll, first, second, UCOL_EQUAL);
3096 } else {
3097 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3098 }
3099 }
3100 }
3101 if(strength != UCOL_TOK_RESET) {
3102 oldChOffset = chOffset;
3103 oldChLen = chLen;
3104 oldExOffset = exOffset;
3105 oldExLen = exLen;
3106 }
3107 }
3108 status = U_ZERO_ERROR;
3109 }
3110 else {
3111 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3112 return;
3113 }
3114 if (U_FAILURE(status)) {
3115 log_err("Error parsing rules %s\n", u_errorName(status));
3116 return;
3117 }
3118 status = U_ZERO_ERROR;
3119 }
3120
3121 setTestOption(QUICK_OPTION, myQ);
3122
3123 log_verbose("Testing setting variable top to contractions\n");
3124 {
3125 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3126 int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3127 while(*conts != 0) {
3128 /*
3129 * A continuation is NUL-terminated and NUL-padded
3130 * except if it has the maximum length.
3131 */
3132 int32_t contractionLength = maxUCAContractionLength;
3133 while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3134 --contractionLength;
3135 }
3136 if(*(conts+1)==0) { /* pre-context */
3137 varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3138 } else {
3139 varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3140 }
3141 if(U_FAILURE(status)) {
3142 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3143 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3144 * therefore it is not an error when it complains about them. */
3145 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3146 *conts, *(conts+1), *(conts+2));
3147 } else {
3148 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3149 *conts, *(conts+1), *(conts+2), u_errorName(status));
3150 }
3151 status = U_ZERO_ERROR;
3152 }
3153 conts+=maxUCAContractionLength;
3154 }
3155
3156 status = U_ZERO_ERROR;
3157
3158 first[0] = 0x0040;
3159 first[1] = 0x0050;
3160 first[2] = 0x0000;
3161
3162 ucol_setVariableTop(coll, first, -1, &status);
3163
3164 if(U_SUCCESS(status)) {
3165 log_err("Invalid contraction succeded in setting variable top!\n");
3166 }
3167
3168 }
3169
3170 log_verbose("Test restoring variable top\n");
3171
3172 status = U_ZERO_ERROR;
3173 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3174 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3175 log_err("Couldn't restore old variable top\n");
3176 }
3177
3178 log_verbose("Testing calling with error set\n");
3179
3180 status = U_INTERNAL_PROGRAM_ERROR;
3181 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3182 varTop2 = ucol_getVariableTop(coll, &status);
3183 ucol_restoreVariableTop(coll, varTop2, &status);
3184 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3185 varTop2 = ucol_getVariableTop(NULL, &status);
3186 ucol_restoreVariableTop(NULL, varTop2, &status);
3187 if(status != U_INTERNAL_PROGRAM_ERROR) {
3188 log_err("Bad reaction to passed error!\n");
3189 }
3190 uprv_free(src.source);
3191 ucol_close(coll);
3192 } else {
3193 log_data_err("Couldn't open UCA collator\n");
3194 }
3195
3196 }
3197
3198 static void TestNonChars(void) {
3199 static const char *test[] = {
3200 "\\u0000", /* ignorable */
3201 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
3202 "\\uFDD0", "\\uFDEF",
3203 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3204 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3205 "\\U0003FFFE", "\\U0003FFFF",
3206 "\\U0004FFFE", "\\U0004FFFF",
3207 "\\U0005FFFE", "\\U0005FFFF",
3208 "\\U0006FFFE", "\\U0006FFFF",
3209 "\\U0007FFFE", "\\U0007FFFF",
3210 "\\U0008FFFE", "\\U0008FFFF",
3211 "\\U0009FFFE", "\\U0009FFFF",
3212 "\\U000AFFFE", "\\U000AFFFF",
3213 "\\U000BFFFE", "\\U000BFFFF",
3214 "\\U000CFFFE", "\\U000CFFFF",
3215 "\\U000DFFFE", "\\U000DFFFF",
3216 "\\U000EFFFE", "\\U000EFFFF",
3217 "\\U000FFFFE", "\\U000FFFFF",
3218 "\\U0010FFFE", "\\U0010FFFF",
3219 "\\uFFFF" /* special character with maximum primary weight */
3220 };
3221 UErrorCode status = U_ZERO_ERROR;
3222 UCollator *coll = ucol_open("en_US", &status);
3223
3224 log_verbose("Test non characters\n");
3225
3226 if(U_SUCCESS(status)) {
3227 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3228 } else {
3229 log_err_status(status, "Unable to open collator\n");
3230 }
3231
3232 ucol_close(coll);
3233 }
3234
3235 static void TestExtremeCompression(void) {
3236 static char *test[4];
3237 int32_t j = 0, i = 0;
3238
3239 for(i = 0; i<4; i++) {
3240 test[i] = (char *)malloc(2048*sizeof(char));
3241 }
3242
3243 for(j = 20; j < 500; j++) {
3244 for(i = 0; i<4; i++) {
3245 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3246 test[i][j-1] = (char)('a'+i);
3247 test[i][j] = 0;
3248 }
3249 genericLocaleStarter("en_US", (const char **)test, 4);
3250 }
3251
3252
3253 for(i = 0; i<4; i++) {
3254 free(test[i]);
3255 }
3256 }
3257
#if 0
/*
 * Disabled alternative version of TestExtremeCompression(); never compiled.
 * It fills four 2048-byte buffers for lengths 10..2047 and calls
 * genericLocaleStarter() once, after the fill loop has finished.
 *
 * NOTE(review): this would need work before it could be re-enabled -
 * ucol_open() is passed `status` rather than `&status`, `coll` is never
 * used or closed, the name clashes with the live function of the same
 * name, and the first fill loop sets only j-2 bytes so test[i][j-2] is
 * not written in that pass.
 */
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("en_US", status);
    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    genericLocaleStarter("en_US", (const char **)test, 4);

    /* Second pass only touches test[0] and performs no comparison. */
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
}
#endif
3287
/*
 * Tailors surrogate pairs (alone and followed by ASCII letters) after 'z'
 * and hands the expected ordering to genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *rule =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
        "< \\ud800\\udc00y  < \\ud800\\udc00r"
        "< \\ud800\\udc00f  << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a  < c < b" ;

    static const char *expectedOrder[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    /* All 14 entries are passed on. */
    genericRulesStarter(rule, expectedOrder, sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
3308
3309 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3310 static void TestPrefix(void) {
3311 uint32_t i;
3312
3313 static const struct {
3314 const char *rules;
3315 const char *data[50];
3316 const uint32_t len;
3317 } tests[] = {
3318 { "&z <<< z|a",
3319 {"zz", "za"}, 2 },
3320
3321 { "&z <<< z| a",
3322 {"zz", "za"}, 2 },
3323 { "[strength I]"
3324 "&a=\\ud900\\udc25"
3325 "&z<<<\\ud900\\udc25|a",
3326 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3327 };
3328
3329
3330 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3331 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3332 }
3333 }
3334
3335 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3336 /* JIS X 4061 collation order implementation */
3337 static void TestNewJapanese(void) {
3338
3339 static const char * const test1[] = {
3340 "\\u30b7\\u30e3\\u30fc\\u30ec",
3341 "\\u30b7\\u30e3\\u30a4",
3342 "\\u30b7\\u30e4\\u30a3",
3343 "\\u30b7\\u30e3\\u30ec",
3344 "\\u3061\\u3087\\u3053",
3345 "\\u3061\\u3088\\u3053",
3346 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3347 "\\u3066\\u30fc\\u305f",
3348 "\\u30c6\\u30fc\\u30bf",
3349 "\\u30c6\\u30a7\\u30bf",
3350 "\\u3066\\u3048\\u305f",
3351 "\\u3067\\u30fc\\u305f",
3352 "\\u30c7\\u30fc\\u30bf",
3353 "\\u30c7\\u30a7\\u30bf",
3354 "\\u3067\\u3048\\u305f",
3355 "\\u3066\\u30fc\\u305f\\u30fc",
3356 "\\u30c6\\u30fc\\u30bf\\u30a1",
3357 "\\u30c6\\u30a7\\u30bf\\u30fc",
3358 "\\u3066\\u3047\\u305f\\u3041",
3359 "\\u3066\\u3048\\u305f\\u30fc",
3360 "\\u3067\\u30fc\\u305f\\u30fc",
3361 "\\u30c7\\u30fc\\u30bf\\u30a1",
3362 "\\u3067\\u30a7\\u305f\\u30a1",
3363 "\\u30c7\\u3047\\u30bf\\u3041",
3364 "\\u30c7\\u30a8\\u30bf\\u30a2",
3365 "\\u3072\\u3086",
3366 "\\u3073\\u3085\\u3042",
3367 "\\u3074\\u3085\\u3042",
3368 "\\u3073\\u3085\\u3042\\u30fc",
3369 "\\u30d3\\u30e5\\u30a2\\u30fc",
3370 "\\u3074\\u3085\\u3042\\u30fc",
3371 "\\u30d4\\u30e5\\u30a2\\u30fc",
3372 "\\u30d2\\u30e5\\u30a6",
3373 "\\u30d2\\u30e6\\u30a6",
3374 "\\u30d4\\u30e5\\u30a6\\u30a2",
3375 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3376 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3377 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3378 "\\u3072\\u3085\\u3093",
3379 "\\u3074\\u3085\\u3093",
3380 "\\u3075\\u30fc\\u308a",
3381 "\\u30d5\\u30fc\\u30ea",
3382 "\\u3075\\u3045\\u308a",
3383 "\\u3075\\u30a5\\u308a",
3384 "\\u3075\\u30a5\\u30ea",
3385 "\\u30d5\\u30a6\\u30ea",
3386 "\\u3076\\u30fc\\u308a",
3387 "\\u30d6\\u30fc\\u30ea",
3388 "\\u3076\\u3045\\u308a",
3389 "\\u30d6\\u30a5\\u308a",
3390 "\\u3077\\u3046\\u308a",
3391 "\\u30d7\\u30a6\\u30ea",
3392 "\\u3075\\u30fc\\u308a\\u30fc",
3393 "\\u30d5\\u30a5\\u30ea\\u30fc",
3394 "\\u3075\\u30a5\\u308a\\u30a3",
3395 "\\u30d5\\u3045\\u308a\\u3043",
3396 "\\u30d5\\u30a6\\u30ea\\u30fc",
3397 "\\u3075\\u3046\\u308a\\u3043",
3398 "\\u30d6\\u30a6\\u30ea\\u30a4",
3399 "\\u3077\\u30fc\\u308a\\u30fc",
3400 "\\u3077\\u30a5\\u308a\\u30a4",
3401 "\\u3077\\u3046\\u308a\\u30fc",
3402 "\\u30d7\\u30a6\\u30ea\\u30a4",
3403 "\\u30d5\\u30fd",
3404 "\\u3075\\u309e",
3405 "\\u3076\\u309d",
3406 "\\u3076\\u3075",
3407 "\\u3076\\u30d5",
3408 "\\u30d6\\u3075",
3409 "\\u30d6\\u30d5",
3410 "\\u3076\\u309e",
3411 "\\u3076\\u3077",
3412 "\\u30d6\\u3077",
3413 "\\u3077\\u309d",
3414 "\\u30d7\\u30fd",
3415 "\\u3077\\u3075",
3416 };
3417
3418 static const char *test2[] = {
3419 "\\u306f\\u309d", /* H\\u309d */
3420 "\\u30cf\\u30fd", /* K\\u30fd */
3421 "\\u306f\\u306f", /* HH */
3422 "\\u306f\\u30cf", /* HK */
3423 "\\u30cf\\u30cf", /* KK */
3424 "\\u306f\\u309e", /* H\\u309e */
3425 "\\u30cf\\u30fe", /* K\\u30fe */
3426 "\\u306f\\u3070", /* HH\\u309b */
3427 "\\u30cf\\u30d0", /* KK\\u309b */
3428 "\\u306f\\u3071", /* HH\\u309c */
3429 "\\u30cf\\u3071", /* KH\\u309c */
3430 "\\u30cf\\u30d1", /* KK\\u309c */
3431 "\\u3070\\u309d", /* H\\u309b\\u309d */
3432 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3433 "\\u3070\\u306f", /* H\\u309bH */
3434 "\\u30d0\\u30cf", /* K\\u309bK */
3435 "\\u3070\\u309e", /* H\\u309b\\u309e */
3436 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3437 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3438 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3439 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3440 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3441 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3442 "\\u3071\\u309d", /* H\\u309c\\u309d */
3443 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3444 "\\u3071\\u306f", /* H\\u309cH */
3445 "\\u30d1\\u30cf", /* K\\u309cK */
3446 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3447 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3448 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3449 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3450 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3451 };
3452 /*
3453 static const char *test3[] = {
3454 "\\u221er\\u221e",
3455 "\\u221eR#",
3456 "\\u221et\\u221e",
3457 "#r\\u221e",
3458 "#R#",
3459 "#t%",
3460 "#T%",
3461 "8t\\u221e",
3462 "8T\\u221e",
3463 "8t#",
3464 "8T#",
3465 "8t%",
3466 "8T%",
3467 "8t8",
3468 "8T8",
3469 "\\u03c9r\\u221e",
3470 "\\u03a9R%",
3471 "rr\\u221e",
3472 "rR\\u221e",
3473 "Rr\\u221e",
3474 "RR\\u221e",
3475 "RT%",
3476 "rt8",
3477 "tr\\u221e",
3478 "tr8",
3479 "TR8",
3480 "tt8",
3481 "\\u30b7\\u30e3\\u30fc\\u30ec",
3482 };
3483 */
3484 static const UColAttribute att[] = { UCOL_STRENGTH };
3485 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3486
3487 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3488 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3489
3490 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3491 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3492 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3493 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3494 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3495 }
3496
3497 static void TestStrCollIdenticalPrefix(void) {
3498 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3499 const char* test[] = {
3500 "ab\\ud9b0\\udc70",
3501 "ab\\ud9b0\\udc71"
3502 };
3503 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3504 }
3505 /* Contractions should have all their canonically equivalent */
3506 /* strings included */
3507 static void TestContractionClosure(void) {
3508 static const struct {
3509 const char *rules;
3510 const char *data[10];
3511 const uint32_t len;
3512 } tests[] = {
3513 { "&b=\\u00e4\\u00e4",
3514 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3515 { "&b=\\u00C5",
3516 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3517 };
3518 uint32_t i;
3519
3520
3521 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3522 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3523 }
3524 }
3525
3526 /* This tests also fails*/
3527 static void TestBeforePrefixFailure(void) {
3528 static const struct {
3529 const char *rules;
3530 const char *data[10];
3531 const uint32_t len;
3532 } tests[] = {
3533 { "&g <<< a"
3534 "&[before 3]\\uff41 <<< x",
3535 {"x", "\\uff41"}, 2 },
3536 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3537 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3538 "&[before 3]\\u30a7<<<\\u30a9",
3539 {"\\u30a9", "\\u30a7"}, 2 },
3540 { "&[before 3]\\u30a7<<<\\u30a9"
3541 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3542 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3543 {"\\u30a9", "\\u30a7"}, 2 },
3544 };
3545 uint32_t i;
3546
3547
3548 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3549 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3550 }
3551
3552 #if 0
3553 const char* rule1 =
3554 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3555 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3556 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3557 const char* rule2 =
3558 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3559 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3560 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3561 const char* test[] = {
3562 "\\u30c6\\u30fc\\u30bf",
3563 "\\u30c6\\u30a7\\u30bf",
3564 };
3565 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3566 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3567 /* this piece of code should be in some sort of verbose mode */
3568 /* it gets the collation elements for elements and prints them */
3569 /* This is useful when trying to see whether the problem is */
3570 {
3571 UErrorCode status = U_ZERO_ERROR;
3572 uint32_t i = 0;
3573 UCollationElements *it = NULL;
3574 uint32_t CE;
3575 UChar string[256];
3576 uint32_t uStringLen;
3577 UCollator *coll = NULL;
3578
3579 uStringLen = u_unescape(rule1, string, 256);
3580
3581 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3582
3583 /*coll = ucol_open("ja_JP_JIS", &status);*/
3584 it = ucol_openElements(coll, string, 0, &status);
3585
3586 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3587 log_verbose("%s\n", test[i]);
3588 uStringLen = u_unescape(test[i], string, 256);
3589 ucol_setText(it, string, uStringLen, &status);
3590
3591 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3592 log_verbose("%08X\n", CE);
3593 }
3594 log_verbose("\n");
3595
3596 }
3597
3598 ucol_closeElements(it);
3599 ucol_close(coll);
3600 }
3601 #endif
3602 }
3603
3604 static void TestPrefixCompose(void) {
3605 const char* rule1 =
3606 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3607 /*
3608 const char* test[] = {
3609 "\\u30c6\\u30fc\\u30bf",
3610 "\\u30c6\\u30a7\\u30bf",
3611 };
3612 */
3613 {
3614 UErrorCode status = U_ZERO_ERROR;
3615 /*uint32_t i = 0;*/
3616 /*UCollationElements *it = NULL;*/
3617 /* uint32_t CE;*/
3618 UChar string[256];
3619 uint32_t uStringLen;
3620 UCollator *coll = NULL;
3621
3622 uStringLen = u_unescape(rule1, string, 256);
3623
3624 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3625 ucol_close(coll);
3626 }
3627
3628
3629 }
3630
3631 /*
3632 [last variable] last variable value
3633 [last primary ignorable] largest CE for primary ignorable
3634 [last secondary ignorable] largest CE for secondary ignorable
3635 [last tertiary ignorable] largest CE for tertiary ignorable
3636 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3637 */
3638
3639 static void TestRuleOptions(void) {
3640 /* values here are hardcoded and are correct for the current UCA
3641 * when the UCA changes, one might be forced to change these
3642 * values.
3643 */
3644
3645 /*
3646 * These strings contain the last character before [variable top]
3647 * and the first and second characters (by primary weights) after it.
3648 * See FractionalUCA.txt. For example:
3649 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3650 [variable top = 0C FE]
3651 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3652 and
3653 00B4; [0D 0C, 05, 05]
3654 *
3655 * Note: Starting with UCA 6.0, the [variable top] collation element
3656 * is not the weight of any character or string,
3657 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3658 */
3659 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3660 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
3661 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3662
3663 /*
3664 * This string has to match the character that has the [last regular] weight
3665 * which changes with each UCA version.
3666 * See the bottom of FractionalUCA.txt which says something like
3667 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3668 *
3669 * Note: Starting with UCA 6.0, the [last regular] collation element
3670 * is not the weight of any character or string,
3671 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3672 */
3673 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3674
3675 static const struct {
3676 const char *rules;
3677 const char *data[10];
3678 const uint32_t len;
3679 } tests[] = {
3680 /* - all befores here amount to zero */
3681 { "&[before 3][first tertiary ignorable]<<<a",
3682 { "\\u0000", "a"}, 2
3683 }, /* you cannot go before first tertiary ignorable */
3684
3685 { "&[before 3][last tertiary ignorable]<<<a",
3686 { "\\u0000", "a"}, 2
3687 }, /* you cannot go before last tertiary ignorable */
3688
3689 { "&[before 3][first secondary ignorable]<<<a",
3690 { "\\u0000", "a"}, 2
3691 }, /* you cannot go before first secondary ignorable */
3692
3693 { "&[before 3][last secondary ignorable]<<<a",
3694 { "\\u0000", "a"}, 2
3695 }, /* you cannot go before first secondary ignorable */
3696
3697 /* 'normal' befores */
3698
3699 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3700 { "c", "b", "\\u0332", "a" }, 4
3701 },
3702
3703 /* we don't have a code point that corresponds to
3704 * the last primary ignorable
3705 */
3706 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3707 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3708 },
3709
3710 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3711 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3712 },
3713
3714 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3715 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
3716 },
3717
3718 { "&[first regular]<a"
3719 "&[before 1][first regular]<b",
3720 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
3721 },
3722
3723 { "&[before 1][last regular]<b"
3724 "&[last regular]<a",
3725 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3726 },
3727
3728 { "&[before 1][first implicit]<b"
3729 "&[first implicit]<a",
3730 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3731 },
3732
3733 { "&[before 1][last implicit]<b"
3734 "&[last implicit]<a",
3735 { "b", "\\U0010FFFD", "a" }, 3
3736 },
3737
3738 { "&[last variable]<z"
3739 "&[last primary ignorable]<x"
3740 "&[last secondary ignorable]<<y"
3741 "&[last tertiary ignorable]<<<w"
3742 "&[top]<u",
3743 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
3744 }
3745
3746 };
3747 uint32_t i;
3748
3749 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3750 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3751 }
3752 }
3753
3754
3755 static void TestOptimize(void) {
3756 /* this is not really a test - just trying out
3757 * whether copying of UCA contents will fail
3758 * Cannot really test, since the functionality
3759 * remains the same.
3760 */
3761 static const struct {
3762 const char *rules;
3763 const char *data[10];
3764 const uint32_t len;
3765 } tests[] = {
3766 /* - all befores here amount to zero */
3767 { "[optimize [\\uAC00-\\uD7FF]]",
3768 { "a", "b"}, 2}
3769 };
3770 uint32_t i;
3771
3772 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3773 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3774 }
3775 }
3776
3777 /*
3778 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3779 weiv ucol_strcollIter?
3780 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3781 weiv these are the input strings?
3782 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3783 weiv will check - could be a problem with utf-8 iterator
3784 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3785 weiv hmmm
3786 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3787 weiv that doesn't sound right
3788 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3789 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3790 cycheng@ca.ibm.c... yes
3791 weiv and then do the comparison
3792 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3793 weiv utf-16 strings look like a little endian ones in the example you sent me
3794 weiv It could be a bug - let me try to test it out
3795 cycheng@ca.ibm.c... ok
3796 cycheng@ca.ibm.c... we can wait till the conf. call
3797 cycheng@ca.ibm.c... next weke
3798 weiv that would be great
3799 weiv hmmm
3800 weiv I might be wrong
3801 weiv let me play with it some more
3802 cycheng@ca.ibm.c... ok
3803 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3804 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3805 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3806 weiv ok
3807 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3808 weiv thanks
3809 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3810 */
#if 0
/*
 * Disabled test; never compiled.  It compares the same two strings with the
 * root collator (normalization on) twice through ucol_strcollIter(): once
 * via UTF-16BE character iterators and once via UTF-8 character iterators,
 * and reports an error if the two results differ.
 * The byte sequences are the ones quoted in the conversation above
 * (d8000021 / fffc0062 and eda08021 / efbfbc62).
 * NOTE(review): the status from ucol_open() and the later calls is never
 * checked here.
 */
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);


    /* Same text as utf8[] below, as big-endian UTF-16 bytes. */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


    if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif
3851
3852 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3853 static void Alexis2(void) {
3854 UErrorCode status = U_ZERO_ERROR;
3855 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3856 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3857 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3858 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3859
3860 UConverter *conv = NULL;
3861
3862 UCharIterator U16BEItS, U16BEItT;
3863 UCharIterator U8ItS, U8ItT;
3864
3865 UCollationResult resU16, resU16BE, resU8;
3866
3867 static const char* const pairs[][2] = {
3868 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3869 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3870 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3871 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3872 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3873 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3874 { "\\u0020", "\\u0020\\u0000"}
3875 /*
3876 5F20 (my result here)
3877 5F204E008E3F
3878 5F20 (your result here)
3879 */
3880 };
3881
3882 int32_t i = 0;
3883
3884 UCollator *coll = ucol_open("", &status);
3885 if(status == U_FILE_ACCESS_ERROR) {
3886 log_data_err("Is your data around?\n");
3887 return;
3888 } else if(U_FAILURE(status)) {
3889 log_err("Error opening collator\n");
3890 return;
3891 }
3892 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3893 conv = ucnv_open("UTF16BE", &status);
3894 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3895 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3896 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3897
3898 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3899
3900 log_verbose("Result of strcoll is %i\n", resU16);
3901
3902 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3903 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3904
3905 /* use the original sizes, as the result from converter is in bytes */
3906 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3907 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3908
3909 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3910
3911 log_verbose("Result of U16BE is %i\n", resU16BE);
3912
3913 if(resU16 != resU16BE) {
3914 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3915 }
3916
3917 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3918 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3919
3920 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3921 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3922
3923 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3924
3925 if(resU16 != resU8) {
3926 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3927 }
3928
3929 }
3930
3931 ucol_close(coll);
3932 ucnv_close(conv);
3933 }
3934
3935 static void TestHebrewUCA(void) {
3936 UErrorCode status = U_ZERO_ERROR;
3937 static const char *first[] = {
3938 "d790d6b8d79cd795d6bcd7a9",
3939 "d790d79cd79ed7a7d799d799d7a1",
3940 "d790d6b4d79ed795d6bcd7a9",
3941 };
3942
3943 char utf8String[3][256];
3944 UChar utf16String[3][256];
3945
3946 int32_t i = 0, j = 0;
3947 int32_t sizeUTF8[3];
3948 int32_t sizeUTF16[3];
3949
3950 UCollator *coll = ucol_open("", &status);
3951 if (U_FAILURE(status)) {
3952 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3953 return;
3954 }
3955 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3956
3957 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3958 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3959 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3960 log_verbose("%i: ");
3961 for(j = 0; j < sizeUTF16[i]; j++) {
3962 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3963 log_verbose("%04X", utf16String[i][j]);
3964 }
3965 log_verbose("\n");
3966 }
3967 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3968 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3969 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3970 }
3971 }
3972
3973 ucol_close(coll);
3974
3975 }
3976
3977 static void TestPartialSortKeyTermination(void) {
3978 static const char* cases[] = {
3979 "\\u1234\\u1234\\udc00",
3980 "\\udc00\\ud800\\ud800"
3981 };
3982
3983 int32_t i = sizeof(UCollator);
3984
3985 UErrorCode status = U_ZERO_ERROR;
3986
3987 UCollator *coll = ucol_open("", &status);
3988
3989 UCharIterator iter;
3990
3991 UChar currCase[256];
3992 int32_t length = 0;
3993 int32_t pKeyLen = 0;
3994
3995 uint8_t key[256];
3996
3997 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3998 uint32_t state[2] = {0, 0};
3999 length = u_unescape(cases[i], currCase, 256);
4000 uiter_setString(&iter, currCase, length);
4001 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
4002
4003 log_verbose("Done\n");
4004
4005 }
4006 ucol_close(coll);
4007 }
4008
4009 static void TestSettings(void) {
4010 static const char* cases[] = {
4011 "apple",
4012 "Apple"
4013 };
4014
4015 static const char* locales[] = {
4016 "",
4017 "en"
4018 };
4019
4020 UErrorCode status = U_ZERO_ERROR;
4021
4022 int32_t i = 0, j = 0;
4023
4024 UChar source[256], target[256];
4025 int32_t sLen = 0, tLen = 0;
4026
4027 UCollator *collateObject = NULL;
4028 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4029 collateObject = ucol_open(locales[i], &status);
4030 ucol_setStrength(collateObject, UCOL_PRIMARY);
4031 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4032 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4033 sLen = u_unescape(cases[j-1], source, 256);
4034 source[sLen] = 0;
4035 tLen = u_unescape(cases[j], target, 256);
4036 source[tLen] = 0;
4037 doTest(collateObject, source, target, UCOL_EQUAL);
4038 }
4039 ucol_close(collateObject);
4040 }
4041 }
4042
4043 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4044 UErrorCode status = U_ZERO_ERROR;
4045 int32_t errorNo = 0;
4046 /*const UChar *sourceRules = NULL;*/
4047 /*int32_t sourceRulesLen = 0;*/
4048 UColAttributeValue french = UCOL_OFF;
4049 int32_t cloneSize = 0;
4050
4051 if(!ucol_equals(source, target)) {
4052 log_err("Same collators, different address not equal\n");
4053 errorNo++;
4054 }
4055 ucol_close(target);
4056 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4057 /* currently, safeClone is implemented through getRules/openRules
4058 * so it is the same as the test below - I will comment that test out.
4059 */
4060 /* real thing */
4061 target = ucol_safeClone(source, NULL, &cloneSize, &status);
4062 if(U_FAILURE(status)) {
4063 log_err("Error creating clone\n");
4064 errorNo++;
4065 return errorNo;
4066 }
4067 if(!ucol_equals(source, target)) {
4068 log_err("Collator different from it's clone\n");
4069 errorNo++;
4070 }
4071 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4072 if(french == UCOL_ON) {
4073 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4074 } else {
4075 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4076 }
4077 if(U_FAILURE(status)) {
4078 log_err("Error setting attributes\n");
4079 errorNo++;
4080 return errorNo;
4081 }
4082 if(ucol_equals(source, target)) {
4083 log_err("Collators same even when options changed\n");
4084 errorNo++;
4085 }
4086 ucol_close(target);
4087 /* commented out since safeClone uses exactly the same technique */
4088 /*
4089 sourceRules = ucol_getRules(source, &sourceRulesLen);
4090 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4091 if(U_FAILURE(status)) {
4092 log_err("Error instantiating target from rules\n");
4093 errorNo++;
4094 return errorNo;
4095 }
4096 if(!ucol_equals(source, target)) {
4097 log_err("Collator different from collator that was created from the same rules\n");
4098 errorNo++;
4099 }
4100 ucol_close(target);
4101 */
4102 }
4103 return errorNo;
4104 }
4105
4106
4107 static void TestEquals(void) {
4108 /* ucol_equals is not currently a public API. There is a chance that it will become
4109 * something like this, but currently it is only used by RuleBasedCollator::operator==
4110 */
4111 /* test whether the two collators instantiated from the same locale are equal */
4112 UErrorCode status = U_ZERO_ERROR;
4113 UParseError parseError;
4114 int32_t noOfLoc = uloc_countAvailable();
4115 const char *locName = NULL;
4116 UCollator *source = NULL, *target = NULL;
4117 int32_t i = 0;
4118
4119 const char* rules[] = {
4120 "&l < lj <<< Lj <<< LJ",
4121 "&n < nj <<< Nj <<< NJ",
4122 "&ae <<< \\u00e4",
4123 "&AE <<< \\u00c4"
4124 };
4125 /*
4126 const char* badRules[] = {
4127 "&l <<< Lj",
4128 "&n < nj <<< nJ <<< NJ",
4129 "&a <<< \\u00e4",
4130 "&AE <<< \\u00c4 <<< x"
4131 };
4132 */
4133
4134 UChar sourceRules[1024], targetRules[1024];
4135 int32_t sourceRulesSize = 0, targetRulesSize = 0;
4136 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4137
4138 for(i = 0; i < rulesSize; i++) {
4139 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4140 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4141 }
4142
4143 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4144 if(status == U_FILE_ACCESS_ERROR) {
4145 log_data_err("Is your data around?\n");
4146 return;
4147 } else if(U_FAILURE(status)) {
4148 log_err("Error opening collator\n");
4149 return;
4150 }
4151 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4152 if(!ucol_equals(source, target)) {
4153 log_err("Equivalent collators not equal!\n");
4154 }
4155 ucol_close(source);
4156 ucol_close(target);
4157
4158 source = ucol_open("root", &status);
4159 target = ucol_open("root", &status);
4160 log_verbose("Testing root\n");
4161 if(!ucol_equals(source, source)) {
4162 log_err("Same collator not equal\n");
4163 }
4164 if(TestEqualsForCollator(locName, source, target)) {
4165 log_err("Errors for root\n", locName);
4166 }
4167 ucol_close(source);
4168
4169 for(i = 0; i<noOfLoc; i++) {
4170 status = U_ZERO_ERROR;
4171 locName = uloc_getAvailable(i);
4172 /*if(hasCollationElements(locName)) {*/
4173 log_verbose("Testing equality for locale %s\n", locName);
4174 source = ucol_open(locName, &status);
4175 target = ucol_open(locName, &status);
4176 if (U_FAILURE(status)) {
4177 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
4178 continue;
4179 }
4180 if(TestEqualsForCollator(locName, source, target)) {
4181 log_err("Errors for locale %s\n", locName);
4182 }
4183 ucol_close(source);
4184 /*}*/
4185 }
4186 }
4187
4188 static void TestJ2726(void) {
4189 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4190 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4191 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4192 UErrorCode status = U_ZERO_ERROR;
4193 UCollator *coll = ucol_open("en", &status);
4194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4195 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4196 doTest(coll, a, aSpace, UCOL_EQUAL);
4197 doTest(coll, aSpace, a, UCOL_EQUAL);
4198 doTest(coll, a, spaceA, UCOL_EQUAL);
4199 doTest(coll, spaceA, a, UCOL_EQUAL);
4200 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4201 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4202 ucol_close(coll);
4203 }
4204
4205 static void NullRule(void) {
4206 UChar r[3] = {0};
4207 UErrorCode status = U_ZERO_ERROR;
4208 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4209 if(U_SUCCESS(status)) {
4210 log_err("This should have been an error!\n");
4211 ucol_close(coll);
4212 } else {
4213 status = U_ZERO_ERROR;
4214 }
4215 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4216 if(U_FAILURE(status)) {
4217 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4218 } else {
4219 ucol_close(coll);
4220 }
4221 }
4222
4223 /**
4224 * Test for CollationElementIterator previous and next for the whole set of
4225 * unicode characters with normalization on.
4226 */
4227 static void TestNumericCollation(void)
4228 {
4229 UErrorCode status = U_ZERO_ERROR;
4230
4231 const static char *basicTestStrings[]={
4232 "hello1",
4233 "hello2",
4234 "hello2002",
4235 "hello2003",
4236 "hello123456",
4237 "hello1234567",
4238 "hello10000000",
4239 "hello100000000",
4240 "hello1000000000",
4241 "hello10000000000",
4242 };
4243
4244 const static char *preZeroTestStrings[]={
4245 "avery10000",
4246 "avery010000",
4247 "avery0010000",
4248 "avery00010000",
4249 "avery000010000",
4250 "avery0000010000",
4251 "avery00000010000",
4252 "avery000000010000",
4253 };
4254
4255 const static char *thirtyTwoBitNumericStrings[]={
4256 "avery42949672960",
4257 "avery42949672961",
4258 "avery42949672962",
4259 "avery429496729610"
4260 };
4261
4262 const static char *longNumericStrings[]={
4263 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4264 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4265 are treated as multiple collation elements. */
4266 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4267 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4268 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4269 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4270 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4271 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4272 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4273 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4274 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4275 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4276 };
4277
4278 const static char *supplementaryDigits[] = {
4279 "\\uD835\\uDFCE", /* 0 */
4280 "\\uD835\\uDFCF", /* 1 */
4281 "\\uD835\\uDFD0", /* 2 */
4282 "\\uD835\\uDFD1", /* 3 */
4283 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4284 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4285 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4286 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4287 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4288 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4289 };
4290
4291 const static char *foreignDigits[] = {
4292 "\\u0661",
4293 "\\u0662",
4294 "\\u0663",
4295 "\\u0661\\u0660",
4296 "\\u0661\\u0662",
4297 "\\u0661\\u0663",
4298 "\\u0662\\u0660",
4299 "\\u0662\\u0662",
4300 "\\u0662\\u0663",
4301 "\\u0663\\u0660",
4302 "\\u0663\\u0662",
4303 "\\u0663\\u0663"
4304 };
4305
4306 const static char *evenZeroes[] = {
4307 "2000",
4308 "2001",
4309 "2002",
4310 "2003"
4311 };
4312
4313 UColAttribute att = UCOL_NUMERIC_COLLATION;
4314 UColAttributeValue val = UCOL_ON;
4315
4316 /* Open our collator. */
4317 UCollator* coll = ucol_open("root", &status);
4318 if (U_FAILURE(status)){
4319 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4320 myErrorName(status));
4321 return;
4322 }
4323 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4324 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4325 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4326 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4327 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4328 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4329
4330 /* Setting up our collator to do digits. */
4331 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4332 if (U_FAILURE(status)){
4333 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4334 myErrorName(status));
4335 return;
4336 }
4337
4338 /*
4339 Testing that prepended zeroes still yield the correct collation behavior.
4340 We expect that every element in our strings array will be equal.
4341 */
4342 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4343
4344 ucol_close(coll);
4345 }
4346
4347 static void TestTibetanConformance(void)
4348 {
4349 const char* test[] = {
4350 "\\u0FB2\\u0591\\u0F71\\u0061",
4351 "\\u0FB2\\u0F71\\u0061"
4352 };
4353
4354 UErrorCode status = U_ZERO_ERROR;
4355 UCollator *coll = ucol_open("", &status);
4356 UChar source[100];
4357 UChar target[100];
4358 int result;
4359 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4360 if (U_SUCCESS(status)) {
4361 u_unescape(test[0], source, 100);
4362 u_unescape(test[1], target, 100);
4363 doTest(coll, source, target, UCOL_EQUAL);
4364 result = ucol_strcoll(coll, source, -1, target, -1);
4365 log_verbose("result %d\n", result);
4366 if (UCOL_EQUAL != result) {
4367 log_err("Tibetan comparison error\n");
4368 }
4369 }
4370 ucol_close(coll);
4371
4372 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4373 }
4374
/**
 * Passes two three-character Han strings to genericLocaleStarter() for the
 * "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, sizeof(test) / sizeof(test[0]));
}
4379
/* Upper bound (inclusive) of the raw values walked by the implicit-CE test. */
#define TST_UCOL_MAX_INPUT 0x220001
/* Bit masks used by the implicit-CE test. No trailing semicolons, so the
 * masks can be used inside larger expressions as well as at statement end. */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4384
4385
4386 static void showImplicit(UChar32 i) {
4387 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4388 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4389 }
4390 }
4391
4392 static void TestImplicitGeneration(void) {
4393 UErrorCode status = U_ZERO_ERROR;
4394 UChar32 last = 0;
4395 UChar32 current;
4396 UChar32 i = 0, j = 0;
4397 UChar32 roundtrip = 0;
4398 UChar32 lastBottom = 0;
4399 UChar32 currentBottom = 0;
4400 UChar32 lastTop = 0;
4401 UChar32 currentTop = 0;
4402
4403 UCollator *coll = ucol_open("root", &status);
4404 if(U_FAILURE(status)) {
4405 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4406 return;
4407 }
4408
4409 uprv_uca_getRawFromImplicit(0xE20303E7);
4410
4411 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4412 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4413
4414 /* check that it round-trips AND that all intervening ones are illegal*/
4415 roundtrip = uprv_uca_getRawFromImplicit(current);
4416 if (roundtrip != i) {
4417 log_err("No roundtrip %08X\n", i);
4418 }
4419 if (last != 0) {
4420 for (j = last + 1; j < current; ++j) {
4421 roundtrip = uprv_uca_getRawFromImplicit(j);
4422 /* raise an error if it *doesn't* find an error*/
4423 if (roundtrip != -1) {
4424 log_err("Fails to recognize illegal %08X\n", j);
4425 }
4426 }
4427 }
4428 /* now do other consistency checks*/
4429 lastBottom = last & bottomByte;
4430 currentBottom = current & bottomByte;
4431 lastTop = last & topByte;
4432 currentTop = current & topByte;
4433
4434 /* print out some values for spot-checking*/
4435 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4436 showImplicit(i-3);
4437 showImplicit(i-2);
4438 showImplicit(i-1);
4439 showImplicit(i);
4440 showImplicit(i+1);
4441 showImplicit(i+2);
4442 }
4443 last = current;
4444
4445 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4446 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4447 }
4448 }
4449 showImplicit(TST_UCOL_MAX_INPUT-2);
4450 showImplicit(TST_UCOL_MAX_INPUT-1);
4451 showImplicit(TST_UCOL_MAX_INPUT);
4452 ucol_close(coll);
4453 }
4454
4455 /**
4456 * Iterate through the given iterator, checking to see that all the strings
4457 * in the expected array are present.
4458 * @param expected array of strings we expect to see, or NULL
4459 * @param expectedCount number of elements of expected, or 0
4460 */
4461 static int32_t checkUEnumeration(const char* msg,
4462 UEnumeration* iter,
4463 const char** expected,
4464 int32_t expectedCount) {
4465 UErrorCode ec = U_ZERO_ERROR;
4466 int32_t i = 0, n, j, bit;
4467 int32_t seenMask = 0;
4468
4469 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4470 n = uenum_count(iter, &ec);
4471 if (!assertSuccess("count", &ec)) return -1;
4472 log_verbose("%s = [", msg);
4473 for (;; ++i) {
4474 const char* s = uenum_next(iter, NULL, &ec);
4475 if (!assertSuccess("snext", &ec) || s == NULL) break;
4476 if (i != 0) log_verbose(",");
4477 log_verbose("%s", s);
4478 /* check expected list */
4479 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4480 if ((seenMask&bit) == 0 &&
4481 uprv_strcmp(s, expected[j]) == 0) {
4482 seenMask |= bit;
4483 break;
4484 }
4485 }
4486 }
4487 log_verbose("] (%d)\n", i);
4488 assertTrue("count verified", i==n);
4489 /* did we see all expected strings? */
4490 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4491 if ((seenMask&bit)!=0) {
4492 log_verbose("Ok: \"%s\" seen\n", expected[j]);
4493 } else {
4494 log_err("FAIL: \"%s\" not seen\n", expected[j]);
4495 }
4496 }
4497 return n;
4498 }
4499
4500 /**
4501 * Test new API added for separate collation tree.
4502 */
4503 static void TestSeparateTrees(void) {
4504 UErrorCode ec = U_ZERO_ERROR;
4505 UEnumeration *e = NULL;
4506 int32_t n = -1;
4507 UBool isAvailable;
4508 char loc[256];
4509
4510 static const char* AVAIL[] = { "en", "de" };
4511
4512 static const char* KW[] = { "collation" };
4513
4514 static const char* KWVAL[] = { "phonebook", "stroke" };
4515
4516 #if !UCONFIG_NO_SERVICE
4517 e = ucol_openAvailableLocales(&ec);
4518 if (e != NULL) {
4519 assertSuccess("ucol_openAvailableLocales", &ec);
4520 assertTrue("ucol_openAvailableLocales!=0", e!=0);
4521 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4522 /* Don't need to check n because we check list */
4523 uenum_close(e);
4524 } else {
4525 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4526 }
4527 #endif
4528
4529 e = ucol_getKeywords(&ec);
4530 if (e != NULL) {
4531 assertSuccess("ucol_getKeywords", &ec);
4532 assertTrue("ucol_getKeywords!=0", e!=0);
4533 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4534 /* Don't need to check n because we check list */
4535 uenum_close(e);
4536 } else {
4537 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4538 }
4539
4540 e = ucol_getKeywordValues(KW[0], &ec);
4541 if (e != NULL) {
4542 assertSuccess("ucol_getKeywordValues", &ec);
4543 assertTrue("ucol_getKeywordValues!=0", e!=0);
4544 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4545 /* Don't need to check n because we check list */
4546 uenum_close(e);
4547 } else {
4548 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4549 }
4550
4551 /* Try setting a warning before calling ucol_getKeywordValues */
4552 ec = U_USING_FALLBACK_WARNING;
4553 e = ucol_getKeywordValues(KW[0], &ec);
4554 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4555 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4556 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4557 /* Don't need to check n because we check list */
4558 uenum_close(e);
4559 }
4560
4561 /*
4562 U_DRAFT int32_t U_EXPORT2
4563 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4564 const char* locale, UBool* isAvailable,
4565 UErrorCode* status);
4566 }
4567 */
4568 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4569 &isAvailable, &ec);
4570 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4571 assertEquals("getFunctionalEquivalent(de)", "de", loc);
4572 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4573 isAvailable == TRUE);
4574 }
4575
4576 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4577 &isAvailable, &ec);
4578 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4579 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4580 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4581 isAvailable == TRUE);
4582 }
4583 }
4584
/**
 * Supersedes TestJ784.
 * Runs two lists of strings containing tone-marked Latin vowels through a
 * custom "[before 2]" rule set via genericRulesStarter(), and through the
 * "zh" locale via genericLocaleStarter().
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* syllables with and without a macron */
    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* all tone/case variants of 'a' after x/X, then followed by x */
    static const char *test2[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(rules, test, sizeof(test) / sizeof(test[0]));
    genericLocaleStarter("zh", test, sizeof(test) / sizeof(test[0]));

    genericRulesStarter(rules, test2, sizeof(test2) / sizeof(test2[0]));
    genericLocaleStarter("zh", test2, sizeof(test2) / sizeof(test2[0]));
}
4645
4646 static void TestBeforeTightening(void) {
4647 static const struct {
4648 const char *rules;
4649 UErrorCode expectedStatus;
4650 } tests[] = {
4651 { "&[before 1]a<x", U_ZERO_ERROR },
4652 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4653 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4654 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4655 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4656 { "&[before 2]a<<x",U_ZERO_ERROR },
4657 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4658 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4659 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
4660 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
4661 { "&[before 3]a<<<x",U_ZERO_ERROR },
4662 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
4663 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4664 };
4665
4666 int32_t i = 0;
4667
4668 UErrorCode status = U_ZERO_ERROR;
4669 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4670 uint32_t rlen = 0;
4671
4672 UCollator *coll = NULL;
4673
4674
4675 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4676 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4677 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4678 if(status != tests[i].expectedStatus) {
4679 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4680 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4681 }
4682 ucol_close(coll);
4683 status = U_ZERO_ERROR;
4684 }
4685
4686 }
4687
4688 /*
4689 &m < a
4690 &[before 1] a < x <<< X << q <<< Q < z
4691 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4692
4693 &m < a
4694 &[before 2] a << x <<< X << q <<< Q < z
4695 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4696
4697 &m < a
4698 &[before 3] a <<< x <<< X << q <<< Q < z
4699 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4700
4701
4702 &m << a
4703 &[before 1] a < x <<< X << q <<< Q < z
4704 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4705
4706 &m << a
4707 &[before 2] a << x <<< X << q <<< Q < z
4708 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4709
4710 &m << a
4711 &[before 3] a <<< x <<< X << q <<< Q < z
4712 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4713
4714
4715 &m <<< a
4716 &[before 1] a < x <<< X << q <<< Q < z
4717 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4718
4719 &m <<< a
4720 &[before 2] a << x <<< X << q <<< Q < z
4721 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4722
4723 &m <<< a
4724 &[before 3] a <<< x <<< X << q <<< Q < z
4725 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4726
4727
4728 &[before 1] s < x <<< X << q <<< Q < z
4729 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4730
4731 &[before 2] s << x <<< X << q <<< Q < z
4732 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4733
4734 &[before 3] s <<< x <<< X << q <<< Q < z
4735 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4736
4737
4738 &[before 1] \u24DC < x <<< X << q <<< Q < z
4739 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4740
4741 &[before 2] \u24DC << x <<< X << q <<< Q < z
4742 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4743
4744 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4745 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4746 */
4747
4748
#if 0
/* Disabled: requires features not yet supported. */
/**
 * For each rule string, passes the expected ordering of the listed strings
 * to genericRulesStarter(). The table mirrors the "assert:" lines in the
 * comment that precedes this block.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        /* reset on "a" placed after m with a primary difference */
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset on "a" placed after m with a secondary difference */
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset on "a" placed after m with a tertiary difference */
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        /* reset directly on "s" */
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        /* reset directly on U+24DC */
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t row;

    for(row = 0; row < sizeof(tests)/sizeof(tests[0]); ++row) {
        genericRulesStarter(tests[row].rules, tests[row].order, tests[row].size);
    }
}
#endif
4796
4797 static void TestTailorNULL( void ) {
4798 const static char* rule = "&a <<< '\\u0000'";
4799 UErrorCode status = U_ZERO_ERROR;
4800 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4801 uint32_t rlen = 0;
4802 UChar a = 1, null = 0;
4803 UCollationResult res = UCOL_EQUAL;
4804
4805 UCollator *coll = NULL;
4806
4807
4808 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4809 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4810
4811 if(U_FAILURE(status)) {
4812 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4813 } else {
4814 res = ucol_strcoll(coll, &a, 1, &null, 1);
4815
4816 if(res != UCOL_LESS) {
4817 log_err("NULL was not tailored properly!\n");
4818 }
4819 }
4820
4821 ucol_close(coll);
4822 }
4823
4824 static void
4825 TestUpperFirstQuaternary(void)
4826 {
4827 const char* tests[] = { "B", "b", "Bb", "bB" };
4828 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4829 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4830 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4831 }
4832
4833 static void
4834 TestJ4960(void)
4835 {
4836 const char* tests[] = { "\\u00e2T", "aT" };
4837 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4838 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4839 const char* tests2[] = { "a", "A" };
4840 const char* rule = "&[first tertiary ignorable]=A=a";
4841 UColAttribute att2[] = { UCOL_CASE_LEVEL };
4842 UColAttributeValue attVals2[] = { UCOL_ON };
4843 /* Test whether we correctly ignore primary ignorables on case level when */
4844 /* we have only primary & case level */
4845 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4846 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4847 /* and case level */
4848 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4849 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4850 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4851 }
4852
4853 static void
4854 TestJ5223(void)
4855 {
4856 static const char *test = "this is a test string";
4857 UChar ustr[256];
4858 int32_t ustr_length = u_unescape(test, ustr, 256);
4859 unsigned char sortkey[256];
4860 int32_t sortkey_length;
4861 UErrorCode status = U_ZERO_ERROR;
4862 static UCollator *coll = NULL;
4863 coll = ucol_open("root", &status);
4864 if(U_FAILURE(status)) {
4865 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4866 return;
4867 }
4868 ucol_setStrength(coll, UCOL_PRIMARY);
4869 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4870 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4871 if (U_FAILURE(status)) {
4872 log_err("Failed setting atributes\n");
4873 return;
4874 }
4875 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4876 if (sortkey_length > 256) return;
4877
4878 /* we mark the position where the null byte should be written in advance */
4879 sortkey[sortkey_length-1] = 0xAA;
4880
4881 /* we set the buffer size one byte higher than needed */
4882 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4883 sortkey_length+1);
4884
4885 /* no error occurs (for me) */
4886 if (sortkey[sortkey_length-1] == 0xAA) {
4887 log_err("Hit bug at first try\n");
4888 }
4889
4890 /* we mark the position where the null byte should be written again */
4891 sortkey[sortkey_length-1] = 0xAA;
4892
4893 /* this time we set the buffer size to the exact amount needed */
4894 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4895 sortkey_length);
4896
4897 /* now the trailing null byte is not written */
4898 if (sortkey[sortkey_length-1] == 0xAA) {
4899 log_err("Hit bug at second try\n");
4900 }
4901
4902 ucol_close(coll);
4903 }
4904
4905 /* Regression test for Thai partial sort key problem */
/*
 * Runs the generic ordering check on the "th" locale with two strings that
 * differ only in their second-to-last code unit (U+0E47 vs U+0E48).
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
}
4916
/*
 * Runs the generic rule-based ordering check for "a" before "y" with a
 * tailoring that resets on the contraction "Ny" and places 'a' after the
 * first secondary ignorable.
 */
static void
TestJ5367(void)
{
    static const char *ordered[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, ordered, LEN(ordered));
}
4924
4925 static void
4926 TestVI5913(void)
4927 {
4928 UErrorCode status = U_ZERO_ERROR;
4929 int32_t i, j;
4930 UCollator *coll =NULL;
4931 uint8_t resColl[100], expColl[100];
4932 int32_t rLen, tLen, ruleLen, sLen, kLen;
4933 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
4934 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4935 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4936 static const UChar tData[][20]={
4937 {0x1EAC, 0},
4938 {0x0041, 0x0323, 0x0302, 0},
4939 {0x1EA0, 0x0302, 0},
4940 {0x00C2, 0x0323, 0},
4941 {0x1ED8, 0}, /* O with dot and circumflex */
4942 {0x1ECC, 0x0302, 0},
4943 {0x1EB7, 0},
4944 {0x1EA1, 0x0306, 0},
4945 };
4946 static const UChar tailorData[][20]={
4947 {0x1FA2, 0}, /* Omega with 3 combining marks */
4948 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4949 {0x1FF3, 0x0313, 0x0300, 0},
4950 {0x1F60, 0x0300, 0x0345, 0},
4951 {0x1F62, 0x0345, 0},
4952 {0x1FA0, 0x0300, 0},
4953 };
4954 static const UChar tailorData2[][20]={
4955 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4956 {0x0073, 0x0323, 0x030C, 0},
4957 {0x0073, 0x030C, 0x0323, 0},
4958 };
4959 static const UChar tailorData3[][20]={
4960 {0x007a, 0}, /* z */
4961 {0x0061, 0x0065, 0}, /* a + e */
4962 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4963 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4964 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4965 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4966 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4967 {0x00EA, 0}, /* e with circumflex */
4968 };
4969
4970 /* Test Vietnamese sort. */
4971 coll = ucol_open("vi", &status);
4972 if(U_FAILURE(status)) {
4973 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4974 return;
4975 }
4976 log_verbose("\n\nVI collation:");
4977 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4978 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4979 }
4980 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4981 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4982 }
4983 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4984 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4985 }
4986 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4987 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4988 }
4989
4990 for (j=0; j<8; j++) {
4991 tLen = u_strlen(tData[j]);
4992 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4993 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4994 for(i = 0; i<rLen; i++) {
4995 log_verbose(" %02X", resColl[i]);
4996 }
4997 }
4998
4999 ucol_close(coll);
5000
5001 /* Test Romanian sort. */
5002 coll = ucol_open("ro", &status);
5003 log_verbose("\n\nRO collation:");
5004 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
5005 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5006 }
5007 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
5008 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5009 }
5010 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
5011 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5012 }
5013
5014 for (j=4; j<8; j++) {
5015 tLen = u_strlen(tData[j]);
5016 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
5017 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5018 for(i = 0; i<rLen; i++) {
5019 log_verbose(" %02X", resColl[i]);
5020 }
5021 }
5022 ucol_close(coll);
5023
5024 /* Test the precomposed Greek character with 3 combining marks. */
5025 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5026 ruleLen = u_strlen(rule);
5027 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5028 if (U_FAILURE(status)) {
5029 log_err("ucol_openRules failed with %s\n", u_errorName(status));
5030 return;
5031 }
5032 sLen = u_strlen(tailorData[0]);
5033 for (j=1; j<6; j++) {
5034 tLen = u_strlen(tailorData[j]);
5035 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
5036 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5037 }
5038 }
5039 /* Test getSortKey. */
5040 tLen = u_strlen(tailorData[0]);
5041 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5042 for (j=0; j<6; j++) {
5043 tLen = u_strlen(tailorData[j]);
5044 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5045 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5046 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5047 for(i = 0; i<rLen; i++) {
5048 log_err(" %02X", resColl[i]);
5049 }
5050 }
5051 }
5052 ucol_close(coll);
5053
5054 log_verbose("\n\nTailoring test for s with caron:");
5055 ruleLen = u_strlen(rule2);
5056 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5057 tLen = u_strlen(tailorData2[0]);
5058 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5059 for (j=1; j<3; j++) {
5060 tLen = u_strlen(tailorData2[j]);
5061 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5062 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5063 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5064 for(i = 0; i<rLen; i++) {
5065 log_err(" %02X", resColl[i]);
5066 }
5067 }
5068 }
5069 ucol_close(coll);
5070
5071 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5072 ruleLen = u_strlen(rule3);
5073 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5074 tLen = u_strlen(tailorData3[3]);
5075 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5076 for (j=4; j<6; j++) {
5077 tLen = u_strlen(tailorData3[j]);
5078 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5079
5080 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5081 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5082 for(i = 0; i<rLen; i++) {
5083 log_err(" %02X", resColl[i]);
5084 }
5085 }
5086
5087 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5088 for(i = 0; i<rLen; i++) {
5089 log_verbose(" %02X", resColl[i]);
5090 }
5091 }
5092 ucol_close(coll);
5093 }
5094
5095 static void
5096 TestTailor6179(void)
5097 {
5098 UErrorCode status = U_ZERO_ERROR;
5099 int32_t i;
5100 UCollator *coll =NULL;
5101 uint8_t resColl[100];
5102 int32_t rLen, tLen, ruleLen;
5103 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5104 static const UChar rule1[]={
5105 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5106 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5107 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5108 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5109 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5110 static const UChar rule2[]={
5111 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5112 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5113 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5114 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5115 0x3C,0x3C,0x20,0x62,0};
5116
5117 static const UChar tData1[][4]={
5118 {0x61, 0},
5119 {0x62, 0},
5120 { 0xFDD0,0x009E, 0}
5121 };
5122 static const UChar tData2[][4]={
5123 {0x61, 0},
5124 {0x62, 0},
5125 { 0xFDD0,0x009E, 0}
5126 };
5127
5128 /*
5129 * These values from FractionalUCA.txt will change,
5130 * and need to be updated here.
5131 */
5132 static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
5133 static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
5134 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5135 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5136
5137 /* Test [Last Primary ignorable] */
5138
5139 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
5140 ruleLen = u_strlen(rule1);
5141 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5142 if (U_FAILURE(status)) {
5143 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5144 return;
5145 }
5146 tLen = u_strlen(tData1[0]);
5147 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5148 if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
5149 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
5150 for(i = 0; i<rLen; i++) {
5151 log_err(" %02X", resColl[i]);
5152 }
5153 log_err("\n");
5154 }
5155 tLen = u_strlen(tData1[1]);
5156 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5157 if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
5158 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
5159 for(i = 0; i<rLen; i++) {
5160 log_err(" %02X", resColl[i]);
5161 }
5162 log_err("\n");
5163 }
5164 ucol_close(coll);
5165
5166
5167 /* Test [Last Secondary ignorable] */
5168 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
5169 ruleLen = u_strlen(rule1);
5170 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5171 if (U_FAILURE(status)) {
5172 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
5173 return;
5174 }
5175 tLen = u_strlen(tData2[0]);
5176 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5177 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
5178 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
5179 for(i = 0; i<rLen; i++) {
5180 log_err(" %02X", resColl[i]);
5181 }
5182 log_err("\n");
5183 }
5184 if(isICUVersionAtLeast(52, 0, 1)) { /* TODO: debug & fix, see ticket #8982 */
5185 tLen = u_strlen(tData2[1]);
5186 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5187 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
5188 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5189 for(i = 0; i<rLen; i++) {
5190 log_err(" %02X", resColl[i]);
5191 }
5192 log_err("\n");
5193 }
5194 }
5195 ucol_close(coll);
5196 }
5197
5198 static void
5199 TestUCAPrecontext(void)
5200 {
5201 UErrorCode status = U_ZERO_ERROR;
5202 int32_t i, j;
5203 UCollator *coll =NULL;
5204 uint8_t resColl[100], prevColl[100];
5205 int32_t rLen, tLen, ruleLen;
5206 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5207 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5208 /* & l middle-dot << a a is an expansion. */
5209
5210 UChar tData1[][20]={
5211 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5212 { 0x387, 0}, /* standalone middle dot(0x387) */
5213 { 0x61, 0}, /* a */
5214 { 0x6C, 0}, /* l */
5215 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
5216 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5217 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5218 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5219 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5220 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5221 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
5222 };
5223
5224 log_verbose("\n\nEN collation:");
5225 coll = ucol_open("en", &status);
5226 if (U_FAILURE(status)) {
5227 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5228 return;
5229 }
5230 for (j=0; j<11; j++) {
5231 tLen = u_strlen(tData1[j]);
5232 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5233 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5234 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5235 j, tData1[j]);
5236 }
5237 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5238 for(i = 0; i<rLen; i++) {
5239 log_verbose(" %02X", resColl[i]);
5240 }
5241 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5242 }
5243 ucol_close(coll);
5244
5245
5246 log_verbose("\n\nJA collation:");
5247 coll = ucol_open("ja", &status);
5248 if (U_FAILURE(status)) {
5249 log_err("Tailoring test: &z <<a|- failed!");
5250 return;
5251 }
5252 for (j=0; j<11; j++) {
5253 tLen = u_strlen(tData1[j]);
5254 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5255 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5256 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5257 j, tData1[j]);
5258 }
5259 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5260 for(i = 0; i<rLen; i++) {
5261 log_verbose(" %02X", resColl[i]);
5262 }
5263 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5264 }
5265 ucol_close(coll);
5266
5267
5268 log_verbose("\n\nTailoring test: & middle dot < a ");
5269 ruleLen = u_strlen(rule1);
5270 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5271 if (U_FAILURE(status)) {
5272 log_err("Tailoring test: & middle dot < a failed!");
5273 return;
5274 }
5275 for (j=0; j<11; j++) {
5276 tLen = u_strlen(tData1[j]);
5277 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5278 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5279 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5280 j, tData1[j]);
5281 }
5282 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5283 for(i = 0; i<rLen; i++) {
5284 log_verbose(" %02X", resColl[i]);
5285 }
5286 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5287 }
5288 ucol_close(coll);
5289
5290
5291 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5292 ruleLen = u_strlen(rule2);
5293 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5294 if (U_FAILURE(status)) {
5295 log_err("Tailoring test: & l middle-dot << a failed!");
5296 return;
5297 }
5298 for (j=0; j<11; j++) {
5299 tLen = u_strlen(tData1[j]);
5300 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5301 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5302 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5303 j, tData1[j]);
5304 }
5305 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5306 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5307 j, tData1[j]);
5308 }
5309 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5310 for(i = 0; i<rLen; i++) {
5311 log_verbose(" %02X", resColl[i]);
5312 }
5313 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5314 }
5315 ucol_close(coll);
5316 }
5317
5318 static void
5319 TestOutOfBuffer5468(void)
5320 {
5321 static const char *test = "\\u4e00";
5322 UChar ustr[256];
5323 int32_t ustr_length = u_unescape(test, ustr, 256);
5324 unsigned char shortKeyBuf[1];
5325 int32_t sortkey_length;
5326 UErrorCode status = U_ZERO_ERROR;
5327 static UCollator *coll = NULL;
5328
5329 coll = ucol_open("root", &status);
5330 if(U_FAILURE(status)) {
5331 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5332 return;
5333 }
5334 ucol_setStrength(coll, UCOL_PRIMARY);
5335 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5336 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5337 if (U_FAILURE(status)) {
5338 log_err("Failed setting atributes\n");
5339 return;
5340 }
5341
5342 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5343 if (sortkey_length != 4) {
5344 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
5345 }
5346 log_verbose("length of sortKey is %d", sortkey_length);
5347 ucol_close(coll);
5348 }
5349
5350 #define TSKC_DATA_SIZE 5
5351 #define TSKC_BUF_SIZE 50
5352 static void
5353 TestSortKeyConsistency(void)
5354 {
5355 UErrorCode icuRC = U_ZERO_ERROR;
5356 UCollator* ucol;
5357 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5358
5359 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5360 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5361 int32_t i, j, i2;
5362
5363 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5364 if (U_FAILURE(icuRC))
5365 {
5366 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5367 return;
5368 }
5369
5370 for (i = 0; i < TSKC_DATA_SIZE; i++)
5371 {
5372 UCharIterator uiter;
5373 uint32_t state[2] = { 0, 0 };
5374 int32_t dataLen = i+1;
5375 for (j=0; j<TSKC_BUF_SIZE; j++)
5376 bufFull[i][j] = bufPart[i][j] = 0;
5377
5378 /* Full sort key */
5379 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5380
5381 /* Partial sort key */
5382 uiter_setString(&uiter, data, dataLen);
5383 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5384 if (U_FAILURE(icuRC))
5385 {
5386 log_err("ucol_nextSortKeyPart failed\n");
5387 ucol_close(ucol);
5388 return;
5389 }
5390
5391 for (i2=0; i2<i; i2++)
5392 {
5393 UBool fullMatch = TRUE;
5394 UBool partMatch = TRUE;
5395 for (j=0; j<TSKC_BUF_SIZE; j++)
5396 {
5397 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5398 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5399 }
5400 if (fullMatch != partMatch) {
5401 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5402 : "partial key was consistent, but full key changed\n");
5403 ucol_close(ucol);
5404 return;
5405 }
5406 }
5407 }
5408
5409 /*=============================================*/
5410 ucol_close(ucol);
5411 }
5412
5413 /* ticket: 6101 */
5414 static void TestCroatianSortKey(void) {
5415 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5416 UErrorCode status = U_ZERO_ERROR;
5417 UCollator *ucol;
5418 UCharIterator iter;
5419
5420 static const UChar text[] = { 0x0044, 0xD81A };
5421
5422 size_t length = sizeof(text)/sizeof(*text);
5423
5424 uint8_t textSortKey[32];
5425 size_t lenSortKey = 32;
5426 size_t actualSortKeyLen;
5427 uint32_t uStateInfo[2] = { 0, 0 };
5428
5429 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5430 if (U_FAILURE(status)) {
5431 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5432 return;
5433 }
5434
5435 uiter_setString(&iter, text, length);
5436
5437 actualSortKeyLen = ucol_nextSortKeyPart(
5438 ucol, &iter, (uint32_t*)uStateInfo,
5439 textSortKey, lenSortKey, &status
5440 );
5441
5442 if (actualSortKeyLen == lenSortKey) {
5443 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5444 }
5445
5446 ucol_close(ucol);
5447 }
5448
5449 /* ticket: 6140 */
5450 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5451 * they are both Hiragana and Katakana
5452 */
5453 #define SORTKEYLEN 50
5454 static void TestHiragana(void) {
5455 UErrorCode status = U_ZERO_ERROR;
5456 UCollator* ucol;
5457 UCollationResult strcollresult;
5458 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5459 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5460 int32_t data1Len = sizeof(data1)/sizeof(*data1);
5461 int32_t data2Len = sizeof(data2)/sizeof(*data2);
5462 int32_t i, j;
5463 uint8_t sortKey1[SORTKEYLEN];
5464 uint8_t sortKey2[SORTKEYLEN];
5465
5466 UCharIterator uiter1;
5467 UCharIterator uiter2;
5468 uint32_t state1[2] = { 0, 0 };
5469 uint32_t state2[2] = { 0, 0 };
5470 int32_t keySize1;
5471 int32_t keySize2;
5472
5473 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5474 &status);
5475 if (U_FAILURE(status)) {
5476 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5477 return;
5478 }
5479
5480 /* Start of full sort keys */
5481 /* Full sort key1 */
5482 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5483 /* Full sort key2 */
5484 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5485 if (keySize1 == keySize2) {
5486 for (i = 0; i < keySize1; i++) {
5487 if (sortKey1[i] != sortKey2[i]) {
5488 log_err("Full sort keys are different. Should be equal.");
5489 }
5490 }
5491 } else {
5492 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5493 }
5494 /* End of full sort keys */
5495
5496 /* Start of partial sort keys */
5497 /* Partial sort key1 */
5498 uiter_setString(&uiter1, data1, data1Len);
5499 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5500 /* Partial sort key2 */
5501 uiter_setString(&uiter2, data2, data2Len);
5502 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5503 if (U_SUCCESS(status) && keySize1 == keySize2) {
5504 for (j = 0; j < keySize1; j++) {
5505 if (sortKey1[j] != sortKey2[j]) {
5506 log_err("Partial sort keys are different. Should be equal");
5507 }
5508 }
5509 } else {
5510 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5511 }
5512 /* End of partial sort keys */
5513
5514 /* Start of strcoll */
5515 /* Use ucol_strcoll() to determine ordering */
5516 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5517 if (strcollresult != UCOL_EQUAL) {
5518 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5519 }
5520
5521 ucol_close(ucol);
5522 }
5523
5524 /* Convenient struct for running collation tests */
5525 typedef struct {
5526 const UChar source[MAX_TOKEN_LEN]; /* String on left */
5527 const UChar target[MAX_TOKEN_LEN]; /* String on right */
5528 UCollationResult result; /* -1, 0 or +1, depending on collation */
5529 } OneTestCase;
5530
5531 /*
5532 * Utility function to test one collation test case.
5533 * @param testcases Array of test cases.
5534 * @param n_testcases Size of the array testcases.
5535 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
5536 * @param n_rules Size of the array str_rules.
5537 */
5538 static void doTestOneTestCase(const OneTestCase testcases[],
5539 int n_testcases,
5540 const char* str_rules[],
5541 int n_rules)
5542 {
5543 int rule_no, testcase_no;
5544 UChar rule[500];
5545 int32_t length = 0;
5546 UErrorCode status = U_ZERO_ERROR;
5547 UParseError parse_error;
5548 UCollator *myCollation;
5549
5550 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5551
5552 length = u_unescape(str_rules[rule_no], rule, 500);
5553 if (length == 0) {
5554 log_err("ERROR: The rule cannot be unescaped: %s\n");
5555 return;
5556 }
5557 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5558 if(U_FAILURE(status)){
5559 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5560 return;
5561 }
5562 log_verbose("Testing the <<* syntax\n");
5563 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5564 ucol_setStrength(myCollation, UCOL_TERTIARY);
5565 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5566 doTest(myCollation,
5567 testcases[testcase_no].source,
5568 testcases[testcase_no].target,
5569 testcases[testcase_no].result
5570 );
5571 }
5572 ucol_close(myCollation);
5573 }
5574 }
5575
5576 const static OneTestCase rangeTestcases[] = {
5577 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
5578 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
5579 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */
5580
5581 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
5582 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
5583 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
5584 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
5585 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */
5586
5587 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */
5588 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
5589 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */
5590 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */
5591
5592 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
5593 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
5594 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
5595 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
5596 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */
5597 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
5598 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
5599 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */
5600 };
5601
5602 static int nRangeTestcases = LEN(rangeTestcases);
5603
5604 const static OneTestCase rangeTestcasesSupplemental[] = {
5605 { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */
5606 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */
5607 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
5608 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */
5609 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5610 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5611 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */
5612 };
5613
5614 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5615
5616 const static OneTestCase rangeTestcasesQwerty[] = {
5617 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
5618 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */
5619
5620 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */
5621 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */
5622
5623 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */
5624 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */
5625
5626 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
5627 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */
5628
5629 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5630 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
5631 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5632 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
5633 };
5634
5635 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5636
5637 static void TestSameStrengthList(void)
5638 {
5639 const char* strRules[] = {
5640 /* Normal */
5641 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
5642
5643 /* Lists */
5644 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5645 };
5646 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5647 }
5648
5649 static void TestSameStrengthListQuoted(void)
5650 {
5651 const char* strRules[] = {
5652 /* Lists with quoted characters */
5653 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5654 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5655
5656 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5657 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5658
5659 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
5660 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5661 };
5662 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5663 }
5664
5665 static void TestSameStrengthListSupplemental(void)
5666 {
5667 const char* strRules[] = {
5668 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5669 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5670 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5671 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5672 };
5673 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5674 }
5675
5676 static void TestSameStrengthListQwerty(void)
5677 {
5678 const char* strRules[] = {
5679 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5680 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5681 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5682 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5683 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5684
5685 /* Quoted characters also will work if two quoted characters are not consecutive. */
5686 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5687
5688 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5689 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5690
5691 };
5692 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5693 }
5694
5695 static void TestSameStrengthListQuotedQwerty(void)
5696 {
5697 const char* strRules[] = {
5698 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5699 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5700 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
5701
5702 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5703 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5704 };
5705 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5706 }
5707
5708 static void TestSameStrengthListRanges(void)
5709 {
5710 const char* strRules[] = {
5711 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5712 };
5713 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5714 }
5715
5716 static void TestSameStrengthListSupplementalRanges(void)
5717 {
5718 const char* strRules[] = {
5719 "&\\ufffe<*\\uffff-\\U00010002",
5720 };
5721 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5722 }
5723
5724 static void TestSpecialCharacters(void)
5725 {
5726 const char* strRules[] = {
5727 /* Normal */
5728 "&';'<'+'<','<'-'<'&'<'*'",
5729
5730 /* List */
5731 "&';'<*'+,-&*'",
5732
5733 /* Range */
5734 "&';'<*'+'-'-&*'",
5735 };
5736
5737 const static OneTestCase specialCharacterStrings[] = {
5738 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
5739 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
5740 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
5741 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
5742 };
5743 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5744 }
5745
5746 static void TestPrivateUseCharacters(void)
5747 {
5748 const char* strRules[] = {
5749 /* Normal */
5750 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5751 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5752 };
5753
5754 const static OneTestCase privateUseCharacterStrings[] = {
5755 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5756 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5757 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5758 { {0xe2da}, {0xe2db}, UCOL_LESS },
5759 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5760 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5761 };
5762 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5763 }
5764
5765 static void TestPrivateUseCharactersInList(void)
5766 {
5767 const char* strRules[] = {
5768 /* List */
5769 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5770 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5771 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5772 };
5773
5774 const static OneTestCase privateUseCharacterStrings[] = {
5775 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5776 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5777 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5778 { {0xe2da}, {0xe2db}, UCOL_LESS },
5779 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5780 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5781 };
5782 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5783 }
5784
5785 static void TestPrivateUseCharactersInRange(void)
5786 {
5787 const char* strRules[] = {
5788 /* Range */
5789 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5790 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5791 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5792 };
5793
5794 const static OneTestCase privateUseCharacterStrings[] = {
5795 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5796 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5797 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5798 { {0xe2da}, {0xe2db}, UCOL_LESS },
5799 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5800 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5801 };
5802 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5803 }
5804
5805 static void TestInvalidListsAndRanges(void)
5806 {
5807 const char* invalidRules[] = {
5808 /* Range not in starred expression */
5809 "&\\ufffe<\\uffff-\\U00010002",
5810
5811 /* Range without start */
5812 "&a<*-c",
5813
5814 /* Range without end */
5815 "&a<*b-",
5816
5817 /* More than one hyphen */
5818 "&a<*b-g-l",
5819
5820 /* Range in the wrong order */
5821 "&a<*k-b",
5822
5823 };
5824
5825 UChar rule[500];
5826 UErrorCode status = U_ZERO_ERROR;
5827 UParseError parse_error;
5828 int n_rules = LEN(invalidRules);
5829 int rule_no;
5830 int length;
5831 UCollator *myCollation;
5832
5833 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5834
5835 length = u_unescape(invalidRules[rule_no], rule, 500);
5836 if (length == 0) {
5837 log_err("ERROR: The rule cannot be unescaped: %s\n");
5838 return;
5839 }
5840 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5841 if(!U_FAILURE(status)){
5842 log_err("ERROR: Could not cause a failure as expected: \n");
5843 }
5844 status = U_ZERO_ERROR;
5845 }
5846 }
5847
5848 /*
5849 * This test ensures that characters placed before a character in a different script have the same lead byte
5850 * in their collation key before and after script reordering.
5851 */
5852 static void TestBeforeRuleWithScriptReordering(void)
5853 {
5854 UParseError error;
5855 UErrorCode status = U_ZERO_ERROR;
5856 UCollator *myCollation;
5857 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5858 UChar rules[500];
5859 uint32_t rulesLength = 0;
5860 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5861 UCollationResult collResult;
5862
5863 uint8_t baseKey[256];
5864 uint32_t baseKeyLength;
5865 uint8_t beforeKey[256];
5866 uint32_t beforeKeyLength;
5867
5868 UChar base[] = { 0x03b1 }; /* base */
5869 int32_t baseLen = sizeof(base)/sizeof(*base);
5870
5871 UChar before[] = { 0x0e01 }; /* ko kai */
5872 int32_t beforeLen = sizeof(before)/sizeof(*before);
5873
5874 /*UChar *data[] = { before, base };
5875 genericRulesStarter(srules, data, 2);*/
5876
5877 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5878
5879
5880 /* build collator */
5881 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5882
5883 rulesLength = u_unescape(srules, rules, LEN(rules));
5884 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5885 if(U_FAILURE(status)) {
5886 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5887 return;
5888 }
5889
5890 /* check collation results - before rule applied but not script reordering */
5891 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5892 if (collResult != UCOL_GREATER) {
5893 log_err("Collation result not correct before script reordering = %d\n", collResult);
5894 }
5895
5896 /* check the lead byte of the collation keys before script reordering */
5897 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5898 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5899 if (baseKey[0] != beforeKey[0]) {
5900 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5901 }
5902
5903 /* reorder the scripts */
5904 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5905 if(U_FAILURE(status)) {
5906 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5907 return;
5908 }
5909
5910 /* check collation results - before rule applied and after script reordering */
5911 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5912 if (collResult != UCOL_GREATER) {
5913 log_err("Collation result not correct after script reordering = %d\n", collResult);
5914 }
5915
5916 /* check the lead byte of the collation keys after script reordering */
5917 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5918 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5919 if (baseKey[0] != beforeKey[0]) {
5920 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5921 }
5922
5923 ucol_close(myCollation);
5924 }
5925
5926 /*
5927 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5928 */
5929 static void TestNonLeadBytesDuringCollationReordering(void)
5930 {
5931 UErrorCode status = U_ZERO_ERROR;
5932 UCollator *myCollation;
5933 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5934
5935 uint8_t baseKey[256];
5936 uint32_t baseKeyLength;
5937 uint8_t reorderKey[256];
5938 uint32_t reorderKeyLength;
5939
5940 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5941
5942 uint32_t i;
5943
5944
5945 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5946
5947 /* build collator tertiary */
5948 myCollation = ucol_open("", &status);
5949 ucol_setStrength(myCollation, UCOL_TERTIARY);
5950 if(U_FAILURE(status)) {
5951 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5952 return;
5953 }
5954 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5955
5956 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5957 if(U_FAILURE(status)) {
5958 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5959 return;
5960 }
5961 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5962
5963 if (baseKeyLength != reorderKeyLength) {
5964 log_err("Key lengths not the same during reordering.\n");
5965 return;
5966 }
5967
5968 for (i = 1; i < baseKeyLength; i++) {
5969 if (baseKey[i] != reorderKey[i]) {
5970 log_err("Collation key bytes not the same at position %d.\n", i);
5971 return;
5972 }
5973 }
5974 ucol_close(myCollation);
5975
5976 /* build collator quaternary */
5977 myCollation = ucol_open("", &status);
5978 ucol_setStrength(myCollation, UCOL_QUATERNARY);
5979 if(U_FAILURE(status)) {
5980 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5981 return;
5982 }
5983 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5984
5985 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5986 if(U_FAILURE(status)) {
5987 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5988 return;
5989 }
5990 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5991
5992 if (baseKeyLength != reorderKeyLength) {
5993 log_err("Key lengths not the same during reordering.\n");
5994 return;
5995 }
5996
5997 for (i = 1; i < baseKeyLength; i++) {
5998 if (baseKey[i] != reorderKey[i]) {
5999 log_err("Collation key bytes not the same at position %d.\n", i);
6000 return;
6001 }
6002 }
6003 ucol_close(myCollation);
6004 }
6005
6006 /*
6007 * Test reordering API.
6008 */
6009 static void TestReorderingAPI(void)
6010 {
6011 UErrorCode status = U_ZERO_ERROR;
6012 UCollator *myCollation;
6013 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6014 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
6015 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6016 UCollationResult collResult;
6017 int32_t retrievedReorderCodesLength;
6018 int32_t retrievedReorderCodes[10];
6019 UChar greekString[] = { 0x03b1 };
6020 UChar punctuationString[] = { 0x203e };
6021 int loopIndex;
6022
6023 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6024
6025 /* build collator tertiary */
6026 myCollation = ucol_open("", &status);
6027 ucol_setStrength(myCollation, UCOL_TERTIARY);
6028 if(U_FAILURE(status)) {
6029 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6030 return;
6031 }
6032
6033 /* set the reorderding */
6034 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6035 if (U_FAILURE(status)) {
6036 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6037 return;
6038 }
6039
6040 /* get the reordering */
6041 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6042 if (status != U_BUFFER_OVERFLOW_ERROR) {
6043 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6044 return;
6045 }
6046 status = U_ZERO_ERROR;
6047 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6048 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6049 return;
6050 }
6051 /* now let's really get it */
6052 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6053 if (U_FAILURE(status)) {
6054 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6055 return;
6056 }
6057 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6058 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6059 return;
6060 }
6061 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6062 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6063 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6064 return;
6065 }
6066 }
6067 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6068 if (collResult != UCOL_LESS) {
6069 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6070 return;
6071 }
6072
6073 /* clear the reordering */
6074 ucol_setReorderCodes(myCollation, NULL, 0, &status);
6075 if (U_FAILURE(status)) {
6076 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6077 return;
6078 }
6079
6080 /* get the reordering again */
6081 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6082 if (retrievedReorderCodesLength != 0) {
6083 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6084 return;
6085 }
6086
6087 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6088 if (collResult != UCOL_GREATER) {
6089 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6090 return;
6091 }
6092
6093 /* test for error condition on duplicate reorder codes */
6094 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
6095 if (!U_FAILURE(status)) {
6096 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6097 return;
6098 }
6099
6100 status = U_ZERO_ERROR;
6101 /* test for reorder codes after a reset code */
6102 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
6103 if (!U_FAILURE(status)) {
6104 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6105 return;
6106 }
6107
6108 ucol_close(myCollation);
6109 }
6110
6111 /*
6112 * Test reordering API.
6113 */
6114 static void TestReorderingAPIWithRuleCreatedCollator(void)
6115 {
6116 UErrorCode status = U_ZERO_ERROR;
6117 UCollator *myCollation;
6118 UChar rules[90];
6119 int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
6120 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6121 UCollationResult collResult;
6122 int32_t retrievedReorderCodesLength;
6123 int32_t retrievedReorderCodes[10];
6124 UChar greekString[] = { 0x03b1 };
6125 UChar punctuationString[] = { 0x203e };
6126 UChar hanString[] = { 0x65E5, 0x672C };
6127 int loopIndex;
6128
6129 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6130
6131 /* build collator from rules */
6132 u_uastrcpy(rules, "[reorder Hani Grek]");
6133 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
6134 if(U_FAILURE(status)) {
6135 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6136 return;
6137 }
6138
6139 /* get the reordering */
6140 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6141 if (U_FAILURE(status)) {
6142 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6143 return;
6144 }
6145 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6146 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6147 return;
6148 }
6149 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6150 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6151 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6152 return;
6153 }
6154 }
6155 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
6156 if (collResult != UCOL_GREATER) {
6157 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6158 return;
6159 }
6160
6161
6162 /* set the reorderding */
6163 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6164 if (U_FAILURE(status)) {
6165 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6166 return;
6167 }
6168
6169 /* get the reordering */
6170 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6171 if (status != U_BUFFER_OVERFLOW_ERROR) {
6172 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6173 return;
6174 }
6175 status = U_ZERO_ERROR;
6176 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6177 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6178 return;
6179 }
6180 /* now let's really get it */
6181 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6182 if (U_FAILURE(status)) {
6183 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6184 return;
6185 }
6186 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6187 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6188 return;
6189 }
6190 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6191 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6192 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6193 return;
6194 }
6195 }
6196 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6197 if (collResult != UCOL_LESS) {
6198 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6199 return;
6200 }
6201
6202 /* clear the reordering */
6203 ucol_setReorderCodes(myCollation, NULL, 0, &status);
6204 if (U_FAILURE(status)) {
6205 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6206 return;
6207 }
6208
6209 /* get the reordering again */
6210 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6211 if (retrievedReorderCodesLength != 0) {
6212 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6213 return;
6214 }
6215
6216 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6217 if (collResult != UCOL_GREATER) {
6218 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6219 return;
6220 }
6221
6222 ucol_close(myCollation);
6223 }
6224
/*
 * qsort() comparator for arrays of int32_t script / reorder codes.
 *
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return A negative value, zero, or a positive value when *a is less than,
 *         equal to, or greater than *b.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
    const int32_t lhs = *(const int32_t *)a;
    const int32_t rhs = *(const int32_t *)b;

    /* compare instead of subtracting: a difference can overflow int32_t */
    return (lhs > rhs) - (lhs < rhs);
}
6229
6230 static void TestEquivalentReorderingScripts(void) {
6231 UErrorCode status = U_ZERO_ERROR;
6232 int32_t equivalentScripts[50];
6233 int32_t equivalentScriptsLength;
6234 int loopIndex;
6235 int32_t equivalentScriptsResult[] = {
6236 USCRIPT_BOPOMOFO,
6237 USCRIPT_LISU,
6238 USCRIPT_LYCIAN,
6239 USCRIPT_CARIAN,
6240 USCRIPT_LYDIAN,
6241 USCRIPT_YI,
6242 USCRIPT_OLD_ITALIC,
6243 USCRIPT_GOTHIC,
6244 USCRIPT_DESERET,
6245 USCRIPT_SHAVIAN,
6246 USCRIPT_OSMANYA,
6247 USCRIPT_LINEAR_B,
6248 USCRIPT_CYPRIOT,
6249 USCRIPT_OLD_SOUTH_ARABIAN,
6250 USCRIPT_AVESTAN,
6251 USCRIPT_IMPERIAL_ARAMAIC,
6252 USCRIPT_INSCRIPTIONAL_PARTHIAN,
6253 USCRIPT_INSCRIPTIONAL_PAHLAVI,
6254 USCRIPT_UGARITIC,
6255 USCRIPT_OLD_PERSIAN,
6256 USCRIPT_CUNEIFORM,
6257 USCRIPT_EGYPTIAN_HIEROGLYPHS,
6258 USCRIPT_PHONETIC_POLLARD,
6259 USCRIPT_SORA_SOMPENG,
6260 USCRIPT_MEROITIC_CURSIVE,
6261 USCRIPT_MEROITIC_HIEROGLYPHS
6262 };
6263
6264 qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
6265
6266 /* UScript.GOTHIC */
6267 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6268 if (U_FAILURE(status)) {
6269 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6270 return;
6271 }
6272 /*
6273 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6274 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6275 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6276 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6277 }
6278 */
6279 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6280 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6281 return;
6282 }
6283 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6284 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6285 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6286 return;
6287 }
6288 }
6289
6290 /* UScript.SHAVIAN */
6291 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
6292 if (U_FAILURE(status)) {
6293 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6294 return;
6295 }
6296 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6297 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6298 return;
6299 }
6300 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6301 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6302 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6303 return;
6304 }
6305 }
6306 }
6307
6308 static void TestReorderingAcrossCloning(void)
6309 {
6310 UErrorCode status = U_ZERO_ERROR;
6311 UCollator *myCollation;
6312 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6313 UCollator *clonedCollation;
6314 int32_t bufferSize;
6315 int32_t retrievedReorderCodesLength;
6316 int32_t retrievedReorderCodes[10];
6317 int loopIndex;
6318
6319 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6320
6321 /* build collator tertiary */
6322 myCollation = ucol_open("", &status);
6323 ucol_setStrength(myCollation, UCOL_TERTIARY);
6324 if(U_FAILURE(status)) {
6325 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6326 return;
6327 }
6328
6329 /* set the reorderding */
6330 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6331 if (U_FAILURE(status)) {
6332 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6333 return;
6334 }
6335
6336 /* clone the collator */
6337 clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status);
6338 if (U_FAILURE(status)) {
6339 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
6340 return;
6341 }
6342
6343 /* get the reordering */
6344 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6345 if (U_FAILURE(status)) {
6346 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6347 return;
6348 }
6349 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6350 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6351 return;
6352 }
6353 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6354 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6355 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6356 return;
6357 }
6358 }
6359
6360 /*uprv_free(buffer);*/
6361 ucol_close(myCollation);
6362 ucol_close(clonedCollation);
6363 }
6364
6365 /*
6366 * Utility function to test one collation reordering test case set.
6367 * @param testcases Array of test cases.
6368 * @param n_testcases Size of the array testcases.
6369 * @param reorderTokens Array of reordering codes.
6370 * @param reorderTokensLen Size of the array reorderTokens.
6371 */
6372 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6373 {
6374 uint32_t testCaseNum;
6375 UErrorCode status = U_ZERO_ERROR;
6376 UCollator *myCollation;
6377
6378 myCollation = ucol_open("", &status);
6379 if (U_FAILURE(status)) {
6380 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6381 return;
6382 }
6383 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6384 if(U_FAILURE(status)) {
6385 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6386 return;
6387 }
6388
6389 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6390 doTest(myCollation,
6391 testCases[testCaseNum].source,
6392 testCases[testCaseNum].target,
6393 testCases[testCaseNum].result
6394 );
6395 }
6396 ucol_close(myCollation);
6397 }
6398
6399 static void TestGreekFirstReorder(void)
6400 {
6401 const char* strRules[] = {
6402 "[reorder Grek]"
6403 };
6404
6405 const int32_t apiRules[] = {
6406 USCRIPT_GREEK
6407 };
6408
6409 const static OneTestCase privateUseCharacterStrings[] = {
6410 { {0x0391}, {0x0391}, UCOL_EQUAL },
6411 { {0x0041}, {0x0391}, UCOL_GREATER },
6412 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6413 { {0x0060}, {0x0391}, UCOL_LESS },
6414 { {0x0391}, {0xe2dc}, UCOL_LESS },
6415 { {0x0391}, {0x0060}, UCOL_GREATER },
6416 };
6417
6418 /* Test rules creation */
6419 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6420
6421 /* Test collation reordering API */
6422 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6423 }
6424
6425 static void TestGreekLastReorder(void)
6426 {
6427 const char* strRules[] = {
6428 "[reorder Zzzz Grek]"
6429 };
6430
6431 const int32_t apiRules[] = {
6432 USCRIPT_UNKNOWN, USCRIPT_GREEK
6433 };
6434
6435 const static OneTestCase privateUseCharacterStrings[] = {
6436 { {0x0391}, {0x0391}, UCOL_EQUAL },
6437 { {0x0041}, {0x0391}, UCOL_LESS },
6438 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6439 { {0x0060}, {0x0391}, UCOL_LESS },
6440 { {0x0391}, {0xe2dc}, UCOL_GREATER },
6441 };
6442
6443 /* Test rules creation */
6444 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6445
6446 /* Test collation reordering API */
6447 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6448 }
6449
6450 static void TestNonScriptReorder(void)
6451 {
6452 const char* strRules[] = {
6453 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6454 };
6455
6456 const int32_t apiRules[] = {
6457 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6458 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6459 UCOL_REORDER_CODE_CURRENCY
6460 };
6461
6462 const static OneTestCase privateUseCharacterStrings[] = {
6463 { {0x0391}, {0x0041}, UCOL_LESS },
6464 { {0x0041}, {0x0391}, UCOL_GREATER },
6465 { {0x0060}, {0x0041}, UCOL_LESS },
6466 { {0x0060}, {0x0391}, UCOL_GREATER },
6467 { {0x0024}, {0x0041}, UCOL_GREATER },
6468 };
6469
6470 /* Test rules creation */
6471 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6472
6473 /* Test collation reordering API */
6474 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6475 }
6476
6477 static void TestHaniReorder(void)
6478 {
6479 const char* strRules[] = {
6480 "[reorder Hani]"
6481 };
6482 const int32_t apiRules[] = {
6483 USCRIPT_HAN
6484 };
6485
6486 const static OneTestCase privateUseCharacterStrings[] = {
6487 { {0x4e00}, {0x0041}, UCOL_LESS },
6488 { {0x4e00}, {0x0060}, UCOL_GREATER },
6489 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6490 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6491 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6492 { {0xfa27}, {0x0041}, UCOL_LESS },
6493 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6494 };
6495
6496 /* Test rules creation */
6497 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6498
6499 /* Test collation reordering API */
6500 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6501 }
6502
6503 static void TestHaniReorderWithOtherRules(void)
6504 {
6505 const char* strRules[] = {
6506 "[reorder Hani] &b<a"
6507 };
6508 /*const int32_t apiRules[] = {
6509 USCRIPT_HAN
6510 };*/
6511
6512 const static OneTestCase privateUseCharacterStrings[] = {
6513 { {0x4e00}, {0x0041}, UCOL_LESS },
6514 { {0x4e00}, {0x0060}, UCOL_GREATER },
6515 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6516 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6517 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6518 { {0xfa27}, {0x0041}, UCOL_LESS },
6519 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6520 { {0x0062}, {0x0061}, UCOL_LESS },
6521 };
6522
6523 /* Test rules creation */
6524 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6525 }
6526
6527 static void TestMultipleReorder(void)
6528 {
6529 const char* strRules[] = {
6530 "[reorder Grek Zzzz DIGIT Latn Hani]"
6531 };
6532
6533 const int32_t apiRules[] = {
6534 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6535 };
6536
6537 const static OneTestCase collationTestCases[] = {
6538 { {0x0391}, {0x0041}, UCOL_LESS},
6539 { {0x0031}, {0x0041}, UCOL_LESS},
6540 { {0x0041}, {0x4e00}, UCOL_LESS},
6541 };
6542
6543 /* Test rules creation */
6544 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6545
6546 /* Test collation reordering API */
6547 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6548 }
6549
6550 /*
6551 * Test that covers issue reported in ticket 8814
6552 */
6553 static void TestReorderWithNumericCollation(void)
6554 {
6555 UErrorCode status = U_ZERO_ERROR;
6556 UCollator *myCollation;
6557 UCollator *myReorderCollation;
6558 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
6559 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6560 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6561 UChar fortyS[] = { 0x0053 };
6562 UChar fortyThreeP[] = { 0x0050 };
6563 uint8_t fortyS_sortKey[128];
6564 int32_t fortyS_sortKey_Length;
6565 uint8_t fortyThreeP_sortKey[128];
6566 int32_t fortyThreeP_sortKey_Length;
6567 uint8_t fortyS_sortKey_reorder[128];
6568 int32_t fortyS_sortKey_reorder_Length;
6569 uint8_t fortyThreeP_sortKey_reorder[128];
6570 int32_t fortyThreeP_sortKey_reorder_Length;
6571 UCollationResult collResult;
6572 UCollationResult collResultReorder;
6573
6574 log_verbose("Testing reordering with and without numeric collation\n");
6575
6576 /* build collator tertiary with numeric */
6577 myCollation = ucol_open("", &status);
6578 /*
6579 ucol_setStrength(myCollation, UCOL_TERTIARY);
6580 */
6581 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6582 if(U_FAILURE(status)) {
6583 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6584 return;
6585 }
6586
6587 /* build collator tertiary with numeric and reordering */
6588 myReorderCollation = ucol_open("", &status);
6589 /*
6590 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6591 */
6592 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6593 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
6594 if(U_FAILURE(status)) {
6595 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6596 return;
6597 }
6598
6599 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
6600 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
6601 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
6602 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
6603
6604 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
6605 log_err_status(status, "ERROR: couldn't generate sort keys\n");
6606 return;
6607 }
6608 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6609 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6610 /*
6611 fprintf(stderr, "\tcollResult = %x\n", collResult);
6612 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6613 fprintf(stderr, "\nfortyS\n");
6614 for (i = 0; i < fortyS_sortKey_Length; i++) {
6615 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6616 }
6617 fprintf(stderr, "\nfortyThreeP\n");
6618 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6619 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6620 }
6621 */
6622 if (collResult != collResultReorder) {
6623 log_err_status(status, "ERROR: collation results should have been the same.\n");
6624 return;
6625 }
6626
6627 ucol_close(myCollation);
6628 ucol_close(myReorderCollation);
6629 }
6630
/**
 * Compares two zero-terminated byte sequences.
 *
 * @param a first sequence, terminated by a 0 byte
 * @param b second sequence, terminated by a 0 byte
 * @return 0 when both sequences are identical up to and including the
 *         terminator, -1 when the first differing byte of a is smaller than
 *         that of b, and 1 otherwise. Bytes are compared as unsigned values.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    size_t pos = 0;

    /* Walk the common prefix; reaching the terminator in it means equality. */
    while (a[pos] == b[pos]) {
        if (a[pos] == 0) {
            return 0;
        }
        pos++;
    }

    if (a[pos] < b[pos]) {
        return -1;
    }
    return 1;
}
6640
6641 static void TestImportRulesDeWithPhonebook(void)
6642 {
6643 const char* normalRules[] = {
6644 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6645 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6646 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6647 };
6648 const OneTestCase normalTests[] = {
6649 { {0x00e6}, {0x00c6}, UCOL_LESS},
6650 { {0x00fc}, {0x00dc}, UCOL_GREATER},
6651 };
6652
6653 const char* importRules[] = {
6654 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6655 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6656 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6657 };
6658 const OneTestCase importTests[] = {
6659 { {0x00e6}, {0x00c6}, UCOL_LESS},
6660 { {0x00fc}, {0x00dc}, UCOL_LESS},
6661 };
6662
6663 doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
6664 doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
6665 }
6666
#if 0
/**
 * Disabled test (compiled out): compares default ordering, the European
 * Ordering Rules import, the Finnish standard import, and the combination of
 * the two imports on three character pairs.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET, reached through a dummy rule. */
    const char* ducetRules[] = {
        "&a<b", /* Dummy rule. */
    };
    const OneTestCase ducetCases[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* European Ordering rules: ignore currency characters. */
    const char* europeanRules[] = {
        "[import root-u-co-eor]",
    };
    const OneTestCase europeanCases[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    /* Finnish standard rules. */
    const char* finnishStandardRules[] = {
        "[import fi-u-co-standard]",
    };
    const OneTestCase finnishStandardCases[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char* europeanPlusFinnishRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* This is essentially same as the one before once fi.txt is updated with import. */
    const char* finnishEorRules[] = {
        "[import fi-u-co-eor]",
    };

    /* Expected results for the combined (and, later, fi-u-co-eor) rules. */
    const OneTestCase finnishEorCases[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    doTestOneTestCase(ducetCases, LEN(ducetCases),
                      ducetRules, LEN(ducetRules));
    doTestOneTestCase(europeanCases, LEN(europeanCases),
                      europeanRules, LEN(europeanRules));
    doTestOneTestCase(finnishStandardCases, LEN(finnishStandardCases),
                      finnishStandardRules, LEN(finnishStandardRules));
    doTestOneTestCase(finnishEorCases, LEN(finnishEorCases),
                      europeanPlusFinnishRules, LEN(europeanPlusFinnishRules));

    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
            eor{
                Sequence{
                    "[import root-u-co-eor][import fi-u-co-standard]"
                }
                Version{"21.0"}
            }
    */
    /* doTestOneTestCase(finnishEorCases, LEN(finnishEorCases), finnishEorRules, LEN(finnishEorRules)); */

}
#endif
6735
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET, reached through a dummy rule. */
    const char* ducetRules[] = {
        "&a<b", /* Dummy rule. */
    };
    const OneTestCase ducetCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER},
    };

    /* Unihan ordering imported from the Korean "unihan" collation type. */
    const char* unihanImportRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase unihanImportCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS},
    };

    doTestOneTestCase(ducetCases, LEN(ducetCases),
                      ducetRules, LEN(ducetRules));
    doTestOneTestCase(unihanImportCases, LEN(unihanImportCases),
                      unihanImportRules, LEN(unihanImportRules));

}
#endif
6767
6768 static void TestImport(void)
6769 {
6770 UCollator* vicoll;
6771 UCollator* escoll;
6772 UCollator* viescoll;
6773 UCollator* importviescoll;
6774 UParseError error;
6775 UErrorCode status = U_ZERO_ERROR;
6776 UChar* virules;
6777 int32_t viruleslength;
6778 UChar* esrules;
6779 int32_t esruleslength;
6780 UChar* viesrules;
6781 int32_t viesruleslength;
6782 char srules[500] = "[import vi][import es]";
6783 UChar rules[500];
6784 uint32_t length = 0;
6785 int32_t itemCount;
6786 int32_t i, k;
6787 UChar32 start;
6788 UChar32 end;
6789 UChar str[500];
6790 int32_t strLength;
6791
6792 uint8_t sk1[500];
6793 uint8_t sk2[500];
6794
6795 UBool b;
6796 USet* tailoredSet;
6797 USet* importTailoredSet;
6798
6799
6800 vicoll = ucol_open("vi", &status);
6801 if(U_FAILURE(status)){
6802 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6803 return;
6804 }
6805
6806 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6807 escoll = ucol_open("es", &status);
6808 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6809 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6810 viesrules[0] = 0;
6811 u_strcat(viesrules, virules);
6812 u_strcat(viesrules, esrules);
6813 viesruleslength = viruleslength + esruleslength;
6814 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6815
6816 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6817 length = u_unescape(srules, rules, 500);
6818 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6819 if(U_FAILURE(status)){
6820 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6821 return;
6822 }
6823
6824 tailoredSet = ucol_getTailoredSet(viescoll, &status);
6825 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6826
6827 if(!uset_equals(tailoredSet, importTailoredSet)){
6828 log_err("Tailored sets not equal");
6829 }
6830
6831 uset_close(importTailoredSet);
6832
6833 itemCount = uset_getItemCount(tailoredSet);
6834
6835 for( i = 0; i < itemCount; i++){
6836 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6837 if(strLength < 2){
6838 for (; start <= end; start++){
6839 k = 0;
6840 U16_APPEND(str, k, 500, start, b);
6841 ucol_getSortKey(viescoll, str, 1, sk1, 500);
6842 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6843 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6844 log_err("Sort key for %s not equal\n", str);
6845 break;
6846 }
6847 }
6848 }else{
6849 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6850 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6851 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6852 log_err("ZZSort key for %s not equal\n", str);
6853 break;
6854 }
6855
6856 }
6857 }
6858
6859 uset_close(tailoredSet);
6860
6861 uprv_free(viesrules);
6862
6863 ucol_close(vicoll);
6864 ucol_close(escoll);
6865 ucol_close(viescoll);
6866 ucol_close(importviescoll);
6867 }
6868
6869 static void TestImportWithType(void)
6870 {
6871 UCollator* vicoll;
6872 UCollator* decoll;
6873 UCollator* videcoll;
6874 UCollator* importvidecoll;
6875 UParseError error;
6876 UErrorCode status = U_ZERO_ERROR;
6877 const UChar* virules;
6878 int32_t viruleslength;
6879 const UChar* derules;
6880 int32_t deruleslength;
6881 UChar* viderules;
6882 int32_t videruleslength;
6883 const char srules[500] = "[import vi][import de-u-co-phonebk]";
6884 UChar rules[500];
6885 uint32_t length = 0;
6886 int32_t itemCount;
6887 int32_t i, k;
6888 UChar32 start;
6889 UChar32 end;
6890 UChar str[500];
6891 int32_t strLength;
6892
6893 uint8_t sk1[500];
6894 uint8_t sk2[500];
6895
6896 USet* tailoredSet;
6897 USet* importTailoredSet;
6898
6899 vicoll = ucol_open("vi", &status);
6900 if(U_FAILURE(status)){
6901 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6902 return;
6903 }
6904 virules = ucol_getRules(vicoll, &viruleslength);
6905 /* decoll = ucol_open("de@collation=phonebook", &status); */
6906 decoll = ucol_open("de-u-co-phonebk", &status);
6907 if(U_FAILURE(status)){
6908 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6909 return;
6910 }
6911
6912
6913 derules = ucol_getRules(decoll, &deruleslength);
6914 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6915 viderules[0] = 0;
6916 u_strcat(viderules, virules);
6917 u_strcat(viderules, derules);
6918 videruleslength = viruleslength + deruleslength;
6919 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6920
6921 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6922 length = u_unescape(srules, rules, 500);
6923 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6924 if(U_FAILURE(status)){
6925 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6926 return;
6927 }
6928
6929 tailoredSet = ucol_getTailoredSet(videcoll, &status);
6930 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6931
6932 if(!uset_equals(tailoredSet, importTailoredSet)){
6933 log_err("Tailored sets not equal");
6934 }
6935
6936 uset_close(importTailoredSet);
6937
6938 itemCount = uset_getItemCount(tailoredSet);
6939
6940 for( i = 0; i < itemCount; i++){
6941 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6942 if(strLength < 2){
6943 for (; start <= end; start++){
6944 k = 0;
6945 U16_APPEND_UNSAFE(str, k, start);
6946 ucol_getSortKey(videcoll, str, 1, sk1, 500);
6947 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6948 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6949 log_err("Sort key for %s not equal\n", str);
6950 break;
6951 }
6952 }
6953 }else{
6954 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6955 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6956 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6957 log_err("Sort key for %s not equal\n", str);
6958 break;
6959 }
6960
6961 }
6962 }
6963
6964 uset_close(tailoredSet);
6965
6966 uprv_free(viderules);
6967
6968 ucol_close(videcoll);
6969 ucol_close(importvidecoll);
6970 ucol_close(vicoll);
6971 ucol_close(decoll);
6972 }
6973
6974 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
6975 static const UChar longUpperStr1[]= { /* 155 chars */
6976 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
6977 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
6978 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
6979 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
6980 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
6981 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
6982 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
6983 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
6984 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
6985 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
6986 };
6987
6988 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
6989 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
6990 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6991 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6992 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6993 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6994 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
6995 };
6996
6997 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
6998 static const UChar longUpperStr3[]= { /* 324 chars */
6999 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7000 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7001 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7002 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7003 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7004 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7005 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7006 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7007 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7008 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7009 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
7010 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
7011 };
7012
7013 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
7014
7015 typedef struct {
7016 const UChar * longUpperStrPtr;
7017 int32_t longUpperStrLen;
7018 } LongUpperStrItem;
7019
7020 /* String pointers must be in reverse collation order of the corresponding strings */
7021 static const LongUpperStrItem longUpperStrItems[] = {
7022 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
7023 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
7024 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
7025 { NULL, 0 }
7026 };
7027
7028 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
7029
7030 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7031 static void TestCaseLevelBufferOverflow(void)
7032 {
7033 UErrorCode status = U_ZERO_ERROR;
7034 UCollator * ucol = ucol_open("root", &status);
7035 if ( U_SUCCESS(status) ) {
7036 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
7037 if ( U_SUCCESS(status) ) {
7038 const LongUpperStrItem * itemPtr;
7039 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
7040 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
7041 int32_t sortKeyLen;
7042 if (itemPtr > longUpperStrItems) {
7043 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
7044 }
7045 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
7046 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
7047 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
7048 break;
7049 }
7050 if ( itemPtr > longUpperStrItems ) {
7051 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
7052 if (compareResult >= 0) {
7053 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
7054 }
7055 }
7056 }
7057 } else {
7058 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
7059 }
7060 ucol_close(ucol);
7061 } else {
7062 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
7063 }
7064 }
7065
7066
7067 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
7068
7069 void addMiscCollTest(TestNode** root)
7070 {
7071 TEST(TestRuleOptions);
7072 TEST(TestBeforePrefixFailure);
7073 TEST(TestContractionClosure);
7074 TEST(TestPrefixCompose);
7075 TEST(TestStrCollIdenticalPrefix);
7076 TEST(TestPrefix);
7077 TEST(TestNewJapanese);
7078 /*TEST(TestLimitations);*/
7079 TEST(TestNonChars);
7080 TEST(TestExtremeCompression);
7081 TEST(TestSurrogates);
7082 TEST(TestVariableTopSetting);
7083 TEST(TestBocsuCoverage);
7084 TEST(TestCyrillicTailoring);
7085 TEST(TestCase);
7086 TEST(IncompleteCntTest);
7087 TEST(BlackBirdTest);
7088 TEST(FunkyATest);
7089 TEST(BillFairmanTest);
7090 TEST(RamsRulesTest);
7091 TEST(IsTailoredTest);
7092 TEST(TestCollations);
7093 TEST(TestChMove);
7094 TEST(TestImplicitTailoring);
7095 TEST(TestFCDProblem);
7096 TEST(TestEmptyRule);
7097 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
7098 TEST(TestJ815);
7099 /*TEST(TestJ831);*/ /* we changed lv locale */
7100 TEST(TestBefore);
7101 TEST(TestRedundantRules);
7102 TEST(TestExpansionSyntax);
7103 TEST(TestHangulTailoring);
7104 TEST(TestUCARules);
7105 TEST(TestIncrementalNormalize);
7106 TEST(TestComposeDecompose);
7107 TEST(TestCompressOverlap);
7108 TEST(TestContraction);
7109 TEST(TestExpansion);
7110 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
7111 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
7112 TEST(TestOptimize);
7113 TEST(TestSuppressContractions);
7114 TEST(Alexis2);
7115 TEST(TestHebrewUCA);
7116 TEST(TestPartialSortKeyTermination);
7117 TEST(TestSettings);
7118 TEST(TestEquals);
7119 TEST(TestJ2726);
7120 TEST(NullRule);
7121 TEST(TestNumericCollation);
7122 TEST(TestTibetanConformance);
7123 TEST(TestPinyinProblem);
7124 TEST(TestImplicitGeneration);
7125 TEST(TestSeparateTrees);
7126 TEST(TestBeforePinyin);
7127 TEST(TestBeforeTightening);
7128 /*TEST(TestMoreBefore);*/
7129 TEST(TestTailorNULL);
7130 TEST(TestUpperFirstQuaternary);
7131 TEST(TestJ4960);
7132 TEST(TestJ5223);
7133 TEST(TestJ5232);
7134 TEST(TestJ5367);
7135 TEST(TestHiragana);
7136 TEST(TestSortKeyConsistency);
7137 TEST(TestVI5913); /* VI, RO tailored rules */
7138 TEST(TestCroatianSortKey);
7139 TEST(TestTailor6179);
7140 TEST(TestUCAPrecontext);
7141 TEST(TestOutOfBuffer5468);
7142 TEST(TestSameStrengthList);
7143
7144 TEST(TestSameStrengthListQuoted);
7145 TEST(TestSameStrengthListSupplemental);
7146 TEST(TestSameStrengthListQwerty);
7147 TEST(TestSameStrengthListQuotedQwerty);
7148 TEST(TestSameStrengthListRanges);
7149 TEST(TestSameStrengthListSupplementalRanges);
7150 TEST(TestSpecialCharacters);
7151 TEST(TestPrivateUseCharacters);
7152 TEST(TestPrivateUseCharactersInList);
7153 TEST(TestPrivateUseCharactersInRange);
7154 TEST(TestInvalidListsAndRanges);
7155 TEST(TestImportRulesDeWithPhonebook);
7156 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
7157 /* TEST(TestImportRulesCJKWithUnihan); */
7158 TEST(TestImport);
7159 TEST(TestImportWithType);
7160
7161 TEST(TestBeforeRuleWithScriptReordering);
7162 TEST(TestNonLeadBytesDuringCollationReordering);
7163 TEST(TestReorderingAPI);
7164 TEST(TestReorderingAPIWithRuleCreatedCollator);
7165 TEST(TestEquivalentReorderingScripts);
7166 TEST(TestGreekFirstReorder);
7167 TEST(TestGreekLastReorder);
7168 TEST(TestNonScriptReorder);
7169 TEST(TestHaniReorder);
7170 TEST(TestHaniReorderWithOtherRules);
7171 TEST(TestMultipleReorder);
7172 TEST(TestReorderingAcrossCloning);
7173 TEST(TestReorderWithNumericCollation);
7174
7175 TEST(TestCaseLevelBufferOverflow);
7176 }
7177
7178 #endif /* #if !UCONFIG_NO_COLLATION */