]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cmsccoll.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cmsccoll.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2001-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File cmsccoll.C
9*
10*******************************************************************************/
11/**
12 * These are the tests specific to ICU 1.8 and above, that I didn't know where
13 * to fit.
14 */
15
16#include <stdio.h>
17
18#include "unicode/utypes.h"
19
20#if !UCONFIG_NO_COLLATION
21
22#include "unicode/ucol.h"
23#include "unicode/ucoleitr.h"
24#include "unicode/uloc.h"
25#include "cintltst.h"
26#include "ccolltst.h"
27#include "callcoll.h"
28#include "unicode/ustring.h"
29#include "string.h"
30#include "ucol_imp.h"
31#include "ucol_tok.h"
32#include "cmemory.h"
33#include "cstring.h"
34#include "unicode/parseerr.h"
35#include "unicode/ucnv.h"
36#include "uparse.h"
37
38#define MAX_TOKEN_LEN 16
39#define RULE_BUFFER_LEN 8192
40
41void genericLocaleStarter(const char *locale, const char *s[], uint32_t size); /* keep gcc happy */
42
43typedef int tst_strcoll(void *collator, const int object,
44 const UChar *source, const int sLen,
45 const UChar *target, const int tLen);
46
47
48/**
49 * Return an integer array containing all of the collation orders
50 * returned by calls to next on the specified iterator
51 */
52static int32_t* getOrders(UCollationElements *iter, int32_t *orderLength)
53{
54 UErrorCode status;
55 int32_t order;
56 int32_t maxSize = 100;
57 int32_t size = 0;
58 int32_t *temp;
59 int32_t *orders =(int32_t*)malloc(sizeof(int32_t) * maxSize);
60 status= U_ZERO_ERROR;
61
62
63 while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
64 {
65 if (size == maxSize)
66 {
67 maxSize *= 2;
68 temp = (int32_t*)malloc(sizeof(int32_t) * maxSize);
69
70 memcpy(temp, orders, size * sizeof(int32_t));
71 free(orders);
72 orders = temp;
73
74 }
75
76 orders[size++] = order;
77 }
78
79 if (maxSize > size && size > 0)
80 {
81 temp = (int32_t*)malloc(sizeof(int32_t) * size);
82
83 memcpy(temp, orders, size * sizeof(int32_t));
84 free(orders);
85 orders = temp;
86
87
88 }
89
90 *orderLength = size;
91 return orders;
92}
93
94static void backAndForth(UCollationElements *iter)
95{
96 /* Run through the iterator forwards and stick it into an array */
97 int32_t index, o;
98 UErrorCode status = U_ZERO_ERROR;
99 int32_t orderLength = 0;
100 int32_t *orders;
101 orders= getOrders(iter, &orderLength);
102
103
104 /* Now go through it backwards and make sure we get the same values */
105 index = orderLength;
106 ucol_reset(iter);
107
108 /* synwee : changed */
109 while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
110 {
111 if (o != orders[-- index])
112 {
113 if (o == 0)
114 index ++;
115 else
116 {
117 while (index > 0 && orders[-- index] == 0)
118 {
119 }
120 if (o != orders[index])
121 {
122 log_err("Mismatch at index : %d\n", index);
123 break;
124 }
125 }
126 }
127 }
128
129 while (index != 0 && orders[index - 1] == 0) {
130 index --;
131 }
132
133 if (index != 0)
134 {
135 log_err("Didn't get back to beginning - index is %d\n", index);
136
137 ucol_reset(iter);
138 log_err("\nnext: ");
139 while ((o = ucol_next(iter, &status)) != UCOL_NULLORDER)
140 {
141 log_err("Error at %d\n", o);
142 }
143 log_err("\nprev: ");
144 while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
145 {
146 log_err("Error at %d\n", o);
147 }
148 log_verbose("\n");
149 }
150
151 free(orders);
152}
153
/*
 * Strings for IncompleteCntTest, used with the rule " & Z < ABC < Q < B".
 * Each entry is expected to compare less than every entry after it.
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

/*
 * Strings for IncompleteCntTest, used with the rule " & Z < DAVIS < MARK <DAV".
 * Each entry is expected to compare less than every entry after it.
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
180
181static void IncompleteCntTest(void)
182{
183 UErrorCode status = U_ZERO_ERROR;
184 UChar temp[90];
185 UChar t1[90];
186 UChar t2[90];
187
188 UCollator *coll = NULL;
189 uint32_t i = 0, j = 0;
190 uint32_t size = 0;
191
192 u_uastrcpy(temp, " & Z < ABC < Q < B");
193
194 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
195
196 if(U_SUCCESS(status)) {
197 size = sizeof(cnt1)/sizeof(cnt1[0]);
198 for(i = 0; i < size-1; i++) {
199 for(j = i+1; j < size; j++) {
200 UCollationElements *iter;
201 u_uastrcpy(t1, cnt1[i]);
202 u_uastrcpy(t2, cnt1[j]);
203 doTest(coll, t1, t2, UCOL_LESS);
204 /* synwee : added collation element iterator test */
205 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
206 if (U_FAILURE(status)) {
207 log_err("Creation of iterator failed\n");
208 break;
209 }
210 backAndForth(iter);
211 ucol_closeElements(iter);
212 }
213 }
214 }
215
216 ucol_close(coll);
217
218
219 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
220 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
221
222 if(U_SUCCESS(status)) {
223 size = sizeof(cnt2)/sizeof(cnt2[0]);
224 for(i = 0; i < size-1; i++) {
225 for(j = i+1; j < size; j++) {
226 UCollationElements *iter;
227 u_uastrcpy(t1, cnt2[i]);
228 u_uastrcpy(t2, cnt2[j]);
229 doTest(coll, t1, t2, UCOL_LESS);
230
231 /* synwee : added collation element iterator test */
232 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
233 if (U_FAILURE(status)) {
234 log_err("Creation of iterator failed\n");
235 break;
236 }
237 backAndForth(iter);
238 ucol_closeElements(iter);
239 }
240 }
241 }
242
243 ucol_close(coll);
244
245
246}
247
/*
 * Strings for BlackBirdTest when alternate handling is UCOL_SHIFTED: at
 * quaternary strength each entry is expected to compare less than every
 * later entry.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
259
260const static UCollationResult shiftedTert[] = {
261 0,
262 UCOL_EQUAL,
263 UCOL_EQUAL,
264 UCOL_LESS,
265 UCOL_EQUAL,
266 UCOL_EQUAL,
267 UCOL_LESS,
268 UCOL_EQUAL,
269 UCOL_EQUAL
270};
271
/*
 * Strings for BlackBirdTest when alternate handling is UCOL_NON_IGNORABLE:
 * each entry is expected to compare less than every later entry.
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
283
284static void BlackBirdTest(void) {
285 UErrorCode status = U_ZERO_ERROR;
286 UChar t1[90];
287 UChar t2[90];
288
289 uint32_t i = 0, j = 0;
290 uint32_t size = 0;
291 UCollator *coll = ucol_open("en_US", &status);
292
293 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
294 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
295
296 if(U_SUCCESS(status)) {
297 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
298 for(i = 0; i < size-1; i++) {
299 for(j = i+1; j < size; j++) {
300 u_uastrcpy(t1, nonignorable[i]);
301 u_uastrcpy(t2, nonignorable[j]);
302 doTest(coll, t1, t2, UCOL_LESS);
303 }
304 }
305 }
306
307 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
308 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
309
310 if(U_SUCCESS(status)) {
311 size = sizeof(shifted)/sizeof(shifted[0]);
312 for(i = 0; i < size-1; i++) {
313 for(j = i+1; j < size; j++) {
314 u_uastrcpy(t1, shifted[i]);
315 u_uastrcpy(t2, shifted[j]);
316 doTest(coll, t1, t2, UCOL_LESS);
317 }
318 }
319 }
320
321 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
322 if(U_SUCCESS(status)) {
323 size = sizeof(shifted)/sizeof(shifted[0]);
324 for(i = 1; i < size; i++) {
325 u_uastrcpy(t1, shifted[i-1]);
326 u_uastrcpy(t2, shifted[i]);
327 doTest(coll, t1, t2, shiftedTert[i]);
328 }
329 }
330
331 ucol_close(coll);
332}
333
334const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
335 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
336 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
337 {0x0041/*'A'*/, 0x0300, 0x0000},
338 {0x00C0, 0x0301, 0x0000},
339 /* this would work with forced normalization */
340 {0x00C0, 0x0316, 0x0000}
341};
342
343const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
344 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
345 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
346 {0x00C0, 0},
347 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
348 /* this would work with forced normalization */
349 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
350};
351
352const static UCollationResult results[] = {
353 UCOL_GREATER,
354 UCOL_EQUAL,
355 UCOL_EQUAL,
356 UCOL_GREATER,
357 UCOL_EQUAL
358};
359
360static void FunkyATest(void)
361{
362
363 int32_t i;
364 UErrorCode status = U_ZERO_ERROR;
365 UCollator *myCollation;
366 myCollation = ucol_open("en_US", &status);
367 if(U_FAILURE(status)){
368 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
369 return;
370 }
371 log_verbose("Testing some A letters, for some reason\n");
372 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
373 ucol_setStrength(myCollation, UCOL_TERTIARY);
374 for (i = 0; i < 4 ; i++)
375 {
376 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
377 }
378 ucol_close(myCollation);
379}
380
381UColAttributeValue caseFirst[] = {
382 UCOL_OFF,
383 UCOL_LOWER_FIRST,
384 UCOL_UPPER_FIRST
385};
386
387
388UColAttributeValue alternateHandling[] = {
389 UCOL_NON_IGNORABLE,
390 UCOL_SHIFTED
391};
392
393UColAttributeValue caseLevel[] = {
394 UCOL_OFF,
395 UCOL_ON
396};
397
398UColAttributeValue strengths[] = {
399 UCOL_PRIMARY,
400 UCOL_SECONDARY,
401 UCOL_TERTIARY,
402 UCOL_QUATERNARY,
403 UCOL_IDENTICAL
404};
405
#if 0
/* Printable names parallel to the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names parallel to the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names parallel to the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names parallel to the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * Not used currently - does not test, only prints.
 * Writes to stderr the sort key of "Mark Davis" (with its second character
 * replaced by U+00E4) for every combination of the caseFirst,
 * alternateHandling, caseLevel and strengths attribute tables.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);

    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /* index with h: i is stale here and would read past the end of caseFirst */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }

    /* release the collator opened above */
    ucol_close(coll);
}
#endif
483
484static void BillFairmanTest(void) {
485/*
486** check for actual locale via ICU resource bundles
487**
488** lp points to the original locale ("fr_FR_....")
489*/
490
491 UResourceBundle *lr,*cr;
492 UErrorCode lec = U_ZERO_ERROR;
493 const char *lp = "fr_FR_you_ll_never_find_this_locale";
494
495 log_verbose("BillFairmanTest\n");
496
497 lr = ures_open(NULL,lp,&lec);
498 if (lr) {
499 cr = ures_getByKey(lr,"CollationElements",0,&lec);
500 if (cr) {
501 lp = ures_getLocale(cr,&lec);
502 if (lp) {
503 if (U_SUCCESS(lec)) {
504 if(strcmp(lp, "fr") != 0) {
505 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
506 }
507 }
508 }
509 ures_close(cr);
510 }
511 ures_close(lr);
512 }
513}
514
515static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
516 UChar source[256] = { '\0'};
517 UChar target[256] = { '\0'};
518 UChar preP = 0x31a3;
519 UChar preQ = 0x310d;
520/*
521 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
522 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
523*/
524 /*log_verbose("Testing primary\n");*/
525
526 doTest(col, p, q, UCOL_LESS);
527/*
528 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
529
530 if(result!=UCOL_LESS){
531 aescstrdup(p,utfSource,256);
532 aescstrdup(q,utfTarget,256);
533 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
534 }
535*/
536 source[0] = preP;
537 u_strcpy(source+1,p);
538 target[0] = preQ;
539 u_strcpy(target+1,q);
540 doTest(col, source, target, UCOL_LESS);
541/*
542 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
543*/
544}
545
546static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
547 UChar source[256] = { '\0'};
548 UChar target[256] = { '\0'};
549
550 /*log_verbose("Testing secondary\n");*/
551
552 doTest(col, p, q, UCOL_LESS);
553/*
554 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
555*/
556 source[0] = 0x0053;
557 u_strcpy(source+1,p);
558 target[0]= 0x0073;
559 u_strcpy(target+1,q);
560
561 doTest(col, source, target, UCOL_LESS);
562/*
563 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
564*/
565
566
567 u_strcpy(source,p);
568 source[u_strlen(p)] = 0x62;
569 source[u_strlen(p)+1] = 0;
570
571
572 u_strcpy(target,q);
573 target[u_strlen(q)] = 0x61;
574 target[u_strlen(q)+1] = 0;
575
576 doTest(col, source, target, UCOL_GREATER);
577
578/*
579 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
580*/
581}
582
583static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
584 UChar source[256] = { '\0'};
585 UChar target[256] = { '\0'};
586
587 /*log_verbose("Testing tertiary\n");*/
588
589 doTest(col, p, q, UCOL_LESS);
590/*
591 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
592*/
593 source[0] = 0x0020;
594 u_strcpy(source+1,p);
595 target[0]= 0x002D;
596 u_strcpy(target+1,q);
597
598 doTest(col, source, target, UCOL_LESS);
599/*
600 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
601*/
602
603 u_strcpy(source,p);
604 source[u_strlen(p)] = 0xE0;
605 source[u_strlen(p)+1] = 0;
606
607 u_strcpy(target,q);
608 target[u_strlen(q)] = 0x61;
609 target[u_strlen(q)+1] = 0;
610
611 doTest(col, source, target, UCOL_GREATER);
612
613/*
614 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
615*/
616}
617
618static void testEquality(UCollator* col, const UChar* p,const UChar* q){
619/*
620 UChar source[256] = { '\0'};
621 UChar target[256] = { '\0'};
622*/
623
624 doTest(col, p, q, UCOL_EQUAL);
625/*
626 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
627*/
628}
629
630static void testCollator(UCollator *coll, UErrorCode *status) {
631 const UChar *rules = NULL, *current = NULL;
632 int32_t ruleLen = 0;
633 uint32_t strength = 0;
634 uint32_t chOffset = 0; uint32_t chLen = 0;
635 uint32_t exOffset = 0; uint32_t exLen = 0;
636 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
637 uint32_t firstEx = 0;
638/* uint32_t rExpsLen = 0; */
639 uint32_t firstLen = 0;
640 UBool varT = FALSE; UBool top_ = TRUE;
641 uint16_t specs = 0;
642 UBool startOfRules = TRUE;
643 UBool lastReset = FALSE;
644 UBool before = FALSE;
645 UColTokenParser src;
646 UColOptionSet opts;
647
648 UChar first[256];
649 UChar second[256];
650 UChar tempB[256];
651 uint32_t tempLen;
652 UChar *rulesCopy = NULL;
653 UParseError parseError;
654 src.opts = &opts;
655
656 rules = ucol_getRules(coll, &ruleLen);
657 if(U_SUCCESS(*status) && ruleLen > 0) {
658 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
659 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
660 src.current = src.source = rulesCopy;
661 src.end = rulesCopy+ruleLen;
662 src.extraCurrent = src.end;
663 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
664 *first = *second = 0;
665
666 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
667 strength = src.parsedToken.strength;
668 chOffset = src.parsedToken.charsOffset;
669 chLen = src.parsedToken.charsLen;
670 exOffset = src.parsedToken.extensionOffset;
671 exLen = src.parsedToken.extensionLen;
672 prefixOffset = src.parsedToken.prefixOffset;
673 prefixLen = src.parsedToken.prefixLen;
674 specs = src.parsedToken.flags;
675
676 startOfRules = FALSE;
677 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
678 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
679 if(top_) { /* if reset is on top, we should just continue */
680 continue;
681 }
682 u_strncpy(second,rulesCopy+chOffset, chLen);
683 second[chLen] = 0;
684
685 if(exLen > 0 && firstEx == 0) {
686 u_strncat(first, rulesCopy+exOffset, exLen);
687 first[firstLen+exLen] = 0;
688 }
689
690 if(lastReset == TRUE && prefixLen != 0) {
691 u_strncpy(first+prefixLen, first, firstLen);
692 u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
693 first[firstLen+prefixLen] = 0;
694 firstLen = firstLen+prefixLen;
695 }
696
697 if(before == TRUE) { /* swap first and second */
698 u_strcpy(tempB, first);
699 u_strcpy(first, second);
700 u_strcpy(second, tempB);
701
702 tempLen = firstLen;
703 firstLen = chLen;
704 chLen = tempLen;
705
706 tempLen = firstEx;
707 firstEx = exLen;
708 exLen = tempLen;
709 }
710
711 lastReset = FALSE;
712
713 switch(strength){
714 case UCOL_IDENTICAL:
715 testEquality(coll,first,second);
716 break;
717 case UCOL_PRIMARY:
718 testPrimary(coll,first,second);
719 break;
720 case UCOL_SECONDARY:
721 testSecondary(coll,first,second);
722 break;
723 case UCOL_TERTIARY:
724 testTertiary(coll,first,second);
725 break;
726 case UCOL_TOK_RESET:
727 lastReset = TRUE;
728 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
729 break;
730 default:
731 break;
732 }
733
734 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
735 before = FALSE;
736 } else {
737 firstLen = chLen;
738 firstEx = exLen;
739 u_strcpy(first, second);
740 }
741 }
742 free(rulesCopy);
743 }
744}
745
746static int ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
747 UCollator *UCA = (UCollator *)collator;
748 return ucol_strcoll(UCA, source, sLen, target, tLen);
749}
750
751/*
752static int winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
753#ifdef WIN32
754 LCID lcid = (LCID)collator;
755 return CompareString(lcid, 0, source, sLen, target, tLen);
756#else
757 return 0;
758#endif
759}
760*/
761
762static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
763 UChar s1, UChar s2,
764 const UChar *s, const uint32_t sLen,
765 const UChar *t, const uint32_t tLen) {
766 UChar source[256] = {0};
767 UChar target[256] = {0};
768
769 source[0] = s1;
770 u_strcpy(source+1, s);
771 target[0] = s2;
772 u_strcpy(target+1, t);
773
774 return func(collator, opts, source, sLen+1, target, tLen+1);
775}
776
777static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
778 UChar s1, UChar s2,
779 const UChar *s, const uint32_t sLen,
780 const UChar *t, const uint32_t tLen) {
781 UChar source[256] = {0};
782 UChar target[256] = {0};
783
784 u_strcpy(source, s);
785 source[sLen] = s1;
786 u_strcpy(target, t);
787 target[tLen] = s2;
788
789 return func(collator, opts, source, sLen+1, target, tLen+1);
790}
791
792static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
793 const UChar *s, const uint32_t sLen,
794 const UChar *t, const uint32_t tLen,
795 UCollationResult result) {
796 /*UChar fPrimary = 0x6d;*/
797 /*UChar sPrimary = 0x6e;*/
798 UChar fSecondary = 0x310d;
799 UChar sSecondary = 0x31a3;
800 UChar fTertiary = 0x310f;
801 UChar sTertiary = 0x31b7;
802
803 UCollationResult oposite;
804 if(result == UCOL_EQUAL) {
805 return UCOL_IDENTICAL;
806 } else if(result == UCOL_GREATER) {
807 oposite = UCOL_LESS;
808 } else {
809 oposite = UCOL_GREATER;
810 }
811
812 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
813 return UCOL_PRIMARY;
814 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
815 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
816 return UCOL_SECONDARY;
817 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
818 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
819 return UCOL_TERTIARY;
820 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
821 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
822 return UCOL_QUATERNARY;
823 } else {
824 return UCOL_IDENTICAL;
825 }
826}
827
828static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
829 uint32_t i = 0;
830
831 if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
832 buffer[0] = '=';
833 buffer[1] = '=';
834 buffer[2] = '\0';
835 } else if(res == UCOL_GREATER) {
836 for(i = 0; i<strength+1; i++) {
837 buffer[i] = '>';
838 }
839 buffer[strength+1] = '\0';
840 } else {
841 for(i = 0; i<strength+1; i++) {
842 buffer[i] = '<';
843 }
844 buffer[strength+1] = '\0';
845 }
846
847 return buffer;
848}
849
850
851
852static void logFailure (const char *platform, const char *test,
853 const UChar *source, const uint32_t sLen,
854 const UChar *target, const uint32_t tLen,
855 UCollationResult realRes, uint32_t realStrength,
856 UCollationResult expRes, uint32_t expStrength, UBool error) {
857
858 uint32_t i = 0;
859
860 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[256], relation[256];
861
862 *sEsc = *tEsc = *s = *t = 0;
863 if(error == TRUE) {
864 log_err("Difference between expected and generated order. Run test with -v for more info\n");
865 }
866 for(i = 0; i<sLen; i++) {
867 sprintf(b, "%04X", source[i]);
868 strcat(sEsc, "\\u");
869 strcat(sEsc, b);
870 strcat(s, b);
871 strcat(s, " ");
872 if(source[i] < 0x80) {
873 sprintf(b, "(%c)", source[i]);
874 strcat(sEsc, b);
875 }
876 }
877 for(i = 0; i<tLen; i++) {
878 sprintf(b, "%04X", target[i]);
879 strcat(tEsc, "\\u");
880 strcat(tEsc, b);
881 strcat(t, b);
882 strcat(t, " ");
883 if(target[i] < 0x80) {
884 sprintf(b, "(%c)", target[i]);
885 strcat(tEsc, b);
886 }
887 }
888/*
889 strcpy(output, "[[ ");
890 strcat(output, sEsc);
891 strcat(output, getRelationSymbol(expRes, expStrength, relation));
892 strcat(output, tEsc);
893
894 strcat(output, " : ");
895
896 strcat(output, sEsc);
897 strcat(output, getRelationSymbol(realRes, realStrength, relation));
898 strcat(output, tEsc);
899 strcat(output, " ]] ");
900
901 log_verbose("%s", output);
902*/
903
904
905 strcpy(output, "DIFF: ");
906
907 strcat(output, s);
908 strcat(output, " : ");
909 strcat(output, t);
910
911 strcat(output, test);
912 strcat(output, ": ");
913
914 strcat(output, sEsc);
915 strcat(output, getRelationSymbol(expRes, expStrength, relation));
916 strcat(output, tEsc);
917
918 strcat(output, " ");
919
920 strcat(output, platform);
921 strcat(output, ": ");
922
923 strcat(output, sEsc);
924 strcat(output, getRelationSymbol(realRes, realStrength, relation));
925 strcat(output, tEsc);
926
927 log_verbose("%s\n", output);
928
929}
930
931/*
932static void printOutRules(const UChar *rules) {
933 uint32_t len = u_strlen(rules);
934 uint32_t i = 0;
935 char toPrint;
936 uint32_t line = 0;
937
938 fprintf(stdout, "Rules:");
939
940 for(i = 0; i<len; i++) {
941 if(rules[i]<0x7f && rules[i]>=0x20) {
942 toPrint = (char)rules[i];
943 if(toPrint == '&') {
944 line = 1;
945 fprintf(stdout, "\n&");
946 } else if(toPrint == ';') {
947 fprintf(stdout, "<<");
948 line+=2;
949 } else if(toPrint == ',') {
950 fprintf(stdout, "<<<");
951 line+=3;
952 } else {
953 fprintf(stdout, "%c", toPrint);
954 line++;
955 }
956 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
957 fprintf(stdout, "\\u%04X", rules[i]);
958 line+=6;
959 }
960 if(line>72) {
961 fprintf(stdout, "\n");
962 line = 0;
963 }
964 }
965
966 log_verbose("\n");
967
968}
969*/
970
971static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
972 uint32_t diffs = 0;
973 UCollationResult realResult;
974 uint32_t realStrength;
975
976 uint32_t sLen = u_strlen(first);
977 uint32_t tLen = u_strlen(second);
978
979 realResult = func(collator, opts, first, sLen, second, tLen);
980 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
981
982 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
983 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
984 diffs++;
985 } else if(realResult != UCOL_LESS || realStrength != strength) {
986 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
987 diffs++;
988 }
989 return diffs;
990}
991
992
993static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
994 const UChar *rules = NULL, *current = NULL;
995 int32_t ruleLen = 0;
996 uint32_t strength = 0;
997 uint32_t chOffset = 0; uint32_t chLen = 0;
998 uint32_t exOffset = 0; uint32_t exLen = 0;
999 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1000/* uint32_t rExpsLen = 0; */
1001 uint32_t firstLen = 0, secondLen = 0;
1002 UBool varT = FALSE; UBool top_ = TRUE;
1003 uint16_t specs = 0;
1004 UBool startOfRules = TRUE;
1005 UColTokenParser src;
1006 UColOptionSet opts;
1007
1008 UChar first[256];
1009 UChar second[256];
1010 UChar *rulesCopy = NULL;
1011
1012 uint32_t UCAdiff = 0;
1013 uint32_t Windiff = 1;
1014 UParseError parseError;
1015
1016 src.opts = &opts;
1017
1018 rules = ucol_getRules(coll, &ruleLen);
1019
1020 /*printOutRules(rules);*/
1021
1022 if(U_SUCCESS(*status) && ruleLen > 0) {
1023 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1024 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1025 src.current = src.source = rulesCopy;
1026 src.end = rulesCopy+ruleLen;
1027 src.extraCurrent = src.end;
1028 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1029 *first = *second = 0;
1030
1031 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1032 strength = src.parsedToken.strength;
1033 chOffset = src.parsedToken.charsOffset;
1034 chLen = src.parsedToken.charsLen;
1035 exOffset = src.parsedToken.extensionOffset;
1036 exLen = src.parsedToken.extensionLen;
1037 prefixOffset = src.parsedToken.prefixOffset;
1038 prefixLen = src.parsedToken.prefixLen;
1039 specs = src.parsedToken.flags;
1040
1041 startOfRules = FALSE;
1042 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1043 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1044
1045 u_strncpy(second,rulesCopy+chOffset, chLen);
1046 second[chLen] = 0;
1047 secondLen = chLen;
1048
1049 if(exLen > 0) {
1050 u_strncat(first, rulesCopy+exOffset, exLen);
1051 first[firstLen+exLen] = 0;
1052 firstLen += exLen;
1053 }
1054
1055 if(strength != UCOL_TOK_RESET) {
1056 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
1057 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
1058 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
1059 }
1060 }
1061
1062
1063 firstLen = chLen;
1064 u_strcpy(first, second);
1065
1066 }
1067 if(UCAdiff != 0 && Windiff != 0) {
1068 log_verbose("\n");
1069 }
1070 if(UCAdiff == 0) {
1071 log_verbose("No immediate difference with %s!\n", refName);
1072 }
1073 if(Windiff == 0) {
1074 log_verbose("No immediate difference with Win32!\n");
1075 }
1076 free(rulesCopy);
1077 }
1078}
1079
/*
 * Compares two collation elements, each given as a lead CE plus a
 * continuation CE, level by level: the upper 16 bits of both words first,
 * then bits 8..15 of both words, then the low 8 bits of both words.
 *
 * s1/s2 are the lead and continuation of the first element, t1/t2 those of
 * the second. Returns 0 when both pairs are identical, -1 when the first
 * element is smaller and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t lhs, rhs;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* upper halves: lead in the high 16 bits, continuation in the low 16 */
    lhs = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    rhs = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* bits 8..15: lead byte stays in place, continuation byte moves below it */
    lhs = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    rhs = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* low bytes: lead byte moves up, continuation byte stays in place */
    lhs = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    rhs = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (lhs < rhs) ? -1 : 1;
}
1116
1117static void testCEs(UCollator *coll, UErrorCode *status) {
1118
1119 const UChar *rules = NULL, *current = NULL;
1120 int32_t ruleLen = 0;
1121
1122 uint32_t strength = 0;
1123 uint32_t maxStrength = UCOL_IDENTICAL;
1124 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1125 uint32_t lastCE;
1126 uint32_t lastContCE;
1127
1128 int32_t result = 0;
1129 uint32_t chOffset = 0; uint32_t chLen = 0;
1130 uint32_t exOffset = 0; uint32_t exLen = 0;
1131 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1132 uint32_t oldOffset = 0;
1133
1134 /* uint32_t rExpsLen = 0; */
1135 /* uint32_t firstLen = 0; */
1136 uint16_t specs = 0;
1137 UBool varT = FALSE; UBool top_ = TRUE;
1138 UBool startOfRules = TRUE;
1139 UColTokenParser src;
1140 UColOptionSet opts;
1141 UParseError parseError;
1142 UChar *rulesCopy = NULL;
1143 collIterate c;
1144 UCollator *UCA = ucol_open("root", status);
1145 UCAConstants *consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1146 uint32_t UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0], UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1],
1147 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0], UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1148
1149 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1150
1151 src.opts = &opts;
1152
1153 rules = ucol_getRules(coll, &ruleLen);
1154
1155 ucol_initInverseUCA(status);
1156
1157 if(U_SUCCESS(*status) && ruleLen > 0) {
1158 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1159 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1160 src.current = src.source = rulesCopy;
1161 src.end = rulesCopy+ruleLen;
1162 src.extraCurrent = src.end;
1163 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1164
1165 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1166 strength = src.parsedToken.strength;
1167 chOffset = src.parsedToken.charsOffset;
1168 chLen = src.parsedToken.charsLen;
1169 exOffset = src.parsedToken.extensionOffset;
1170 exLen = src.parsedToken.extensionLen;
1171 prefixOffset = src.parsedToken.prefixOffset;
1172 prefixLen = src.parsedToken.prefixLen;
1173 specs = src.parsedToken.flags;
1174
1175 startOfRules = FALSE;
1176 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1177 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1178
1179 uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
1180
1181 currCE = ucol_getNextCE(coll, &c, status);
1182 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
1183 log_verbose("Thai prevowel detected. Will pick next CE\n");
1184 currCE = ucol_getNextCE(coll, &c, status);
1185 }
1186
1187 currContCE = ucol_getNextCE(coll, &c, status);
1188 if(!isContinuation(currContCE)) {
1189 currContCE = 0;
1190 }
1191
1192 /* we need to repack CEs here */
1193
1194 if(strength == UCOL_TOK_RESET) {
1195 if(top_ == TRUE) {
1196
1197 nextCE = baseCE = currCE = UCOL_RESET_TOP_VALUE;
1198 nextContCE = baseContCE = currContCE = UCOL_RESET_TOP_CONT;
1199 } else {
1200 nextCE = baseCE = currCE;
1201 nextContCE = baseContCE = currContCE;
1202 }
1203 maxStrength = UCOL_IDENTICAL;
1204 } else {
1205 if(strength < maxStrength) {
1206 maxStrength = strength;
1207 if(baseCE == UCOL_RESET_TOP_VALUE) {
1208 log_verbose("Resetting to [top]\n");
1209 nextCE = UCOL_NEXT_TOP_VALUE;
1210 nextContCE = UCOL_NEXT_TOP_CONT;
1211 } else {
1212 result = ucol_inv_getNextCE(baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1213 }
1214 if(result < 0) {
1215 if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
1216 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
1217 return;
1218 } else {
1219 log_err("couldn't find the CE\n");
1220 return;
1221 }
1222 }
1223 }
1224
1225 currCE &= 0xFFFFFF3F;
1226 currContCE &= 0xFFFFFFBF;
1227
1228 if(maxStrength == UCOL_IDENTICAL) {
1229 if(baseCE != currCE || baseContCE != currContCE) {
1230 log_err("current CE (initial strength UCOL_EQUAL)\n");
1231 }
1232 } else {
1233 if(strength == UCOL_IDENTICAL) {
1234 if(lastCE != currCE || lastContCE != currContCE) {
1235 log_err("current CE (initial strength UCOL_EQUAL)\n");
1236 }
1237 } else {
1238 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1239 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1240 log_err("current CE is not less than base CE\n");
1241 }
1242 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1243 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1244 log_err("sequence of generated CEs is broken\n");
1245 }
1246 }
1247 }
1248
1249 }
1250
1251 oldOffset = chOffset;
1252 lastCE = currCE & 0xFFFFFF3F;
1253 lastContCE = currContCE & 0xFFFFFFBF;
1254 }
1255 free(rulesCopy);
1256 }
1257 ucol_close(UCA);
1258}
1259
#if 0
/* these locales are now picked from index RB; the list below is compiled out */
static const char* localesToTest[] = {
  "ar", "bg", "ca", "cs", "da", "el", "en_BE", "en_US_POSIX",
  "es", "et", "fi", "fr", "hi", "hr", "hu", "is",
  "iw", "ja", "ko", "lt", "lv", "mk", "mt", "nb",
  "nn", "nn_NO", "pl", "ro", "ru", "sh", "sk", "sl",
  "sq", "sr", "sv", "th", "tr", "uk", "vi", "zh",
  "zh_TW"
};
#endif
1274
/*
 * Rule strings (with \u escapes still to be unescaped) that RamsRulesTest
 * turns into collators. Each "Cui Mins" entry is preceded by the
 * commented-out string that accompanied it in the original list.
 */
static const char* rulesToTest[] = {
  /* Funky fa rule */
  "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

  /*"& Z < p, P",*/

  /* Cui Mins rules */
  /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
  "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
  /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
  "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
  /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
  "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
  "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
  "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
  "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
  "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
};
1288
1289static UBool hasCollationElements(const char *locName) {
1290
1291 UErrorCode status = U_ZERO_ERROR;
1292 UResourceBundle *ColEl = NULL;
1293
1294 UResourceBundle *loc = ures_open(NULL, locName, &status);;
1295
1296 if(U_SUCCESS(status)) {
1297 status = U_ZERO_ERROR;
1298 ColEl = ures_getByKey(loc, "CollationElements", ColEl, &status);
1299 if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
1300 ures_close(ColEl);
1301 ures_close(loc);
1302 return TRUE;
1303 }
1304 ures_close(ColEl);
1305 ures_close(loc);
1306 }
1307 return FALSE;
1308}
1309
1310
1311static void TestCollations(void) {
1312 int32_t noOfLoc = uloc_countAvailable();
1313 int32_t i = 0, j = 0;
1314
1315 UErrorCode status = U_ZERO_ERROR;
1316 char cName[256];
1317 UChar name[256];
1318 int32_t nameSize;
1319
1320
1321 const char *locName = NULL;
1322 UCollator *coll = NULL;
1323 UCollator *UCA = ucol_open("", &status);
1324 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1325 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1326
1327 for(i = 0; i<noOfLoc; i++) {
1328 status = U_ZERO_ERROR;
1329 locName = uloc_getAvailable(i);
1330 if(uprv_strcmp("ja", locName) == 0) {
1331 log_verbose("Don't know how to test prefixes\n");
1332 continue;
1333 }
1334 if(hasCollationElements(locName)) {
1335 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1336 for(j = 0; j<nameSize; j++) {
1337 cName[j] = (char)name[j];
1338 }
1339 cName[nameSize] = 0;
1340 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1341 coll = ucol_open(locName, &status);
1342 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1343 ucol_close(coll);
1344 }
1345 }
1346 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1347 ucol_close(UCA);
1348}
1349
1350static void RamsRulesTest(void) {
1351 UErrorCode status = U_ZERO_ERROR;
1352 int32_t i = 0;
1353 UCollator *coll = NULL;
1354 UChar rule[2048];
1355 uint32_t ruleLen;
1356 int32_t noOfLoc = uloc_countAvailable();
1357 const char *locName = NULL;
1358
1359 log_verbose("RamsRulesTest\n");
1360
1361 for(i = 0; i<noOfLoc; i++) {
1362 status = U_ZERO_ERROR;
1363 locName = uloc_getAvailable(i);
1364 if(hasCollationElements(locName)) {
1365 if (uprv_strcmp("ja", locName)==0) {
1366 log_verbose("Don't know how to test Japanese because of prefixes\n");
1367 continue;
1368 }
1369 log_verbose("Testing locale %s\n", locName);
1370 coll = ucol_open(locName, &status);
1371 if(U_SUCCESS(status)) {
1372 if(coll->image->jamoSpecial == TRUE) {
1373 log_err("%s has special JAMOs\n", locName);
1374 }
1375 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1376 testCollator(coll, &status);
1377 testCEs(coll, &status);
1378 ucol_close(coll);
1379 }
1380 }
1381 }
1382
1383 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1384 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1385 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1386 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1387 if(U_SUCCESS(status)) {
1388 testCollator(coll, &status);
1389 testCEs(coll, &status);
1390 ucol_close(coll);
1391 }
1392 }
1393
1394}
1395
1396static void IsTailoredTest(void) {
1397 UErrorCode status = U_ZERO_ERROR;
1398 uint32_t i = 0;
1399 UCollator *coll = NULL;
1400 UChar rule[2048];
1401 UChar tailored[2048];
1402 UChar notTailored[2048];
1403 uint32_t ruleLen, tailoredLen, notTailoredLen;
1404
1405 log_verbose("IsTailoredTest\n");
1406
1407 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1408 ruleLen = u_strlen(rule);
1409
1410 u_uastrcpy(tailored, "ABCcd");
1411 tailoredLen = u_strlen(tailored);
1412
1413 u_uastrcpy(notTailored, "ZabD");
1414 notTailoredLen = u_strlen(notTailored);
1415
1416 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1417 if(U_SUCCESS(status)) {
1418 for(i = 0; i<tailoredLen; i++) {
1419 if(!ucol_isTailored(coll, tailored[i], &status)) {
1420 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1421 }
1422 }
1423 for(i = 0; i<notTailoredLen; i++) {
1424 if(ucol_isTailored(coll, notTailored[i], &status)) {
1425 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1426 }
1427 }
1428 ucol_close(coll);
1429 }
1430}
1431
1432static void genericOrderingTestWithResult(UCollator *coll, const char *s[], uint32_t size, UCollationResult result) {
1433 UChar t1[2048] = {0};
1434 UChar t2[2048] = {0};
1435 UCollationElements *iter;
1436 UErrorCode status = U_ZERO_ERROR;
1437
1438 uint32_t i = 0, j = 0;
1439 log_verbose("testing sequence:\n");
1440 for(i = 0; i < size; i++) {
1441 log_verbose("%s\n", s[i]);
1442 }
1443
1444 iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
1445 if (U_FAILURE(status)) {
1446 log_err("Creation of iterator failed\n");
1447 }
1448 for(i = 0; i < size-1; i++) {
1449 for(j = i+1; j < size; j++) {
1450 u_unescape(s[i], t1, 2048);
1451 u_unescape(s[j], t2, 2048);
1452 doTest(coll, t1, t2, result);
1453 /* synwee : added collation element iterator test */
1454 ucol_setText(iter, t1, u_strlen(t1), &status);
1455 backAndForth(iter);
1456 ucol_setText(iter, t2, u_strlen(t2), &status);
1457 backAndForth(iter);
1458 }
1459 }
1460 ucol_closeElements(iter);
1461}
1462
1463static void genericOrderingTest(UCollator *coll, const char *s[], uint32_t size) {
1464 genericOrderingTestWithResult(coll, s, size, UCOL_LESS);
1465}
1466
1467void genericLocaleStarter(const char *locale, const char *s[], uint32_t size) {
1468 UErrorCode status = U_ZERO_ERROR;
1469 UCollator *coll = ucol_open(locale, &status);
1470
1471 log_verbose("Locale starter for %s\n", locale);
1472
1473 if(U_SUCCESS(status)) {
1474 genericOrderingTest(coll, s, size);
1475 } else if(status == U_FILE_ACCESS_ERROR) {
1476 log_data_err("Is your data around?\n");
1477 return;
1478 } else {
1479 log_err("Unable to open collator for locale %s\n", locale);
1480 }
1481 ucol_close(coll);
1482}
1483
#if 0
/* currently not used with options */
/*
 * Builds a collator from `rules`, applies the `attsize` attribute/value
 * pairs from attrs[] and values[], then runs genericOrderingTest on s.
 */
static void genericRulesStarterWithOptions(const char *rules, const char *s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
  UChar rlz[RULE_BUFFER_LEN] = { 0 };
  UErrorCode errorCode = U_ZERO_ERROR;
  uint32_t ruleLength = u_unescape(rules, rlz, RULE_BUFFER_LEN);
  uint32_t n = 0;
  UCollator *collator = ucol_openRules(rlz, ruleLength, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &errorCode);

  log_verbose("Rules starter for %s\n", rules);

  if(U_FAILURE(errorCode)) {
    log_err("Unable to open collator with rules %s\n", rules);
  } else {
    log_verbose("Setting attributes\n");
    while(n < attsize) {
      ucol_setAttribute(collator, attrs[n], values[n], &errorCode);
      n++;
    }

    genericOrderingTest(collator, s, size);
  }
  ucol_close(collator);
}
#endif
1509
1510static void genericLocaleStarterWithOptions(const char *locale, const char *s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
1511 UErrorCode status = U_ZERO_ERROR;
1512 uint32_t i;
1513
1514 UCollator *coll = ucol_open(locale, &status);
1515
1516 log_verbose("Locale starter for %s\n", locale);
1517
1518 if(U_SUCCESS(status)) {
1519
1520 log_verbose("Setting attributes\n");
1521 for(i = 0; i < attsize; i++) {
1522 ucol_setAttribute(coll, attrs[i], values[i], &status);
1523 }
1524
1525 genericOrderingTest(coll, s, size);
1526 } else {
1527 log_err("Unable to open collator for locale %s\n", locale);
1528 }
1529 ucol_close(coll);
1530}
1531
1532static void genericRulesTestWithResult(const char *rules, const char *s[], uint32_t size, UCollationResult result) {
1533 UErrorCode status = U_ZERO_ERROR;
1534 UChar rlz[RULE_BUFFER_LEN] = { 0 };
1535 uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
1536
1537 UCollator *coll = NULL;
1538 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1539 log_verbose("Rules starter for %s\n", rules);
1540
1541 if(U_SUCCESS(status)) {
1542 genericOrderingTestWithResult(coll, s, size, result);
1543 ucol_close(coll);
1544 } else if(status == U_FILE_ACCESS_ERROR) {
1545 log_data_err("Is your data around?\n");
1546 } else {
1547 log_err("Unable to open collator with rules %s\n", rules);
1548 }
1549}
1550
1551static void genericRulesStarter(const char *rules, const char *s[], uint32_t size) {
1552 genericRulesTestWithResult(rules, s, size, UCOL_LESS);
1553}
1554
/*
 * Strings used by TestChMove, which expects each entry to compare
 * UCOL_LESS than every later entry. \u escapes are resolved by u_unescape
 * at test time.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1574
1575static void TestChMove(void) {
1576 UChar t1[256] = {0};
1577 UChar t2[256] = {0};
1578
1579 uint32_t i = 0, j = 0;
1580 uint32_t size = 0;
1581 UErrorCode status = U_ZERO_ERROR;
1582
1583 UCollator *coll = ucol_open("cs", &status);
1584
1585 if(U_SUCCESS(status)) {
1586 size = sizeof(chTest)/sizeof(chTest[0]);
1587 for(i = 0; i < size-1; i++) {
1588 for(j = i+1; j < size; j++) {
1589 u_unescape(chTest[i], t1, 256);
1590 u_unescape(chTest[j], t2, 256);
1591 doTest(coll, t1, t2, UCOL_LESS);
1592 }
1593 }
1594 }
1595 else {
1596 log_err("Can't open collator");
1597 }
1598 ucol_close(coll);
1599}
1600
/*
 * Strings used by TestImplicitTailoring, which expects each entry to compare
 * UCOL_LESS than every later entry.
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1609
1610
1611static void TestImplicitTailoring(void) {
1612 UChar t1[256] = {0};
1613 UChar t2[256] = {0};
1614
1615 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1616
1617 uint32_t i = 0, j = 0;
1618 uint32_t size = 0;
1619 uint32_t ruleLen = 0;
1620 UErrorCode status = U_ZERO_ERROR;
1621 UCollator *coll = NULL;
1622 ruleLen = u_unescape(rule, t1, 256);
1623
1624 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1625
1626 if(U_SUCCESS(status)) {
1627 size = sizeof(impTest)/sizeof(impTest[0]);
1628 for(i = 0; i < size-1; i++) {
1629 for(j = i+1; j < size; j++) {
1630 u_unescape(impTest[i], t1, 256);
1631 u_unescape(impTest[j], t2, 256);
1632 doTest(coll, t1, t2, UCOL_LESS);
1633 }
1634 }
1635 }
1636 else {
1637 log_err("Can't open collator");
1638 }
1639 ucol_close(coll);
1640}
1641
1642static void TestFCDProblem(void) {
1643 UChar t1[256] = {0};
1644 UChar t2[256] = {0};
1645
1646 const char *s1 = "\\u0430\\u0306\\u0325";
1647 const char *s2 = "\\u04D1\\u0325";
1648
1649 UErrorCode status = U_ZERO_ERROR;
1650 UCollator *coll = ucol_open("", &status);
1651 u_unescape(s1, t1, 256);
1652 u_unescape(s2, t2, 256);
1653
1654 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1655 doTest(coll, t1, t2, UCOL_EQUAL);
1656
1657 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1658 doTest(coll, t1, t2, UCOL_EQUAL);
1659
1660 ucol_close(coll);
1661}
1662
1663#define NORM_BUFFER_TEST_LEN 32
1664typedef struct {
1665 UChar32 u;
1666 UChar NFC[NORM_BUFFER_TEST_LEN];
1667 UChar NFD[NORM_BUFFER_TEST_LEN];
1668} tester;
1669
1670static void TestComposeDecompose(void) {
1671 int32_t noOfLoc;
1672 int32_t i = 0, j = 0;
1673
1674 UErrorCode status = U_ZERO_ERROR;
1675
1676 const char *locName = NULL;
1677
1678 uint32_t nfcSize;
1679 uint32_t nfdSize;
1680 tester **t;
1681 uint32_t noCases = 0;
1682 UCollator *coll = NULL;
1683 UChar32 u = 0;
1684 UChar comp[NORM_BUFFER_TEST_LEN];
1685 uint32_t len = 0;
1686
1687 noOfLoc = uloc_countAvailable();
1688
1689 t = malloc(0x30000 * sizeof(tester *));
1690 t[0] = (tester *)malloc(sizeof(tester));
1691 log_verbose("Testing UCA extensively\n");
1692 coll = ucol_open("", &status);
1693 if(status == U_FILE_ACCESS_ERROR) {
1694 log_data_err("Is your data around?\n");
1695 return;
1696 } else if(U_FAILURE(status)) {
1697 log_err("Error opening collator\n");
1698 return;
1699 }
1700
1701
1702 for(u = 0; u < 0x30000; u++) {
1703 len = 0;
1704 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1705 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1706 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1707
1708 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1709 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1710 t[noCases]->u = u;
1711 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1712 u_strncpy(t[noCases]->NFC, comp, len);
1713 t[noCases]->NFC[len] = 0;
1714 }
1715 noCases++;
1716 t[noCases] = (tester *)malloc(sizeof(tester));
1717 uprv_memset(t[noCases], 0, sizeof(tester));
1718 }
1719 }
1720
1721 for(u=0; u<(UChar32)noCases; u++) {
1722 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1723 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1724 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1725 }
1726 }
1727 /*
1728 for(u = 0; u < 0x30000; u++) {
1729 if(!(u&0xFFFF)) {
1730 log_verbose("%08X ", u);
1731 }
1732 uprv_memset(t[noCases], 0, sizeof(tester));
1733 t[noCases]->u = u;
1734 len = 0;
1735 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1736 comp[len] = 0;
1737 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1738 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1739 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1740 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1741 }
1742 */
1743
1744 ucol_close(coll);
1745
1746 log_verbose("Testing locales, number of cases = %i\n", noCases);
1747 for(i = 0; i<noOfLoc; i++) {
1748 status = U_ZERO_ERROR;
1749 locName = uloc_getAvailable(i);
1750 if(hasCollationElements(locName)) {
1751 char cName[256];
1752 UChar name[256];
1753 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1754
1755 for(j = 0; j<nameSize; j++) {
1756 cName[j] = (char)name[j];
1757 }
1758 cName[nameSize] = 0;
1759 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1760
1761 coll = ucol_open(locName, &status);
1762 ucol_setStrength(coll, UCOL_IDENTICAL);
1763
1764 for(u=0; u<(UChar32)noCases; u++) {
1765 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1766 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1767 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1768 }
1769 }
1770 ucol_close(coll);
1771 }
1772 }
1773 for(u = 0; u <= (UChar32)noCases; u++) {
1774 free(t[u]);
1775 }
1776 free(t);
1777}
1778
1779static void TestEmptyRule(void) {
1780 UErrorCode status = U_ZERO_ERROR;
1781 UChar rulez[] = { 0 };
1782 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1783
1784 ucol_close(coll);
1785}
1786
1787static void TestUCARules(void) {
1788 UErrorCode status = U_ZERO_ERROR;
1789 UChar b[256];
1790 UChar *rules = b;
1791 uint32_t ruleLen = 0;
1792 UCollator *UCAfromRules = NULL;
1793 UCollator *coll = ucol_open("", &status);
1794 if(status == U_FILE_ACCESS_ERROR) {
1795 log_data_err("Is your data around?\n");
1796 return;
1797 } else if(U_FAILURE(status)) {
1798 log_err("Error opening collator\n");
1799 return;
1800 }
1801 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1802
1803 log_verbose("TestUCARules\n");
1804 if(ruleLen > 256) {
1805 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1806 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1807 }
1808 log_verbose("Rules length is %d\n", ruleLen);
1809 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1810 if(U_SUCCESS(status)) {
1811 ucol_close(UCAfromRules);
1812 } else {
1813 log_verbose("Unable to create a collator from UCARules!\n");
1814 }
1815/*
1816 u_unescape(blah, b, 256);
1817 ucol_getSortKey(coll, b, 1, res, 256);
1818*/
1819 ucol_close(coll);
1820 if(rules != b) {
1821 free(rules);
1822 }
1823}
1824
1825
1826/* Pinyin tonal order */
1827/*
1828 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1829 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1830 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1831 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1832 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1833 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1834 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1835.. (\u00fc)
1836
1837However, in testing we got the following order:
1838 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1839 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1840 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1841.. (\u0113)
1842 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1843 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1844 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1845.. (\u01d8)
1846 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1847*/
1848
1849static void TestBefore(void) {
1850 const static char *data[] = {
1851 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
1852 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
1853 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
1854 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
1855 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
1856 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
1857 };
1858 genericRulesStarter(
1859 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
1860 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
1861 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
1862 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
1863 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
1864 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
1865 data, sizeof(data)/sizeof(data[0]));
1866}
1867
1868static void TestJ784(void) {
1869 const static char *data[] = {
1870 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
1871 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
1872 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
1873 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
1874 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
1875 "\\u00fc",
1876 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
1877 };
1878 genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
1879}
1880
1881
1882static void TestJ831(void) {
1883 const static char *data[] = {
1884 "I",
1885 "i",
1886 "Y",
1887 "y"
1888 };
1889 genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
1890}
1891
1892static void TestJ815(void) {
1893 const static char *data[] = {
1894 "aa",
1895 "Aa",
1896 "ab",
1897 "Ab",
1898 "ad",
1899 "Ad",
1900 "ae",
1901 "Ae",
1902 "\\u00e6",
1903 "\\u00c6",
1904 "af",
1905 "Af",
1906 "b",
1907 "B"
1908 };
1909 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
1910 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
1911}
1912
1913
1914/*
1915"& a < b < c < d& r < c", "& a < b < d& r < c",
1916"& a < b < c < d& c < m", "& a < b < c < m < d",
1917"& a < b < c < d& a < m", "& a < m < b < c < d",
1918"& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1919"& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1920"& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1921"& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1922"& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1923"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1924*/
1925static void TestRedundantRules(void) {
1926 int32_t i;
1927
1928 const static char *rules[] = {
1929 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1930 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1931 "& a < b < c < d& [before 1] c < m",
1932 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1933 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1934 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1935 "& a <<< b << c < d& a < m",
1936 "&a<b<<b\\u0301 &z<b",
1937 "&z<m<<<q<<<m",
1938 "&z<<<m<q<<<m",
1939 "& a < b < c < d& r < c",
1940 "& a < b < c < d& r < c",
1941 "& a < b < c < d& c < m",
1942 "& a < b < c < d& a < m"
1943 };
1944
1945 const static char *expectedRules[] = {
1946 /*"&\\u3029<<<x",*/
1947 "&\\u2089<<<x",
1948 "& a <<< x < b <<< c << d <<< e",
1949 "& a < b < m < c < d",
1950 "& a < b <<< c << d <<< x <<< e",
1951 "& a < b <<< c <<< x << d <<< e",
1952 "& a < b <<< c << d <<< e <<< f < x < g",
1953 "& a <<< b << c < m < d",
1954 "&a<b\\u0301 &z<b",
1955 "&z<q<<<m",
1956 "&z<q<<<m",
1957 "& a < b < d& r < c",
1958 "& a < b < d& r < c",
1959 "& a < b < c < m < d",
1960 "& a < m < b < c < d"
1961 };
1962
1963 const static char *testdata[][8] = {
1964 /*{"\\u3029", "x"},*/
1965 {"\\u2089", "x"},
1966 {"a", "x", "b", "c", "d", "e"},
1967 {"a", "b", "m", "c", "d"},
1968 {"a", "b", "c", "d", "x", "e"},
1969 {"a", "b", "c", "x", "d", "e"},
1970 {"a", "b", "c", "d", "e", "f", "x", "g"},
1971 {"a", "b", "c", "m", "d"},
1972 {"a", "b\\u0301", "z", "b"},
1973 {"z", "q", "m"},
1974 {"z", "q", "m"},
1975 {"a", "b", "d"},
1976 {"r", "c"},
1977 {"a", "b", "c", "m", "d"},
1978 {"a", "m", "b", "c", "d"}
1979 };
1980
1981 const static uint32_t testdatalen[] = {
1982 2,
1983 6,
1984 5,
1985 6,
1986 6,
1987 8,
1988 5,
1989 4,
1990 3,
1991 3,
1992 3,
1993 2,
1994 5,
1995 5
1996 };
1997
1998
1999
2000 UCollator *credundant = NULL;
2001 UCollator *cresulting = NULL;
2002 UErrorCode status = U_ZERO_ERROR;
2003 UChar rlz[2048] = { 0 };
2004 uint32_t rlen = 0;
2005
2006 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2007 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2008 rlen = u_unescape(rules[i], rlz, 2048);
2009
2010 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2011 if(status == U_FILE_ACCESS_ERROR) {
2012 log_data_err("Is your data around?\n");
2013 return;
2014 } else if(U_FAILURE(status)) {
2015 log_err("Error opening collator\n");
2016 return;
2017 }
2018
2019 rlen = u_unescape(expectedRules[i], rlz, 2048);
2020 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2021
2022 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2023
2024 ucol_close(credundant);
2025 ucol_close(cresulting);
2026
2027 log_verbose("testing using data\n");
2028
2029 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2030 }
2031
2032}
2033
2034static void TestExpansionSyntax(void) {
2035 int32_t i;
2036
2037 const static char *rules[] = {
2038 "&AE <<< a << b <<< c &d <<< f",
2039 "&AE <<< a <<< b << c << d < e < f <<< g",
2040 "&AE <<< B <<< C / D <<< F"
2041 };
2042
2043 const static char *expectedRules[] = {
2044 "&A <<< a / E << b / E <<< c /E &d <<< f",
2045 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2046 "&A <<< B / E <<< C / ED <<< F / E"
2047 };
2048
2049 const static char *testdata[][8] = {
2050 {"AE", "a", "b", "c"},
2051 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2052 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2053 };
2054
2055 const static uint32_t testdatalen[] = {
2056 4,
2057 8,
2058 3
2059 };
2060
2061
2062
2063 UCollator *credundant = NULL;
2064 UCollator *cresulting = NULL;
2065 UErrorCode status = U_ZERO_ERROR;
2066 UChar rlz[2048] = { 0 };
2067 uint32_t rlen = 0;
2068
2069 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2070 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2071 rlen = u_unescape(rules[i], rlz, 2048);
2072
2073 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2074 if(status == U_FILE_ACCESS_ERROR) {
2075 log_data_err("Is your data around?\n");
2076 return;
2077 } else if(U_FAILURE(status)) {
2078 log_err("Error opening collator\n");
2079 return;
2080 }
2081 rlen = u_unescape(expectedRules[i], rlz, 2048);
2082 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2083
2084 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2085 /* as a hard error test, but only in information mode */
2086 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2087
2088 ucol_close(credundant);
2089 ucol_close(cresulting);
2090
2091 log_verbose("testing using data\n");
2092
2093 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2094 }
2095}
2096
2097static void TestCase(void)
2098{
2099 const static UChar gRules[MAX_TOKEN_LEN] =
2100 /*" & 0 < 1,\u2461<a,A"*/
2101 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2102
2103 const static UChar testCase[][MAX_TOKEN_LEN] =
2104 {
2105 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2106 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2107 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2108 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2109 };
2110
2111 const static UCollationResult caseTestResults[][9] =
2112 {
2113 { UCOL_LESS, UCOL_LESS, UCOL_LESS, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_LESS },
2114 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_GREATER },
2115 { UCOL_LESS, UCOL_LESS, UCOL_LESS, 0, UCOL_GREATER, UCOL_LESS, 0, 0, UCOL_LESS },
2116 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_GREATER }
2117
2118 };
2119
2120 const static UColAttributeValue caseTestAttributes[][2] =
2121 {
2122 { UCOL_LOWER_FIRST, UCOL_OFF},
2123 { UCOL_UPPER_FIRST, UCOL_OFF},
2124 { UCOL_LOWER_FIRST, UCOL_ON},
2125 { UCOL_UPPER_FIRST, UCOL_ON}
2126
2127 };
2128 int32_t i,j,k;
2129 UErrorCode status = U_ZERO_ERROR;
2130 UCollator *myCollation;
2131 myCollation = ucol_open("en_US", &status);
2132 if(U_FAILURE(status)){
2133 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2134 return;
2135 }
2136 log_verbose("Testing different case settings\n");
2137 ucol_setStrength(myCollation, UCOL_TERTIARY);
2138
2139 for(k = 0; k<4; k++) {
2140 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2141 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2142 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2143 for (i = 0; i < 3 ; i++) {
2144 for(j = i+1; j<4; j++) {
2145 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2146 }
2147 }
2148 }
2149 ucol_close(myCollation);
2150
2151 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2152 if(U_FAILURE(status)){
2153 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2154 return;
2155 }
2156 log_verbose("Testing different case settings with custom rules\n");
2157 ucol_setStrength(myCollation, UCOL_TERTIARY);
2158
2159 for(k = 0; k<4; k++) {
2160 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2161 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2162 for (i = 0; i < 3 ; i++) {
2163 for(j = i+1; j<4; j++) {
2164 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2165 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2166 }
2167 }
2168 }
2169 ucol_close(myCollation);
2170 {
2171 const static char *lowerFirst[] = {
2172 "h",
2173 "H",
2174 "ch",
2175 "Ch",
2176 "CH",
2177 "cha",
2178 "chA",
2179 "Cha",
2180 "ChA",
2181 "CHa",
2182 "CHA",
2183 "i",
2184 "I"
2185 };
2186
2187 const static char *upperFirst[] = {
2188 "H",
2189 "h",
2190 "CH",
2191 "Ch",
2192 "ch",
2193 "CHA",
2194 "CHa",
2195 "ChA",
2196 "Cha",
2197 "chA",
2198 "cha",
2199 "I",
2200 "i"
2201 };
2202 log_verbose("mixed case test\n");
2203 log_verbose("lower first, case level off\n");
2204 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2205 log_verbose("upper first, case level off\n");
2206 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2207 log_verbose("lower first, case level on\n");
2208 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2209 log_verbose("upper first, case level on\n");
2210 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2211 }
2212
2213}
2214
2215static void TestIncrementalNormalize(void) {
2216
2217 /*UChar baseA =0x61;*/
2218 UChar baseA =0x41;
2219/* UChar baseB = 0x42;*/
2220 UChar ccMix[] = {0x316, 0x321, 0x300};
2221 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2222 /*
2223 0x316 is combining grave accent below, cc=220
2224 0x321 is combining palatalized hook below, cc=202
2225 0x300 is combining grave accent, cc=230
2226 */
2227
2228 /*int maxSLen = 2000;*/
2229 int maxSLen = 64000;
2230 int sLen;
2231 int i;
2232
2233 UCollator *coll;
2234 UErrorCode status = U_ZERO_ERROR;
2235 UCollationResult result;
2236
2237 int32_t myQ = QUICK;
2238
2239 if(QUICK < 0) {
2240 QUICK = 1;
2241 }
2242
2243 {
2244 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2245 /* most buffers along the way.*/
2246 UChar *strA;
2247 UChar *strB;
2248
2249 strA = malloc((maxSLen+1) * sizeof(UChar));
2250 strB = malloc((maxSLen+1) * sizeof(UChar));
2251
2252 coll = ucol_open("en_US", &status);
2253 if(status == U_FILE_ACCESS_ERROR) {
2254 log_data_err("Is your data around?\n");
2255 return;
2256 } else if(U_FAILURE(status)) {
2257 log_err("Error opening collator\n");
2258 return;
2259 }
2260 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2261
2262 /*for (sLen = 257; sLen<maxSLen; sLen++) {*/
2263 /*for (sLen = 4; sLen<maxSLen; sLen++) {*/
2264 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2265 for (sLen = 500; sLen<501; sLen++) {
2266 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2267 strA[0] = baseA;
2268 strB[0] = baseA;
2269 for (i=1; i<=sLen-1; i++) {
2270 strA[i] = ccMix[i % 3];
2271 strB[sLen-i] = ccMix[i % 3];
2272 }
2273 strA[sLen] = 0;
2274 strB[sLen] = 0;
2275
2276 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2277 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2278 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2279 doTest(coll, strA, strB, UCOL_EQUAL);
2280 }
2281 free(strA);
2282 free(strB);
2283 }
2284
2285 QUICK = myQ;
2286
2287
2288 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2289 /* of the string. Checks a couple of edge cases.*/
2290
2291 {
2292 UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2293 UChar strB[] = {0x41, 0xc0, 0x316, 0};
2294 ucol_setStrength(coll, UCOL_TERTIARY);
2295 doTest(coll, strA, strB, UCOL_EQUAL);
2296 }
2297
2298 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2299
2300 {
2301 /* New UCA 3.1.1.
2302 * test below used a code point from Desseret, which sorts differently
2303 * than d800 dc00
2304 */
2305 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2306 UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2307 UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2308 ucol_setStrength(coll, UCOL_TERTIARY);
2309 doTest(coll, strA, strB, UCOL_GREATER);
2310 }
2311
2312 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2313
2314 {
2315 UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2316 UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2317 char sortKeyA[50];
2318 char sortKeyAz[50];
2319 char sortKeyB[50];
2320 char sortKeyBz[50];
2321 int r;
2322
2323 /* there used to be -3 here. Hmmmm.... */
2324 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2325 result = ucol_strcoll(coll, strA, 3, strB, 3);
2326 if (result != UCOL_GREATER) {
2327 log_err("ERROR 1 in test 4\n");
2328 }
2329 result = ucol_strcoll(coll, strA, -1, strB, -1);
2330 if (result != UCOL_EQUAL) {
2331 log_err("ERROR 2 in test 4\n");
2332 }
2333
2334 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2335 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2336 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2337 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2338
2339 r = strcmp(sortKeyA, sortKeyAz);
2340 if (r <= 0) {
2341 log_err("Error 3 in test 4\n");
2342 }
2343 r = strcmp(sortKeyA, sortKeyB);
2344 if (r <= 0) {
2345 log_err("Error 4 in test 4\n");
2346 }
2347 r = strcmp(sortKeyAz, sortKeyBz);
2348 if (r != 0) {
2349 log_err("Error 5 in test 4\n");
2350 }
2351
2352 ucol_setStrength(coll, UCOL_IDENTICAL);
2353 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2354 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2355 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2356 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2357
2358 r = strcmp(sortKeyA, sortKeyAz);
2359 if (r <= 0) {
2360 log_err("Error 6 in test 4\n");
2361 }
2362 r = strcmp(sortKeyA, sortKeyB);
2363 if (r <= 0) {
2364 log_err("Error 7 in test 4\n");
2365 }
2366 r = strcmp(sortKeyAz, sortKeyBz);
2367 if (r != 0) {
2368 log_err("Error 8 in test 4\n");
2369 }
2370 ucol_setStrength(coll, UCOL_TERTIARY);
2371 }
2372
2373
2374 /* Test 5: Null characters in non-normal source strings.*/
2375
2376 {
2377 UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2378 UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2379 char sortKeyA[50];
2380 char sortKeyAz[50];
2381 char sortKeyB[50];
2382 char sortKeyBz[50];
2383 int r;
2384
2385 result = ucol_strcoll(coll, strA, 6, strB, 6);
2386 if (result != UCOL_GREATER) {
2387 log_err("ERROR 1 in test 5\n");
2388 }
2389 result = ucol_strcoll(coll, strA, -1, strB, -1);
2390 if (result != UCOL_EQUAL) {
2391 log_err("ERROR 2 in test 5\n");
2392 }
2393
2394 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2395 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2396 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2397 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2398
2399 r = strcmp(sortKeyA, sortKeyAz);
2400 if (r <= 0) {
2401 log_err("Error 3 in test 5\n");
2402 }
2403 r = strcmp(sortKeyA, sortKeyB);
2404 if (r <= 0) {
2405 log_err("Error 4 in test 5\n");
2406 }
2407 r = strcmp(sortKeyAz, sortKeyBz);
2408 if (r != 0) {
2409 log_err("Error 5 in test 5\n");
2410 }
2411
2412 ucol_setStrength(coll, UCOL_IDENTICAL);
2413 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2414 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2415 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2416 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2417
2418 r = strcmp(sortKeyA, sortKeyAz);
2419 if (r <= 0) {
2420 log_err("Error 6 in test 5\n");
2421 }
2422 r = strcmp(sortKeyA, sortKeyB);
2423 if (r <= 0) {
2424 log_err("Error 7 in test 5\n");
2425 }
2426 r = strcmp(sortKeyAz, sortKeyBz);
2427 if (r != 0) {
2428 log_err("Error 8 in test 5\n");
2429 }
2430 ucol_setStrength(coll, UCOL_TERTIARY);
2431 }
2432
2433
2434 /* Test 6: Null character as base of a non-normal combining sequence.*/
2435
2436 {
2437 UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2438 UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2439
2440 result = ucol_strcoll(coll, strA, 5, strB, 5);
2441 if (result != UCOL_LESS) {
2442 log_err("Error 1 in test 6\n");
2443 }
2444 result = ucol_strcoll(coll, strA, -1, strB, -1);
2445 if (result != UCOL_EQUAL) {
2446 log_err("Error 2 in test 6\n");
2447 }
2448 }
2449
2450 ucol_close(coll);
2451}
2452
2453
2454
2455#if 0
2456static void TestGetCaseBit(void) {
2457 static const char *caseBitData[] = {
2458 "a", "A", "ch", "Ch", "CH",
2459 "\\uFF9E", "\\u0009"
2460 };
2461
2462 static const uint8_t results[] = {
2463 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
2464 UCOL_UPPER_CASE, UCOL_LOWER_CASE
2465 };
2466
2467 uint32_t i, blen = 0;
2468 UChar b[256] = {0};
2469 UErrorCode status = U_ZERO_ERROR;
2470 UCollator *UCA = ucol_open("", &status);
2471 uint8_t res = 0;
2472
2473 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
2474 blen = u_unescape(caseBitData[i], b, 256);
2475 res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
2476 if(results[i] != res) {
2477 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
2478 }
2479 }
2480}
2481#endif
2482
2483static void TestHangulTailoring(void) {
2484 static const char *koreanData[] = {
2485 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2486 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2487 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2488 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2489 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2490 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2491 };
2492
2493 const char *rules =
2494 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2495 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2496 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2497 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2498 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2499 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2500
2501
2502 UErrorCode status = U_ZERO_ERROR;
2503 UChar rlz[2048] = { 0 };
2504 uint32_t rlen = u_unescape(rules, rlz, 2048);
2505
2506 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2507 if(status == U_FILE_ACCESS_ERROR) {
2508 log_data_err("Is your data around?\n");
2509 return;
2510 } else if(U_FAILURE(status)) {
2511 log_err("Error opening collator\n");
2512 return;
2513 }
2514
2515 log_verbose("Using start of korean rules\n");
2516
2517 if(U_SUCCESS(status)) {
2518 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2519 } else {
2520 log_err("Unable to open collator with rules %s\n", rules);
2521 }
2522
2523 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2524 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2525 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2526
2527 ucol_close(coll);
2528
2529 log_verbose("Using ko__LOTUS locale\n");
2530 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2531}
2532
2533static void TestCompressOverlap(void) {
2534 UChar secstr[150];
2535 UChar tertstr[150];
2536 UErrorCode status = U_ZERO_ERROR;
2537 UCollator *coll;
2538 char result[200];
2539 uint32_t resultlen;
2540 int count = 0;
2541 char *tempptr;
2542
2543 coll = ucol_open("", &status);
2544
2545 if (U_FAILURE(status)) {
2546 log_err("Collator can't be created\n");
2547 return;
2548 }
2549 while (count < 149) {
2550 secstr[count] = 0x0020; /* [06, 05, 05] */
2551 tertstr[count] = 0x0020;
2552 count ++;
2553 }
2554
2555 /* top down compression ----------------------------------- */
2556 secstr[count] = 0x0332; /* [, 87, 05] */
2557 tertstr[count] = 0x3000; /* [06, 05, 07] */
2558
2559 /* no compression secstr should have 150 secondary bytes, tertstr should
2560 have 150 tertiary bytes.
2561 with correct overlapping compression, secstr should have 4 secondary
2562 bytes, tertstr should have > 2 tertiary bytes */
2563 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2564 tempptr = uprv_strchr(result, 1) + 1;
2565 while (*(tempptr + 1) != 1) {
2566 /* the last secondary collation element is not checked since it is not
2567 part of the compression */
2568 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2569 log_err("Secondary compression overlapped\n");
2570 }
2571 tempptr ++;
2572 }
2573
2574 /* tertiary top/bottom/common for en_US is similar to the secondary
2575 top/bottom/common */
2576 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2577 tempptr = uprv_strrchr(result, 1) + 1;
2578 while (*(tempptr + 1) != 0) {
2579 /* the last secondary collation element is not checked since it is not
2580 part of the compression */
2581 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2582 log_err("Tertiary compression overlapped\n");
2583 }
2584 tempptr ++;
2585 }
2586
2587 /* bottom up compression ------------------------------------- */
2588 secstr[count] = 0;
2589 tertstr[count] = 0;
2590 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2591 tempptr = uprv_strchr(result, 1) + 1;
2592 while (*(tempptr + 1) != 1) {
2593 /* the last secondary collation element is not checked since it is not
2594 part of the compression */
2595 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2596 log_err("Secondary compression overlapped\n");
2597 }
2598 tempptr ++;
2599 }
2600
2601 /* tertiary top/bottom/common for en_US is similar to the secondary
2602 top/bottom/common */
2603 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2604 tempptr = uprv_strrchr(result, 1) + 1;
2605 while (*(tempptr + 1) != 0) {
2606 /* the last secondary collation element is not checked since it is not
2607 part of the compression */
2608 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2609 log_err("Tertiary compression overlapped\n");
2610 }
2611 tempptr ++;
2612 }
2613
2614 ucol_close(coll);
2615}
2616
/**
 * Runs one fixed three-string ordering (U+0410, U+0410 U+0306 and U+04D0,
 * each followed by a Latin letter) through the "root" locale and through a
 * series of small tailorings that touch U+0410.
 */
static void TestCyrillicTailoring(void) {
    static const char *expectedOrder[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /* Tailorings under which expectedOrder is checked, in turn. */
    static const char *tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };

    const uint32_t orderLen = sizeof(expectedOrder)/sizeof(expectedOrder[0]);
    const uint32_t tailoringCount = sizeof(tailorings)/sizeof(tailorings[0]);
    uint32_t t;

    /* Russian overrides contractions, so the "ru" locale is not a valid
       subject for this test anymore; only root is checked. */
    genericLocaleStarter("root", expectedOrder, orderLen);

    for (t = 0; t < tailoringCount; ++t) {
        genericRulesStarter(tailorings[t], expectedOrder, orderLen);
    }
}
2635
/**
 * Checks two fixed orderings under a tailoring that consists only of
 * "[suppressContractions [\u0400-\u047f]]".
 */
static void TestSuppressContractions(void) {
    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    /* U+0410 as the last base character of each string. */
    static const char *trailingCyrillic[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* U+0410 as the first character of each string. */
    static const char *leadingCyrillic[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, trailingCyrillic,
                        sizeof(trailingCyrillic)/sizeof(trailingCyrillic[0]));
    genericRulesStarter(suppressRule, leadingCyrillic,
                        sizeof(leadingCyrillic)/sizeof(leadingCyrillic[0]));
}
2652
2653static void TestContraction(void) {
2654 const static char *testrules[] = {
2655 "&A = AB / B",
2656 "&A = A\\u0306/\\u0306",
2657 "&c = ch / h"
2658 };
2659 const static UChar testdata[][2] = {
2660 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2661 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2662 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2663 };
2664 const static UChar testdata2[][2] = {
2665 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2666 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2667 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2668 };
2669 const static char *testrules3[] = {
2670 "&z < xyz &xyzw << B",
2671 "&z < xyz &xyz << B / w",
2672 "&z < ch &achm << B",
2673 "&z < ch &a << B / chm",
2674 "&\\ud800\\udc00w << B",
2675 "&\\ud800\\udc00 << B / w",
2676 "&a\\ud800\\udc00m << B",
2677 "&a << B / \\ud800\\udc00m",
2678 };
2679
2680 UErrorCode status = U_ZERO_ERROR;
2681 UCollator *coll;
2682 UChar rule[256] = {0};
2683 uint32_t rlen = 0;
2684 int i;
2685
2686 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2687 UCollationElements *iter1;
2688 int j = 0;
2689 log_verbose("Rule %s for testing\n", testrules[i]);
2690 rlen = u_unescape(testrules[i], rule, 32);
2691 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2692 if (U_FAILURE(status)) {
2693 log_err("Collator creation failed %s\n", testrules[i]);
2694 return;
2695 }
2696 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2697 if (U_FAILURE(status)) {
2698 log_err("Collation iterator creation failed\n");
2699 return;
2700 }
2701 while (j < 2) {
2702 UCollationElements *iter2 = ucol_openElements(coll,
2703 &(testdata[i][j]),
2704 1, &status);
2705 uint32_t ce;
2706 if (U_FAILURE(status)) {
2707 log_err("Collation iterator creation failed\n");
2708 return;
2709 }
2710 ce = ucol_next(iter2, &status);
2711 while (ce != UCOL_NULLORDER) {
2712 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2713 log_err("Collation elements in contraction split does not match\n");
2714 return;
2715 }
2716 ce = ucol_next(iter2, &status);
2717 }
2718 j ++;
2719 ucol_closeElements(iter2);
2720 }
2721 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2722 log_err("Collation elements not exhausted\n");
2723 return;
2724 }
2725 ucol_closeElements(iter1);
2726 ucol_close(coll);
2727 }
2728
2729 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2730 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2731 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2732 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2733 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2734 testdata2[1][1]);
2735 return;
2736 }
2737 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2738 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2739 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2740 testdata2[2][1]);
2741 return;
2742 }
2743 ucol_close(coll);
2744
2745 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2746 UCollator *coll1,
2747 *coll2;
2748 UCollationElements *iter1,
2749 *iter2;
2750 UChar ch = 0x0042 /* 'B' */;
2751 uint32_t ce;
2752 rlen = u_unescape(testrules3[i], rule, 32);
2753 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2754 rlen = u_unescape(testrules3[i + 1], rule, 32);
2755 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2756 if (U_FAILURE(status)) {
2757 log_err("Collator creation failed %s\n", testrules[i]);
2758 return;
2759 }
2760 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2761 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2762 if (U_FAILURE(status)) {
2763 log_err("Collation iterator creation failed\n");
2764 return;
2765 }
2766 ce = ucol_next(iter1, &status);
2767 if (U_FAILURE(status)) {
2768 log_err("Retrieving ces failed\n");
2769 return;
2770 }
2771 while (ce != UCOL_NULLORDER) {
2772 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2773 log_err("CEs does not match\n");
2774 return;
2775 }
2776 ce = ucol_next(iter1, &status);
2777 if (U_FAILURE(status)) {
2778 log_err("Retrieving ces failed\n");
2779 return;
2780 }
2781 }
2782 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2783 log_err("CEs not exhausted\n");
2784 return;
2785 }
2786 ucol_closeElements(iter1);
2787 ucol_closeElements(iter2);
2788 ucol_close(coll1);
2789 ucol_close(coll2);
2790 }
2791}
2792
2793static void TestExpansion(void) {
2794 const static char *testrules[] = {
2795 "&J << K / B & K << M",
2796 "&J << K / B << M"
2797 };
2798 const static UChar testdata[][3] = {
2799 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2800 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2801 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2802 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2803 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2804 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2805 };
2806
2807 UErrorCode status = U_ZERO_ERROR;
2808 UCollator *coll;
2809 UChar rule[256] = {0};
2810 uint32_t rlen = 0;
2811 int i;
2812
2813 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2814 int j = 0;
2815 log_verbose("Rule %s for testing\n", testrules[i]);
2816 rlen = u_unescape(testrules[i], rule, 32);
2817 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2818 if (U_FAILURE(status)) {
2819 log_err("Collator creation failed %s\n", testrules[i]);
2820 return;
2821 }
2822
2823 for (j = 0; j < 5; j ++) {
2824 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2825 }
2826 ucol_close(coll);
2827 }
2828}
2829
2830/**
2831 * Test for CollationElementIterator previous and next for the whole set of
2832 * unicode characters with normalization on.
2833 */
2834static void TestNumericCollation(void)
2835{
2836 UCollationElements *iter;
2837 UErrorCode status = U_ZERO_ERROR;
2838
2839 int i = 0, j = 0, size = 0;
2840 /*UCollationResult collResult;*/
2841 UChar t1[100], t2[100];
2842
2843 const static char *basicTestStrings[]={
2844 "hello1",
2845 "hello2",
2846 "hello123456"
2847 };
2848
2849 const static char *preZeroTestStrings[]={
2850 "avery1",
2851 "avery01",
2852 "avery001",
2853 "avery0001"
2854 };
2855
2856 const static char *thirtyTwoBitNumericStrings[]={
2857 "avery42949672960",
2858 "avery42949672961",
2859 "avery42949672962",
2860 "avery429496729610"
2861 };
2862
2863 const static char *arabicNumericStrings[]={
2864 "avery""\\u0660", /*avery followed by the arabic digit 0*/
2865 "avery""\\u0661", /*avery followed by the arabic digit 1*/
2866 "avery""\\u0662", /*avery followed by the arabic digit 1*/
2867 "avery""\\u0661\\u0662" /*avery followed by the arabic digits 1 and 2*/
2868 };
2869
2870
2871 /* Open our collator. */
2872 UCollator* coll = ucol_open(NULL, &status);
2873 if (U_FAILURE(status)){
2874 log_err("ERROR: in using ucol_open()\n %s\n",
2875 myErrorName(status));
2876 return;
2877 }
2878
2879 /* Setting up our collator to do digits.*/
2880 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
2881 if (U_FAILURE(status)){
2882 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
2883 myErrorName(status));
2884 return;
2885 }
2886
2887 /*
2888 Testing basic numeric collation behavior. We expect that every element in our
2889 strings array is less than it's predecessor. This shows that numeric collation
2890 works (on a superficial level).
2891 */
2892 size = sizeof(basicTestStrings)/sizeof(basicTestStrings[0]);
2893 for(i = 0; i < size-1; i++) {
2894 for(j = i+1; j < size; j++) {
2895 u_uastrcpy(t1, basicTestStrings[i]);
2896 u_uastrcpy(t2, basicTestStrings[j]);
2897 doTest(coll, t1, t2, UCOL_LESS);
2898 }
2899 }
2900 /*
2901 Testing that prepended zeroes still yield the correct collation behavior.
2902 We expect that every element in our strings array will be equal.
2903 */
2904 size = sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]);
2905 for(i = 0; i < size-1; i++) {
2906 for(j = i+1; j < size; j++) {
2907 u_uastrcpy(t1, preZeroTestStrings[i]);
2908 u_uastrcpy(t2, preZeroTestStrings[j]);
2909 doTest(coll, t1, t2, UCOL_EQUAL);
2910 }
2911 }
2912
2913 /*
2914 Testing that numeric collation can handle numbers greater than 32-bits.
2915 We expect that every element in our strings array is less than it's successor.
2916 */
2917 size = sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]);
2918 for(i = 0; i < size-1; i++) {
2919 for(j = i+1; j < size; j++) {
2920 u_uastrcpy(t1, thirtyTwoBitNumericStrings[i]);
2921 u_uastrcpy(t2, thirtyTwoBitNumericStrings[j]);
2922 doTest(coll, t1, t2, UCOL_LESS);
2923 }
2924 }
2925
2926 /*
2927 Testing that numeric collation for arabic numbers. This case is interesting because
2928 it ensures that non-Roman characters that have the numeric property still get processed
2929 correctly. We expect every element to be less than it's successor.
2930 */
2931
2932 size = sizeof(arabicNumericStrings)/sizeof(arabicNumericStrings[0]);
2933 for(i = 0; i < size-1; i++) {
2934 for(j = i+1; j < size; j++) {
2935 u_unescape(arabicNumericStrings[i], t1,100);
2936 u_unescape(arabicNumericStrings[j], t2,100);
2937 doTest(coll, t1, t2, UCOL_LESS);
2938 }
2939 }
2940 /*
2941 Testing collation element iterator. Running backAndForth on
2942 a string with numbers in it should be sufficient.
2943 */
2944 u_uastrcpy(t1, basicTestStrings[2]);
2945 iter=ucol_openElements(coll, t1, u_strlen(t1), &status);
2946 backAndForth(iter);
2947 ucol_closeElements(iter);
2948
2949 ucol_close(coll);
2950}
2951
2952#if 0
2953/* this test tests the current limitations of the engine */
2954/* it always fail, so it is disabled by default */
2955static void TestLimitations(void) {
2956 /* recursive expansions */
2957 {
2958 static const char *rule = "&a=b/c&d=c/e";
2959 static const char *tlimit01[] = {"add","b","adf"};
2960 static const char *tlimit02[] = {"aa","b","af"};
2961 log_verbose("recursive expansions\n");
2962 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2963 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2964 }
2965 /* contractions spanning expansions */
2966 {
2967 static const char *rule = "&a<<<c/e&g<<<eh";
2968 static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
2969 static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
2970 log_verbose("contractions spanning expansions\n");
2971 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2972 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2973 }
2974 /* normalization: nulls in contractions */
2975 {
2976 static const char *rule = "&a<<<\\u0000\\u0302";
2977 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2978 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2979 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2980 static const UColAttributeValue valOn[] = { UCOL_ON };
2981 static const UColAttributeValue valOff[] = { UCOL_OFF };
2982
2983 log_verbose("NULL in contractions\n");
2984 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
2985 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
2986 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
2987 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
2988
2989 }
2990 /* normalization: contractions spanning normalization */
2991 {
2992 static const char *rule = "&a<<<\\u0000\\u0302";
2993 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2994 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2995 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2996 static const UColAttributeValue valOn[] = { UCOL_ON };
2997 static const UColAttributeValue valOff[] = { UCOL_OFF };
2998
2999 log_verbose("contractions spanning normalization\n");
3000 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
3001 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
3002 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
3003 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
3004
3005 }
3006 /* variable top: */
3007 {
3008 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
3009 static const char *rule = "&\\u2010<x<[variable top]=z";
3010 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
3011 static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
3012 static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
3013 static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
3014 static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
3015 static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
3016 static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
3017
3018 log_verbose("variable top\n");
3019 genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
3020 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
3021 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
3022 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
3023 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
3024
3025 }
3026 /* case level */
3027 {
3028 static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
3029 static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
3030 static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
3031 static const UColAttribute att[] = { UCOL_CASE_FIRST};
3032 static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
3033 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
3034 log_verbose("case level\n");
3035 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
3036 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
3037 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
3038 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
3039 }
3040
3041}
3042#endif
3043
3044static void TestBocsuCoverage(void) {
3045 UErrorCode status = U_ZERO_ERROR;
3046 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
3047 UChar test[256] = {0};
3048 uint32_t tlen = u_unescape(testString, test, 32);
3049 uint8_t key[256] = {0};
3050 uint32_t klen = 0;
3051
3052 UCollator *coll = ucol_open("", &status);
3053 if(U_SUCCESS(status)) {
3054 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
3055
3056 klen = ucol_getSortKey(coll, test, tlen, key, 256);
3057
3058 ucol_close(coll);
3059 } else {
3060 log_data_err("Couldn't open UCA\n");
3061 }
3062}
3063
3064static void TestVariableTopSetting(void) {
3065 UErrorCode status = U_ZERO_ERROR;
3066 const UChar *current = NULL;
3067 uint32_t varTopOriginal = 0, varTop1, varTop2;
3068 UCollator *coll = ucol_open("", &status);
3069 if(U_SUCCESS(status)) {
3070
3071 uint32_t strength = 0;
3072 uint16_t specs = 0;
3073 uint32_t chOffset = 0;
3074 uint32_t chLen = 0;
3075 uint32_t exOffset = 0;
3076 uint32_t exLen = 0;
3077 uint32_t oldChOffset = 0;
3078 uint32_t oldChLen = 0;
3079 uint32_t oldExOffset = 0;
3080 uint32_t oldExLen = 0;
3081 uint32_t prefixOffset = 0;
3082 uint32_t prefixLen = 0;
3083
3084 UBool startOfRules = TRUE;
3085 UColTokenParser src;
3086 UColOptionSet opts;
3087
3088 UChar *rulesCopy = NULL;
3089 uint32_t rulesLen;
3090
3091 UCollationResult result;
3092
3093 UChar first[256] = { 0 };
3094 UChar second[256] = { 0 };
3095 UParseError parseError;
3096 int32_t myQ = QUICK;
3097
3098 src.opts = &opts;
3099
3100 if(QUICK <= 0) {
3101 QUICK = 1;
3102 }
3103
3104 /* this test will fail when normalization is turned on */
3105 /* therefore we always turn off exhaustive mode for it */
3106 if(1) { /* QUICK > 0*/
3107 log_verbose("Slide variable top over UCARules\n");
3108 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3109 rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3110 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3111
3112 if(U_SUCCESS(status) && rulesLen > 0) {
3113 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3114 src.current = src.source = rulesCopy;
3115 src.end = rulesCopy+rulesLen;
3116 src.extraCurrent = src.end;
3117 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3118
3119 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3120 strength = src.parsedToken.strength;
3121 chOffset = src.parsedToken.charsOffset;
3122 chLen = src.parsedToken.charsLen;
3123 exOffset = src.parsedToken.extensionOffset;
3124 exLen = src.parsedToken.extensionLen;
3125 prefixOffset = src.parsedToken.prefixOffset;
3126 prefixLen = src.parsedToken.prefixLen;
3127 specs = src.parsedToken.flags;
3128
3129 startOfRules = FALSE;
3130 if(0) {
3131 log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
3132 }
3133 if(strength == UCOL_PRIMARY) {
3134 status = U_ZERO_ERROR;
3135 varTopOriginal = ucol_getVariableTop(coll, &status);
3136 varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
3137 if(U_FAILURE(status)) {
3138 char buffer[256];
3139 char *buf = buffer;
3140 uint32_t i = 0, j;
3141 uint32_t CE = UCOL_NO_MORE_CES;
3142
3143 /* before we start screaming, let's see if there is a problem with the rules */
3144 collIterate s;
3145 uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
3146
3147 CE = ucol_getNextCE(coll, &s, &status);
3148
3149 for(i = 0; i < oldChLen; i++) {
3150 j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
3151 buf += j;
3152 }
3153 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3154 log_verbose("= Expected failure for %s =", buffer);
3155 } else {
3156 if(s.pos == s.endp) {
3157 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3158 oldChOffset, u_errorName(status), buffer);
3159 } else {
3160 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3161 buffer);
3162 }
3163 }
3164 }
3165 varTop2 = ucol_getVariableTop(coll, &status);
3166 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3167 log_err("cannot retrieve set varTop value!\n");
3168 continue;
3169 }
3170
3171 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3172
3173 u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
3174 u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
3175 u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
3176 first[2*oldChLen+chLen] = 0;
3177
3178 if(oldExLen == 0) {
3179 u_strncpy(second, rulesCopy+chOffset, chLen);
3180 second[chLen] = 0;
3181 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3182 u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
3183 u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
3184 u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
3185 second[2*oldExLen+chLen] = 0;
3186 }
3187 result = ucol_strcoll(coll, first, -1, second, -1);
3188 if(result == UCOL_EQUAL) {
3189 doTest(coll, first, second, UCOL_EQUAL);
3190 } else {
3191 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
3192 }
3193 }
3194 }
3195 if(strength != UCOL_TOK_RESET) {
3196 oldChOffset = chOffset;
3197 oldChLen = chLen;
3198 oldExOffset = exOffset;
3199 oldExLen = exLen;
3200 }
3201 }
3202 status = U_ZERO_ERROR;
3203 }
3204 else {
3205 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3206 return;
3207 }
3208 if (U_FAILURE(status)) {
3209 log_err("Error parsing rules %s\n", u_errorName(status));
3210 return;
3211 }
3212 status = U_ZERO_ERROR;
3213 }
3214
3215 QUICK = myQ;
3216
3217 log_verbose("Testing setting variable top to contractions\n");
3218 {
3219 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3220 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3221 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3222 while(*conts != 0) {
3223 if(*(conts+2) == 0) {
3224 varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3225 } else {
3226 varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3227 }
3228 if(U_FAILURE(status)) {
3229 log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3230 *conts, *(conts+1), *(conts+2));
3231 status = U_ZERO_ERROR;
3232 }
3233 conts+=3;
3234 }
3235
3236 status = U_ZERO_ERROR;
3237
3238 first[0] = 0x0040;
3239 first[1] = 0x0050;
3240 first[2] = 0x0000;
3241
3242 ucol_setVariableTop(coll, first, -1, &status);
3243
3244 if(U_SUCCESS(status)) {
3245 log_err("Invalid contraction succeded in setting variable top!\n");
3246 }
3247
3248 }
3249
3250 log_verbose("Test restoring variable top\n");
3251
3252 status = U_ZERO_ERROR;
3253 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3254 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3255 log_err("Couldn't restore old variable top\n");
3256 }
3257
3258 log_verbose("Testing calling with error set\n");
3259
3260 status = U_INTERNAL_PROGRAM_ERROR;
3261 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3262 varTop2 = ucol_getVariableTop(coll, &status);
3263 ucol_restoreVariableTop(coll, varTop2, &status);
3264 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3265 varTop2 = ucol_getVariableTop(NULL, &status);
3266 ucol_restoreVariableTop(NULL, varTop2, &status);
3267 if(status != U_INTERNAL_PROGRAM_ERROR) {
3268 log_err("Bad reaction to passed error!\n");
3269 }
3270 free(rulesCopy);
3271 ucol_close(coll);
3272 } else {
3273 log_data_err("Couldn't open UCA collator\n");
3274 }
3275
3276}
3277
3278static void TestNonChars(void) {
3279 static const char *test[] = {
3280 "\\u0000",
3281 "\\uFFFE", "\\uFFFF",
3282 "\\U0001FFFE", "\\U0001FFFF",
3283 "\\U0002FFFE", "\\U0002FFFF",
3284 "\\U0003FFFE", "\\U0003FFFF",
3285 "\\U0004FFFE", "\\U0004FFFF",
3286 "\\U0005FFFE", "\\U0005FFFF",
3287 "\\U0006FFFE", "\\U0006FFFF",
3288 "\\U0007FFFE", "\\U0007FFFF",
3289 "\\U0008FFFE", "\\U0008FFFF",
3290 "\\U0009FFFE", "\\U0009FFFF",
3291 "\\U000AFFFE", "\\U000AFFFF",
3292 "\\U000BFFFE", "\\U000BFFFF",
3293 "\\U000CFFFE", "\\U000CFFFF",
3294 "\\U000DFFFE", "\\U000DFFFF",
3295 "\\U000EFFFE", "\\U000EFFFF",
3296 "\\U000FFFFE", "\\U000FFFFF",
3297 "\\U0010FFFE", "\\U0010FFFF"
3298 };
3299 UErrorCode status = U_ZERO_ERROR;
3300 UCollator *coll = ucol_open("en_US", &status);
3301
3302 log_verbose("Test non characters\n");
3303
3304 if(U_SUCCESS(status)) {
3305 genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
3306 } else {
3307 log_err("Unable to open collator\n");
3308 }
3309
3310 ucol_close(coll);
3311}
3312
3313static void TestExtremeCompression(void) {
3314 static char *test[4];
3315 int32_t j = 0, i = 0;
3316
3317 for(i = 0; i<4; i++) {
3318 test[i] = (char *)malloc(2048*sizeof(char));
3319 }
3320
3321 for(j = 20; j < 500; j++) {
3322 for(i = 0; i<4; i++) {
3323 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3324 test[i][j-1] = (char)('a'+i);
3325 test[i][j] = 0;
3326 }
3327 genericLocaleStarter("en_US", (const char **)test, 4);
3328 }
3329
3330
3331 for(i = 0; i<4; i++) {
3332 free(test[i]);
3333 }
3334}
3335
3336 #if 0
/* Disabled earlier variant of TestExtremeCompression; the preprocessor never
 * compiles it. It fills the four buffers for lengths 10..2047 and calls
 * genericLocaleStarter only once, after the fill loop has finished.
 * NOTE(review): it passes `status` instead of `&status` to ucol_open and never
 * uses or closes `coll`, so it needs fixing before it could be re-enabled. */
3337 static void TestExtremeCompression(void) {
3338 static char *test[4];
3339 int32_t j = 0, i = 0;
3340 UErrorCode status = U_ZERO_ERROR;
3341 UCollator *coll = ucol_open("en_US", status);
3342 for(i = 0; i<4; i++) {
3343 test[i] = (char *)malloc(2048*sizeof(char));
3344 }
3345 for(j = 10; j < 2048; j++) {
3346 for(i = 0; i<4; i++) {
3347 uprv_memset(test[i], 'a', (j-2)*sizeof(char));
3348 test[i][j-1] = (char)('a'+i);
3349 test[i][j] = 0;
3350 }
3351 }
3352 genericLocaleStarter("en_US", (const char **)test, 4);
3353
3354 for(j = 10; j < 2048; j++) {
3355 for(i = 0; i<1; i++) {
3356 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3357 test[i][j] = 0;
3358 }
3359 }
3360 for(i = 0; i<4; i++) {
3361 free(test[i]);
3362 }
3363}
3364 #endif
3365
/*
 * Tailors a rule chain made of supplementary characters (written as
 * surrogate pairs) and hands it, together with a list of 14 strings, to
 * genericRulesStarter.
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25 < \\ud805\\udc50"
    "< \\ud800\\udc00y < \\ud800\\udc00r"
    "< \\ud800\\udc00f << \\ud800\\udc00"
    "< \\ud800\\udc00fa << \\ud800\\udc00fb"
    "< \\ud800\\udc00a < c < b" ;

  static const char *ordered[] = {
    "z","\\ud900\\udc25", "\\ud805\\udc50",
    "\\ud800\\udc00y", "\\ud800\\udc00r",
    "\\ud800\\udc00f", "\\ud800\\udc00",
    "\\ud800\\udc00c", "\\ud800\\udc00b",
    "\\ud800\\udc00fa", "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c", "b"
  };

  /* all 14 entries of the list are passed on */
  genericRulesStarter(tailoring, ordered, 14);
}
3386
3387/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3388static void TestPrefix(void) {
3389 uint32_t i;
3390
3391 static struct {
3392 const char *rules;
3393 const char *data[50];
3394 const uint32_t len;
3395 } tests[] = {
3396 { "&z <<< z|a",
3397 {"zz", "za"}, 2 },
3398
3399 { "&z <<< z| a",
3400 {"zz", "za"}, 2 },
3401 { "[strength I]"
3402 "&a=\\ud900\\udc25"
3403 "&z<<<\\ud900\\udc25|a",
3404 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3405 };
3406
3407
3408 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3409 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3410 }
3411}
3412
3413/* This test uses data suplied by Masashiko Maedera to test the implementation */
3414/* JIS X 4061 collation order implementation */
3415static void TestNewJapanese(void) {
3416
3417 static const char *test1[] = {
3418 "\\u30b7\\u30e3\\u30fc\\u30ec",
3419 "\\u30b7\\u30e3\\u30a4",
3420 "\\u30b7\\u30e4\\u30a3",
3421 "\\u30b7\\u30e3\\u30ec",
3422 "\\u3061\\u3087\\u3053",
3423 "\\u3061\\u3088\\u3053",
3424 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3425 "\\u3066\\u30fc\\u305f",
3426 "\\u30c6\\u30fc\\u30bf",
3427 "\\u30c6\\u30a7\\u30bf",
3428 "\\u3066\\u3048\\u305f",
3429 "\\u3067\\u30fc\\u305f",
3430 "\\u30c7\\u30fc\\u30bf",
3431 "\\u30c7\\u30a7\\u30bf",
3432 "\\u3067\\u3048\\u305f",
3433 "\\u3066\\u30fc\\u305f\\u30fc",
3434 "\\u30c6\\u30fc\\u30bf\\u30a1",
3435 "\\u30c6\\u30a7\\u30bf\\u30fc",
3436 "\\u3066\\u3047\\u305f\\u3041",
3437 "\\u3066\\u3048\\u305f\\u30fc",
3438 "\\u3067\\u30fc\\u305f\\u30fc",
3439 "\\u30c7\\u30fc\\u30bf\\u30a1",
3440 "\\u3067\\u30a7\\u305f\\u30a1",
3441 "\\u30c7\\u3047\\u30bf\\u3041",
3442 "\\u30c7\\u30a8\\u30bf\\u30a2",
3443 "\\u3072\\u3086",
3444 "\\u3073\\u3085\\u3042",
3445 "\\u3074\\u3085\\u3042",
3446 "\\u3073\\u3085\\u3042\\u30fc",
3447 "\\u30d3\\u30e5\\u30a2\\u30fc",
3448 "\\u3074\\u3085\\u3042\\u30fc",
3449 "\\u30d4\\u30e5\\u30a2\\u30fc",
3450 "\\u30d2\\u30e5\\u30a6",
3451 "\\u30d2\\u30e6\\u30a6",
3452 "\\u30d4\\u30e5\\u30a6\\u30a2",
3453 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3454 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3455 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3456 "\\u3072\\u3085\\u3093",
3457 "\\u3074\\u3085\\u3093",
3458 "\\u3075\\u30fc\\u308a",
3459 "\\u30d5\\u30fc\\u30ea",
3460 "\\u3075\\u3045\\u308a",
3461 "\\u3075\\u30a5\\u308a",
3462 "\\u3075\\u30a5\\u30ea",
3463 "\\u30d5\\u30a6\\u30ea",
3464 "\\u3076\\u30fc\\u308a",
3465 "\\u30d6\\u30fc\\u30ea",
3466 "\\u3076\\u3045\\u308a",
3467 "\\u30d6\\u30a5\\u308a",
3468 "\\u3077\\u3046\\u308a",
3469 "\\u30d7\\u30a6\\u30ea",
3470 "\\u3075\\u30fc\\u308a\\u30fc",
3471 "\\u30d5\\u30a5\\u30ea\\u30fc",
3472 "\\u3075\\u30a5\\u308a\\u30a3",
3473 "\\u30d5\\u3045\\u308a\\u3043",
3474 "\\u30d5\\u30a6\\u30ea\\u30fc",
3475 "\\u3075\\u3046\\u308a\\u3043",
3476 "\\u30d6\\u30a6\\u30ea\\u30a4",
3477 "\\u3077\\u30fc\\u308a\\u30fc",
3478 "\\u3077\\u30a5\\u308a\\u30a4",
3479 "\\u3077\\u3046\\u308a\\u30fc",
3480 "\\u30d7\\u30a6\\u30ea\\u30a4",
3481 "\\u30d5\\u30fd",
3482 "\\u3075\\u309e",
3483 "\\u3076\\u309d",
3484 "\\u3076\\u3075",
3485 "\\u3076\\u30d5",
3486 "\\u30d6\\u3075",
3487 "\\u30d6\\u30d5",
3488 "\\u3076\\u309e",
3489 "\\u3076\\u3077",
3490 "\\u30d6\\u3077",
3491 "\\u3077\\u309d",
3492 "\\u30d7\\u30fd",
3493 "\\u3077\\u3075",
3494};
3495
3496 static const char *test2[] = {
3497 "\\u306f\\u309d", /* H\\u309d */
3498 /*"\\u30cf\\u30fd",*/ /* K\\u30fd */
3499 "\\u306f\\u306f", /* HH */
3500 "\\u306f\\u30cf", /* HK */
3501 "\\u30cf\\u30cf", /* KK */
3502 "\\u306f\\u309e", /* H\\u309e */
3503 "\\u30cf\\u30fe", /* K\\u30fe */
3504 "\\u306f\\u3070", /* HH\\u309b */
3505 "\\u30cf\\u30d0", /* KK\\u309b */
3506 "\\u306f\\u3071", /* HH\\u309c */
3507 "\\u30cf\\u3071", /* KH\\u309c */
3508 "\\u30cf\\u30d1", /* KK\\u309c */
3509 "\\u3070\\u309d", /* H\\u309b\\u309d */
3510 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3511 "\\u3070\\u306f", /* H\\u309bH */
3512 "\\u30d0\\u30cf", /* K\\u309bK */
3513 "\\u3070\\u309e", /* H\\u309b\\u309e */
3514 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3515 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3516 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3517 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3518 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3519 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3520 "\\u3071\\u309d", /* H\\u309c\\u309d */
3521 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3522 "\\u3071\\u306f", /* H\\u309cH */
3523 "\\u30d1\\u30cf", /* K\\u309cK */
3524 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3525 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3526 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3527 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3528 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3529 };
3530 /*
3531 static const char *test3[] = {
3532 "\\u221er\\u221e",
3533 "\\u221eR#",
3534 "\\u221et\\u221e",
3535 "#r\\u221e",
3536 "#R#",
3537 "#t%",
3538 "#T%",
3539 "8t\\u221e",
3540 "8T\\u221e",
3541 "8t#",
3542 "8T#",
3543 "8t%",
3544 "8T%",
3545 "8t8",
3546 "8T8",
3547 "\\u03c9r\\u221e",
3548 "\\u03a9R%",
3549 "rr\\u221e",
3550 "rR\\u221e",
3551 "Rr\\u221e",
3552 "RR\\u221e",
3553 "RT%",
3554 "rt8",
3555 "tr\\u221e",
3556 "tr8",
3557 "TR8",
3558 "tt8",
3559 "\\u30b7\\u30e3\\u30fc\\u30ec",
3560 };
3561 */
3562 static const UColAttribute att[] = { UCOL_STRENGTH };
3563 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3564
3565 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3566 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3567
3568 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3569 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3570 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3571 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3572 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3573}
3574
3575static void TestStrCollIdenticalPrefix(void) {
3576 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3577 const char* test[] = {
3578 "ab\\ud9b0\\udc70",
3579 "ab\\ud9b0\\udc71"
3580 };
3581 genericRulesTestWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3582}
3583/* Contractions should have all their canonically equivalent */
3584/* strings included */
3585static void TestContractionClosure(void) {
3586 static struct {
3587 const char *rules;
3588 const char *data[50];
3589 const uint32_t len;
3590 } tests[] = {
3591 { "&b=\\u00e4\\u00e4",
3592 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3593 { "&b=\\u00C5",
3594 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3595 };
3596 uint32_t i;
3597
3598
3599 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3600 genericRulesTestWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3601 }
3602}
3603
3604/* This tests also fails*/
3605static void TestBeforePrefixFailure(void) {
3606 static struct {
3607 const char *rules;
3608 const char *data[50];
3609 const uint32_t len;
3610 } tests[] = {
3611 { "&g <<< a"
3612 "&[before 3]\\uff41 <<< x",
3613 {"x", "\\uff41"}, 2 },
3614 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3615 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3616 "&[before 3]\\u30a7<<<\\u30a9",
3617 {"\\u30a9", "\\u30a7"}, 2 },
3618 { "&[before 3]\\u30a7<<<\\u30a9"
3619 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3620 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3621 {"\\u30a9", "\\u30a7"}, 2 },
3622 };
3623 uint32_t i;
3624
3625
3626 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3627 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3628 }
3629
3630#if 0
3631 const char* rule1 =
3632 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3633 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3634 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3635 const char* rule2 =
3636 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3637 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3638 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3639 const char* test[] = {
3640 "\\u30c6\\u30fc\\u30bf",
3641 "\\u30c6\\u30a7\\u30bf",
3642 };
3643 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3644 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3645/* this piece of code should be in some sort of verbose mode */
3646/* it gets the collation elements for elements and prints them */
3647/* This is useful when trying to see whether the problem is */
3648 {
3649 UErrorCode status = U_ZERO_ERROR;
3650 uint32_t i = 0;
3651 UCollationElements *it = NULL;
3652 uint32_t CE;
3653 UChar string[256];
3654 uint32_t uStringLen;
3655 UCollator *coll = NULL;
3656
3657 uStringLen = u_unescape(rule1, string, 256);
3658
3659 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3660
3661 /*coll = ucol_open("ja_JP_JIS", &status);*/
3662 it = ucol_openElements(coll, string, 0, &status);
3663
3664 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3665 log_verbose("%s\n", test[i]);
3666 uStringLen = u_unescape(test[i], string, 256);
3667 ucol_setText(it, string, uStringLen, &status);
3668
3669 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3670 log_verbose("%08X\n", CE);
3671 }
3672 log_verbose("\n");
3673
3674 }
3675
3676 ucol_closeElements(it);
3677 ucol_close(coll);
3678 }
3679#endif
3680}
3681
3682static void TestPrefixCompose(void) {
3683 const char* rule1 =
3684 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3685 /*
3686 const char* test[] = {
3687 "\\u30c6\\u30fc\\u30bf",
3688 "\\u30c6\\u30a7\\u30bf",
3689 };
3690 */
3691 {
3692 UErrorCode status = U_ZERO_ERROR;
3693 /*uint32_t i = 0;*/
3694 /*UCollationElements *it = NULL;*/
3695/* uint32_t CE;*/
3696 UChar string[256];
3697 uint32_t uStringLen;
3698 UCollator *coll = NULL;
3699
3700 uStringLen = u_unescape(rule1, string, 256);
3701
3702 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3703 ucol_close(coll);
3704 }
3705
3706
3707}
3708
3709/*
3710[last variable] last variable value
3711[last primary ignorable] largest CE for primary ignorable
3712[last secondary ignorable] largest CE for secondary ignorable
3713[last tertiary ignorable] largest CE for tertiary ignorable
3714[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3715*/
3716
3717static void TestRuleOptions(void) {
3718 /* values here are hardcoded and are correct for the current UCA
3719 * when the UCA changes, one might be forced to change these
3720 * values. (\\u02d0, \\U00010FFFC etc...)
3721 */
3722 static struct {
3723 const char *rules;
3724 const char *data[50];
3725 const uint32_t len;
3726 } tests[] = {
3727 /* - all befores here amount to zero */
3728 { "&[before 1][first tertiary ignorable]<<<a",
3729 { "\\u0000", "a"}, 2}, /* you cannot go before first tertiary ignorable */
3730
3731 { "&[before 1][last tertiary ignorable]<<<a",
3732 { "\\u0000", "a"}, 2}, /* you cannot go before last tertiary ignorable */
3733
3734 { "&[before 1][first secondary ignorable]<<<a",
3735 { "\\u0000", "a"}, 2}, /* you cannot go before first secondary ignorable */
3736
3737 { "&[before 1][last secondary ignorable]<<<a",
3738 { "\\u0000", "a"}, 2}, /* you cannot go before first secondary ignorable */
3739
3740 /* 'normal' befores */
3741
3742 { "&[before 1][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3743 { "c", "b", "\\u0332", "a" }, 4},
3744
3745 /* we don't have a code point that corresponds to
3746 * the last primary ignorable
3747 */
3748 { "&[before 2][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3749 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5},
3750
3751 { "&[before 1][first variable]<<<c<<<b &[first variable]<a",
3752 { "c", "b", "\\u0009", "a", "\\u000a" }, 5},
3753
3754 { "&[last variable]<a &[before 1][last variable]<<<c<<<b ",
3755 { "c", "b", "\\uD800\\uDF23", "a", "\\u02d0" }, 5},
3756
3757 { "&[first regular]<a"
3758 "&[before 1][first regular]<b",
3759 { "b", "\\u02d0", "a", "\\u02d1"}, 4},
3760
3761 { "&[before 1][last regular]<b"
3762 "&[last regular]<a",
3763 { "b", "\\uD801\\uDC25", "a", "\\u4e00" }, 4},
3764
3765 { "&[before 1][first implicit]<b"
3766 "&[first implicit]<a",
3767 { "b", "\\u4e00", "a", "\\u4e01"}, 4},
3768
3769 { "&[before 1][last implicit]<b"
3770 "&[last implicit]<a",
3771 { "b", "\\U0010FFFC", "a" }, 3},
3772
3773 { "&[last variable]<z"
3774 "&[last primary ignorable]<x"
3775 "&[last secondary ignorable]<<y"
3776 "&[last tertiary ignorable]<<<w"
3777 "&[top]<u",
3778 {"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7 }
3779
3780 };
3781 uint32_t i;
3782
3783
3784 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3785 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3786 }
3787}
3788
3789
3790static void TestOptimize(void) {
3791 /* this is not really a test - just trying out
3792 * whether copying of UCA contents will fail
3793 * Cannot really test, since the functionality
3794 * remains the same.
3795 */
3796 static struct {
3797 const char *rules;
3798 const char *data[50];
3799 const uint32_t len;
3800 } tests[] = {
3801 /* - all befores here amount to zero */
3802 { "[optimize [\\uAC00-\\uD7FF]]",
3803 { "a", "b"}, 2}
3804 };
3805 uint32_t i;
3806
3807 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3808 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3809 }
3810}
3811
3812/*
3813cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3814weiv ucol_strcollIter?
3815cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3816weiv these are the input strings?
3817cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3818weiv will check - could be a problem with utf-8 iterator
3819cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3820weiv hmmm
3821cycheng@ca.ibm.c... note that we have a standalone high surrogate
3822weiv that doesn't sound right
3823cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3824weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3825cycheng@ca.ibm.c... yes
3826weiv and then do the comparison
3827cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3828weiv utf-16 strings look like a little endian ones in the example you sent me
3829weiv It could be a bug - let me try to test it out
3830cycheng@ca.ibm.c... ok
3831cycheng@ca.ibm.c... we can wait till the conf. call
3832cycheng@ca.ibm.c... next week
3833weiv that would be great
3834weiv hmmm
3835weiv I might be wrong
3836weiv let me play with it some more
3837cycheng@ca.ibm.c... ok
3838cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3839cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3840cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3841weiv ok
3842cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3843weiv thanks
3844cycheng@ca.ibm.c... the 4 strings we sent are just samples
3845*/
3846 #if 0
/* Disabled test; the preprocessor never compiles it. It feeds the same two
 * four-byte strings to ucol_strcollIter, once through UTF-16BE iterators and
 * once through UTF-8 iterators, with normalization switched on for the ""
 * collator, and logs an error when the two results differ. */
3847 static void Alexis(void) {
3848 UErrorCode status = U_ZERO_ERROR;
3849 UCollator *coll = ucol_open("", &status);
3850
3851
3852 const char utf16be[2][4] = {
3853 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
3854 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
3855 };
3856
3857 const char utf8[2][4] = {
3858 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
3859 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
3860 };
3861
3862 UCharIterator iterU161, iterU162;
3863 UCharIterator iterU81, iterU82;
3864
3865 UCollationResult resU16, resU8;
3866
3867 uiter_setUTF16BE(&iterU161, utf16be[0], 4);
3868 uiter_setUTF16BE(&iterU162, utf16be[1], 4);
3869
3870 uiter_setUTF8(&iterU81, utf8[0], 4);
3871 uiter_setUTF8(&iterU82, utf8[1], 4);
3872
3873 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3874
3875 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
3876 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
3877
3878
3879 if(resU16 != resU8) {
3880 log_err("different results\n");
3881 }
3882
3883 ucol_close(coll);
3884}
3885 #endif
3886
3887#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3888static void Alexis2(void) {
3889 UErrorCode status = U_ZERO_ERROR;
3890 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3891 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3892 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3893 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3894
3895 UConverter *conv = NULL;
3896
3897 UCharIterator U16BEItS, U16BEItT;
3898 UCharIterator U8ItS, U8ItT;
3899
3900 UCollationResult resU16, resU16BE, resU8;
3901
3902 const char* pairs[][2] = {
3903 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3904 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3905 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3906 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3907 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3908 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3909 { "\\u0020", "\\u0020\\u0000"}
3910/*
39115F20 (my result here)
39125F204E008E3F
39135F20 (your result here)
3914*/
3915 };
3916
3917 int32_t i = 0;
3918
3919 UCollator *coll = ucol_open("", &status);
3920 if(status == U_FILE_ACCESS_ERROR) {
3921 log_data_err("Is your data around?\n");
3922 return;
3923 } else if(U_FAILURE(status)) {
3924 log_err("Error opening collator\n");
3925 return;
3926 }
3927 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3928 conv = ucnv_open("UTF16BE", &status);
3929 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3930 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3931 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3932
3933 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3934
3935 log_verbose("Result of strcoll is %i\n", resU16);
3936
3937 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3938 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3939
3940 /* use the original sizes, as the result from converter is in bytes */
3941 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3942 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3943
3944 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3945
3946 log_verbose("Result of U16BE is %i\n", resU16BE);
3947
3948 if(resU16 != resU16BE) {
3949 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3950 }
3951
3952 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3953 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3954
3955 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3956 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3957
3958 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3959
3960 if(resU16 != resU8) {
3961 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3962 }
3963
3964 }
3965
3966 ucol_close(coll);
3967 ucnv_close(conv);
3968}
3969
3970static void TestHebrewUCA(void) {
3971 UErrorCode status = U_ZERO_ERROR;
3972 const char *first[] = {
3973 "d790d6b8d79cd795d6bcd7a9",
3974 "d790d79cd79ed7a7d799d799d7a1",
3975 "d790d6b4d79ed795d6bcd7a9",
3976 };
3977
3978 char utf8String[3][256];
3979 UChar utf16String[3][256];
3980
3981 int32_t i = 0, j = 0;
3982 int32_t sizeUTF8[3];
3983 int32_t sizeUTF16[3];
3984
3985 UCollator *coll = ucol_open("", &status);
3986 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3987
3988 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3989 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3990 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3991 log_verbose("%i: ");
3992 for(j = 0; j < sizeUTF16[i]; j++) {
3993 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3994 log_verbose("%04X", utf16String[i][j]);
3995 }
3996 log_verbose("\n");
3997 }
3998 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3999 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
4000 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
4001 }
4002 }
4003
4004 ucol_close(coll);
4005
4006}
4007
4008static void TestPartialSortKeyTermination(void) {
4009 const char* cases[] = {
4010 "\\u1234\\u1234\\udc00",
4011 "\\udc00\\ud800\\ud800"
4012 };
4013
4014 int32_t i = sizeof(UCollator);
4015
4016 UErrorCode status = U_ZERO_ERROR;
4017
4018 UCollator *coll = ucol_open("", &status);
4019
4020 UCharIterator iter;
4021
4022 UChar currCase[256];
4023 int32_t length = 0;
4024 int32_t pKeyLen = 0;
4025
4026 uint8_t key[256];
4027
4028 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
4029 uint32_t state[2] = {0, 0};
4030 length = u_unescape(cases[i], currCase, 256);
4031 uiter_setString(&iter, currCase, length);
4032 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
4033
4034 log_verbose("Done\n");
4035
4036 }
4037 ucol_close(coll);
4038}
4039
4040static void TestSettings(void) {
4041 const char* cases[] = {
4042 "apple",
4043 "Apple"
4044 };
4045
4046 const char* locales[] = {
4047 "",
4048 "en"
4049 };
4050
4051 UErrorCode status = U_ZERO_ERROR;
4052
4053 int32_t i = 0, j = 0;
4054
4055 UChar source[256], target[256];
4056 int32_t sLen = 0, tLen = 0;
4057
4058 UCollator *collateObject = NULL;
4059 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4060 collateObject = ucol_open(locales[i], &status);
4061 ucol_setStrength(collateObject, UCOL_PRIMARY);
4062 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4063 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4064 sLen = u_unescape(cases[j-1], source, 256);
4065 source[sLen] = 0;
4066 tLen = u_unescape(cases[j], target, 256);
4067 source[tLen] = 0;
4068 doTest(collateObject, source, target, UCOL_EQUAL);
4069 }
4070 ucol_close(collateObject);
4071 }
4072}
4073
4074static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4075 UErrorCode status = U_ZERO_ERROR;
4076 int32_t errorNo = 0;
4077 /*const UChar *sourceRules = NULL;*/
4078 /*int32_t sourceRulesLen = 0;*/
4079 UColAttributeValue french = UCOL_OFF;
4080 int32_t cloneSize = 0;
4081
4082 if(!ucol_equals(source, target)) {
4083 log_err("Same collators, different address not equal\n");
4084 errorNo++;
4085 }
4086 ucol_close(target);
4087 if(uprv_strcmp(ucol_getLocale(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocale(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4088 /* currently, safeClone is implemented through getRules/openRules
4089 * so it is the same as the test below - I will comment that test out.
4090 */
4091 /* real thing */
4092 target = ucol_safeClone(source, NULL, &cloneSize, &status);
4093 if(U_FAILURE(status)) {
4094 log_err("Error creating clone\n");
4095 errorNo++;
4096 return errorNo;
4097 }
4098 if(!ucol_equals(source, target)) {
4099 log_err("Collator different from it's clone\n");
4100 errorNo++;
4101 }
4102 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4103 if(french == UCOL_ON) {
4104 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4105 } else {
4106 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4107 }
4108 if(U_FAILURE(status)) {
4109 log_err("Error setting attributes\n");
4110 errorNo++;
4111 return errorNo;
4112 }
4113 if(ucol_equals(source, target)) {
4114 log_err("Collators same even when options changed\n");
4115 errorNo++;
4116 }
4117 ucol_close(target);
4118 /* commented out since safeClone uses exactly the same technique */
4119 /*
4120 sourceRules = ucol_getRules(source, &sourceRulesLen);
4121 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4122 if(U_FAILURE(status)) {
4123 log_err("Error instantiating target from rules\n");
4124 errorNo++;
4125 return errorNo;
4126 }
4127 if(!ucol_equals(source, target)) {
4128 log_err("Collator different from collator that was created from the same rules\n");
4129 errorNo++;
4130 }
4131 ucol_close(target);
4132 */
4133 }
4134 return errorNo;
4135}
4136
4137
4138static void TestEquals(void) {
4139 /* ucol_equals is not currently a public API. There is a chance that it will become
4140 * something like this, but currently it is only used by RuleBasedCollator::operator==
4141 */
4142 /* test whether the two collators instantiated from the same locale are equal */
4143 UErrorCode status = U_ZERO_ERROR;
4144 UParseError parseError;
4145 int32_t noOfLoc = uloc_countAvailable();
4146 const char *locName = NULL;
4147 UCollator *source = NULL, *target = NULL;
4148 int32_t i = 0;
4149
4150 const char* rules[] = {
4151 "&l < lj <<< Lj <<< LJ",
4152 "&n < nj <<< Nj <<< NJ",
4153 "&ae <<< \\u00e4",
4154 "&AE <<< \\u00c4"
4155 };
4156 /*
4157 const char* badRules[] = {
4158 "&l <<< Lj",
4159 "&n < nj <<< nJ <<< NJ",
4160 "&a <<< \\u00e4",
4161 "&AE <<< \\u00c4 <<< x"
4162 };
4163 */
4164
4165 UChar sourceRules[1024], targetRules[1024];
4166 int32_t sourceRulesSize = 0, targetRulesSize = 0;
4167 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4168
4169 for(i = 0; i < rulesSize; i++) {
4170 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4171 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4172 }
4173
4174 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4175 if(status == U_FILE_ACCESS_ERROR) {
4176 log_data_err("Is your data around?\n");
4177 return;
4178 } else if(U_FAILURE(status)) {
4179 log_err("Error opening collator\n");
4180 return;
4181 }
4182 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4183 if(!ucol_equals(source, target)) {
4184 log_err("Equivalent collators not equal!\n");
4185 }
4186 ucol_close(source);
4187 ucol_close(target);
4188
4189 source = ucol_open("root", &status);
4190 target = ucol_open("root", &status);
4191 log_verbose("Testing root\n");
4192 if(!ucol_equals(source, source)) {
4193 log_err("Same collator not equal\n");
4194 }
4195 if(TestEqualsForCollator(locName, source, target)) {
4196 log_err("Errors for root\n", locName);
4197 }
4198 ucol_close(source);
4199
4200 for(i = 0; i<noOfLoc; i++) {
4201 status = U_ZERO_ERROR;
4202 locName = uloc_getAvailable(i);
4203 /*if(hasCollationElements(locName)) {*/
4204 log_verbose("Testing equality for locale %s\n", locName);
4205 source = ucol_open(locName, &status);
4206 target = ucol_open(locName, &status);
4207 if(TestEqualsForCollator(locName, source, target)) {
4208 log_err("Errors for locale %s\n", locName);
4209 }
4210 ucol_close(source);
4211 /*}*/
4212 }
4213}
4214
4215static void TestJ2726(void) {
4216 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4217 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4218 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4219 UErrorCode status = U_ZERO_ERROR;
4220 UCollator *coll = ucol_open("en", &status);
4221 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4222 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4223 doTest(coll, a, aSpace, UCOL_EQUAL);
4224 doTest(coll, aSpace, a, UCOL_EQUAL);
4225 doTest(coll, a, spaceA, UCOL_EQUAL);
4226 doTest(coll, spaceA, a, UCOL_EQUAL);
4227 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4228 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4229 ucol_close(coll);
4230}
4231
4232
4233#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
4234
4235void addMiscCollTest(TestNode** root)
4236{
4237 addTest(root, &TestNumericCollation, "tscoll/cmsccoll/TestNumericCollation");
4238 addTest(root, &TestRuleOptions, "tscoll/cmsccoll/TestRuleOptions");
4239 addTest(root, &TestBeforePrefixFailure, "tscoll/cmsccoll/TestBeforePrefixFailure");
4240 addTest(root, &TestContractionClosure, "tscoll/cmsccoll/TestContractionClosure");
4241 addTest(root, &TestPrefixCompose, "tscoll/cmsccoll/TestPrefixCompose");
4242 addTest(root, &TestStrCollIdenticalPrefix, "tscoll/cmsccoll/TestStrCollIdenticalPrefix");
4243 addTest(root, &TestPrefix, "tscoll/cmsccoll/TestPrefix");
4244 addTest(root, &TestNewJapanese, "tscoll/cmsccoll/TestNewJapanese");
4245 /*addTest(root, &TestLimitations, "tscoll/cmsccoll/TestLimitations");*/
4246 addTest(root, &TestNonChars, "tscoll/cmsccoll/TestNonChars");
4247 addTest(root, &TestExtremeCompression, "tscoll/cmsccoll/TestExtremeCompression");
4248 addTest(root, &TestSurrogates, "tscoll/cmsccoll/TestSurrogates");
4249 addTest(root, &TestVariableTopSetting, "tscoll/cmsccoll/TestVariableTopSetting");
4250 addTest(root, &TestBocsuCoverage, "tscoll/cmsccoll/TestBocsuCoverage");
4251 addTest(root, &TestCyrillicTailoring, "tscoll/cmsccoll/TestCyrillicTailoring");
4252 addTest(root, &TestCase, "tscoll/cmsccoll/TestCase");
4253 addTest(root, &IncompleteCntTest, "tscoll/cmsccoll/IncompleteCntTest");
4254 addTest(root, &BlackBirdTest, "tscoll/cmsccoll/BlackBirdTest");
4255 addTest(root, &FunkyATest, "tscoll/cmsccoll/FunkyATest");
4256 addTest(root, &BillFairmanTest, "tscoll/cmsccoll/BillFairmanTest");
4257 addTest(root, &RamsRulesTest, "tscoll/cmsccoll/RamsRulesTest");
4258 addTest(root, &IsTailoredTest, "tscoll/cmsccoll/IsTailoredTest");
4259 addTest(root, &TestCollations, "tscoll/cmsccoll/TestCollations");
4260 addTest(root, &TestChMove, "tscoll/cmsccoll/TestChMove");
4261 addTest(root, &TestImplicitTailoring, "tscoll/cmsccoll/TestImplicitTailoring");
4262 addTest(root, &TestFCDProblem, "tscoll/cmsccoll/TestFCDProblem");
4263 addTest(root, &TestEmptyRule, "tscoll/cmsccoll/TestEmptyRule");
4264 addTest(root, &TestJ784, "tscoll/cmsccoll/TestJ784");
4265 addTest(root, &TestJ815, "tscoll/cmsccoll/TestJ815");
4266 addTest(root, &TestJ831, "tscoll/cmsccoll/TestJ831");
4267 addTest(root, &TestBefore, "tscoll/cmsccoll/TestBefore");
4268 addTest(root, &TestRedundantRules, "tscoll/cmsccoll/TestRedundantRules");
4269 addTest(root, &TestExpansionSyntax, "tscoll/cmsccoll/TestExpansionSyntax");
4270 addTest(root, &TestHangulTailoring, "tscoll/cmsccoll/TestHangulTailoring");
4271 addTest(root, &TestUCARules, "tscoll/cmsccoll/TestUCARules");
4272 addTest(root, &TestIncrementalNormalize, "tscoll/cmsccoll/TestIncrementalNormalize");
4273 addTest(root, &TestComposeDecompose, "tscoll/cmsccoll/TestComposeDecompose");
4274 addTest(root, &TestCompressOverlap, "tscoll/cmsccoll/TestCompressOverlap");
4275 addTest(root, &TestContraction, "tscoll/cmsccoll/TestContraction");
4276 addTest(root, &TestExpansion, "tscoll/cmsccoll/TestExpansion");
4277 /*addTest(root, &PrintMarkDavis, "tscoll/cmsccoll/PrintMarkDavis");*/ /* this test doesn't test - just prints sortkeys */
4278 /*addTest(root, &TestGetCaseBit, "tscoll/cmsccoll/TestGetCaseBit");*/ /*this one requires internal things to be exported */
4279 TEST(TestOptimize);
4280 TEST(TestSuppressContractions);
4281 TEST(Alexis2);
4282 TEST(TestHebrewUCA);
4283 TEST(TestPartialSortKeyTermination);
4284 TEST(TestSettings);
4285 TEST(TestEquals);
4286 TEST(TestJ2726);
4287}
4288
4289#endif /* #if !UCONFIG_NO_COLLATION */