icuSources/test/cintltst/cmsccoll.c

   1
   2 /********************************************************************
   3  * COPYRIGHT:
   4  * Copyright (c) 2001-2011, International Business Machines Corporation and
   5  * others. All Rights Reserved.
   6  ********************************************************************/
   7 /*******************************************************************************
   8 *
   9 * File cmsccoll.C
  10 *
  11 *******************************************************************************/
  12 /**
  13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
  14  * to fit.
  15  */
  16
  17 #include <stdio.h>
  18
  19 #include "unicode/utypes.h"
  20
  21 #if !UCONFIG_NO_COLLATION
  22
  23 #include "unicode/ucol.h"
  24 #include "unicode/ucoleitr.h"
  25 #include "unicode/uloc.h"
  26 #include "cintltst.h"
  27 #include "ccolltst.h"
  28 #include "callcoll.h"
  29 #include "unicode/ustring.h"
  30 #include "string.h"
  31 #include "ucol_imp.h"
  32 #include "ucol_tok.h"
  33 #include "cmemory.h"
  34 #include "cstring.h"
  35 #include "uassert.h"
  36 #include "unicode/parseerr.h"
  37 #include "unicode/ucnv.h"
  38 #include "unicode/ures.h"
  39 #include "unicode/uscript.h"
  40 #include "uparse.h"
  41 #include "putilimp.h"
  42
  43
  44 #define LEN(a) (sizeof(a)/sizeof(a[0]))
  45
  46 #define MAX_TOKEN_LEN 16
  47
  48 typedef UCollationResult tst_strcoll(void *collator, const int object,
  49                         const UChar *source, const int sLen,
  50                         const UChar *target, const int tLen);
  51
  52
  53
  54 const static char cnt1[][10] = {
  55
  56   "AA",
  57   "AC",
  58   "AZ",
  59   "AQ",
  60   "AB",
  61   "ABZ",
  62   "ABQ",
  63   "Z",
  64   "ABC",
  65   "Q",
  66   "B"
  67 };
  68
  69 const static char cnt2[][10] = {
  70   "DA",
  71   "DAD",
  72   "DAZ",
  73   "MAR",
  74   "Z",
  75   "DAVIS",
  76   "MARK",
  77   "DAV",
  78   "DAVI"
  79 };
  80
  81 static void IncompleteCntTest(void)
  82 {
  83   UErrorCode status = U_ZERO_ERROR;
  84   UChar temp[90];
  85   UChar t1[90];
  86   UChar t2[90];
  87
  88   UCollator *coll =  NULL;
  89   uint32_t i = 0, j = 0;
  90   uint32_t size = 0;
  91
  92   u_uastrcpy(temp, " & Z < ABC < Q < B");
  93
  94   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
  95
  96   if(U_SUCCESS(status)) {
  97     size = sizeof(cnt1)/sizeof(cnt1[0]);
  98     for(i = 0; i < size-1; i++) {
  99       for(j = i+1; j < size; j++) {
 100         UCollationElements *iter;
 101         u_uastrcpy(t1, cnt1[i]);
 102         u_uastrcpy(t2, cnt1[j]);
 103         doTest(coll, t1, t2, UCOL_LESS);
 104         /* synwee : added collation element iterator test */
 105         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 106         if (U_FAILURE(status)) {
 107           log_err("Creation of iterator failed\n");
 108           break;
 109         }
 110         backAndForth(iter);
 111         ucol_closeElements(iter);
 112       }
 113     }
 114   }
 115
 116   ucol_close(coll);
 117
 118
 119   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
 120   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
 121
 122   if(U_SUCCESS(status)) {
 123     size = sizeof(cnt2)/sizeof(cnt2[0]);
 124     for(i = 0; i < size-1; i++) {
 125       for(j = i+1; j < size; j++) {
 126         UCollationElements *iter;
 127         u_uastrcpy(t1, cnt2[i]);
 128         u_uastrcpy(t2, cnt2[j]);
 129         doTest(coll, t1, t2, UCOL_LESS);
 130
 131         /* synwee : added collation element iterator test */
 132         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 133         if (U_FAILURE(status)) {
 134           log_err("Creation of iterator failed\n");
 135           break;
 136         }
 137         backAndForth(iter);
 138         ucol_closeElements(iter);
 139       }
 140     }
 141   }
 142
 143   ucol_close(coll);
 144
 145
 146 }
 147
 148 const static char shifted[][20] = {
 149   "black bird",
 150   "black-bird",
 151   "blackbird",
 152   "black Bird",
 153   "black-Bird",
 154   "blackBird",
 155   "black birds",
 156   "black-birds",
 157   "blackbirds"
 158 };
 159
 160 const static UCollationResult shiftedTert[] = {
 161   UCOL_EQUAL,
 162   UCOL_EQUAL,
 163   UCOL_EQUAL,
 164   UCOL_LESS,
 165   UCOL_EQUAL,
 166   UCOL_EQUAL,
 167   UCOL_LESS,
 168   UCOL_EQUAL,
 169   UCOL_EQUAL
 170 };
 171
 172 const static char nonignorable[][20] = {
 173   "black bird",
 174   "black Bird",
 175   "black birds",
 176   "black-bird",
 177   "black-Bird",
 178   "black-birds",
 179   "blackbird",
 180   "blackBird",
 181   "blackbirds"
 182 };
 183
 184 static void BlackBirdTest(void) {
 185   UErrorCode status = U_ZERO_ERROR;
 186   UChar t1[90];
 187   UChar t2[90];
 188
 189   uint32_t i = 0, j = 0;
 190   uint32_t size = 0;
 191   UCollator *coll = ucol_open("en_US", &status);
 192
 193   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
 194   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
 195
 196   if(U_SUCCESS(status)) {
 197     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
 198     for(i = 0; i < size-1; i++) {
 199       for(j = i+1; j < size; j++) {
 200         u_uastrcpy(t1, nonignorable[i]);
 201         u_uastrcpy(t2, nonignorable[j]);
 202         doTest(coll, t1, t2, UCOL_LESS);
 203       }
 204     }
 205   }
 206
 207   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
 208   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
 209
 210   if(U_SUCCESS(status)) {
 211     size = sizeof(shifted)/sizeof(shifted[0]);
 212     for(i = 0; i < size-1; i++) {
 213       for(j = i+1; j < size; j++) {
 214         u_uastrcpy(t1, shifted[i]);
 215         u_uastrcpy(t2, shifted[j]);
 216         doTest(coll, t1, t2, UCOL_LESS);
 217       }
 218     }
 219   }
 220
 221   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
 222   if(U_SUCCESS(status)) {
 223     size = sizeof(shifted)/sizeof(shifted[0]);
 224     for(i = 1; i < size; i++) {
 225       u_uastrcpy(t1, shifted[i-1]);
 226       u_uastrcpy(t2, shifted[i]);
 227       doTest(coll, t1, t2, shiftedTert[i]);
 228     }
 229   }
 230
 231   ucol_close(coll);
 232 }
 233
 234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
 235     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
 236     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
 237     {0x0041/*'A'*/, 0x0300, 0x0000},
 238     {0x00C0, 0x0301, 0x0000},
 239     /* this would work with forced normalization */
 240     {0x00C0, 0x0316, 0x0000}
 241 };
 242
 243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
 244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 245     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
 246     {0x00C0, 0},
 247     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 248     /* this would work with forced normalization */
 249     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
 250 };
 251
 252 const static UCollationResult results[] = {
 253     UCOL_GREATER,
 254     UCOL_EQUAL,
 255     UCOL_EQUAL,
 256     UCOL_GREATER,
 257     UCOL_EQUAL
 258 };
 259
 260 static void FunkyATest(void)
 261 {
 262
 263     int32_t i;
 264     UErrorCode status = U_ZERO_ERROR;
 265     UCollator  *myCollation;
 266     myCollation = ucol_open("en_US", &status);
 267     if(U_FAILURE(status)){
 268         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 269         return;
 270     }
 271     log_verbose("Testing some A letters, for some reason\n");
 272     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 273     ucol_setStrength(myCollation, UCOL_TERTIARY);
 274     for (i = 0; i < 4 ; i++)
 275     {
 276         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
 277     }
 278     ucol_close(myCollation);
 279 }
 280
 281 UColAttributeValue caseFirst[] = {
 282     UCOL_OFF,
 283     UCOL_LOWER_FIRST,
 284     UCOL_UPPER_FIRST
 285 };
 286
 287
 288 UColAttributeValue alternateHandling[] = {
 289     UCOL_NON_IGNORABLE,
 290     UCOL_SHIFTED
 291 };
 292
 293 UColAttributeValue caseLevel[] = {
 294     UCOL_OFF,
 295     UCOL_ON
 296 };
 297
 298 UColAttributeValue strengths[] = {
 299     UCOL_PRIMARY,
 300     UCOL_SECONDARY,
 301     UCOL_TERTIARY,
 302     UCOL_QUATERNARY,
 303     UCOL_IDENTICAL
 304 };
 305
 306 #if 0
 307 static const char * strengthsC[] = {
 308     "UCOL_PRIMARY",
 309     "UCOL_SECONDARY",
 310     "UCOL_TERTIARY",
 311     "UCOL_QUATERNARY",
 312     "UCOL_IDENTICAL"
 313 };
 314
 315 static const char * caseFirstC[] = {
 316     "UCOL_OFF",
 317     "UCOL_LOWER_FIRST",
 318     "UCOL_UPPER_FIRST"
 319 };
 320
 321
 322 static const char * alternateHandlingC[] = {
 323     "UCOL_NON_IGNORABLE",
 324     "UCOL_SHIFTED"
 325 };
 326
 327 static const char * caseLevelC[] = {
 328     "UCOL_OFF",
 329     "UCOL_ON"
 330 };
 331
 332 /* not used currently - does not test only prints */
 333 static void PrintMarkDavis(void)
 334 {
 335   UErrorCode status = U_ZERO_ERROR;
 336   UChar m[256];
 337   uint8_t sortkey[256];
 338   UCollator *coll = ucol_open("en_US", &status);
 339   uint32_t h,i,j,k, sortkeysize;
 340   uint32_t sizem = 0;
 341   char buffer[512];
 342   uint32_t len = 512;
 343
 344   log_verbose("PrintMarkDavis");
 345
 346   u_uastrcpy(m, "Mark Davis");
 347   sizem = u_strlen(m);
 348
 349
 350   m[1] = 0xe4;
 351
 352   for(i = 0; i<sizem; i++) {
 353     fprintf(stderr, "\\u%04X ", m[i]);
 354   }
 355   fprintf(stderr, "\n");
 356
 357   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
 358     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
 359     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
 360
 361     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
 362       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
 363       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
 364
 365       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
 366         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
 367         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
 368
 369         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
 370           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
 371           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
 372           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
 373           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
 374         }
 375
 376       }
 377
 378     }
 379
 380   }
 381 }
 382 #endif
 383
 384 static void BillFairmanTest(void) {
 385 /*
 386 ** check for actual locale via ICU resource bundles
 387 **
 388 ** lp points to the original locale ("fr_FR_....")
 389 */
 390
 391     UResourceBundle *lr,*cr;
 392     UErrorCode              lec = U_ZERO_ERROR;
 393     const char *lp = "fr_FR_you_ll_never_find_this_locale";
 394
 395     log_verbose("BillFairmanTest\n");
 396
 397     lr = ures_open(NULL,lp,&lec);
 398     if (lr) {
 399         cr = ures_getByKey(lr,"collations",0,&lec);
 400         if (cr) {
 401             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
 402             if (lp) {
 403                 if (U_SUCCESS(lec)) {
 404                     if(strcmp(lp, "fr") != 0) {
 405                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
 406                     }
 407                 }
 408             }
 409             ures_close(cr);
 410         }
 411         ures_close(lr);
 412     }
 413 }
 414
 415 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
 416     UChar source[256] = { '\0'};
 417     UChar target[256] = { '\0'};
 418     UChar preP = 0x31a3;
 419     UChar preQ = 0x310d;
 420 /*
 421     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
 422     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
 423 */
 424     /*log_verbose("Testing primary\n");*/
 425
 426     doTest(col, p, q, UCOL_LESS);
 427 /*
 428     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
 429
 430     if(result!=UCOL_LESS){
 431        aescstrdup(p,utfSource,256);
 432        aescstrdup(q,utfTarget,256);
 433        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
 434     }
 435 */
 436     source[0] = preP;
 437     u_strcpy(source+1,p);
 438     target[0] = preQ;
 439     u_strcpy(target+1,q);
 440     doTest(col, source, target, UCOL_LESS);
 441 /*
 442     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
 443 */
 444 }
 445
 446 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
 447     UChar source[256] = { '\0'};
 448     UChar target[256] = { '\0'};
 449
 450     /*log_verbose("Testing secondary\n");*/
 451
 452     doTest(col, p, q, UCOL_LESS);
 453 /*
 454     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
 455 */
 456     source[0] = 0x0053;
 457     u_strcpy(source+1,p);
 458     target[0]= 0x0073;
 459     u_strcpy(target+1,q);
 460
 461     doTest(col, source, target, UCOL_LESS);
 462 /*
 463     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
 464 */
 465
 466
 467     u_strcpy(source,p);
 468     source[u_strlen(p)] = 0x62;
 469     source[u_strlen(p)+1] = 0;
 470
 471
 472     u_strcpy(target,q);
 473     target[u_strlen(q)] = 0x61;
 474     target[u_strlen(q)+1] = 0;
 475
 476     doTest(col, source, target, UCOL_GREATER);
 477
 478 /*
 479     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
 480 */
 481 }
 482
 483 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
 484     UChar source[256] = { '\0'};
 485     UChar target[256] = { '\0'};
 486
 487     /*log_verbose("Testing tertiary\n");*/
 488
 489     doTest(col, p, q, UCOL_LESS);
 490 /*
 491     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
 492 */
 493     source[0] = 0x0020;
 494     u_strcpy(source+1,p);
 495     target[0]= 0x002D;
 496     u_strcpy(target+1,q);
 497
 498     doTest(col, source, target, UCOL_LESS);
 499 /*
 500     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
 501 */
 502
 503     u_strcpy(source,p);
 504     source[u_strlen(p)] = 0xE0;
 505     source[u_strlen(p)+1] = 0;
 506
 507     u_strcpy(target,q);
 508     target[u_strlen(q)] = 0x61;
 509     target[u_strlen(q)+1] = 0;
 510
 511     doTest(col, source, target, UCOL_GREATER);
 512
 513 /*
 514     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
 515 */
 516 }
 517
 518 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
 519 /*
 520     UChar source[256] = { '\0'};
 521     UChar target[256] = { '\0'};
 522 */
 523
 524     doTest(col, p, q, UCOL_EQUAL);
 525 /*
 526     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
 527 */
 528 }
 529
 530 static void testCollator(UCollator *coll, UErrorCode *status) {
 531   const UChar *rules = NULL, *current = NULL;
 532   int32_t ruleLen = 0;
 533   uint32_t strength = 0;
 534   uint32_t chOffset = 0; uint32_t chLen = 0;
 535   uint32_t exOffset = 0; uint32_t exLen = 0;
 536   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
 537   uint32_t firstEx = 0;
 538 /*  uint32_t rExpsLen = 0; */
 539   uint32_t firstLen = 0;
 540   UBool varT = FALSE; UBool top_ = TRUE;
 541   uint16_t specs = 0;
 542   UBool startOfRules = TRUE;
 543   UBool lastReset = FALSE;
 544   UBool before = FALSE;
 545   uint32_t beforeStrength = 0;
 546   UColTokenParser src;
 547   UColOptionSet opts;
 548
 549   UChar first[256];
 550   UChar second[256];
 551   UChar tempB[256];
 552   uint32_t tempLen;
 553   UChar *rulesCopy = NULL;
 554   UParseError parseError;
 555
 556   uprv_memset(&src, 0, sizeof(UColTokenParser));
 557
 558   src.opts = &opts;
 559
 560   rules = ucol_getRules(coll, &ruleLen);
 561   if(U_SUCCESS(*status) && ruleLen > 0) {
 562     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
 563     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
 564     src.current = src.source = rulesCopy;
 565     src.end = rulesCopy+ruleLen;
 566     src.extraCurrent = src.end;
 567     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
 568     *first = *second = 0;
 569
 570         /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
 571            the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
 572     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
 573       strength = src.parsedToken.strength;
 574       chOffset = src.parsedToken.charsOffset;
 575       chLen = src.parsedToken.charsLen;
 576       exOffset = src.parsedToken.extensionOffset;
 577       exLen = src.parsedToken.extensionLen;
 578       prefixOffset = src.parsedToken.prefixOffset;
 579       prefixLen = src.parsedToken.prefixLen;
 580       specs = src.parsedToken.flags;
 581
 582       startOfRules = FALSE;
 583       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
 584       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
 585       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
 586         second[0] = 0;
 587       } else {
 588         u_strncpy(second,src.source+chOffset, chLen);
 589         second[chLen] = 0;
 590
 591         if(exLen > 0 && firstEx == 0) {
 592           u_strncat(first, src.source+exOffset, exLen);
 593           first[firstLen+exLen] = 0;
 594         }
 595
 596         if(lastReset == TRUE && prefixLen != 0) {
 597           u_strncpy(first+prefixLen, first, firstLen);
 598           u_strncpy(first, src.source+prefixOffset, prefixLen);
 599           first[firstLen+prefixLen] = 0;
 600           firstLen = firstLen+prefixLen;
 601         }
 602
 603         if(before == TRUE) { /* swap first and second */
 604           u_strcpy(tempB, first);
 605           u_strcpy(first, second);
 606           u_strcpy(second, tempB);
 607
 608           tempLen = firstLen;
 609           firstLen = chLen;
 610           chLen = tempLen;
 611
 612           tempLen = firstEx;
 613           firstEx = exLen;
 614           exLen = tempLen;
 615           if(beforeStrength < strength) {
 616             strength = beforeStrength;
 617           }
 618         }
 619       }
 620       lastReset = FALSE;
 621
 622       switch(strength){
 623       case UCOL_IDENTICAL:
 624           testEquality(coll,first,second);
 625           break;
 626       case UCOL_PRIMARY:
 627           testPrimary(coll,first,second);
 628           break;
 629       case UCOL_SECONDARY:
 630           testSecondary(coll,first,second);
 631           break;
 632       case UCOL_TERTIARY:
 633           testTertiary(coll,first,second);
 634           break;
 635       case UCOL_TOK_RESET:
 636         lastReset = TRUE;
 637         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
 638         if(before) {
 639           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
 640         }
 641         break;
 642       default:
 643           break;
 644       }
 645
 646       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
 647         before = FALSE;
 648       } else {
 649         firstLen = chLen;
 650         firstEx = exLen;
 651         u_strcpy(first, second);
 652       }
 653     }
 654     uprv_free(src.source);
 655   }
 656 }
 657
 658 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
 659   UCollator *UCA = (UCollator *)collator;
 660   return ucol_strcoll(UCA, source, sLen, target, tLen);
 661 }
 662
 663 /*
 664 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
 665 #ifdef U_WINDOWS
 666   LCID lcid = (LCID)collator;
 667   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
 668 #else
 669   return 0;
 670 #endif
 671 }
 672 */
 673
 674 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
 675                                      UChar s1, UChar s2,
 676                                      const UChar *s, const uint32_t sLen,
 677                                      const UChar *t, const uint32_t tLen) {
 678   UChar source[256] = {0};
 679   UChar target[256] = {0};
 680
 681   source[0] = s1;
 682   u_strcpy(source+1, s);
 683   target[0] = s2;
 684   u_strcpy(target+1, t);
 685
 686   return func(collator, opts, source, sLen+1, target, tLen+1);
 687 }
 688
 689 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
 690                                    UChar s1, UChar s2,
 691                                    const UChar *s, const uint32_t sLen,
 692                                    const UChar *t, const uint32_t tLen) {
 693   UChar source[256] = {0};
 694   UChar target[256] = {0};
 695
 696   u_strcpy(source, s);
 697   source[sLen] = s1;
 698   u_strcpy(target, t);
 699   target[tLen] = s2;
 700
 701   return func(collator, opts, source, sLen+1, target, tLen+1);
 702 }
 703
 704 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
 705                               const UChar *s, const uint32_t sLen,
 706                               const UChar *t, const uint32_t tLen,
 707                               UCollationResult result) {
 708   /*UChar fPrimary = 0x6d;*/
 709   /*UChar sPrimary = 0x6e;*/
 710   UChar fSecondary = 0x310d;
 711   UChar sSecondary = 0x31a3;
 712   UChar fTertiary = 0x310f;
 713   UChar sTertiary = 0x31b7;
 714
 715   UCollationResult oposite;
 716   if(result == UCOL_EQUAL) {
 717     return UCOL_IDENTICAL;
 718   } else if(result == UCOL_GREATER) {
 719     oposite = UCOL_LESS;
 720   } else {
 721     oposite = UCOL_GREATER;
 722   }
 723
 724   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
 725     return UCOL_PRIMARY;
 726   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
 727     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
 728     return UCOL_SECONDARY;
 729   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
 730     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
 731     return UCOL_TERTIARY;
 732   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
 733     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
 734     return UCOL_QUATERNARY;
 735   } else {
 736     return UCOL_IDENTICAL;
 737   }
 738 }
 739
 740 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
 741   uint32_t i = 0;
 742
 743   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
 744     buffer[0] = '=';
 745     buffer[1] = '=';
 746     buffer[2] = '\0';
 747   } else if(res == UCOL_GREATER) {
 748     for(i = 0; i<strength+1; i++) {
 749       buffer[i] = '>';
 750     }
 751     buffer[strength+1] = '\0';
 752   } else {
 753     for(i = 0; i<strength+1; i++) {
 754       buffer[i] = '<';
 755     }
 756     buffer[strength+1] = '\0';
 757   }
 758
 759   return buffer;
 760 }
 761
 762
 763
 764 static void logFailure (const char *platform, const char *test,
 765                         const UChar *source, const uint32_t sLen,
 766                         const UChar *target, const uint32_t tLen,
 767                         UCollationResult realRes, uint32_t realStrength,
 768                         UCollationResult expRes, uint32_t expStrength, UBool error) {
 769
 770   uint32_t i = 0;
 771
 772   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
 773   static int32_t maxOutputLength = 0;
 774   int32_t outputLength;
 775
 776   *sEsc = *tEsc = *s = *t = 0;
 777   if(error == TRUE) {
 778     log_err("Difference between expected and generated order. Run test with -v for more info\n");
 779   } else if(getTestOption(VERBOSITY_OPTION) == 0) {
 780     return;
 781   }
 782   for(i = 0; i<sLen; i++) {
 783     sprintf(b, "%04X", source[i]);
 784     strcat(sEsc, "\\u");
 785     strcat(sEsc, b);
 786     strcat(s, b);
 787     strcat(s, " ");
 788     if(source[i] < 0x80) {
 789       sprintf(b, "(%c)", source[i]);
 790       strcat(sEsc, b);
 791     }
 792   }
 793   for(i = 0; i<tLen; i++) {
 794     sprintf(b, "%04X", target[i]);
 795     strcat(tEsc, "\\u");
 796     strcat(tEsc, b);
 797     strcat(t, b);
 798     strcat(t, " ");
 799     if(target[i] < 0x80) {
 800       sprintf(b, "(%c)", target[i]);
 801       strcat(tEsc, b);
 802     }
 803   }
 804 /*
 805   strcpy(output, "[[ ");
 806   strcat(output, sEsc);
 807   strcat(output, getRelationSymbol(expRes, expStrength, relation));
 808   strcat(output, tEsc);
 809
 810   strcat(output, " : ");
 811
 812   strcat(output, sEsc);
 813   strcat(output, getRelationSymbol(realRes, realStrength, relation));
 814   strcat(output, tEsc);
 815   strcat(output, " ]] ");
 816
 817   log_verbose("%s", output);
 818 */
 819
 820
 821   strcpy(output, "DIFF: ");
 822
 823   strcat(output, s);
 824   strcat(output, " : ");
 825   strcat(output, t);
 826
 827   strcat(output, test);
 828   strcat(output, ": ");
 829
 830   strcat(output, sEsc);
 831   strcat(output, getRelationSymbol(expRes, expStrength, relation));
 832   strcat(output, tEsc);
 833
 834   strcat(output, " ");
 835
 836   strcat(output, platform);
 837   strcat(output, ": ");
 838
 839   strcat(output, sEsc);
 840   strcat(output, getRelationSymbol(realRes, realStrength, relation));
 841   strcat(output, tEsc);
 842
 843   outputLength = (int32_t)strlen(output);
 844   if(outputLength > maxOutputLength) {
 845     maxOutputLength = outputLength;
 846     U_ASSERT(outputLength < sizeof(output));
 847   }
 848
 849   log_verbose("%s\n", output);
 850
 851 }
 852
 853 /*
 854 static void printOutRules(const UChar *rules) {
 855   uint32_t len = u_strlen(rules);
 856   uint32_t i = 0;
 857   char toPrint;
 858   uint32_t line = 0;
 859
 860   fprintf(stdout, "Rules:");
 861
 862   for(i = 0; i<len; i++) {
 863     if(rules[i]<0x7f && rules[i]>=0x20) {
 864       toPrint = (char)rules[i];
 865       if(toPrint == '&') {
 866         line = 1;
 867         fprintf(stdout, "\n&");
 868       } else if(toPrint == ';') {
 869         fprintf(stdout, "<<");
 870         line+=2;
 871       } else if(toPrint == ',') {
 872         fprintf(stdout, "<<<");
 873         line+=3;
 874       } else {
 875         fprintf(stdout, "%c", toPrint);
 876         line++;
 877       }
 878     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
 879       fprintf(stdout, "\\u%04X", rules[i]);
 880       line+=6;
 881     }
 882     if(line>72) {
 883       fprintf(stdout, "\n");
 884       line = 0;
 885     }
 886   }
 887
 888   log_verbose("\n");
 889
 890 }
 891 */
 892
 893 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
 894   uint32_t diffs = 0;
 895   UCollationResult realResult;
 896   uint32_t realStrength;
 897
 898   uint32_t sLen = u_strlen(first);
 899   uint32_t tLen = u_strlen(second);
 900
 901   realResult = func(collator, opts, first, sLen, second, tLen);
 902   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
 903
 904   if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
 905     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
 906     diffs++;
 907   } else if(realResult != UCOL_LESS || realStrength != strength) {
 908     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
 909     diffs++;
 910   }
 911   return diffs;
 912 }
 913
 914
 915 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
 916   const UChar *rules = NULL, *current = NULL;
 917   int32_t ruleLen = 0;
 918   uint32_t strength = 0;
 919   uint32_t chOffset = 0; uint32_t chLen = 0;
 920   uint32_t exOffset = 0; uint32_t exLen = 0;
 921   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
 922 /*  uint32_t rExpsLen = 0; */
 923   uint32_t firstLen = 0, secondLen = 0;
 924   UBool varT = FALSE; UBool top_ = TRUE;
 925   uint16_t specs = 0;
 926   UBool startOfRules = TRUE;
 927   UColTokenParser src;
 928   UColOptionSet opts;
 929
 930   UChar first[256];
 931   UChar second[256];
 932   UChar *rulesCopy = NULL;
 933
 934   uint32_t UCAdiff = 0;
 935   uint32_t Windiff = 1;
 936   UParseError parseError;
 937
 938   uprv_memset(&src, 0, sizeof(UColTokenParser));
 939   src.opts = &opts;
 940
 941   rules = ucol_getRules(coll, &ruleLen);
 942
 943   /*printOutRules(rules);*/
 944
 945   if(U_SUCCESS(*status) && ruleLen > 0) {
 946     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
 947     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
 948     src.current = src.source = rulesCopy;
 949     src.end = rulesCopy+ruleLen;
 950     src.extraCurrent = src.end;
 951     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
 952     *first = *second = 0;
 953
 954     /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
 955        the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
 956     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
 957       strength = src.parsedToken.strength;
 958       chOffset = src.parsedToken.charsOffset;
 959       chLen = src.parsedToken.charsLen;
 960       exOffset = src.parsedToken.extensionOffset;
 961       exLen = src.parsedToken.extensionLen;
 962       prefixOffset = src.parsedToken.prefixOffset;
 963       prefixLen = src.parsedToken.prefixLen;
 964       specs = src.parsedToken.flags;
 965
 966       startOfRules = FALSE;
 967       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
 968       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
 969
 970       u_strncpy(second,src.source+chOffset, chLen);
 971       second[chLen] = 0;
 972       secondLen = chLen;
 973
 974       if(exLen > 0) {
 975         u_strncat(first, src.source+exOffset, exLen);
 976         first[firstLen+exLen] = 0;
 977         firstLen += exLen;
 978       }
 979
 980       if(strength != UCOL_TOK_RESET) {
 981         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
 982           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
 983           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
 984         }
 985       }
 986
 987
 988       firstLen = chLen;
 989       u_strcpy(first, second);
 990
 991     }
 992     if(UCAdiff != 0 && Windiff != 0) {
 993       log_verbose("\n");
 994     }
 995     if(UCAdiff == 0) {
 996       log_verbose("No immediate difference with %s!\n", refName);
 997     }
 998     if(Windiff == 0) {
 999       log_verbose("No immediate difference with Win32!\n");
1000     }
1001     uprv_free(src.source);
1002   }
1003 }
1004
/*
 * Compares two collation elements, each given as a (lead CE, continuation CE)
 * pair, level by level: primary weights first, then secondary, then tertiary.
 *
 *   s1, s2 - lead and continuation CE of the first element
 *   t1, t2 - lead and continuation CE of the second element
 *
 * Returns 0 when both pairs are bit-for-bit identical, -1 when the first
 * element orders before the second and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
  uint32_t srcKey;
  uint32_t tgtKey;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* primary: upper 16 bits of the lead CE followed by those of the continuation */
  srcKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
  tgtKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
  if(srcKey != tgtKey) {
    return (srcKey < tgtKey) ? -1 : 1;
  }

  /* secondary: bits 8..15 of the lead CE, then the same bits of the continuation */
  srcKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
  tgtKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
  if(srcKey != tgtKey) {
    return (srcKey < tgtKey) ? -1 : 1;
  }

  /* tertiary: low byte of the lead CE, then the low byte of the continuation */
  srcKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
  tgtKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
  return (srcKey < tgtKey) ? -1 : 1;
}
1041
/*
 * One entry of the indirect-position table: a start CE pair and a limit CE
 * pair, each stored as lead CE plus continuation CE.
 */
typedef struct {
  uint32_t startCE, startContCE;   /* start: lead CE, continuation CE */
  uint32_t limitCE, limitContCE;   /* limit: lead CE, continuation CE */
} indirectBoundaries;
1048
1049 /* these values are used for finding CE values for indirect positioning. */
1050 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
1051 /* values. It only works for resets and you cannot tailor indirect names */
1052 /* An indirect name can define either an anchor point or a range. An     */
1053 /* anchor point behaves in exactly the same way as a code point in reset */
1054 /* would, except that it cannot be tailored. A range (we currently only  */
1055 /* know for the [top] range will explicitly set the upper bound for      */
1056 /* generated CEs, thus allowing for better control over how many CEs can */
1057 /* be squeezed between in the range without performance penalty.         */
1058 /* In that respect, we use [top] for tailoring of locales that use CJK   */
1059 /* characters. Other indirect values are currently a pure convenience,   */
1060 /* they can be used to assure that the CEs will be always positioned in  */
1061 /* the same place relative to a point with known properties (e.g. first  */
1062 /* primary ignorable). */
1063 static indirectBoundaries ucolIndirectBoundaries[15];
1064 static UBool indirectBoundariesSet = FALSE;
1065 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1066     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1067     /* to initalize here. */
1068     ucolIndirectBoundaries[indexR].startCE = start[0];
1069     ucolIndirectBoundaries[indexR].startContCE = start[1];
1070     if(end) {
1071         ucolIndirectBoundaries[indexR].limitCE = end[0];
1072         ucolIndirectBoundaries[indexR].limitContCE = end[1];
1073     } else {
1074         ucolIndirectBoundaries[indexR].limitCE = 0;
1075         ucolIndirectBoundaries[indexR].limitContCE = 0;
1076     }
1077 }
1078
1079 static void testCEs(UCollator *coll, UErrorCode *status) {
1080     const UChar *rules = NULL, *current = NULL;
1081     int32_t ruleLen = 0;
1082
1083     uint32_t strength = 0;
1084     uint32_t maxStrength = UCOL_IDENTICAL;
1085     uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1086     uint32_t lastCE;
1087     uint32_t lastContCE;
1088
1089     int32_t result = 0;
1090     uint32_t chOffset = 0; uint32_t chLen = 0;
1091     uint32_t exOffset = 0; uint32_t exLen = 0;
1092     uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1093     uint32_t oldOffset = 0;
1094
1095     /* uint32_t rExpsLen = 0; */
1096     /* uint32_t firstLen = 0; */
1097     uint16_t specs = 0;
1098     UBool varT = FALSE; UBool top_ = TRUE;
1099     UBool startOfRules = TRUE;
1100     UBool before = FALSE;
1101     UColTokenParser src;
1102     UColOptionSet opts;
1103     UParseError parseError;
1104     UChar *rulesCopy = NULL;
1105     collIterate *c = uprv_new_collIterate(status);
1106     UCAConstants *consts = NULL;
1107     uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1108         UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1109     const char *colLoc;
1110     UCollator *UCA = ucol_open("root", status);
1111
1112     if (U_FAILURE(*status)) {
1113         log_err("Could not open root collator %s\n", u_errorName(*status));
1114         uprv_delete_collIterate(c);
1115         return;
1116     }
1117
1118     colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1119     if (U_FAILURE(*status)) {
1120         log_err("Could not get collator name: %s\n", u_errorName(*status));
1121         ucol_close(UCA);
1122         uprv_delete_collIterate(c);
1123         return;
1124     }
1125
1126     uprv_memset(&src, 0, sizeof(UColTokenParser));
1127
1128     consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1129     UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1130     /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1131     UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1132     UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1133
1134     baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1135
1136     src.opts = &opts;
1137
1138     rules = ucol_getRules(coll, &ruleLen);
1139
1140     src.invUCA = ucol_initInverseUCA(status);
1141
1142     if(indirectBoundariesSet == FALSE) {
1143         /* UCOL_RESET_TOP_VALUE */
1144         setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1145         /* UCOL_FIRST_PRIMARY_IGNORABLE */
1146         setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1147         /* UCOL_LAST_PRIMARY_IGNORABLE */
1148         setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1149         /* UCOL_FIRST_SECONDARY_IGNORABLE */
1150         setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1151         /* UCOL_LAST_SECONDARY_IGNORABLE */
1152         setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1153         /* UCOL_FIRST_TERTIARY_IGNORABLE */
1154         setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1155         /* UCOL_LAST_TERTIARY_IGNORABLE */
1156         setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1157         /* UCOL_FIRST_VARIABLE */
1158         setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1159         /* UCOL_LAST_VARIABLE */
1160         setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1161         /* UCOL_FIRST_NON_VARIABLE */
1162         setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1163         /* UCOL_LAST_NON_VARIABLE */
1164         setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1165         /* UCOL_FIRST_IMPLICIT */
1166         setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1167         /* UCOL_LAST_IMPLICIT */
1168         setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1169         /* UCOL_FIRST_TRAILING */
1170         setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1171         /* UCOL_LAST_TRAILING */
1172         setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1173         ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1174         indirectBoundariesSet = TRUE;
1175     }
1176
1177
1178     if(U_SUCCESS(*status) && ruleLen > 0) {
1179         rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1180         uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1181         src.current = src.source = rulesCopy;
1182         src.end = rulesCopy+ruleLen;
1183         src.extraCurrent = src.end;
1184         src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1185
1186             /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1187                the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1188         while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1189             strength = src.parsedToken.strength;
1190             chOffset = src.parsedToken.charsOffset;
1191             chLen = src.parsedToken.charsLen;
1192             exOffset = src.parsedToken.extensionOffset;
1193             exLen = src.parsedToken.extensionLen;
1194             prefixOffset = src.parsedToken.prefixOffset;
1195             prefixLen = src.parsedToken.prefixLen;
1196             specs = src.parsedToken.flags;
1197
1198             startOfRules = FALSE;
1199             varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1200             top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1201
1202             uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1203
1204             currCE = ucol_getNextCE(coll, c, status);
1205             if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1206                 log_verbose("Thai prevowel detected. Will pick next CE\n");
1207                 currCE = ucol_getNextCE(coll, c, status);
1208             }
1209
1210             currContCE = ucol_getNextCE(coll, c, status);
1211             if(!isContinuation(currContCE)) {
1212                 currContCE = 0;
1213             }
1214
1215             /* we need to repack CEs here */
1216
1217             if(strength == UCOL_TOK_RESET) {
1218                 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1219                 if(top_ == TRUE) {
1220                     int32_t tokenIndex = src.parsedToken.indirectIndex;
1221
1222                     nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1223                     nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1224                 } else {
1225                     nextCE = baseCE = currCE;
1226                     nextContCE = baseContCE = currContCE;
1227                 }
1228                 maxStrength = UCOL_IDENTICAL;
1229             } else {
1230                 if(strength < maxStrength) {
1231                     maxStrength = strength;
1232                     if(baseCE == UCOL_RESET_TOP_VALUE) {
1233                         log_verbose("Resetting to [top]\n");
1234                         nextCE = UCOL_NEXT_TOP_VALUE;
1235                         nextContCE = UCOL_NEXT_TOP_CONT;
1236                     } else {
1237                         result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1238                     }
1239                     if(result < 0) {
1240                         if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1241                             log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1242                             return;
1243                         } else {
1244                             log_err("%s: couldn't find the CE\n", colLoc);
1245                             return;
1246                         }
1247                     }
1248                 }
1249
1250                 currCE &= 0xFFFFFF3F;
1251                 currContCE &= 0xFFFFFFBF;
1252
1253                 if(maxStrength == UCOL_IDENTICAL) {
1254                     if(baseCE != currCE || baseContCE != currContCE) {
1255                         log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1256                     }
1257                 } else {
1258                     if(strength == UCOL_IDENTICAL) {
1259                         if(lastCE != currCE || lastContCE != currContCE) {
1260                             log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1261                         }
1262                     } else {
1263                         if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1264                             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1265                             log_err("%s: current CE is not less than base CE\n", colLoc);
1266                         }
1267                         if(!before) {
1268                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1269                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1270                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1271                             }
1272                         } else {
1273                             before = FALSE;
1274                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1275                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1276                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1277                             }
1278                         }
1279                     }
1280                 }
1281
1282             }
1283
1284             oldOffset = chOffset;
1285             lastCE = currCE & 0xFFFFFF3F;
1286             lastContCE = currContCE & 0xFFFFFFBF;
1287         }
1288         uprv_free(src.source);
1289     }
1290     ucol_close(UCA);
1291     uprv_delete_collIterate(c);
1292 }
1293
1294 #if 0
1295 /* these locales are now picked from index RB */
1296 static const char* localesToTest[] = {
1297 "ar", "bg", "ca", "cs", "da",
1298 "el", "en_BE", "en_US_POSIX",
1299 "es", "et", "fi", "fr", "hi",
1300 "hr", "hu", "is", "iw", "ja",
1301 "ko", "lt", "lv", "mk", "mt",
1302 "nb", "nn", "nn_NO", "pl", "ro",
1303 "ru", "sh", "sk", "sl", "sq",
1304 "sr", "sv", "th", "tr", "uk",
1305 "vi", "zh", "zh_TW"
1306 };
1307 #endif
1308
/*
 * Rule strings (with \u escapes, to be passed through u_unescape) from which
 * RamsRulesTest builds collators.
 * The comments in front of the entries carry disabled variants of the rules.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

    /* disabled: "& Z < p, P" */

    /* Cui Mins rules */

    /* disabled variant: "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",

    /* disabled variant: "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",

    /* disabled variant: "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",

    /* disabled variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",

    /* disabled variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",

    /* disabled variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",

    /* disabled variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
};
1322
1323
1324 static void TestCollations(void) {
1325     int32_t noOfLoc = uloc_countAvailable();
1326     int32_t i = 0, j = 0;
1327
1328     UErrorCode status = U_ZERO_ERROR;
1329     char cName[256];
1330     UChar name[256];
1331     int32_t nameSize;
1332
1333
1334     const char *locName = NULL;
1335     UCollator *coll = NULL;
1336     UCollator *UCA = ucol_open("", &status);
1337     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1338     if (U_FAILURE(status)) {
1339         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1340         return;
1341     }
1342     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1343
1344     for(i = 0; i<noOfLoc; i++) {
1345         status = U_ZERO_ERROR;
1346         locName = uloc_getAvailable(i);
1347         if(uprv_strcmp("ja", locName) == 0) {
1348             log_verbose("Don't know how to test prefixes\n");
1349             continue;
1350         }
1351         if(hasCollationElements(locName)) {
1352             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1353             for(j = 0; j<nameSize; j++) {
1354                 cName[j] = (char)name[j];
1355             }
1356             cName[nameSize] = 0;
1357             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1358             coll = ucol_open(locName, &status);
1359             if(U_SUCCESS(status)) {
1360                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1361                 ucol_close(coll);
1362             } else {
1363                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1364                 status = U_ZERO_ERROR;
1365             }
1366         }
1367     }
1368     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1369     ucol_close(UCA);
1370 }
1371
1372 static void RamsRulesTest(void) {
1373     UErrorCode status = U_ZERO_ERROR;
1374     int32_t i = 0;
1375     UCollator *coll = NULL;
1376     UChar rule[2048];
1377     uint32_t ruleLen;
1378     int32_t noOfLoc = uloc_countAvailable();
1379     const char *locName = NULL;
1380
1381     log_verbose("RamsRulesTest\n");
1382
1383     if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1384         /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1385         return;
1386     }
1387
1388     for(i = 0; i<noOfLoc; i++) {
1389         locName = uloc_getAvailable(i);
1390         if(hasCollationElements(locName)) {
1391             if (uprv_strcmp("ja", locName)==0) {
1392                 log_verbose("Don't know how to test Japanese because of prefixes\n");
1393                 continue;
1394             }
1395             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1396                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1397                 continue;
1398             }
1399             if (uprv_strcmp("bn", locName)==0 ||
1400                 uprv_strcmp("en_US_POSIX", locName)==0 ||
1401                 uprv_strcmp("km", locName)==0 ||
1402                 uprv_strcmp("km_KH", locName)==0 ||
1403                 uprv_strcmp("my", locName)==0 ||
1404                 uprv_strcmp("si", locName)==0 ||
1405                 uprv_strcmp("si_LK", locName)==0 ||
1406                 uprv_strcmp("zh", locName)==0 ||
1407                 uprv_strcmp("zh_Hant", locName)==0
1408             ) {
1409                 log_verbose("Don't know how to test %s. "
1410                             "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1411                 continue;
1412             }
1413             log_verbose("Testing locale %s\n", locName);
1414             status = U_ZERO_ERROR;
1415             coll = ucol_open(locName, &status);
1416             if(U_SUCCESS(status)) {
1417               if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1418                 if(coll->image->jamoSpecial == TRUE) {
1419                   log_err("%s has special JAMOs\n", locName);
1420                 }
1421                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1422                 testCollator(coll, &status);
1423                 testCEs(coll, &status);
1424               } else {
1425                 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1426               }
1427               ucol_close(coll);
1428             } else {
1429               log_err("Could not open %s: %s\n", locName, u_errorName(status));
1430             }
1431         }
1432     }
1433
1434     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1435         log_verbose("Testing rule: %s\n", rulesToTest[i]);
1436         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1437         status = U_ZERO_ERROR;
1438         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1439         if(U_SUCCESS(status)) {
1440             testCollator(coll, &status);
1441             testCEs(coll, &status);
1442             ucol_close(coll);
1443         } else {
1444           log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1445         }
1446     }
1447
1448 }
1449
1450 static void IsTailoredTest(void) {
1451     UErrorCode status = U_ZERO_ERROR;
1452     uint32_t i = 0;
1453     UCollator *coll = NULL;
1454     UChar rule[2048];
1455     UChar tailored[2048];
1456     UChar notTailored[2048];
1457     uint32_t ruleLen, tailoredLen, notTailoredLen;
1458
1459     log_verbose("IsTailoredTest\n");
1460
1461     u_uastrcpy(rule, "&Z < A, B, C;c < d");
1462     ruleLen = u_strlen(rule);
1463
1464     u_uastrcpy(tailored, "ABCcd");
1465     tailoredLen = u_strlen(tailored);
1466
1467     u_uastrcpy(notTailored, "ZabD");
1468     notTailoredLen = u_strlen(notTailored);
1469
1470     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1471     if(U_SUCCESS(status)) {
1472         for(i = 0; i<tailoredLen; i++) {
1473             if(!ucol_isTailored(coll, tailored[i], &status)) {
1474                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1475             }
1476         }
1477         for(i = 0; i<notTailoredLen; i++) {
1478             if(ucol_isTailored(coll, notTailored[i], &status)) {
1479                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1480             }
1481         }
1482         ucol_close(coll);
1483     }
1484     else {
1485         log_err_status(status, "Can't tailor rules\n");
1486     }
1487     /* Code coverage */
1488     status = U_ZERO_ERROR;
1489     coll = ucol_open("ja", &status);
1490     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1491         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1492     }
1493     ucol_close(coll);
1494 }
1495
1496
/*
 * Test strings for TestChMove, stored with \u escapes for u_unescape.
 * TestChMove expects every entry to compare less than every later entry.
 */
static const char chTest[][20] = {
  /* c */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  /* c followed by U+030C */
  "c\\u030C", "C\\u030C",
  /* h */
  "h", "H",
  "ha", "Ha", "harly",
  "hb", "HB",
  "hx", "HX",
  "hy", "HY",
  /* ch */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i */
  "i", "I", "iarly",
  /* r, also followed by U+030C */
  "r", "R",
  "r\\u030C", "R\\u030C",
  /* s, also followed by U+030C */
  "s", "S",
  "s\\u030C", "S\\u030C",
  /* z, also followed by U+030C */
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1516
1517 static void TestChMove(void) {
1518     UChar t1[256] = {0};
1519     UChar t2[256] = {0};
1520
1521     uint32_t i = 0, j = 0;
1522     uint32_t size = 0;
1523     UErrorCode status = U_ZERO_ERROR;
1524
1525     UCollator *coll = ucol_open("cs", &status);
1526
1527     if(U_SUCCESS(status)) {
1528         size = sizeof(chTest)/sizeof(chTest[0]);
1529         for(i = 0; i < size-1; i++) {
1530             for(j = i+1; j < size; j++) {
1531                 u_unescape(chTest[i], t1, 256);
1532                 u_unescape(chTest[j], t2, 256);
1533                 doTest(coll, t1, t2, UCOL_LESS);
1534             }
1535         }
1536     }
1537     else {
1538         log_data_err("Can't open collator");
1539     }
1540     ucol_close(coll);
1541 }
1542
1543
1544
1545
/*
 * Strings (with \u escapes) for the rule "&\u4e00 < a <<< A < b <<< B".
 * In the visible part of this file the table is referenced only by the
 * disabled code at the end of TestImplicitTailoring.
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1554
1555
/*
 * Feeds a table of tailorings to genericRulesStarter. Each table entry
 * holds the rule string, a list of strings and the number of strings in
 * that list.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
      {
        "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e",
        { "d", "e", "b", "c", "\\u4e00"},
        5
      },
      {
        "&\\u4e00 < a <<< A < b <<< B",
        { "\\u4e00", "a", "A", "b", "B", "\\u4e01"},
        6
      },
      {
        "&[before 1]\\u4e00 < \\u4e01 < \\u4e02",
        { "\\u4e01", "\\u4e02", "\\u4e00"},
        3
      },
      {
        "&[before 1]\\u4e01 < \\u4e02 < \\u4e03",
        { "\\u4e02", "\\u4e03", "\\u4e01"},
        3
      }
  };
  const int32_t caseCount = (int32_t)(sizeof(cases)/sizeof(cases[0]));
  int32_t caseIdx;

  for(caseIdx = 0; caseIdx < caseCount; caseIdx++) {
      genericRulesStarter(cases[caseIdx].rules, cases[caseIdx].data, cases[caseIdx].len);
  }

/*
  Disabled earlier form of this test, which walked impTest pairwise:

  UChar t1[256] = {0};
  UChar t2[256] = {0};

  const char *rule = "&\\u4e00 < a <<< A < b <<< B";

  uint32_t i = 0, j = 0;
  uint32_t size = 0;
  uint32_t ruleLen = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = NULL;
  ruleLen = u_unescape(rule, t1, 256);

  coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);

  if(U_SUCCESS(status)) {
    size = sizeof(impTest)/sizeof(impTest[0]);
    for(i = 0; i < size-1; i++) {
      for(j = i+1; j < size; j++) {
        u_unescape(impTest[i], t1, 256);
        u_unescape(impTest[j], t2, 256);
        doTest(coll, t1, t2, UCOL_LESS);
      }
    }
  }
  else {
    log_err("Can't open collator");
  }
  ucol_close(coll);
  */
}
1605
1606 static void TestFCDProblem(void) {
1607   UChar t1[256] = {0};
1608   UChar t2[256] = {0};
1609
1610   const char *s1 = "\\u0430\\u0306\\u0325";
1611   const char *s2 = "\\u04D1\\u0325";
1612
1613   UErrorCode status = U_ZERO_ERROR;
1614   UCollator *coll = ucol_open("", &status);
1615   u_unescape(s1, t1, 256);
1616   u_unescape(s2, t2, 256);
1617
1618   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1619   doTest(coll, t1, t2, UCOL_EQUAL);
1620
1621   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1622   doTest(coll, t1, t2, UCOL_EQUAL);
1623
1624   ucol_close(coll);
1625 }
1626
1627 /*
1628 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1629 We're only using NFC/NFD in this test.
1630 */
1631 #define NORM_BUFFER_TEST_LEN 18
1632 typedef struct {
1633   UChar32 u;
1634   UChar NFC[NORM_BUFFER_TEST_LEN];
1635   UChar NFD[NORM_BUFFER_TEST_LEN];
1636 } tester;
1637
1638 static void TestComposeDecompose(void) {
1639     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1640     static const UChar UNICODESET_STR[] = {
1641         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1642         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1643         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1644     };
1645     int32_t noOfLoc;
1646     int32_t i = 0, j = 0;
1647
1648     UErrorCode status = U_ZERO_ERROR;
1649     const char *locName = NULL;
1650     uint32_t nfcSize;
1651     uint32_t nfdSize;
1652     tester **t;
1653     uint32_t noCases = 0;
1654     UCollator *coll = NULL;
1655     UChar32 u = 0;
1656     UChar comp[NORM_BUFFER_TEST_LEN];
1657     uint32_t len = 0;
1658     UCollationElements *iter;
1659     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1660     int32_t charsToTestSize;
1661
1662     noOfLoc = uloc_countAvailable();
1663
1664     coll = ucol_open("", &status);
1665     if (U_FAILURE(status)) {
1666         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1667         return;
1668     }
1669     charsToTestSize = uset_size(charsToTest);
1670     if (charsToTestSize <= 0) {
1671         log_err("Set was zero. Missing data?\n");
1672         return;
1673     }
1674     t = malloc(charsToTestSize * sizeof(tester *));
1675     t[0] = (tester *)malloc(sizeof(tester));
1676     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1677
1678     for(u = 0; u < charsToTestSize; u++) {
1679         UChar32 ch = uset_charAt(charsToTest, u);
1680         len = 0;
1681         UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
1682         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1683         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1684
1685         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1686           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1687             t[noCases]->u = ch;
1688             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1689                 u_strncpy(t[noCases]->NFC, comp, len);
1690                 t[noCases]->NFC[len] = 0;
1691             }
1692             noCases++;
1693             t[noCases] = (tester *)malloc(sizeof(tester));
1694             uprv_memset(t[noCases], 0, sizeof(tester));
1695         }
1696     }
1697     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1698     uset_close(charsToTest);
1699     charsToTest = NULL;
1700
1701     for(u=0; u<(UChar32)noCases; u++) {
1702         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1703             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1704             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1705         }
1706     }
1707     /*
1708     for(u = 0; u < charsToTestSize; u++) {
1709       if(!(u&0xFFFF)) {
1710         log_verbose("%08X ", u);
1711       }
1712       uprv_memset(t[noCases], 0, sizeof(tester));
1713       t[noCases]->u = u;
1714       len = 0;
1715       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1716       comp[len] = 0;
1717       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1718       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1719       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1720       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1721     }
1722     */
1723
1724     ucol_close(coll);
1725
1726     log_verbose("Testing locales, number of cases = %i\n", noCases);
1727     for(i = 0; i<noOfLoc; i++) {
1728         status = U_ZERO_ERROR;
1729         locName = uloc_getAvailable(i);
1730         if(hasCollationElements(locName)) {
1731             char cName[256];
1732             UChar name[256];
1733             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1734
1735             for(j = 0; j<nameSize; j++) {
1736                 cName[j] = (char)name[j];
1737             }
1738             cName[nameSize] = 0;
1739             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1740
1741             coll = ucol_open(locName, &status);
1742             ucol_setStrength(coll, UCOL_IDENTICAL);
1743             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1744
1745             for(u=0; u<(UChar32)noCases; u++) {
1746                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1747                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1748                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1749                     log_verbose("Testing NFC\n");
1750                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1751                     backAndForth(iter);
1752                     log_verbose("Testing NFD\n");
1753                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1754                     backAndForth(iter);
1755                 }
1756             }
1757             ucol_closeElements(iter);
1758             ucol_close(coll);
1759         }
1760     }
1761     for(u = 0; u <= (UChar32)noCases; u++) {
1762         free(t[u]);
1763     }
1764     free(t);
1765 }
1766
1767 static void TestEmptyRule(void) {
1768   UErrorCode status = U_ZERO_ERROR;
1769   UChar rulez[] = { 0 };
1770   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1771
1772   ucol_close(coll);
1773 }
1774
1775 static void TestUCARules(void) {
1776   UErrorCode status = U_ZERO_ERROR;
1777   UChar b[256];
1778   UChar *rules = b;
1779   uint32_t ruleLen = 0;
1780   UCollator *UCAfromRules = NULL;
1781   UCollator *coll = ucol_open("", &status);
1782   if(status == U_FILE_ACCESS_ERROR) {
1783     log_data_err("Is your data around?\n");
1784     return;
1785   } else if(U_FAILURE(status)) {
1786     log_err("Error opening collator\n");
1787     return;
1788   }
1789   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1790
1791   log_verbose("TestUCARules\n");
1792   if(ruleLen > 256) {
1793     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1794     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1795   }
1796   log_verbose("Rules length is %d\n", ruleLen);
1797   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1798   if(U_SUCCESS(status)) {
1799     ucol_close(UCAfromRules);
1800   } else {
1801     log_verbose("Unable to create a collator from UCARules!\n");
1802   }
1803 /*
1804   u_unescape(blah, b, 256);
1805   ucol_getSortKey(coll, b, 1, res, 256);
1806 */
1807   ucol_close(coll);
1808   if(rules != b) {
1809     free(rules);
1810   }
1811 }
1812
1813
1814 /* Pinyin tonal order */
1815 /*
1816     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1817           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1818     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1819     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1820     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1821     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1822       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1823 .. (\u00fc)
1824
1825 However, in testing we got the following order:
1826     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1827           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1828     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1829 .. (\u0113)
1830     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1831     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1832     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1833 .. (\u01d8)
1834       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1835 */
1836
/**
 * Runs genericRulesStarter() with a tailoring that uses [before 1] resets to
 * place the tone-marked pinyin vowels relative to a, e, i, o and u, followed
 * by the diaeresis forms tailored after u. The string table below is passed
 * along unchanged (presumably the expected ascending order).
 */
static void TestBefore(void) {
  static const char *const pinyinRules =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  static const char *vowelOrder[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  const uint32_t vowelCount = (uint32_t)(sizeof(vowelOrder)/sizeof(vowelOrder[0]));

  genericRulesStarter(pinyinRules, vowelOrder, vowelCount);
}
1855
#if 0
/* Disabled: superseded by TestBeforePinyin. */
/* Passes the pinyin vowel table to genericLocaleStarter() for the "zh" locale. */
static void TestJ784(void) {
  static const char *zhVowels[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  const uint32_t zhVowelCount = (uint32_t)(sizeof(zhVowels)/sizeof(zhVowels[0]));

  genericLocaleStarter("zh", zhVowels, zhVowelCount);
}
#endif
1871
#if 0
/* Disabled: superseded by the changes to the lv locale. */
/* Passes the strings I, i, Y, y to genericLocaleStarter() for the "lv" locale. */
static void TestJ831(void) {
  static const char *lvLetters[] = {
    "I",
    "i",
    "Y",
    "y"
  };
  const uint32_t lvLetterCount = (uint32_t)(sizeof(lvLetters)/sizeof(lvLetters[0]));

  genericLocaleStarter("lv", lvLetters, lvLetterCount);
}
#endif
1884
/**
 * Passes one table of a/ae/ligature strings first to genericLocaleStarter()
 * for the "fr" locale and then to genericRulesStarter() with a
 * "[backwards 2]" tailoring that gives the ae ligatures "e"/"E" expansions.
 */
static void TestJ815(void) {
  static const char *const backwardsRules = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";
  static const char *aeStrings[] = {
    "aa",
      "Aa",
      "ab",
      "Ab",
      "ad",
      "Ad",
      "ae",
      "Ae",
      "\\u00e6",
      "\\u00c6",
      "af",
      "Af",
      "b",
      "B"
  };
  const uint32_t aeCount = (uint32_t)(sizeof(aeStrings)/sizeof(aeStrings[0]));

  genericLocaleStarter("fr", aeStrings, aeCount);
  genericRulesStarter(backwardsRules, aeStrings, aeCount);
}
1905
1906
1907 /*
1908 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1909 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1910 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1911 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1912 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1913 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1914 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1915 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1916 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1917 */
1918 static void TestRedundantRules(void) {
1919   int32_t i;
1920
1921   static const struct {
1922       const char *rules;
1923       const char *expectedRules;
1924       const char *testdata[8];
1925       uint32_t testdatalen;
1926   } tests[] = {
1927     /* this test conflicts with positioning of CODAN placeholder */
1928        /*{
1929         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1930         "&\\u2089<<<x",
1931         {"\\u2089", "x"}, 2
1932        }, */
1933     /* this test conflicts with the [before x] syntax tightening */
1934       /*{
1935         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1936         "&\\u0252<<<x",
1937         {"\\u0252", "x"}, 2
1938       }, */
1939     /* this test conflicts with the [before x] syntax tightening */
1940       /*{
1941          "& a < b <<< c << d <<< e& [before 1] e <<< x",
1942          "& a <<< x < b <<< c << d <<< e",
1943         {"a", "x", "b", "c", "d", "e"}, 6
1944       }, */
1945       {
1946         "& a < b < c < d& [before 1] c < m",
1947         "& a < b < m < c < d",
1948         {"a", "b", "m", "c", "d"}, 5
1949       },
1950       {
1951         "& a < b <<< c << d <<< e& [before 3] e <<< x",
1952         "& a < b <<< c << d <<< x <<< e",
1953         {"a", "b", "c", "d", "x", "e"}, 6
1954       },
1955     /* this test conflicts with the [before x] syntax tightening */
1956       /* {
1957         "& a < b <<< c << d <<< e& [before 2] e <<< x",
1958         "& a < b <<< c <<< x << d <<< e",
1959         {"a", "b", "c", "x", "d", "e"},, 6
1960       }, */
1961       {
1962         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1963         "& a < b <<< c << d <<< e <<< f < x < g",
1964         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1965       },
1966       {
1967         "& a <<< b << c < d& a < m",
1968         "& a <<< b << c < m < d",
1969         {"a", "b", "c", "m", "d"}, 5
1970       },
1971       {
1972         "&a<b<<b\\u0301 &z<b",
1973         "&a<b\\u0301 &z<b",
1974         {"a", "b\\u0301", "z", "b"}, 4
1975       },
1976       {
1977         "&z<m<<<q<<<m",
1978         "&z<q<<<m",
1979         {"z", "q", "m"},3
1980       },
1981       {
1982         "&z<<<m<q<<<m",
1983         "&z<q<<<m",
1984         {"z", "q", "m"}, 3
1985       },
1986       {
1987         "& a < b < c < d& r < c",
1988         "& a < b < d& r < c",
1989         {"a", "b", "d"}, 3
1990       },
1991       {
1992         "& a < b < c < d& r < c",
1993         "& a < b < d& r < c",
1994         {"r", "c"}, 2
1995       },
1996       {
1997         "& a < b < c < d& c < m",
1998         "& a < b < c < m < d",
1999         {"a", "b", "c", "m", "d"}, 5
2000       },
2001       {
2002         "& a < b < c < d& a < m",
2003         "& a < m < b < c < d",
2004         {"a", "m", "b", "c", "d"}, 5
2005       }
2006   };
2007
2008
2009   UCollator *credundant = NULL;
2010   UCollator *cresulting = NULL;
2011   UErrorCode status = U_ZERO_ERROR;
2012   UChar rlz[2048] = { 0 };
2013   uint32_t rlen = 0;
2014
2015   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2016     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2017     rlen = u_unescape(tests[i].rules, rlz, 2048);
2018
2019     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2020     if(status == U_FILE_ACCESS_ERROR) {
2021       log_data_err("Is your data around?\n");
2022       return;
2023     } else if(U_FAILURE(status)) {
2024       log_err("Error opening collator\n");
2025       return;
2026     }
2027
2028     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2029     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2030
2031     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2032
2033     ucol_close(credundant);
2034     ucol_close(cresulting);
2035
2036     log_verbose("testing using data\n");
2037
2038     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2039   }
2040
2041 }
2042
2043 static void TestExpansionSyntax(void) {
2044   int32_t i;
2045
2046   const static char *rules[] = {
2047     "&AE <<< a << b <<< c &d <<< f",
2048     "&AE <<< a <<< b << c << d < e < f <<< g",
2049     "&AE <<< B <<< C / D <<< F"
2050   };
2051
2052   const static char *expectedRules[] = {
2053     "&A <<< a / E << b / E <<< c /E  &d <<< f",
2054     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2055     "&A <<< B / E <<< C / ED <<< F / E"
2056   };
2057
2058   const static char *testdata[][8] = {
2059     {"AE", "a", "b", "c"},
2060     {"AE", "a", "b", "c", "d", "e", "f", "g"},
2061     {"AE", "B", "C"} /* / ED <<< F / E"},*/
2062   };
2063
2064   const static uint32_t testdatalen[] = {
2065       4,
2066       8,
2067       3
2068   };
2069
2070
2071
2072   UCollator *credundant = NULL;
2073   UCollator *cresulting = NULL;
2074   UErrorCode status = U_ZERO_ERROR;
2075   UChar rlz[2048] = { 0 };
2076   uint32_t rlen = 0;
2077
2078   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2079     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2080     rlen = u_unescape(rules[i], rlz, 2048);
2081
2082     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2083     if(status == U_FILE_ACCESS_ERROR) {
2084       log_data_err("Is your data around?\n");
2085       return;
2086     } else if(U_FAILURE(status)) {
2087       log_err("Error opening collator\n");
2088       return;
2089     }
2090     rlen = u_unescape(expectedRules[i], rlz, 2048);
2091     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2092
2093     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2094     /* as a hard error test, but only in information mode */
2095     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2096
2097     ucol_close(credundant);
2098     ucol_close(cresulting);
2099
2100     log_verbose("testing using data\n");
2101
2102     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2103   }
2104 }
2105
2106 static void TestCase(void)
2107 {
2108     const static UChar gRules[MAX_TOKEN_LEN] =
2109     /*" & 0 < 1,\u2461<a,A"*/
2110     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2111
2112     const static UChar testCase[][MAX_TOKEN_LEN] =
2113     {
2114         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2115         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2116         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2117         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2118     };
2119
2120     const static UCollationResult caseTestResults[][9] =
2121     {
2122         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2123         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2124         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2125         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2126     };
2127
2128     const static UColAttributeValue caseTestAttributes[][2] =
2129     {
2130         { UCOL_LOWER_FIRST, UCOL_OFF},
2131         { UCOL_UPPER_FIRST, UCOL_OFF},
2132         { UCOL_LOWER_FIRST, UCOL_ON},
2133         { UCOL_UPPER_FIRST, UCOL_ON}
2134     };
2135     int32_t i,j,k;
2136     UErrorCode status = U_ZERO_ERROR;
2137     UCollationElements *iter;
2138     UCollator  *myCollation;
2139     myCollation = ucol_open("en_US", &status);
2140
2141     if(U_FAILURE(status)){
2142         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2143         return;
2144     }
2145     log_verbose("Testing different case settings\n");
2146     ucol_setStrength(myCollation, UCOL_TERTIARY);
2147
2148     for(k = 0; k<4; k++) {
2149       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2150       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2151       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2152       for (i = 0; i < 3 ; i++) {
2153         for(j = i+1; j<4; j++) {
2154           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2155         }
2156       }
2157     }
2158     ucol_close(myCollation);
2159
2160     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2161     if(U_FAILURE(status)){
2162         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2163         return;
2164     }
2165     log_verbose("Testing different case settings with custom rules\n");
2166     ucol_setStrength(myCollation, UCOL_TERTIARY);
2167
2168     for(k = 0; k<4; k++) {
2169       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2170       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2171       for (i = 0; i < 3 ; i++) {
2172         for(j = i+1; j<4; j++) {
2173           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2174           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2175           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2176           backAndForth(iter);
2177           ucol_closeElements(iter);
2178           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2179           backAndForth(iter);
2180           ucol_closeElements(iter);
2181         }
2182       }
2183     }
2184     ucol_close(myCollation);
2185     {
2186       const static char *lowerFirst[] = {
2187         "h",
2188         "H",
2189         "ch",
2190         "Ch",
2191         "CH",
2192         "cha",
2193         "chA",
2194         "Cha",
2195         "ChA",
2196         "CHa",
2197         "CHA",
2198         "i",
2199         "I"
2200       };
2201
2202       const static char *upperFirst[] = {
2203         "H",
2204         "h",
2205         "CH",
2206         "Ch",
2207         "ch",
2208         "CHA",
2209         "CHa",
2210         "ChA",
2211         "Cha",
2212         "chA",
2213         "cha",
2214         "I",
2215         "i"
2216       };
2217       log_verbose("mixed case test\n");
2218       log_verbose("lower first, case level off\n");
2219       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2220       log_verbose("upper first, case level off\n");
2221       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2222       log_verbose("lower first, case level on\n");
2223       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2224       log_verbose("upper first, case level on\n");
2225       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2226     }
2227
2228 }
2229
2230 static void TestIncrementalNormalize(void) {
2231
2232     /*UChar baseA     =0x61;*/
2233     UChar baseA     =0x41;
2234 /*    UChar baseB     = 0x42;*/
2235     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2236     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2237     /*
2238         0x316 is combining grave accent below, cc=220
2239         0x321 is combining palatalized hook below, cc=202
2240         0x300 is combining grave accent, cc=230
2241     */
2242
2243 #define MAXSLEN 2000
2244     /*int          maxSLen   = 64000;*/
2245     int          sLen;
2246     int          i;
2247
2248     UCollator        *coll;
2249     UErrorCode       status = U_ZERO_ERROR;
2250     UCollationResult result;
2251
2252     int32_t myQ = getTestOption(QUICK_OPTION);
2253
2254     if(getTestOption(QUICK_OPTION) < 0) {
2255         setTestOption(QUICK_OPTION, 1);
2256     }
2257
2258     {
2259         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2260         /*          most buffers along the way.*/
2261         UChar            strA[MAXSLEN+1];
2262         UChar            strB[MAXSLEN+1];
2263
2264         coll = ucol_open("en_US", &status);
2265         if(status == U_FILE_ACCESS_ERROR) {
2266           log_data_err("Is your data around?\n");
2267           return;
2268         } else if(U_FAILURE(status)) {
2269           log_err("Error opening collator\n");
2270           return;
2271         }
2272         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2273
2274         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2275         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2276         /*for (sLen = 1000; sLen<1001; sLen++) {*/
2277         for (sLen = 500; sLen<501; sLen++) {
2278         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2279             strA[0] = baseA;
2280             strB[0] = baseA;
2281             for (i=1; i<=sLen-1; i++) {
2282                 strA[i] = ccMix[i % 3];
2283                 strB[sLen-i] = ccMix[i % 3];
2284             }
2285             strA[sLen]   = 0;
2286             strB[sLen]   = 0;
2287
2288             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2289             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2290             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2291             doTest(coll, strA, strB, UCOL_EQUAL);
2292         }
2293     }
2294
2295     setTestOption(QUICK_OPTION, myQ);
2296
2297
2298     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2299     /*         of the string.  Checks a couple of edge cases.*/
2300
2301     {
2302         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2303         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2304         ucol_setStrength(coll, UCOL_TERTIARY);
2305         doTest(coll, strA, strB, UCOL_EQUAL);
2306     }
2307
2308     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2309
2310     {
2311       /* New UCA  3.1.1.
2312        * test below used a code point from Desseret, which sorts differently
2313        * than d800 dc00
2314        */
2315         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2316         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2317         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2318         ucol_setStrength(coll, UCOL_TERTIARY);
2319         doTest(coll, strA, strB, UCOL_GREATER);
2320     }
2321
2322     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2323
2324     {
2325         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2326         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2327         char  sortKeyA[50];
2328         char  sortKeyAz[50];
2329         char  sortKeyB[50];
2330         char  sortKeyBz[50];
2331         int   r;
2332
2333         /* there used to be -3 here. Hmmmm.... */
2334         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2335         result = ucol_strcoll(coll, strA, 3, strB, 3);
2336         if (result != UCOL_GREATER) {
2337             log_err("ERROR 1 in test 4\n");
2338         }
2339         result = ucol_strcoll(coll, strA, -1, strB, -1);
2340         if (result != UCOL_EQUAL) {
2341             log_err("ERROR 2 in test 4\n");
2342         }
2343
2344         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2345         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2346         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2347         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2348
2349         r = strcmp(sortKeyA, sortKeyAz);
2350         if (r <= 0) {
2351             log_err("Error 3 in test 4\n");
2352         }
2353         r = strcmp(sortKeyA, sortKeyB);
2354         if (r <= 0) {
2355             log_err("Error 4 in test 4\n");
2356         }
2357         r = strcmp(sortKeyAz, sortKeyBz);
2358         if (r != 0) {
2359             log_err("Error 5 in test 4\n");
2360         }
2361
2362         ucol_setStrength(coll, UCOL_IDENTICAL);
2363         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2364         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2365         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2366         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2367
2368         r = strcmp(sortKeyA, sortKeyAz);
2369         if (r <= 0) {
2370             log_err("Error 6 in test 4\n");
2371         }
2372         r = strcmp(sortKeyA, sortKeyB);
2373         if (r <= 0) {
2374             log_err("Error 7 in test 4\n");
2375         }
2376         r = strcmp(sortKeyAz, sortKeyBz);
2377         if (r != 0) {
2378             log_err("Error 8 in test 4\n");
2379         }
2380         ucol_setStrength(coll, UCOL_TERTIARY);
2381     }
2382
2383
2384     /*  Test 5:  Null characters in non-normal source strings.*/
2385
2386     {
2387         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2388         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2389         char  sortKeyA[50];
2390         char  sortKeyAz[50];
2391         char  sortKeyB[50];
2392         char  sortKeyBz[50];
2393         int   r;
2394
2395         result = ucol_strcoll(coll, strA, 6, strB, 6);
2396         if (result != UCOL_GREATER) {
2397             log_err("ERROR 1 in test 5\n");
2398         }
2399         result = ucol_strcoll(coll, strA, -1, strB, -1);
2400         if (result != UCOL_EQUAL) {
2401             log_err("ERROR 2 in test 5\n");
2402         }
2403
2404         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2405         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2406         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2407         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2408
2409         r = strcmp(sortKeyA, sortKeyAz);
2410         if (r <= 0) {
2411             log_err("Error 3 in test 5\n");
2412         }
2413         r = strcmp(sortKeyA, sortKeyB);
2414         if (r <= 0) {
2415             log_err("Error 4 in test 5\n");
2416         }
2417         r = strcmp(sortKeyAz, sortKeyBz);
2418         if (r != 0) {
2419             log_err("Error 5 in test 5\n");
2420         }
2421
2422         ucol_setStrength(coll, UCOL_IDENTICAL);
2423         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2424         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2425         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2426         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2427
2428         r = strcmp(sortKeyA, sortKeyAz);
2429         if (r <= 0) {
2430             log_err("Error 6 in test 5\n");
2431         }
2432         r = strcmp(sortKeyA, sortKeyB);
2433         if (r <= 0) {
2434             log_err("Error 7 in test 5\n");
2435         }
2436         r = strcmp(sortKeyAz, sortKeyBz);
2437         if (r != 0) {
2438             log_err("Error 8 in test 5\n");
2439         }
2440         ucol_setStrength(coll, UCOL_TERTIARY);
2441     }
2442
2443
2444     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2445
2446     {
2447         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2448         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2449
2450         result = ucol_strcoll(coll, strA, 5, strB, 5);
2451         if (result != UCOL_LESS) {
2452             log_err("Error 1 in test 6\n");
2453         }
2454         result = ucol_strcoll(coll, strA, -1, strB, -1);
2455         if (result != UCOL_EQUAL) {
2456             log_err("Error 2 in test 6\n");
2457         }
2458     }
2459
2460     ucol_close(coll);
2461 }
2462
2463
2464
#if 0
/*
 * Disabled test: unescapes each string of caseBitData, asks
 * ucol_uprv_getCaseBits() for its case bits using the "" locale collator and
 * compares the answer with the matching entry of results[].
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  if(U_FAILURE(status)) {
    log_err("Error opening collator\n");
    return;
  }

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator opened above. */
  ucol_close(UCA);
}
#endif
2492
2493 static void TestHangulTailoring(void) {
2494     static const char *koreanData[] = {
2495         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2496             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2497             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2498             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2499             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2500             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2501     };
2502
2503     const char *rules =
2504         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2505         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2506         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2507         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2508         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2509         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2510
2511
2512   UErrorCode status = U_ZERO_ERROR;
2513   UChar rlz[2048] = { 0 };
2514   uint32_t rlen = u_unescape(rules, rlz, 2048);
2515
2516   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2517   if(status == U_FILE_ACCESS_ERROR) {
2518     log_data_err("Is your data around?\n");
2519     return;
2520   } else if(U_FAILURE(status)) {
2521     log_err("Error opening collator\n");
2522     return;
2523   }
2524
2525   log_verbose("Using start of korean rules\n");
2526
2527   if(U_SUCCESS(status)) {
2528     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2529   } else {
2530     log_err("Unable to open collator with rules %s\n", rules);
2531   }
2532
2533   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2534   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2535   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2536
2537   ucol_close(coll);
2538
2539   log_verbose("Using ko__LOTUS locale\n");
2540   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2541 }
2542
2543 static void TestCompressOverlap(void) {
2544     UChar       secstr[150];
2545     UChar       tertstr[150];
2546     UErrorCode  status = U_ZERO_ERROR;
2547     UCollator  *coll;
2548     char        result[200];
2549     uint32_t    resultlen;
2550     int         count = 0;
2551     char       *tempptr;
2552
2553     coll = ucol_open("", &status);
2554
2555     if (U_FAILURE(status)) {
2556         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2557         return;
2558     }
2559     while (count < 149) {
2560         secstr[count] = 0x0020; /* [06, 05, 05] */
2561         tertstr[count] = 0x0020;
2562         count ++;
2563     }
2564
2565     /* top down compression ----------------------------------- */
2566     secstr[count] = 0x0332; /* [, 87, 05] */
2567     tertstr[count] = 0x3000; /* [06, 05, 07] */
2568
2569     /* no compression secstr should have 150 secondary bytes, tertstr should
2570     have 150 tertiary bytes.
2571     with correct overlapping compression, secstr should have 4 secondary
2572     bytes, tertstr should have > 2 tertiary bytes */
2573     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2574     tempptr = uprv_strchr(result, 1) + 1;
2575     while (*(tempptr + 1) != 1) {
2576         /* the last secondary collation element is not checked since it is not
2577         part of the compression */
2578         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2579             log_err("Secondary compression overlapped\n");
2580         }
2581         tempptr ++;
2582     }
2583
2584     /* tertiary top/bottom/common for en_US is similar to the secondary
2585     top/bottom/common */
2586     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2587     tempptr = uprv_strrchr(result, 1) + 1;
2588     while (*(tempptr + 1) != 0) {
2589         /* the last secondary collation element is not checked since it is not
2590         part of the compression */
2591         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2592             log_err("Tertiary compression overlapped\n");
2593         }
2594         tempptr ++;
2595     }
2596
2597     /* bottom up compression ------------------------------------- */
2598     secstr[count] = 0;
2599     tertstr[count] = 0;
2600     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2601     tempptr = uprv_strchr(result, 1) + 1;
2602     while (*(tempptr + 1) != 1) {
2603         /* the last secondary collation element is not checked since it is not
2604         part of the compression */
2605         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2606             log_err("Secondary compression overlapped\n");
2607         }
2608         tempptr ++;
2609     }
2610
2611     /* tertiary top/bottom/common for en_US is similar to the secondary
2612     top/bottom/common */
2613     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2614     tempptr = uprv_strrchr(result, 1) + 1;
2615     while (*(tempptr + 1) != 0) {
2616         /* the last secondary collation element is not checked since it is not
2617         part of the compression */
2618         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2619             log_err("Tertiary compression overlapped\n");
2620         }
2621         tempptr ++;
2622     }
2623
2624     ucol_close(coll);
2625 }
2626
/*
 * Hands one fixed triple of Cyrillic test strings to genericLocaleStarter()
 * for the "root" locale and then to genericRulesStarter() once for each of
 * six tailorings that involve U+0410 and U+04D0.
 */
static void TestCyrillicTailoring(void) {
    /* The same three strings are passed, in this order, to every call below. */
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* Tailorings exercised after the root run, in the order they are tried. */
    static const char *tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };
    const int tailoringCount = (int)(sizeof(tailorings) / sizeof(tailorings[0]));
    int t;

    /* Russian overrides contractions, so the "ru" locale is no longer a
       valid subject for this test; only "root" is run. */
    genericLocaleStarter("root", test, 3);

    for (t = 0; t < tailoringCount; t++) {
        genericRulesStarter(tailorings[t], test, 3);
    }
}
2645
/*
 * Runs two string triples through genericRulesStarter() with a rule that
 * suppresses contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
  /* Single rule shared by both runs. */
  static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };
  /* Tables in the order they are exercised. */
  const char **tables[2];
  int which;

  tables[0] = testNoCont;
  tables[1] = testNoCont2;

  for (which = 0; which < 2; which++) {
    genericRulesStarter(suppressRule, tables[which], 3);
  }
}
2662
2663 static void TestContraction(void) {
2664     const static char *testrules[] = {
2665         "&A = AB / B",
2666         "&A = A\\u0306/\\u0306",
2667         "&c = ch / h"
2668     };
2669     const static UChar testdata[][2] = {
2670         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2671         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2672         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2673     };
2674     const static UChar testdata2[][2] = {
2675         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2676         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2677         {0x0063 /* 'c' */, 0x006C /* 'l' */}
2678     };
2679     const static char *testrules3[] = {
2680         "&z < xyz &xyzw << B",
2681         "&z < xyz &xyz << B / w",
2682         "&z < ch &achm << B",
2683         "&z < ch &a << B / chm",
2684         "&\\ud800\\udc00w << B",
2685         "&\\ud800\\udc00 << B / w",
2686         "&a\\ud800\\udc00m << B",
2687         "&a << B / \\ud800\\udc00m",
2688     };
2689
2690     UErrorCode  status   = U_ZERO_ERROR;
2691     UCollator  *coll;
2692     UChar       rule[256] = {0};
2693     uint32_t    rlen     = 0;
2694     int         i;
2695
2696     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2697         UCollationElements *iter1;
2698         int j = 0;
2699         log_verbose("Rule %s for testing\n", testrules[i]);
2700         rlen = u_unescape(testrules[i], rule, 32);
2701         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2702         if (U_FAILURE(status)) {
2703             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2704             return;
2705         }
2706         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2707         if (U_FAILURE(status)) {
2708             log_err("Collation iterator creation failed\n");
2709             return;
2710         }
2711         while (j < 2) {
2712             UCollationElements *iter2 = ucol_openElements(coll,
2713                                                          &(testdata[i][j]),
2714                                                          1, &status);
2715             uint32_t ce;
2716             if (U_FAILURE(status)) {
2717                 log_err("Collation iterator creation failed\n");
2718                 return;
2719             }
2720             ce = ucol_next(iter2, &status);
2721             while (ce != UCOL_NULLORDER) {
2722                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2723                     log_err("Collation elements in contraction split does not match\n");
2724                     return;
2725                 }
2726                 ce = ucol_next(iter2, &status);
2727             }
2728             j ++;
2729             ucol_closeElements(iter2);
2730         }
2731         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2732             log_err("Collation elements not exhausted\n");
2733             return;
2734         }
2735         ucol_closeElements(iter1);
2736         ucol_close(coll);
2737     }
2738
2739     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2740     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2741     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2742         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2743                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2744                 testdata2[1][1]);
2745         return;
2746     }
2747     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2748         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2749                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2750                 testdata2[2][1]);
2751         return;
2752     }
2753     ucol_close(coll);
2754
2755     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2756         UCollator          *coll1,
2757                            *coll2;
2758         UCollationElements *iter1,
2759                            *iter2;
2760         UChar               ch = 0x0042 /* 'B' */;
2761         uint32_t            ce;
2762         rlen = u_unescape(testrules3[i], rule, 32);
2763         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2764         rlen = u_unescape(testrules3[i + 1], rule, 32);
2765         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2766         if (U_FAILURE(status)) {
2767             log_err("Collator creation failed %s\n", testrules[i]);
2768             return;
2769         }
2770         iter1 = ucol_openElements(coll1, &ch, 1, &status);
2771         iter2 = ucol_openElements(coll2, &ch, 1, &status);
2772         if (U_FAILURE(status)) {
2773             log_err("Collation iterator creation failed\n");
2774             return;
2775         }
2776         ce = ucol_next(iter1, &status);
2777         if (U_FAILURE(status)) {
2778             log_err("Retrieving ces failed\n");
2779             return;
2780         }
2781         while (ce != UCOL_NULLORDER) {
2782             if (ce != (uint32_t)ucol_next(iter2, &status)) {
2783                 log_err("CEs does not match\n");
2784                 return;
2785             }
2786             ce = ucol_next(iter1, &status);
2787             if (U_FAILURE(status)) {
2788                 log_err("Retrieving ces failed\n");
2789                 return;
2790             }
2791         }
2792         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2793             log_err("CEs not exhausted\n");
2794             return;
2795         }
2796         ucol_closeElements(iter1);
2797         ucol_closeElements(iter2);
2798         ucol_close(coll1);
2799         ucol_close(coll2);
2800     }
2801 }
2802
2803 static void TestExpansion(void) {
2804     const static char *testrules[] = {
2805         "&J << K / B & K << M",
2806         "&J << K / B << M"
2807     };
2808     const static UChar testdata[][3] = {
2809         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2810         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2811         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2812         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2813         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2814         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2815     };
2816
2817     UErrorCode  status   = U_ZERO_ERROR;
2818     UCollator  *coll;
2819     UChar       rule[256] = {0};
2820     uint32_t    rlen     = 0;
2821     int         i;
2822
2823     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2824         int j = 0;
2825         log_verbose("Rule %s for testing\n", testrules[i]);
2826         rlen = u_unescape(testrules[i], rule, 32);
2827         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2828         if (U_FAILURE(status)) {
2829             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2830             return;
2831         }
2832
2833         for (j = 0; j < 5; j ++) {
2834             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2835         }
2836         ucol_close(coll);
2837     }
2838 }
2839
#if 0
/*
 * Compiled out.  Per the original authors this test probes current
 * limitations of the engine and always fails, so it is disabled by default.
 * Each section below runs one rule against several string tables.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *orderA[] = {"add","b","adf"};
    static const char *orderB[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, orderA, sizeof orderA / sizeof orderA[0]);
    genericRulesStarter(rule, orderB, sizeof orderB / sizeof orderB[0]);
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *orderA[] = {"ad","c","af","f","ch","h"};
    static const char *orderB[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, orderA, sizeof orderA / sizeof orderA[0]);
    genericRulesStarter(rule, orderB, sizeof orderB / sizeof orderB[0]);
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* normalization on first, then off; both tables each time */
    genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOn, 1);
    genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOn, 1);
    genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOff, 1);
    genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOff, 1);
  }
  /* normalization: contractions spanning normalization */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOn, 1);
    genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOn, 1);
    genericRulesStarterWithOptions(rule, orderA, 2, attrs, normOff, 1);
    genericRulesStarterWithOptions(rule, orderB, 2, attrs, normOff, 1);
  }
  /* variable top:  */
  {
    /* alternative rules that were tried: */
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue shifted[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue nonIgnorable[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
    const uint32_t attrCount = sizeof attrs / sizeof attrs[0];

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, orderC, sizeof orderC / sizeof orderC[0], attrs, shifted, attrCount);
    genericRulesStarterWithOptions(rule, orderA, sizeof orderA / sizeof orderA[0], attrs, shifted, attrCount);
    genericRulesStarterWithOptions(rule, orderB, sizeof orderB / sizeof orderB[0], attrs, shifted, attrCount);
    genericRulesStarterWithOptions(rule, orderA, sizeof orderA / sizeof orderA[0], attrs, nonIgnorable, attrCount);
    genericRulesStarterWithOptions(rule, orderB, sizeof orderB / sizeof orderB[0], attrs, nonIgnorable, attrCount);
  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *orderA[] = {"c","CH","Ch","cH","ch"};
    static const char *orderB[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};
    const uint32_t attrCount = sizeof attrs / sizeof attrs[0];

    log_verbose("case level\n");
    /* only the upper-first setting is exercised; the UCOL_OFF runs were
       already commented out in the original */
    genericRulesStarterWithOptions(rule, orderA, sizeof orderA / sizeof orderA[0], attrs, upperFirst, attrCount);
    genericRulesStarterWithOptions(rule, orderB, sizeof orderB / sizeof orderB[0], attrs, upperFirst, attrCount);
  }

}
#endif
2931
2932 static void TestBocsuCoverage(void) {
2933   UErrorCode status = U_ZERO_ERROR;
2934   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2935   UChar       test[256] = {0};
2936   uint32_t    tlen     = u_unescape(testString, test, 32);
2937   uint8_t key[256]     = {0};
2938   uint32_t klen         = 0;
2939
2940   UCollator *coll = ucol_open("", &status);
2941   if(U_SUCCESS(status)) {
2942   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2943
2944   klen = ucol_getSortKey(coll, test, tlen, key, 256);
2945
2946   ucol_close(coll);
2947   } else {
2948     log_data_err("Couldn't open UCA\n");
2949   }
2950 }
2951
2952 static void TestVariableTopSetting(void) {
2953   UErrorCode status = U_ZERO_ERROR;
2954   const UChar *current = NULL;
2955   uint32_t varTopOriginal = 0, varTop1, varTop2;
2956   UCollator *coll = ucol_open("", &status);
2957   if(U_SUCCESS(status)) {
2958
2959   uint32_t strength = 0;
2960   uint16_t specs = 0;
2961   uint32_t chOffset = 0;
2962   uint32_t chLen = 0;
2963   uint32_t exOffset = 0;
2964   uint32_t exLen = 0;
2965   uint32_t oldChOffset = 0;
2966   uint32_t oldChLen = 0;
2967   uint32_t oldExOffset = 0;
2968   uint32_t oldExLen = 0;
2969   uint32_t prefixOffset = 0;
2970   uint32_t prefixLen = 0;
2971
2972   UBool startOfRules = TRUE;
2973   UColTokenParser src;
2974   UColOptionSet opts;
2975
2976   UChar *rulesCopy = NULL;
2977   uint32_t rulesLen;
2978
2979   UCollationResult result;
2980
2981   UChar first[256] = { 0 };
2982   UChar second[256] = { 0 };
2983   UParseError parseError;
2984   int32_t myQ = getTestOption(QUICK_OPTION);
2985
2986   uprv_memset(&src, 0, sizeof(UColTokenParser));
2987
2988   src.opts = &opts;
2989
2990   if(getTestOption(QUICK_OPTION) <= 0) {
2991     setTestOption(QUICK_OPTION, 1);
2992   }
2993
2994   /* this test will fail when normalization is turned on */
2995   /* therefore we always turn off exhaustive mode for it */
2996   { /* QUICK > 0*/
2997     log_verbose("Slide variable top over UCARules\n");
2998     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2999     rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3000     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3001
3002     if(U_SUCCESS(status) && rulesLen > 0) {
3003       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3004       src.current = src.source = rulesCopy;
3005       src.end = rulesCopy+rulesLen;
3006       src.extraCurrent = src.end;
3007       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3008
3009           /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3010            the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3011       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3012         strength = src.parsedToken.strength;
3013         chOffset = src.parsedToken.charsOffset;
3014         chLen = src.parsedToken.charsLen;
3015         exOffset = src.parsedToken.extensionOffset;
3016         exLen = src.parsedToken.extensionLen;
3017         prefixOffset = src.parsedToken.prefixOffset;
3018         prefixLen = src.parsedToken.prefixLen;
3019         specs = src.parsedToken.flags;
3020
3021         startOfRules = FALSE;
3022         {
3023           log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3024         }
3025         if(strength == UCOL_PRIMARY) {
3026           status = U_ZERO_ERROR;
3027           varTopOriginal = ucol_getVariableTop(coll, &status);
3028           varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3029           if(U_FAILURE(status)) {
3030             char buffer[256];
3031             char *buf = buffer;
3032             uint32_t i = 0, j;
3033             uint32_t CE = UCOL_NO_MORE_CES;
3034
3035             /* before we start screaming, let's see if there is a problem with the rules */
3036             UErrorCode collIterateStatus = U_ZERO_ERROR;
3037             collIterate *s = uprv_new_collIterate(&collIterateStatus);
3038             uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3039
3040             CE = ucol_getNextCE(coll, s, &status);
3041
3042             for(i = 0; i < oldChLen; i++) {
3043               j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3044               buf += j;
3045             }
3046             if(status == U_PRIMARY_TOO_LONG_ERROR) {
3047               log_verbose("= Expected failure for %s =", buffer);
3048             } else {
3049               if(uprv_collIterateAtEnd(s)) {
3050                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3051                   oldChOffset, u_errorName(status), buffer);
3052               } else {
3053                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3054                   buffer);
3055               }
3056             }
3057             uprv_delete_collIterate(s);
3058           }
3059           varTop2 = ucol_getVariableTop(coll, &status);
3060           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3061             log_err("cannot retrieve set varTop value!\n");
3062             continue;
3063           }
3064
3065           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3066
3067             u_strncpy(first, src.source+oldChOffset, oldChLen);
3068             u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3069             u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3070             first[2*oldChLen+chLen] = 0;
3071
3072             if(oldExLen == 0) {
3073               u_strncpy(second, src.source+chOffset, chLen);
3074               second[chLen] = 0;
3075             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3076               u_strncpy(second, src.source+oldExOffset, oldExLen);
3077               u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3078               u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3079               second[2*oldExLen+chLen] = 0;
3080             }
3081             result = ucol_strcoll(coll, first, -1, second, -1);
3082             if(result == UCOL_EQUAL) {
3083               doTest(coll, first, second, UCOL_EQUAL);
3084             } else {
3085               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3086             }
3087           }
3088         }
3089         if(strength != UCOL_TOK_RESET) {
3090           oldChOffset = chOffset;
3091           oldChLen = chLen;
3092           oldExOffset = exOffset;
3093           oldExLen = exLen;
3094         }
3095       }
3096       status = U_ZERO_ERROR;
3097     }
3098     else {
3099       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3100       return;
3101     }
3102     if (U_FAILURE(status)) {
3103         log_err("Error parsing rules %s\n", u_errorName(status));
3104         return;
3105     }
3106     status = U_ZERO_ERROR;
3107   }
3108
3109   setTestOption(QUICK_OPTION, myQ);
3110
3111   log_verbose("Testing setting variable top to contractions\n");
3112   {
3113     /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3114     /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3115     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3116     while(*conts != 0) {
3117       if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
3118         varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3119       } else {
3120         varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3121       }
3122       if(U_FAILURE(status)) {
3123         if(status == U_PRIMARY_TOO_LONG_ERROR) {
3124           /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3125            * therefore it is not an error when it complains about them. */
3126           log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3127                       *conts, *(conts+1), *(conts+2));
3128         } else {
3129           log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3130                   *conts, *(conts+1), *(conts+2), u_errorName(status));
3131         }
3132         status = U_ZERO_ERROR;
3133       }
3134       conts+=3;
3135     }
3136
3137     status = U_ZERO_ERROR;
3138
3139     first[0] = 0x0040;
3140     first[1] = 0x0050;
3141     first[2] = 0x0000;
3142
3143     ucol_setVariableTop(coll, first, -1, &status);
3144
3145     if(U_SUCCESS(status)) {
3146       log_err("Invalid contraction succeded in setting variable top!\n");
3147     }
3148
3149   }
3150
3151   log_verbose("Test restoring variable top\n");
3152
3153   status = U_ZERO_ERROR;
3154   ucol_restoreVariableTop(coll, varTopOriginal, &status);
3155   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3156     log_err("Couldn't restore old variable top\n");
3157   }
3158
3159   log_verbose("Testing calling with error set\n");
3160
3161   status = U_INTERNAL_PROGRAM_ERROR;
3162   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3163   varTop2 = ucol_getVariableTop(coll, &status);
3164   ucol_restoreVariableTop(coll, varTop2, &status);
3165   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3166   varTop2 = ucol_getVariableTop(NULL, &status);
3167   ucol_restoreVariableTop(NULL, varTop2, &status);
3168   if(status != U_INTERNAL_PROGRAM_ERROR) {
3169     log_err("Bad reaction to passed error!\n");
3170   }
3171   uprv_free(src.source);
3172   ucol_close(coll);
3173   } else {
3174     log_data_err("Couldn't open UCA collator\n");
3175   }
3176
3177 }
3178
3179 static void TestNonChars(void) {
3180   static const char *test[] = {
3181       "\\u0000",  /* ignorable */
3182       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
3183       "\\uFDD0", "\\uFDEF",
3184       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
3185       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
3186       "\\U0003FFFE", "\\U0003FFFF",
3187       "\\U0004FFFE", "\\U0004FFFF",
3188       "\\U0005FFFE", "\\U0005FFFF",
3189       "\\U0006FFFE", "\\U0006FFFF",
3190       "\\U0007FFFE", "\\U0007FFFF",
3191       "\\U0008FFFE", "\\U0008FFFF",
3192       "\\U0009FFFE", "\\U0009FFFF",
3193       "\\U000AFFFE", "\\U000AFFFF",
3194       "\\U000BFFFE", "\\U000BFFFF",
3195       "\\U000CFFFE", "\\U000CFFFF",
3196       "\\U000DFFFE", "\\U000DFFFF",
3197       "\\U000EFFFE", "\\U000EFFFF",
3198       "\\U000FFFFE", "\\U000FFFFF",
3199       "\\U0010FFFE", "\\U0010FFFF",
3200       "\\uFFFF"  /* special character with maximum primary weight */
3201   };
3202   UErrorCode status = U_ZERO_ERROR;
3203   UCollator *coll = ucol_open("en_US", &status);
3204
3205   log_verbose("Test non characters\n");
3206
3207   if(U_SUCCESS(status)) {
3208     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3209   } else {
3210     log_err_status(status, "Unable to open collator\n");
3211   }
3212
3213   ucol_close(coll);
3214 }
3215
/*
 * Feeds genericLocaleStarter("en_US", ...) four strings that share a long
 * run of 'a' and differ only in their last character ('a', 'b', 'c', 'd'),
 * for every total length from 20 up to 499.
 *
 * The strings live in static storage rather than on the heap, so there is
 * no allocation that can fail and nothing to free.
 */
static void TestExtremeCompression(void) {
  static char storage[4][2048];
  const char *test[4];
  int32_t j = 0, i = 0;

  for(i = 0; i<4; i++) {
    test[i] = storage[i];
  }

  for(j = 20; j < 500; j++) {
    for(i = 0; i<4; i++) {
      /* j-1 copies of 'a', then the distinguishing character, then NUL */
      uprv_memset(storage[i], 'a', (j-1)*sizeof(char));
      storage[i][j-1] = (char)('a'+i);
      storage[i][j] = 0;
    }
    genericLocaleStarter("en_US", test, 4);
  }
}
3238
#if 0
/*
 * Compiled out.  Alternative draft of TestExtremeCompression that builds
 * the four strings for every length from 10 to 2047 but runs
 * genericLocaleStarter() only once, after the loop.  It has the same name as
 * the active function above and therefore must stay disabled or be renamed
 * before it is enabled.
 *
 * The draft used to open an "en_US" collator that was never used or closed
 * (and passed the error code by value); that dead call has been dropped.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* only test[0] is rewritten here and nothing is run on it afterwards */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
3268
/*
 * Passes one tailoring that involves supplementary characters (written as
 * surrogate pairs) and a 14-entry string table to genericRulesStarter().
 */
static void TestSurrogates(void) {
  /* tailoring under test; the pieces concatenate into a single rule string */
  static const char *rule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *test[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };
  /* all 14 entries take part */
  const int entryCount = (int)(sizeof(test) / sizeof(test[0]));

  genericRulesStarter(rule, test, entryCount);
}
3289
/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
/*
 * Each table entry pairs a rule string with a list of strings and the number
 * of leading strings that are handed to genericRulesStarter().
 * NOTE(review): the last entry lists five strings but declares a length of
 * four, so "zz" is not passed on; kept as found.
 */
static void TestPrefix(void) {
  static const struct PrefixCase {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } tests[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };

  const struct PrefixCase *cur = tests;
  const struct PrefixCase *const end = tests + (sizeof(tests)/sizeof(tests[0]));

  while(cur != end) {
    genericRulesStarter(cur->rules, cur->data, cur->len);
    ++cur;
  }
}
3315
3316 /* This test uses data supplied by Masashiko Maedera to test the implementation */
3317 /* JIS X 4061 collation order implementation                                    */
3318 static void TestNewJapanese(void) {
3319
3320   static const char * const test1[] = {
3321       "\\u30b7\\u30e3\\u30fc\\u30ec",
3322       "\\u30b7\\u30e3\\u30a4",
3323       "\\u30b7\\u30e4\\u30a3",
3324       "\\u30b7\\u30e3\\u30ec",
3325       "\\u3061\\u3087\\u3053",
3326       "\\u3061\\u3088\\u3053",
3327       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3328       "\\u3066\\u30fc\\u305f",
3329       "\\u30c6\\u30fc\\u30bf",
3330       "\\u30c6\\u30a7\\u30bf",
3331       "\\u3066\\u3048\\u305f",
3332       "\\u3067\\u30fc\\u305f",
3333       "\\u30c7\\u30fc\\u30bf",
3334       "\\u30c7\\u30a7\\u30bf",
3335       "\\u3067\\u3048\\u305f",
3336       "\\u3066\\u30fc\\u305f\\u30fc",
3337       "\\u30c6\\u30fc\\u30bf\\u30a1",
3338       "\\u30c6\\u30a7\\u30bf\\u30fc",
3339       "\\u3066\\u3047\\u305f\\u3041",
3340       "\\u3066\\u3048\\u305f\\u30fc",
3341       "\\u3067\\u30fc\\u305f\\u30fc",
3342       "\\u30c7\\u30fc\\u30bf\\u30a1",
3343       "\\u3067\\u30a7\\u305f\\u30a1",
3344       "\\u30c7\\u3047\\u30bf\\u3041",
3345       "\\u30c7\\u30a8\\u30bf\\u30a2",
3346       "\\u3072\\u3086",
3347       "\\u3073\\u3085\\u3042",
3348       "\\u3074\\u3085\\u3042",
3349       "\\u3073\\u3085\\u3042\\u30fc",
3350       "\\u30d3\\u30e5\\u30a2\\u30fc",
3351       "\\u3074\\u3085\\u3042\\u30fc",
3352       "\\u30d4\\u30e5\\u30a2\\u30fc",
3353       "\\u30d2\\u30e5\\u30a6",
3354       "\\u30d2\\u30e6\\u30a6",
3355       "\\u30d4\\u30e5\\u30a6\\u30a2",
3356       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3357       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3358       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3359       "\\u3072\\u3085\\u3093",
3360       "\\u3074\\u3085\\u3093",
3361       "\\u3075\\u30fc\\u308a",
3362       "\\u30d5\\u30fc\\u30ea",
3363       "\\u3075\\u3045\\u308a",
3364       "\\u3075\\u30a5\\u308a",
3365       "\\u3075\\u30a5\\u30ea",
3366       "\\u30d5\\u30a6\\u30ea",
3367       "\\u3076\\u30fc\\u308a",
3368       "\\u30d6\\u30fc\\u30ea",
3369       "\\u3076\\u3045\\u308a",
3370       "\\u30d6\\u30a5\\u308a",
3371       "\\u3077\\u3046\\u308a",
3372       "\\u30d7\\u30a6\\u30ea",
3373       "\\u3075\\u30fc\\u308a\\u30fc",
3374       "\\u30d5\\u30a5\\u30ea\\u30fc",
3375       "\\u3075\\u30a5\\u308a\\u30a3",
3376       "\\u30d5\\u3045\\u308a\\u3043",
3377       "\\u30d5\\u30a6\\u30ea\\u30fc",
3378       "\\u3075\\u3046\\u308a\\u3043",
3379       "\\u30d6\\u30a6\\u30ea\\u30a4",
3380       "\\u3077\\u30fc\\u308a\\u30fc",
3381       "\\u3077\\u30a5\\u308a\\u30a4",
3382       "\\u3077\\u3046\\u308a\\u30fc",
3383       "\\u30d7\\u30a6\\u30ea\\u30a4",
3384       "\\u30d5\\u30fd",
3385       "\\u3075\\u309e",
3386       "\\u3076\\u309d",
3387       "\\u3076\\u3075",
3388       "\\u3076\\u30d5",
3389       "\\u30d6\\u3075",
3390       "\\u30d6\\u30d5",
3391       "\\u3076\\u309e",
3392       "\\u3076\\u3077",
3393       "\\u30d6\\u3077",
3394       "\\u3077\\u309d",
3395       "\\u30d7\\u30fd",
3396       "\\u3077\\u3075",
3397 };
3398
3399   static const char *test2[] = {
3400     "\\u306f\\u309d", /* H\\u309d */
3401     "\\u30cf\\u30fd", /* K\\u30fd */
3402     "\\u306f\\u306f", /* HH */
3403     "\\u306f\\u30cf", /* HK */
3404     "\\u30cf\\u30cf", /* KK */
3405     "\\u306f\\u309e", /* H\\u309e */
3406     "\\u30cf\\u30fe", /* K\\u30fe */
3407     "\\u306f\\u3070", /* HH\\u309b */
3408     "\\u30cf\\u30d0", /* KK\\u309b */
3409     "\\u306f\\u3071", /* HH\\u309c */
3410     "\\u30cf\\u3071", /* KH\\u309c */
3411     "\\u30cf\\u30d1", /* KK\\u309c */
3412     "\\u3070\\u309d", /* H\\u309b\\u309d */
3413     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3414     "\\u3070\\u306f", /* H\\u309bH */
3415     "\\u30d0\\u30cf", /* K\\u309bK */
3416     "\\u3070\\u309e", /* H\\u309b\\u309e */
3417     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3418     "\\u3070\\u3070", /* H\\u309bH\\u309b */
3419     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3420     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3421     "\\u3070\\u3071", /* H\\u309bH\\u309c */
3422     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3423     "\\u3071\\u309d", /* H\\u309c\\u309d */
3424     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3425     "\\u3071\\u306f", /* H\\u309cH */
3426     "\\u30d1\\u30cf", /* K\\u309cK */
3427     "\\u3071\\u3070", /* H\\u309cH\\u309b */
3428     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3429     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3430     "\\u3071\\u3071", /* H\\u309cH\\u309c */
3431     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3432   };
3433   /*
3434   static const char *test3[] = {
3435     "\\u221er\\u221e",
3436     "\\u221eR#",
3437     "\\u221et\\u221e",
3438     "#r\\u221e",
3439     "#R#",
3440     "#t%",
3441     "#T%",
3442     "8t\\u221e",
3443     "8T\\u221e",
3444     "8t#",
3445     "8T#",
3446     "8t%",
3447     "8T%",
3448     "8t8",
3449     "8T8",
3450     "\\u03c9r\\u221e",
3451     "\\u03a9R%",
3452     "rr\\u221e",
3453     "rR\\u221e",
3454     "Rr\\u221e",
3455     "RR\\u221e",
3456     "RT%",
3457     "rt8",
3458     "tr\\u221e",
3459     "tr8",
3460     "TR8",
3461     "tt8",
3462     "\\u30b7\\u30e3\\u30fc\\u30ec",
3463   };
3464   */
3465   static const UColAttribute att[] = { UCOL_STRENGTH };
3466   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3467
3468   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3469   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3470
3471   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3472   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3473   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3474   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3475   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3476 }
3477
3478 static void TestStrCollIdenticalPrefix(void) {
3479   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3480   const char* test[] = {
3481     "ab\\ud9b0\\udc70",
3482     "ab\\ud9b0\\udc71"
3483   };
3484   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3485 }
3486 /* Contractions should have all their canonically equivalent */
3487 /* strings included */
3488 static void TestContractionClosure(void) {
3489   static const struct {
3490     const char *rules;
3491     const char *data[10];
3492     const uint32_t len;
3493   } tests[] = {
3494     {   "&b=\\u00e4\\u00e4",
3495       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3496     {   "&b=\\u00C5",
3497       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3498   };
3499   uint32_t i;
3500
3501
3502   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3503     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3504   }
3505 }
3506
/*
 * Runs tailorings that combine a [before 3] reset with further rules on the
 * same anchor, in both rule orders. Each case hands its rules and its data
 * strings to genericRulesStarter.
 * (The original note on this function said that this test also fails.)
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } beforeCases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  uint32_t idx = 0;

  while(idx < (sizeof(beforeCases)/sizeof(beforeCases[0]))) {
    genericRulesStarter(beforeCases[idx].rules, beforeCases[idx].data, beforeCases[idx].len);
    ++idx;
  }

  /* Disabled diagnostic section, kept as it was written. */
#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
/* this piece of code should be in some sort of verbose mode     */
/* it gets the collation elements for elements and prints them   */
/* This is useful when trying to see whether the problem is      */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
3584
3585 static void TestPrefixCompose(void) {
3586   const char* rule1 =
3587         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3588   /*
3589   const char* test[] = {
3590       "\\u30c6\\u30fc\\u30bf",
3591       "\\u30c6\\u30a7\\u30bf",
3592   };
3593   */
3594   {
3595     UErrorCode status = U_ZERO_ERROR;
3596     /*uint32_t i = 0;*/
3597     /*UCollationElements *it = NULL;*/
3598 /*    uint32_t CE;*/
3599     UChar string[256];
3600     uint32_t uStringLen;
3601     UCollator *coll = NULL;
3602
3603     uStringLen = u_unescape(rule1, string, 256);
3604
3605     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3606     ucol_close(coll);
3607   }
3608
3609
3610 }
3611
3612 /*
3613 [last variable] last variable value
3614 [last primary ignorable] largest CE for primary ignorable
3615 [last secondary ignorable] largest CE for secondary ignorable
3616 [last tertiary ignorable] largest CE for tertiary ignorable
3617 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3618 */
3619
/*
 * Runs a table of tailorings that reset on the symbolic positions
 * ([first/last tertiary|secondary|primary ignorable], [first/last variable],
 * [first/last regular], [first/last implicit], [top]), optionally with
 * [before N]. Each entry passes its rules, its data strings and a count to
 * genericRulesStarter.
 *
 * The characters used here are hard-coded for the UCA version this file was
 * written against; a UCA update may require changing them.
 */
static void TestRuleOptions(void) {
  /*
   * Characters around [variable top]: the last character below it and the
   * first and second characters (by primary weight) above it.
   * FractionalUCA.txt lists them, for example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Since UCA 6.0 the [variable top] collation element is not the weight of
   * any character or string, so LAST_VARIABLE_CHAR_STRING sorts before
   * [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * Character carrying the [last regular] weight; it changes with each UCA
   * version. The end of FractionalUCA.txt has a line such as
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Since UCA 6.0 the [last regular] collation element is not the weight of
   * any character or string, so LAST_REGULAR_CHAR_STRING sorts before
   * [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } optionCases[] = {
    /* [before 3] on the ignorable boundaries below has no effect */

    /* nothing can go before the first tertiary ignorable */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* nothing can go before the last tertiary ignorable */
    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* nothing can go before the first secondary ignorable */
    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* nothing can go before the last secondary ignorable */
    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* [before N] resets that do move the tailored characters */

    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* no code point corresponds to the last primary ignorable */
    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    /* [last variable] itself falls between "b" and "a" */
    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    /* [last regular] itself falls between "b" and "a" */
    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },

    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },

    /* NOTE(review): eight strings are listed but len is 7, so the trailing
     * "u" is not counted -- confirm that this is intended. */
    { "&[last variable]<z"
      "&[last primary ignorable]<x"
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  uint32_t idx = 0;

  while(idx < (sizeof(optionCases)/sizeof(optionCases[0]))) {
    genericRulesStarter(optionCases[idx].rules, optionCases[idx].data, optionCases[idx].len);
    ++idx;
  }
}
3734
3735
/*
 * Not a real test of results: it only checks that building a collator from
 * an [optimize ...] rule (which copies UCA contents) does not fail. The
 * observable collation behaviour is unchanged, so "a" and "b" are simply
 * passed through genericRulesStarter.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } optimizeCases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  uint32_t idx = 0;

  while(idx < (sizeof(optimizeCases)/sizeof(optimizeCases[0]))) {
    genericRulesStarter(optimizeCases[idx].rules, optimizeCases[idx].data, optimizeCases[idx].len);
    ++idx;
  }
}
3757
3758 /*
3759 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3760 weiv    ucol_strcollIter?
3761 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3762 weiv    these are the input strings?
3763 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3764 weiv    will check - could be a problem with utf-8 iterator
3765 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3766 weiv    hmmm
3767 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3768 weiv    that doesn't sound right
3769 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3770 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3771 cycheng@ca.ibm.c... yes
3772 weiv    and then do the comparison
3773 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3774 weiv    utf-16 strings look like a little endian ones in the example you sent me
3775 weiv    It could be a bug - let me try to test it out
3776 cycheng@ca.ibm.c... ok
3777 cycheng@ca.ibm.c... we can wait till the conf. call
3778 cycheng@ca.ibm.c... next weke
3779 weiv    that would be great
3780 weiv    hmmm
3781 weiv    I might be wrong
3782 weiv    let me play with it some more
3783 cycheng@ca.ibm.c... ok
3784 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3785 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3786 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3787 weiv    ok
3788 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3789 weiv    thanks
3790 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3791 */
#if 0
/*
 * Disabled reproduction of the report quoted in the comment above: the same
 * two strings are compared once through UTF-16BE iterators and once through
 * UTF-8 iterators (normalization on), and the two results must agree.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  /* d800 0021 and fffc 0062 as big-endian UTF-16 bytes */
  const char bytesU16BE[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the same two strings as UTF-8 bytes */
  const char bytesU8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator u16First, u16Second;
  UCharIterator u8First, u8Second;

  UCollationResult viaU16, viaU8;

  uiter_setUTF16BE(&u16First, bytesU16BE[0], 4);
  uiter_setUTF16BE(&u16Second, bytesU16BE[1], 4);

  uiter_setUTF8(&u8First, bytesU8[0], 4);
  uiter_setUTF8(&u8Second, bytesU8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  viaU16 = ucol_strcollIter(coll, &u16First, &u16Second, &status);
  viaU8 = ucol_strcollIter(coll, &u8First, &u8Second, &status);

  if(viaU16 != viaU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3832
3833 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3834 static void Alexis2(void) {
3835   UErrorCode status = U_ZERO_ERROR;
3836   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3837   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3838   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3839   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3840
3841   UConverter *conv = NULL;
3842
3843   UCharIterator U16BEItS, U16BEItT;
3844   UCharIterator U8ItS, U8ItT;
3845
3846   UCollationResult resU16, resU16BE, resU8;
3847
3848   static const char* const pairs[][2] = {
3849     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3850     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3851     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3852     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3853     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3854     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3855     { "\\u0020", "\\u0020\\u0000"}
3856 /*
3857 5F20 (my result here)
3858 5F204E008E3F
3859 5F20 (your result here)
3860 */
3861   };
3862
3863   int32_t i = 0;
3864
3865   UCollator *coll = ucol_open("", &status);
3866   if(status == U_FILE_ACCESS_ERROR) {
3867     log_data_err("Is your data around?\n");
3868     return;
3869   } else if(U_FAILURE(status)) {
3870     log_err("Error opening collator\n");
3871     return;
3872   }
3873   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3874   conv = ucnv_open("UTF16BE", &status);
3875   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3876     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3877     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3878
3879     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3880
3881     log_verbose("Result of strcoll is %i\n", resU16);
3882
3883     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3884     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3885
3886     /* use the original sizes, as the result from converter is in bytes */
3887     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3888     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3889
3890     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3891
3892     log_verbose("Result of U16BE is %i\n", resU16BE);
3893
3894     if(resU16 != resU16BE) {
3895       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3896     }
3897
3898     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3899     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3900
3901     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3902     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3903
3904     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3905
3906     if(resU16 != resU8) {
3907       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3908     }
3909
3910   }
3911
3912   ucol_close(coll);
3913   ucnv_close(conv);
3914 }
3915
3916 static void TestHebrewUCA(void) {
3917   UErrorCode status = U_ZERO_ERROR;
3918   static const char *first[] = {
3919     "d790d6b8d79cd795d6bcd7a9",
3920     "d790d79cd79ed7a7d799d799d7a1",
3921     "d790d6b4d79ed795d6bcd7a9",
3922   };
3923
3924   char utf8String[3][256];
3925   UChar utf16String[3][256];
3926
3927   int32_t i = 0, j = 0;
3928   int32_t sizeUTF8[3];
3929   int32_t sizeUTF16[3];
3930
3931   UCollator *coll = ucol_open("", &status);
3932   if (U_FAILURE(status)) {
3933       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3934       return;
3935   }
3936   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3937
3938   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3939     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3940     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3941     log_verbose("%i: ");
3942     for(j = 0; j < sizeUTF16[i]; j++) {
3943       /*log_verbose("\\u%04X", utf16String[i][j]);*/
3944       log_verbose("%04X", utf16String[i][j]);
3945     }
3946     log_verbose("\n");
3947   }
3948   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3949     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3950       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3951     }
3952   }
3953
3954   ucol_close(coll);
3955
3956 }
3957
3958 static void TestPartialSortKeyTermination(void) {
3959   static const char* cases[] = {
3960     "\\u1234\\u1234\\udc00",
3961     "\\udc00\\ud800\\ud800"
3962   };
3963
3964   int32_t i = sizeof(UCollator);
3965
3966   UErrorCode status = U_ZERO_ERROR;
3967
3968   UCollator *coll = ucol_open("", &status);
3969
3970   UCharIterator iter;
3971
3972   UChar currCase[256];
3973   int32_t length = 0;
3974   int32_t pKeyLen = 0;
3975
3976   uint8_t key[256];
3977
3978   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3979     uint32_t state[2] = {0, 0};
3980     length = u_unescape(cases[i], currCase, 256);
3981     uiter_setString(&iter, currCase, length);
3982     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3983
3984     log_verbose("Done\n");
3985
3986   }
3987   ucol_close(coll);
3988 }
3989
3990 static void TestSettings(void) {
3991   static const char* cases[] = {
3992     "apple",
3993       "Apple"
3994   };
3995
3996   static const char* locales[] = {
3997     "",
3998       "en"
3999   };
4000
4001   UErrorCode status = U_ZERO_ERROR;
4002
4003   int32_t i = 0, j = 0;
4004
4005   UChar source[256], target[256];
4006   int32_t sLen = 0, tLen = 0;
4007
4008   UCollator *collateObject = NULL;
4009   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4010     collateObject = ucol_open(locales[i], &status);
4011     ucol_setStrength(collateObject, UCOL_PRIMARY);
4012     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4013     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4014       sLen = u_unescape(cases[j-1], source, 256);
4015       source[sLen] = 0;
4016       tLen = u_unescape(cases[j], target, 256);
4017       source[tLen] = 0;
4018       doTest(collateObject, source, target, UCOL_EQUAL);
4019     }
4020     ucol_close(collateObject);
4021   }
4022 }
4023
4024 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4025     UErrorCode status = U_ZERO_ERROR;
4026     int32_t errorNo = 0;
4027     /*const UChar *sourceRules = NULL;*/
4028     /*int32_t sourceRulesLen = 0;*/
4029     UColAttributeValue french = UCOL_OFF;
4030     int32_t cloneSize = 0;
4031
4032     if(!ucol_equals(source, target)) {
4033         log_err("Same collators, different address not equal\n");
4034         errorNo++;
4035     }
4036     ucol_close(target);
4037     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4038         /* currently, safeClone is implemented through getRules/openRules
4039         * so it is the same as the test below - I will comment that test out.
4040         */
4041         /* real thing */
4042         target = ucol_safeClone(source, NULL, &cloneSize, &status);
4043         if(U_FAILURE(status)) {
4044             log_err("Error creating clone\n");
4045             errorNo++;
4046             return errorNo;
4047         }
4048         if(!ucol_equals(source, target)) {
4049             log_err("Collator different from it's clone\n");
4050             errorNo++;
4051         }
4052         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4053         if(french == UCOL_ON) {
4054             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4055         } else {
4056             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4057         }
4058         if(U_FAILURE(status)) {
4059             log_err("Error setting attributes\n");
4060             errorNo++;
4061             return errorNo;
4062         }
4063         if(ucol_equals(source, target)) {
4064             log_err("Collators same even when options changed\n");
4065             errorNo++;
4066         }
4067         ucol_close(target);
4068         /* commented out since safeClone uses exactly the same technique */
4069         /*
4070         sourceRules = ucol_getRules(source, &sourceRulesLen);
4071         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4072         if(U_FAILURE(status)) {
4073         log_err("Error instantiating target from rules\n");
4074         errorNo++;
4075         return errorNo;
4076         }
4077         if(!ucol_equals(source, target)) {
4078         log_err("Collator different from collator that was created from the same rules\n");
4079         errorNo++;
4080         }
4081         ucol_close(target);
4082         */
4083     }
4084     return errorNo;
4085 }
4086
4087
4088 static void TestEquals(void) {
4089     /* ucol_equals is not currently a public API. There is a chance that it will become
4090     * something like this, but currently it is only used by RuleBasedCollator::operator==
4091     */
4092     /* test whether the two collators instantiated from the same locale are equal */
4093     UErrorCode status = U_ZERO_ERROR;
4094     UParseError parseError;
4095     int32_t noOfLoc = uloc_countAvailable();
4096     const char *locName = NULL;
4097     UCollator *source = NULL, *target = NULL;
4098     int32_t i = 0;
4099
4100     const char* rules[] = {
4101         "&l < lj <<< Lj <<< LJ",
4102         "&n < nj <<< Nj <<< NJ",
4103         "&ae <<< \\u00e4",
4104         "&AE <<< \\u00c4"
4105     };
4106     /*
4107     const char* badRules[] = {
4108     "&l <<< Lj",
4109     "&n < nj <<< nJ <<< NJ",
4110     "&a <<< \\u00e4",
4111     "&AE <<< \\u00c4 <<< x"
4112     };
4113     */
4114
4115     UChar sourceRules[1024], targetRules[1024];
4116     int32_t sourceRulesSize = 0, targetRulesSize = 0;
4117     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4118
4119     for(i = 0; i < rulesSize; i++) {
4120         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4121         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4122     }
4123
4124     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4125     if(status == U_FILE_ACCESS_ERROR) {
4126         log_data_err("Is your data around?\n");
4127         return;
4128     } else if(U_FAILURE(status)) {
4129         log_err("Error opening collator\n");
4130         return;
4131     }
4132     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4133     if(!ucol_equals(source, target)) {
4134         log_err("Equivalent collators not equal!\n");
4135     }
4136     ucol_close(source);
4137     ucol_close(target);
4138
4139     source = ucol_open("root", &status);
4140     target = ucol_open("root", &status);
4141     log_verbose("Testing root\n");
4142     if(!ucol_equals(source, source)) {
4143         log_err("Same collator not equal\n");
4144     }
4145     if(TestEqualsForCollator(locName, source, target)) {
4146         log_err("Errors for root\n", locName);
4147     }
4148     ucol_close(source);
4149
4150     for(i = 0; i<noOfLoc; i++) {
4151         status = U_ZERO_ERROR;
4152         locName = uloc_getAvailable(i);
4153         /*if(hasCollationElements(locName)) {*/
4154         log_verbose("Testing equality for locale %s\n", locName);
4155         source = ucol_open(locName, &status);
4156         target = ucol_open(locName, &status);
4157         if (U_FAILURE(status)) {
4158             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4159             continue;
4160         }
4161         if(TestEqualsForCollator(locName, source, target)) {
4162             log_err("Errors for locale %s\n", locName);
4163         }
4164         ucol_close(source);
4165         /*}*/
4166     }
4167 }
4168
4169 static void TestJ2726(void) {
4170     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4171     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4172     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4173     UErrorCode status = U_ZERO_ERROR;
4174     UCollator *coll = ucol_open("en", &status);
4175     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4176     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4177     doTest(coll, a, aSpace, UCOL_EQUAL);
4178     doTest(coll, aSpace, a, UCOL_EQUAL);
4179     doTest(coll, a, spaceA, UCOL_EQUAL);
4180     doTest(coll, spaceA, a, UCOL_EQUAL);
4181     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4182     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4183     ucol_close(coll);
4184 }
4185
4186 static void NullRule(void) {
4187     UChar r[3] = {0};
4188     UErrorCode status = U_ZERO_ERROR;
4189     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4190     if(U_SUCCESS(status)) {
4191         log_err("This should have been an error!\n");
4192         ucol_close(coll);
4193     } else {
4194         status = U_ZERO_ERROR;
4195     }
4196     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4197     if(U_FAILURE(status)) {
4198         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4199     } else {
4200         ucol_close(coll);
4201     }
4202 }
4203
4204 /**
4205  * Test for CollationElementIterator previous and next for the whole set of
4206  * unicode characters with normalization on.
4207  */
4208 static void TestNumericCollation(void)
4209 {
4210     UErrorCode status = U_ZERO_ERROR;
4211
4212     const static char *basicTestStrings[]={
4213     "hello1",
4214     "hello2",
4215     "hello2002",
4216     "hello2003",
4217     "hello123456",
4218     "hello1234567",
4219     "hello10000000",
4220     "hello100000000",
4221     "hello1000000000",
4222     "hello10000000000",
4223     };
4224
4225     const static char *preZeroTestStrings[]={
4226     "avery10000",
4227     "avery010000",
4228     "avery0010000",
4229     "avery00010000",
4230     "avery000010000",
4231     "avery0000010000",
4232     "avery00000010000",
4233     "avery000000010000",
4234     };
4235
4236     const static char *thirtyTwoBitNumericStrings[]={
4237     "avery42949672960",
4238     "avery42949672961",
4239     "avery42949672962",
4240     "avery429496729610"
4241     };
4242
4243      const static char *longNumericStrings[]={
4244      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4245         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4246         are treated as multiple collation elements. */
4247     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4248     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4249     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4250     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4251     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4252     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4253     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4254     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4255     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4256     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4257     };
4258
4259     const static char *supplementaryDigits[] = {
4260       "\\uD835\\uDFCE", /* 0 */
4261       "\\uD835\\uDFCF", /* 1 */
4262       "\\uD835\\uDFD0", /* 2 */
4263       "\\uD835\\uDFD1", /* 3 */
4264       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4265       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4266       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4267       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4268       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4269       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4270     };
4271
4272     const static char *foreignDigits[] = {
4273       "\\u0661",
4274         "\\u0662",
4275         "\\u0663",
4276       "\\u0661\\u0660",
4277       "\\u0661\\u0662",
4278       "\\u0661\\u0663",
4279       "\\u0662\\u0660",
4280       "\\u0662\\u0662",
4281       "\\u0662\\u0663",
4282       "\\u0663\\u0660",
4283       "\\u0663\\u0662",
4284       "\\u0663\\u0663"
4285     };
4286
4287     const static char *evenZeroes[] = {
4288       "2000",
4289       "2001",
4290         "2002",
4291         "2003"
4292     };
4293
4294     UColAttribute att = UCOL_NUMERIC_COLLATION;
4295     UColAttributeValue val = UCOL_ON;
4296
4297     /* Open our collator. */
4298     UCollator* coll = ucol_open("root", &status);
4299     if (U_FAILURE(status)){
4300         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4301               myErrorName(status));
4302         return;
4303     }
4304     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4305     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4306     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4307     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4308     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4309     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4310
4311     /* Setting up our collator to do digits. */
4312     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4313     if (U_FAILURE(status)){
4314         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4315               myErrorName(status));
4316         return;
4317     }
4318
4319     /*
4320        Testing that prepended zeroes still yield the correct collation behavior.
4321        We expect that every element in our strings array will be equal.
4322     */
4323     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4324
4325     ucol_close(coll);
4326 }
4327
4328 static void TestTibetanConformance(void)
4329 {
4330     const char* test[] = {
4331         "\\u0FB2\\u0591\\u0F71\\u0061",
4332         "\\u0FB2\\u0F71\\u0061"
4333     };
4334
4335     UErrorCode status = U_ZERO_ERROR;
4336     UCollator *coll = ucol_open("", &status);
4337     UChar source[100];
4338     UChar target[100];
4339     int result;
4340     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4341     if (U_SUCCESS(status)) {
4342         u_unescape(test[0], source, 100);
4343         u_unescape(test[1], target, 100);
4344         doTest(coll, source, target, UCOL_EQUAL);
4345         result = ucol_strcoll(coll, source, -1,   target, -1);
4346         log_verbose("result %d\n", result);
4347         if (UCOL_EQUAL != result) {
4348             log_err("Tibetan comparison error\n");
4349         }
4350     }
4351     ucol_close(coll);
4352
4353     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4354 }
4355
/** Runs the generic ordering check on two Han strings with the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
4360
/* Upper bound (inclusive) of the raw values walked by the implicit-weight test. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks applied to implicit weights. They deliberately carry no trailing
 * semicolon so that they can be used anywhere inside an expression, not only
 * as the last operand of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4365
4366
4367 static void showImplicit(UChar32 i) {
4368     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4369         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4370     }
4371 }
4372
4373 static void TestImplicitGeneration(void) {
4374     UErrorCode status = U_ZERO_ERROR;
4375     UChar32 last = 0;
4376     UChar32 current;
4377     UChar32 i = 0, j = 0;
4378     UChar32 roundtrip = 0;
4379     UChar32 lastBottom = 0;
4380     UChar32 currentBottom = 0;
4381     UChar32 lastTop = 0;
4382     UChar32 currentTop = 0;
4383
4384     UCollator *coll = ucol_open("root", &status);
4385     if(U_FAILURE(status)) {
4386         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4387         return;
4388     }
4389
4390     uprv_uca_getRawFromImplicit(0xE20303E7);
4391
4392     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4393         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4394
4395         /* check that it round-trips AND that all intervening ones are illegal*/
4396         roundtrip = uprv_uca_getRawFromImplicit(current);
4397         if (roundtrip != i) {
4398             log_err("No roundtrip %08X\n", i);
4399         }
4400         if (last != 0) {
4401             for (j = last + 1; j < current; ++j) {
4402                 roundtrip = uprv_uca_getRawFromImplicit(j);
4403                 /* raise an error if it *doesn't* find an error*/
4404                 if (roundtrip != -1) {
4405                     log_err("Fails to recognize illegal %08X\n", j);
4406                 }
4407             }
4408         }
4409         /* now do other consistency checks*/
4410         lastBottom = last & bottomByte;
4411         currentBottom = current & bottomByte;
4412         lastTop = last & topByte;
4413         currentTop = current & topByte;
4414
4415         /* print out some values for spot-checking*/
4416         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4417             showImplicit(i-3);
4418             showImplicit(i-2);
4419             showImplicit(i-1);
4420             showImplicit(i);
4421             showImplicit(i+1);
4422             showImplicit(i+2);
4423         }
4424         last = current;
4425
4426         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4427             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4428         }
4429     }
4430     showImplicit(TST_UCOL_MAX_INPUT-2);
4431     showImplicit(TST_UCOL_MAX_INPUT-1);
4432     showImplicit(TST_UCOL_MAX_INPUT);
4433     ucol_close(coll);
4434 }
4435
4436 /**
4437  * Iterate through the given iterator, checking to see that all the strings
4438  * in the expected array are present.
4439  * @param expected array of strings we expect to see, or NULL
4440  * @param expectedCount number of elements of expected, or 0
4441  */
4442 static int32_t checkUEnumeration(const char* msg,
4443                                  UEnumeration* iter,
4444                                  const char** expected,
4445                                  int32_t expectedCount) {
4446     UErrorCode ec = U_ZERO_ERROR;
4447     int32_t i = 0, n, j, bit;
4448     int32_t seenMask = 0;
4449
4450     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4451     n = uenum_count(iter, &ec);
4452     if (!assertSuccess("count", &ec)) return -1;
4453     log_verbose("%s = [", msg);
4454     for (;; ++i) {
4455         const char* s = uenum_next(iter, NULL, &ec);
4456         if (!assertSuccess("snext", &ec) || s == NULL) break;
4457         if (i != 0) log_verbose(",");
4458         log_verbose("%s", s);
4459         /* check expected list */
4460         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4461             if ((seenMask&bit) == 0 &&
4462                 uprv_strcmp(s, expected[j]) == 0) {
4463                 seenMask |= bit;
4464                 break;
4465             }
4466         }
4467     }
4468     log_verbose("] (%d)\n", i);
4469     assertTrue("count verified", i==n);
4470     /* did we see all expected strings? */
4471     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4472         if ((seenMask&bit)!=0) {
4473             log_verbose("Ok: \"%s\" seen\n", expected[j]);
4474         } else {
4475             log_err("FAIL: \"%s\" not seen\n", expected[j]);
4476         }
4477     }
4478     return n;
4479 }
4480
4481 /**
4482  * Test new API added for separate collation tree.
4483  */
4484 static void TestSeparateTrees(void) {
4485     UErrorCode ec = U_ZERO_ERROR;
4486     UEnumeration *e = NULL;
4487     int32_t n = -1;
4488     UBool isAvailable;
4489     char loc[256];
4490
4491     static const char* AVAIL[] = { "en", "de" };
4492
4493     static const char* KW[] = { "collation" };
4494
4495     static const char* KWVAL[] = { "phonebook", "stroke" };
4496
4497 #if !UCONFIG_NO_SERVICE
4498     e = ucol_openAvailableLocales(&ec);
4499     if (e != NULL) {
4500         assertSuccess("ucol_openAvailableLocales", &ec);
4501         assertTrue("ucol_openAvailableLocales!=0", e!=0);
4502         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4503         /* Don't need to check n because we check list */
4504         uenum_close(e);
4505     } else {
4506         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4507     }
4508 #endif
4509
4510     e = ucol_getKeywords(&ec);
4511     if (e != NULL) {
4512         assertSuccess("ucol_getKeywords", &ec);
4513         assertTrue("ucol_getKeywords!=0", e!=0);
4514         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4515         /* Don't need to check n because we check list */
4516         uenum_close(e);
4517     } else {
4518         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4519     }
4520
4521     e = ucol_getKeywordValues(KW[0], &ec);
4522     if (e != NULL) {
4523         assertSuccess("ucol_getKeywordValues", &ec);
4524         assertTrue("ucol_getKeywordValues!=0", e!=0);
4525         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4526         /* Don't need to check n because we check list */
4527         uenum_close(e);
4528     } else {
4529         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4530     }
4531
4532     /* Try setting a warning before calling ucol_getKeywordValues */
4533     ec = U_USING_FALLBACK_WARNING;
4534     e = ucol_getKeywordValues(KW[0], &ec);
4535     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4536         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4537         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4538         /* Don't need to check n because we check list */
4539         uenum_close(e);
4540     }
4541
4542     /*
4543 U_DRAFT int32_t U_EXPORT2
4544 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4545                              const char* locale, UBool* isAvailable,
4546                              UErrorCode* status);
4547 }
4548 */
4549     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4550                                      &isAvailable, &ec);
4551     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4552         assertEquals("getFunctionalEquivalent(de)", "de", loc);
4553         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4554                    isAvailable == TRUE);
4555     }
4556
4557     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4558                                      &isAvailable, &ec);
4559     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4560         assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4561         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4562                    isAvailable == TRUE);
4563     }
4564 }
4565
/*
 * Supersedes TestJ784.
 * Checks two ordered lists of toned-vowel strings, first against a custom
 * rule set built from [before 2] resets and then against the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* short syllables with and without a following consonant */
    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* case and tone variations around the letter a */
    static const char *test2[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}
4626
4627 static void TestBeforeTightening(void) {
4628     static const struct {
4629         const char *rules;
4630         UErrorCode expectedStatus;
4631     } tests[] = {
4632         { "&[before 1]a<x", U_ZERO_ERROR },
4633         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4634         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4635         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4636         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4637         { "&[before 2]a<<x",U_ZERO_ERROR },
4638         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4639         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4640         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4641         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4642         { "&[before 3]a<<<x",U_ZERO_ERROR },
4643         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4644         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4645     };
4646
4647     int32_t i = 0;
4648
4649     UErrorCode status = U_ZERO_ERROR;
4650     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4651     uint32_t rlen = 0;
4652
4653     UCollator *coll = NULL;
4654
4655
4656     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4657         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4658         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4659         if(status != tests[i].expectedStatus) {
4660             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4661                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4662         }
4663         ucol_close(coll);
4664         status = U_ZERO_ERROR;
4665     }
4666
4667 }
4668
4669 #if 0
4670 &m < a
4671 &[before 1] a < x <<< X << q <<< Q < z
4672 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4673
4674 &m < a
4675 &[before 2] a << x <<< X << q <<< Q < z
4676 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4677
4678 &m < a
4679 &[before 3] a <<< x <<< X << q <<< Q < z
4680 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4681
4682
4683 &m << a
4684 &[before 1] a < x <<< X << q <<< Q < z
4685 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4686
4687 &m << a
4688 &[before 2] a << x <<< X << q <<< Q < z
4689 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4690
4691 &m << a
4692 &[before 3] a <<< x <<< X << q <<< Q < z
4693 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4694
4695
4696 &m <<< a
4697 &[before 1] a < x <<< X << q <<< Q < z
4698 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4699
4700 &m <<< a
4701 &[before 2] a << x <<< X << q <<< Q < z
4702 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4703
4704 &m <<< a
4705 &[before 3] a <<< x <<< X << q <<< Q < z
4706 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4707
4708
4709 &[before 1] s < x <<< X << q <<< Q < z
4710 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4711
4712 &[before 2] s << x <<< X << q <<< Q < z
4713 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4714
4715 &[before 3] s <<< x <<< X << q <<< Q < z
4716 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4717
4718
4719 &[before 1] \u24DC < x <<< X << q <<< Q < z
4720 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4721
4722 &[before 2] \u24DC << x <<< X << q <<< Q < z
4723 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4724
4725 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4726 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4727 #endif
4728
4729
#if 0
/* requires features not yet supported */
/*
 * Disabled: for each rule string, hands the listed strings (in their expected
 * order) to genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t t;

    for (t = 0; t < (int32_t)LEN(tests); ++t) {
        genericRulesStarter(tests[t].rules, tests[t].order, tests[t].size);
    }
}
#endif
4777
4778 static void TestTailorNULL( void ) {
4779     const static char* rule = "&a <<< '\\u0000'";
4780     UErrorCode status = U_ZERO_ERROR;
4781     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4782     uint32_t rlen = 0;
4783     UChar a = 1, null = 0;
4784     UCollationResult res = UCOL_EQUAL;
4785
4786     UCollator *coll = NULL;
4787
4788
4789     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4790     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4791
4792     if(U_FAILURE(status)) {
4793         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4794     } else {
4795         res = ucol_strcoll(coll, &a, 1, &null, 1);
4796
4797         if(res != UCOL_LESS) {
4798             log_err("NULL was not tailored properly!\n");
4799         }
4800     }
4801
4802     ucol_close(coll);
4803 }
4804
4805 static void
4806 TestUpperFirstQuaternary(void)
4807 {
4808   const char* tests[] = { "B", "b", "Bb", "bB" };
4809   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4810   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4811   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4812 }
4813
4814 static void
4815 TestJ4960(void)
4816 {
4817   const char* tests[] = { "\\u00e2T", "aT" };
4818   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4819   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4820   const char* tests2[] = { "a", "A" };
4821   const char* rule = "&[first tertiary ignorable]=A=a";
4822   UColAttribute att2[] = { UCOL_CASE_LEVEL };
4823   UColAttributeValue attVals2[] = { UCOL_ON };
4824   /* Test whether we correctly ignore primary ignorables on case level when */
4825   /* we have only primary & case level */
4826   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4827   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4828   /* and case level */
4829   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4830   /* Test whether completely ignorable letters have case level info (they shouldn't) */
4831   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4832 }
4833
4834 static void
4835 TestJ5223(void)
4836 {
4837   static const char *test = "this is a test string";
4838   UChar ustr[256];
4839   int32_t ustr_length = u_unescape(test, ustr, 256);
4840   unsigned char sortkey[256];
4841   int32_t sortkey_length;
4842   UErrorCode status = U_ZERO_ERROR;
4843   static UCollator *coll = NULL;
4844   coll = ucol_open("root", &status);
4845   if(U_FAILURE(status)) {
4846     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4847     return;
4848   }
4849   ucol_setStrength(coll, UCOL_PRIMARY);
4850   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4851   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4852   if (U_FAILURE(status)) {
4853     log_err("Failed setting atributes\n");
4854     return;
4855   }
4856   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4857   if (sortkey_length > 256) return;
4858
4859   /* we mark the position where the null byte should be written in advance */
4860   sortkey[sortkey_length-1] = 0xAA;
4861
4862   /* we set the buffer size one byte higher than needed */
4863   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4864     sortkey_length+1);
4865
4866   /* no error occurs (for me) */
4867   if (sortkey[sortkey_length-1] == 0xAA) {
4868     log_err("Hit bug at first try\n");
4869   }
4870
4871   /* we mark the position where the null byte should be written again */
4872   sortkey[sortkey_length-1] = 0xAA;
4873
4874   /* this time we set the buffer size to the exact amount needed */
4875   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4876     sortkey_length);
4877
4878   /* now the trailing null byte is not written */
4879   if (sortkey[sortkey_length-1] == 0xAA) {
4880     log_err("Hit bug at second try\n");
4881   }
4882
4883   ucol_close(coll);
4884 }
4885
/*
 * Regression test for the Thai partial sort key problem: two Thai strings
 * that differ only in their second-to-last character are checked for order
 * under the "th" locale.
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
}
4897
/**
 * Checks that "a" sorts before "y" under a rule set that resets on "Ny" and
 * then tailors "a" right after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *ordered[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, ordered, LEN(ordered));
}
4905
4906 static void
4907 TestVI5913(void)
4908 {
4909     UErrorCode status = U_ZERO_ERROR;
4910     int32_t i, j;
4911     UCollator *coll =NULL;
4912     uint8_t  resColl[100], expColl[100];
4913     int32_t  rLen, tLen, ruleLen, sLen, kLen;
4914     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4915     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4916     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4917     static const UChar tData[][20]={
4918         {0x1EAC, 0},
4919         {0x0041, 0x0323, 0x0302, 0},
4920         {0x1EA0, 0x0302, 0},
4921         {0x00C2, 0x0323, 0},
4922         {0x1ED8, 0},  /* O with dot and circumflex */
4923         {0x1ECC, 0x0302, 0},
4924         {0x1EB7, 0},
4925         {0x1EA1, 0x0306, 0},
4926     };
4927     static const UChar tailorData[][20]={
4928         {0x1FA2, 0},  /* Omega with 3 combining marks */
4929         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4930         {0x1FF3, 0x0313, 0x0300, 0},
4931         {0x1F60, 0x0300, 0x0345, 0},
4932         {0x1F62, 0x0345, 0},
4933         {0x1FA0, 0x0300, 0},
4934     };
4935     static const UChar tailorData2[][20]={
4936         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4937         {0x0073, 0x0323, 0x030C, 0},
4938         {0x0073, 0x030C, 0x0323, 0},
4939     };
4940     static const UChar tailorData3[][20]={
4941         {0x007a, 0},  /*  z */
4942         {0x0061, 0x0065, 0},  /*  a + e */
4943         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4944         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4945         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4946         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4947         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4948         {0x00EA, 0},  /* e with circumflex  */
4949     };
4950
4951     /* Test Vietnamese sort. */
4952     coll = ucol_open("vi", &status);
4953     if(U_FAILURE(status)) {
4954         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4955         return;
4956     }
4957     log_verbose("\n\nVI collation:");
4958     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4959         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4960     }
4961     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4962         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4963     }
4964     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4965         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4966     }
4967     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4968         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4969     }
4970
4971     for (j=0; j<8; j++) {
4972         tLen = u_strlen(tData[j]);
4973         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4974         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4975         for(i = 0; i<rLen; i++) {
4976             log_verbose(" %02X", resColl[i]);
4977         }
4978     }
4979
4980     ucol_close(coll);
4981
4982     /* Test Romanian sort. */
4983     coll = ucol_open("ro", &status);
4984     log_verbose("\n\nRO collation:");
4985     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4986         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4987     }
4988     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4989         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4990     }
4991     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4992         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4993     }
4994
4995     for (j=4; j<8; j++) {
4996         tLen = u_strlen(tData[j]);
4997         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4998         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4999         for(i = 0; i<rLen; i++) {
5000             log_verbose(" %02X", resColl[i]);
5001         }
5002     }
5003     ucol_close(coll);
5004
5005     /* Test the precomposed Greek character with 3 combining marks. */
5006     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5007     ruleLen = u_strlen(rule);
5008     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5009     if (U_FAILURE(status)) {
5010         log_err("ucol_openRules failed with %s\n", u_errorName(status));
5011         return;
5012     }
5013     sLen = u_strlen(tailorData[0]);
5014     for (j=1; j<6; j++) {
5015         tLen = u_strlen(tailorData[j]);
5016         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
5017             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5018         }
5019     }
5020     /* Test getSortKey. */
5021     tLen = u_strlen(tailorData[0]);
5022     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5023     for (j=0; j<6; j++) {
5024         tLen = u_strlen(tailorData[j]);
5025         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5026         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5027             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5028             for(i = 0; i<rLen; i++) {
5029                 log_err(" %02X", resColl[i]);
5030             }
5031         }
5032     }
5033     ucol_close(coll);
5034
5035     log_verbose("\n\nTailoring test for s with caron:");
5036     ruleLen = u_strlen(rule2);
5037     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5038     tLen = u_strlen(tailorData2[0]);
5039     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5040     for (j=1; j<3; j++) {
5041         tLen = u_strlen(tailorData2[j]);
5042         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5043         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5044             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5045             for(i = 0; i<rLen; i++) {
5046                 log_err(" %02X", resColl[i]);
5047             }
5048         }
5049     }
5050     ucol_close(coll);
5051
5052     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5053     ruleLen = u_strlen(rule3);
5054     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5055     tLen = u_strlen(tailorData3[3]);
5056     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5057     for (j=4; j<6; j++) {
5058         tLen = u_strlen(tailorData3[j]);
5059         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5060
5061         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5062             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5063             for(i = 0; i<rLen; i++) {
5064                 log_err(" %02X", resColl[i]);
5065             }
5066         }
5067
5068         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5069          for(i = 0; i<rLen; i++) {
5070              log_verbose(" %02X", resColl[i]);
5071          }
5072     }
5073     ucol_close(coll);
5074 }
5075
5076 static void
5077 TestTailor6179(void)
5078 {
5079     UErrorCode status = U_ZERO_ERROR;
5080     int32_t i;
5081     UCollator *coll =NULL;
5082     uint8_t  resColl[100];
5083     int32_t  rLen, tLen, ruleLen;
5084     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5085     UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5086             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5087             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5088             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5089     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5090     UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5091             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5092             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5093             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5094             0x3C,0x3C,0x20,0x62,0};
5095
5096     UChar tData1[][20]={
5097         {0x61, 0},
5098         {0x62, 0},
5099         { 0xFDD0,0x009E, 0}
5100     };
5101     UChar tData2[][20]={
5102             {0x61, 0},
5103             {0x62, 0},
5104             { 0xFDD0,0x009E, 0}
5105      };
5106
5107     /*
5108      * These values from FractionalUCA.txt will change,
5109      * and need to be updated here.
5110      */
5111     uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
5112     uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0};
5113     uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5114     uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5115
5116     /* Test [Last Primary ignorable] */
5117
5118     log_verbose("\n\nTailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b ");
5119     ruleLen = u_strlen(rule1);
5120     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5121     if (U_FAILURE(status)) {
5122         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5123         return;
5124     }
5125     tLen = u_strlen(tData1[0]);
5126     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5127     if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
5128         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5129         for(i = 0; i<rLen; i++) {
5130             log_err(" %02X", resColl[i]);
5131         }
5132     }
5133     tLen = u_strlen(tData1[1]);
5134     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5135     if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
5136         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5137         for(i = 0; i<rLen; i++) {
5138             log_err(" %02X", resColl[i]);
5139         }
5140     }
5141     ucol_close(coll);
5142
5143
5144     /* Test [Last Secondary ignorable] */
5145     log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b ");
5146     ruleLen = u_strlen(rule1);
5147     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5148     if (U_FAILURE(status)) {
5149         log_err("Tailoring test: &[last primary ignorable] failed!");
5150         return;
5151     }
5152     tLen = u_strlen(tData2[0]);
5153     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5154     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5155     for(i = 0; i<rLen; i++) {
5156         log_verbose(" %02X", resColl[i]);
5157     }
5158     if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
5159         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5160         for(i = 0; i<rLen; i++) {
5161             log_err(" %02X", resColl[i]);
5162         }
5163     }
5164     tLen = u_strlen(tData2[1]);
5165     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5166     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5167     for(i = 0; i<rLen; i++) {
5168         log_verbose(" %02X", resColl[i]);
5169     }
5170     if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
5171         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5172         for(i = 0; i<rLen; i++) {
5173             log_err(" %02X", resColl[i]);
5174         }
5175     }
5176     ucol_close(coll);
5177 }
5178
5179 static void
5180 TestUCAPrecontext(void)
5181 {
5182     UErrorCode status = U_ZERO_ERROR;
5183     int32_t i, j;
5184     UCollator *coll =NULL;
5185     uint8_t  resColl[100], prevColl[100];
5186     int32_t  rLen, tLen, ruleLen;
5187     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5188     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5189     /* & l middle-dot << a  a is an expansion. */
5190
5191     UChar tData1[][20]={
5192             { 0xb7, 0},  /* standalone middle dot(0xb7) */
5193             { 0x387, 0}, /* standalone middle dot(0x387) */
5194             { 0x61, 0},  /* a */
5195             { 0x6C, 0},  /* l */
5196             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5197             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5198             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5199             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5200             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5201             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5202             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5203      };
5204
5205     log_verbose("\n\nEN collation:");
5206     coll = ucol_open("en", &status);
5207     if (U_FAILURE(status)) {
5208         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5209         return;
5210     }
5211     for (j=0; j<11; j++) {
5212         tLen = u_strlen(tData1[j]);
5213         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5214         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5215             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5216                     j, tData1[j]);
5217         }
5218         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5219         for(i = 0; i<rLen; i++) {
5220             log_verbose(" %02X", resColl[i]);
5221         }
5222         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5223      }
5224      ucol_close(coll);
5225
5226
5227      log_verbose("\n\nJA collation:");
5228      coll = ucol_open("ja", &status);
5229      if (U_FAILURE(status)) {
5230          log_err("Tailoring test: &z <<a|- failed!");
5231          return;
5232      }
5233      for (j=0; j<11; j++) {
5234          tLen = u_strlen(tData1[j]);
5235          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5236          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5237              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5238                      j, tData1[j]);
5239          }
5240          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5241          for(i = 0; i<rLen; i++) {
5242              log_verbose(" %02X", resColl[i]);
5243          }
5244          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5245       }
5246       ucol_close(coll);
5247
5248
5249       log_verbose("\n\nTailoring test: & middle dot < a ");
5250       ruleLen = u_strlen(rule1);
5251       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5252       if (U_FAILURE(status)) {
5253           log_err("Tailoring test: & middle dot < a failed!");
5254           return;
5255       }
5256       for (j=0; j<11; j++) {
5257           tLen = u_strlen(tData1[j]);
5258           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5259           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5260               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5261                       j, tData1[j]);
5262           }
5263           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5264           for(i = 0; i<rLen; i++) {
5265               log_verbose(" %02X", resColl[i]);
5266           }
5267           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5268        }
5269        ucol_close(coll);
5270
5271
5272        log_verbose("\n\nTailoring test: & l middle-dot << a ");
5273        ruleLen = u_strlen(rule2);
5274        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5275        if (U_FAILURE(status)) {
5276            log_err("Tailoring test: & l middle-dot << a failed!");
5277            return;
5278        }
5279        for (j=0; j<11; j++) {
5280            tLen = u_strlen(tData1[j]);
5281            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5282            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5283                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5284                        j, tData1[j]);
5285            }
5286            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5287                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5288                        j, tData1[j]);
5289            }
5290            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5291            for(i = 0; i<rLen; i++) {
5292                log_verbose(" %02X", resColl[i]);
5293            }
5294            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5295         }
5296         ucol_close(coll);
5297 }
5298
5299 static void
5300 TestOutOfBuffer5468(void)
5301 {
5302     static const char *test = "\\u4e00";
5303     UChar ustr[256];
5304     int32_t ustr_length = u_unescape(test, ustr, 256);
5305     unsigned char shortKeyBuf[1];
5306     int32_t sortkey_length;
5307     UErrorCode status = U_ZERO_ERROR;
5308     static UCollator *coll = NULL;
5309
5310     coll = ucol_open("root", &status);
5311     if(U_FAILURE(status)) {
5312       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5313       return;
5314     }
5315     ucol_setStrength(coll, UCOL_PRIMARY);
5316     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5317     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5318     if (U_FAILURE(status)) {
5319       log_err("Failed setting atributes\n");
5320       return;
5321     }
5322
5323     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5324     if (sortkey_length != 4) {
5325         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5326     }
5327     log_verbose("length of sortKey is %d", sortkey_length);
5328     ucol_close(coll);
5329 }
5330
5331 #define TSKC_DATA_SIZE 5
5332 #define TSKC_BUF_SIZE  50
5333 static void
5334 TestSortKeyConsistency(void)
5335 {
5336     UErrorCode icuRC = U_ZERO_ERROR;
5337     UCollator* ucol;
5338     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5339
5340     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5341     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5342     int32_t i, j, i2;
5343
5344     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5345     if (U_FAILURE(icuRC))
5346     {
5347         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5348         return;
5349     }
5350
5351     for (i = 0; i < TSKC_DATA_SIZE; i++)
5352     {
5353         UCharIterator uiter;
5354         uint32_t state[2] = { 0, 0 };
5355         int32_t dataLen = i+1;
5356         for (j=0; j<TSKC_BUF_SIZE; j++)
5357             bufFull[i][j] = bufPart[i][j] = 0;
5358
5359         /* Full sort key */
5360         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5361
5362         /* Partial sort key */
5363         uiter_setString(&uiter, data, dataLen);
5364         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5365         if (U_FAILURE(icuRC))
5366         {
5367             log_err("ucol_nextSortKeyPart failed\n");
5368             ucol_close(ucol);
5369             return;
5370         }
5371
5372         for (i2=0; i2<i; i2++)
5373         {
5374             UBool fullMatch = TRUE;
5375             UBool partMatch = TRUE;
5376             for (j=0; j<TSKC_BUF_SIZE; j++)
5377             {
5378                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5379                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5380             }
5381             if (fullMatch != partMatch) {
5382                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5383                                   : "partial key was consistent, but full key changed\n");
5384                 ucol_close(ucol);
5385                 return;
5386             }
5387         }
5388     }
5389
5390     /*=============================================*/
5391    ucol_close(ucol);
5392 }
5393
5394 /* ticket: 6101 */
5395 static void TestCroatianSortKey(void) {
5396     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5397     UErrorCode status = U_ZERO_ERROR;
5398     UCollator *ucol;
5399     UCharIterator iter;
5400
5401     static const UChar text[] = { 0x0044, 0xD81A };
5402
5403     size_t length = sizeof(text)/sizeof(*text);
5404
5405     uint8_t textSortKey[32];
5406     size_t lenSortKey = 32;
5407     size_t actualSortKeyLen;
5408     uint32_t uStateInfo[2] = { 0, 0 };
5409
5410     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5411     if (U_FAILURE(status)) {
5412         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5413         return;
5414     }
5415
5416     uiter_setString(&iter, text, length);
5417
5418     actualSortKeyLen = ucol_nextSortKeyPart(
5419         ucol, &iter, (uint32_t*)uStateInfo,
5420         textSortKey, lenSortKey, &status
5421         );
5422
5423     if (actualSortKeyLen == lenSortKey) {
5424         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5425     }
5426
5427     ucol_close(ucol);
5428 }
5429
5430 /* ticket: 6140 */
5431 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5432  * they are both Hiragana and Katakana
5433  */
5434 #define SORTKEYLEN 50
5435 static void TestHiragana(void) {
5436     UErrorCode status = U_ZERO_ERROR;
5437     UCollator* ucol;
5438     UCollationResult strcollresult;
5439     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5440     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5441     int32_t data1Len = sizeof(data1)/sizeof(*data1);
5442     int32_t data2Len = sizeof(data2)/sizeof(*data2);
5443     int32_t i, j;
5444     uint8_t sortKey1[SORTKEYLEN];
5445     uint8_t sortKey2[SORTKEYLEN];
5446
5447     UCharIterator uiter1;
5448     UCharIterator uiter2;
5449     uint32_t state1[2] = { 0, 0 };
5450     uint32_t state2[2] = { 0, 0 };
5451     int32_t keySize1;
5452     int32_t keySize2;
5453
5454     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5455             &status);
5456     if (U_FAILURE(status)) {
5457         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5458         return;
5459     }
5460
5461     /* Start of full sort keys */
5462     /* Full sort key1 */
5463     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5464     /* Full sort key2 */
5465     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5466     if (keySize1 == keySize2) {
5467         for (i = 0; i < keySize1; i++) {
5468             if (sortKey1[i] != sortKey2[i]) {
5469                 log_err("Full sort keys are different. Should be equal.");
5470             }
5471         }
5472     } else {
5473         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5474     }
5475     /* End of full sort keys */
5476
5477     /* Start of partial sort keys */
5478     /* Partial sort key1 */
5479     uiter_setString(&uiter1, data1, data1Len);
5480     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5481     /* Partial sort key2 */
5482     uiter_setString(&uiter2, data2, data2Len);
5483     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5484     if (U_SUCCESS(status) && keySize1 == keySize2) {
5485         for (j = 0; j < keySize1; j++) {
5486             if (sortKey1[j] != sortKey2[j]) {
5487                 log_err("Partial sort keys are different. Should be equal");
5488             }
5489         }
5490     } else {
5491         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5492     }
5493     /* End of partial sort keys */
5494
5495     /* Start of strcoll */
5496     /* Use ucol_strcoll() to determine ordering */
5497     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5498     if (strcollresult != UCOL_EQUAL) {
5499         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5500     }
5501
5502     ucol_close(ucol);
5503 }
5504
5505 /* Convenient struct for running collation tests */
5506 typedef struct {
5507   const UChar source[MAX_TOKEN_LEN];  /* String on left */
5508   const UChar target[MAX_TOKEN_LEN];  /* String on right */
5509   UCollationResult result;            /* -1, 0 or +1, depending on collation */
5510 } OneTestCase;
5511
5512 /*
5513  * Utility function to test one collation test case.
5514  * @param testcases Array of test cases.
5515  * @param n_testcases Size of the array testcases.
5516  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
5517  * @param n_rules Size of the array str_rules.
5518  */
5519 static void doTestOneTestCase(const OneTestCase testcases[],
5520                               int n_testcases,
5521                               const char* str_rules[],
5522                               int n_rules)
5523 {
5524   int rule_no, testcase_no;
5525   UChar rule[500];
5526   int32_t length = 0;
5527   UErrorCode status = U_ZERO_ERROR;
5528   UParseError parse_error;
5529   UCollator  *myCollation;
5530
5531   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5532
5533     length = u_unescape(str_rules[rule_no], rule, 500);
5534     if (length == 0) {
5535         log_err("ERROR: The rule cannot be unescaped: %s\n");
5536         return;
5537     }
5538     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5539     if(U_FAILURE(status)){
5540         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5541         return;
5542     }
5543     log_verbose("Testing the <<* syntax\n");
5544     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5545     ucol_setStrength(myCollation, UCOL_TERTIARY);
5546     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5547       doTest(myCollation,
5548              testcases[testcase_no].source,
5549              testcases[testcase_no].target,
5550              testcases[testcase_no].result
5551              );
5552     }
5553     ucol_close(myCollation);
5554   }
5555 }
5556
5557 const static OneTestCase rangeTestcases[] = {
5558   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
5559   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
5560   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
5561
5562   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
5563   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
5564   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
5565   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
5566   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
5567
5568   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
5569   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
5570   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
5571   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
5572
5573   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
5574   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
5575   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
5576   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
5577   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
5578   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
5579   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
5580   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
5581 };
5582
5583 static int nRangeTestcases = LEN(rangeTestcases);
5584
5585 const static OneTestCase rangeTestcasesSupplemental[] = {
5586   { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
5587   { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
5588   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
5589   { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5590   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5591   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5592   { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5593 };
5594
5595 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5596
5597 const static OneTestCase rangeTestcasesQwerty[] = {
5598   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
5599   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
5600
5601   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
5602   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
5603
5604   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
5605   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
5606
5607   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
5608   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
5609
5610   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5611     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
5612   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5613     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
5614 };
5615
5616 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5617
5618 static void TestSameStrengthList(void)
5619 {
5620   const char* strRules[] = {
5621     /* Normal */
5622     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
5623
5624     /* Lists */
5625     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5626   };
5627   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5628 }
5629
5630 static void TestSameStrengthListQuoted(void)
5631 {
5632   const char* strRules[] = {
5633     /* Lists with quoted characters */
5634     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5635     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5636
5637     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5638     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5639
5640     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
5641     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5642   };
5643   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5644 }
5645
5646 static void TestSameStrengthListSupplemental(void)
5647 {
5648   const char* strRules[] = {
5649     "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5650     "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5651     "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5652     "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5653   };
5654   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5655 }
5656
5657 static void TestSameStrengthListQwerty(void)
5658 {
5659   const char* strRules[] = {
5660     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5661     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5662     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5663     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5664     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5665
5666     /* Quoted characters also will work if two quoted characters are not consecutive.  */
5667     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5668
5669     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5670     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5671
5672  };
5673   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5674 }
5675
5676 static void TestSameStrengthListQuotedQwerty(void)
5677 {
5678   const char* strRules[] = {
5679     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5680     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5681     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
5682
5683     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5684     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5685    };
5686   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5687 }
5688
5689 static void TestSameStrengthListRanges(void)
5690 {
5691   const char* strRules[] = {
5692     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5693   };
5694   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5695 }
5696
5697 static void TestSameStrengthListSupplementalRanges(void)
5698 {
5699   const char* strRules[] = {
5700     "&\\ufffe<*\\uffff-\\U00010002",
5701   };
5702   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5703 }
5704
5705 static void TestSpecialCharacters(void)
5706 {
5707   const char* strRules[] = {
5708     /* Normal */
5709     "&';'<'+'<','<'-'<'&'<'*'",
5710
5711     /* List */
5712     "&';'<*'+,-&*'",
5713
5714     /* Range */
5715     "&';'<*'+'-'-&*'",
5716   };
5717
5718   const static OneTestCase specialCharacterStrings[] = {
5719     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
5720     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
5721     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
5722     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
5723   };
5724   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5725 }
5726
5727 static void TestPrivateUseCharacters(void)
5728 {
5729   const char* strRules[] = {
5730     /* Normal */
5731     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5732     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5733   };
5734
5735   const static OneTestCase privateUseCharacterStrings[] = {
5736     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5737     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5738     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5739     { {0xe2da}, {0xe2db}, UCOL_LESS },
5740     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5741     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5742   };
5743   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5744 }
5745
5746 static void TestPrivateUseCharactersInList(void)
5747 {
5748   const char* strRules[] = {
5749     /* List */
5750     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5751     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5752     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5753   };
5754
5755   const static OneTestCase privateUseCharacterStrings[] = {
5756     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5757     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5758     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5759     { {0xe2da}, {0xe2db}, UCOL_LESS },
5760     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5761     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5762   };
5763   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5764 }
5765
5766 static void TestPrivateUseCharactersInRange(void)
5767 {
5768   const char* strRules[] = {
5769     /* Range */
5770     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5771     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5772     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5773   };
5774
5775   const static OneTestCase privateUseCharacterStrings[] = {
5776     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5777     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5778     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5779     { {0xe2da}, {0xe2db}, UCOL_LESS },
5780     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5781     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5782   };
5783   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5784 }
5785
5786 static void TestInvalidListsAndRanges(void)
5787 {
5788   const char* invalidRules[] = {
5789     /* Range not in starred expression */
5790     "&\\ufffe<\\uffff-\\U00010002",
5791
5792     /* Range without start */
5793     "&a<*-c",
5794
5795     /* Range without end */
5796     "&a<*b-",
5797
5798     /* More than one hyphen */
5799     "&a<*b-g-l",
5800
5801     /* Range in the wrong order */
5802     "&a<*k-b",
5803
5804   };
5805
5806   UChar rule[500];
5807   UErrorCode status = U_ZERO_ERROR;
5808   UParseError parse_error;
5809   int n_rules = LEN(invalidRules);
5810   int rule_no;
5811   int length;
5812   UCollator  *myCollation;
5813
5814   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5815
5816     length = u_unescape(invalidRules[rule_no], rule, 500);
5817     if (length == 0) {
5818         log_err("ERROR: The rule cannot be unescaped: %s\n");
5819         return;
5820     }
5821     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5822     if(!U_FAILURE(status)){
5823       log_err("ERROR: Could not cause a failure as expected: \n");
5824     }
5825     status = U_ZERO_ERROR;
5826   }
5827 }
5828
5829 /*
5830  * This test ensures that characters placed before a character in a different script have the same lead byte
5831  * in their collation key before and after script reordering.
5832  */
5833 static void TestBeforeRuleWithScriptReordering(void)
5834 {
5835     UParseError error;
5836     UErrorCode status = U_ZERO_ERROR;
5837     UCollator  *myCollation;
5838     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5839     UChar rules[500];
5840     uint32_t rulesLength = 0;
5841     int32_t reorderCodes[1] = {USCRIPT_GREEK};
5842     UCollationResult collResult;
5843
5844     uint8_t baseKey[256];
5845     uint32_t baseKeyLength;
5846     uint8_t beforeKey[256];
5847     uint32_t beforeKeyLength;
5848
5849     UChar base[] = { 0x03b1 }; /* base */
5850     int32_t baseLen = sizeof(base)/sizeof(*base);
5851
5852     UChar before[] = { 0x0e01 }; /* ko kai */
5853     int32_t beforeLen = sizeof(before)/sizeof(*before);
5854
5855     /*UChar *data[] = { before, base };
5856     genericRulesStarter(srules, data, 2);*/
5857
5858     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5859
5860
5861     /* build collator */
5862     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5863
5864     rulesLength = u_unescape(srules, rules, LEN(rules));
5865     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5866     if(U_FAILURE(status)) {
5867         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5868         return;
5869     }
5870
5871     /* check collation results - before rule applied but not script reordering */
5872     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5873     if (collResult != UCOL_GREATER) {
5874         log_err("Collation result not correct before script reordering = %d\n", collResult);
5875     }
5876
5877     /* check the lead byte of the collation keys before script reordering */
5878     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5879     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5880     if (baseKey[0] != beforeKey[0]) {
5881       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5882    }
5883
5884     /* reorder the scripts */
5885     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5886     if(U_FAILURE(status)) {
5887         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5888         return;
5889     }
5890
5891     /* check collation results - before rule applied and after script reordering */
5892     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5893     if (collResult != UCOL_GREATER) {
5894         log_err("Collation result not correct after script reordering = %d\n", collResult);
5895     }
5896
5897     /* check the lead byte of the collation keys after script reordering */
5898     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5899     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5900     if (baseKey[0] != beforeKey[0]) {
5901         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5902     }
5903
5904     ucol_close(myCollation);
5905 }
5906
5907 /*
5908  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5909  */
5910 static void TestNonLeadBytesDuringCollationReordering(void)
5911 {
5912     UErrorCode status = U_ZERO_ERROR;
5913     UCollator  *myCollation;
5914     int32_t reorderCodes[1] = {USCRIPT_GREEK};
5915     UCollationResult collResult;
5916
5917     uint8_t baseKey[256];
5918     uint32_t baseKeyLength;
5919     uint8_t reorderKey[256];
5920     uint32_t reorderKeyLength;
5921
5922     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5923
5924     int i;
5925
5926
5927     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5928
5929     /* build collator tertiary */
5930     myCollation = ucol_open("", &status);
5931     ucol_setStrength(myCollation, UCOL_TERTIARY);
5932     if(U_FAILURE(status)) {
5933         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5934         return;
5935     }
5936     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5937
5938     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5939     if(U_FAILURE(status)) {
5940         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5941         return;
5942     }
5943     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5944
5945     if (baseKeyLength != reorderKeyLength) {
5946         log_err("Key lengths not the same during reordering.\n", collResult);
5947         return;
5948     }
5949
5950     for (i = 1; i < baseKeyLength; i++) {
5951         if (baseKey[i] != reorderKey[i]) {
5952             log_err("Collation key bytes not the same at position %d.\n", i);
5953             return;
5954         }
5955     }
5956     ucol_close(myCollation);
5957
5958     /* build collator quaternary */
5959     myCollation = ucol_open("", &status);
5960     ucol_setStrength(myCollation, UCOL_QUATERNARY);
5961     if(U_FAILURE(status)) {
5962         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5963         return;
5964     }
5965     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5966
5967     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5968     if(U_FAILURE(status)) {
5969         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5970         return;
5971     }
5972     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5973
5974     if (baseKeyLength != reorderKeyLength) {
5975         log_err("Key lengths not the same during reordering.\n", collResult);
5976         return;
5977     }
5978
5979     for (i = 1; i < baseKeyLength; i++) {
5980         if (baseKey[i] != reorderKey[i]) {
5981             log_err("Collation key bytes not the same at position %d.\n", i);
5982             return;
5983         }
5984     }
5985     ucol_close(myCollation);
5986 }
5987
5988 /*
5989  * Test reordering API.
5990  */
5991 static void TestReorderingAPI(void)
5992 {
5993     UErrorCode status = U_ZERO_ERROR;
5994     UCollator  *myCollation;
5995     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5996     UCollationResult collResult;
5997     int32_t retrievedReorderCodesLength;
5998     UChar greekString[] = { 0x03b1 };
5999     UChar punctuationString[] = { 0x203e };
6000
6001     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6002
6003     /* build collator tertiary */
6004     myCollation = ucol_open("", &status);
6005     ucol_setStrength(myCollation, UCOL_TERTIARY);
6006     if(U_FAILURE(status)) {
6007         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6008         return;
6009     }
6010
6011     /* set the reorderding */
6012     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6013     if (U_FAILURE(status)) {
6014         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6015         return;
6016     }
6017
6018     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6019     if (status != U_BUFFER_OVERFLOW_ERROR) {
6020         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6021         return;
6022     }
6023     status = U_ZERO_ERROR;
6024     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6025         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6026         return;
6027     }
6028     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6029     if (collResult != UCOL_LESS) {
6030         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6031         return;
6032     }
6033
6034     /* clear the reordering */
6035     ucol_setReorderCodes(myCollation, NULL, 0, &status);
6036     if (U_FAILURE(status)) {
6037         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6038         return;
6039     }
6040
6041     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6042     if (retrievedReorderCodesLength != 0) {
6043         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6044         return;
6045     }
6046
6047     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6048     if (collResult != UCOL_GREATER) {
6049         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6050         return;
6051     }
6052
6053     ucol_close(myCollation);
6054 }
6055
6056 /*
6057  * Utility function to test one collation reordering test case.
6058  * @param testcases Array of test cases.
6059  * @param n_testcases Size of the array testcases.
6060  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
6061  * @param n_rules Size of the array str_rules.
6062  */
6063 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6064 {
6065     int testCaseNum;
6066     UErrorCode status = U_ZERO_ERROR;
6067     UCollator  *myCollation;
6068
6069     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6070         myCollation = ucol_open("", &status);
6071         if (U_FAILURE(status)) {
6072             log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6073             return;
6074         }
6075         ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6076         if(U_FAILURE(status)) {
6077             log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6078             return;
6079         }
6080
6081         for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6082             doTest(myCollation,
6083                 testCases[testCaseNum].source,
6084                 testCases[testCaseNum].target,
6085                 testCases[testCaseNum].result
6086             );
6087         }
6088         ucol_close(myCollation);
6089     }
6090 }
6091
6092 static void TestGreekFirstReorder(void)
6093 {
6094     const char* strRules[] = {
6095         "[reorder Grek]"
6096     };
6097
6098     const int32_t apiRules[] = {
6099         USCRIPT_GREEK
6100     };
6101
6102     const static OneTestCase privateUseCharacterStrings[] = {
6103         { {0x0391}, {0x0391}, UCOL_EQUAL },
6104         { {0x0041}, {0x0391}, UCOL_GREATER },
6105         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6106         { {0x0060}, {0x0391}, UCOL_LESS },
6107         { {0x0391}, {0xe2dc}, UCOL_LESS },
6108         { {0x0391}, {0x0060}, UCOL_GREATER },
6109     };
6110
6111     /* Test rules creation */
6112     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6113
6114     /* Test collation reordering API */
6115     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6116 }
6117
6118 static void TestGreekLastReorder(void)
6119 {
6120     const char* strRules[] = {
6121         "[reorder Zzzz Grek]"
6122     };
6123
6124     const int32_t apiRules[] = {
6125         USCRIPT_UNKNOWN, USCRIPT_GREEK
6126     };
6127
6128     const static OneTestCase privateUseCharacterStrings[] = {
6129         { {0x0391}, {0x0391}, UCOL_EQUAL },
6130         { {0x0041}, {0x0391}, UCOL_LESS },
6131         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6132         { {0x0060}, {0x0391}, UCOL_LESS },
6133         { {0x0391}, {0xe2dc}, UCOL_GREATER },
6134     };
6135
6136     /* Test rules creation */
6137     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6138
6139     /* Test collation reordering API */
6140     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6141 }
6142
6143 static void TestNonScriptReorder(void)
6144 {
6145     const char* strRules[] = {
6146         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6147     };
6148
6149     const int32_t apiRules[] = {
6150         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6151         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6152         UCOL_REORDER_CODE_CURRENCY
6153     };
6154
6155     const static OneTestCase privateUseCharacterStrings[] = {
6156         { {0x0391}, {0x0041}, UCOL_LESS },
6157         { {0x0041}, {0x0391}, UCOL_GREATER },
6158         { {0x0060}, {0x0041}, UCOL_LESS },
6159         { {0x0060}, {0x0391}, UCOL_GREATER },
6160         { {0x0024}, {0x0041}, UCOL_GREATER },
6161     };
6162
6163     /* Test rules creation */
6164     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6165
6166     /* Test collation reordering API */
6167     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6168 }
6169
6170 static void TestHaniReorder(void)
6171 {
6172     const char* strRules[] = {
6173         "[reorder Hani]"
6174     };
6175     const int32_t apiRules[] = {
6176         USCRIPT_HAN
6177     };
6178
6179     const static OneTestCase privateUseCharacterStrings[] = {
6180         { {0x4e00}, {0x0041}, UCOL_LESS },
6181         { {0x4e00}, {0x0060}, UCOL_GREATER },
6182         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6183         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6184         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6185         { {0xfa27}, {0x0041}, UCOL_LESS },
6186         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6187     };
6188
6189     /* Test rules creation */
6190     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6191
6192     /* Test collation reordering API */
6193     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6194 }
6195
6196 static void TestMultipleReorder()
6197 {
6198     const char* strRules[] = {
6199         "[reorder Grek Zzzz DIGIT Latn Hani]"
6200     };
6201
6202     const int32_t apiRules[] = {
6203         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6204     };
6205
6206     const static OneTestCase collationTestCases[] = {
6207         { {0x0391}, {0x0041}, UCOL_LESS},
6208         { {0x0031}, {0x0041}, UCOL_LESS},
6209         { {0x0041}, {0x4e00}, UCOL_LESS},
6210     };
6211
6212     /* Test rules creation */
6213     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6214
6215     /* Test collation reordering API */
6216     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6217 }
6218
/*
 * Compares two zero-terminated byte sequences.
 * Returns 0 when both are identical up to and including the terminating zero,
 * -1 when the first differing byte of a is smaller than that of b, 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  /* Walk both sequences in step while they agree. */
  while (*a == *b) {
    if (*a == 0) {
      /* Reached the terminator on both sides without a difference. */
      return 0;
    }
    ++a;
    ++b;
  }

  /* The bytes differ here, so "not greater" means "smaller". */
  if (*a > *b) {
    return 1;
  }
  return -1;
}
6228
6229 static void TestImport(void)
6230 {
6231     UCollator* vicoll;
6232     UCollator* escoll;
6233     UCollator* viescoll;
6234     UCollator* importviescoll;
6235     UParseError error;
6236     UErrorCode status = U_ZERO_ERROR;
6237     UChar* virules;
6238     int32_t viruleslength;
6239     UChar* esrules;
6240     int32_t esruleslength;
6241     UChar* viesrules;
6242     int32_t viesruleslength;
6243     char srules[500] = "[import vi][import es]";
6244     UChar rules[500];
6245     uint32_t length = 0;
6246     int32_t itemCount;
6247     int32_t i, k;
6248     UChar32 start;
6249     UChar32 end;
6250     UChar str[500];
6251     int32_t strLength;
6252
6253     uint8_t sk1[500];
6254     uint8_t sk2[500];
6255
6256     UBool b;
6257     USet* tailoredSet;
6258     USet* importTailoredSet;
6259
6260
6261     vicoll = ucol_open("vi", &status);
6262     if(U_FAILURE(status)){
6263         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6264         return;
6265     }
6266
6267     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6268     escoll = ucol_open("es", &status);
6269     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6270     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6271     viesrules[0] = 0;
6272     u_strcat(viesrules, virules);
6273     u_strcat(viesrules, esrules);
6274     viesruleslength = viruleslength + esruleslength;
6275     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6276
6277     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6278     length = u_unescape(srules, rules, 500);
6279     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6280     if(U_FAILURE(status)){
6281         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6282         return;
6283     }
6284
6285     tailoredSet = ucol_getTailoredSet(viescoll, &status);
6286     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6287
6288     if(!uset_equals(tailoredSet, importTailoredSet)){
6289         log_err("Tailored sets not equal");
6290     }
6291
6292     uset_close(importTailoredSet);
6293
6294     itemCount = uset_getItemCount(tailoredSet);
6295
6296     for( i = 0; i < itemCount; i++){
6297         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6298         if(strLength < 2){
6299             for (; start <= end; start++){
6300                 k = 0;
6301                 U16_APPEND(str, k, 500, start, b);
6302                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
6303                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6304                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6305                     log_err("Sort key for %s not equal\n", str);
6306                     break;
6307                 }
6308             }
6309         }else{
6310             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6311             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6312             if(compare_uint8_t_arrays(sk1, sk2) != 0){
6313                 log_err("ZZSort key for %s not equal\n", str);
6314                 break;
6315             }
6316
6317         }
6318     }
6319
6320     uset_close(tailoredSet);
6321
6322     uprv_free(viesrules);
6323
6324     ucol_close(vicoll);
6325     ucol_close(escoll);
6326     ucol_close(viescoll);
6327     ucol_close(importviescoll);
6328 }
6329
6330 static void TestImportWithType(void)
6331 {
6332     UCollator* vicoll;
6333     UCollator* decoll;
6334     UCollator* videcoll;
6335     UCollator* importvidecoll;
6336     UParseError error;
6337     UErrorCode status = U_ZERO_ERROR;
6338     const UChar* virules;
6339     int32_t viruleslength;
6340     const UChar* derules;
6341     int32_t deruleslength;
6342     UChar* viderules;
6343     int32_t videruleslength;
6344     const char srules[500] = "[import vi][import de-u-co-phonebk]";
6345     UChar rules[500];
6346     uint32_t length = 0;
6347     int32_t itemCount;
6348     int32_t i, k;
6349     UChar32 start;
6350     UChar32 end;
6351     UChar str[500];
6352     int32_t strLength;
6353
6354     uint8_t sk1[500];
6355     uint8_t sk2[500];
6356
6357     USet* tailoredSet;
6358     USet* importTailoredSet;
6359
6360     vicoll = ucol_open("vi", &status);
6361     if(U_FAILURE(status)){
6362         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6363         return;
6364     }
6365     virules = ucol_getRules(vicoll, &viruleslength);
6366     /* decoll = ucol_open("de@collation=phonebook", &status); */
6367     decoll = ucol_open("de-u-co-phonebk", &status);
6368     if(U_FAILURE(status)){
6369         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6370         return;
6371     }
6372
6373
6374     derules = ucol_getRules(decoll, &deruleslength);
6375     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6376     viderules[0] = 0;
6377     u_strcat(viderules, virules);
6378     u_strcat(viderules, derules);
6379     videruleslength = viruleslength + deruleslength;
6380     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6381
6382     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6383     length = u_unescape(srules, rules, 500);
6384     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6385     if(U_FAILURE(status)){
6386         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6387         return;
6388     }
6389
6390     tailoredSet = ucol_getTailoredSet(videcoll, &status);
6391     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6392
6393     if(!uset_equals(tailoredSet, importTailoredSet)){
6394         log_err("Tailored sets not equal");
6395     }
6396
6397     uset_close(importTailoredSet);
6398
6399     itemCount = uset_getItemCount(tailoredSet);
6400
6401     for( i = 0; i < itemCount; i++){
6402         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6403         if(strLength < 2){
6404             for (; start <= end; start++){
6405                 k = 0;
6406                 U16_APPEND_UNSAFE(str, k, start);
6407                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
6408                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6409                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6410                     log_err("Sort key for %s not equal\n", str);
6411                     break;
6412                 }
6413             }
6414         }else{
6415             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6416             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6417             if(compare_uint8_t_arrays(sk1, sk2) != 0){
6418                 log_err("Sort key for %s not equal\n", str);
6419                 break;
6420             }
6421
6422         }
6423     }
6424
6425     uset_close(tailoredSet);
6426
6427     uprv_free(viderules);
6428
6429     ucol_close(videcoll);
6430     ucol_close(importvidecoll);
6431     ucol_close(vicoll);
6432     ucol_close(decoll);
6433
6434 }
6435
6436
6437 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
6438
6439 void addMiscCollTest(TestNode** root)
6440 {
6441     TEST(TestRuleOptions);
6442     TEST(TestBeforePrefixFailure);
6443     TEST(TestContractionClosure);
6444     TEST(TestPrefixCompose);
6445     TEST(TestStrCollIdenticalPrefix);
6446     TEST(TestPrefix);
6447     TEST(TestNewJapanese);
6448     /*TEST(TestLimitations);*/
6449     TEST(TestNonChars);
6450     TEST(TestExtremeCompression);
6451     TEST(TestSurrogates);
6452     TEST(TestVariableTopSetting);
6453     TEST(TestBocsuCoverage);
6454     TEST(TestCyrillicTailoring);
6455     TEST(TestCase);
6456     TEST(IncompleteCntTest);
6457     TEST(BlackBirdTest);
6458     TEST(FunkyATest);
6459     TEST(BillFairmanTest);
6460     TEST(RamsRulesTest);
6461     TEST(IsTailoredTest);
6462     TEST(TestCollations);
6463     TEST(TestChMove);
6464     TEST(TestImplicitTailoring);
6465     TEST(TestFCDProblem);
6466     TEST(TestEmptyRule);
6467     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
6468     TEST(TestJ815);
6469     /*TEST(TestJ831);*/ /* we changed lv locale */
6470     TEST(TestBefore);
6471     TEST(TestRedundantRules);
6472     TEST(TestExpansionSyntax);
6473     TEST(TestHangulTailoring);
6474     TEST(TestUCARules);
6475     TEST(TestIncrementalNormalize);
6476     TEST(TestComposeDecompose);
6477     TEST(TestCompressOverlap);
6478     TEST(TestContraction);
6479     TEST(TestExpansion);
6480     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
6481     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
6482     TEST(TestOptimize);
6483     TEST(TestSuppressContractions);
6484     TEST(Alexis2);
6485     TEST(TestHebrewUCA);
6486     TEST(TestPartialSortKeyTermination);
6487     TEST(TestSettings);
6488     TEST(TestEquals);
6489     TEST(TestJ2726);
6490     TEST(NullRule);
6491     TEST(TestNumericCollation);
6492     TEST(TestTibetanConformance);
6493     TEST(TestPinyinProblem);
6494     TEST(TestImplicitGeneration);
6495     TEST(TestSeparateTrees);
6496     TEST(TestBeforePinyin);
6497     TEST(TestBeforeTightening);
6498     /*TEST(TestMoreBefore);*/
6499     TEST(TestTailorNULL);
6500     TEST(TestUpperFirstQuaternary);
6501     TEST(TestJ4960);
6502     TEST(TestJ5223);
6503     TEST(TestJ5232);
6504     TEST(TestJ5367);
6505     TEST(TestHiragana);
6506     TEST(TestSortKeyConsistency);
6507     TEST(TestVI5913);  /* VI, RO tailored rules */
6508     TEST(TestCroatianSortKey);
6509     TEST(TestTailor6179);
6510     TEST(TestUCAPrecontext);
6511     TEST(TestOutOfBuffer5468);
6512     TEST(TestSameStrengthList);
6513
6514     TEST(TestSameStrengthListQuoted);
6515     TEST(TestSameStrengthListSupplemental);
6516     TEST(TestSameStrengthListQwerty);
6517     TEST(TestSameStrengthListQuotedQwerty);
6518     TEST(TestSameStrengthListRanges);
6519     TEST(TestSameStrengthListSupplementalRanges);
6520     TEST(TestSpecialCharacters);
6521     TEST(TestPrivateUseCharacters);
6522     TEST(TestPrivateUseCharactersInList);
6523     TEST(TestPrivateUseCharactersInRange);
6524     TEST(TestInvalidListsAndRanges);
6525     TEST(TestImport);
6526     TEST(TestImportWithType);
6527
6528     TEST(TestBeforeRuleWithScriptReordering);
6529     TEST(TestNonLeadBytesDuringCollationReordering);
6530     TEST(TestReorderingAPI);
6531     TEST(TestGreekFirstReorder);
6532     TEST(TestGreekLastReorder);
6533     TEST(TestNonScriptReorder);
6534     TEST(TestHaniReorder);
6535     TEST(TestMultipleReorder);
6536 }
6537
6538 #endif /* #if !UCONFIG_NO_COLLATION */