icuSources/test/cintltst/cmsccoll.c

   1
   2 /********************************************************************
   3  * COPYRIGHT:
   4  * Copyright (c) 2001-2012, International Business Machines Corporation and
   5  * others. All Rights Reserved.
   6  ********************************************************************/
   7 /*******************************************************************************
   8 *
   9 * File cmsccoll.C
  10 *
  11 *******************************************************************************/
  12 /**
  13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
  14  * to fit.
  15  */
  16
  17 #include <stdio.h>
  18
  19 #include "unicode/utypes.h"
  20
  21 #if !UCONFIG_NO_COLLATION
  22
  23 #include "unicode/ucol.h"
  24 #include "unicode/ucoleitr.h"
  25 #include "unicode/uloc.h"
  26 #include "cintltst.h"
  27 #include "ccolltst.h"
  28 #include "callcoll.h"
  29 #include "unicode/ustring.h"
  30 #include "string.h"
  31 #include "ucol_imp.h"
  32 #include "ucol_tok.h"
  33 #include "cmemory.h"
  34 #include "cstring.h"
  35 #include "uassert.h"
  36 #include "unicode/parseerr.h"
  37 #include "unicode/ucnv.h"
  38 #include "unicode/ures.h"
  39 #include "unicode/uscript.h"
  40 #include "unicode/utf16.h"
  41 #include "uparse.h"
  42 #include "putilimp.h"
  43
  44
  45 #define LEN(a) (sizeof(a)/sizeof(a[0]))
  46
  47 #define MAX_TOKEN_LEN 16
  48
/*
 * Signature of the comparison callbacks passed to swampEarlier(), swampLater(),
 * probeStrength() and testSwitch(): compare source (sLen UChars) against target
 * (tLen UChars) through an opaque collator handle.
 * ucaTest() is the implementation visible in this file; it casts collator to
 * UCollator* and does not use object.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
  52
  53
  54
  55 const static char cnt1[][10] = {
  56
  57   "AA",
  58   "AC",
  59   "AZ",
  60   "AQ",
  61   "AB",
  62   "ABZ",
  63   "ABQ",
  64   "Z",
  65   "ABC",
  66   "Q",
  67   "B"
  68 };
  69
  70 const static char cnt2[][10] = {
  71   "DA",
  72   "DAD",
  73   "DAZ",
  74   "MAR",
  75   "Z",
  76   "DAVIS",
  77   "MARK",
  78   "DAV",
  79   "DAVI"
  80 };
  81
  82 static void IncompleteCntTest(void)
  83 {
  84   UErrorCode status = U_ZERO_ERROR;
  85   UChar temp[90];
  86   UChar t1[90];
  87   UChar t2[90];
  88
  89   UCollator *coll =  NULL;
  90   uint32_t i = 0, j = 0;
  91   uint32_t size = 0;
  92
  93   u_uastrcpy(temp, " & Z < ABC < Q < B");
  94
  95   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
  96
  97   if(U_SUCCESS(status)) {
  98     size = sizeof(cnt1)/sizeof(cnt1[0]);
  99     for(i = 0; i < size-1; i++) {
 100       for(j = i+1; j < size; j++) {
 101         UCollationElements *iter;
 102         u_uastrcpy(t1, cnt1[i]);
 103         u_uastrcpy(t2, cnt1[j]);
 104         doTest(coll, t1, t2, UCOL_LESS);
 105         /* synwee : added collation element iterator test */
 106         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 107         if (U_FAILURE(status)) {
 108           log_err("Creation of iterator failed\n");
 109           break;
 110         }
 111         backAndForth(iter);
 112         ucol_closeElements(iter);
 113       }
 114     }
 115   }
 116
 117   ucol_close(coll);
 118
 119
 120   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
 121   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
 122
 123   if(U_SUCCESS(status)) {
 124     size = sizeof(cnt2)/sizeof(cnt2[0]);
 125     for(i = 0; i < size-1; i++) {
 126       for(j = i+1; j < size; j++) {
 127         UCollationElements *iter;
 128         u_uastrcpy(t1, cnt2[i]);
 129         u_uastrcpy(t2, cnt2[j]);
 130         doTest(coll, t1, t2, UCOL_LESS);
 131
 132         /* synwee : added collation element iterator test */
 133         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 134         if (U_FAILURE(status)) {
 135           log_err("Creation of iterator failed\n");
 136           break;
 137         }
 138         backAndForth(iter);
 139         ucol_closeElements(iter);
 140       }
 141     }
 142   }
 143
 144   ucol_close(coll);
 145
 146
 147 }
 148
 149 const static char shifted[][20] = {
 150   "black bird",
 151   "black-bird",
 152   "blackbird",
 153   "black Bird",
 154   "black-Bird",
 155   "blackBird",
 156   "black birds",
 157   "black-birds",
 158   "blackbirds"
 159 };
 160
 161 const static UCollationResult shiftedTert[] = {
 162   UCOL_EQUAL,
 163   UCOL_EQUAL,
 164   UCOL_EQUAL,
 165   UCOL_LESS,
 166   UCOL_EQUAL,
 167   UCOL_EQUAL,
 168   UCOL_LESS,
 169   UCOL_EQUAL,
 170   UCOL_EQUAL
 171 };
 172
 173 const static char nonignorable[][20] = {
 174   "black bird",
 175   "black Bird",
 176   "black birds",
 177   "black-bird",
 178   "black-Bird",
 179   "black-birds",
 180   "blackbird",
 181   "blackBird",
 182   "blackbirds"
 183 };
 184
 185 static void BlackBirdTest(void) {
 186   UErrorCode status = U_ZERO_ERROR;
 187   UChar t1[90];
 188   UChar t2[90];
 189
 190   uint32_t i = 0, j = 0;
 191   uint32_t size = 0;
 192   UCollator *coll = ucol_open("en_US", &status);
 193
 194   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
 195   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
 196
 197   if(U_SUCCESS(status)) {
 198     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
 199     for(i = 0; i < size-1; i++) {
 200       for(j = i+1; j < size; j++) {
 201         u_uastrcpy(t1, nonignorable[i]);
 202         u_uastrcpy(t2, nonignorable[j]);
 203         doTest(coll, t1, t2, UCOL_LESS);
 204       }
 205     }
 206   }
 207
 208   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
 209   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
 210
 211   if(U_SUCCESS(status)) {
 212     size = sizeof(shifted)/sizeof(shifted[0]);
 213     for(i = 0; i < size-1; i++) {
 214       for(j = i+1; j < size; j++) {
 215         u_uastrcpy(t1, shifted[i]);
 216         u_uastrcpy(t2, shifted[j]);
 217         doTest(coll, t1, t2, UCOL_LESS);
 218       }
 219     }
 220   }
 221
 222   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
 223   if(U_SUCCESS(status)) {
 224     size = sizeof(shifted)/sizeof(shifted[0]);
 225     for(i = 1; i < size; i++) {
 226       u_uastrcpy(t1, shifted[i-1]);
 227       u_uastrcpy(t2, shifted[i]);
 228       doTest(coll, t1, t2, shiftedTert[i]);
 229     }
 230   }
 231
 232   ucol_close(coll);
 233 }
 234
 235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
 236     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
 237     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
 238     {0x0041/*'A'*/, 0x0300, 0x0000},
 239     {0x00C0, 0x0301, 0x0000},
 240     /* this would work with forced normalization */
 241     {0x00C0, 0x0316, 0x0000}
 242 };
 243
 244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
 245     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 246     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
 247     {0x00C0, 0},
 248     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 249     /* this would work with forced normalization */
 250     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
 251 };
 252
 253 const static UCollationResult results[] = {
 254     UCOL_GREATER,
 255     UCOL_EQUAL,
 256     UCOL_EQUAL,
 257     UCOL_GREATER,
 258     UCOL_EQUAL
 259 };
 260
 261 static void FunkyATest(void)
 262 {
 263
 264     int32_t i;
 265     UErrorCode status = U_ZERO_ERROR;
 266     UCollator  *myCollation;
 267     myCollation = ucol_open("en_US", &status);
 268     if(U_FAILURE(status)){
 269         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 270         return;
 271     }
 272     log_verbose("Testing some A letters, for some reason\n");
 273     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 274     ucol_setStrength(myCollation, UCOL_TERTIARY);
 275     for (i = 0; i < 4 ; i++)
 276     {
 277         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
 278     }
 279     ucol_close(myCollation);
 280 }
 281
/*
 * Attribute-value tables. In the visible part of this file they are only
 * iterated by PrintMarkDavis(), which is currently compiled out (#if 0).
 * They are not declared static, so they have external linkage.
 */

/* Values passed for UCOL_CASE_FIRST. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values passed for UCOL_ALTERNATE_HANDLING. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values passed for UCOL_CASE_LEVEL. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values passed for UCOL_STRENGTH. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
 306
 307 #if 0
 308 static const char * strengthsC[] = {
 309     "UCOL_PRIMARY",
 310     "UCOL_SECONDARY",
 311     "UCOL_TERTIARY",
 312     "UCOL_QUATERNARY",
 313     "UCOL_IDENTICAL"
 314 };
 315
 316 static const char * caseFirstC[] = {
 317     "UCOL_OFF",
 318     "UCOL_LOWER_FIRST",
 319     "UCOL_UPPER_FIRST"
 320 };
 321
 322
 323 static const char * alternateHandlingC[] = {
 324     "UCOL_NON_IGNORABLE",
 325     "UCOL_SHIFTED"
 326 };
 327
 328 static const char * caseLevelC[] = {
 329     "UCOL_OFF",
 330     "UCOL_ON"
 331 };
 332
 333 /* not used currently - does not test only prints */
 334 static void PrintMarkDavis(void)
 335 {
 336   UErrorCode status = U_ZERO_ERROR;
 337   UChar m[256];
 338   uint8_t sortkey[256];
 339   UCollator *coll = ucol_open("en_US", &status);
 340   uint32_t h,i,j,k, sortkeysize;
 341   uint32_t sizem = 0;
 342   char buffer[512];
 343   uint32_t len = 512;
 344
 345   log_verbose("PrintMarkDavis");
 346
 347   u_uastrcpy(m, "Mark Davis");
 348   sizem = u_strlen(m);
 349
 350
 351   m[1] = 0xe4;
 352
 353   for(i = 0; i<sizem; i++) {
 354     fprintf(stderr, "\\u%04X ", m[i]);
 355   }
 356   fprintf(stderr, "\n");
 357
 358   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
 359     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
 360     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
 361
 362     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
 363       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
 364       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
 365
 366       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
 367         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
 368         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
 369
 370         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
 371           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
 372           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
 373           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
 374           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
 375         }
 376
 377       }
 378
 379     }
 380
 381   }
 382 }
 383 #endif
 384
 385 static void BillFairmanTest(void) {
 386 /*
 387 ** check for actual locale via ICU resource bundles
 388 **
 389 ** lp points to the original locale ("fr_FR_....")
 390 */
 391
 392     UResourceBundle *lr,*cr;
 393     UErrorCode              lec = U_ZERO_ERROR;
 394     const char *lp = "fr_FR_you_ll_never_find_this_locale";
 395
 396     log_verbose("BillFairmanTest\n");
 397
 398     lr = ures_open(NULL,lp,&lec);
 399     if (lr) {
 400         cr = ures_getByKey(lr,"collations",0,&lec);
 401         if (cr) {
 402             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
 403             if (lp) {
 404                 if (U_SUCCESS(lec)) {
 405                     if(strcmp(lp, "fr") != 0) {
 406                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
 407                     }
 408                 }
 409             }
 410             ures_close(cr);
 411         }
 412         ures_close(lr);
 413     }
 414 }
 415
 416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
 417     UChar source[256] = { '\0'};
 418     UChar target[256] = { '\0'};
 419     UChar preP = 0x31a3;
 420     UChar preQ = 0x310d;
 421 /*
 422     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
 423     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
 424 */
 425     /*log_verbose("Testing primary\n");*/
 426
 427     doTest(col, p, q, UCOL_LESS);
 428 /*
 429     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
 430
 431     if(result!=UCOL_LESS){
 432        aescstrdup(p,utfSource,256);
 433        aescstrdup(q,utfTarget,256);
 434        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
 435     }
 436 */
 437     source[0] = preP;
 438     u_strcpy(source+1,p);
 439     target[0] = preQ;
 440     u_strcpy(target+1,q);
 441     doTest(col, source, target, UCOL_LESS);
 442 /*
 443     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
 444 */
 445 }
 446
 447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
 448     UChar source[256] = { '\0'};
 449     UChar target[256] = { '\0'};
 450
 451     /*log_verbose("Testing secondary\n");*/
 452
 453     doTest(col, p, q, UCOL_LESS);
 454 /*
 455     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
 456 */
 457     source[0] = 0x0053;
 458     u_strcpy(source+1,p);
 459     target[0]= 0x0073;
 460     u_strcpy(target+1,q);
 461
 462     doTest(col, source, target, UCOL_LESS);
 463 /*
 464     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
 465 */
 466
 467
 468     u_strcpy(source,p);
 469     source[u_strlen(p)] = 0x62;
 470     source[u_strlen(p)+1] = 0;
 471
 472
 473     u_strcpy(target,q);
 474     target[u_strlen(q)] = 0x61;
 475     target[u_strlen(q)+1] = 0;
 476
 477     doTest(col, source, target, UCOL_GREATER);
 478
 479 /*
 480     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
 481 */
 482 }
 483
 484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
 485     UChar source[256] = { '\0'};
 486     UChar target[256] = { '\0'};
 487
 488     /*log_verbose("Testing tertiary\n");*/
 489
 490     doTest(col, p, q, UCOL_LESS);
 491 /*
 492     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
 493 */
 494     source[0] = 0x0020;
 495     u_strcpy(source+1,p);
 496     target[0]= 0x002D;
 497     u_strcpy(target+1,q);
 498
 499     doTest(col, source, target, UCOL_LESS);
 500 /*
 501     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
 502 */
 503
 504     u_strcpy(source,p);
 505     source[u_strlen(p)] = 0xE0;
 506     source[u_strlen(p)+1] = 0;
 507
 508     u_strcpy(target,q);
 509     target[u_strlen(q)] = 0x61;
 510     target[u_strlen(q)+1] = 0;
 511
 512     doTest(col, source, target, UCOL_GREATER);
 513
 514 /*
 515     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
 516 */
 517 }
 518
 519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
 520 /*
 521     UChar source[256] = { '\0'};
 522     UChar target[256] = { '\0'};
 523 */
 524
 525     doTest(col, p, q, UCOL_EQUAL);
 526 /*
 527     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
 528 */
 529 }
 530
 531 static void testCollator(UCollator *coll, UErrorCode *status) {
 532   const UChar *rules = NULL, *current = NULL;
 533   int32_t ruleLen = 0;
 534   uint32_t strength = 0;
 535   uint32_t chOffset = 0; uint32_t chLen = 0;
 536   uint32_t exOffset = 0; uint32_t exLen = 0;
 537   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
 538   uint32_t firstEx = 0;
 539 /*  uint32_t rExpsLen = 0; */
 540   uint32_t firstLen = 0;
 541   UBool varT = FALSE; UBool top_ = TRUE;
 542   uint16_t specs = 0;
 543   UBool startOfRules = TRUE;
 544   UBool lastReset = FALSE;
 545   UBool before = FALSE;
 546   uint32_t beforeStrength = 0;
 547   UColTokenParser src;
 548   UColOptionSet opts;
 549
 550   UChar first[256];
 551   UChar second[256];
 552   UChar tempB[256];
 553   uint32_t tempLen;
 554   UChar *rulesCopy = NULL;
 555   UParseError parseError;
 556
 557   uprv_memset(&src, 0, sizeof(UColTokenParser));
 558
 559   src.opts = &opts;
 560
 561   rules = ucol_getRules(coll, &ruleLen);
 562   if(U_SUCCESS(*status) && ruleLen > 0) {
 563     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
 564     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
 565     src.current = src.source = rulesCopy;
 566     src.end = rulesCopy+ruleLen;
 567     src.extraCurrent = src.end;
 568     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
 569     *first = *second = 0;
 570
 571         /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
 572            the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
 573     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
 574       strength = src.parsedToken.strength;
 575       chOffset = src.parsedToken.charsOffset;
 576       chLen = src.parsedToken.charsLen;
 577       exOffset = src.parsedToken.extensionOffset;
 578       exLen = src.parsedToken.extensionLen;
 579       prefixOffset = src.parsedToken.prefixOffset;
 580       prefixLen = src.parsedToken.prefixLen;
 581       specs = src.parsedToken.flags;
 582
 583       startOfRules = FALSE;
 584       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
 585       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
 586       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
 587         second[0] = 0;
 588       } else {
 589         u_strncpy(second,src.source+chOffset, chLen);
 590         second[chLen] = 0;
 591
 592         if(exLen > 0 && firstEx == 0) {
 593           u_strncat(first, src.source+exOffset, exLen);
 594           first[firstLen+exLen] = 0;
 595         }
 596
 597         if(lastReset == TRUE && prefixLen != 0) {
 598           u_strncpy(first+prefixLen, first, firstLen);
 599           u_strncpy(first, src.source+prefixOffset, prefixLen);
 600           first[firstLen+prefixLen] = 0;
 601           firstLen = firstLen+prefixLen;
 602         }
 603
 604         if(before == TRUE) { /* swap first and second */
 605           u_strcpy(tempB, first);
 606           u_strcpy(first, second);
 607           u_strcpy(second, tempB);
 608
 609           tempLen = firstLen;
 610           firstLen = chLen;
 611           chLen = tempLen;
 612
 613           tempLen = firstEx;
 614           firstEx = exLen;
 615           exLen = tempLen;
 616           if(beforeStrength < strength) {
 617             strength = beforeStrength;
 618           }
 619         }
 620       }
 621       lastReset = FALSE;
 622
 623       switch(strength){
 624       case UCOL_IDENTICAL:
 625           testEquality(coll,first,second);
 626           break;
 627       case UCOL_PRIMARY:
 628           testPrimary(coll,first,second);
 629           break;
 630       case UCOL_SECONDARY:
 631           testSecondary(coll,first,second);
 632           break;
 633       case UCOL_TERTIARY:
 634           testTertiary(coll,first,second);
 635           break;
 636       case UCOL_TOK_RESET:
 637         lastReset = TRUE;
 638         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
 639         if(before) {
 640           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
 641         }
 642         break;
 643       default:
 644           break;
 645       }
 646
 647       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
 648         before = FALSE;
 649       } else {
 650         firstLen = chLen;
 651         firstEx = exLen;
 652         u_strcpy(first, second);
 653       }
 654     }
 655     uprv_free(src.source);
 656   }
 657 }
 658
 659 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
 660   UCollator *UCA = (UCollator *)collator;
 661   return ucol_strcoll(UCA, source, sLen, target, tLen);
 662 }
 663
 664 /*
 665 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
 666 #if U_PLATFORM_HAS_WIN32_API
 667   LCID lcid = (LCID)collator;
 668   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
 669 #else
 670   return 0;
 671 #endif
 672 }
 673 */
 674
 675 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
 676                                      UChar s1, UChar s2,
 677                                      const UChar *s, const uint32_t sLen,
 678                                      const UChar *t, const uint32_t tLen) {
 679   UChar source[256] = {0};
 680   UChar target[256] = {0};
 681
 682   source[0] = s1;
 683   u_strcpy(source+1, s);
 684   target[0] = s2;
 685   u_strcpy(target+1, t);
 686
 687   return func(collator, opts, source, sLen+1, target, tLen+1);
 688 }
 689
 690 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
 691                                    UChar s1, UChar s2,
 692                                    const UChar *s, const uint32_t sLen,
 693                                    const UChar *t, const uint32_t tLen) {
 694   UChar source[256] = {0};
 695   UChar target[256] = {0};
 696
 697   u_strcpy(source, s);
 698   source[sLen] = s1;
 699   u_strcpy(target, t);
 700   target[tLen] = s2;
 701
 702   return func(collator, opts, source, sLen+1, target, tLen+1);
 703 }
 704
 705 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
 706                               const UChar *s, const uint32_t sLen,
 707                               const UChar *t, const uint32_t tLen,
 708                               UCollationResult result) {
 709   /*UChar fPrimary = 0x6d;*/
 710   /*UChar sPrimary = 0x6e;*/
 711   UChar fSecondary = 0x310d;
 712   UChar sSecondary = 0x31a3;
 713   UChar fTertiary = 0x310f;
 714   UChar sTertiary = 0x31b7;
 715
 716   UCollationResult oposite;
 717   if(result == UCOL_EQUAL) {
 718     return UCOL_IDENTICAL;
 719   } else if(result == UCOL_GREATER) {
 720     oposite = UCOL_LESS;
 721   } else {
 722     oposite = UCOL_GREATER;
 723   }
 724
 725   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
 726     return UCOL_PRIMARY;
 727   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
 728     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
 729     return UCOL_SECONDARY;
 730   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
 731     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
 732     return UCOL_TERTIARY;
 733   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
 734     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
 735     return UCOL_QUATERNARY;
 736   } else {
 737     return UCOL_IDENTICAL;
 738   }
 739 }
 740
 741 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
 742   uint32_t i = 0;
 743
 744   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
 745     buffer[0] = '=';
 746     buffer[1] = '=';
 747     buffer[2] = '\0';
 748   } else if(res == UCOL_GREATER) {
 749     for(i = 0; i<strength+1; i++) {
 750       buffer[i] = '>';
 751     }
 752     buffer[strength+1] = '\0';
 753   } else {
 754     for(i = 0; i<strength+1; i++) {
 755       buffer[i] = '<';
 756     }
 757     buffer[strength+1] = '\0';
 758   }
 759
 760   return buffer;
 761 }
 762
 763
 764
 765 static void logFailure (const char *platform, const char *test,
 766                         const UChar *source, const uint32_t sLen,
 767                         const UChar *target, const uint32_t tLen,
 768                         UCollationResult realRes, uint32_t realStrength,
 769                         UCollationResult expRes, uint32_t expStrength, UBool error) {
 770
 771   uint32_t i = 0;
 772
 773   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
 774   static int32_t maxOutputLength = 0;
 775   int32_t outputLength;
 776
 777   *sEsc = *tEsc = *s = *t = 0;
 778   if(error == TRUE) {
 779     log_err("Difference between expected and generated order. Run test with -v for more info\n");
 780   } else if(getTestOption(VERBOSITY_OPTION) == 0) {
 781     return;
 782   }
 783   for(i = 0; i<sLen; i++) {
 784     sprintf(b, "%04X", source[i]);
 785     strcat(sEsc, "\\u");
 786     strcat(sEsc, b);
 787     strcat(s, b);
 788     strcat(s, " ");
 789     if(source[i] < 0x80) {
 790       sprintf(b, "(%c)", source[i]);
 791       strcat(sEsc, b);
 792     }
 793   }
 794   for(i = 0; i<tLen; i++) {
 795     sprintf(b, "%04X", target[i]);
 796     strcat(tEsc, "\\u");
 797     strcat(tEsc, b);
 798     strcat(t, b);
 799     strcat(t, " ");
 800     if(target[i] < 0x80) {
 801       sprintf(b, "(%c)", target[i]);
 802       strcat(tEsc, b);
 803     }
 804   }
 805 /*
 806   strcpy(output, "[[ ");
 807   strcat(output, sEsc);
 808   strcat(output, getRelationSymbol(expRes, expStrength, relation));
 809   strcat(output, tEsc);
 810
 811   strcat(output, " : ");
 812
 813   strcat(output, sEsc);
 814   strcat(output, getRelationSymbol(realRes, realStrength, relation));
 815   strcat(output, tEsc);
 816   strcat(output, " ]] ");
 817
 818   log_verbose("%s", output);
 819 */
 820
 821
 822   strcpy(output, "DIFF: ");
 823
 824   strcat(output, s);
 825   strcat(output, " : ");
 826   strcat(output, t);
 827
 828   strcat(output, test);
 829   strcat(output, ": ");
 830
 831   strcat(output, sEsc);
 832   strcat(output, getRelationSymbol(expRes, expStrength, relation));
 833   strcat(output, tEsc);
 834
 835   strcat(output, " ");
 836
 837   strcat(output, platform);
 838   strcat(output, ": ");
 839
 840   strcat(output, sEsc);
 841   strcat(output, getRelationSymbol(realRes, realStrength, relation));
 842   strcat(output, tEsc);
 843
 844   outputLength = (int32_t)strlen(output);
 845   if(outputLength > maxOutputLength) {
 846     maxOutputLength = outputLength;
 847     U_ASSERT(outputLength < sizeof(output));
 848   }
 849
 850   log_verbose("%s\n", output);
 851
 852 }
 853
 854 /*
 855 static void printOutRules(const UChar *rules) {
 856   uint32_t len = u_strlen(rules);
 857   uint32_t i = 0;
 858   char toPrint;
 859   uint32_t line = 0;
 860
 861   fprintf(stdout, "Rules:");
 862
 863   for(i = 0; i<len; i++) {
 864     if(rules[i]<0x7f && rules[i]>=0x20) {
 865       toPrint = (char)rules[i];
 866       if(toPrint == '&') {
 867         line = 1;
 868         fprintf(stdout, "\n&");
 869       } else if(toPrint == ';') {
 870         fprintf(stdout, "<<");
 871         line+=2;
 872       } else if(toPrint == ',') {
 873         fprintf(stdout, "<<<");
 874         line+=3;
 875       } else {
 876         fprintf(stdout, "%c", toPrint);
 877         line++;
 878       }
 879     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
 880       fprintf(stdout, "\\u%04X", rules[i]);
 881       line+=6;
 882     }
 883     if(line>72) {
 884       fprintf(stdout, "\n");
 885       line = 0;
 886     }
 887   }
 888
 889   log_verbose("\n");
 890
 891 }
 892 */
 893
 894 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
 895   uint32_t diffs = 0;
 896   UCollationResult realResult;
 897   uint32_t realStrength;
 898
 899   uint32_t sLen = u_strlen(first);
 900   uint32_t tLen = u_strlen(second);
 901
 902   realResult = func(collator, opts, first, sLen, second, tLen);
 903   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
 904
 905   if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
 906     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
 907     diffs++;
 908   } else if(realResult != UCOL_LESS || realStrength != strength) {
 909     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
 910     diffs++;
 911   }
 912   return diffs;
 913 }
 914
 915
 916 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
 917   const UChar *rules = NULL, *current = NULL;
 918   int32_t ruleLen = 0;
 919   uint32_t strength = 0;
 920   uint32_t chOffset = 0; uint32_t chLen = 0;
 921   uint32_t exOffset = 0; uint32_t exLen = 0;
 922   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
 923 /*  uint32_t rExpsLen = 0; */
 924   uint32_t firstLen = 0, secondLen = 0;
 925   UBool varT = FALSE; UBool top_ = TRUE;
 926   uint16_t specs = 0;
 927   UBool startOfRules = TRUE;
 928   UColTokenParser src;
 929   UColOptionSet opts;
 930
 931   UChar first[256];
 932   UChar second[256];
 933   UChar *rulesCopy = NULL;
 934
 935   uint32_t UCAdiff = 0;
 936   uint32_t Windiff = 1;
 937   UParseError parseError;
 938
 939   uprv_memset(&src, 0, sizeof(UColTokenParser));
 940   src.opts = &opts;
 941
 942   rules = ucol_getRules(coll, &ruleLen);
 943
 944   /*printOutRules(rules);*/
 945
 946   if(U_SUCCESS(*status) && ruleLen > 0) {
 947     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
 948     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
 949     src.current = src.source = rulesCopy;
 950     src.end = rulesCopy+ruleLen;
 951     src.extraCurrent = src.end;
 952     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
 953     *first = *second = 0;
 954
 955     /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
 956        the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
 957     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
 958       strength = src.parsedToken.strength;
 959       chOffset = src.parsedToken.charsOffset;
 960       chLen = src.parsedToken.charsLen;
 961       exOffset = src.parsedToken.extensionOffset;
 962       exLen = src.parsedToken.extensionLen;
 963       prefixOffset = src.parsedToken.prefixOffset;
 964       prefixLen = src.parsedToken.prefixLen;
 965       specs = src.parsedToken.flags;
 966
 967       startOfRules = FALSE;
 968       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
 969       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
 970
 971       u_strncpy(second,src.source+chOffset, chLen);
 972       second[chLen] = 0;
 973       secondLen = chLen;
 974
 975       if(exLen > 0) {
 976         u_strncat(first, src.source+exOffset, exLen);
 977         first[firstLen+exLen] = 0;
 978         firstLen += exLen;
 979       }
 980
 981       if(strength != UCOL_TOK_RESET) {
 982         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
 983           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
 984           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
 985         }
 986       }
 987
 988
 989       firstLen = chLen;
 990       u_strcpy(first, second);
 991
 992     }
 993     if(UCAdiff != 0 && Windiff != 0) {
 994       log_verbose("\n");
 995     }
 996     if(UCAdiff == 0) {
 997       log_verbose("No immediate difference with %s!\n", refName);
 998     }
 999     if(Windiff == 0) {
1000       log_verbose("No immediate difference with Win32!\n");
1001     }
1002     uprv_free(src.source);
1003   }
1004 }
1005
/*
 * Orders two collation elements, each supplied as a lead CE plus its
 * continuation CE: (s1, s2) is the source pair, (t1, t2) the target pair.
 * The weights are compared level by level:
 *   primary   - upper 16 bits of the lead, then of the continuation
 *   secondary - bits 8..15 of the lead, then of the continuation
 *   tertiary  - low 8 bits of the lead, then of the continuation
 * Returns 0 when both pairs are bit-for-bit identical, -1 when the source
 * pair sorts before the target pair and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t srcWeight = 0, tgtWeight = 0;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* primary weights */
  srcWeight = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
  tgtWeight = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
  if(srcWeight != tgtWeight) {
    return (srcWeight < tgtWeight) ? -1 : 1;
  }

  /* secondary weights */
  srcWeight = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
  tgtWeight = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
  if(srcWeight != tgtWeight) {
    return (srcWeight < tgtWeight) ? -1 : 1;
  }

  /* tertiary weights: the pairs differ somewhere, so anything that is */
  /* not "less" is reported as "greater"                               */
  srcWeight = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
  tgtWeight = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
  return (srcWeight < tgtWeight) ? -1 : 1;
}
1042
/*
 * CE boundaries recorded for one indirect (symbolic) reset position.
 * Each boundary is stored as a lead CE plus its continuation CE.
 */
typedef struct indirectBoundaries {
  uint32_t startCE;      /* lead CE where the position starts          */
  uint32_t startContCE;  /* continuation CE belonging to startCE       */
  uint32_t limitCE;      /* lead CE of the upper limit, 0 when unset   */
  uint32_t limitContCE;  /* continuation CE belonging to limitCE       */
} indirectBoundaries;
1049
1050 /* these values are used for finding CE values for indirect positioning. */
1051 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
1052 /* values. It only works for resets and you cannot tailor indirect names */
1053 /* An indirect name can define either an anchor point or a range. An     */
1054 /* anchor point behaves in exactly the same way as a code point in reset */
1055 /* would, except that it cannot be tailored. A range (we currently only  */
1056 /* know for the [top] range will explicitly set the upper bound for      */
1057 /* generated CEs, thus allowing for better control over how many CEs can */
1058 /* be squeezed between in the range without performance penalty.         */
1059 /* In that respect, we use [top] for tailoring of locales that use CJK   */
1060 /* characters. Other indirect values are currently a pure convenience,   */
1061 /* they can be used to assure that the CEs will be always positioned in  */
1062 /* the same place relative to a point with known properties (e.g. first  */
1063 /* primary ignorable). */
1064 static indirectBoundaries ucolIndirectBoundaries[15];
1065 static UBool indirectBoundariesSet = FALSE;
1066 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1067     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1068     /* to initalize here. */
1069     ucolIndirectBoundaries[indexR].startCE = start[0];
1070     ucolIndirectBoundaries[indexR].startContCE = start[1];
1071     if(end) {
1072         ucolIndirectBoundaries[indexR].limitCE = end[0];
1073         ucolIndirectBoundaries[indexR].limitContCE = end[1];
1074     } else {
1075         ucolIndirectBoundaries[indexR].limitCE = 0;
1076         ucolIndirectBoundaries[indexR].limitContCE = 0;
1077     }
1078 }
1079
1080 static void testCEs(UCollator *coll, UErrorCode *status) {
1081     const UChar *rules = NULL, *current = NULL;
1082     int32_t ruleLen = 0;
1083
1084     uint32_t strength = 0;
1085     uint32_t maxStrength = UCOL_IDENTICAL;
1086     uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1087     uint32_t lastCE;
1088     uint32_t lastContCE;
1089
1090     int32_t result = 0;
1091     uint32_t chOffset = 0; uint32_t chLen = 0;
1092     uint32_t exOffset = 0; uint32_t exLen = 0;
1093     uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1094     uint32_t oldOffset = 0;
1095
1096     /* uint32_t rExpsLen = 0; */
1097     /* uint32_t firstLen = 0; */
1098     uint16_t specs = 0;
1099     UBool varT = FALSE; UBool top_ = TRUE;
1100     UBool startOfRules = TRUE;
1101     UBool before = FALSE;
1102     UColTokenParser src;
1103     UColOptionSet opts;
1104     UParseError parseError;
1105     UChar *rulesCopy = NULL;
1106     collIterate *c = uprv_new_collIterate(status);
1107     UCAConstants *consts = NULL;
1108     uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1109         UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1110     const char *colLoc;
1111     UCollator *UCA = ucol_open("root", status);
1112
1113     if (U_FAILURE(*status)) {
1114         log_err("Could not open root collator %s\n", u_errorName(*status));
1115         uprv_delete_collIterate(c);
1116         return;
1117     }
1118
1119     colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1120     if (U_FAILURE(*status)) {
1121         log_err("Could not get collator name: %s\n", u_errorName(*status));
1122         ucol_close(UCA);
1123         uprv_delete_collIterate(c);
1124         return;
1125     }
1126
1127     uprv_memset(&src, 0, sizeof(UColTokenParser));
1128
1129     consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1130     UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1131     /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1132     UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1133     UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1134
1135     baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1136
1137     src.opts = &opts;
1138
1139     rules = ucol_getRules(coll, &ruleLen);
1140
1141     src.invUCA = ucol_initInverseUCA(status);
1142
1143     if(indirectBoundariesSet == FALSE) {
1144         /* UCOL_RESET_TOP_VALUE */
1145         setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1146         /* UCOL_FIRST_PRIMARY_IGNORABLE */
1147         setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1148         /* UCOL_LAST_PRIMARY_IGNORABLE */
1149         setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1150         /* UCOL_FIRST_SECONDARY_IGNORABLE */
1151         setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1152         /* UCOL_LAST_SECONDARY_IGNORABLE */
1153         setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1154         /* UCOL_FIRST_TERTIARY_IGNORABLE */
1155         setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1156         /* UCOL_LAST_TERTIARY_IGNORABLE */
1157         setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1158         /* UCOL_FIRST_VARIABLE */
1159         setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1160         /* UCOL_LAST_VARIABLE */
1161         setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1162         /* UCOL_FIRST_NON_VARIABLE */
1163         setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1164         /* UCOL_LAST_NON_VARIABLE */
1165         setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1166         /* UCOL_FIRST_IMPLICIT */
1167         setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1168         /* UCOL_LAST_IMPLICIT */
1169         setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1170         /* UCOL_FIRST_TRAILING */
1171         setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1172         /* UCOL_LAST_TRAILING */
1173         setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1174         ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1175         indirectBoundariesSet = TRUE;
1176     }
1177
1178
1179     if(U_SUCCESS(*status) && ruleLen > 0) {
1180         rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1181         uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1182         src.current = src.source = rulesCopy;
1183         src.end = rulesCopy+ruleLen;
1184         src.extraCurrent = src.end;
1185         src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1186
1187             /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1188                the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1189         while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1190             strength = src.parsedToken.strength;
1191             chOffset = src.parsedToken.charsOffset;
1192             chLen = src.parsedToken.charsLen;
1193             exOffset = src.parsedToken.extensionOffset;
1194             exLen = src.parsedToken.extensionLen;
1195             prefixOffset = src.parsedToken.prefixOffset;
1196             prefixLen = src.parsedToken.prefixLen;
1197             specs = src.parsedToken.flags;
1198
1199             startOfRules = FALSE;
1200             varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1201             top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1202
1203             uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1204
1205             currCE = ucol_getNextCE(coll, c, status);
1206             if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1207                 log_verbose("Thai prevowel detected. Will pick next CE\n");
1208                 currCE = ucol_getNextCE(coll, c, status);
1209             }
1210
1211             currContCE = ucol_getNextCE(coll, c, status);
1212             if(!isContinuation(currContCE)) {
1213                 currContCE = 0;
1214             }
1215
1216             /* we need to repack CEs here */
1217
1218             if(strength == UCOL_TOK_RESET) {
1219                 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1220                 if(top_ == TRUE) {
1221                     int32_t tokenIndex = src.parsedToken.indirectIndex;
1222
1223                     nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1224                     nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1225                 } else {
1226                     nextCE = baseCE = currCE;
1227                     nextContCE = baseContCE = currContCE;
1228                 }
1229                 maxStrength = UCOL_IDENTICAL;
1230             } else {
1231                 if(strength < maxStrength) {
1232                     maxStrength = strength;
1233                     if(baseCE == UCOL_RESET_TOP_VALUE) {
1234                         log_verbose("Resetting to [top]\n");
1235                         nextCE = UCOL_NEXT_TOP_VALUE;
1236                         nextContCE = UCOL_NEXT_TOP_CONT;
1237                     } else {
1238                         result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1239                     }
1240                     if(result < 0) {
1241                         if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1242                             log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1243                             return;
1244                         } else {
1245                             log_err("%s: couldn't find the CE\n", colLoc);
1246                             return;
1247                         }
1248                     }
1249                 }
1250
1251                 currCE &= 0xFFFFFF3F;
1252                 currContCE &= 0xFFFFFFBF;
1253
1254                 if(maxStrength == UCOL_IDENTICAL) {
1255                     if(baseCE != currCE || baseContCE != currContCE) {
1256                         log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1257                     }
1258                 } else {
1259                     if(strength == UCOL_IDENTICAL) {
1260                         if(lastCE != currCE || lastContCE != currContCE) {
1261                             log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1262                         }
1263                     } else {
1264                         if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1265                             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1266                             log_err("%s: current CE is not less than base CE\n", colLoc);
1267                         }
1268                         if(!before) {
1269                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1270                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1271                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1272                             }
1273                         } else {
1274                             before = FALSE;
1275                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1276                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1277                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
1278                             }
1279                         }
1280                     }
1281                 }
1282
1283             }
1284
1285             oldOffset = chOffset;
1286             lastCE = currCE & 0xFFFFFF3F;
1287             lastContCE = currContCE & 0xFFFFFFBF;
1288         }
1289         uprv_free(src.source);
1290     }
1291     ucol_close(UCA);
1292     uprv_delete_collIterate(c);
1293 }
1294
#if 0
/* these locales are now picked from index RB */
static const char *localesToTest[] = {
    "ar", "bg", "ca", "cs", "da",
    "el", "en_BE", "en_US_POSIX",
    "es", "et", "fi", "fr", "hi",
    "hr", "hu", "is", "iw", "ja",
    "ko", "lt", "lv", "mk", "mt",
    "nb", "nn", "nn_NO", "pl", "ro",
    "ru", "sh", "sk", "sl", "sq",
    "sr", "sv", "th", "tr", "uk",
    "vi", "zh", "zh_TW"
};
#endif

/*
 * Escaped tailoring rules that RamsRulesTest turns into collators.
 * The comment after each [top] rule keeps the alternative spelling that was
 * recorded next to it.
 */
static const char *rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /*"& Z < p, P",*/

    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
        /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
        /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1323
1324
1325 static void TestCollations(void) {
1326     int32_t noOfLoc = uloc_countAvailable();
1327     int32_t i = 0, j = 0;
1328
1329     UErrorCode status = U_ZERO_ERROR;
1330     char cName[256];
1331     UChar name[256];
1332     int32_t nameSize;
1333
1334
1335     const char *locName = NULL;
1336     UCollator *coll = NULL;
1337     UCollator *UCA = ucol_open("", &status);
1338     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1339     if (U_FAILURE(status)) {
1340         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1341         return;
1342     }
1343     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1344
1345     for(i = 0; i<noOfLoc; i++) {
1346         status = U_ZERO_ERROR;
1347         locName = uloc_getAvailable(i);
1348         if(uprv_strcmp("ja", locName) == 0) {
1349             log_verbose("Don't know how to test prefixes\n");
1350             continue;
1351         }
1352         if(hasCollationElements(locName)) {
1353             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1354             for(j = 0; j<nameSize; j++) {
1355                 cName[j] = (char)name[j];
1356             }
1357             cName[nameSize] = 0;
1358             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1359             coll = ucol_open(locName, &status);
1360             if(U_SUCCESS(status)) {
1361                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1362                 ucol_close(coll);
1363             } else {
1364                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1365                 status = U_ZERO_ERROR;
1366             }
1367         }
1368     }
1369     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1370     ucol_close(UCA);
1371 }
1372
1373 static void RamsRulesTest(void) {
1374     UErrorCode status = U_ZERO_ERROR;
1375     int32_t i = 0;
1376     UCollator *coll = NULL;
1377     UChar rule[2048];
1378     uint32_t ruleLen;
1379     int32_t noOfLoc = uloc_countAvailable();
1380     const char *locName = NULL;
1381
1382     log_verbose("RamsRulesTest\n");
1383
1384     if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1385         /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1386         return;
1387     }
1388
1389     for(i = 0; i<noOfLoc; i++) {
1390         locName = uloc_getAvailable(i);
1391         if(hasCollationElements(locName)) {
1392             if (uprv_strcmp("ja", locName)==0) {
1393                 log_verbose("Don't know how to test Japanese because of prefixes\n");
1394                 continue;
1395             }
1396             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1397                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1398                 continue;
1399             }
1400             if (uprv_strcmp("bn", locName)==0 ||
1401                 uprv_strcmp("en_US_POSIX", locName)==0 ||
1402                 uprv_strcmp("km", locName)==0 ||
1403                 uprv_strcmp("km_KH", locName)==0 ||
1404                 uprv_strcmp("my", locName)==0 ||
1405                 uprv_strcmp("si", locName)==0 ||
1406                 uprv_strcmp("si_LK", locName)==0 ||
1407                 uprv_strcmp("zh", locName)==0 ||
1408                 uprv_strcmp("zh_Hant", locName)==0
1409             ) {
1410                 log_verbose("Don't know how to test %s. "
1411                             "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1412                 continue;
1413             }
1414             log_verbose("Testing locale %s\n", locName);
1415             status = U_ZERO_ERROR;
1416             coll = ucol_open(locName, &status);
1417             if(U_SUCCESS(status)) {
1418               if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1419                 if(coll->image->jamoSpecial == TRUE) {
1420                   log_err("%s has special JAMOs\n", locName);
1421                 }
1422                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1423                 testCollator(coll, &status);
1424                 testCEs(coll, &status);
1425               } else {
1426                 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1427               }
1428               ucol_close(coll);
1429             } else {
1430               log_err("Could not open %s: %s\n", locName, u_errorName(status));
1431             }
1432         }
1433     }
1434
1435     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1436         log_verbose("Testing rule: %s\n", rulesToTest[i]);
1437         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1438         status = U_ZERO_ERROR;
1439         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1440         if(U_SUCCESS(status)) {
1441             testCollator(coll, &status);
1442             testCEs(coll, &status);
1443             ucol_close(coll);
1444         } else {
1445           log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1446         }
1447     }
1448
1449 }
1450
1451 static void IsTailoredTest(void) {
1452     UErrorCode status = U_ZERO_ERROR;
1453     uint32_t i = 0;
1454     UCollator *coll = NULL;
1455     UChar rule[2048];
1456     UChar tailored[2048];
1457     UChar notTailored[2048];
1458     uint32_t ruleLen, tailoredLen, notTailoredLen;
1459
1460     log_verbose("IsTailoredTest\n");
1461
1462     u_uastrcpy(rule, "&Z < A, B, C;c < d");
1463     ruleLen = u_strlen(rule);
1464
1465     u_uastrcpy(tailored, "ABCcd");
1466     tailoredLen = u_strlen(tailored);
1467
1468     u_uastrcpy(notTailored, "ZabD");
1469     notTailoredLen = u_strlen(notTailored);
1470
1471     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1472     if(U_SUCCESS(status)) {
1473         for(i = 0; i<tailoredLen; i++) {
1474             if(!ucol_isTailored(coll, tailored[i], &status)) {
1475                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1476             }
1477         }
1478         for(i = 0; i<notTailoredLen; i++) {
1479             if(ucol_isTailored(coll, notTailored[i], &status)) {
1480                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1481             }
1482         }
1483         ucol_close(coll);
1484     }
1485     else {
1486         log_err_status(status, "Can't tailor rules\n");
1487     }
1488     /* Code coverage */
1489     status = U_ZERO_ERROR;
1490     coll = ucol_open("ja", &status);
1491     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1492         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1493     }
1494     ucol_close(coll);
1495 }
1496
1497
/*
 * Escaped strings in the order TestChMove expects them to sort with the
 * "cs" collator: every entry must compare less than all entries after it.
 */
static const char chTest[][20] = {
    "c", "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h", "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s", "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
1517
1518 static void TestChMove(void) {
1519     UChar t1[256] = {0};
1520     UChar t2[256] = {0};
1521
1522     uint32_t i = 0, j = 0;
1523     uint32_t size = 0;
1524     UErrorCode status = U_ZERO_ERROR;
1525
1526     UCollator *coll = ucol_open("cs", &status);
1527
1528     if(U_SUCCESS(status)) {
1529         size = sizeof(chTest)/sizeof(chTest[0]);
1530         for(i = 0; i < size-1; i++) {
1531             for(j = i+1; j < size; j++) {
1532                 u_unescape(chTest[i], t1, 256);
1533                 u_unescape(chTest[j], t2, 256);
1534                 doTest(coll, t1, t2, UCOL_LESS);
1535             }
1536         }
1537     }
1538     else {
1539         log_data_err("Can't open collator");
1540     }
1541     ucol_close(coll);
1542 }
1543
1544
1545
1546
/*
 * Escaped strings listed in ascending order for the rule
 * "&\u4e00 < a <<< A < b <<< B"; only referenced by the disabled
 * hand-written variant of TestImplicitTailoring.
 */
static const char impTest[][20] = {
    "\\u4e00",
    "a", "A",
    "b", "B",
    "\\u4e01"
};
1555
1556
/*
 * Feeds a table of tailorings around the CJK ideographs U+4E00..U+4E03 to
 * genericRulesStarter. Each entry holds the escaped rule string, the
 * strings to test and how many of them are used.
 *
 * An earlier hand-written loop that opened "&\u4e00 < a <<< A < b <<< B"
 * with ucol_openRules and ran doTest over the impTest table used to live
 * here in a comment; the second table entry below carries the same rule
 * and data.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
      { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };
  const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
  int32_t testIdx = 0;

  for(testIdx = 0; testIdx < testCount; testIdx++) {
      genericRulesStarter(tests[testIdx].rules, tests[testIdx].data, tests[testIdx].len);
  }
}
1606
1607 static void TestFCDProblem(void) {
1608   UChar t1[256] = {0};
1609   UChar t2[256] = {0};
1610
1611   const char *s1 = "\\u0430\\u0306\\u0325";
1612   const char *s2 = "\\u04D1\\u0325";
1613
1614   UErrorCode status = U_ZERO_ERROR;
1615   UCollator *coll = ucol_open("", &status);
1616   u_unescape(s1, t1, 256);
1617   u_unescape(s2, t2, 256);
1618
1619   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1620   doTest(coll, t1, t2, UCOL_EQUAL);
1621
1622   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1623   doTest(coll, t1, t2, UCOL_EQUAL);
1624
1625   ucol_close(coll);
1626 }
1627
1628 /*
1629 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1630 We're only using NFC/NFD in this test.
1631 */
1632 #define NORM_BUFFER_TEST_LEN 18
1633 typedef struct {
1634   UChar32 u;
1635   UChar NFC[NORM_BUFFER_TEST_LEN];
1636   UChar NFD[NORM_BUFFER_TEST_LEN];
1637 } tester;
1638
1639 static void TestComposeDecompose(void) {
1640     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1641     static const UChar UNICODESET_STR[] = {
1642         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1643         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1644         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1645     };
1646     int32_t noOfLoc;
1647     int32_t i = 0, j = 0;
1648
1649     UErrorCode status = U_ZERO_ERROR;
1650     const char *locName = NULL;
1651     uint32_t nfcSize;
1652     uint32_t nfdSize;
1653     tester **t;
1654     uint32_t noCases = 0;
1655     UCollator *coll = NULL;
1656     UChar32 u = 0;
1657     UChar comp[NORM_BUFFER_TEST_LEN];
1658     uint32_t len = 0;
1659     UCollationElements *iter;
1660     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1661     int32_t charsToTestSize;
1662
1663     noOfLoc = uloc_countAvailable();
1664
1665     coll = ucol_open("", &status);
1666     if (U_FAILURE(status)) {
1667         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1668         return;
1669     }
1670     charsToTestSize = uset_size(charsToTest);
1671     if (charsToTestSize <= 0) {
1672         log_err("Set was zero. Missing data?\n");
1673         return;
1674     }
1675     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1676     t[0] = (tester *)malloc(sizeof(tester));
1677     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1678
1679     for(u = 0; u < charsToTestSize; u++) {
1680         UChar32 ch = uset_charAt(charsToTest, u);
1681         len = 0;
1682         U16_APPEND_UNSAFE(comp, len, ch);
1683         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1684         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1685
1686         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1687           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1688             t[noCases]->u = ch;
1689             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1690                 u_strncpy(t[noCases]->NFC, comp, len);
1691                 t[noCases]->NFC[len] = 0;
1692             }
1693             noCases++;
1694             t[noCases] = (tester *)malloc(sizeof(tester));
1695             uprv_memset(t[noCases], 0, sizeof(tester));
1696         }
1697     }
1698     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1699     uset_close(charsToTest);
1700     charsToTest = NULL;
1701
1702     for(u=0; u<(UChar32)noCases; u++) {
1703         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1704             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1705             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1706         }
1707     }
1708     /*
1709     for(u = 0; u < charsToTestSize; u++) {
1710       if(!(u&0xFFFF)) {
1711         log_verbose("%08X ", u);
1712       }
1713       uprv_memset(t[noCases], 0, sizeof(tester));
1714       t[noCases]->u = u;
1715       len = 0;
1716       U16_APPEND_UNSAFE(comp, len, u);
1717       comp[len] = 0;
1718       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1719       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1720       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1721       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1722     }
1723     */
1724
1725     ucol_close(coll);
1726
1727     log_verbose("Testing locales, number of cases = %i\n", noCases);
1728     for(i = 0; i<noOfLoc; i++) {
1729         status = U_ZERO_ERROR;
1730         locName = uloc_getAvailable(i);
1731         if(hasCollationElements(locName)) {
1732             char cName[256];
1733             UChar name[256];
1734             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1735
1736             for(j = 0; j<nameSize; j++) {
1737                 cName[j] = (char)name[j];
1738             }
1739             cName[nameSize] = 0;
1740             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1741
1742             coll = ucol_open(locName, &status);
1743             ucol_setStrength(coll, UCOL_IDENTICAL);
1744             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1745
1746             for(u=0; u<(UChar32)noCases; u++) {
1747                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1748                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1749                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1750                     log_verbose("Testing NFC\n");
1751                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1752                     backAndForth(iter);
1753                     log_verbose("Testing NFD\n");
1754                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1755                     backAndForth(iter);
1756                 }
1757             }
1758             ucol_closeElements(iter);
1759             ucol_close(coll);
1760         }
1761     }
1762     for(u = 0; u <= (UChar32)noCases; u++) {
1763         free(t[u]);
1764     }
1765     free(t);
1766 }
1767
1768 static void TestEmptyRule(void) {
1769   UErrorCode status = U_ZERO_ERROR;
1770   UChar rulez[] = { 0 };
1771   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1772
1773   ucol_close(coll);
1774 }
1775
1776 static void TestUCARules(void) {
1777   UErrorCode status = U_ZERO_ERROR;
1778   UChar b[256];
1779   UChar *rules = b;
1780   uint32_t ruleLen = 0;
1781   UCollator *UCAfromRules = NULL;
1782   UCollator *coll = ucol_open("", &status);
1783   if(status == U_FILE_ACCESS_ERROR) {
1784     log_data_err("Is your data around?\n");
1785     return;
1786   } else if(U_FAILURE(status)) {
1787     log_err("Error opening collator\n");
1788     return;
1789   }
1790   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1791
1792   log_verbose("TestUCARules\n");
1793   if(ruleLen > 256) {
1794     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1795     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1796   }
1797   log_verbose("Rules length is %d\n", ruleLen);
1798   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1799   if(U_SUCCESS(status)) {
1800     ucol_close(UCAfromRules);
1801   } else {
1802     log_verbose("Unable to create a collator from UCARules!\n");
1803   }
1804 /*
1805   u_unescape(blah, b, 256);
1806   ucol_getSortKey(coll, b, 1, res, 256);
1807 */
1808   ucol_close(coll);
1809   if(rules != b) {
1810     free(rules);
1811   }
1812 }
1813
1814
1815 /* Pinyin tonal order */
1816 /*
1817     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1818           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1819     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1820     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1821     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1822     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1823       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1824 .. (\u00fc)
1825
1826 However, in testing we got the following order:
1827     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1828           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1829     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1830 .. (\u0113)
1831     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1832     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1833     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1834 .. (\u01d8)
1835       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1836 */
1837
/**
 * Runs genericRulesStarter() with a tailoring that uses "[before 1]" resets
 * on a, e, i, o and u for the tone-marked vowels, followed by a plain reset
 * on u for the u-diaeresis forms.  The table lists the test strings in the
 * order handed to the helper.
 */
static void TestBefore(void) {
  static const char *expectedOrder[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char tonalRules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(tonalRules, expectedOrder,
                      sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
1856
#if 0
/* Disabled: superseded by TestBeforePinyin. */
/* Would run genericLocaleStarter() for the "zh" locale over the strings below. */
static void TestJ784(void) {
  static const char *pinyinOrder[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", pinyinOrder,
                       sizeof(pinyinOrder)/sizeof(pinyinOrder[0]));
}
#endif
1872
#if 0
/* Disabled: superseded by the changes to the lv locale. */
/* Would run genericLocaleStarter() for the "lv" locale over I, i, Y, y. */
static void TestJ831(void) {
  static const char *latvianOrder[] = {
    "I",
    "i",
    "Y",
    "y"
  };
  genericLocaleStarter("lv", latvianOrder,
                       sizeof(latvianOrder)/sizeof(latvianOrder[0]));
}
#endif
1885
1886 static void TestJ815(void) {
1887   const static char *data[] = {
1888     "aa",
1889       "Aa",
1890       "ab",
1891       "Ab",
1892       "ad",
1893       "Ad",
1894       "ae",
1895       "Ae",
1896       "\\u00e6",
1897       "\\u00c6",
1898       "af",
1899       "Af",
1900       "b",
1901       "B"
1902   };
1903   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
1904   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
1905 }
1906
1907
1908 /*
1909 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1910 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1911 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1912 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1913 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1914 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1915 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1916 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1917 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1918 */
1919 static void TestRedundantRules(void) {
1920   int32_t i;
1921
1922   static const struct {
1923       const char *rules;
1924       const char *expectedRules;
1925       const char *testdata[8];
1926       uint32_t testdatalen;
1927   } tests[] = {
1928     /* this test conflicts with positioning of CODAN placeholder */
1929        /*{
1930         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1931         "&\\u2089<<<x",
1932         {"\\u2089", "x"}, 2
1933        }, */
1934     /* this test conflicts with the [before x] syntax tightening */
1935       /*{
1936         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1937         "&\\u0252<<<x",
1938         {"\\u0252", "x"}, 2
1939       }, */
1940     /* this test conflicts with the [before x] syntax tightening */
1941       /*{
1942          "& a < b <<< c << d <<< e& [before 1] e <<< x",
1943          "& a <<< x < b <<< c << d <<< e",
1944         {"a", "x", "b", "c", "d", "e"}, 6
1945       }, */
1946       {
1947         "& a < b < c < d& [before 1] c < m",
1948         "& a < b < m < c < d",
1949         {"a", "b", "m", "c", "d"}, 5
1950       },
1951       {
1952         "& a < b <<< c << d <<< e& [before 3] e <<< x",
1953         "& a < b <<< c << d <<< x <<< e",
1954         {"a", "b", "c", "d", "x", "e"}, 6
1955       },
1956     /* this test conflicts with the [before x] syntax tightening */
1957       /* {
1958         "& a < b <<< c << d <<< e& [before 2] e <<< x",
1959         "& a < b <<< c <<< x << d <<< e",
1960         {"a", "b", "c", "x", "d", "e"},, 6
1961       }, */
1962       {
1963         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1964         "& a < b <<< c << d <<< e <<< f < x < g",
1965         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1966       },
1967       {
1968         "& a <<< b << c < d& a < m",
1969         "& a <<< b << c < m < d",
1970         {"a", "b", "c", "m", "d"}, 5
1971       },
1972       {
1973         "&a<b<<b\\u0301 &z<b",
1974         "&a<b\\u0301 &z<b",
1975         {"a", "b\\u0301", "z", "b"}, 4
1976       },
1977       {
1978         "&z<m<<<q<<<m",
1979         "&z<q<<<m",
1980         {"z", "q", "m"},3
1981       },
1982       {
1983         "&z<<<m<q<<<m",
1984         "&z<q<<<m",
1985         {"z", "q", "m"}, 3
1986       },
1987       {
1988         "& a < b < c < d& r < c",
1989         "& a < b < d& r < c",
1990         {"a", "b", "d"}, 3
1991       },
1992       {
1993         "& a < b < c < d& r < c",
1994         "& a < b < d& r < c",
1995         {"r", "c"}, 2
1996       },
1997       {
1998         "& a < b < c < d& c < m",
1999         "& a < b < c < m < d",
2000         {"a", "b", "c", "m", "d"}, 5
2001       },
2002       {
2003         "& a < b < c < d& a < m",
2004         "& a < m < b < c < d",
2005         {"a", "m", "b", "c", "d"}, 5
2006       }
2007   };
2008
2009
2010   UCollator *credundant = NULL;
2011   UCollator *cresulting = NULL;
2012   UErrorCode status = U_ZERO_ERROR;
2013   UChar rlz[2048] = { 0 };
2014   uint32_t rlen = 0;
2015
2016   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2017     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2018     rlen = u_unescape(tests[i].rules, rlz, 2048);
2019
2020     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2021     if(status == U_FILE_ACCESS_ERROR) {
2022       log_data_err("Is your data around?\n");
2023       return;
2024     } else if(U_FAILURE(status)) {
2025       log_err("Error opening collator\n");
2026       return;
2027     }
2028
2029     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2030     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2031
2032     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2033
2034     ucol_close(credundant);
2035     ucol_close(cresulting);
2036
2037     log_verbose("testing using data\n");
2038
2039     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2040   }
2041
2042 }
2043
2044 static void TestExpansionSyntax(void) {
2045   int32_t i;
2046
2047   const static char *rules[] = {
2048     "&AE <<< a << b <<< c &d <<< f",
2049     "&AE <<< a <<< b << c << d < e < f <<< g",
2050     "&AE <<< B <<< C / D <<< F"
2051   };
2052
2053   const static char *expectedRules[] = {
2054     "&A <<< a / E << b / E <<< c /E  &d <<< f",
2055     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2056     "&A <<< B / E <<< C / ED <<< F / E"
2057   };
2058
2059   const static char *testdata[][8] = {
2060     {"AE", "a", "b", "c"},
2061     {"AE", "a", "b", "c", "d", "e", "f", "g"},
2062     {"AE", "B", "C"} /* / ED <<< F / E"},*/
2063   };
2064
2065   const static uint32_t testdatalen[] = {
2066       4,
2067       8,
2068       3
2069   };
2070
2071
2072
2073   UCollator *credundant = NULL;
2074   UCollator *cresulting = NULL;
2075   UErrorCode status = U_ZERO_ERROR;
2076   UChar rlz[2048] = { 0 };
2077   uint32_t rlen = 0;
2078
2079   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2080     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2081     rlen = u_unescape(rules[i], rlz, 2048);
2082
2083     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2084     if(status == U_FILE_ACCESS_ERROR) {
2085       log_data_err("Is your data around?\n");
2086       return;
2087     } else if(U_FAILURE(status)) {
2088       log_err("Error opening collator\n");
2089       return;
2090     }
2091     rlen = u_unescape(expectedRules[i], rlz, 2048);
2092     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2093
2094     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2095     /* as a hard error test, but only in information mode */
2096     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2097
2098     ucol_close(credundant);
2099     ucol_close(cresulting);
2100
2101     log_verbose("testing using data\n");
2102
2103     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2104   }
2105 }
2106
2107 static void TestCase(void)
2108 {
2109     const static UChar gRules[MAX_TOKEN_LEN] =
2110     /*" & 0 < 1,\u2461<a,A"*/
2111     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2112
2113     const static UChar testCase[][MAX_TOKEN_LEN] =
2114     {
2115         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2116         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2117         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2118         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2119     };
2120
2121     const static UCollationResult caseTestResults[][9] =
2122     {
2123         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2124         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2125         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2126         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2127     };
2128
2129     const static UColAttributeValue caseTestAttributes[][2] =
2130     {
2131         { UCOL_LOWER_FIRST, UCOL_OFF},
2132         { UCOL_UPPER_FIRST, UCOL_OFF},
2133         { UCOL_LOWER_FIRST, UCOL_ON},
2134         { UCOL_UPPER_FIRST, UCOL_ON}
2135     };
2136     int32_t i,j,k;
2137     UErrorCode status = U_ZERO_ERROR;
2138     UCollationElements *iter;
2139     UCollator  *myCollation;
2140     myCollation = ucol_open("en_US", &status);
2141
2142     if(U_FAILURE(status)){
2143         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2144         return;
2145     }
2146     log_verbose("Testing different case settings\n");
2147     ucol_setStrength(myCollation, UCOL_TERTIARY);
2148
2149     for(k = 0; k<4; k++) {
2150       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2151       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2152       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2153       for (i = 0; i < 3 ; i++) {
2154         for(j = i+1; j<4; j++) {
2155           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2156         }
2157       }
2158     }
2159     ucol_close(myCollation);
2160
2161     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2162     if(U_FAILURE(status)){
2163         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2164         return;
2165     }
2166     log_verbose("Testing different case settings with custom rules\n");
2167     ucol_setStrength(myCollation, UCOL_TERTIARY);
2168
2169     for(k = 0; k<4; k++) {
2170       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2171       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2172       for (i = 0; i < 3 ; i++) {
2173         for(j = i+1; j<4; j++) {
2174           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2175           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2176           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2177           backAndForth(iter);
2178           ucol_closeElements(iter);
2179           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2180           backAndForth(iter);
2181           ucol_closeElements(iter);
2182         }
2183       }
2184     }
2185     ucol_close(myCollation);
2186     {
2187       const static char *lowerFirst[] = {
2188         "h",
2189         "H",
2190         "ch",
2191         "Ch",
2192         "CH",
2193         "cha",
2194         "chA",
2195         "Cha",
2196         "ChA",
2197         "CHa",
2198         "CHA",
2199         "i",
2200         "I"
2201       };
2202
2203       const static char *upperFirst[] = {
2204         "H",
2205         "h",
2206         "CH",
2207         "Ch",
2208         "ch",
2209         "CHA",
2210         "CHa",
2211         "ChA",
2212         "Cha",
2213         "chA",
2214         "cha",
2215         "I",
2216         "i"
2217       };
2218       log_verbose("mixed case test\n");
2219       log_verbose("lower first, case level off\n");
2220       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2221       log_verbose("upper first, case level off\n");
2222       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2223       log_verbose("lower first, case level on\n");
2224       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2225       log_verbose("upper first, case level on\n");
2226       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2227     }
2228
2229 }
2230
2231 static void TestIncrementalNormalize(void) {
2232
2233     /*UChar baseA     =0x61;*/
2234     UChar baseA     =0x41;
2235 /*    UChar baseB     = 0x42;*/
2236     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2237     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2238     /*
2239         0x316 is combining grave accent below, cc=220
2240         0x321 is combining palatalized hook below, cc=202
2241         0x300 is combining grave accent, cc=230
2242     */
2243
2244 #define MAXSLEN 2000
2245     /*int          maxSLen   = 64000;*/
2246     int          sLen;
2247     int          i;
2248
2249     UCollator        *coll;
2250     UErrorCode       status = U_ZERO_ERROR;
2251     UCollationResult result;
2252
2253     int32_t myQ = getTestOption(QUICK_OPTION);
2254
2255     if(getTestOption(QUICK_OPTION) < 0) {
2256         setTestOption(QUICK_OPTION, 1);
2257     }
2258
2259     {
2260         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2261         /*          most buffers along the way.*/
2262         UChar            strA[MAXSLEN+1];
2263         UChar            strB[MAXSLEN+1];
2264
2265         coll = ucol_open("en_US", &status);
2266         if(status == U_FILE_ACCESS_ERROR) {
2267           log_data_err("Is your data around?\n");
2268           return;
2269         } else if(U_FAILURE(status)) {
2270           log_err("Error opening collator\n");
2271           return;
2272         }
2273         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2274
2275         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2276         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2277         /*for (sLen = 1000; sLen<1001; sLen++) {*/
2278         for (sLen = 500; sLen<501; sLen++) {
2279         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2280             strA[0] = baseA;
2281             strB[0] = baseA;
2282             for (i=1; i<=sLen-1; i++) {
2283                 strA[i] = ccMix[i % 3];
2284                 strB[sLen-i] = ccMix[i % 3];
2285             }
2286             strA[sLen]   = 0;
2287             strB[sLen]   = 0;
2288
2289             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2290             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2291             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2292             doTest(coll, strA, strB, UCOL_EQUAL);
2293         }
2294     }
2295
2296     setTestOption(QUICK_OPTION, myQ);
2297
2298
2299     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2300     /*         of the string.  Checks a couple of edge cases.*/
2301
2302     {
2303         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2304         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2305         ucol_setStrength(coll, UCOL_TERTIARY);
2306         doTest(coll, strA, strB, UCOL_EQUAL);
2307     }
2308
2309     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2310
2311     {
2312       /* New UCA  3.1.1.
2313        * test below used a code point from Desseret, which sorts differently
2314        * than d800 dc00
2315        */
2316         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2317         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2318         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2319         ucol_setStrength(coll, UCOL_TERTIARY);
2320         doTest(coll, strA, strB, UCOL_GREATER);
2321     }
2322
2323     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2324
2325     {
2326         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2327         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2328         char  sortKeyA[50];
2329         char  sortKeyAz[50];
2330         char  sortKeyB[50];
2331         char  sortKeyBz[50];
2332         int   r;
2333
2334         /* there used to be -3 here. Hmmmm.... */
2335         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2336         result = ucol_strcoll(coll, strA, 3, strB, 3);
2337         if (result != UCOL_GREATER) {
2338             log_err("ERROR 1 in test 4\n");
2339         }
2340         result = ucol_strcoll(coll, strA, -1, strB, -1);
2341         if (result != UCOL_EQUAL) {
2342             log_err("ERROR 2 in test 4\n");
2343         }
2344
2345         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2346         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2347         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2348         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2349
2350         r = strcmp(sortKeyA, sortKeyAz);
2351         if (r <= 0) {
2352             log_err("Error 3 in test 4\n");
2353         }
2354         r = strcmp(sortKeyA, sortKeyB);
2355         if (r <= 0) {
2356             log_err("Error 4 in test 4\n");
2357         }
2358         r = strcmp(sortKeyAz, sortKeyBz);
2359         if (r != 0) {
2360             log_err("Error 5 in test 4\n");
2361         }
2362
2363         ucol_setStrength(coll, UCOL_IDENTICAL);
2364         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2365         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2366         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2367         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2368
2369         r = strcmp(sortKeyA, sortKeyAz);
2370         if (r <= 0) {
2371             log_err("Error 6 in test 4\n");
2372         }
2373         r = strcmp(sortKeyA, sortKeyB);
2374         if (r <= 0) {
2375             log_err("Error 7 in test 4\n");
2376         }
2377         r = strcmp(sortKeyAz, sortKeyBz);
2378         if (r != 0) {
2379             log_err("Error 8 in test 4\n");
2380         }
2381         ucol_setStrength(coll, UCOL_TERTIARY);
2382     }
2383
2384
2385     /*  Test 5:  Null characters in non-normal source strings.*/
2386
2387     {
2388         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2389         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2390         char  sortKeyA[50];
2391         char  sortKeyAz[50];
2392         char  sortKeyB[50];
2393         char  sortKeyBz[50];
2394         int   r;
2395
2396         result = ucol_strcoll(coll, strA, 6, strB, 6);
2397         if (result != UCOL_GREATER) {
2398             log_err("ERROR 1 in test 5\n");
2399         }
2400         result = ucol_strcoll(coll, strA, -1, strB, -1);
2401         if (result != UCOL_EQUAL) {
2402             log_err("ERROR 2 in test 5\n");
2403         }
2404
2405         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2406         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2407         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2408         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2409
2410         r = strcmp(sortKeyA, sortKeyAz);
2411         if (r <= 0) {
2412             log_err("Error 3 in test 5\n");
2413         }
2414         r = strcmp(sortKeyA, sortKeyB);
2415         if (r <= 0) {
2416             log_err("Error 4 in test 5\n");
2417         }
2418         r = strcmp(sortKeyAz, sortKeyBz);
2419         if (r != 0) {
2420             log_err("Error 5 in test 5\n");
2421         }
2422
2423         ucol_setStrength(coll, UCOL_IDENTICAL);
2424         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2425         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2426         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2427         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2428
2429         r = strcmp(sortKeyA, sortKeyAz);
2430         if (r <= 0) {
2431             log_err("Error 6 in test 5\n");
2432         }
2433         r = strcmp(sortKeyA, sortKeyB);
2434         if (r <= 0) {
2435             log_err("Error 7 in test 5\n");
2436         }
2437         r = strcmp(sortKeyAz, sortKeyBz);
2438         if (r != 0) {
2439             log_err("Error 8 in test 5\n");
2440         }
2441         ucol_setStrength(coll, UCOL_TERTIARY);
2442     }
2443
2444
2445     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2446
2447     {
2448         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2449         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2450
2451         result = ucol_strcoll(coll, strA, 5, strB, 5);
2452         if (result != UCOL_LESS) {
2453             log_err("Error 1 in test 6\n");
2454         }
2455         result = ucol_strcoll(coll, strA, -1, strB, -1);
2456         if (result != UCOL_EQUAL) {
2457             log_err("Error 2 in test 6\n");
2458         }
2459     }
2460
2461     ucol_close(coll);
2462 }
2463
2464
2465
#if 0
/*
 * Disabled test.  For each string in caseBitData, unescapes it and compares
 * the value returned by ucol_uprv_getCaseBits() for the "" locale collator
 * against the matching entry in results.
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }
  /* Release the collator opened above. */
  ucol_close(UCA);
}
#endif
2493
2494 static void TestHangulTailoring(void) {
2495     static const char *koreanData[] = {
2496         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2497             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2498             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2499             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2500             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2501             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2502     };
2503
2504     const char *rules =
2505         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2506         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2507         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2508         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2509         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2510         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2511
2512
2513   UErrorCode status = U_ZERO_ERROR;
2514   UChar rlz[2048] = { 0 };
2515   uint32_t rlen = u_unescape(rules, rlz, 2048);
2516
2517   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2518   if(status == U_FILE_ACCESS_ERROR) {
2519     log_data_err("Is your data around?\n");
2520     return;
2521   } else if(U_FAILURE(status)) {
2522     log_err("Error opening collator\n");
2523     return;
2524   }
2525
2526   log_verbose("Using start of korean rules\n");
2527
2528   if(U_SUCCESS(status)) {
2529     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2530   } else {
2531     log_err("Unable to open collator with rules %s\n", rules);
2532   }
2533
2534   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2535   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2536   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2537
2538   ucol_close(coll);
2539
2540   log_verbose("Using ko__LOTUS locale\n");
2541   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2542 }
2543
2544 static void TestCompressOverlap(void) {
2545     UChar       secstr[150];
2546     UChar       tertstr[150];
2547     UErrorCode  status = U_ZERO_ERROR;
2548     UCollator  *coll;
2549     char        result[200];
2550     uint32_t    resultlen;
2551     int         count = 0;
2552     char       *tempptr;
2553
2554     coll = ucol_open("", &status);
2555
2556     if (U_FAILURE(status)) {
2557         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2558         return;
2559     }
2560     while (count < 149) {
2561         secstr[count] = 0x0020; /* [06, 05, 05] */
2562         tertstr[count] = 0x0020;
2563         count ++;
2564     }
2565
2566     /* top down compression ----------------------------------- */
2567     secstr[count] = 0x0332; /* [, 87, 05] */
2568     tertstr[count] = 0x3000; /* [06, 05, 07] */
2569
2570     /* no compression secstr should have 150 secondary bytes, tertstr should
2571     have 150 tertiary bytes.
2572     with correct overlapping compression, secstr should have 4 secondary
2573     bytes, tertstr should have > 2 tertiary bytes */
2574     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2575     tempptr = uprv_strchr(result, 1) + 1;
2576     while (*(tempptr + 1) != 1) {
2577         /* the last secondary collation element is not checked since it is not
2578         part of the compression */
2579         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2580             log_err("Secondary compression overlapped\n");
2581         }
2582         tempptr ++;
2583     }
2584
2585     /* tertiary top/bottom/common for en_US is similar to the secondary
2586     top/bottom/common */
2587     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2588     tempptr = uprv_strrchr(result, 1) + 1;
2589     while (*(tempptr + 1) != 0) {
2590         /* the last secondary collation element is not checked since it is not
2591         part of the compression */
2592         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2593             log_err("Tertiary compression overlapped\n");
2594         }
2595         tempptr ++;
2596     }
2597
2598     /* bottom up compression ------------------------------------- */
2599     secstr[count] = 0;
2600     tertstr[count] = 0;
2601     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2602     tempptr = uprv_strchr(result, 1) + 1;
2603     while (*(tempptr + 1) != 1) {
2604         /* the last secondary collation element is not checked since it is not
2605         part of the compression */
2606         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2607             log_err("Secondary compression overlapped\n");
2608         }
2609         tempptr ++;
2610     }
2611
2612     /* tertiary top/bottom/common for en_US is similar to the secondary
2613     top/bottom/common */
2614     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2615     tempptr = uprv_strrchr(result, 1) + 1;
2616     while (*(tempptr + 1) != 0) {
2617         /* the last secondary collation element is not checked since it is not
2618         part of the compression */
2619         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2620             log_err("Tertiary compression overlapped\n");
2621         }
2622         tempptr ++;
2623     }
2624
2625     ucol_close(coll);
2626 }
2627
2628 static void TestCyrillicTailoring(void) {
2629   static const char *test[] = {
2630     "\\u0410b",
2631       "\\u0410\\u0306a",
2632       "\\u04d0A"
2633   };
2634
2635     /* Russian overrides contractions, so this test is not valid anymore */
2636     /*genericLocaleStarter("ru", test, 3);*/
2637
2638     genericLocaleStarter("root", test, 3);
2639     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2640     genericRulesStarter("&Z < \\u0410", test, 3);
2641     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2642     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2643     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2644     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2645 }
2646
/**
 * Runs two three-string ordering tables through genericRulesStarter() with a
 * rule that suppresses contractions for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
  /* The same tailoring is used for both tables. */
  static const char suppressCyrillic[] = "[suppressContractions [\\u0400-\\u047f]]";

  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };

  genericRulesStarter(suppressCyrillic, testNoCont, 3);
  genericRulesStarter(suppressCyrillic, testNoCont2, 3);
}
2663
2664 static void TestContraction(void) {
2665     const static char *testrules[] = {
2666         "&A = AB / B",
2667         "&A = A\\u0306/\\u0306",
2668         "&c = ch / h"
2669     };
2670     const static UChar testdata[][2] = {
2671         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2672         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2673         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2674     };
2675     const static UChar testdata2[][2] = {
2676         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2677         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2678         {0x0063 /* 'c' */, 0x006C /* 'l' */}
2679     };
2680     const static char *testrules3[] = {
2681         "&z < xyz &xyzw << B",
2682         "&z < xyz &xyz << B / w",
2683         "&z < ch &achm << B",
2684         "&z < ch &a << B / chm",
2685         "&\\ud800\\udc00w << B",
2686         "&\\ud800\\udc00 << B / w",
2687         "&a\\ud800\\udc00m << B",
2688         "&a << B / \\ud800\\udc00m",
2689     };
2690
2691     UErrorCode  status   = U_ZERO_ERROR;
2692     UCollator  *coll;
2693     UChar       rule[256] = {0};
2694     uint32_t    rlen     = 0;
2695     int         i;
2696
2697     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2698         UCollationElements *iter1;
2699         int j = 0;
2700         log_verbose("Rule %s for testing\n", testrules[i]);
2701         rlen = u_unescape(testrules[i], rule, 32);
2702         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2703         if (U_FAILURE(status)) {
2704             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2705             return;
2706         }
2707         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2708         if (U_FAILURE(status)) {
2709             log_err("Collation iterator creation failed\n");
2710             return;
2711         }
2712         while (j < 2) {
2713             UCollationElements *iter2 = ucol_openElements(coll,
2714                                                          &(testdata[i][j]),
2715                                                          1, &status);
2716             uint32_t ce;
2717             if (U_FAILURE(status)) {
2718                 log_err("Collation iterator creation failed\n");
2719                 return;
2720             }
2721             ce = ucol_next(iter2, &status);
2722             while (ce != UCOL_NULLORDER) {
2723                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2724                     log_err("Collation elements in contraction split does not match\n");
2725                     return;
2726                 }
2727                 ce = ucol_next(iter2, &status);
2728             }
2729             j ++;
2730             ucol_closeElements(iter2);
2731         }
2732         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2733             log_err("Collation elements not exhausted\n");
2734             return;
2735         }
2736         ucol_closeElements(iter1);
2737         ucol_close(coll);
2738     }
2739
2740     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2741     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2742     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2743         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2744                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2745                 testdata2[1][1]);
2746         return;
2747     }
2748     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2749         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2750                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2751                 testdata2[2][1]);
2752         return;
2753     }
2754     ucol_close(coll);
2755
2756     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2757         UCollator          *coll1,
2758                            *coll2;
2759         UCollationElements *iter1,
2760                            *iter2;
2761         UChar               ch = 0x0042 /* 'B' */;
2762         uint32_t            ce;
2763         rlen = u_unescape(testrules3[i], rule, 32);
2764         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2765         rlen = u_unescape(testrules3[i + 1], rule, 32);
2766         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2767         if (U_FAILURE(status)) {
2768             log_err("Collator creation failed %s\n", testrules[i]);
2769             return;
2770         }
2771         iter1 = ucol_openElements(coll1, &ch, 1, &status);
2772         iter2 = ucol_openElements(coll2, &ch, 1, &status);
2773         if (U_FAILURE(status)) {
2774             log_err("Collation iterator creation failed\n");
2775             return;
2776         }
2777         ce = ucol_next(iter1, &status);
2778         if (U_FAILURE(status)) {
2779             log_err("Retrieving ces failed\n");
2780             return;
2781         }
2782         while (ce != UCOL_NULLORDER) {
2783             if (ce != (uint32_t)ucol_next(iter2, &status)) {
2784                 log_err("CEs does not match\n");
2785                 return;
2786             }
2787             ce = ucol_next(iter1, &status);
2788             if (U_FAILURE(status)) {
2789                 log_err("Retrieving ces failed\n");
2790                 return;
2791             }
2792         }
2793         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2794             log_err("CEs not exhausted\n");
2795             return;
2796         }
2797         ucol_closeElements(iter1);
2798         ucol_closeElements(iter2);
2799         ucol_close(coll1);
2800         ucol_close(coll2);
2801     }
2802 }
2803
2804 static void TestExpansion(void) {
2805     const static char *testrules[] = {
2806         "&J << K / B & K << M",
2807         "&J << K / B << M"
2808     };
2809     const static UChar testdata[][3] = {
2810         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2811         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2812         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2813         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2814         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2815         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2816     };
2817
2818     UErrorCode  status   = U_ZERO_ERROR;
2819     UCollator  *coll;
2820     UChar       rule[256] = {0};
2821     uint32_t    rlen     = 0;
2822     int         i;
2823
2824     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2825         int j = 0;
2826         log_verbose("Rule %s for testing\n", testrules[i]);
2827         rlen = u_unescape(testrules[i], rule, 32);
2828         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2829         if (U_FAILURE(status)) {
2830             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2831             return;
2832         }
2833
2834         for (j = 0; j < 5; j ++) {
2835             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2836         }
2837         ucol_close(coll);
2838     }
2839 }
2840
2841 #if 0
2842 /* this test tests the current limitations of the engine */
2843 /* it always fail, so it is disabled by default */
2844 static void TestLimitations(void) {
2845   /* recursive expansions */
2846   {
2847     static const char *rule = "&a=b/c&d=c/e";
2848     static const char *tlimit01[] = {"add","b","adf"};
2849     static const char *tlimit02[] = {"aa","b","af"};
2850     log_verbose("recursive expansions\n");
2851     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2852     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2853   }
2854   /* contractions spanning expansions */
2855   {
2856     static const char *rule = "&a<<<c/e&g<<<eh";
2857     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
2858     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
2859     log_verbose("contractions spanning expansions\n");
2860     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2861     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2862   }
2863   /* normalization: nulls in contractions */
2864   {
2865     static const char *rule = "&a<<<\\u0000\\u0302";
2866     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2867     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2868     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2869     static const UColAttributeValue valOn[] = { UCOL_ON };
2870     static const UColAttributeValue valOff[] = { UCOL_OFF };
2871
2872     log_verbose("NULL in contractions\n");
2873     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
2874     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
2875     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
2876     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
2877
2878   }
2879   /* normalization: contractions spanning normalization */
2880   {
2881     static const char *rule = "&a<<<\\u0000\\u0302";
2882     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2883     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2884     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2885     static const UColAttributeValue valOn[] = { UCOL_ON };
2886     static const UColAttributeValue valOff[] = { UCOL_OFF };
2887
2888     log_verbose("contractions spanning normalization\n");
2889     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
2890     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
2891     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
2892     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
2893
2894   }
2895   /* variable top:  */
2896   {
2897     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2898     static const char *rule = "&\\u2010<x<[variable top]=z";
2899     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
2900     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
2901     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
2902     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
2903     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
2904     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
2905     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
2906
2907     log_verbose("variable top\n");
2908     genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2909     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2910     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2911     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
2912     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
2913
2914   }
2915   /* case level */
2916   {
2917     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
2918     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
2919     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
2920     static const UColAttribute att[] = { UCOL_CASE_FIRST};
2921     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
2922     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2923     log_verbose("case level\n");
2924     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2925     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2926     /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2927     /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2928   }
2929
2930 }
2931 #endif
2932
2933 static void TestBocsuCoverage(void) {
2934   UErrorCode status = U_ZERO_ERROR;
2935   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2936   UChar       test[256] = {0};
2937   uint32_t    tlen     = u_unescape(testString, test, 32);
2938   uint8_t key[256]     = {0};
2939   uint32_t klen         = 0;
2940
2941   UCollator *coll = ucol_open("", &status);
2942   if(U_SUCCESS(status)) {
2943   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2944
2945   klen = ucol_getSortKey(coll, test, tlen, key, 256);
2946
2947   ucol_close(coll);
2948   } else {
2949     log_data_err("Couldn't open UCA\n");
2950   }
2951 }
2952
2953 static void TestVariableTopSetting(void) {
2954   UErrorCode status = U_ZERO_ERROR;
2955   const UChar *current = NULL;
2956   uint32_t varTopOriginal = 0, varTop1, varTop2;
2957   UCollator *coll = ucol_open("", &status);
2958   if(U_SUCCESS(status)) {
2959
2960   uint32_t strength = 0;
2961   uint16_t specs = 0;
2962   uint32_t chOffset = 0;
2963   uint32_t chLen = 0;
2964   uint32_t exOffset = 0;
2965   uint32_t exLen = 0;
2966   uint32_t oldChOffset = 0;
2967   uint32_t oldChLen = 0;
2968   uint32_t oldExOffset = 0;
2969   uint32_t oldExLen = 0;
2970   uint32_t prefixOffset = 0;
2971   uint32_t prefixLen = 0;
2972
2973   UBool startOfRules = TRUE;
2974   UColTokenParser src;
2975   UColOptionSet opts;
2976
2977   UChar *rulesCopy = NULL;
2978   uint32_t rulesLen;
2979
2980   UCollationResult result;
2981
2982   UChar first[256] = { 0 };
2983   UChar second[256] = { 0 };
2984   UParseError parseError;
2985   int32_t myQ = getTestOption(QUICK_OPTION);
2986
2987   uprv_memset(&src, 0, sizeof(UColTokenParser));
2988
2989   src.opts = &opts;
2990
2991   if(getTestOption(QUICK_OPTION) <= 0) {
2992     setTestOption(QUICK_OPTION, 1);
2993   }
2994
2995   /* this test will fail when normalization is turned on */
2996   /* therefore we always turn off exhaustive mode for it */
2997   { /* QUICK > 0*/
2998     log_verbose("Slide variable top over UCARules\n");
2999     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3000     rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3001     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3002
3003     if(U_SUCCESS(status) && rulesLen > 0) {
3004       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3005       src.current = src.source = rulesCopy;
3006       src.end = rulesCopy+rulesLen;
3007       src.extraCurrent = src.end;
3008       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3009
3010           /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3011            the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3012       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3013         strength = src.parsedToken.strength;
3014         chOffset = src.parsedToken.charsOffset;
3015         chLen = src.parsedToken.charsLen;
3016         exOffset = src.parsedToken.extensionOffset;
3017         exLen = src.parsedToken.extensionLen;
3018         prefixOffset = src.parsedToken.prefixOffset;
3019         prefixLen = src.parsedToken.prefixLen;
3020         specs = src.parsedToken.flags;
3021
3022         startOfRules = FALSE;
3023         {
3024           log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3025         }
3026         if(strength == UCOL_PRIMARY) {
3027           status = U_ZERO_ERROR;
3028           varTopOriginal = ucol_getVariableTop(coll, &status);
3029           varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3030           if(U_FAILURE(status)) {
3031             char buffer[256];
3032             char *buf = buffer;
3033             uint32_t i = 0, j;
3034             uint32_t CE = UCOL_NO_MORE_CES;
3035
3036             /* before we start screaming, let's see if there is a problem with the rules */
3037             UErrorCode collIterateStatus = U_ZERO_ERROR;
3038             collIterate *s = uprv_new_collIterate(&collIterateStatus);
3039             uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3040
3041             CE = ucol_getNextCE(coll, s, &status);
3042
3043             for(i = 0; i < oldChLen; i++) {
3044               j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3045               buf += j;
3046             }
3047             if(status == U_PRIMARY_TOO_LONG_ERROR) {
3048               log_verbose("= Expected failure for %s =", buffer);
3049             } else {
3050               if(uprv_collIterateAtEnd(s)) {
3051                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3052                   oldChOffset, u_errorName(status), buffer);
3053               } else {
3054                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3055                   buffer);
3056               }
3057             }
3058             uprv_delete_collIterate(s);
3059           }
3060           varTop2 = ucol_getVariableTop(coll, &status);
3061           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3062             log_err("cannot retrieve set varTop value!\n");
3063             continue;
3064           }
3065
3066           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3067
3068             u_strncpy(first, src.source+oldChOffset, oldChLen);
3069             u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3070             u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3071             first[2*oldChLen+chLen] = 0;
3072
3073             if(oldExLen == 0) {
3074               u_strncpy(second, src.source+chOffset, chLen);
3075               second[chLen] = 0;
3076             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3077               u_strncpy(second, src.source+oldExOffset, oldExLen);
3078               u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3079               u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3080               second[2*oldExLen+chLen] = 0;
3081             }
3082             result = ucol_strcoll(coll, first, -1, second, -1);
3083             if(result == UCOL_EQUAL) {
3084               doTest(coll, first, second, UCOL_EQUAL);
3085             } else {
3086               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3087             }
3088           }
3089         }
3090         if(strength != UCOL_TOK_RESET) {
3091           oldChOffset = chOffset;
3092           oldChLen = chLen;
3093           oldExOffset = exOffset;
3094           oldExLen = exLen;
3095         }
3096       }
3097       status = U_ZERO_ERROR;
3098     }
3099     else {
3100       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3101       return;
3102     }
3103     if (U_FAILURE(status)) {
3104         log_err("Error parsing rules %s\n", u_errorName(status));
3105         return;
3106     }
3107     status = U_ZERO_ERROR;
3108   }
3109
3110   setTestOption(QUICK_OPTION, myQ);
3111
3112   log_verbose("Testing setting variable top to contractions\n");
3113   {
3114     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3115     int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3116     while(*conts != 0) {
3117       /*
3118        * A continuation is NUL-terminated and NUL-padded
3119        * except if it has the maximum length.
3120        */
3121       int32_t contractionLength = maxUCAContractionLength;
3122       while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3123         --contractionLength;
3124       }
3125       if(*(conts+1)==0) { /* pre-context */
3126         varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3127       } else {
3128         varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3129       }
3130       if(U_FAILURE(status)) {
3131         if(status == U_PRIMARY_TOO_LONG_ERROR) {
3132           /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3133            * therefore it is not an error when it complains about them. */
3134           log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3135                       *conts, *(conts+1), *(conts+2));
3136         } else {
3137           log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3138                   *conts, *(conts+1), *(conts+2), u_errorName(status));
3139         }
3140         status = U_ZERO_ERROR;
3141       }
3142       conts+=maxUCAContractionLength;
3143     }
3144
3145     status = U_ZERO_ERROR;
3146
3147     first[0] = 0x0040;
3148     first[1] = 0x0050;
3149     first[2] = 0x0000;
3150
3151     ucol_setVariableTop(coll, first, -1, &status);
3152
3153     if(U_SUCCESS(status)) {
3154       log_err("Invalid contraction succeded in setting variable top!\n");
3155     }
3156
3157   }
3158
3159   log_verbose("Test restoring variable top\n");
3160
3161   status = U_ZERO_ERROR;
3162   ucol_restoreVariableTop(coll, varTopOriginal, &status);
3163   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3164     log_err("Couldn't restore old variable top\n");
3165   }
3166
3167   log_verbose("Testing calling with error set\n");
3168
3169   status = U_INTERNAL_PROGRAM_ERROR;
3170   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3171   varTop2 = ucol_getVariableTop(coll, &status);
3172   ucol_restoreVariableTop(coll, varTop2, &status);
3173   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3174   varTop2 = ucol_getVariableTop(NULL, &status);
3175   ucol_restoreVariableTop(NULL, varTop2, &status);
3176   if(status != U_INTERNAL_PROGRAM_ERROR) {
3177     log_err("Bad reaction to passed error!\n");
3178   }
3179   uprv_free(src.source);
3180   ucol_close(coll);
3181   } else {
3182     log_data_err("Couldn't open UCA collator\n");
3183   }
3184
3185 }
3186
3187 static void TestNonChars(void) {
3188   static const char *test[] = {
3189       "\\u0000",  /* ignorable */
3190       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
3191       "\\uFDD0", "\\uFDEF",
3192       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
3193       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
3194       "\\U0003FFFE", "\\U0003FFFF",
3195       "\\U0004FFFE", "\\U0004FFFF",
3196       "\\U0005FFFE", "\\U0005FFFF",
3197       "\\U0006FFFE", "\\U0006FFFF",
3198       "\\U0007FFFE", "\\U0007FFFF",
3199       "\\U0008FFFE", "\\U0008FFFF",
3200       "\\U0009FFFE", "\\U0009FFFF",
3201       "\\U000AFFFE", "\\U000AFFFF",
3202       "\\U000BFFFE", "\\U000BFFFF",
3203       "\\U000CFFFE", "\\U000CFFFF",
3204       "\\U000DFFFE", "\\U000DFFFF",
3205       "\\U000EFFFE", "\\U000EFFFF",
3206       "\\U000FFFFE", "\\U000FFFFF",
3207       "\\U0010FFFE", "\\U0010FFFF",
3208       "\\uFFFF"  /* special character with maximum primary weight */
3209   };
3210   UErrorCode status = U_ZERO_ERROR;
3211   UCollator *coll = ucol_open("en_US", &status);
3212
3213   log_verbose("Test non characters\n");
3214
3215   if(U_SUCCESS(status)) {
3216     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3217   } else {
3218     log_err_status(status, "Unable to open collator\n");
3219   }
3220
3221   ucol_close(coll);
3222 }
3223
3224 static void TestExtremeCompression(void) {
3225   static char *test[4];
3226   int32_t j = 0, i = 0;
3227
3228   for(i = 0; i<4; i++) {
3229     test[i] = (char *)malloc(2048*sizeof(char));
3230   }
3231
3232   for(j = 20; j < 500; j++) {
3233     for(i = 0; i<4; i++) {
3234       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3235       test[i][j-1] = (char)('a'+i);
3236       test[i][j] = 0;
3237     }
3238     genericLocaleStarter("en_US", (const char **)test, 4);
3239   }
3240
3241
3242   for(i = 0; i<4; i++) {
3243     free(test[i]);
3244   }
3245 }
3246
3247 #if 0
3248 static void TestExtremeCompression(void) {
3249   static char *test[4];
3250   int32_t j = 0, i = 0;
3251   UErrorCode status = U_ZERO_ERROR;
3252   UCollator *coll = ucol_open("en_US", status);
3253   for(i = 0; i<4; i++) {
3254     test[i] = (char *)malloc(2048*sizeof(char));
3255   }
3256   for(j = 10; j < 2048; j++) {
3257     for(i = 0; i<4; i++) {
3258       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
3259       test[i][j-1] = (char)('a'+i);
3260       test[i][j] = 0;
3261     }
3262   }
3263   genericLocaleStarter("en_US", (const char **)test, 4);
3264
3265   for(j = 10; j < 2048; j++) {
3266     for(i = 0; i<1; i++) {
3267       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3268       test[i][j] = 0;
3269     }
3270   }
3271   for(i = 0; i<4; i++) {
3272     free(test[i]);
3273   }
3274 }
3275 #endif
3276
/**
 * Runs a table of strings containing supplementary code points (written as
 * escaped surrogate pairs) through genericRulesStarter() with a tailoring
 * that places those code points after 'z'.
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *test[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  /* All 14 entries take part. */
  genericRulesStarter(tailoring, test, LEN(test));
}
3297
3298 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3299 static void TestPrefix(void) {
3300   uint32_t i;
3301
3302   static const struct {
3303     const char *rules;
3304     const char *data[50];
3305     const uint32_t len;
3306   } tests[] = {
3307     { "&z <<< z|a",
3308       {"zz", "za"}, 2 },
3309
3310     { "&z <<< z|   a",
3311       {"zz", "za"}, 2 },
3312     { "[strength I]"
3313       "&a=\\ud900\\udc25"
3314       "&z<<<\\ud900\\udc25|a",
3315       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3316   };
3317
3318
3319   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3320     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3321   }
3322 }
3323
3324 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3325 /* JIS X 4061 collation order implementation                                   */
3326 static void TestNewJapanese(void) {
3327
3328   static const char * const test1[] = {
3329       "\\u30b7\\u30e3\\u30fc\\u30ec",
3330       "\\u30b7\\u30e3\\u30a4",
3331       "\\u30b7\\u30e4\\u30a3",
3332       "\\u30b7\\u30e3\\u30ec",
3333       "\\u3061\\u3087\\u3053",
3334       "\\u3061\\u3088\\u3053",
3335       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3336       "\\u3066\\u30fc\\u305f",
3337       "\\u30c6\\u30fc\\u30bf",
3338       "\\u30c6\\u30a7\\u30bf",
3339       "\\u3066\\u3048\\u305f",
3340       "\\u3067\\u30fc\\u305f",
3341       "\\u30c7\\u30fc\\u30bf",
3342       "\\u30c7\\u30a7\\u30bf",
3343       "\\u3067\\u3048\\u305f",
3344       "\\u3066\\u30fc\\u305f\\u30fc",
3345       "\\u30c6\\u30fc\\u30bf\\u30a1",
3346       "\\u30c6\\u30a7\\u30bf\\u30fc",
3347       "\\u3066\\u3047\\u305f\\u3041",
3348       "\\u3066\\u3048\\u305f\\u30fc",
3349       "\\u3067\\u30fc\\u305f\\u30fc",
3350       "\\u30c7\\u30fc\\u30bf\\u30a1",
3351       "\\u3067\\u30a7\\u305f\\u30a1",
3352       "\\u30c7\\u3047\\u30bf\\u3041",
3353       "\\u30c7\\u30a8\\u30bf\\u30a2",
3354       "\\u3072\\u3086",
3355       "\\u3073\\u3085\\u3042",
3356       "\\u3074\\u3085\\u3042",
3357       "\\u3073\\u3085\\u3042\\u30fc",
3358       "\\u30d3\\u30e5\\u30a2\\u30fc",
3359       "\\u3074\\u3085\\u3042\\u30fc",
3360       "\\u30d4\\u30e5\\u30a2\\u30fc",
3361       "\\u30d2\\u30e5\\u30a6",
3362       "\\u30d2\\u30e6\\u30a6",
3363       "\\u30d4\\u30e5\\u30a6\\u30a2",
3364       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3365       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3366       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3367       "\\u3072\\u3085\\u3093",
3368       "\\u3074\\u3085\\u3093",
3369       "\\u3075\\u30fc\\u308a",
3370       "\\u30d5\\u30fc\\u30ea",
3371       "\\u3075\\u3045\\u308a",
3372       "\\u3075\\u30a5\\u308a",
3373       "\\u3075\\u30a5\\u30ea",
3374       "\\u30d5\\u30a6\\u30ea",
3375       "\\u3076\\u30fc\\u308a",
3376       "\\u30d6\\u30fc\\u30ea",
3377       "\\u3076\\u3045\\u308a",
3378       "\\u30d6\\u30a5\\u308a",
3379       "\\u3077\\u3046\\u308a",
3380       "\\u30d7\\u30a6\\u30ea",
3381       "\\u3075\\u30fc\\u308a\\u30fc",
3382       "\\u30d5\\u30a5\\u30ea\\u30fc",
3383       "\\u3075\\u30a5\\u308a\\u30a3",
3384       "\\u30d5\\u3045\\u308a\\u3043",
3385       "\\u30d5\\u30a6\\u30ea\\u30fc",
3386       "\\u3075\\u3046\\u308a\\u3043",
3387       "\\u30d6\\u30a6\\u30ea\\u30a4",
3388       "\\u3077\\u30fc\\u308a\\u30fc",
3389       "\\u3077\\u30a5\\u308a\\u30a4",
3390       "\\u3077\\u3046\\u308a\\u30fc",
3391       "\\u30d7\\u30a6\\u30ea\\u30a4",
3392       "\\u30d5\\u30fd",
3393       "\\u3075\\u309e",
3394       "\\u3076\\u309d",
3395       "\\u3076\\u3075",
3396       "\\u3076\\u30d5",
3397       "\\u30d6\\u3075",
3398       "\\u30d6\\u30d5",
3399       "\\u3076\\u309e",
3400       "\\u3076\\u3077",
3401       "\\u30d6\\u3077",
3402       "\\u3077\\u309d",
3403       "\\u30d7\\u30fd",
3404       "\\u3077\\u3075",
3405 };
3406
3407   static const char *test2[] = {
3408     "\\u306f\\u309d", /* H\\u309d */
3409     "\\u30cf\\u30fd", /* K\\u30fd */
3410     "\\u306f\\u306f", /* HH */
3411     "\\u306f\\u30cf", /* HK */
3412     "\\u30cf\\u30cf", /* KK */
3413     "\\u306f\\u309e", /* H\\u309e */
3414     "\\u30cf\\u30fe", /* K\\u30fe */
3415     "\\u306f\\u3070", /* HH\\u309b */
3416     "\\u30cf\\u30d0", /* KK\\u309b */
3417     "\\u306f\\u3071", /* HH\\u309c */
3418     "\\u30cf\\u3071", /* KH\\u309c */
3419     "\\u30cf\\u30d1", /* KK\\u309c */
3420     "\\u3070\\u309d", /* H\\u309b\\u309d */
3421     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3422     "\\u3070\\u306f", /* H\\u309bH */
3423     "\\u30d0\\u30cf", /* K\\u309bK */
3424     "\\u3070\\u309e", /* H\\u309b\\u309e */
3425     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3426     "\\u3070\\u3070", /* H\\u309bH\\u309b */
3427     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3428     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3429     "\\u3070\\u3071", /* H\\u309bH\\u309c */
3430     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3431     "\\u3071\\u309d", /* H\\u309c\\u309d */
3432     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3433     "\\u3071\\u306f", /* H\\u309cH */
3434     "\\u30d1\\u30cf", /* K\\u309cK */
3435     "\\u3071\\u3070", /* H\\u309cH\\u309b */
3436     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3437     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3438     "\\u3071\\u3071", /* H\\u309cH\\u309c */
3439     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3440   };
3441   /*
3442   static const char *test3[] = {
3443     "\\u221er\\u221e",
3444     "\\u221eR#",
3445     "\\u221et\\u221e",
3446     "#r\\u221e",
3447     "#R#",
3448     "#t%",
3449     "#T%",
3450     "8t\\u221e",
3451     "8T\\u221e",
3452     "8t#",
3453     "8T#",
3454     "8t%",
3455     "8T%",
3456     "8t8",
3457     "8T8",
3458     "\\u03c9r\\u221e",
3459     "\\u03a9R%",
3460     "rr\\u221e",
3461     "rR\\u221e",
3462     "Rr\\u221e",
3463     "RR\\u221e",
3464     "RT%",
3465     "rt8",
3466     "tr\\u221e",
3467     "tr8",
3468     "TR8",
3469     "tt8",
3470     "\\u30b7\\u30e3\\u30fc\\u30ec",
3471   };
3472   */
3473   static const UColAttribute att[] = { UCOL_STRENGTH };
3474   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3475
3476   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3477   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3478
3479   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3480   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3481   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3482   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3483   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3484 }
3485
3486 static void TestStrCollIdenticalPrefix(void) {
3487   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3488   const char* test[] = {
3489     "ab\\ud9b0\\udc70",
3490     "ab\\ud9b0\\udc71"
3491   };
3492   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3493 }
3494 /* Contractions should have all their canonically equivalent */
3495 /* strings included */
3496 static void TestContractionClosure(void) {
3497   static const struct {
3498     const char *rules;
3499     const char *data[10];
3500     const uint32_t len;
3501   } tests[] = {
3502     {   "&b=\\u00e4\\u00e4",
3503       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3504     {   "&b=\\u00C5",
3505       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3506   };
3507   uint32_t i;
3508
3509
3510   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3511     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3512   }
3513 }
3514
/*
 * Runs a handful of tailorings that contain a [before 3] reset through
 * genericRulesStarter().  The second and third entries use the same three
 * rules in a different order.
 *
 * The section under "#if 0" is disabled; it repeats the exercise with
 * prefix ('|') rules and dumps the collation elements of the test strings
 * in verbose mode.
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *tailoring;     /* escaped rule string */
    const char *strings[10];   /* escaped test strings; only the first `count` are used */
    const uint32_t count;      /* number of valid entries in `strings` */
  } cases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  uint32_t idx = 0;

  while(idx < (sizeof(cases)/sizeof(cases[0]))) {
    genericRulesStarter(cases[idx].tailoring, cases[idx].strings, cases[idx].count);
    ++idx;
  }

#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
  /* Debugging aid: print the collation elements produced for each test */
  /* string under rule1.  Output appears only in verbose mode.          */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
3592
3593 static void TestPrefixCompose(void) {
3594   const char* rule1 =
3595         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3596   /*
3597   const char* test[] = {
3598       "\\u30c6\\u30fc\\u30bf",
3599       "\\u30c6\\u30a7\\u30bf",
3600   };
3601   */
3602   {
3603     UErrorCode status = U_ZERO_ERROR;
3604     /*uint32_t i = 0;*/
3605     /*UCollationElements *it = NULL;*/
3606 /*    uint32_t CE;*/
3607     UChar string[256];
3608     uint32_t uStringLen;
3609     UCollator *coll = NULL;
3610
3611     uStringLen = u_unescape(rule1, string, 256);
3612
3613     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3614     ucol_close(coll);
3615   }
3616
3617
3618 }
3619
3620 /*
3621 [last variable] last variable value
3622 [last primary ignorable] largest CE for primary ignorable
3623 [last secondary ignorable] largest CE for secondary ignorable
3624 [last tertiary ignorable] largest CE for tertiary ignorable
3625 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3626 */
3627
/*
 * Runs a table of tailorings that reset on the symbolic positions
 * ([first/last tertiary ignorable], [first/last variable], [first regular],
 * [top], ...) through genericRulesStarter(), each with a list of escaped
 * strings.
 */
static void TestRuleOptions(void) {
  /* The values below are hardcoded for the current UCA.  When the UCA
   * changes, they may have to be updated.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *tailoring;     /* escaped rule string */
    const char *strings[10];   /* escaped test strings; only the first `count` are used */
    const uint32_t count;      /* number of entries of `strings` handed to the helper */
  } cases[] = {
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */

    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first secondary ignorable */

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last secondary ignorable */

    /* 'normal' befores */

    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },

    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },

    /* NOTE(review): eight strings are listed here but the count is 7, so the
     * trailing "u" is never handed to the helper -- confirm whether that is
     * intentional before changing it.
     */
    { "&[last variable]<z"
      "&[last primary ignorable]<x"
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  uint32_t idx = 0;

  while(idx < (sizeof(cases)/sizeof(cases[0]))) {
    genericRulesStarter(cases[idx].tailoring, cases[idx].strings, cases[idx].count);
    ++idx;
  }
}
3742
3743
/*
 * Not a real test: it only builds a collator from an [optimize ...] rule to
 * see whether copying of UCA contents fails.  The observable collation
 * behaviour is expected to stay the same, so there is nothing specific to
 * verify beyond the two plain strings given to genericRulesStarter().
 */
static void TestOptimize(void) {
  static const struct {
    const char *tailoring;     /* escaped rule string */
    const char *strings[10];   /* escaped test strings; only the first `count` are used */
    const uint32_t count;      /* number of valid entries in `strings` */
  } cases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  uint32_t idx = 0;

  while(idx < (sizeof(cases)/sizeof(cases[0]))) {
    genericRulesStarter(cases[idx].tailoring, cases[idx].strings, cases[idx].count);
    ++idx;
  }
}
3765
3766 /*
3767 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3768 weiv    ucol_strcollIter?
3769 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3770 weiv    these are the input strings?
3771 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3772 weiv    will check - could be a problem with utf-8 iterator
3773 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3774 weiv    hmmm
3775 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3776 weiv    that doesn't sound right
3777 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3778 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3779 cycheng@ca.ibm.c... yes
3780 weiv    and then do the comparison
3781 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3782 weiv    utf-16 strings look like a little endian ones in the example you sent me
3783 weiv    It could be a bug - let me try to test it out
3784 cycheng@ca.ibm.c... ok
3785 cycheng@ca.ibm.c... we can wait till the conf. call
3786 cycheng@ca.ibm.c... next weke
3787 weiv    that would be great
3788 weiv    hmmm
3789 weiv    I might be wrong
3790 weiv    let me play with it some more
3791 cycheng@ca.ibm.c... ok
3792 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3793 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3794 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3795 weiv    ok
3796 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3797 weiv    thanks
3798 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3799 */
#if 0
/*
 * Disabled.  Compares one pair of strings through ucol_strcollIter() twice,
 * once with UTF-16BE byte iterators and once with UTF-8 byte iterators, and
 * logs an error when the two results differ.
 */
static void Alexis(void) {
  UErrorCode errorCode = U_ZERO_ERROR;
  UCollator *collator = ucol_open("", &errorCode);


  const char bytesUTF16BE[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  const char bytesUTF8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator leftUTF16BE, rightUTF16BE;
  UCharIterator leftUTF8, rightUTF8;

  UCollationResult orderUTF16BE, orderUTF8;

  uiter_setUTF16BE(&leftUTF16BE, bytesUTF16BE[0], 4);
  uiter_setUTF16BE(&rightUTF16BE, bytesUTF16BE[1], 4);

  uiter_setUTF8(&leftUTF8, bytesUTF8[0], 4);
  uiter_setUTF8(&rightUTF8, bytesUTF8[1], 4);

  ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &errorCode);

  orderUTF16BE = ucol_strcollIter(collator, &leftUTF16BE, &rightUTF16BE, &errorCode);
  orderUTF8 = ucol_strcollIter(collator, &leftUTF8, &rightUTF8, &errorCode);


  if(orderUTF16BE != orderUTF8) {
    log_err("different results\n");
  }

  ucol_close(collator);
}
#endif
3840
3841 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3842 static void Alexis2(void) {
3843   UErrorCode status = U_ZERO_ERROR;
3844   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3845   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3846   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3847   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3848
3849   UConverter *conv = NULL;
3850
3851   UCharIterator U16BEItS, U16BEItT;
3852   UCharIterator U8ItS, U8ItT;
3853
3854   UCollationResult resU16, resU16BE, resU8;
3855
3856   static const char* const pairs[][2] = {
3857     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3858     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3859     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3860     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3861     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3862     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3863     { "\\u0020", "\\u0020\\u0000"}
3864 /*
3865 5F20 (my result here)
3866 5F204E008E3F
3867 5F20 (your result here)
3868 */
3869   };
3870
3871   int32_t i = 0;
3872
3873   UCollator *coll = ucol_open("", &status);
3874   if(status == U_FILE_ACCESS_ERROR) {
3875     log_data_err("Is your data around?\n");
3876     return;
3877   } else if(U_FAILURE(status)) {
3878     log_err("Error opening collator\n");
3879     return;
3880   }
3881   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3882   conv = ucnv_open("UTF16BE", &status);
3883   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3884     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3885     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3886
3887     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3888
3889     log_verbose("Result of strcoll is %i\n", resU16);
3890
3891     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3892     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3893
3894     /* use the original sizes, as the result from converter is in bytes */
3895     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3896     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3897
3898     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3899
3900     log_verbose("Result of U16BE is %i\n", resU16BE);
3901
3902     if(resU16 != resU16BE) {
3903       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3904     }
3905
3906     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3907     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3908
3909     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3910     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3911
3912     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3913
3914     if(resU16 != resU8) {
3915       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3916     }
3917
3918   }
3919
3920   ucol_close(coll);
3921   ucnv_close(conv);
3922 }
3923
3924 static void TestHebrewUCA(void) {
3925   UErrorCode status = U_ZERO_ERROR;
3926   static const char *first[] = {
3927     "d790d6b8d79cd795d6bcd7a9",
3928     "d790d79cd79ed7a7d799d799d7a1",
3929     "d790d6b4d79ed795d6bcd7a9",
3930   };
3931
3932   char utf8String[3][256];
3933   UChar utf16String[3][256];
3934
3935   int32_t i = 0, j = 0;
3936   int32_t sizeUTF8[3];
3937   int32_t sizeUTF16[3];
3938
3939   UCollator *coll = ucol_open("", &status);
3940   if (U_FAILURE(status)) {
3941       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3942       return;
3943   }
3944   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3945
3946   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3947     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3948     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3949     log_verbose("%i: ");
3950     for(j = 0; j < sizeUTF16[i]; j++) {
3951       /*log_verbose("\\u%04X", utf16String[i][j]);*/
3952       log_verbose("%04X", utf16String[i][j]);
3953     }
3954     log_verbose("\n");
3955   }
3956   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3957     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3958       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3959     }
3960   }
3961
3962   ucol_close(coll);
3963
3964 }
3965
3966 static void TestPartialSortKeyTermination(void) {
3967   static const char* cases[] = {
3968     "\\u1234\\u1234\\udc00",
3969     "\\udc00\\ud800\\ud800"
3970   };
3971
3972   int32_t i = sizeof(UCollator);
3973
3974   UErrorCode status = U_ZERO_ERROR;
3975
3976   UCollator *coll = ucol_open("", &status);
3977
3978   UCharIterator iter;
3979
3980   UChar currCase[256];
3981   int32_t length = 0;
3982   int32_t pKeyLen = 0;
3983
3984   uint8_t key[256];
3985
3986   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3987     uint32_t state[2] = {0, 0};
3988     length = u_unescape(cases[i], currCase, 256);
3989     uiter_setString(&iter, currCase, length);
3990     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3991
3992     log_verbose("Done\n");
3993
3994   }
3995   ucol_close(coll);
3996 }
3997
3998 static void TestSettings(void) {
3999   static const char* cases[] = {
4000     "apple",
4001       "Apple"
4002   };
4003
4004   static const char* locales[] = {
4005     "",
4006       "en"
4007   };
4008
4009   UErrorCode status = U_ZERO_ERROR;
4010
4011   int32_t i = 0, j = 0;
4012
4013   UChar source[256], target[256];
4014   int32_t sLen = 0, tLen = 0;
4015
4016   UCollator *collateObject = NULL;
4017   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4018     collateObject = ucol_open(locales[i], &status);
4019     ucol_setStrength(collateObject, UCOL_PRIMARY);
4020     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4021     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4022       sLen = u_unescape(cases[j-1], source, 256);
4023       source[sLen] = 0;
4024       tLen = u_unescape(cases[j], target, 256);
4025       source[tLen] = 0;
4026       doTest(collateObject, source, target, UCOL_EQUAL);
4027     }
4028     ucol_close(collateObject);
4029   }
4030 }
4031
4032 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4033     UErrorCode status = U_ZERO_ERROR;
4034     int32_t errorNo = 0;
4035     /*const UChar *sourceRules = NULL;*/
4036     /*int32_t sourceRulesLen = 0;*/
4037     UColAttributeValue french = UCOL_OFF;
4038     int32_t cloneSize = 0;
4039
4040     if(!ucol_equals(source, target)) {
4041         log_err("Same collators, different address not equal\n");
4042         errorNo++;
4043     }
4044     ucol_close(target);
4045     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4046         /* currently, safeClone is implemented through getRules/openRules
4047         * so it is the same as the test below - I will comment that test out.
4048         */
4049         /* real thing */
4050         target = ucol_safeClone(source, NULL, &cloneSize, &status);
4051         if(U_FAILURE(status)) {
4052             log_err("Error creating clone\n");
4053             errorNo++;
4054             return errorNo;
4055         }
4056         if(!ucol_equals(source, target)) {
4057             log_err("Collator different from it's clone\n");
4058             errorNo++;
4059         }
4060         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4061         if(french == UCOL_ON) {
4062             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4063         } else {
4064             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4065         }
4066         if(U_FAILURE(status)) {
4067             log_err("Error setting attributes\n");
4068             errorNo++;
4069             return errorNo;
4070         }
4071         if(ucol_equals(source, target)) {
4072             log_err("Collators same even when options changed\n");
4073             errorNo++;
4074         }
4075         ucol_close(target);
4076         /* commented out since safeClone uses exactly the same technique */
4077         /*
4078         sourceRules = ucol_getRules(source, &sourceRulesLen);
4079         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4080         if(U_FAILURE(status)) {
4081         log_err("Error instantiating target from rules\n");
4082         errorNo++;
4083         return errorNo;
4084         }
4085         if(!ucol_equals(source, target)) {
4086         log_err("Collator different from collator that was created from the same rules\n");
4087         errorNo++;
4088         }
4089         ucol_close(target);
4090         */
4091     }
4092     return errorNo;
4093 }
4094
4095
4096 static void TestEquals(void) {
4097     /* ucol_equals is not currently a public API. There is a chance that it will become
4098     * something like this, but currently it is only used by RuleBasedCollator::operator==
4099     */
4100     /* test whether the two collators instantiated from the same locale are equal */
4101     UErrorCode status = U_ZERO_ERROR;
4102     UParseError parseError;
4103     int32_t noOfLoc = uloc_countAvailable();
4104     const char *locName = NULL;
4105     UCollator *source = NULL, *target = NULL;
4106     int32_t i = 0;
4107
4108     const char* rules[] = {
4109         "&l < lj <<< Lj <<< LJ",
4110         "&n < nj <<< Nj <<< NJ",
4111         "&ae <<< \\u00e4",
4112         "&AE <<< \\u00c4"
4113     };
4114     /*
4115     const char* badRules[] = {
4116     "&l <<< Lj",
4117     "&n < nj <<< nJ <<< NJ",
4118     "&a <<< \\u00e4",
4119     "&AE <<< \\u00c4 <<< x"
4120     };
4121     */
4122
4123     UChar sourceRules[1024], targetRules[1024];
4124     int32_t sourceRulesSize = 0, targetRulesSize = 0;
4125     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4126
4127     for(i = 0; i < rulesSize; i++) {
4128         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4129         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4130     }
4131
4132     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4133     if(status == U_FILE_ACCESS_ERROR) {
4134         log_data_err("Is your data around?\n");
4135         return;
4136     } else if(U_FAILURE(status)) {
4137         log_err("Error opening collator\n");
4138         return;
4139     }
4140     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4141     if(!ucol_equals(source, target)) {
4142         log_err("Equivalent collators not equal!\n");
4143     }
4144     ucol_close(source);
4145     ucol_close(target);
4146
4147     source = ucol_open("root", &status);
4148     target = ucol_open("root", &status);
4149     log_verbose("Testing root\n");
4150     if(!ucol_equals(source, source)) {
4151         log_err("Same collator not equal\n");
4152     }
4153     if(TestEqualsForCollator(locName, source, target)) {
4154         log_err("Errors for root\n", locName);
4155     }
4156     ucol_close(source);
4157
4158     for(i = 0; i<noOfLoc; i++) {
4159         status = U_ZERO_ERROR;
4160         locName = uloc_getAvailable(i);
4161         /*if(hasCollationElements(locName)) {*/
4162         log_verbose("Testing equality for locale %s\n", locName);
4163         source = ucol_open(locName, &status);
4164         target = ucol_open(locName, &status);
4165         if (U_FAILURE(status)) {
4166             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4167             continue;
4168         }
4169         if(TestEqualsForCollator(locName, source, target)) {
4170             log_err("Errors for locale %s\n", locName);
4171         }
4172         ucol_close(source);
4173         /*}*/
4174     }
4175 }
4176
4177 static void TestJ2726(void) {
4178     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4179     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4180     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4181     UErrorCode status = U_ZERO_ERROR;
4182     UCollator *coll = ucol_open("en", &status);
4183     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4184     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4185     doTest(coll, a, aSpace, UCOL_EQUAL);
4186     doTest(coll, aSpace, a, UCOL_EQUAL);
4187     doTest(coll, a, spaceA, UCOL_EQUAL);
4188     doTest(coll, spaceA, a, UCOL_EQUAL);
4189     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4190     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4191     ucol_close(coll);
4192 }
4193
4194 static void NullRule(void) {
4195     UChar r[3] = {0};
4196     UErrorCode status = U_ZERO_ERROR;
4197     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4198     if(U_SUCCESS(status)) {
4199         log_err("This should have been an error!\n");
4200         ucol_close(coll);
4201     } else {
4202         status = U_ZERO_ERROR;
4203     }
4204     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4205     if(U_FAILURE(status)) {
4206         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4207     } else {
4208         ucol_close(coll);
4209     }
4210 }
4211
4212 /**
4213  * Test for CollationElementIterator previous and next for the whole set of
4214  * unicode characters with normalization on.
4215  */
4216 static void TestNumericCollation(void)
4217 {
4218     UErrorCode status = U_ZERO_ERROR;
4219
4220     const static char *basicTestStrings[]={
4221     "hello1",
4222     "hello2",
4223     "hello2002",
4224     "hello2003",
4225     "hello123456",
4226     "hello1234567",
4227     "hello10000000",
4228     "hello100000000",
4229     "hello1000000000",
4230     "hello10000000000",
4231     };
4232
4233     const static char *preZeroTestStrings[]={
4234     "avery10000",
4235     "avery010000",
4236     "avery0010000",
4237     "avery00010000",
4238     "avery000010000",
4239     "avery0000010000",
4240     "avery00000010000",
4241     "avery000000010000",
4242     };
4243
4244     const static char *thirtyTwoBitNumericStrings[]={
4245     "avery42949672960",
4246     "avery42949672961",
4247     "avery42949672962",
4248     "avery429496729610"
4249     };
4250
4251      const static char *longNumericStrings[]={
4252      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4253         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4254         are treated as multiple collation elements. */
4255     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4256     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4257     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4258     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4259     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4260     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4261     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4262     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4263     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4264     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4265     };
4266
4267     const static char *supplementaryDigits[] = {
4268       "\\uD835\\uDFCE", /* 0 */
4269       "\\uD835\\uDFCF", /* 1 */
4270       "\\uD835\\uDFD0", /* 2 */
4271       "\\uD835\\uDFD1", /* 3 */
4272       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4273       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4274       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4275       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4276       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4277       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4278     };
4279
4280     const static char *foreignDigits[] = {
4281       "\\u0661",
4282         "\\u0662",
4283         "\\u0663",
4284       "\\u0661\\u0660",
4285       "\\u0661\\u0662",
4286       "\\u0661\\u0663",
4287       "\\u0662\\u0660",
4288       "\\u0662\\u0662",
4289       "\\u0662\\u0663",
4290       "\\u0663\\u0660",
4291       "\\u0663\\u0662",
4292       "\\u0663\\u0663"
4293     };
4294
4295     const static char *evenZeroes[] = {
4296       "2000",
4297       "2001",
4298         "2002",
4299         "2003"
4300     };
4301
4302     UColAttribute att = UCOL_NUMERIC_COLLATION;
4303     UColAttributeValue val = UCOL_ON;
4304
4305     /* Open our collator. */
4306     UCollator* coll = ucol_open("root", &status);
4307     if (U_FAILURE(status)){
4308         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4309               myErrorName(status));
4310         return;
4311     }
4312     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4313     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4314     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4315     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4316     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4317     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4318
4319     /* Setting up our collator to do digits. */
4320     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4321     if (U_FAILURE(status)){
4322         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4323               myErrorName(status));
4324         return;
4325     }
4326
4327     /*
4328        Testing that prepended zeroes still yield the correct collation behavior.
4329        We expect that every element in our strings array will be equal.
4330     */
4331     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4332
4333     ucol_close(coll);
4334 }
4335
4336 static void TestTibetanConformance(void)
4337 {
4338     const char* test[] = {
4339         "\\u0FB2\\u0591\\u0F71\\u0061",
4340         "\\u0FB2\\u0F71\\u0061"
4341     };
4342
4343     UErrorCode status = U_ZERO_ERROR;
4344     UCollator *coll = ucol_open("", &status);
4345     UChar source[100];
4346     UChar target[100];
4347     int result;
4348     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4349     if (U_SUCCESS(status)) {
4350         u_unescape(test[0], source, 100);
4351         u_unescape(test[1], target, 100);
4352         doTest(coll, source, target, UCOL_EQUAL);
4353         result = ucol_strcoll(coll, source, -1,   target, -1);
4354         log_verbose("result %d\n", result);
4355         if (UCOL_EQUAL != result) {
4356             log_err("Tibetan comparison error\n");
4357         }
4358     }
4359     ucol_close(coll);
4360
4361     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4362 }
4363
/* Runs a two-string Han sample through the "zh__PINYIN" locale starter. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };
    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
4368
/* Upper bound (inclusive) of the raw values walked by the implicit-weight tests. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Byte masks applied to 32-bit implicit weights.
 * Deliberately defined without a trailing ';' so that they expand to plain
 * constants and can be used anywhere inside an expression.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4373
4374
4375 static void showImplicit(UChar32 i) {
4376     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4377         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4378     }
4379 }
4380
4381 static void TestImplicitGeneration(void) {
4382     UErrorCode status = U_ZERO_ERROR;
4383     UChar32 last = 0;
4384     UChar32 current;
4385     UChar32 i = 0, j = 0;
4386     UChar32 roundtrip = 0;
4387     UChar32 lastBottom = 0;
4388     UChar32 currentBottom = 0;
4389     UChar32 lastTop = 0;
4390     UChar32 currentTop = 0;
4391
4392     UCollator *coll = ucol_open("root", &status);
4393     if(U_FAILURE(status)) {
4394         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4395         return;
4396     }
4397
4398     uprv_uca_getRawFromImplicit(0xE20303E7);
4399
4400     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4401         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4402
4403         /* check that it round-trips AND that all intervening ones are illegal*/
4404         roundtrip = uprv_uca_getRawFromImplicit(current);
4405         if (roundtrip != i) {
4406             log_err("No roundtrip %08X\n", i);
4407         }
4408         if (last != 0) {
4409             for (j = last + 1; j < current; ++j) {
4410                 roundtrip = uprv_uca_getRawFromImplicit(j);
4411                 /* raise an error if it *doesn't* find an error*/
4412                 if (roundtrip != -1) {
4413                     log_err("Fails to recognize illegal %08X\n", j);
4414                 }
4415             }
4416         }
4417         /* now do other consistency checks*/
4418         lastBottom = last & bottomByte;
4419         currentBottom = current & bottomByte;
4420         lastTop = last & topByte;
4421         currentTop = current & topByte;
4422
4423         /* print out some values for spot-checking*/
4424         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4425             showImplicit(i-3);
4426             showImplicit(i-2);
4427             showImplicit(i-1);
4428             showImplicit(i);
4429             showImplicit(i+1);
4430             showImplicit(i+2);
4431         }
4432         last = current;
4433
4434         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4435             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4436         }
4437     }
4438     showImplicit(TST_UCOL_MAX_INPUT-2);
4439     showImplicit(TST_UCOL_MAX_INPUT-1);
4440     showImplicit(TST_UCOL_MAX_INPUT);
4441     ucol_close(coll);
4442 }
4443
4444 /**
4445  * Iterate through the given iterator, checking to see that all the strings
4446  * in the expected array are present.
4447  * @param expected array of strings we expect to see, or NULL
4448  * @param expectedCount number of elements of expected, or 0
4449  */
4450 static int32_t checkUEnumeration(const char* msg,
4451                                  UEnumeration* iter,
4452                                  const char** expected,
4453                                  int32_t expectedCount) {
4454     UErrorCode ec = U_ZERO_ERROR;
4455     int32_t i = 0, n, j, bit;
4456     int32_t seenMask = 0;
4457
4458     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4459     n = uenum_count(iter, &ec);
4460     if (!assertSuccess("count", &ec)) return -1;
4461     log_verbose("%s = [", msg);
4462     for (;; ++i) {
4463         const char* s = uenum_next(iter, NULL, &ec);
4464         if (!assertSuccess("snext", &ec) || s == NULL) break;
4465         if (i != 0) log_verbose(",");
4466         log_verbose("%s", s);
4467         /* check expected list */
4468         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4469             if ((seenMask&bit) == 0 &&
4470                 uprv_strcmp(s, expected[j]) == 0) {
4471                 seenMask |= bit;
4472                 break;
4473             }
4474         }
4475     }
4476     log_verbose("] (%d)\n", i);
4477     assertTrue("count verified", i==n);
4478     /* did we see all expected strings? */
4479     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4480         if ((seenMask&bit)!=0) {
4481             log_verbose("Ok: \"%s\" seen\n", expected[j]);
4482         } else {
4483             log_err("FAIL: \"%s\" not seen\n", expected[j]);
4484         }
4485     }
4486     return n;
4487 }
4488
4489 /**
4490  * Test new API added for separate collation tree.
4491  */
4492 static void TestSeparateTrees(void) {
4493     UErrorCode ec = U_ZERO_ERROR;
4494     UEnumeration *e = NULL;
4495     int32_t n = -1;
4496     UBool isAvailable;
4497     char loc[256];
4498
4499     static const char* AVAIL[] = { "en", "de" };
4500
4501     static const char* KW[] = { "collation" };
4502
4503     static const char* KWVAL[] = { "phonebook", "stroke" };
4504
4505 #if !UCONFIG_NO_SERVICE
4506     e = ucol_openAvailableLocales(&ec);
4507     if (e != NULL) {
4508         assertSuccess("ucol_openAvailableLocales", &ec);
4509         assertTrue("ucol_openAvailableLocales!=0", e!=0);
4510         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4511         /* Don't need to check n because we check list */
4512         uenum_close(e);
4513     } else {
4514         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4515     }
4516 #endif
4517
4518     e = ucol_getKeywords(&ec);
4519     if (e != NULL) {
4520         assertSuccess("ucol_getKeywords", &ec);
4521         assertTrue("ucol_getKeywords!=0", e!=0);
4522         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4523         /* Don't need to check n because we check list */
4524         uenum_close(e);
4525     } else {
4526         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4527     }
4528
4529     e = ucol_getKeywordValues(KW[0], &ec);
4530     if (e != NULL) {
4531         assertSuccess("ucol_getKeywordValues", &ec);
4532         assertTrue("ucol_getKeywordValues!=0", e!=0);
4533         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4534         /* Don't need to check n because we check list */
4535         uenum_close(e);
4536     } else {
4537         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4538     }
4539
4540     /* Try setting a warning before calling ucol_getKeywordValues */
4541     ec = U_USING_FALLBACK_WARNING;
4542     e = ucol_getKeywordValues(KW[0], &ec);
4543     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4544         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4545         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4546         /* Don't need to check n because we check list */
4547         uenum_close(e);
4548     }
4549
4550     /*
4551 U_DRAFT int32_t U_EXPORT2
4552 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4553                              const char* locale, UBool* isAvailable,
4554                              UErrorCode* status);
4555 }
4556 */
4557     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4558                                      &isAvailable, &ec);
4559     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4560         assertEquals("getFunctionalEquivalent(de)", "de", loc);
4561         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4562                    isAvailable == TRUE);
4563     }
4564
4565     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4566                                      &isAvailable, &ec);
4567     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4568         assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4569         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4570                    isAvailable == TRUE);
4571     }
4572 }
4573
/*
 * Supersedes TestJ784.
 * Runs two string lists through both a hand-written "[before 2]" tailoring
 * for tone-marked vowels and the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    static const char *test2[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    /* first list: custom rules, then the zh locale */
    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));

    /* second list: same two collators */
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}
4634
4635 static void TestBeforeTightening(void) {
4636     static const struct {
4637         const char *rules;
4638         UErrorCode expectedStatus;
4639     } tests[] = {
4640         { "&[before 1]a<x", U_ZERO_ERROR },
4641         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4642         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4643         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4644         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4645         { "&[before 2]a<<x",U_ZERO_ERROR },
4646         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4647         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4648         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4649         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4650         { "&[before 3]a<<<x",U_ZERO_ERROR },
4651         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4652         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4653     };
4654
4655     int32_t i = 0;
4656
4657     UErrorCode status = U_ZERO_ERROR;
4658     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4659     uint32_t rlen = 0;
4660
4661     UCollator *coll = NULL;
4662
4663
4664     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4665         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4666         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4667         if(status != tests[i].expectedStatus) {
4668             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4669                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4670         }
4671         ucol_close(coll);
4672         status = U_ZERO_ERROR;
4673     }
4674
4675 }
4676
4677 #if 0
4678 &m < a
4679 &[before 1] a < x <<< X << q <<< Q < z
4680 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4681
4682 &m < a
4683 &[before 2] a << x <<< X << q <<< Q < z
4684 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4685
4686 &m < a
4687 &[before 3] a <<< x <<< X << q <<< Q < z
4688 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4689
4690
4691 &m << a
4692 &[before 1] a < x <<< X << q <<< Q < z
4693 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4694
4695 &m << a
4696 &[before 2] a << x <<< X << q <<< Q < z
4697 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4698
4699 &m << a
4700 &[before 3] a <<< x <<< X << q <<< Q < z
4701 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4702
4703
4704 &m <<< a
4705 &[before 1] a < x <<< X << q <<< Q < z
4706 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4707
4708 &m <<< a
4709 &[before 2] a << x <<< X << q <<< Q < z
4710 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4711
4712 &m <<< a
4713 &[before 3] a <<< x <<< X << q <<< Q < z
4714 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4715
4716
4717 &[before 1] s < x <<< X << q <<< Q < z
4718 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4719
4720 &[before 2] s << x <<< X << q <<< Q < z
4721 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4722
4723 &[before 3] s <<< x <<< X << q <<< Q < z
4724 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4725
4726
4727 &[before 1] \u24DC < x <<< X << q <<< Q < z
4728 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4729
4730 &[before 2] \u24DC << x <<< X << q <<< Q < z
4731 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4732
4733 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4734 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4735 #endif
4736
4737
4738 #if 0
4739 /* requires features not yet supported */
4740 static void TestMoreBefore(void) {
4741     static const struct {
4742         const char* rules;
4743         const char* order[16];
4744         int32_t size;
4745     } tests[] = {
4746         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4747         { "m","M","x","X","q","Q","z","a","n" }, 9},
4748         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4749         { "m","M","x","X","q","Q","a","z","n" }, 9},
4750         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4751         { "m","M","x","X","a","q","Q","z","n" }, 9},
4752         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4753         { "x","X","q","Q","z","m","M","a","n" }, 9},
4754         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4755         { "m","M","x","X","q","Q","a","z","n" }, 9},
4756         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4757         { "m","M","x","X","a","q","Q","z","n" }, 9},
4758         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4759         { "x","X","q","Q","z","n","m","a","M" }, 9},
4760         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4761         { "x","X","q","Q","m","a","M","z","n" }, 9},
4762         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4763         { "m","x","X","a","M","q","Q","z","n" }, 9},
4764         { "&[before 1] s < x <<< X << q <<< Q < z",
4765         { "r","R","x","X","q","Q","z","s","n" }, 9},
4766         { "&[before 2] s << x <<< X << q <<< Q < z",
4767         { "r","R","x","X","q","Q","s","z","n" }, 9},
4768         { "&[before 3] s <<< x <<< X << q <<< Q < z",
4769         { "r","R","x","X","s","q","Q","z","n" }, 9},
4770         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4771         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4772         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4773         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4774         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4775         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4776     };
4777
4778     int32_t i = 0;
4779
4780     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4781         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
4782     }
4783 }
4784 #endif
4785
4786 static void TestTailorNULL( void ) {
4787     const static char* rule = "&a <<< '\\u0000'";
4788     UErrorCode status = U_ZERO_ERROR;
4789     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4790     uint32_t rlen = 0;
4791     UChar a = 1, null = 0;
4792     UCollationResult res = UCOL_EQUAL;
4793
4794     UCollator *coll = NULL;
4795
4796
4797     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4798     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4799
4800     if(U_FAILURE(status)) {
4801         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4802     } else {
4803         res = ucol_strcoll(coll, &a, 1, &null, 1);
4804
4805         if(res != UCOL_LESS) {
4806             log_err("NULL was not tailored properly!\n");
4807         }
4808     }
4809
4810     ucol_close(coll);
4811 }
4812
4813 static void
4814 TestUpperFirstQuaternary(void)
4815 {
4816   const char* tests[] = { "B", "b", "Bb", "bB" };
4817   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4818   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4819   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4820 }
4821
4822 static void
4823 TestJ4960(void)
4824 {
4825   const char* tests[] = { "\\u00e2T", "aT" };
4826   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4827   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4828   const char* tests2[] = { "a", "A" };
4829   const char* rule = "&[first tertiary ignorable]=A=a";
4830   UColAttribute att2[] = { UCOL_CASE_LEVEL };
4831   UColAttributeValue attVals2[] = { UCOL_ON };
4832   /* Test whether we correctly ignore primary ignorables on case level when */
4833   /* we have only primary & case level */
4834   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4835   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4836   /* and case level */
4837   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4838   /* Test whether completely ignorable letters have case level info (they shouldn't) */
4839   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4840 }
4841
4842 static void
4843 TestJ5223(void)
4844 {
4845   static const char *test = "this is a test string";
4846   UChar ustr[256];
4847   int32_t ustr_length = u_unescape(test, ustr, 256);
4848   unsigned char sortkey[256];
4849   int32_t sortkey_length;
4850   UErrorCode status = U_ZERO_ERROR;
4851   static UCollator *coll = NULL;
4852   coll = ucol_open("root", &status);
4853   if(U_FAILURE(status)) {
4854     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4855     return;
4856   }
4857   ucol_setStrength(coll, UCOL_PRIMARY);
4858   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4859   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4860   if (U_FAILURE(status)) {
4861     log_err("Failed setting atributes\n");
4862     return;
4863   }
4864   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4865   if (sortkey_length > 256) return;
4866
4867   /* we mark the position where the null byte should be written in advance */
4868   sortkey[sortkey_length-1] = 0xAA;
4869
4870   /* we set the buffer size one byte higher than needed */
4871   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4872     sortkey_length+1);
4873
4874   /* no error occurs (for me) */
4875   if (sortkey[sortkey_length-1] == 0xAA) {
4876     log_err("Hit bug at first try\n");
4877   }
4878
4879   /* we mark the position where the null byte should be written again */
4880   sortkey[sortkey_length-1] = 0xAA;
4881
4882   /* this time we set the buffer size to the exact amount needed */
4883   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4884     sortkey_length);
4885
4886   /* now the trailing null byte is not written */
4887   if (sortkey[sortkey_length-1] == 0xAA) {
4888     log_err("Hit bug at second try\n");
4889   }
4890
4891   ucol_close(coll);
4892 }
4893
/*
 * Regression test for the Thai partial sort key problem: runs two Thai
 * strings that differ only in their second-to-last character through the
 * "th" locale starter.
 */
static void
TestJ5232(void)
{
    static const char *test[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", test, LEN(test));
}
4905
/*
 * Runs "a" and "y" through a tailoring that resets at "Ny" and also places
 * 'a' after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *test[] = {
        "a",
        "y"
    };
    const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(rules, test, LEN(test));
}
4913
4914 static void
4915 TestVI5913(void)
4916 {
4917     UErrorCode status = U_ZERO_ERROR;
4918     int32_t i, j;
4919     UCollator *coll =NULL;
4920     uint8_t  resColl[100], expColl[100];
4921     int32_t  rLen, tLen, ruleLen, sLen, kLen;
4922     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4923     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4924     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4925     static const UChar tData[][20]={
4926         {0x1EAC, 0},
4927         {0x0041, 0x0323, 0x0302, 0},
4928         {0x1EA0, 0x0302, 0},
4929         {0x00C2, 0x0323, 0},
4930         {0x1ED8, 0},  /* O with dot and circumflex */
4931         {0x1ECC, 0x0302, 0},
4932         {0x1EB7, 0},
4933         {0x1EA1, 0x0306, 0},
4934     };
4935     static const UChar tailorData[][20]={
4936         {0x1FA2, 0},  /* Omega with 3 combining marks */
4937         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4938         {0x1FF3, 0x0313, 0x0300, 0},
4939         {0x1F60, 0x0300, 0x0345, 0},
4940         {0x1F62, 0x0345, 0},
4941         {0x1FA0, 0x0300, 0},
4942     };
4943     static const UChar tailorData2[][20]={
4944         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4945         {0x0073, 0x0323, 0x030C, 0},
4946         {0x0073, 0x030C, 0x0323, 0},
4947     };
4948     static const UChar tailorData3[][20]={
4949         {0x007a, 0},  /*  z */
4950         {0x0061, 0x0065, 0},  /*  a + e */
4951         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4952         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4953         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4954         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4955         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4956         {0x00EA, 0},  /* e with circumflex  */
4957     };
4958
4959     /* Test Vietnamese sort. */
4960     coll = ucol_open("vi", &status);
4961     if(U_FAILURE(status)) {
4962         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4963         return;
4964     }
4965     log_verbose("\n\nVI collation:");
4966     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4967         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4968     }
4969     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4970         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4971     }
4972     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4973         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4974     }
4975     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4976         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4977     }
4978
4979     for (j=0; j<8; j++) {
4980         tLen = u_strlen(tData[j]);
4981         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4982         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4983         for(i = 0; i<rLen; i++) {
4984             log_verbose(" %02X", resColl[i]);
4985         }
4986     }
4987
4988     ucol_close(coll);
4989
4990     /* Test Romanian sort. */
4991     coll = ucol_open("ro", &status);
4992     log_verbose("\n\nRO collation:");
4993     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4994         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4995     }
4996     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4997         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4998     }
4999     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
5000         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5001     }
5002
5003     for (j=4; j<8; j++) {
5004         tLen = u_strlen(tData[j]);
5005         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
5006         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5007         for(i = 0; i<rLen; i++) {
5008             log_verbose(" %02X", resColl[i]);
5009         }
5010     }
5011     ucol_close(coll);
5012
5013     /* Test the precomposed Greek character with 3 combining marks. */
5014     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5015     ruleLen = u_strlen(rule);
5016     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5017     if (U_FAILURE(status)) {
5018         log_err("ucol_openRules failed with %s\n", u_errorName(status));
5019         return;
5020     }
5021     sLen = u_strlen(tailorData[0]);
5022     for (j=1; j<6; j++) {
5023         tLen = u_strlen(tailorData[j]);
5024         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
5025             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5026         }
5027     }
5028     /* Test getSortKey. */
5029     tLen = u_strlen(tailorData[0]);
5030     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5031     for (j=0; j<6; j++) {
5032         tLen = u_strlen(tailorData[j]);
5033         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5034         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5035             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5036             for(i = 0; i<rLen; i++) {
5037                 log_err(" %02X", resColl[i]);
5038             }
5039         }
5040     }
5041     ucol_close(coll);
5042
5043     log_verbose("\n\nTailoring test for s with caron:");
5044     ruleLen = u_strlen(rule2);
5045     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5046     tLen = u_strlen(tailorData2[0]);
5047     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5048     for (j=1; j<3; j++) {
5049         tLen = u_strlen(tailorData2[j]);
5050         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5051         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5052             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5053             for(i = 0; i<rLen; i++) {
5054                 log_err(" %02X", resColl[i]);
5055             }
5056         }
5057     }
5058     ucol_close(coll);
5059
5060     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5061     ruleLen = u_strlen(rule3);
5062     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5063     tLen = u_strlen(tailorData3[3]);
5064     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5065     for (j=4; j<6; j++) {
5066         tLen = u_strlen(tailorData3[j]);
5067         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5068
5069         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5070             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5071             for(i = 0; i<rLen; i++) {
5072                 log_err(" %02X", resColl[i]);
5073             }
5074         }
5075
5076         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5077          for(i = 0; i<rLen; i++) {
5078              log_verbose(" %02X", resColl[i]);
5079          }
5080     }
5081     ucol_close(coll);
5082 }
5083
5084 static void
5085 TestTailor6179(void)
5086 {
5087     UErrorCode status = U_ZERO_ERROR;
5088     int32_t i;
5089     UCollator *coll =NULL;
5090     uint8_t  resColl[100];
5091     int32_t  rLen, tLen, ruleLen;
5092     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5093     static const UChar rule1[]={
5094             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5095             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5096             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5097             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5098     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5099     static const UChar rule2[]={
5100             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5101             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5102             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5103             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5104             0x3C,0x3C,0x20,0x62,0};
5105
5106     static const UChar tData1[][4]={
5107         {0x61, 0},
5108         {0x62, 0},
5109         { 0xFDD0,0x009E, 0}
5110     };
5111     static const UChar tData2[][4]={
5112         {0x61, 0},
5113         {0x62, 0},
5114         { 0xFDD0,0x009E, 0}
5115      };
5116
5117     /*
5118      * These values from FractionalUCA.txt will change,
5119      * and need to be updated here.
5120      */
5121     static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
5122     static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
5123     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5124     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5125
5126     /* Test [Last Primary ignorable] */
5127
5128     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
5129     ruleLen = u_strlen(rule1);
5130     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5131     if (U_FAILURE(status)) {
5132         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5133         return;
5134     }
5135     tLen = u_strlen(tData1[0]);
5136     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5137     if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
5138         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5139         for(i = 0; i<rLen; i++) {
5140             log_err(" %02X", resColl[i]);
5141         }
5142         log_err("\n");
5143     }
5144     tLen = u_strlen(tData1[1]);
5145     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5146     if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
5147         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5148         for(i = 0; i<rLen; i++) {
5149             log_err(" %02X", resColl[i]);
5150         }
5151         log_err("\n");
5152     }
5153     ucol_close(coll);
5154
5155
5156     /* Test [Last Secondary ignorable] */
5157     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
5158     ruleLen = u_strlen(rule1);
5159     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5160     if (U_FAILURE(status)) {
5161         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
5162         return;
5163     }
5164     tLen = u_strlen(tData2[0]);
5165     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5166     if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
5167         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5168         for(i = 0; i<rLen; i++) {
5169             log_err(" %02X", resColl[i]);
5170         }
5171         log_err("\n");
5172     }
5173 if(isICUVersionAtLeast(50, 0, 0)) {  /* TODO: debug & fix, see ticket #8982 */
5174     tLen = u_strlen(tData2[1]);
5175     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5176     if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
5177         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5178         for(i = 0; i<rLen; i++) {
5179             log_err(" %02X", resColl[i]);
5180         }
5181         log_err("\n");
5182     }
5183 }
5184     ucol_close(coll);
5185 }
5186
5187 static void
5188 TestUCAPrecontext(void)
5189 {
5190     UErrorCode status = U_ZERO_ERROR;
5191     int32_t i, j;
5192     UCollator *coll =NULL;
5193     uint8_t  resColl[100], prevColl[100];
5194     int32_t  rLen, tLen, ruleLen;
5195     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5196     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5197     /* & l middle-dot << a  a is an expansion. */
5198
5199     UChar tData1[][20]={
5200             { 0xb7, 0},  /* standalone middle dot(0xb7) */
5201             { 0x387, 0}, /* standalone middle dot(0x387) */
5202             { 0x61, 0},  /* a */
5203             { 0x6C, 0},  /* l */
5204             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5205             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5206             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5207             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5208             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5209             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5210             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5211      };
5212
5213     log_verbose("\n\nEN collation:");
5214     coll = ucol_open("en", &status);
5215     if (U_FAILURE(status)) {
5216         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5217         return;
5218     }
5219     for (j=0; j<11; j++) {
5220         tLen = u_strlen(tData1[j]);
5221         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5222         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5223             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5224                     j, tData1[j]);
5225         }
5226         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5227         for(i = 0; i<rLen; i++) {
5228             log_verbose(" %02X", resColl[i]);
5229         }
5230         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5231      }
5232      ucol_close(coll);
5233
5234
5235      log_verbose("\n\nJA collation:");
5236      coll = ucol_open("ja", &status);
5237      if (U_FAILURE(status)) {
5238          log_err("Tailoring test: &z <<a|- failed!");
5239          return;
5240      }
5241      for (j=0; j<11; j++) {
5242          tLen = u_strlen(tData1[j]);
5243          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5244          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5245              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5246                      j, tData1[j]);
5247          }
5248          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5249          for(i = 0; i<rLen; i++) {
5250              log_verbose(" %02X", resColl[i]);
5251          }
5252          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5253       }
5254       ucol_close(coll);
5255
5256
5257       log_verbose("\n\nTailoring test: & middle dot < a ");
5258       ruleLen = u_strlen(rule1);
5259       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5260       if (U_FAILURE(status)) {
5261           log_err("Tailoring test: & middle dot < a failed!");
5262           return;
5263       }
5264       for (j=0; j<11; j++) {
5265           tLen = u_strlen(tData1[j]);
5266           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5267           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5268               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5269                       j, tData1[j]);
5270           }
5271           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5272           for(i = 0; i<rLen; i++) {
5273               log_verbose(" %02X", resColl[i]);
5274           }
5275           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5276        }
5277        ucol_close(coll);
5278
5279
5280        log_verbose("\n\nTailoring test: & l middle-dot << a ");
5281        ruleLen = u_strlen(rule2);
5282        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5283        if (U_FAILURE(status)) {
5284            log_err("Tailoring test: & l middle-dot << a failed!");
5285            return;
5286        }
5287        for (j=0; j<11; j++) {
5288            tLen = u_strlen(tData1[j]);
5289            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5290            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5291                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5292                        j, tData1[j]);
5293            }
5294            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5295                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5296                        j, tData1[j]);
5297            }
5298            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5299            for(i = 0; i<rLen; i++) {
5300                log_verbose(" %02X", resColl[i]);
5301            }
5302            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5303         }
5304         ucol_close(coll);
5305 }
5306
5307 static void
5308 TestOutOfBuffer5468(void)
5309 {
5310     static const char *test = "\\u4e00";
5311     UChar ustr[256];
5312     int32_t ustr_length = u_unescape(test, ustr, 256);
5313     unsigned char shortKeyBuf[1];
5314     int32_t sortkey_length;
5315     UErrorCode status = U_ZERO_ERROR;
5316     static UCollator *coll = NULL;
5317
5318     coll = ucol_open("root", &status);
5319     if(U_FAILURE(status)) {
5320       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5321       return;
5322     }
5323     ucol_setStrength(coll, UCOL_PRIMARY);
5324     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5325     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5326     if (U_FAILURE(status)) {
5327       log_err("Failed setting atributes\n");
5328       return;
5329     }
5330
5331     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5332     if (sortkey_length != 4) {
5333         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5334     }
5335     log_verbose("length of sortKey is %d", sortkey_length);
5336     ucol_close(coll);
5337 }
5338
5339 #define TSKC_DATA_SIZE 5
5340 #define TSKC_BUF_SIZE  50
5341 static void
5342 TestSortKeyConsistency(void)
5343 {
5344     UErrorCode icuRC = U_ZERO_ERROR;
5345     UCollator* ucol;
5346     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5347
5348     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5349     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5350     int32_t i, j, i2;
5351
5352     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5353     if (U_FAILURE(icuRC))
5354     {
5355         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5356         return;
5357     }
5358
5359     for (i = 0; i < TSKC_DATA_SIZE; i++)
5360     {
5361         UCharIterator uiter;
5362         uint32_t state[2] = { 0, 0 };
5363         int32_t dataLen = i+1;
5364         for (j=0; j<TSKC_BUF_SIZE; j++)
5365             bufFull[i][j] = bufPart[i][j] = 0;
5366
5367         /* Full sort key */
5368         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5369
5370         /* Partial sort key */
5371         uiter_setString(&uiter, data, dataLen);
5372         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5373         if (U_FAILURE(icuRC))
5374         {
5375             log_err("ucol_nextSortKeyPart failed\n");
5376             ucol_close(ucol);
5377             return;
5378         }
5379
5380         for (i2=0; i2<i; i2++)
5381         {
5382             UBool fullMatch = TRUE;
5383             UBool partMatch = TRUE;
5384             for (j=0; j<TSKC_BUF_SIZE; j++)
5385             {
5386                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5387                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5388             }
5389             if (fullMatch != partMatch) {
5390                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5391                                   : "partial key was consistent, but full key changed\n");
5392                 ucol_close(ucol);
5393                 return;
5394             }
5395         }
5396     }
5397
5398     /*=============================================*/
5399    ucol_close(ucol);
5400 }
5401
5402 /* ticket: 6101 */
5403 static void TestCroatianSortKey(void) {
5404     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5405     UErrorCode status = U_ZERO_ERROR;
5406     UCollator *ucol;
5407     UCharIterator iter;
5408
5409     static const UChar text[] = { 0x0044, 0xD81A };
5410
5411     size_t length = sizeof(text)/sizeof(*text);
5412
5413     uint8_t textSortKey[32];
5414     size_t lenSortKey = 32;
5415     size_t actualSortKeyLen;
5416     uint32_t uStateInfo[2] = { 0, 0 };
5417
5418     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5419     if (U_FAILURE(status)) {
5420         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5421         return;
5422     }
5423
5424     uiter_setString(&iter, text, length);
5425
5426     actualSortKeyLen = ucol_nextSortKeyPart(
5427         ucol, &iter, (uint32_t*)uStateInfo,
5428         textSortKey, lenSortKey, &status
5429         );
5430
5431     if (actualSortKeyLen == lenSortKey) {
5432         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5433     }
5434
5435     ucol_close(ucol);
5436 }
5437
5438 /* ticket: 6140 */
5439 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5440  * they are both Hiragana and Katakana
5441  */
5442 #define SORTKEYLEN 50
5443 static void TestHiragana(void) {
5444     UErrorCode status = U_ZERO_ERROR;
5445     UCollator* ucol;
5446     UCollationResult strcollresult;
5447     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5448     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5449     int32_t data1Len = sizeof(data1)/sizeof(*data1);
5450     int32_t data2Len = sizeof(data2)/sizeof(*data2);
5451     int32_t i, j;
5452     uint8_t sortKey1[SORTKEYLEN];
5453     uint8_t sortKey2[SORTKEYLEN];
5454
5455     UCharIterator uiter1;
5456     UCharIterator uiter2;
5457     uint32_t state1[2] = { 0, 0 };
5458     uint32_t state2[2] = { 0, 0 };
5459     int32_t keySize1;
5460     int32_t keySize2;
5461
5462     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5463             &status);
5464     if (U_FAILURE(status)) {
5465         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5466         return;
5467     }
5468
5469     /* Start of full sort keys */
5470     /* Full sort key1 */
5471     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5472     /* Full sort key2 */
5473     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5474     if (keySize1 == keySize2) {
5475         for (i = 0; i < keySize1; i++) {
5476             if (sortKey1[i] != sortKey2[i]) {
5477                 log_err("Full sort keys are different. Should be equal.");
5478             }
5479         }
5480     } else {
5481         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5482     }
5483     /* End of full sort keys */
5484
5485     /* Start of partial sort keys */
5486     /* Partial sort key1 */
5487     uiter_setString(&uiter1, data1, data1Len);
5488     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5489     /* Partial sort key2 */
5490     uiter_setString(&uiter2, data2, data2Len);
5491     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5492     if (U_SUCCESS(status) && keySize1 == keySize2) {
5493         for (j = 0; j < keySize1; j++) {
5494             if (sortKey1[j] != sortKey2[j]) {
5495                 log_err("Partial sort keys are different. Should be equal");
5496             }
5497         }
5498     } else {
5499         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5500     }
5501     /* End of partial sort keys */
5502
5503     /* Start of strcoll */
5504     /* Use ucol_strcoll() to determine ordering */
5505     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5506     if (strcollresult != UCOL_EQUAL) {
5507         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5508     }
5509
5510     ucol_close(ucol);
5511 }
5512
5513 /* Convenient struct for running collation tests */
5514 typedef struct {
5515   const UChar source[MAX_TOKEN_LEN];  /* String on left */
5516   const UChar target[MAX_TOKEN_LEN];  /* String on right */
5517   UCollationResult result;            /* -1, 0 or +1, depending on collation */
5518 } OneTestCase;
5519
5520 /*
5521  * Utility function to test one collation test case.
5522  * @param testcases Array of test cases.
5523  * @param n_testcases Size of the array testcases.
5524  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
5525  * @param n_rules Size of the array str_rules.
5526  */
5527 static void doTestOneTestCase(const OneTestCase testcases[],
5528                               int n_testcases,
5529                               const char* str_rules[],
5530                               int n_rules)
5531 {
5532   int rule_no, testcase_no;
5533   UChar rule[500];
5534   int32_t length = 0;
5535   UErrorCode status = U_ZERO_ERROR;
5536   UParseError parse_error;
5537   UCollator  *myCollation;
5538
5539   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5540
5541     length = u_unescape(str_rules[rule_no], rule, 500);
5542     if (length == 0) {
5543         log_err("ERROR: The rule cannot be unescaped: %s\n");
5544         return;
5545     }
5546     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5547     if(U_FAILURE(status)){
5548         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5549         return;
5550     }
5551     log_verbose("Testing the <<* syntax\n");
5552     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5553     ucol_setStrength(myCollation, UCOL_TERTIARY);
5554     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5555       doTest(myCollation,
5556              testcases[testcase_no].source,
5557              testcases[testcase_no].target,
5558              testcases[testcase_no].result
5559              );
5560     }
5561     ucol_close(myCollation);
5562   }
5563 }
5564
5565 const static OneTestCase rangeTestcases[] = {
5566   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
5567   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
5568   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
5569
5570   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
5571   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
5572   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
5573   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
5574   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
5575
5576   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
5577   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
5578   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
5579   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
5580
5581   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
5582   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
5583   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
5584   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
5585   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
5586   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
5587   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
5588   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
5589 };
5590
5591 static int nRangeTestcases = LEN(rangeTestcases);
5592
5593 const static OneTestCase rangeTestcasesSupplemental[] = {
5594   { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
5595   { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
5596   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
5597   { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5598   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5599   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5600   { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5601 };
5602
5603 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5604
5605 const static OneTestCase rangeTestcasesQwerty[] = {
5606   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
5607   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
5608
5609   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
5610   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
5611
5612   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
5613   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
5614
5615   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
5616   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
5617
5618   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5619     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
5620   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5621     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
5622 };
5623
5624 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5625
5626 static void TestSameStrengthList(void)
5627 {
5628   const char* strRules[] = {
5629     /* Normal */
5630     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
5631
5632     /* Lists */
5633     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5634   };
5635   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5636 }
5637
5638 static void TestSameStrengthListQuoted(void)
5639 {
5640   const char* strRules[] = {
5641     /* Lists with quoted characters */
5642     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5643     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5644
5645     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5646     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5647
5648     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
5649     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5650   };
5651   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5652 }
5653
5654 static void TestSameStrengthListSupplemental(void)
5655 {
5656   const char* strRules[] = {
5657     "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5658     "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5659     "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5660     "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5661   };
5662   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5663 }
5664
5665 static void TestSameStrengthListQwerty(void)
5666 {
5667   const char* strRules[] = {
5668     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5669     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5670     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5671     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5672     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5673
5674     /* Quoted characters also will work if two quoted characters are not consecutive.  */
5675     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5676
5677     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5678     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5679
5680  };
5681   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5682 }
5683
5684 static void TestSameStrengthListQuotedQwerty(void)
5685 {
5686   const char* strRules[] = {
5687     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5688     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5689     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
5690
5691     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5692     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5693    };
5694   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5695 }
5696
5697 static void TestSameStrengthListRanges(void)
5698 {
5699   const char* strRules[] = {
5700     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5701   };
5702   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5703 }
5704
5705 static void TestSameStrengthListSupplementalRanges(void)
5706 {
5707   const char* strRules[] = {
5708     "&\\ufffe<*\\uffff-\\U00010002",
5709   };
5710   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5711 }
5712
5713 static void TestSpecialCharacters(void)
5714 {
5715   const char* strRules[] = {
5716     /* Normal */
5717     "&';'<'+'<','<'-'<'&'<'*'",
5718
5719     /* List */
5720     "&';'<*'+,-&*'",
5721
5722     /* Range */
5723     "&';'<*'+'-'-&*'",
5724   };
5725
5726   const static OneTestCase specialCharacterStrings[] = {
5727     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
5728     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
5729     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
5730     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
5731   };
5732   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5733 }
5734
5735 static void TestPrivateUseCharacters(void)
5736 {
5737   const char* strRules[] = {
5738     /* Normal */
5739     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5740     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5741   };
5742
5743   const static OneTestCase privateUseCharacterStrings[] = {
5744     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5745     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5746     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5747     { {0xe2da}, {0xe2db}, UCOL_LESS },
5748     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5749     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5750   };
5751   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5752 }
5753
5754 static void TestPrivateUseCharactersInList(void)
5755 {
5756   const char* strRules[] = {
5757     /* List */
5758     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5759     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5760     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5761   };
5762
5763   const static OneTestCase privateUseCharacterStrings[] = {
5764     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5765     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5766     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5767     { {0xe2da}, {0xe2db}, UCOL_LESS },
5768     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5769     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5770   };
5771   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5772 }
5773
5774 static void TestPrivateUseCharactersInRange(void)
5775 {
5776   const char* strRules[] = {
5777     /* Range */
5778     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5779     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5780     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5781   };
5782
5783   const static OneTestCase privateUseCharacterStrings[] = {
5784     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5785     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5786     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5787     { {0xe2da}, {0xe2db}, UCOL_LESS },
5788     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5789     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5790   };
5791   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5792 }
5793
5794 static void TestInvalidListsAndRanges(void)
5795 {
5796   const char* invalidRules[] = {
5797     /* Range not in starred expression */
5798     "&\\ufffe<\\uffff-\\U00010002",
5799
5800     /* Range without start */
5801     "&a<*-c",
5802
5803     /* Range without end */
5804     "&a<*b-",
5805
5806     /* More than one hyphen */
5807     "&a<*b-g-l",
5808
5809     /* Range in the wrong order */
5810     "&a<*k-b",
5811
5812   };
5813
5814   UChar rule[500];
5815   UErrorCode status = U_ZERO_ERROR;
5816   UParseError parse_error;
5817   int n_rules = LEN(invalidRules);
5818   int rule_no;
5819   int length;
5820   UCollator  *myCollation;
5821
5822   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5823
5824     length = u_unescape(invalidRules[rule_no], rule, 500);
5825     if (length == 0) {
5826         log_err("ERROR: The rule cannot be unescaped: %s\n");
5827         return;
5828     }
5829     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5830     if(!U_FAILURE(status)){
5831       log_err("ERROR: Could not cause a failure as expected: \n");
5832     }
5833     status = U_ZERO_ERROR;
5834   }
5835 }
5836
5837 /*
5838  * This test ensures that characters placed before a character in a different script have the same lead byte
5839  * in their collation key before and after script reordering.
5840  */
5841 static void TestBeforeRuleWithScriptReordering(void)
5842 {
5843     UParseError error;
5844     UErrorCode status = U_ZERO_ERROR;
5845     UCollator  *myCollation;
5846     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5847     UChar rules[500];
5848     uint32_t rulesLength = 0;
5849     int32_t reorderCodes[1] = {USCRIPT_GREEK};
5850     UCollationResult collResult;
5851
5852     uint8_t baseKey[256];
5853     uint32_t baseKeyLength;
5854     uint8_t beforeKey[256];
5855     uint32_t beforeKeyLength;
5856
5857     UChar base[] = { 0x03b1 }; /* base */
5858     int32_t baseLen = sizeof(base)/sizeof(*base);
5859
5860     UChar before[] = { 0x0e01 }; /* ko kai */
5861     int32_t beforeLen = sizeof(before)/sizeof(*before);
5862
5863     /*UChar *data[] = { before, base };
5864     genericRulesStarter(srules, data, 2);*/
5865
5866     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5867
5868
5869     /* build collator */
5870     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5871
5872     rulesLength = u_unescape(srules, rules, LEN(rules));
5873     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5874     if(U_FAILURE(status)) {
5875         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5876         return;
5877     }
5878
5879     /* check collation results - before rule applied but not script reordering */
5880     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5881     if (collResult != UCOL_GREATER) {
5882         log_err("Collation result not correct before script reordering = %d\n", collResult);
5883     }
5884
5885     /* check the lead byte of the collation keys before script reordering */
5886     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5887     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5888     if (baseKey[0] != beforeKey[0]) {
5889       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5890    }
5891
5892     /* reorder the scripts */
5893     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5894     if(U_FAILURE(status)) {
5895         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5896         return;
5897     }
5898
5899     /* check collation results - before rule applied and after script reordering */
5900     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5901     if (collResult != UCOL_GREATER) {
5902         log_err("Collation result not correct after script reordering = %d\n", collResult);
5903     }
5904
5905     /* check the lead byte of the collation keys after script reordering */
5906     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5907     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5908     if (baseKey[0] != beforeKey[0]) {
5909         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5910     }
5911
5912     ucol_close(myCollation);
5913 }
5914
5915 /*
5916  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5917  */
5918 static void TestNonLeadBytesDuringCollationReordering(void)
5919 {
5920     UErrorCode status = U_ZERO_ERROR;
5921     UCollator  *myCollation;
5922     int32_t reorderCodes[1] = {USCRIPT_GREEK};
5923
5924     uint8_t baseKey[256];
5925     uint32_t baseKeyLength;
5926     uint8_t reorderKey[256];
5927     uint32_t reorderKeyLength;
5928
5929     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5930
5931     uint32_t i;
5932
5933
5934     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5935
5936     /* build collator tertiary */
5937     myCollation = ucol_open("", &status);
5938     ucol_setStrength(myCollation, UCOL_TERTIARY);
5939     if(U_FAILURE(status)) {
5940         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5941         return;
5942     }
5943     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5944
5945     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5946     if(U_FAILURE(status)) {
5947         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5948         return;
5949     }
5950     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5951
5952     if (baseKeyLength != reorderKeyLength) {
5953         log_err("Key lengths not the same during reordering.\n");
5954         return;
5955     }
5956
5957     for (i = 1; i < baseKeyLength; i++) {
5958         if (baseKey[i] != reorderKey[i]) {
5959             log_err("Collation key bytes not the same at position %d.\n", i);
5960             return;
5961         }
5962     }
5963     ucol_close(myCollation);
5964
5965     /* build collator quaternary */
5966     myCollation = ucol_open("", &status);
5967     ucol_setStrength(myCollation, UCOL_QUATERNARY);
5968     if(U_FAILURE(status)) {
5969         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5970         return;
5971     }
5972     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5973
5974     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5975     if(U_FAILURE(status)) {
5976         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5977         return;
5978     }
5979     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5980
5981     if (baseKeyLength != reorderKeyLength) {
5982         log_err("Key lengths not the same during reordering.\n");
5983         return;
5984     }
5985
5986     for (i = 1; i < baseKeyLength; i++) {
5987         if (baseKey[i] != reorderKey[i]) {
5988             log_err("Collation key bytes not the same at position %d.\n", i);
5989             return;
5990         }
5991     }
5992     ucol_close(myCollation);
5993 }
5994
5995 /*
5996  * Test reordering API.
5997  */
5998 static void TestReorderingAPI(void)
5999 {
6000     UErrorCode status = U_ZERO_ERROR;
6001     UCollator  *myCollation;
6002     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6003     int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
6004     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6005     UCollationResult collResult;
6006     int32_t retrievedReorderCodesLength;
6007     int32_t retrievedReorderCodes[10];
6008     UChar greekString[] = { 0x03b1 };
6009     UChar punctuationString[] = { 0x203e };
6010     int loopIndex;
6011
6012     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6013
6014     /* build collator tertiary */
6015     myCollation = ucol_open("", &status);
6016     ucol_setStrength(myCollation, UCOL_TERTIARY);
6017     if(U_FAILURE(status)) {
6018         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6019         return;
6020     }
6021
6022     /* set the reorderding */
6023     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6024     if (U_FAILURE(status)) {
6025         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6026         return;
6027     }
6028
6029     /* get the reordering */
6030     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6031     if (status != U_BUFFER_OVERFLOW_ERROR) {
6032         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6033         return;
6034     }
6035     status = U_ZERO_ERROR;
6036     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6037         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6038         return;
6039     }
6040     /* now let's really get it */
6041     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6042     if (U_FAILURE(status)) {
6043         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6044         return;
6045     }
6046     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6047         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6048         return;
6049     }
6050     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6051         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6052             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6053             return;
6054         }
6055     }
6056     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6057     if (collResult != UCOL_LESS) {
6058         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6059         return;
6060     }
6061
6062     /* clear the reordering */
6063     ucol_setReorderCodes(myCollation, NULL, 0, &status);
6064     if (U_FAILURE(status)) {
6065         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6066         return;
6067     }
6068
6069     /* get the reordering again */
6070     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6071     if (retrievedReorderCodesLength != 0) {
6072         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6073         return;
6074     }
6075
6076     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6077     if (collResult != UCOL_GREATER) {
6078         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6079         return;
6080     }
6081
6082     /* test for error condition on duplicate reorder codes */
6083     ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
6084     if (!U_FAILURE(status)) {
6085         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6086         return;
6087     }
6088
6089     status = U_ZERO_ERROR;
6090     /* test for reorder codes after a reset code */
6091     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
6092     if (!U_FAILURE(status)) {
6093         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6094         return;
6095     }
6096
6097     ucol_close(myCollation);
6098 }
6099
6100 /*
6101  * Test reordering API.
6102  */
6103 static void TestReorderingAPIWithRuleCreatedCollator(void)
6104 {
6105     UErrorCode status = U_ZERO_ERROR;
6106     UCollator  *myCollation;
6107     UChar rules[90];
6108     int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
6109     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6110     UCollationResult collResult;
6111     int32_t retrievedReorderCodesLength;
6112     int32_t retrievedReorderCodes[10];
6113     UChar greekString[] = { 0x03b1 };
6114     UChar punctuationString[] = { 0x203e };
6115     UChar hanString[] = { 0x65E5, 0x672C };
6116     int loopIndex;
6117
6118     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6119
6120     /* build collator from rules */
6121     u_uastrcpy(rules, "[reorder Hani Grek]");
6122     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
6123     if(U_FAILURE(status)) {
6124         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6125         return;
6126     }
6127
6128     /* get the reordering */
6129     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6130     if (U_FAILURE(status)) {
6131         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6132         return;
6133     }
6134     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6135         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6136         return;
6137     }
6138     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6139         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6140             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6141             return;
6142         }
6143     }
6144     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
6145     if (collResult != UCOL_GREATER) {
6146         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6147         return;
6148     }
6149
6150
6151     /* set the reorderding */
6152     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6153     if (U_FAILURE(status)) {
6154         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6155         return;
6156     }
6157
6158     /* get the reordering */
6159     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6160     if (status != U_BUFFER_OVERFLOW_ERROR) {
6161         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6162         return;
6163     }
6164     status = U_ZERO_ERROR;
6165     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6166         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6167         return;
6168     }
6169     /* now let's really get it */
6170     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6171     if (U_FAILURE(status)) {
6172         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6173         return;
6174     }
6175     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6176         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6177         return;
6178     }
6179     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6180         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6181             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6182             return;
6183         }
6184     }
6185     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6186     if (collResult != UCOL_LESS) {
6187         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6188         return;
6189     }
6190
6191     /* clear the reordering */
6192     ucol_setReorderCodes(myCollation, NULL, 0, &status);
6193     if (U_FAILURE(status)) {
6194         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6195         return;
6196     }
6197
6198     /* get the reordering again */
6199     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6200     if (retrievedReorderCodesLength != 0) {
6201         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6202         return;
6203     }
6204
6205     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6206     if (collResult != UCOL_GREATER) {
6207         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6208         return;
6209     }
6210
6211     ucol_close(myCollation);
6212 }
6213
/*
 * qsort() comparator that orders int32_t codes in ascending order.
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return A negative value if *a < *b, zero if they are equal, a positive value if *a > *b.
 *
 * The values are compared rather than subtracted so that the result cannot
 * overflow, and the pointers are read without casting away const.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
  const int32_t lhs = *(const int32_t *)a;
  const int32_t rhs = *(const int32_t *)b;

  return (lhs > rhs) - (lhs < rhs);
}
6218
6219 static void TestEquivalentReorderingScripts(void) {
6220     UErrorCode status = U_ZERO_ERROR;
6221     int32_t equivalentScripts[50];
6222     int32_t equivalentScriptsLength;
6223     int loopIndex;
6224     int32_t equivalentScriptsResult[] = {
6225         USCRIPT_BOPOMOFO,
6226         USCRIPT_LISU,
6227         USCRIPT_LYCIAN,
6228         USCRIPT_CARIAN,
6229         USCRIPT_LYDIAN,
6230         USCRIPT_YI,
6231         USCRIPT_OLD_ITALIC,
6232         USCRIPT_GOTHIC,
6233         USCRIPT_DESERET,
6234         USCRIPT_SHAVIAN,
6235         USCRIPT_OSMANYA,
6236         USCRIPT_LINEAR_B,
6237         USCRIPT_CYPRIOT,
6238         USCRIPT_OLD_SOUTH_ARABIAN,
6239         USCRIPT_AVESTAN,
6240         USCRIPT_IMPERIAL_ARAMAIC,
6241         USCRIPT_INSCRIPTIONAL_PARTHIAN,
6242         USCRIPT_INSCRIPTIONAL_PAHLAVI,
6243         USCRIPT_UGARITIC,
6244         USCRIPT_OLD_PERSIAN,
6245         USCRIPT_CUNEIFORM,
6246         USCRIPT_EGYPTIAN_HIEROGLYPHS,
6247         USCRIPT_PHONETIC_POLLARD,
6248         USCRIPT_SORA_SOMPENG,
6249         USCRIPT_MEROITIC_CURSIVE,
6250         USCRIPT_MEROITIC_HIEROGLYPHS
6251     };
6252
6253     qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
6254
6255     /* UScript.GOTHIC */
6256     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6257     if (U_FAILURE(status)) {
6258         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6259         return;
6260     }
6261     /*
6262     fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6263     fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6264     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6265         fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6266     }
6267     */
6268     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6269         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6270         return;
6271     }
6272     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6273         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6274             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6275             return;
6276         }
6277     }
6278
6279     /* UScript.SHAVIAN */
6280     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
6281     if (U_FAILURE(status)) {
6282         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6283         return;
6284     }
6285     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6286         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6287         return;
6288     }
6289     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6290         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6291             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6292             return;
6293         }
6294     }
6295 }
6296
6297 static void TestReorderingAcrossCloning(void)
6298 {
6299     UErrorCode status = U_ZERO_ERROR;
6300     UCollator  *myCollation;
6301     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6302     UCollator *clonedCollation;
6303     int32_t bufferSize;
6304     int32_t retrievedReorderCodesLength;
6305     int32_t retrievedReorderCodes[10];
6306     int loopIndex;
6307
6308     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6309
6310     /* build collator tertiary */
6311     myCollation = ucol_open("", &status);
6312     ucol_setStrength(myCollation, UCOL_TERTIARY);
6313     if(U_FAILURE(status)) {
6314         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6315         return;
6316     }
6317
6318     /* set the reorderding */
6319     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6320     if (U_FAILURE(status)) {
6321         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6322         return;
6323     }
6324
6325     /* clone the collator */
6326     clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status);
6327     if (U_FAILURE(status)) {
6328         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
6329         return;
6330     }
6331
6332     /* get the reordering */
6333     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6334     if (U_FAILURE(status)) {
6335         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6336         return;
6337     }
6338     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6339         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6340         return;
6341     }
6342     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6343         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6344             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6345             return;
6346         }
6347     }
6348
6349     /*uprv_free(buffer);*/
6350     ucol_close(myCollation);
6351     ucol_close(clonedCollation);
6352 }
6353
6354 /*
6355  * Utility function to test one collation reordering test case set.
6356  * @param testcases Array of test cases.
6357  * @param n_testcases Size of the array testcases.
6358  * @param reorderTokens Array of reordering codes.
6359  * @param reorderTokensLen Size of the array reorderTokens.
6360  */
6361 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6362 {
6363     uint32_t testCaseNum;
6364     UErrorCode status = U_ZERO_ERROR;
6365     UCollator  *myCollation;
6366
6367     myCollation = ucol_open("", &status);
6368     if (U_FAILURE(status)) {
6369         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6370         return;
6371     }
6372     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6373     if(U_FAILURE(status)) {
6374         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6375         return;
6376     }
6377
6378     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6379         doTest(myCollation,
6380             testCases[testCaseNum].source,
6381             testCases[testCaseNum].target,
6382             testCases[testCaseNum].result
6383         );
6384     }
6385     ucol_close(myCollation);
6386 }
6387
6388 static void TestGreekFirstReorder(void)
6389 {
6390     const char* strRules[] = {
6391         "[reorder Grek]"
6392     };
6393
6394     const int32_t apiRules[] = {
6395         USCRIPT_GREEK
6396     };
6397
6398     const static OneTestCase privateUseCharacterStrings[] = {
6399         { {0x0391}, {0x0391}, UCOL_EQUAL },
6400         { {0x0041}, {0x0391}, UCOL_GREATER },
6401         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6402         { {0x0060}, {0x0391}, UCOL_LESS },
6403         { {0x0391}, {0xe2dc}, UCOL_LESS },
6404         { {0x0391}, {0x0060}, UCOL_GREATER },
6405     };
6406
6407     /* Test rules creation */
6408     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6409
6410     /* Test collation reordering API */
6411     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6412 }
6413
6414 static void TestGreekLastReorder(void)
6415 {
6416     const char* strRules[] = {
6417         "[reorder Zzzz Grek]"
6418     };
6419
6420     const int32_t apiRules[] = {
6421         USCRIPT_UNKNOWN, USCRIPT_GREEK
6422     };
6423
6424     const static OneTestCase privateUseCharacterStrings[] = {
6425         { {0x0391}, {0x0391}, UCOL_EQUAL },
6426         { {0x0041}, {0x0391}, UCOL_LESS },
6427         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6428         { {0x0060}, {0x0391}, UCOL_LESS },
6429         { {0x0391}, {0xe2dc}, UCOL_GREATER },
6430     };
6431
6432     /* Test rules creation */
6433     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6434
6435     /* Test collation reordering API */
6436     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6437 }
6438
6439 static void TestNonScriptReorder(void)
6440 {
6441     const char* strRules[] = {
6442         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6443     };
6444
6445     const int32_t apiRules[] = {
6446         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6447         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6448         UCOL_REORDER_CODE_CURRENCY
6449     };
6450
6451     const static OneTestCase privateUseCharacterStrings[] = {
6452         { {0x0391}, {0x0041}, UCOL_LESS },
6453         { {0x0041}, {0x0391}, UCOL_GREATER },
6454         { {0x0060}, {0x0041}, UCOL_LESS },
6455         { {0x0060}, {0x0391}, UCOL_GREATER },
6456         { {0x0024}, {0x0041}, UCOL_GREATER },
6457     };
6458
6459     /* Test rules creation */
6460     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6461
6462     /* Test collation reordering API */
6463     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6464 }
6465
6466 static void TestHaniReorder(void)
6467 {
6468     const char* strRules[] = {
6469         "[reorder Hani]"
6470     };
6471     const int32_t apiRules[] = {
6472         USCRIPT_HAN
6473     };
6474
6475     const static OneTestCase privateUseCharacterStrings[] = {
6476         { {0x4e00}, {0x0041}, UCOL_LESS },
6477         { {0x4e00}, {0x0060}, UCOL_GREATER },
6478         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6479         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6480         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6481         { {0xfa27}, {0x0041}, UCOL_LESS },
6482         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6483     };
6484
6485     /* Test rules creation */
6486     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6487
6488     /* Test collation reordering API */
6489     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6490 }
6491
6492 static void TestHaniReorderWithOtherRules(void)
6493 {
6494     const char* strRules[] = {
6495         "[reorder Hani] &b<a"
6496     };
6497     const int32_t apiRules[] = {
6498         USCRIPT_HAN
6499     };
6500
6501     const static OneTestCase privateUseCharacterStrings[] = {
6502         { {0x4e00}, {0x0041}, UCOL_LESS },
6503         { {0x4e00}, {0x0060}, UCOL_GREATER },
6504         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6505         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6506         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6507         { {0xfa27}, {0x0041}, UCOL_LESS },
6508         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6509         { {0x0062}, {0x0061}, UCOL_LESS },
6510     };
6511
6512     /* Test rules creation */
6513     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6514 }
6515
6516 static void TestMultipleReorder(void)
6517 {
6518     const char* strRules[] = {
6519         "[reorder Grek Zzzz DIGIT Latn Hani]"
6520     };
6521
6522     const int32_t apiRules[] = {
6523         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6524     };
6525
6526     const static OneTestCase collationTestCases[] = {
6527         { {0x0391}, {0x0041}, UCOL_LESS},
6528         { {0x0031}, {0x0041}, UCOL_LESS},
6529         { {0x0041}, {0x4e00}, UCOL_LESS},
6530     };
6531
6532     /* Test rules creation */
6533     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6534
6535     /* Test collation reordering API */
6536     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6537 }
6538
6539 /*
6540  * Test that covers issue reported in ticket 8814
6541  */
6542 static void TestReorderWithNumericCollation()
6543 {
6544     UErrorCode status = U_ZERO_ERROR;
6545     UCollator  *myCollation;
6546     UCollator  *myReorderCollation;
6547     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
6548     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6549     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6550     UChar fortyS[] = { 0x0053 };
6551     UChar fortyThreeP[] = { 0x0050 };
6552     uint8_t fortyS_sortKey[128];
6553     int32_t fortyS_sortKey_Length;
6554     uint8_t fortyThreeP_sortKey[128];
6555     int32_t fortyThreeP_sortKey_Length;
6556     uint8_t fortyS_sortKey_reorder[128];
6557     int32_t fortyS_sortKey_reorder_Length;
6558     uint8_t fortyThreeP_sortKey_reorder[128];
6559     int32_t fortyThreeP_sortKey_reorder_Length;
6560     UCollationResult collResult;
6561     UCollationResult collResultReorder;
6562     int i;
6563
6564     log_verbose("Testing reordering with and without numeric collation\n");
6565
6566     /* build collator tertiary with numeric */
6567     myCollation = ucol_open("", &status);
6568     /*
6569     ucol_setStrength(myCollation, UCOL_TERTIARY);
6570     */
6571     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6572     if(U_FAILURE(status)) {
6573         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6574         return;
6575     }
6576
6577     /* build collator tertiary with numeric and reordering */
6578     myReorderCollation = ucol_open("", &status);
6579     /*
6580     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6581     */
6582     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6583     ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
6584     if(U_FAILURE(status)) {
6585         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6586         return;
6587     }
6588
6589     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
6590     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
6591     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
6592     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
6593
6594     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
6595         log_err_status(status, "ERROR: couldn't generate sort keys\n");
6596         return;
6597     }
6598     collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6599     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6600     /*
6601     fprintf(stderr, "\tcollResult = %x\n", collResult);
6602     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6603     fprintf(stderr, "\nfortyS\n");
6604     for (i = 0; i < fortyS_sortKey_Length; i++) {
6605         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6606     }
6607     fprintf(stderr, "\nfortyThreeP\n");
6608     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6609         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6610     }
6611     */
6612     if (collResult != collResultReorder) {
6613         log_err_status(status, "ERROR: collation results should have been the same.\n");
6614         return;
6615     }
6616
6617     ucol_close(myCollation);
6618     ucol_close(myReorderCollation);
6619 }
6620
/*
 * Compares two zero-terminated byte sequences as unsigned values.
 * Returns 0 when both are identical up to and including the terminating
 * zero byte, -1 when the first differing byte of a is smaller than that of
 * b, and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  /* Walk both sequences in step for as long as the bytes agree. */
  while (*a == *b) {
    if (*a == 0) {
      /* Reached the terminator of both sequences without a difference. */
      return 0;
    }
    a++;
    b++;
  }

  if (*a < *b) {
    return -1;
  }
  return 1;
}
6630
6631 static void TestImportRulesDeWithPhonebook(void)
6632 {
6633   const char* normalRules[] = {
6634     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6635     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6636     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6637   };
6638   const OneTestCase normalTests[] = {
6639     { {0x00e6}, {0x00c6}, UCOL_LESS},
6640     { {0x00fc}, {0x00dc}, UCOL_GREATER},
6641   };
6642
6643   const char* importRules[] = {
6644     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6645     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6646     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6647   };
6648   const OneTestCase importTests[] = {
6649     { {0x00e6}, {0x00c6}, UCOL_LESS},
6650     { {0x00fc}, {0x00dc}, UCOL_LESS},
6651   };
6652
6653   doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
6654   doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
6655 }
6656
6657 static void TestImportRulesFiWithEor(void)
6658 {
6659   /* DUCET. */
6660   const char* defaultRules[] = {
6661     "&a<b",                                    /* Dummy rule. */
6662   };
6663
6664   const OneTestCase defaultTests[] = {
6665     { {0x0110}, {0x00F0}, UCOL_LESS},
6666     { {0x00a3}, {0x00a5}, UCOL_LESS},
6667     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
6668   };
6669
6670   /* European Ordering rules: ignore currency characters. */
6671   const char* eorRules[] = {
6672     "[import root-u-co-eor]",
6673   };
6674
6675   const OneTestCase eorTests[] = {
6676     { {0x0110}, {0x00F0}, UCOL_LESS},
6677     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
6678     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
6679   };
6680
6681   const char* fiStdRules[] = {
6682     "[import fi-u-co-standard]",
6683   };
6684
6685   const OneTestCase fiStdTests[] = {
6686     { {0x0110}, {0x00F0}, UCOL_GREATER},
6687     { {0x00a3}, {0x00a5}, UCOL_LESS},
6688     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
6689   };
6690
6691   /* Both European Ordering Rules and Fi Standard Rules. */
6692   const char* eorFiStdRules[] = {
6693     "[import root-u-co-eor][import fi-u-co-standard]",
6694   };
6695
6696   /* This is essentially same as the one before once fi.txt is updated with import. */
6697   const char* fiEorRules[] = {
6698     "[import fi-u-co-eor]",
6699   };
6700
6701   const OneTestCase fiEorTests[] = {
6702     { {0x0110}, {0x00F0}, UCOL_GREATER},
6703     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
6704     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
6705   };
6706
6707   doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
6708   doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
6709   doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
6710   doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));
6711
6712   /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
6713         eor{
6714             Sequence{
6715                 "[import root-u-co-eor][import fi-u-co-standard]"
6716             }
6717             Version{"21.0"}
6718         }
6719   */
6720   /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
6721
6722 }
6723
#if 0
/*
 * Disabled: exercises an import of the unihan rules, which only works when
 * the resource files are built with the -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET, reached through a dummy rule. */
  const char* ducetRules[] = {
    "&a<b",
  };
  const OneTestCase ducetCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Tailoring that imports the unihan rules through the ko locale. */
  const char* unihanImportRules[] = {
    "[import ko-u-co-unihan]",
  };
  const OneTestCase unihanImportCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(ducetCases, LEN(ducetCases), ducetRules, LEN(ducetRules));
  doTestOneTestCase(unihanImportCases, LEN(unihanImportCases), unihanImportRules, LEN(unihanImportRules));

}
#endif
6755
6756 static void TestImport(void)
6757 {
6758     UCollator* vicoll;
6759     UCollator* escoll;
6760     UCollator* viescoll;
6761     UCollator* importviescoll;
6762     UParseError error;
6763     UErrorCode status = U_ZERO_ERROR;
6764     UChar* virules;
6765     int32_t viruleslength;
6766     UChar* esrules;
6767     int32_t esruleslength;
6768     UChar* viesrules;
6769     int32_t viesruleslength;
6770     char srules[500] = "[import vi][import es]";
6771     UChar rules[500];
6772     uint32_t length = 0;
6773     int32_t itemCount;
6774     int32_t i, k;
6775     UChar32 start;
6776     UChar32 end;
6777     UChar str[500];
6778     int32_t strLength;
6779
6780     uint8_t sk1[500];
6781     uint8_t sk2[500];
6782
6783     UBool b;
6784     USet* tailoredSet;
6785     USet* importTailoredSet;
6786
6787
6788     vicoll = ucol_open("vi", &status);
6789     if(U_FAILURE(status)){
6790         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6791         return;
6792     }
6793
6794     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6795     escoll = ucol_open("es", &status);
6796     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6797     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6798     viesrules[0] = 0;
6799     u_strcat(viesrules, virules);
6800     u_strcat(viesrules, esrules);
6801     viesruleslength = viruleslength + esruleslength;
6802     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6803
6804     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6805     length = u_unescape(srules, rules, 500);
6806     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6807     if(U_FAILURE(status)){
6808         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6809         return;
6810     }
6811
6812     tailoredSet = ucol_getTailoredSet(viescoll, &status);
6813     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6814
6815     if(!uset_equals(tailoredSet, importTailoredSet)){
6816         log_err("Tailored sets not equal");
6817     }
6818
6819     uset_close(importTailoredSet);
6820
6821     itemCount = uset_getItemCount(tailoredSet);
6822
6823     for( i = 0; i < itemCount; i++){
6824         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6825         if(strLength < 2){
6826             for (; start <= end; start++){
6827                 k = 0;
6828                 U16_APPEND(str, k, 500, start, b);
6829                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
6830                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6831                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6832                     log_err("Sort key for %s not equal\n", str);
6833                     break;
6834                 }
6835             }
6836         }else{
6837             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6838             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6839             if(compare_uint8_t_arrays(sk1, sk2) != 0){
6840                 log_err("ZZSort key for %s not equal\n", str);
6841                 break;
6842             }
6843
6844         }
6845     }
6846
6847     uset_close(tailoredSet);
6848
6849     uprv_free(viesrules);
6850
6851     ucol_close(vicoll);
6852     ucol_close(escoll);
6853     ucol_close(viescoll);
6854     ucol_close(importviescoll);
6855 }
6856
6857 static void TestImportWithType(void)
6858 {
6859     UCollator* vicoll;
6860     UCollator* decoll;
6861     UCollator* videcoll;
6862     UCollator* importvidecoll;
6863     UParseError error;
6864     UErrorCode status = U_ZERO_ERROR;
6865     const UChar* virules;
6866     int32_t viruleslength;
6867     const UChar* derules;
6868     int32_t deruleslength;
6869     UChar* viderules;
6870     int32_t videruleslength;
6871     const char srules[500] = "[import vi][import de-u-co-phonebk]";
6872     UChar rules[500];
6873     uint32_t length = 0;
6874     int32_t itemCount;
6875     int32_t i, k;
6876     UChar32 start;
6877     UChar32 end;
6878     UChar str[500];
6879     int32_t strLength;
6880
6881     uint8_t sk1[500];
6882     uint8_t sk2[500];
6883
6884     USet* tailoredSet;
6885     USet* importTailoredSet;
6886
6887     vicoll = ucol_open("vi", &status);
6888     if(U_FAILURE(status)){
6889         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6890         return;
6891     }
6892     virules = ucol_getRules(vicoll, &viruleslength);
6893     /* decoll = ucol_open("de@collation=phonebook", &status); */
6894     decoll = ucol_open("de-u-co-phonebk", &status);
6895     if(U_FAILURE(status)){
6896         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6897         return;
6898     }
6899
6900
6901     derules = ucol_getRules(decoll, &deruleslength);
6902     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6903     viderules[0] = 0;
6904     u_strcat(viderules, virules);
6905     u_strcat(viderules, derules);
6906     videruleslength = viruleslength + deruleslength;
6907     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6908
6909     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6910     length = u_unescape(srules, rules, 500);
6911     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6912     if(U_FAILURE(status)){
6913         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6914         return;
6915     }
6916
6917     tailoredSet = ucol_getTailoredSet(videcoll, &status);
6918     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6919
6920     if(!uset_equals(tailoredSet, importTailoredSet)){
6921         log_err("Tailored sets not equal");
6922     }
6923
6924     uset_close(importTailoredSet);
6925
6926     itemCount = uset_getItemCount(tailoredSet);
6927
6928     for( i = 0; i < itemCount; i++){
6929         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6930         if(strLength < 2){
6931             for (; start <= end; start++){
6932                 k = 0;
6933                 U16_APPEND_UNSAFE(str, k, start);
6934                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
6935                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6936                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6937                     log_err("Sort key for %s not equal\n", str);
6938                     break;
6939                 }
6940             }
6941         }else{
6942             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6943             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6944             if(compare_uint8_t_arrays(sk1, sk2) != 0){
6945                 log_err("Sort key for %s not equal\n", str);
6946                 break;
6947             }
6948
6949         }
6950     }
6951
6952     uset_close(tailoredSet);
6953
6954     uprv_free(viderules);
6955
6956     ucol_close(videcoll);
6957     ucol_close(importvidecoll);
6958     ucol_close(vicoll);
6959     ucol_close(decoll);
6960 }
6961
/*
 * Fixture data for TestCaseLevelBufferOverflow: three long, mostly
 * upper-case strings, a table that lists them with their lengths, and the
 * size of the sort key buffers that test uses.
 */
6962 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
6963 static const UChar longUpperStr1[]= { /* 155 chars */
6964     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
6965     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
6966     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
6967     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
6968     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
6969     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
6970     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
6971     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
6972     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
6973     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
6974 };
6975
6976 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
6977 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
6978     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6979     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6980     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6981     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6982     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
6983 };
6984
6985 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
6986 static const UChar longUpperStr3[]= { /* 324 chars */
6987     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6988     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6989     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6990     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6991     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6992     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6993     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6994     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6995     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6996     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6997     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6998     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
6999 };
7000
/* Element count of a true array; same expansion as the LEN macro defined at the top of this file. */
7001 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
7002
/* One fixture string: pointer to its UTF-16 code units and their count (the strings are not NUL-terminated). */
7003 typedef struct {
7004     const UChar * longUpperStrPtr;
7005     int32_t       longUpperStrLen;
7006 } LongUpperStrItem;
7007
/*
 * TestCaseLevelBufferOverflow walks this table until it reaches the entry
 * whose pointer is NULL, and expects each sort key to compare lower than the
 * sort key of the entry before it.
 */
7008 /* String pointers must be in reverse collation order of the corresponding strings */
7009 static const LongUpperStrItem longUpperStrItems[] = {
7010     { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
7011     { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
7012     { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
7013     { NULL,          0                           }
7014 };
7015
/* Capacity, in bytes, of the sort key buffers in TestCaseLevelBufferOverflow. */
7016 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
7017
7018 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7019 static void TestCaseLevelBufferOverflow(void)
7020 {
7021     UErrorCode status = U_ZERO_ERROR;
7022     UCollator * ucol = ucol_open("root", &status);
7023     if ( U_SUCCESS(status) ) {
7024         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
7025         if ( U_SUCCESS(status) ) {
7026             const LongUpperStrItem * itemPtr;
7027             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
7028             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
7029                 int32_t sortKeyLen;
7030                 if (itemPtr > longUpperStrItems) {
7031                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
7032                 }
7033                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
7034                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
7035                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
7036                     break;
7037                 }
7038                 if ( itemPtr > longUpperStrItems ) {
7039                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
7040                     if (compareResult >= 0) {
7041                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
7042                     }
7043                 }
7044             }
7045         } else {
7046             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
7047         }
7048         ucol_close(ucol);
7049     } else {
7050         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
7051     }
7052 }
7053
7054
/*
 * TEST(x) calls addTest with the function x and the path
 * "tscoll/cmsccoll/" followed by the name of x. It relies on a variable
 * named root being in scope at the point of use.
 */
7055 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
7056
/*
 * Adds the tests of this file to the test tree at root, one TEST() call per
 * test function, in the order listed below.
 * Entries that appear only inside a comment are not added; where a reason
 * is known it is given next to the entry.
 */
7057 void addMiscCollTest(TestNode** root)
7058 {
7059     TEST(TestRuleOptions);
7060     TEST(TestBeforePrefixFailure);
7061     TEST(TestContractionClosure);
7062     TEST(TestPrefixCompose);
7063     TEST(TestStrCollIdenticalPrefix);
7064     TEST(TestPrefix);
7065     TEST(TestNewJapanese);
7066     /*TEST(TestLimitations);*/
7067     TEST(TestNonChars);
7068     TEST(TestExtremeCompression);
7069     TEST(TestSurrogates);
7070     TEST(TestVariableTopSetting);
7071     TEST(TestBocsuCoverage);
7072     TEST(TestCyrillicTailoring);
7073     TEST(TestCase);
7074     TEST(IncompleteCntTest);
7075     TEST(BlackBirdTest);
7076     TEST(FunkyATest);
7077     TEST(BillFairmanTest);
7078     TEST(RamsRulesTest);
7079     TEST(IsTailoredTest);
7080     TEST(TestCollations);
7081     TEST(TestChMove);
7082     TEST(TestImplicitTailoring);
7083     TEST(TestFCDProblem);
7084     TEST(TestEmptyRule);
7085     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
7086     TEST(TestJ815);
7087     /*TEST(TestJ831);*/ /* we changed lv locale */
7088     TEST(TestBefore);
7089     TEST(TestRedundantRules);
7090     TEST(TestExpansionSyntax);
7091     TEST(TestHangulTailoring);
7092     TEST(TestUCARules);
7093     TEST(TestIncrementalNormalize);
7094     TEST(TestComposeDecompose);
7095     TEST(TestCompressOverlap);
7096     TEST(TestContraction);
7097     TEST(TestExpansion);
7098     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
7099     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
7100     TEST(TestOptimize);
7101     TEST(TestSuppressContractions);
7102     TEST(Alexis2);
7103     TEST(TestHebrewUCA);
7104     TEST(TestPartialSortKeyTermination);
7105     TEST(TestSettings);
7106     TEST(TestEquals);
7107     TEST(TestJ2726);
7108     TEST(NullRule);
7109     TEST(TestNumericCollation);
7110     TEST(TestTibetanConformance);
7111     TEST(TestPinyinProblem);
7112     TEST(TestImplicitGeneration);
7113     TEST(TestSeparateTrees);
7114     TEST(TestBeforePinyin);
7115     TEST(TestBeforeTightening);
7116     /*TEST(TestMoreBefore);*/
7117     TEST(TestTailorNULL);
7118     TEST(TestUpperFirstQuaternary);
7119     TEST(TestJ4960);
7120     TEST(TestJ5223);
7121     TEST(TestJ5232);
7122     TEST(TestJ5367);
7123     TEST(TestHiragana);
7124     TEST(TestSortKeyConsistency);
7125     TEST(TestVI5913);  /* VI, RO tailored rules */
7126     TEST(TestCroatianSortKey);
7127     TEST(TestTailor6179);
7128     TEST(TestUCAPrecontext);
7129     TEST(TestOutOfBuffer5468);
7130     TEST(TestSameStrengthList);
7131
    /* Same-strength list syntax, private use characters, and rule imports. */
7132     TEST(TestSameStrengthListQuoted);
7133     TEST(TestSameStrengthListSupplemental);
7134     TEST(TestSameStrengthListQwerty);
7135     TEST(TestSameStrengthListQuotedQwerty);
7136     TEST(TestSameStrengthListRanges);
7137     TEST(TestSameStrengthListSupplementalRanges);
7138     TEST(TestSpecialCharacters);
7139     TEST(TestPrivateUseCharacters);
7140     TEST(TestPrivateUseCharactersInList);
7141     TEST(TestPrivateUseCharactersInRange);
7142     TEST(TestInvalidListsAndRanges);
7143     TEST(TestImportRulesDeWithPhonebook);
7144     /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
7145     /* TEST(TestImportRulesCJKWithUnihan); */
7146     TEST(TestImport);
7147     TEST(TestImportWithType);
7148
    /* Script and reorder-code reordering. */
7149     TEST(TestBeforeRuleWithScriptReordering);
7150     TEST(TestNonLeadBytesDuringCollationReordering);
7151     TEST(TestReorderingAPI);
7152     TEST(TestReorderingAPIWithRuleCreatedCollator);
7153     TEST(TestEquivalentReorderingScripts);
7154     TEST(TestGreekFirstReorder);
7155     TEST(TestGreekLastReorder);
7156     TEST(TestNonScriptReorder);
7157     TEST(TestHaniReorder);
7158     TEST(TestHaniReorderWithOtherRules);
7159     TEST(TestMultipleReorder);
7160     TEST(TestReorderingAcrossCloning);
7161     /* test for ticket 8814 - disabled until resolved */
7162     /*TEST(TestReorderWithNumericCollation);*/
7163
7164     TEST(TestCaseLevelBufferOverflow);
7165 }
7166
7167 #endif /* #if !UCONFIG_NO_COLLATION */