icuSources/test/cintltst/cmsccoll.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 2001-2016, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /*******************************************************************************
   7 *
   8 * File cmsccoll.C
   9 *
  10 *******************************************************************************/
  11 /**
  12  * These are the tests specific to ICU 1.8 and above, that I didn't know where
  13  * to fit.
  14  */
  15
  16 #include <stdio.h>
  17
  18 #include "unicode/utypes.h"
  19
  20 #if !UCONFIG_NO_COLLATION
  21
  22 #include "unicode/ucol.h"
  23 #include "unicode/ucoleitr.h"
  24 #include "unicode/uloc.h"
  25 #include "cintltst.h"
  26 #include "ccolltst.h"
  27 #include "callcoll.h"
  28 #include "unicode/ustring.h"
  29 #include "string.h"
  30 #include "ucol_imp.h"
  31 #include "cmemory.h"
  32 #include "cstring.h"
  33 #include "uassert.h"
  34 #include "unicode/parseerr.h"
  35 #include "unicode/ucnv.h"
  36 #include "unicode/ures.h"
  37 #include "unicode/uscript.h"
  38 #include "unicode/utf16.h"
  39 #include "uparse.h"
  40 #include "putilimp.h"
  41
  42
  43 #define MAX_TOKEN_LEN 16
  44
  45 typedef UCollationResult tst_strcoll(void *collator, const int object,
  46                         const UChar *source, const int sLen,
  47                         const UChar *target, const int tLen);
  48
  49
  50
  51 const static char cnt1[][10] = {
  52
  53   "AA",
  54   "AC",
  55   "AZ",
  56   "AQ",
  57   "AB",
  58   "ABZ",
  59   "ABQ",
  60   "Z",
  61   "ABC",
  62   "Q",
  63   "B"
  64 };
  65
  66 const static char cnt2[][10] = {
  67   "DA",
  68   "DAD",
  69   "DAZ",
  70   "MAR",
  71   "Z",
  72   "DAVIS",
  73   "MARK",
  74   "DAV",
  75   "DAVI"
  76 };
  77
  78 static void IncompleteCntTest(void)
  79 {
  80   UErrorCode status = U_ZERO_ERROR;
  81   UChar temp[90];
  82   UChar t1[90];
  83   UChar t2[90];
  84
  85   UCollator *coll =  NULL;
  86   uint32_t i = 0, j = 0;
  87   uint32_t size = 0;
  88
  89   u_uastrcpy(temp, " & Z < ABC < Q < B");
  90
  91   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
  92
  93   if(U_SUCCESS(status)) {
  94     size = UPRV_LENGTHOF(cnt1);
  95     for(i = 0; i < size-1; i++) {
  96       for(j = i+1; j < size; j++) {
  97         UCollationElements *iter;
  98         u_uastrcpy(t1, cnt1[i]);
  99         u_uastrcpy(t2, cnt1[j]);
 100         doTest(coll, t1, t2, UCOL_LESS);
 101         /* synwee : added collation element iterator test */
 102         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 103         if (U_FAILURE(status)) {
 104           log_err("Creation of iterator failed\n");
 105           break;
 106         }
 107         backAndForth(iter);
 108         ucol_closeElements(iter);
 109       }
 110     }
 111   }
 112
 113   ucol_close(coll);
 114
 115
 116   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
 117   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
 118
 119   if(U_SUCCESS(status)) {
 120     size = UPRV_LENGTHOF(cnt2);
 121     for(i = 0; i < size-1; i++) {
 122       for(j = i+1; j < size; j++) {
 123         UCollationElements *iter;
 124         u_uastrcpy(t1, cnt2[i]);
 125         u_uastrcpy(t2, cnt2[j]);
 126         doTest(coll, t1, t2, UCOL_LESS);
 127
 128         /* synwee : added collation element iterator test */
 129         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 130         if (U_FAILURE(status)) {
 131           log_err("Creation of iterator failed\n");
 132           break;
 133         }
 134         backAndForth(iter);
 135         ucol_closeElements(iter);
 136       }
 137     }
 138   }
 139
 140   ucol_close(coll);
 141
 142
 143 }
 144
 145 const static char shifted[][20] = {
 146   "black bird",
 147   "black-bird",
 148   "blackbird",
 149   "black Bird",
 150   "black-Bird",
 151   "blackBird",
 152   "black birds",
 153   "black-birds",
 154   "blackbirds"
 155 };
 156
 157 const static UCollationResult shiftedTert[] = {
 158   UCOL_EQUAL,
 159   UCOL_EQUAL,
 160   UCOL_EQUAL,
 161   UCOL_LESS,
 162   UCOL_EQUAL,
 163   UCOL_EQUAL,
 164   UCOL_LESS,
 165   UCOL_EQUAL,
 166   UCOL_EQUAL
 167 };
 168
 169 const static char nonignorable[][20] = {
 170   "black bird",
 171   "black Bird",
 172   "black birds",
 173   "black-bird",
 174   "black-Bird",
 175   "black-birds",
 176   "blackbird",
 177   "blackBird",
 178   "blackbirds"
 179 };
 180
 181 static void BlackBirdTest(void) {
 182   UErrorCode status = U_ZERO_ERROR;
 183   UChar t1[90];
 184   UChar t2[90];
 185
 186   uint32_t i = 0, j = 0;
 187   uint32_t size = 0;
 188   UCollator *coll = ucol_open("en_US", &status);
 189
 190   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
 191   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
 192
 193   if(U_SUCCESS(status)) {
 194     size = UPRV_LENGTHOF(nonignorable);
 195     for(i = 0; i < size-1; i++) {
 196       for(j = i+1; j < size; j++) {
 197         u_uastrcpy(t1, nonignorable[i]);
 198         u_uastrcpy(t2, nonignorable[j]);
 199         doTest(coll, t1, t2, UCOL_LESS);
 200       }
 201     }
 202   }
 203
 204   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
 205   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
 206
 207   if(U_SUCCESS(status)) {
 208     size = UPRV_LENGTHOF(shifted);
 209     for(i = 0; i < size-1; i++) {
 210       for(j = i+1; j < size; j++) {
 211         u_uastrcpy(t1, shifted[i]);
 212         u_uastrcpy(t2, shifted[j]);
 213         doTest(coll, t1, t2, UCOL_LESS);
 214       }
 215     }
 216   }
 217
 218   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
 219   if(U_SUCCESS(status)) {
 220     size = UPRV_LENGTHOF(shifted);
 221     for(i = 1; i < size; i++) {
 222       u_uastrcpy(t1, shifted[i-1]);
 223       u_uastrcpy(t2, shifted[i]);
 224       doTest(coll, t1, t2, shiftedTert[i]);
 225     }
 226   }
 227
 228   ucol_close(coll);
 229 }
 230
 231 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
 232     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
 233     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
 234     {0x0041/*'A'*/, 0x0300, 0x0000},
 235     {0x00C0, 0x0301, 0x0000},
 236     /* this would work with forced normalization */
 237     {0x00C0, 0x0316, 0x0000}
 238 };
 239
 240 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
 241     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 242     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
 243     {0x00C0, 0},
 244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 245     /* this would work with forced normalization */
 246     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
 247 };
 248
 249 const static UCollationResult results[] = {
 250     UCOL_GREATER,
 251     UCOL_EQUAL,
 252     UCOL_EQUAL,
 253     UCOL_GREATER,
 254     UCOL_EQUAL
 255 };
 256
 257 static void FunkyATest(void)
 258 {
 259
 260     int32_t i;
 261     UErrorCode status = U_ZERO_ERROR;
 262     UCollator  *myCollation;
 263     myCollation = ucol_open("en_US", &status);
 264     if(U_FAILURE(status)){
 265         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 266         return;
 267     }
 268     log_verbose("Testing some A letters, for some reason\n");
 269     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 270     ucol_setStrength(myCollation, UCOL_TERTIARY);
 271     for (i = 0; i < 4 ; i++)
 272     {
 273         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
 274     }
 275     ucol_close(myCollation);
 276 }
 277
 278 UColAttributeValue caseFirst[] = {
 279     UCOL_OFF,
 280     UCOL_LOWER_FIRST,
 281     UCOL_UPPER_FIRST
 282 };
 283
 284
 285 UColAttributeValue alternateHandling[] = {
 286     UCOL_NON_IGNORABLE,
 287     UCOL_SHIFTED
 288 };
 289
 290 UColAttributeValue caseLevel[] = {
 291     UCOL_OFF,
 292     UCOL_ON
 293 };
 294
 295 UColAttributeValue strengths[] = {
 296     UCOL_PRIMARY,
 297     UCOL_SECONDARY,
 298     UCOL_TERTIARY,
 299     UCOL_QUATERNARY,
 300     UCOL_IDENTICAL
 301 };
 302
 303 #if 0
 304 static const char * strengthsC[] = {
 305     "UCOL_PRIMARY",
 306     "UCOL_SECONDARY",
 307     "UCOL_TERTIARY",
 308     "UCOL_QUATERNARY",
 309     "UCOL_IDENTICAL"
 310 };
 311
 312 static const char * caseFirstC[] = {
 313     "UCOL_OFF",
 314     "UCOL_LOWER_FIRST",
 315     "UCOL_UPPER_FIRST"
 316 };
 317
 318
 319 static const char * alternateHandlingC[] = {
 320     "UCOL_NON_IGNORABLE",
 321     "UCOL_SHIFTED"
 322 };
 323
 324 static const char * caseLevelC[] = {
 325     "UCOL_OFF",
 326     "UCOL_ON"
 327 };
 328
 329 /* not used currently - does not test only prints */
 330 static void PrintMarkDavis(void)
 331 {
 332   UErrorCode status = U_ZERO_ERROR;
 333   UChar m[256];
 334   uint8_t sortkey[256];
 335   UCollator *coll = ucol_open("en_US", &status);
 336   uint32_t h,i,j,k, sortkeysize;
 337   uint32_t sizem = 0;
 338   char buffer[512];
 339   uint32_t len = 512;
 340
 341   log_verbose("PrintMarkDavis");
 342
 343   u_uastrcpy(m, "Mark Davis");
 344   sizem = u_strlen(m);
 345
 346
 347   m[1] = 0xe4;
 348
 349   for(i = 0; i<sizem; i++) {
 350     fprintf(stderr, "\\u%04X ", m[i]);
 351   }
 352   fprintf(stderr, "\n");
 353
 354   for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
 355     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
 356     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
 357
 358     for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
 359       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
 360       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
 361
 362       for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
 363         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
 364         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
 365
 366         for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
 367           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
 368           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
 369           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
 370           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
 371         }
 372
 373       }
 374
 375     }
 376
 377   }
 378 }
 379 #endif
 380
 381 static void BillFairmanTest(void) {
 382 /*
 383 ** check for actual locale via ICU resource bundles
 384 **
 385 ** lp points to the original locale ("fr_FR_....")
 386 */
 387
 388     UResourceBundle *lr,*cr;
 389     UErrorCode              lec = U_ZERO_ERROR;
 390     const char *lp = "fr_FR_you_ll_never_find_this_locale";
 391
 392     log_verbose("BillFairmanTest\n");
 393
 394     lr = ures_open(NULL,lp,&lec);
 395     if (lr) {
 396         cr = ures_getByKey(lr,"collations",0,&lec);
 397         if (cr) {
 398             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
 399             if (lp) {
 400                 if (U_SUCCESS(lec)) {
 401                     if(strcmp(lp, "fr") != 0) {
 402                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
 403                     }
 404                 }
 405             }
 406             ures_close(cr);
 407         }
 408         ures_close(lr);
 409     }
 410 }
 411
 412 const static char chTest[][20] = {
 413   "c",
 414   "C",
 415   "ca", "cb", "cx", "cy", "CZ",
 416   "c\\u030C", "C\\u030C",
 417   "h",
 418   "H",
 419   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
 420   "ch", "cH", "Ch", "CH",
 421   "cha", "charly", "che", "chh", "chch", "chr",
 422   "i", "I", "iarly",
 423   "r", "R",
 424   "r\\u030C", "R\\u030C",
 425   "s",
 426   "S",
 427   "s\\u030C", "S\\u030C",
 428   "z", "Z",
 429   "z\\u030C", "Z\\u030C"
 430 };
 431
 432 static void TestChMove(void) {
 433     UChar t1[256] = {0};
 434     UChar t2[256] = {0};
 435
 436     uint32_t i = 0, j = 0;
 437     uint32_t size = 0;
 438     UErrorCode status = U_ZERO_ERROR;
 439
 440     UCollator *coll = ucol_open("cs", &status);
 441
 442     if(U_SUCCESS(status)) {
 443         size = UPRV_LENGTHOF(chTest);
 444         for(i = 0; i < size-1; i++) {
 445             for(j = i+1; j < size; j++) {
 446                 u_unescape(chTest[i], t1, 256);
 447                 u_unescape(chTest[j], t2, 256);
 448                 doTest(coll, t1, t2, UCOL_LESS);
 449             }
 450         }
 451     }
 452     else {
 453         log_data_err("Can't open collator");
 454     }
 455     ucol_close(coll);
 456 }
 457
 458
 459
 460
 461 /*
 462 const static char impTest[][20] = {
 463   "\\u4e00",
 464     "a",
 465     "A",
 466     "b",
 467     "B",
 468     "\\u4e01"
 469 };
 470 */
 471
 472
 473 static void TestImplicitTailoring(void) {
 474   static const struct {
 475     const char *rules;
 476     const char *data[10];
 477     const uint32_t len;
 478   } tests[] = {
 479       {
 480         /* Tailor b and c before U+4E00. */
 481         "&[before 1]\\u4e00 < b < c "
 482         /* Now, before U+4E00 is c; put d and e after that. */
 483         "&[before 1]\\u4e00 < d < e",
 484         { "b", "c", "d", "e", "\\u4e00"}, 5 },
 485       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
 486       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
 487       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
 488   };
 489
 490   int32_t i = 0;
 491
 492   for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
 493       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
 494   }
 495
 496 /*
 497   UChar t1[256] = {0};
 498   UChar t2[256] = {0};
 499
 500   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
 501
 502   uint32_t i = 0, j = 0;
 503   uint32_t size = 0;
 504   uint32_t ruleLen = 0;
 505   UErrorCode status = U_ZERO_ERROR;
 506   UCollator *coll = NULL;
 507   ruleLen = u_unescape(rule, t1, 256);
 508
 509   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
 510
 511   if(U_SUCCESS(status)) {
 512     size = UPRV_LENGTHOF(impTest);
 513     for(i = 0; i < size-1; i++) {
 514       for(j = i+1; j < size; j++) {
 515         u_unescape(impTest[i], t1, 256);
 516         u_unescape(impTest[j], t2, 256);
 517         doTest(coll, t1, t2, UCOL_LESS);
 518       }
 519     }
 520   }
 521   else {
 522     log_err("Can't open collator");
 523   }
 524   ucol_close(coll);
 525   */
 526 }
 527
 528 static void TestFCDProblem(void) {
 529   UChar t1[256] = {0};
 530   UChar t2[256] = {0};
 531
 532   const char *s1 = "\\u0430\\u0306\\u0325";
 533   const char *s2 = "\\u04D1\\u0325";
 534
 535   UErrorCode status = U_ZERO_ERROR;
 536   UCollator *coll = ucol_open("", &status);
 537   u_unescape(s1, t1, 256);
 538   u_unescape(s2, t2, 256);
 539
 540   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
 541   doTest(coll, t1, t2, UCOL_EQUAL);
 542
 543   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 544   doTest(coll, t1, t2, UCOL_EQUAL);
 545
 546   ucol_close(coll);
 547 }
 548
 549 /*
 550 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
 551 We're only using NFC/NFD in this test.
 552 */
  553 #define NORM_BUFFER_TEST_LEN 18 /* capacity, in UChars, of each buffer in tester and of the scratch buffer in TestComposeDecompose */
  554 typedef struct {               /* one TestComposeDecompose case */
  555   UChar32 u;                   /* code point the case was built from; printed in failure messages */
  556   UChar NFC[NORM_BUFFER_TEST_LEN]; /* UNORM_NFC output; replaced by the raw code point when that differs from the NFD form */
  557   UChar NFD[NORM_BUFFER_TEST_LEN]; /* UNORM_NFD output for the code point */
  558 } tester;
 559
 560 static void TestComposeDecompose(void) {
 561     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
 562     static const UChar UNICODESET_STR[] = {
 563         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
 564         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
 565         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
 566     };
 567     int32_t noOfLoc;
 568     int32_t i = 0, j = 0;
 569
 570     UErrorCode status = U_ZERO_ERROR;
 571     const char *locName = NULL;
 572     uint32_t nfcSize;
 573     uint32_t nfdSize;
 574     tester **t;
 575     uint32_t noCases = 0;
 576     UCollator *coll = NULL;
 577     UChar32 u = 0;
 578     UChar comp[NORM_BUFFER_TEST_LEN];
 579     uint32_t len = 0;
 580     UCollationElements *iter;
 581     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
 582     int32_t charsToTestSize;
 583
 584     noOfLoc = uloc_countAvailable();
 585
 586     coll = ucol_open("", &status);
 587     if (U_FAILURE(status)) {
 588         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
 589         return;
 590     }
 591     charsToTestSize = uset_size(charsToTest);
 592     if (charsToTestSize <= 0) {
 593         log_err("Set was zero. Missing data?\n");
 594         return;
 595     }
 596     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
 597     t[0] = (tester *)malloc(sizeof(tester));
 598     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
 599
 600     for(u = 0; u < charsToTestSize; u++) {
 601         UChar32 ch = uset_charAt(charsToTest, u);
 602         len = 0;
 603         U16_APPEND_UNSAFE(comp, len, ch);
 604         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
 605         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
 606
 607         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
 608           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
 609             t[noCases]->u = ch;
 610             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
 611                 u_strncpy(t[noCases]->NFC, comp, len);
 612                 t[noCases]->NFC[len] = 0;
 613             }
 614             noCases++;
 615             t[noCases] = (tester *)malloc(sizeof(tester));
 616             uprv_memset(t[noCases], 0, sizeof(tester));
 617         }
 618     }
 619     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
 620     uset_close(charsToTest);
 621     charsToTest = NULL;
 622
 623     for(u=0; u<(UChar32)noCases; u++) {
 624         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
 625             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
 626             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
 627         }
 628     }
 629     /*
 630     for(u = 0; u < charsToTestSize; u++) {
 631       if(!(u&0xFFFF)) {
 632         log_verbose("%08X ", u);
 633       }
 634       uprv_memset(t[noCases], 0, sizeof(tester));
 635       t[noCases]->u = u;
 636       len = 0;
 637       U16_APPEND_UNSAFE(comp, len, u);
 638       comp[len] = 0;
 639       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
 640       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
 641       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
 642       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
 643     }
 644     */
 645
 646     ucol_close(coll);
 647
 648     log_verbose("Testing locales, number of cases = %i\n", noCases);
 649     for(i = 0; i<noOfLoc; i++) {
 650         status = U_ZERO_ERROR;
 651         locName = uloc_getAvailable(i);
 652         if(hasCollationElements(locName)) {
 653             char cName[256];
 654             UChar name[256];
 655             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
 656
 657             for(j = 0; j<nameSize; j++) {
 658                 cName[j] = (char)name[j];
 659             }
 660             cName[nameSize] = 0;
 661             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
 662
 663             coll = ucol_open(locName, &status);
 664             ucol_setStrength(coll, UCOL_IDENTICAL);
 665             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
 666
 667             for(u=0; u<(UChar32)noCases; u++) {
 668                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
 669                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
 670                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
 671                     log_verbose("Testing NFC\n");
 672                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
 673                     backAndForth(iter);
 674                     log_verbose("Testing NFD\n");
 675                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
 676                     backAndForth(iter);
 677                 }
 678             }
 679             ucol_closeElements(iter);
 680             ucol_close(coll);
 681         }
 682     }
 683     for(u = 0; u <= (UChar32)noCases; u++) {
 684         free(t[u]);
 685     }
 686     free(t);
 687 }
 688
 689 static void TestEmptyRule(void) {
 690   UErrorCode status = U_ZERO_ERROR;
 691   UChar rulez[] = { 0 };
 692   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
 693
 694   ucol_close(coll);
 695 }
 696
 697 static void TestUCARules(void) {
 698   UErrorCode status = U_ZERO_ERROR;
 699   UChar b[256];
 700   UChar *rules = b;
 701   uint32_t ruleLen = 0;
 702   UCollator *UCAfromRules = NULL;
 703   UCollator *coll = ucol_open("", &status);
 704   if(status == U_FILE_ACCESS_ERROR) {
 705     log_data_err("Is your data around?\n");
 706     return;
 707   } else if(U_FAILURE(status)) {
 708     log_err("Error opening collator\n");
 709     return;
 710   }
 711   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
 712
 713   log_verbose("TestUCARules\n");
 714   if(ruleLen > 256) {
 715     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
 716     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
 717   }
 718   log_verbose("Rules length is %d\n", ruleLen);
 719   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
 720   if(U_SUCCESS(status)) {
 721     ucol_close(UCAfromRules);
 722   } else {
 723     log_verbose("Unable to create a collator from UCARules!\n");
 724   }
 725 /*
 726   u_unescape(blah, b, 256);
 727   ucol_getSortKey(coll, b, 1, res, 256);
 728 */
 729   ucol_close(coll);
 730   if(rules != b) {
 731     free(rules);
 732   }
 733 }
 734
 735
 736 /* Pinyin tonal order */
 737 /*
 738     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
 739           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
 740     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
 741     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
 742     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
 743     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
 744       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
 745 .. (\u00fc)
 746
 747 However, in testing we got the following order:
 748     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
 749           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
 750     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
 751 .. (\u0113)
 752     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
 753     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
 754     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
 755 .. (\u01d8)
 756       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
 757 */
 758
 759 static void TestBefore(void) {
 760   const static char *data[] = {
 761       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
 762       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
 763       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
 764       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
 765       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
 766       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
 767   };
 768   genericRulesStarter(
 769     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
 770     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
 771     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
 772     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
 773     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
 774     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
 775     data, UPRV_LENGTHOF(data));
 776 }
 777
 778 #if 0
  779 /* superseded by TestBeforePinyin */
 780 static void TestJ784(void) {
 781   const static char *data[] = {
 782       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
 783       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
 784       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
 785       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
 786       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
 787       "\\u00fc",
 788            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
 789   };
 790   genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
 791 }
 792 #endif
 793
 794 #if 0
  795 /* superseded by the changes to the lv locale */
 796 static void TestJ831(void) {
 797   const static char *data[] = {
 798     "I",
 799       "i",
 800       "Y",
 801       "y"
 802   };
 803   genericLocaleStarter("lv", data, UPRV_LENGTHOF(data));
 804 }
 805 #endif
 806
 807 static void TestJ815(void) {
 808   const static char *data[] = {
 809     "aa",
 810       "Aa",
 811       "ab",
 812       "Ab",
 813       "ad",
 814       "Ad",
 815       "ae",
 816       "Ae",
 817       "\\u00e6",
 818       "\\u00c6",
 819       "af",
 820       "Af",
 821       "b",
 822       "B"
 823   };
 824   genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
 825   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
 826 }
 827
 828
 829 static void TestCase(void)
 830 {
 831     const static UChar gRules[MAX_TOKEN_LEN] =
 832     /*" & 0 < 1,\u2461<a,A"*/
 833     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
 834
 835     const static UChar testCase[][MAX_TOKEN_LEN] =
 836     {
 837         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
 838         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
 839         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
 840         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
 841     };
 842
 843     const static UCollationResult caseTestResults[][9] =
 844     {
 845         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
 846         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
 847         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
 848         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
 849     };
 850
 851     const static UColAttributeValue caseTestAttributes[][2] =
 852     {
 853         { UCOL_LOWER_FIRST, UCOL_OFF},
 854         { UCOL_UPPER_FIRST, UCOL_OFF},
 855         { UCOL_LOWER_FIRST, UCOL_ON},
 856         { UCOL_UPPER_FIRST, UCOL_ON}
 857     };
 858     int32_t i,j,k;
 859     UErrorCode status = U_ZERO_ERROR;
 860     UCollationElements *iter;
 861     UCollator  *myCollation;
 862     myCollation = ucol_open("en_US", &status);
 863
 864     if(U_FAILURE(status)){
 865         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 866         return;
 867     }
 868     log_verbose("Testing different case settings\n");
 869     ucol_setStrength(myCollation, UCOL_TERTIARY);
 870
 871     for(k = 0; k<4; k++) {
 872       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
 873       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
 874       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
 875       for (i = 0; i < 3 ; i++) {
 876         for(j = i+1; j<4; j++) {
 877           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
 878         }
 879       }
 880     }
 881     ucol_close(myCollation);
 882
 883     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
 884     if(U_FAILURE(status)){
 885         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 886         return;
 887     }
 888     log_verbose("Testing different case settings with custom rules\n");
 889     ucol_setStrength(myCollation, UCOL_TERTIARY);
 890
 891     for(k = 0; k<4; k++) {
 892       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
 893       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
 894       for (i = 0; i < 3 ; i++) {
 895         for(j = i+1; j<4; j++) {
 896           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
 897           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
 898           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
 899           backAndForth(iter);
 900           ucol_closeElements(iter);
 901           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
 902           backAndForth(iter);
 903           ucol_closeElements(iter);
 904         }
 905       }
 906     }
 907     ucol_close(myCollation);
 908     {
 909       const static char *lowerFirst[] = {
 910         "h",
 911         "H",
 912         "ch",
 913         "Ch",
 914         "CH",
 915         "cha",
 916         "chA",
 917         "Cha",
 918         "ChA",
 919         "CHa",
 920         "CHA",
 921         "i",
 922         "I"
 923       };
 924
 925       const static char *upperFirst[] = {
 926         "H",
 927         "h",
 928         "CH",
 929         "Ch",
 930         "ch",
 931         "CHA",
 932         "CHa",
 933         "ChA",
 934         "Cha",
 935         "chA",
 936         "cha",
 937         "I",
 938         "i"
 939       };
 940       log_verbose("mixed case test\n");
 941       log_verbose("lower first, case level off\n");
 942       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
 943       log_verbose("upper first, case level off\n");
 944       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
 945       log_verbose("lower first, case level on\n");
 946       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
 947       log_verbose("upper first, case level on\n");
 948       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
 949     }
 950
 951 }
 952
 953 static void TestIncrementalNormalize(void) {
 954
 955     /*UChar baseA     =0x61;*/
 956     UChar baseA     =0x41;
 957 /*    UChar baseB     = 0x42;*/
 958     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
 959     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
 960     /*
 961         0x316 is combining grave accent below, cc=220
 962         0x321 is combining palatalized hook below, cc=202
 963         0x300 is combining grave accent, cc=230
 964     */
 965
 966 #define MAXSLEN 2000
 967     /*int          maxSLen   = 64000;*/
 968     int          sLen;
 969     int          i;
 970
 971     UCollator        *coll;
 972     UErrorCode       status = U_ZERO_ERROR;
 973     UCollationResult result;
 974
 975     int32_t myQ = getTestOption(QUICK_OPTION);
 976
 977     if(getTestOption(QUICK_OPTION) < 0) {
 978         setTestOption(QUICK_OPTION, 1);
 979     }
 980
 981     {
 982         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
 983         /*          most buffers along the way.*/
 984         UChar            strA[MAXSLEN+1];
 985         UChar            strB[MAXSLEN+1];
 986
 987         coll = ucol_open("en_US", &status);
 988         if(status == U_FILE_ACCESS_ERROR) {
 989           log_data_err("Is your data around?\n");
 990           return;
 991         } else if(U_FAILURE(status)) {
 992           log_err("Error opening collator\n");
 993           return;
 994         }
 995         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 996
 997         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
 998         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
 999         /*for (sLen = 1000; sLen<1001; sLen++) {*/
1000         for (sLen = 500; sLen<501; sLen++) {
1001         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1002             strA[0] = baseA;
1003             strB[0] = baseA;
1004             for (i=1; i<=sLen-1; i++) {
1005                 strA[i] = ccMix[i % 3];
1006                 strB[sLen-i] = ccMix[i % 3];
1007             }
1008             strA[sLen]   = 0;
1009             strB[sLen]   = 0;
1010
1011             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1012             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1013             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1014             doTest(coll, strA, strB, UCOL_EQUAL);
1015         }
1016     }
1017
1018     setTestOption(QUICK_OPTION, myQ);
1019
1020
1021     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1022     /*         of the string.  Checks a couple of edge cases.*/
1023
1024     {
1025         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1026         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1027         ucol_setStrength(coll, UCOL_TERTIARY);
1028         doTest(coll, strA, strB, UCOL_EQUAL);
1029     }
1030
1031     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1032
1033     {
1034       /* New UCA  3.1.1.
1035        * test below used a code point from Desseret, which sorts differently
1036        * than d800 dc00
1037        */
1038         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1039         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1040         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1041         ucol_setStrength(coll, UCOL_TERTIARY);
1042         doTest(coll, strA, strB, UCOL_GREATER);
1043     }
1044
1045     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1046
1047     {
1048         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1049         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1050         char  sortKeyA[50];
1051         char  sortKeyAz[50];
1052         char  sortKeyB[50];
1053         char  sortKeyBz[50];
1054         int   r;
1055
1056         /* there used to be -3 here. Hmmmm.... */
1057         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1058         result = ucol_strcoll(coll, strA, 3, strB, 3);
1059         if (result != UCOL_GREATER) {
1060             log_err("ERROR 1 in test 4\n");
1061         }
1062         result = ucol_strcoll(coll, strA, -1, strB, -1);
1063         if (result != UCOL_EQUAL) {
1064             log_err("ERROR 2 in test 4\n");
1065         }
1066
1067         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1068         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1069         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1070         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1071
1072         r = strcmp(sortKeyA, sortKeyAz);
1073         if (r <= 0) {
1074             log_err("Error 3 in test 4\n");
1075         }
1076         r = strcmp(sortKeyA, sortKeyB);
1077         if (r <= 0) {
1078             log_err("Error 4 in test 4\n");
1079         }
1080         r = strcmp(sortKeyAz, sortKeyBz);
1081         if (r != 0) {
1082             log_err("Error 5 in test 4\n");
1083         }
1084
1085         ucol_setStrength(coll, UCOL_IDENTICAL);
1086         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1087         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1088         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1089         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1090
1091         r = strcmp(sortKeyA, sortKeyAz);
1092         if (r <= 0) {
1093             log_err("Error 6 in test 4\n");
1094         }
1095         r = strcmp(sortKeyA, sortKeyB);
1096         if (r <= 0) {
1097             log_err("Error 7 in test 4\n");
1098         }
1099         r = strcmp(sortKeyAz, sortKeyBz);
1100         if (r != 0) {
1101             log_err("Error 8 in test 4\n");
1102         }
1103         ucol_setStrength(coll, UCOL_TERTIARY);
1104     }
1105
1106
1107     /*  Test 5:  Null characters in non-normal source strings.*/
1108
1109     {
1110         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1111         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1112         char  sortKeyA[50];
1113         char  sortKeyAz[50];
1114         char  sortKeyB[50];
1115         char  sortKeyBz[50];
1116         int   r;
1117
1118         result = ucol_strcoll(coll, strA, 6, strB, 6);
1119         if (result != UCOL_GREATER) {
1120             log_err("ERROR 1 in test 5\n");
1121         }
1122         result = ucol_strcoll(coll, strA, -1, strB, -1);
1123         if (result != UCOL_EQUAL) {
1124             log_err("ERROR 2 in test 5\n");
1125         }
1126
1127         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1128         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1129         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1130         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1131
1132         r = strcmp(sortKeyA, sortKeyAz);
1133         if (r <= 0) {
1134             log_err("Error 3 in test 5\n");
1135         }
1136         r = strcmp(sortKeyA, sortKeyB);
1137         if (r <= 0) {
1138             log_err("Error 4 in test 5\n");
1139         }
1140         r = strcmp(sortKeyAz, sortKeyBz);
1141         if (r != 0) {
1142             log_err("Error 5 in test 5\n");
1143         }
1144
1145         ucol_setStrength(coll, UCOL_IDENTICAL);
1146         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1147         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1148         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1149         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1150
1151         r = strcmp(sortKeyA, sortKeyAz);
1152         if (r <= 0) {
1153             log_err("Error 6 in test 5\n");
1154         }
1155         r = strcmp(sortKeyA, sortKeyB);
1156         if (r <= 0) {
1157             log_err("Error 7 in test 5\n");
1158         }
1159         r = strcmp(sortKeyAz, sortKeyBz);
1160         if (r != 0) {
1161             log_err("Error 8 in test 5\n");
1162         }
1163         ucol_setStrength(coll, UCOL_TERTIARY);
1164     }
1165
1166
1167     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1168
1169     {
1170         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1171         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1172
1173         result = ucol_strcoll(coll, strA, 5, strB, 5);
1174         if (result != UCOL_LESS) {
1175             log_err("Error 1 in test 6\n");
1176         }
1177         result = ucol_strcoll(coll, strA, -1, strB, -1);
1178         if (result != UCOL_EQUAL) {
1179             log_err("Error 2 in test 6\n");
1180         }
1181     }
1182
1183     ucol_close(coll);
1184 }
1185
1186
1187
#if 0
/*
 * Compiled out with #if 0.
 *
 * For each escaped test string, asks ucol_uprv_getCaseBits() for the case
 * classification of the unescaped text under the root collator and compares
 * it with the expected value at the same index in results[].
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  int32_t i;
  uint32_t blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  /* Without a collator there is nothing to query. */
  if(U_FAILURE(status)) {
    log_data_err("Couldn't open UCA\n");
    return;
  }

  for(i = 0; i<UPRV_LENGTHOF(results); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator opened above. */
  ucol_close(UCA);
}
#endif
1215
1216 static void TestHangulTailoring(void) {
1217     static const char *koreanData[] = {
1218         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1219             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1220             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1221             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1222             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1223             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1224     };
1225
1226     const char *rules =
1227         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1228         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1229         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1230         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1231         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1232         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1233
1234
1235   UErrorCode status = U_ZERO_ERROR;
1236   UChar rlz[2048] = { 0 };
1237   uint32_t rlen = u_unescape(rules, rlz, 2048);
1238
1239   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1240   if(status == U_FILE_ACCESS_ERROR) {
1241     log_data_err("Is your data around?\n");
1242     return;
1243   } else if(U_FAILURE(status)) {
1244     log_err("Error opening collator\n");
1245     return;
1246   }
1247
1248   log_verbose("Using start of korean rules\n");
1249
1250   if(U_SUCCESS(status)) {
1251     genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1252   } else {
1253     log_err("Unable to open collator with rules %s\n", rules);
1254   }
1255
1256   ucol_close(coll);
1257
1258   log_verbose("Using ko__LOTUS locale\n");
1259   genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1260 }
1261
/*
 * Middle bytes of the secondary and tertiary compression ranges, as produced
 * by the current sort key implementation (see class CollationKeys).
 * These are implementation details and will need updating whenever the
 * sort key compression scheme changes.
 */
enum {
    /* secondary common weights occupy 05..45 */
    SEC_COMMON_MIDDLE = (0x05 + 0x45) / 2,
    /* tertiary-only common weights occupy 05..C5 */
    TER_ONLY_COMMON_MIDDLE = (0x05 + 0xC5) / 2
};
1272
1273 static void TestCompressOverlap(void) {
1274     UChar       secstr[150];
1275     UChar       tertstr[150];
1276     UErrorCode  status = U_ZERO_ERROR;
1277     UCollator  *coll;
1278     uint8_t     result[500];
1279     uint32_t    resultlen;
1280     int         count = 0;
1281     uint8_t    *tempptr;
1282
1283     coll = ucol_open("", &status);
1284
1285     if (U_FAILURE(status)) {
1286         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1287         return;
1288     }
1289     while (count < 149) {
1290         secstr[count] = 0x0020; /* [06, 05, 05] */
1291         tertstr[count] = 0x0020;
1292         count ++;
1293     }
1294
1295     /* top down compression ----------------------------------- */
1296     secstr[count] = 0x0332; /* [, 87, 05] */
1297     tertstr[count] = 0x3000; /* [06, 05, 07] */
1298
1299     /* no compression secstr should have 150 secondary bytes, tertstr should
1300     have 150 tertiary bytes.
1301     with correct compression, secstr should have 6 secondary
1302     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1303     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1304     (void)resultlen;    /* Suppress set but not used warning. */
1305     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1306     while (*(tempptr + 1) != 1) {
1307         /* the last secondary collation element is not checked since it is not
1308         part of the compression */
1309         if (*tempptr < SEC_COMMON_MIDDLE) {
1310             log_err("Secondary top down compression overlapped\n");
1311         }
1312         tempptr ++;
1313     }
1314
1315     /* tertiary top/bottom/common for en_US is similar to the secondary
1316     top/bottom/common */
1317     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1318     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1319     while (*(tempptr + 1) != 0) {
1320         /* the last secondary collation element is not checked since it is not
1321         part of the compression */
1322         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1323             log_err("Tertiary top down compression overlapped\n");
1324         }
1325         tempptr ++;
1326     }
1327
1328     /* bottom up compression ------------------------------------- */
1329     secstr[count] = 0;
1330     tertstr[count] = 0;
1331     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1332     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1333     while (*(tempptr + 1) != 1) {
1334         /* the last secondary collation element is not checked since it is not
1335         part of the compression */
1336         if (*tempptr > SEC_COMMON_MIDDLE) {
1337             log_err("Secondary bottom up compression overlapped\n");
1338         }
1339         tempptr ++;
1340     }
1341
1342     /* tertiary top/bottom/common for en_US is similar to the secondary
1343     top/bottom/common */
1344     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1345     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1346     while (*(tempptr + 1) != 0) {
1347         /* the last secondary collation element is not checked since it is not
1348         part of the compression */
1349         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1350             log_err("Tertiary bottom up compression overlapped\n");
1351         }
1352         tempptr ++;
1353     }
1354
1355     ucol_close(coll);
1356 }
1357
/*
 * Checks the ordering of three Cyrillic strings (A, A + combining breve,
 * A with breve) under tailorings that place U+04D0 after U+0410.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /*
     * Rules that are still exercised.
     *
     * No longer run:
     *   - locale "ru": Russian overrides contractions, so the test is not
     *     valid there anymore.
     *   - locale "root", and the rules
     *       "&\\u0410 = \\u0410"
     *       "&Z < \\u0410"
     *       "&\\u0410 = \\u0410 < \\u0410\\u0301"
     *       "&Z < \\u0410 < \\u0410\\u0301"
     *     because UCA 8.0 drops most of the Cyrillic contractions from the
     *     default order.  See CLDR ticket #7246
     *     "root collation: remove Cyrillic contractions".
     */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int32_t ruleIndex;

    for (ruleIndex = 0; ruleIndex < UPRV_LENGTHOF(activeRules); ++ruleIndex) {
        genericRulesStarter(activeRules[ruleIndex], test, UPRV_LENGTHOF(test));
    }
}
1380
/*
 * Runs two ordering lists through a tailoring that suppresses contractions
 * for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    /* Cyrillic A follows a Latin letter. */
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* Cyrillic A leads, optionally with a combining mark. */
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, UPRV_LENGTHOF(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, UPRV_LENGTHOF(testNoCont2));
}
1397
1398 static void TestContraction(void) {
1399     const static char *testrules[] = {
1400         "&A = AB / B",
1401         "&A = A\\u0306/\\u0306",
1402         "&c = ch / h"
1403     };
1404     const static UChar testdata[][2] = {
1405         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1406         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1407         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1408     };
1409     const static UChar testdata2[][2] = {
1410         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1411         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1412         {0x0063 /* 'c' */, 0x006C /* 'l' */}
1413     };
1414 #if 0
1415     /*
1416      * These pairs of rule strings are not guaranteed to yield the very same mappings.
1417      * In fact, LDML 24 recommends an improved way of creating mappings
1418      * which always yields different mappings for such pairs. See
1419      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1420      */
1421     const static char *testrules3[] = {
1422         "&z < xyz &xyzw << B",
1423         "&z < xyz &xyz << B / w",
1424         "&z < ch &achm << B",
1425         "&z < ch &a << B / chm",
1426         "&\\ud800\\udc00w << B",
1427         "&\\ud800\\udc00 << B / w",
1428         "&a\\ud800\\udc00m << B",
1429         "&a << B / \\ud800\\udc00m",
1430     };
1431 #endif
1432
1433     UErrorCode  status   = U_ZERO_ERROR;
1434     UCollator  *coll;
1435     UChar       rule[256] = {0};
1436     uint32_t    rlen     = 0;
1437     int         i;
1438
1439     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1440         UCollationElements *iter1;
1441         int j = 0;
1442         log_verbose("Rule %s for testing\n", testrules[i]);
1443         rlen = u_unescape(testrules[i], rule, 32);
1444         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1445         if (U_FAILURE(status)) {
1446             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1447             return;
1448         }
1449         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1450         if (U_FAILURE(status)) {
1451             log_err("Collation iterator creation failed\n");
1452             return;
1453         }
1454         while (j < 2) {
1455             UCollationElements *iter2 = ucol_openElements(coll,
1456                                                          &(testdata[i][j]),
1457                                                          1, &status);
1458             uint32_t ce;
1459             if (U_FAILURE(status)) {
1460                 log_err("Collation iterator creation failed\n");
1461                 return;
1462             }
1463             ce = ucol_next(iter2, &status);
1464             while (ce != UCOL_NULLORDER) {
1465                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
1466                     log_err("Collation elements in contraction split does not match\n");
1467                     return;
1468                 }
1469                 ce = ucol_next(iter2, &status);
1470             }
1471             j ++;
1472             ucol_closeElements(iter2);
1473         }
1474         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1475             log_err("Collation elements not exhausted\n");
1476             return;
1477         }
1478         ucol_closeElements(iter1);
1479         ucol_close(coll);
1480     }
1481
1482     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1483     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1484     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1485         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1486                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1487                 testdata2[1][1]);
1488         return;
1489     }
1490     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1491         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1492                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1493                 testdata2[2][1]);
1494         return;
1495     }
1496     ucol_close(coll);
1497 #if 0  /* see above */
1498     for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1499         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1500         UCollator          *coll1,
1501                            *coll2;
1502         UCollationElements *iter1,
1503                            *iter2;
1504         UChar               ch = 0x0042 /* 'B' */;
1505         uint32_t            ce;
1506         rlen = u_unescape(testrules3[i], rule, 32);
1507         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1508         rlen = u_unescape(testrules3[i + 1], rule, 32);
1509         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1510         if (U_FAILURE(status)) {
1511             log_err("Collator creation failed %s\n", testrules[i]);
1512             return;
1513         }
1514         iter1 = ucol_openElements(coll1, &ch, 1, &status);
1515         iter2 = ucol_openElements(coll2, &ch, 1, &status);
1516         if (U_FAILURE(status)) {
1517             log_err("Collation iterator creation failed\n");
1518             return;
1519         }
1520         ce = ucol_next(iter1, &status);
1521         if (U_FAILURE(status)) {
1522             log_err("Retrieving ces failed\n");
1523             return;
1524         }
1525         while (ce != UCOL_NULLORDER) {
1526             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1527             if (ce == ce2) {
1528                 log_verbose("CEs match: %08x\n", ce);
1529             } else {
1530                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1531                 return;
1532             }
1533             ce = ucol_next(iter1, &status);
1534             if (U_FAILURE(status)) {
1535                 log_err("Retrieving ces failed\n");
1536                 return;
1537             }
1538         }
1539         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1540             log_err("CEs not exhausted\n");
1541             return;
1542         }
1543         ucol_closeElements(iter1);
1544         ucol_closeElements(iter2);
1545         ucol_close(coll1);
1546         ucol_close(coll2);
1547     }
1548 #endif
1549 }
1550
1551 static void TestExpansion(void) {
1552     const static char *testrules[] = {
1553 #if 0
1554         /*
1555          * This seems to have tested that M was not mapped to an expansion.
1556          * I believe the old builder just did that because it computed the extension CEs
1557          * at the very end, which was a bug.
1558          * Among other problems, it violated the core tailoring principle
1559          * by making an earlier rule depend on a later one.
1560          * And, of course, if M did not get an expansion, then it was primary different from K,
1561          * unlike what the rule &K<<M says.
1562          */
1563         "&J << K / B & K << M",
1564 #endif
1565         "&J << K / B << M"
1566     };
1567     const static UChar testdata[][3] = {
1568         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1569         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1570         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1571         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1572         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1573         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1574     };
1575
1576     UErrorCode  status   = U_ZERO_ERROR;
1577     UCollator  *coll;
1578     UChar       rule[256] = {0};
1579     uint32_t    rlen     = 0;
1580     int         i;
1581
1582     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1583         int j = 0;
1584         log_verbose("Rule %s for testing\n", testrules[i]);
1585         rlen = u_unescape(testrules[i], rule, 32);
1586         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1587         if (U_FAILURE(status)) {
1588             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1589             return;
1590         }
1591
1592         for (j = 0; j < 5; j ++) {
1593             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1594         }
1595         ucol_close(coll);
1596     }
1597 }
1598
#if 0
/*
 * Exercises the current limitations of the collation engine.
 * The test always fails, so it is compiled out by default.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *expansionRule = "&a=b/c&d=c/e";
    static const char *firstOrder[] = {"add","b","adf"};
    static const char *secondOrder[] = {"aa","b","af"};

    log_verbose("recursive expansions\n");
    genericRulesStarter(expansionRule, firstOrder, UPRV_LENGTHOF(firstOrder));
    genericRulesStarter(expansionRule, secondOrder, UPRV_LENGTHOF(secondOrder));
  }

  /* contractions spanning expansions */
  {
    static const char *spanningRule = "&a<<<c/e&g<<<eh";
    static const char *firstOrder[] = {"ad","c","af","f","ch","h"};
    static const char *secondOrder[] = {"ad","c","ch","af","f","h"};

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(spanningRule, firstOrder, UPRV_LENGTHOF(firstOrder));
    genericRulesStarter(spanningRule, secondOrder, UPRV_LENGTHOF(secondOrder));
  }

  /* normalization: nulls in contractions */
  {
    static const char *nulRule = "&a<<<\\u0000\\u0302";
    static const char *firstOrder[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *secondOrder[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attributes[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normalizationOn[] = { UCOL_ON };
    static const UColAttributeValue normalizationOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* both orders, with decomposition on and then off */
    genericRulesStarterWithOptions(nulRule, firstOrder, UPRV_LENGTHOF(firstOrder), attributes, normalizationOn, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(nulRule, secondOrder, UPRV_LENGTHOF(secondOrder), attributes, normalizationOn, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(nulRule, firstOrder, UPRV_LENGTHOF(firstOrder), attributes, normalizationOff, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(nulRule, secondOrder, UPRV_LENGTHOF(secondOrder), attributes, normalizationOff, UPRV_LENGTHOF(attributes));
  }

  /* normalization: contractions spanning normalization */
  {
    static const char *spanningRule = "&a<<<\\u0000\\u0302";
    static const char *firstOrder[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *secondOrder[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attributes[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normalizationOn[] = { UCOL_ON };
    static const UColAttributeValue normalizationOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    /* both orders, with decomposition on and then off */
    genericRulesStarterWithOptions(spanningRule, firstOrder, UPRV_LENGTHOF(firstOrder), attributes, normalizationOn, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(spanningRule, secondOrder, UPRV_LENGTHOF(secondOrder), attributes, normalizationOn, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(spanningRule, firstOrder, UPRV_LENGTHOF(firstOrder), attributes, normalizationOff, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(spanningRule, secondOrder, UPRV_LENGTHOF(secondOrder), attributes, normalizationOff, UPRV_LENGTHOF(attributes));
  }

  /* variable top:  */
  {
    /* Alternative rules that were tried:
     *   "&\\u2010<x=[variable top]<z"
     *   "&' '<x<[variable top]=z"
     */
    static const char *variableTopRule = "&\\u2010<x<[variable top]=z";
    static const char *firstOrder[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *secondOrder[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *thirdOrder[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attributes[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue shifted[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue nonIgnorable[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(variableTopRule, thirdOrder, UPRV_LENGTHOF(thirdOrder), attributes, shifted, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(variableTopRule, firstOrder, UPRV_LENGTHOF(firstOrder), attributes, shifted, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(variableTopRule, secondOrder, UPRV_LENGTHOF(secondOrder), attributes, shifted, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(variableTopRule, firstOrder, UPRV_LENGTHOF(firstOrder), attributes, nonIgnorable, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(variableTopRule, secondOrder, UPRV_LENGTHOF(secondOrder), attributes, nonIgnorable, UPRV_LENGTHOF(attributes));
  }

  /* case level */
  {
    static const char *caseRule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *firstOrder[] = {"c","CH","Ch","cH","ch"};
    static const char *secondOrder[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attributes[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};

    log_verbose("case level\n");
    /* Only the upper-first setting is run; the same two calls with
     * { UCOL_OFF } as the attribute value are not run. */
    genericRulesStarterWithOptions(caseRule, firstOrder, UPRV_LENGTHOF(firstOrder), attributes, upperFirst, UPRV_LENGTHOF(attributes));
    genericRulesStarterWithOptions(caseRule, secondOrder, UPRV_LENGTHOF(secondOrder), attributes, upperFirst, UPRV_LENGTHOF(attributes));
  }

}
#endif
1690
1691 static void TestBocsuCoverage(void) {
1692   UErrorCode status = U_ZERO_ERROR;
1693   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1694   UChar       test[256] = {0};
1695   uint32_t    tlen     = u_unescape(testString, test, 32);
1696   uint8_t key[256]     = {0};
1697   uint32_t klen         = 0;
1698
1699   UCollator *coll = ucol_open("", &status);
1700   if(U_SUCCESS(status)) {
1701   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1702
1703   klen = ucol_getSortKey(coll, test, tlen, key, 256);
1704   (void)klen;    /* Suppress set but not used warning. */
1705
1706   ucol_close(coll);
1707   } else {
1708     log_data_err("Couldn't open UCA\n");
1709   }
1710 }
1711
1712 static void TestVariableTopSetting(void) {
1713   UErrorCode status = U_ZERO_ERROR;
1714   uint32_t varTopOriginal = 0, varTop1, varTop2;
1715   UCollator *coll = ucol_open("", &status);
1716   if(U_SUCCESS(status)) {
1717
1718   static const UChar nul = 0;
1719   static const UChar space = 0x20;
1720   static const UChar dot = 0x2e;  /* punctuation */
1721   static const UChar degree = 0xb0;  /* symbol */
1722   static const UChar dollar = 0x24;  /* currency symbol */
1723   static const UChar zero = 0x30;  /* digit */
1724
1725   varTopOriginal = ucol_getVariableTop(coll, &status);
1726   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1727   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1728
1729   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1730   varTop2 = ucol_getVariableTop(coll, &status);
1731   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1732   if(U_FAILURE(status) || varTop1 != varTop2 ||
1733       !ucol_equal(coll, &nul, 0, &space, 1) ||
1734       ucol_equal(coll, &nul, 0, &dot, 1) ||
1735       ucol_equal(coll, &nul, 0, &degree, 1) ||
1736       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1737       ucol_equal(coll, &nul, 0, &zero, 1) ||
1738       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1739     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1740   }
1741
1742   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1743   varTop2 = ucol_getVariableTop(coll, &status);
1744   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1745   if(U_FAILURE(status) || varTop1 != varTop2 ||
1746       !ucol_equal(coll, &nul, 0, &space, 1) ||
1747       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1748       ucol_equal(coll, &nul, 0, &degree, 1) ||
1749       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1750       ucol_equal(coll, &nul, 0, &zero, 1) ||
1751       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1752     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1753   }
1754
1755   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1756   varTop2 = ucol_getVariableTop(coll, &status);
1757   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1758   if(U_FAILURE(status) || varTop1 != varTop2 ||
1759       !ucol_equal(coll, &nul, 0, &space, 1) ||
1760       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1761       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1762       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1763       ucol_equal(coll, &nul, 0, &zero, 1) ||
1764       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1765     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1766   }
1767
1768   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1769   varTop2 = ucol_getVariableTop(coll, &status);
1770   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1771   if(U_FAILURE(status) || varTop1 != varTop2 ||
1772       !ucol_equal(coll, &nul, 0, &space, 1) ||
1773       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1774       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1775       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1776       ucol_equal(coll, &nul, 0, &zero, 1) ||
1777       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1778     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1779   }
1780
1781   log_verbose("Testing setting variable top to contractions\n");
1782   {
1783     UChar first[4] = { 0 };
1784     first[0] = 0x0040;
1785     first[1] = 0x0050;
1786     first[2] = 0x0000;
1787
1788     status = U_ZERO_ERROR;
1789     ucol_setVariableTop(coll, first, -1, &status);
1790
1791     if(U_SUCCESS(status)) {
1792       log_err("Invalid contraction succeded in setting variable top!\n");
1793     }
1794
1795   }
1796
1797   log_verbose("Test restoring variable top\n");
1798
1799   status = U_ZERO_ERROR;
1800   ucol_restoreVariableTop(coll, varTopOriginal, &status);
1801   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1802     log_err("Couldn't restore old variable top\n");
1803   }
1804
1805   log_verbose("Testing calling with error set\n");
1806
1807   status = U_INTERNAL_PROGRAM_ERROR;
1808   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1809   varTop2 = ucol_getVariableTop(coll, &status);
1810   ucol_restoreVariableTop(coll, varTop2, &status);
1811   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1812   varTop2 = ucol_getVariableTop(NULL, &status);
1813   ucol_restoreVariableTop(NULL, varTop2, &status);
1814   if(status != U_INTERNAL_PROGRAM_ERROR) {
1815     log_err("Bad reaction to passed error!\n");
1816   }
1817   ucol_close(coll);
1818   } else {
1819     log_data_err("Couldn't open UCA collator\n");
1820   }
1821 }
1822
1823 static void TestMaxVariable() {
1824   UErrorCode status = U_ZERO_ERROR;
1825   UColReorderCode oldMax, max;
1826   UCollator *coll;
1827
1828   static const UChar nul = 0;
1829   static const UChar space = 0x20;
1830   static const UChar dot = 0x2e;  /* punctuation */
1831   static const UChar degree = 0xb0;  /* symbol */
1832   static const UChar dollar = 0x24;  /* currency symbol */
1833   static const UChar zero = 0x30;  /* digit */
1834
1835   coll = ucol_open("", &status);
1836   if(U_FAILURE(status)) {
1837     log_data_err("Couldn't open root collator\n");
1838     return;
1839   }
1840
1841   oldMax = ucol_getMaxVariable(coll);
1842   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1843   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1844
1845   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1846   max = ucol_getMaxVariable(coll);
1847   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1848   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1849       !ucol_equal(coll, &nul, 0, &space, 1) ||
1850       ucol_equal(coll, &nul, 0, &dot, 1) ||
1851       ucol_equal(coll, &nul, 0, &degree, 1) ||
1852       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1853       ucol_equal(coll, &nul, 0, &zero, 1) ||
1854       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1855     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1856   }
1857
1858   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1859   max = ucol_getMaxVariable(coll);
1860   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1861   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1862       !ucol_equal(coll, &nul, 0, &space, 1) ||
1863       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1864       ucol_equal(coll, &nul, 0, &degree, 1) ||
1865       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1866       ucol_equal(coll, &nul, 0, &zero, 1) ||
1867       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1868     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1869   }
1870
1871   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1872   max = ucol_getMaxVariable(coll);
1873   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1874   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1875       !ucol_equal(coll, &nul, 0, &space, 1) ||
1876       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1877       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1878       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1879       ucol_equal(coll, &nul, 0, &zero, 1) ||
1880       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1881     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1882   }
1883
1884   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1885   max = ucol_getMaxVariable(coll);
1886   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1887   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1888       !ucol_equal(coll, &nul, 0, &space, 1) ||
1889       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1890       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1891       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1892       ucol_equal(coll, &nul, 0, &zero, 1) ||
1893       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1894     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1895   }
1896
1897   log_verbose("Test restoring maxVariable\n");
1898   status = U_ZERO_ERROR;
1899   ucol_setMaxVariable(coll, oldMax, &status);
1900   if(oldMax != ucol_getMaxVariable(coll)) {
1901     log_err("Couldn't restore old maxVariable\n");
1902   }
1903
1904   log_verbose("Testing calling with error set\n");
1905   status = U_INTERNAL_PROGRAM_ERROR;
1906   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1907   max = ucol_getMaxVariable(coll);
1908   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1909     log_err("Bad reaction to passed error!\n");
1910   }
1911   ucol_close(coll);
1912 }
1913
1914 static void TestNonChars(void) {
1915   static const char *test[] = {
1916       "\\u0000",  /* ignorable */
1917       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1918       "\\uFDD0", "\\uFDEF",
1919       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1920       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1921       "\\U0003FFFE", "\\U0003FFFF",
1922       "\\U0004FFFE", "\\U0004FFFF",
1923       "\\U0005FFFE", "\\U0005FFFF",
1924       "\\U0006FFFE", "\\U0006FFFF",
1925       "\\U0007FFFE", "\\U0007FFFF",
1926       "\\U0008FFFE", "\\U0008FFFF",
1927       "\\U0009FFFE", "\\U0009FFFF",
1928       "\\U000AFFFE", "\\U000AFFFF",
1929       "\\U000BFFFE", "\\U000BFFFF",
1930       "\\U000CFFFE", "\\U000CFFFF",
1931       "\\U000DFFFE", "\\U000DFFFF",
1932       "\\U000EFFFE", "\\U000EFFFF",
1933       "\\U000FFFFE", "\\U000FFFFF",
1934       "\\U0010FFFE", "\\U0010FFFF",
1935       "\\uFFFF"  /* special character with maximum primary weight */
1936   };
1937   UErrorCode status = U_ZERO_ERROR;
1938   UCollator *coll = ucol_open("en_US", &status);
1939
1940   log_verbose("Test non characters\n");
1941
1942   if(U_SUCCESS(status)) {
1943     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1944   } else {
1945     log_err_status(status, "Unable to open collator\n");
1946   }
1947
1948   ucol_close(coll);
1949 }
1950
1951 static void TestExtremeCompression(void) {
1952   static char *test[4];
1953   int32_t j = 0, i = 0;
1954
1955   for(i = 0; i<4; i++) {
1956     test[i] = (char *)malloc(2048*sizeof(char));
1957   }
1958
1959   for(j = 20; j < 500; j++) {
1960     for(i = 0; i<4; i++) {
1961       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1962       test[i][j-1] = (char)('a'+i);
1963       test[i][j] = 0;
1964     }
1965     genericLocaleStarter("en_US", (const char **)test, 4);
1966   }
1967
1968
1969   for(i = 0; i<4; i++) {
1970     free(test[i]);
1971   }
1972 }
1973
1974 #if 0
/*
 * Disabled variant of TestExtremeCompression; it sits inside an "#if 0"
 * region and is not compiled.
 *
 * NOTE(review): as written this would not work if re-enabled:
 *  - ucol_open() is handed `status` by value, while every live call in this
 *    file passes `&status`;
 *  - `coll` is never used or closed;
 *  - genericLocaleStarter() is called once, after the fill loop has finished,
 *    so only the longest strings would be compared;
 *  - the first fill loop sets j-2 bytes but then writes index j-1, leaving
 *    index j-2 untouched on the first pass.
 * It duplicates the name of the live test, so the two cannot coexist.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
2002 #endif
2003
/*
 * Tailors a chain of strings that start with supplementary characters
 * (written as surrogate pairs) after 'z', and checks that the strings in
 * `expectedOrder` sort in the listed order under that tailoring.
 */
static void TestSurrogates(void) {
  static const char tailoring[] =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *expectedOrder[] = {
    "z","\\ud900\\udc25",  "\\ud805\\udc50",
       "\\ud800\\udc00y",  "\\ud800\\udc00r",
       "\\ud800\\udc00f",  "\\ud800\\udc00",
       "\\ud800\\udc00c", "\\ud800\\udc00b",
       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
       "\\ud800\\udc00a",
       "c", "b"
  };

  /* All 14 entries of the table take part in the comparison. */
  genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
2024
2025 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
2026 static void TestPrefix(void) {
2027   uint32_t i;
2028
2029   static const struct {
2030     const char *rules;
2031     const char *data[50];
2032     const uint32_t len;
2033   } tests[] = {
2034     { "&z <<< z|a",
2035       {"zz", "za"}, 2 },
2036
2037     { "&z <<< z|   a",
2038       {"zz", "za"}, 2 },
2039     { "[strength I]"
2040       "&a=\\ud900\\udc25"
2041       "&z<<<\\ud900\\udc25|a",
2042       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2043   };
2044
2045
2046   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2047     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2048   }
2049 }
2050
2051 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2052 /* JIS X 4061 collation order implementation                                   */
2053 static void TestNewJapanese(void) {
2054
2055   static const char * const test1[] = {
2056       "\\u30b7\\u30e3\\u30fc\\u30ec",
2057       "\\u30b7\\u30e3\\u30a4",
2058       "\\u30b7\\u30e4\\u30a3",
2059       "\\u30b7\\u30e3\\u30ec",
2060       "\\u3061\\u3087\\u3053",
2061       "\\u3061\\u3088\\u3053",
2062       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2063       "\\u3066\\u30fc\\u305f",
2064       "\\u30c6\\u30fc\\u30bf",
2065       "\\u30c6\\u30a7\\u30bf",
2066       "\\u3066\\u3048\\u305f",
2067       "\\u3067\\u30fc\\u305f",
2068       "\\u30c7\\u30fc\\u30bf",
2069       "\\u30c7\\u30a7\\u30bf",
2070       "\\u3067\\u3048\\u305f",
2071       "\\u3066\\u30fc\\u305f\\u30fc",
2072       "\\u30c6\\u30fc\\u30bf\\u30a1",
2073       "\\u30c6\\u30a7\\u30bf\\u30fc",
2074       "\\u3066\\u3047\\u305f\\u3041",
2075       "\\u3066\\u3048\\u305f\\u30fc",
2076       "\\u3067\\u30fc\\u305f\\u30fc",
2077       "\\u30c7\\u30fc\\u30bf\\u30a1",
2078       "\\u3067\\u30a7\\u305f\\u30a1",
2079       "\\u30c7\\u3047\\u30bf\\u3041",
2080       "\\u30c7\\u30a8\\u30bf\\u30a2",
2081       "\\u3072\\u3086",
2082       "\\u3073\\u3085\\u3042",
2083       "\\u3074\\u3085\\u3042",
2084       "\\u3073\\u3085\\u3042\\u30fc",
2085       "\\u30d3\\u30e5\\u30a2\\u30fc",
2086       "\\u3074\\u3085\\u3042\\u30fc",
2087       "\\u30d4\\u30e5\\u30a2\\u30fc",
2088       "\\u30d2\\u30e5\\u30a6",
2089       "\\u30d2\\u30e6\\u30a6",
2090       "\\u30d4\\u30e5\\u30a6\\u30a2",
2091       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2092       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2093       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2094       "\\u3072\\u3085\\u3093",
2095       "\\u3074\\u3085\\u3093",
2096       "\\u3075\\u30fc\\u308a",
2097       "\\u30d5\\u30fc\\u30ea",
2098       "\\u3075\\u3045\\u308a",
2099       "\\u3075\\u30a5\\u308a",
2100       "\\u3075\\u30a5\\u30ea",
2101       "\\u30d5\\u30a6\\u30ea",
2102       "\\u3076\\u30fc\\u308a",
2103       "\\u30d6\\u30fc\\u30ea",
2104       "\\u3076\\u3045\\u308a",
2105       "\\u30d6\\u30a5\\u308a",
2106       "\\u3077\\u3046\\u308a",
2107       "\\u30d7\\u30a6\\u30ea",
2108       "\\u3075\\u30fc\\u308a\\u30fc",
2109       "\\u30d5\\u30a5\\u30ea\\u30fc",
2110       "\\u3075\\u30a5\\u308a\\u30a3",
2111       "\\u30d5\\u3045\\u308a\\u3043",
2112       "\\u30d5\\u30a6\\u30ea\\u30fc",
2113       "\\u3075\\u3046\\u308a\\u3043",
2114       "\\u30d6\\u30a6\\u30ea\\u30a4",
2115       "\\u3077\\u30fc\\u308a\\u30fc",
2116       "\\u3077\\u30a5\\u308a\\u30a4",
2117       "\\u3077\\u3046\\u308a\\u30fc",
2118       "\\u30d7\\u30a6\\u30ea\\u30a4",
2119       "\\u30d5\\u30fd",
2120       "\\u3075\\u309e",
2121       "\\u3076\\u309d",
2122       "\\u3076\\u3075",
2123       "\\u3076\\u30d5",
2124       "\\u30d6\\u3075",
2125       "\\u30d6\\u30d5",
2126       "\\u3076\\u309e",
2127       "\\u3076\\u3077",
2128       "\\u30d6\\u3077",
2129       "\\u3077\\u309d",
2130       "\\u30d7\\u30fd",
2131       "\\u3077\\u3075",
2132 };
2133
2134   static const char *test2[] = {
2135     "\\u306f\\u309d", /* H\\u309d */
2136     "\\u30cf\\u30fd", /* K\\u30fd */
2137     "\\u306f\\u306f", /* HH */
2138     "\\u306f\\u30cf", /* HK */
2139     "\\u30cf\\u30cf", /* KK */
2140     "\\u306f\\u309e", /* H\\u309e */
2141     "\\u30cf\\u30fe", /* K\\u30fe */
2142     "\\u306f\\u3070", /* HH\\u309b */
2143     "\\u30cf\\u30d0", /* KK\\u309b */
2144     "\\u306f\\u3071", /* HH\\u309c */
2145     "\\u30cf\\u3071", /* KH\\u309c */
2146     "\\u30cf\\u30d1", /* KK\\u309c */
2147     "\\u3070\\u309d", /* H\\u309b\\u309d */
2148     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2149     "\\u3070\\u306f", /* H\\u309bH */
2150     "\\u30d0\\u30cf", /* K\\u309bK */
2151     "\\u3070\\u309e", /* H\\u309b\\u309e */
2152     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2153     "\\u3070\\u3070", /* H\\u309bH\\u309b */
2154     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2155     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2156     "\\u3070\\u3071", /* H\\u309bH\\u309c */
2157     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2158     "\\u3071\\u309d", /* H\\u309c\\u309d */
2159     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2160     "\\u3071\\u306f", /* H\\u309cH */
2161     "\\u30d1\\u30cf", /* K\\u309cK */
2162     "\\u3071\\u3070", /* H\\u309cH\\u309b */
2163     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2164     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2165     "\\u3071\\u3071", /* H\\u309cH\\u309c */
2166     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2167   };
2168   /*
2169   static const char *test3[] = {
2170     "\\u221er\\u221e",
2171     "\\u221eR#",
2172     "\\u221et\\u221e",
2173     "#r\\u221e",
2174     "#R#",
2175     "#t%",
2176     "#T%",
2177     "8t\\u221e",
2178     "8T\\u221e",
2179     "8t#",
2180     "8T#",
2181     "8t%",
2182     "8T%",
2183     "8t8",
2184     "8T8",
2185     "\\u03c9r\\u221e",
2186     "\\u03a9R%",
2187     "rr\\u221e",
2188     "rR\\u221e",
2189     "Rr\\u221e",
2190     "RR\\u221e",
2191     "RT%",
2192     "rt8",
2193     "tr\\u221e",
2194     "tr8",
2195     "TR8",
2196     "tt8",
2197     "\\u30b7\\u30e3\\u30fc\\u30ec",
2198   };
2199   */
2200   static const UColAttribute att[] = { UCOL_STRENGTH };
2201   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2202
2203   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2204   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2205
2206   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2207   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2208   /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2209   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2210   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2211 }
2212
2213 static void TestStrCollIdenticalPrefix(void) {
2214   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2215   const char* test[] = {
2216     "ab\\ud9b0\\udc70",
2217     "ab\\ud9b0\\udc71"
2218   };
2219   genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2220 }
2221 /* Contractions should have all their canonically equivalent */
2222 /* strings included */
2223 static void TestContractionClosure(void) {
2224   static const struct {
2225     const char *rules;
2226     const char *data[10];
2227     const uint32_t len;
2228   } tests[] = {
2229     {   "&b=\\u00e4\\u00e4",
2230       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2231     {   "&b=\\u00C5",
2232       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2233   };
2234   uint32_t i;
2235
2236
2237   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2238     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2239   }
2240 }
2241
2242 /* This tests also fails*/
2243 static void TestBeforePrefixFailure(void) {
2244   static const struct {
2245     const char *rules;
2246     const char *data[10];
2247     const uint32_t len;
2248   } tests[] = {
2249     { "&g <<< a"
2250       "&[before 3]\\uff41 <<< x",
2251       {"x", "\\uff41"}, 2 },
2252     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2253         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2254         "&[before 3]\\u30a7<<<\\u30a9",
2255       {"\\u30a9", "\\u30a7"}, 2 },
2256     {   "&[before 3]\\u30a7<<<\\u30a9"
2257         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2258         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2259       {"\\u30a9", "\\u30a7"}, 2 },
2260   };
2261   uint32_t i;
2262
2263
2264   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2265     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2266   }
2267
2268 #if 0
2269   const char* rule1 =
2270         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2271         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2272         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2273   const char* rule2 =
2274         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2275         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2276         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2277   const char* test[] = {
2278       "\\u30c6\\u30fc\\u30bf",
2279       "\\u30c6\\u30a7\\u30bf",
2280   };
2281   genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
2282   genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
2283 /* this piece of code should be in some sort of verbose mode     */
2284 /* it gets the collation elements for elements and prints them   */
2285 /* This is useful when trying to see whether the problem is      */
2286   {
2287     UErrorCode status = U_ZERO_ERROR;
2288     uint32_t i = 0;
2289     UCollationElements *it = NULL;
2290     uint32_t CE;
2291     UChar string[256];
2292     uint32_t uStringLen;
2293     UCollator *coll = NULL;
2294
2295     uStringLen = u_unescape(rule1, string, 256);
2296
2297     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2298
2299     /*coll = ucol_open("ja_JP_JIS", &status);*/
2300     it = ucol_openElements(coll, string, 0, &status);
2301
2302     for(i = 0; i < UPRV_LENGTHOF(test); i++) {
2303       log_verbose("%s\n", test[i]);
2304       uStringLen = u_unescape(test[i], string, 256);
2305       ucol_setText(it, string, uStringLen, &status);
2306
2307       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2308         log_verbose("%08X\n", CE);
2309       }
2310       log_verbose("\n");
2311
2312     }
2313
2314     ucol_closeElements(it);
2315     ucol_close(coll);
2316   }
2317 #endif
2318 }
2319
2320 static void TestPrefixCompose(void) {
2321   const char* rule1 =
2322         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2323   /*
2324   const char* test[] = {
2325       "\\u30c6\\u30fc\\u30bf",
2326       "\\u30c6\\u30a7\\u30bf",
2327   };
2328   */
2329   {
2330     UErrorCode status = U_ZERO_ERROR;
2331     /*uint32_t i = 0;*/
2332     /*UCollationElements *it = NULL;*/
2333 /*    uint32_t CE;*/
2334     UChar string[256];
2335     uint32_t uStringLen;
2336     UCollator *coll = NULL;
2337
2338     uStringLen = u_unescape(rule1, string, 256);
2339
2340     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2341     ucol_close(coll);
2342   }
2343
2344
2345 }
2346
2347 /*
2348 [last variable] last variable value
2349 [last primary ignorable] largest CE for primary ignorable
2350 [last secondary ignorable] largest CE for secondary ignorable
2351 [last tertiary ignorable] largest CE for tertiary ignorable
2352 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2353 */
2354
2355 static void TestRuleOptions(void) {
2356   /* values here are hardcoded and are correct for the current UCA
2357    * when the UCA changes, one might be forced to change these
2358    * values.
2359    */
2360
2361   /*
2362    * These strings contain the last character before [variable top]
2363    * and the first and second characters (by primary weights) after it.
2364    * See FractionalUCA.txt. For example:
2365       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2366       [variable top = 0C FE]
2367       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2368      and
2369       00B4; [0D 0C, 05, 05]
2370    *
2371    * Note: Starting with UCA 6.0, the [variable top] collation element
2372    * is not the weight of any character or string,
2373    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2374    */
2375 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2376 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
2377 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2378
2379   /*
2380    * This string has to match the character that has the [last regular] weight
2381    * which changes with each UCA version.
2382    * See the bottom of FractionalUCA.txt which says something like
2383       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2384    *
2385    * Note: Starting with UCA 6.0, the [last regular] collation element
2386    * is not the weight of any character or string,
2387    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2388    */
2389 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2390
2391   static const struct {
2392     const char *rules;
2393     const char *data[10];
2394     const uint32_t len;
2395   } tests[] = {
2396 #if 0
2397     /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2398     /* - all befores here amount to zero */
2399     { "&[before 3][first tertiary ignorable]<<<a",
2400         { "\\u0000", "a"}, 2
2401     }, /* you cannot go before first tertiary ignorable */
2402
2403     { "&[before 3][last tertiary ignorable]<<<a",
2404         { "\\u0000", "a"}, 2
2405     }, /* you cannot go before last tertiary ignorable */
2406 #endif
2407     /*
2408      * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2409      * and it *is* possible to "go before" that.
2410      */
2411     { "&[before 3][first secondary ignorable]<<<a",
2412         { "\\u0000", "a"}, 2
2413     },
2414
2415     { "&[before 3][last secondary ignorable]<<<a",
2416         { "\\u0000", "a"}, 2
2417     },
2418
2419     /* 'normal' befores */
2420
2421     /*
2422      * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2423      * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2424      * because there is no tailoring space before that boundary.
2425      * Made the tests work by tailoring to a space instead.
2426      */
2427     { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
2428         {  "c", "b", "\\u0332", "a" }, 4
2429     },
2430
2431     /* we don't have a code point that corresponds to
2432      * the last primary ignorable
2433      */
2434     { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
2435         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2436     },
2437
2438     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2439         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
2440     },
2441
2442     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2443         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
2444     },
2445
2446     { "&[first regular]<a"
2447       "&[before 1][first regular]<b",
2448       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
2449     },
2450
2451     { "&[before 1][last regular]<b"
2452       "&[last regular]<a",
2453         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2454     },
2455
2456     { "&[before 1][first implicit]<b"
2457       "&[first implicit]<a",
2458         { "b", "\\u4e00", "a", "\\u4e01"}, 4
2459     },
2460 #if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2461     { "&[before 1][last implicit]<b"
2462       "&[last implicit]<a",
2463         { "b", "\\U0010FFFD", "a" }, 3
2464     },
2465 #endif
2466     { "&[last variable]<z"
2467       "&' '<x"  /* was &[last primary ignorable]<x, see above */
2468       "&[last secondary ignorable]<<y"
2469       "&[last tertiary ignorable]<<<w"
2470       "&[top]<u",
2471       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
2472     }
2473
2474   };
2475   uint32_t i;
2476
2477   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2478     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2479   }
2480 }
2481
2482
2483 static void TestOptimize(void) {
2484   /* this is not really a test - just trying out
2485    * whether copying of UCA contents will fail
2486    * Cannot really test, since the functionality
2487    * remains the same.
2488    */
2489   static const struct {
2490     const char *rules;
2491     const char *data[10];
2492     const uint32_t len;
2493   } tests[] = {
2494     /* - all befores here amount to zero */
2495     { "[optimize [\\uAC00-\\uD7FF]]",
2496     { "a", "b"}, 2}
2497   };
2498   uint32_t i;
2499
2500   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2501     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2502   }
2503 }
2504
2505 /*
2506 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2507 weiv    ucol_strcollIter?
2508 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2509 weiv    these are the input strings?
2510 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2511 weiv    will check - could be a problem with utf-8 iterator
2512 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2513 weiv    hmmm
2514 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2515 weiv    that doesn't sound right
2516 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2517 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2518 cycheng@ca.ibm.c... yes
2519 weiv    and then do the comparison
2520 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2521 weiv    utf-16 strings look like a little endian ones in the example you sent me
2522 weiv    It could be a bug - let me try to test it out
2523 cycheng@ca.ibm.c... ok
2524 cycheng@ca.ibm.c... we can wait till the conf. call
2525 cycheng@ca.ibm.c... next weke
2526 weiv    that would be great
2527 weiv    hmmm
2528 weiv    I might be wrong
2529 weiv    let me play with it some more
2530 cycheng@ca.ibm.c... ok
2531 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2532 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2533 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2534 weiv    ok
2535 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2536 weiv    thanks
2537 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2538 */
2539 #if 0
/*
 * Disabled reproduction of the report quoted above; it sits inside an
 * "#if 0" region and is not compiled.
 *
 * The same two strings (a lone lead surrogate followed by '!', and U+FFFC
 * followed by 'b') are supplied once as UTF-16BE bytes and once as UTF-8
 * bytes, compared through UCharIterators with normalization on, and the two
 * comparison results are expected to match.
 *
 * NOTE(review): the status from ucol_open() is not checked before the
 * collator is used.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* Row 0: D800 0021, row 1: FFFC 0062 (big-endian UTF-16 code units). */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* The same two strings as UTF-8 byte sequences. */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  /* Both encodings must yield the same comparison result. */
  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
2578 #endif
2579
2580 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2581 static void Alexis2(void) {
2582   UErrorCode status = U_ZERO_ERROR;
2583   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2584   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2586   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2587
2588   UConverter *conv = NULL;
2589
2590   UCharIterator U16BEItS, U16BEItT;
2591   UCharIterator U8ItS, U8ItT;
2592
2593   UCollationResult resU16, resU16BE, resU8;
2594
2595   static const char* const pairs[][2] = {
2596     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2597     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2598     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2599     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2600     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2601     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2602     { "\\u0020", "\\u0020\\u0000"}
2603 /*
2604 5F20 (my result here)
2605 5F204E008E3F
2606 5F20 (your result here)
2607 */
2608   };
2609
2610   int32_t i = 0;
2611
2612   UCollator *coll = ucol_open("", &status);
2613   if(status == U_FILE_ACCESS_ERROR) {
2614     log_data_err("Is your data around?\n");
2615     return;
2616   } else if(U_FAILURE(status)) {
2617     log_err("Error opening collator\n");
2618     return;
2619   }
2620   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2621   conv = ucnv_open("UTF16BE", &status);
2622   for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2623     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2624     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2625
2626     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2627
2628     log_verbose("Result of strcoll is %i\n", resU16);
2629
2630     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2631     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2632     (void)U16BELenS;    /* Suppress set but not used warnings. */
2633     (void)U16BELenT;
2634
2635     /* use the original sizes, as the result from converter is in bytes */
2636     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2637     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2638
2639     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2640
2641     log_verbose("Result of U16BE is %i\n", resU16BE);
2642
2643     if(resU16 != resU16BE) {
2644       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2645     }
2646
2647     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2648     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2649
2650     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2651     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2652
2653     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2654
2655     if(resU16 != resU8) {
2656       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2657     }
2658
2659   }
2660
2661   ucol_close(coll);
2662   ucnv_close(conv);
2663 }
2664
2665 static void TestHebrewUCA(void) {
2666   UErrorCode status = U_ZERO_ERROR;
2667   static const char *first[] = {
2668     "d790d6b8d79cd795d6bcd7a9",
2669     "d790d79cd79ed7a7d799d799d7a1",
2670     "d790d6b4d79ed795d6bcd7a9",
2671   };
2672
2673   char utf8String[3][256];
2674   UChar utf16String[3][256];
2675
2676   int32_t i = 0, j = 0;
2677   int32_t sizeUTF8[3];
2678   int32_t sizeUTF16[3];
2679
2680   UCollator *coll = ucol_open("", &status);
2681   if (U_FAILURE(status)) {
2682       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2683       return;
2684   }
2685   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2686
2687   for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2688     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2689     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2690     log_verbose("%i: ");
2691     for(j = 0; j < sizeUTF16[i]; j++) {
2692       /*log_verbose("\\u%04X", utf16String[i][j]);*/
2693       log_verbose("%04X", utf16String[i][j]);
2694     }
2695     log_verbose("\n");
2696   }
2697   for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2698     for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2699       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2700     }
2701   }
2702
2703   ucol_close(coll);
2704
2705 }
2706
2707 static void TestPartialSortKeyTermination(void) {
2708   static const char* cases[] = {
2709     "\\u1234\\u1234\\udc00",
2710     "\\udc00\\ud800\\ud800"
2711   };
2712
2713   int32_t i;
2714
2715   UErrorCode status = U_ZERO_ERROR;
2716
2717   UCollator *coll = ucol_open("", &status);
2718
2719   UCharIterator iter;
2720
2721   UChar currCase[256];
2722   int32_t length = 0;
2723   int32_t pKeyLen = 0;
2724
2725   uint8_t key[256];
2726
2727   for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2728     uint32_t state[2] = {0, 0};
2729     length = u_unescape(cases[i], currCase, 256);
2730     uiter_setString(&iter, currCase, length);
2731     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2732     (void)pKeyLen;   /* Suppress set but not used warning. */
2733
2734     log_verbose("Done\n");
2735
2736   }
2737   ucol_close(coll);
2738 }
2739
2740 static void TestSettings(void) {
2741   static const char* cases[] = {
2742     "apple",
2743       "Apple"
2744   };
2745
2746   static const char* locales[] = {
2747     "",
2748       "en"
2749   };
2750
2751   UErrorCode status = U_ZERO_ERROR;
2752
2753   int32_t i = 0, j = 0;
2754
2755   UChar source[256], target[256];
2756   int32_t sLen = 0, tLen = 0;
2757
2758   UCollator *collateObject = NULL;
2759   for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2760     collateObject = ucol_open(locales[i], &status);
2761     ucol_setStrength(collateObject, UCOL_PRIMARY);
2762     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2763     for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2764       sLen = u_unescape(cases[j-1], source, 256);
2765       source[sLen] = 0;
2766       tLen = u_unescape(cases[j], target, 256);
2767       source[tLen] = 0;
2768       doTest(collateObject, source, target, UCOL_EQUAL);
2769     }
2770     ucol_close(collateObject);
2771   }
2772 }
2773
2774 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2775     UErrorCode status = U_ZERO_ERROR;
2776     int32_t errorNo = 0;
2777     const UChar *sourceRules = NULL;
2778     int32_t sourceRulesLen = 0;
2779     UParseError parseError;
2780     UColAttributeValue french = UCOL_OFF;
2781
2782     if(!ucol_equals(source, target)) {
2783         log_err("Same collators, different address not equal\n");
2784         errorNo++;
2785     }
2786     ucol_close(target);
2787     if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2788         target = ucol_safeClone(source, NULL, NULL, &status);
2789         if(U_FAILURE(status)) {
2790             log_err("Error creating clone\n");
2791             errorNo++;
2792             return errorNo;
2793         }
2794         if(!ucol_equals(source, target)) {
2795             log_err("Collator different from it's clone\n");
2796             errorNo++;
2797         }
2798         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2799         if(french == UCOL_ON) {
2800             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2801         } else {
2802             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2803         }
2804         if(U_FAILURE(status)) {
2805             log_err("Error setting attributes\n");
2806             errorNo++;
2807             return errorNo;
2808         }
2809         if(ucol_equals(source, target)) {
2810             log_err("Collators same even when options changed\n");
2811             errorNo++;
2812         }
2813         ucol_close(target);
2814
2815         sourceRules = ucol_getRules(source, &sourceRulesLen);
2816         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2817         if(U_FAILURE(status)) {
2818             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2819             errorNo++;
2820             return errorNo;
2821         }
2822         /* Note: The tailoring rule string is an optional data item. */
2823         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2824             log_err("Collator different from collator that was created from the same rules\n");
2825             errorNo++;
2826         }
2827         ucol_close(target);
2828     }
2829     return errorNo;
2830 }
2831
2832
2833 static void TestEquals(void) {
2834     /* ucol_equals is not currently a public API. There is a chance that it will become
2835     * something like this.
2836     */
2837     /* test whether the two collators instantiated from the same locale are equal */
2838     UErrorCode status = U_ZERO_ERROR;
2839     UParseError parseError;
2840     int32_t noOfLoc = uloc_countAvailable();
2841     const char *locName = NULL;
2842     UCollator *source = NULL, *target = NULL;
2843     int32_t i = 0;
2844
2845     const char* rules[] = {
2846         "&l < lj <<< Lj <<< LJ",
2847         "&n < nj <<< Nj <<< NJ",
2848         "&ae <<< \\u00e4",
2849         "&AE <<< \\u00c4"
2850     };
2851     /*
2852     const char* badRules[] = {
2853     "&l <<< Lj",
2854     "&n < nj <<< nJ <<< NJ",
2855     "&a <<< \\u00e4",
2856     "&AE <<< \\u00c4 <<< x"
2857     };
2858     */
2859
2860     UChar sourceRules[1024], targetRules[1024];
2861     int32_t sourceRulesSize = 0, targetRulesSize = 0;
2862     int32_t rulesSize = UPRV_LENGTHOF(rules);
2863
2864     for(i = 0; i < rulesSize; i++) {
2865         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2866         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2867     }
2868
2869     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2870     if(status == U_FILE_ACCESS_ERROR) {
2871         log_data_err("Is your data around?\n");
2872         return;
2873     } else if(U_FAILURE(status)) {
2874         log_err("Error opening collator\n");
2875         return;
2876     }
2877     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2878     if(!ucol_equals(source, target)) {
2879         log_err("Equivalent collators not equal!\n");
2880     }
2881     ucol_close(source);
2882     ucol_close(target);
2883
2884     source = ucol_open("root", &status);
2885     target = ucol_open("root", &status);
2886     log_verbose("Testing root\n");
2887     if(!ucol_equals(source, source)) {
2888         log_err("Same collator not equal\n");
2889     }
2890     if(TestEqualsForCollator("root", source, target)) {
2891         log_err("Errors for root\n");
2892     }
2893     ucol_close(source);
2894
2895     for(i = 0; i<noOfLoc; i++) {
2896         status = U_ZERO_ERROR;
2897         locName = uloc_getAvailable(i);
2898         /*if(hasCollationElements(locName)) {*/
2899         log_verbose("Testing equality for locale %s\n", locName);
2900         source = ucol_open(locName, &status);
2901         target = ucol_open(locName, &status);
2902         if (U_FAILURE(status)) {
2903             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2904             continue;
2905         }
2906         if(TestEqualsForCollator(locName, source, target)) {
2907             log_err("Errors for locale %s\n", locName);
2908         }
2909         ucol_close(source);
2910         /*}*/
2911     }
2912 }
2913
2914 static void TestJ2726(void) {
2915     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2916     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2917     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2918     UErrorCode status = U_ZERO_ERROR;
2919     UCollator *coll = ucol_open("en", &status);
2920     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2921     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2922     doTest(coll, a, aSpace, UCOL_EQUAL);
2923     doTest(coll, aSpace, a, UCOL_EQUAL);
2924     doTest(coll, a, spaceA, UCOL_EQUAL);
2925     doTest(coll, spaceA, a, UCOL_EQUAL);
2926     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2927     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2928     ucol_close(coll);
2929 }
2930
2931 static void NullRule(void) {
2932     UChar r[3] = {0};
2933     UErrorCode status = U_ZERO_ERROR;
2934     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2935     if(U_SUCCESS(status)) {
2936         log_err("This should have been an error!\n");
2937         ucol_close(coll);
2938     } else {
2939         status = U_ZERO_ERROR;
2940     }
2941     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2942     if(U_FAILURE(status)) {
2943         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2944     } else {
2945         ucol_close(coll);
2946     }
2947 }
2948
2949 /**
2950  * Test for CollationElementIterator previous and next for the whole set of
2951  * unicode characters with normalization on.
2952  */
2953 static void TestNumericCollation(void)
2954 {
2955     UErrorCode status = U_ZERO_ERROR;
2956
2957     const static char *basicTestStrings[]={
2958     "hello1",
2959     "hello2",
2960     "hello2002",
2961     "hello2003",
2962     "hello123456",
2963     "hello1234567",
2964     "hello10000000",
2965     "hello100000000",
2966     "hello1000000000",
2967     "hello10000000000",
2968     };
2969
2970     const static char *preZeroTestStrings[]={
2971     "avery10000",
2972     "avery010000",
2973     "avery0010000",
2974     "avery00010000",
2975     "avery000010000",
2976     "avery0000010000",
2977     "avery00000010000",
2978     "avery000000010000",
2979     };
2980
2981     const static char *thirtyTwoBitNumericStrings[]={
2982     "avery42949672960",
2983     "avery42949672961",
2984     "avery42949672962",
2985     "avery429496729610"
2986     };
2987
2988      const static char *longNumericStrings[]={
2989      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2990         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2991         are treated as multiple collation elements. */
2992     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2993     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2994     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2995     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2996     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2997     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2998     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
2999     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3000     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3001     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3002     };
3003
3004     const static char *supplementaryDigits[] = {
3005       "\\uD835\\uDFCE", /* 0 */
3006       "\\uD835\\uDFCF", /* 1 */
3007       "\\uD835\\uDFD0", /* 2 */
3008       "\\uD835\\uDFD1", /* 3 */
3009       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3010       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3011       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3012       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3013       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3014       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3015     };
3016
3017     const static char *foreignDigits[] = {
3018       "\\u0661",
3019         "\\u0662",
3020         "\\u0663",
3021       "\\u0661\\u0660",
3022       "\\u0661\\u0662",
3023       "\\u0661\\u0663",
3024       "\\u0662\\u0660",
3025       "\\u0662\\u0662",
3026       "\\u0662\\u0663",
3027       "\\u0663\\u0660",
3028       "\\u0663\\u0662",
3029       "\\u0663\\u0663"
3030     };
3031
3032     const static char *evenZeroes[] = {
3033       "2000",
3034       "2001",
3035         "2002",
3036         "2003"
3037     };
3038
3039     UColAttribute att = UCOL_NUMERIC_COLLATION;
3040     UColAttributeValue val = UCOL_ON;
3041
3042     /* Open our collator. */
3043     UCollator* coll = ucol_open("root", &status);
3044     if (U_FAILURE(status)){
3045         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3046               myErrorName(status));
3047         return;
3048     }
3049     genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3050     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3051     genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3052     genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3053     genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3054     genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3055
3056     /* Setting up our collator to do digits. */
3057     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3058     if (U_FAILURE(status)){
3059         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3060               myErrorName(status));
3061         return;
3062     }
3063
3064     /*
3065        Testing that prepended zeroes still yield the correct collation behavior.
3066        We expect that every element in our strings array will be equal.
3067     */
3068     genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3069
3070     ucol_close(coll);
3071 }
3072
3073 static void TestTibetanConformance(void)
3074 {
3075     const char* test[] = {
3076         "\\u0FB2\\u0591\\u0F71\\u0061",
3077         "\\u0FB2\\u0F71\\u0061"
3078     };
3079
3080     UErrorCode status = U_ZERO_ERROR;
3081     UCollator *coll = ucol_open("", &status);
3082     UChar source[100];
3083     UChar target[100];
3084     int result;
3085     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3086     if (U_SUCCESS(status)) {
3087         u_unescape(test[0], source, 100);
3088         u_unescape(test[1], target, 100);
3089         doTest(coll, source, target, UCOL_EQUAL);
3090         result = ucol_strcoll(coll, source, -1,   target, -1);
3091         log_verbose("result %d\n", result);
3092         if (UCOL_EQUAL != result) {
3093             log_err("Tibetan comparison error\n");
3094         }
3095     }
3096     ucol_close(coll);
3097
3098     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3099 }
3100
/** Hands a two-string Han sample to genericLocaleStarter() for the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *hanPair[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", hanPair, UPRV_LENGTHOF(hanPair));
}
3105
3106 /**
3107  * Iterate through the given iterator, checking to see that all the strings
3108  * in the expected array are present.
3109  * @param expected array of strings we expect to see, or NULL
3110  * @param expectedCount number of elements of expected, or 0
3111  */
3112 static int32_t checkUEnumeration(const char* msg,
3113                                  UEnumeration* iter,
3114                                  const char** expected,
3115                                  int32_t expectedCount) {
3116     UErrorCode ec = U_ZERO_ERROR;
3117     int32_t i = 0, n, j, bit;
3118     int32_t seenMask = 0;
3119
3120     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3121     n = uenum_count(iter, &ec);
3122     if (!assertSuccess("count", &ec)) return -1;
3123     log_verbose("%s = [", msg);
3124     for (;; ++i) {
3125         const char* s = uenum_next(iter, NULL, &ec);
3126         if (!assertSuccess("snext", &ec) || s == NULL) break;
3127         if (i != 0) log_verbose(",");
3128         log_verbose("%s", s);
3129         /* check expected list */
3130         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3131             if ((seenMask&bit) == 0 &&
3132                 uprv_strcmp(s, expected[j]) == 0) {
3133                 seenMask |= bit;
3134                 break;
3135             }
3136         }
3137     }
3138     log_verbose("] (%d)\n", i);
3139     assertTrue("count verified", i==n);
3140     /* did we see all expected strings? */
3141     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3142         if ((seenMask&bit)!=0) {
3143             log_verbose("Ok: \"%s\" seen\n", expected[j]);
3144         } else {
3145             log_err("FAIL: \"%s\" not seen\n", expected[j]);
3146         }
3147     }
3148     return n;
3149 }
3150
3151 /**
3152  * Test new API added for separate collation tree.
3153  */
3154 static void TestSeparateTrees(void) {
3155     UErrorCode ec = U_ZERO_ERROR;
3156     UEnumeration *e = NULL;
3157     int32_t n = -1;
3158     UBool isAvailable;
3159     char loc[256];
3160
3161     static const char* AVAIL[] = { "en", "de" };
3162
3163     static const char* KW[] = { "collation" };
3164
3165     static const char* KWVAL[] = { "phonebook", "stroke" };
3166
3167 #if !UCONFIG_NO_SERVICE
3168     e = ucol_openAvailableLocales(&ec);
3169     if (e != NULL) {
3170         assertSuccess("ucol_openAvailableLocales", &ec);
3171         assertTrue("ucol_openAvailableLocales!=0", e!=0);
3172         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3173         (void)n;    /* Suppress set but not used warnings. */
3174         /* Don't need to check n because we check list */
3175         uenum_close(e);
3176     } else {
3177         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3178     }
3179 #endif
3180
3181     e = ucol_getKeywords(&ec);
3182     if (e != NULL) {
3183         assertSuccess("ucol_getKeywords", &ec);
3184         assertTrue("ucol_getKeywords!=0", e!=0);
3185         n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3186         /* Don't need to check n because we check list */
3187         uenum_close(e);
3188     } else {
3189         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3190     }
3191
3192     e = ucol_getKeywordValues(KW[0], &ec);
3193     if (e != NULL) {
3194         assertSuccess("ucol_getKeywordValues", &ec);
3195         assertTrue("ucol_getKeywordValues!=0", e!=0);
3196         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3197         /* Don't need to check n because we check list */
3198         uenum_close(e);
3199     } else {
3200         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3201     }
3202
3203     /* Try setting a warning before calling ucol_getKeywordValues */
3204     ec = U_USING_FALLBACK_WARNING;
3205     e = ucol_getKeywordValues(KW[0], &ec);
3206     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3207         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3208         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3209         /* Don't need to check n because we check list */
3210         uenum_close(e);
3211     }
3212
3213     /*
3214 U_DRAFT int32_t U_EXPORT2
3215 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3216                              const char* locale, UBool* isAvailable,
3217                              UErrorCode* status);
3218 }
3219 */
3220     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3221                                      &isAvailable, &ec);
3222     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3223         assertEquals("getFunctionalEquivalent(de)", "root", loc);
3224         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3225                    isAvailable == TRUE);
3226     }
3227
3228     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3229                                      &isAvailable, &ec);
3230     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3231         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3232         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3233                    isAvailable == FALSE);
3234     }
3235 }
3236
/*
 * Supersedes TestJ784.
 * Runs two string lists through genericRulesStarter() with a set of
 * "[before 2]" tone-mark rules and through genericLocaleStarter() for "zh".
 */
static void TestBeforePinyin(void) {
    /* One rule chain per vowel, plus a chain for the u-diaeresis forms. */
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Short syllables with and without a macron. */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Accent and case variants of "a" after x/X, then the same followed by x. */
    static const char *accentCaseVariants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(rules, syllables, UPRV_LENGTHOF(syllables));
    genericLocaleStarter("zh", syllables, UPRV_LENGTHOF(syllables));
    genericRulesStarter(rules, accentCaseVariants, UPRV_LENGTHOF(accentCaseVariants));
    genericLocaleStarter("zh", accentCaseVariants, UPRV_LENGTHOF(accentCaseVariants));
}
3297
3298 static void TestBeforeTightening(void) {
3299     static const struct {
3300         const char *rules;
3301         UErrorCode expectedStatus;
3302     } tests[] = {
3303         { "&[before 1]a<x", U_ZERO_ERROR },
3304         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3305         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3306         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3307         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3308         { "&[before 2]a<<x",U_ZERO_ERROR },
3309         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3310         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3311         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3312         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3313         { "&[before 3]a<<<x",U_ZERO_ERROR },
3314         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3315         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3316     };
3317
3318     int32_t i = 0;
3319
3320     UErrorCode status = U_ZERO_ERROR;
3321     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3322     uint32_t rlen = 0;
3323
3324     UCollator *coll = NULL;
3325
3326
3327     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3328         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3329         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3330         if(status != tests[i].expectedStatus) {
3331             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3332                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3333         }
3334         ucol_close(coll);
3335         status = U_ZERO_ERROR;
3336     }
3337
3338 }
3339
3340 /*
3341 &m < a
3342 &[before 1] a < x <<< X << q <<< Q < z
3343 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3344
3345 &m < a
3346 &[before 2] a << x <<< X << q <<< Q < z
3347 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3348
3349 &m < a
3350 &[before 3] a <<< x <<< X << q <<< Q < z
3351 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3352
3353
3354 &m << a
3355 &[before 1] a < x <<< X << q <<< Q < z
3356 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3357
3358 &m << a
3359 &[before 2] a << x <<< X << q <<< Q < z
3360 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3361
3362 &m << a
3363 &[before 3] a <<< x <<< X << q <<< Q < z
3364 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3365
3366
3367 &m <<< a
3368 &[before 1] a < x <<< X << q <<< Q < z
3369 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3370
3371 &m <<< a
3372 &[before 2] a << x <<< X << q <<< Q < z
3373 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3374
3375 &m <<< a
3376 &[before 3] a <<< x <<< X << q <<< Q < z
3377 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3378
3379
3380 &[before 1] s < x <<< X << q <<< Q < z
3381 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3382
3383 &[before 2] s << x <<< X << q <<< Q < z
3384 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3385
3386 &[before 3] s <<< x <<< X << q <<< Q < z
3387 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3388
3389
3390 &[before 1] \u24DC < x <<< X << q <<< Q < z
3391 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3392
3393 &[before 2] \u24DC << x <<< X << q <<< Q < z
3394 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3395
3396 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3397 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3398 */
3399
3400
#if 0
/* requires features not yet supported */
/*
 * Disabled test: for each rule set in the table, hands the listed strings to
 * genericRulesStarter() in the order given by the "assert:" lines of the
 * comment block preceding this section.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } cases[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t caseIdx;

    for (caseIdx = 0; caseIdx < UPRV_LENGTHOF(cases); ++caseIdx) {
        genericRulesStarter(cases[caseIdx].rules, cases[caseIdx].order, cases[caseIdx].size);
    }
}
#endif
3448
3449 static void TestTailorNULL( void ) {
3450     const static char* rule = "&a <<< '\\u0000'";
3451     UErrorCode status = U_ZERO_ERROR;
3452     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3453     uint32_t rlen = 0;
3454     UChar a = 1, null = 0;
3455     UCollationResult res = UCOL_EQUAL;
3456
3457     UCollator *coll = NULL;
3458
3459
3460     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3461     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3462
3463     if(U_FAILURE(status)) {
3464         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3465     } else {
3466         res = ucol_strcoll(coll, &a, 1, &null, 1);
3467
3468         if(res != UCOL_LESS) {
3469             log_err("NULL was not tailored properly!\n");
3470         }
3471     }
3472
3473     ucol_close(coll);
3474 }
3475
3476 static void
3477 TestUpperFirstQuaternary(void)
3478 {
3479   const char* tests[] = { "B", "b", "Bb", "bB" };
3480   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3481   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3482   genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3483 }
3484
3485 static void
3486 TestJ4960(void)
3487 {
3488   const char* tests[] = { "\\u00e2T", "aT" };
3489   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3490   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3491   const char* tests2[] = { "a", "A" };
3492   const char* rule = "&[first tertiary ignorable]=A=a";
3493   UColAttribute att2[] = { UCOL_CASE_LEVEL };
3494   UColAttributeValue attVals2[] = { UCOL_ON };
3495   /* Test whether we correctly ignore primary ignorables on case level when */
3496   /* we have only primary & case level */
3497   genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3498   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3499   /* and case level */
3500   genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3501   /* Test whether completely ignorable letters have case level info (they shouldn't) */
3502   genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3503 }
3504
3505 static void
3506 TestJ5223(void)
3507 {
3508   static const char *test = "this is a test string";
3509   UChar ustr[256];
3510   int32_t ustr_length = u_unescape(test, ustr, 256);
3511   unsigned char sortkey[256];
3512   int32_t sortkey_length;
3513   UErrorCode status = U_ZERO_ERROR;
3514   static UCollator *coll = NULL;
3515   coll = ucol_open("root", &status);
3516   if(U_FAILURE(status)) {
3517     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3518     return;
3519   }
3520   ucol_setStrength(coll, UCOL_PRIMARY);
3521   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3522   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3523   if (U_FAILURE(status)) {
3524     log_err("Failed setting atributes\n");
3525     return;
3526   }
3527   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3528   if (sortkey_length > 256) return;
3529
3530   /* we mark the position where the null byte should be written in advance */
3531   sortkey[sortkey_length-1] = 0xAA;
3532
3533   /* we set the buffer size one byte higher than needed */
3534   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3535     sortkey_length+1);
3536
3537   /* no error occurs (for me) */
3538   if (sortkey[sortkey_length-1] == 0xAA) {
3539     log_err("Hit bug at first try\n");
3540   }
3541
3542   /* we mark the position where the null byte should be written again */
3543   sortkey[sortkey_length-1] = 0xAA;
3544
3545   /* this time we set the buffer size to the exact amount needed */
3546   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3547     sortkey_length);
3548
3549   /* now the trailing null byte is not written */
3550   if (sortkey[sortkey_length-1] == 0xAA) {
3551     log_err("Hit bug at second try\n");
3552   }
3553
3554   ucol_close(coll);
3555 }
3556
3557 /* Regression test for Thai partial sort key problem */
3558 static void
3559 TestJ5232(void)
3560 {
3561     const static char *test[] = {
3562         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3563         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3564     };
3565
3566     genericLocaleStarter("th", test, UPRV_LENGTHOF(test));
3567 }
3568
/*
 * J5367: runs the generic rule-based ordering check for "a" followed by "y"
 * with rules that tailor both a contraction reset ("Ny") and a reset on
 * [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *expectedOrder[] = { "a", "y" };

    genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
3576
3577 static void
3578 TestVI5913(void)
3579 {
3580     UErrorCode status = U_ZERO_ERROR;
3581     int32_t i, j;
3582     UCollator *coll =NULL;
3583     uint8_t  resColl[100], expColl[100];
3584     int32_t  rLen, tLen, ruleLen, sLen, kLen;
3585     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3586     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3587     /*
3588      * Note: Just tailoring &z<ae^ does not work as expected:
3589      * The UCA spec requires for discontiguous contractions that they
3590      * extend an *existing match* by one combining mark at a time.
3591      * Therefore, ae must be a contraction so that the builder finds
3592      * discontiguous contractions for ae^, for example with an intervening underdot.
3593      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3594      */
3595     UChar rule3[256]={
3596         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3597         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3598         0};
3599     static const UChar tData[][20]={
3600         {0x1EAC, 0},
3601         {0x0041, 0x0323, 0x0302, 0},
3602         {0x1EA0, 0x0302, 0},
3603         {0x00C2, 0x0323, 0},
3604         {0x1ED8, 0},  /* O with dot and circumflex */
3605         {0x1ECC, 0x0302, 0},
3606         {0x1EB7, 0},
3607         {0x1EA1, 0x0306, 0},
3608     };
3609     static const UChar tailorData[][20]={
3610         {0x1FA2, 0},  /* Omega with 3 combining marks */
3611         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3612         {0x1FF3, 0x0313, 0x0300, 0},
3613         {0x1F60, 0x0300, 0x0345, 0},
3614         {0x1F62, 0x0345, 0},
3615         {0x1FA0, 0x0300, 0},
3616     };
3617     static const UChar tailorData2[][20]={
3618         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3619         {0x0073, 0x0323, 0x030C, 0},
3620         {0x0073, 0x030C, 0x0323, 0},
3621     };
3622     static const UChar tailorData3[][20]={
3623         {0x007a, 0},  /*  z */
3624         {0x0061, 0x0065, 0},  /*  a + e */
3625         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3626         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3627         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3628         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3629         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3630         {0x00EA, 0},  /* e with circumflex  */
3631     };
3632
3633     /* Test Vietnamese sort. */
3634     coll = ucol_open("vi", &status);
3635     if(U_FAILURE(status)) {
3636         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3637         return;
3638     }
3639     log_verbose("\n\nVI collation:");
3640     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3641         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3642     }
3643     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3644         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3645     }
3646     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3647         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3648     }
3649     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3650         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3651     }
3652
3653     for (j=0; j<8; j++) {
3654         tLen = u_strlen(tData[j]);
3655         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3656         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3657         for(i = 0; i<rLen; i++) {
3658             log_verbose(" %02X", resColl[i]);
3659         }
3660     }
3661
3662     ucol_close(coll);
3663
3664     /* Test Romanian sort. */
3665     coll = ucol_open("ro", &status);
3666     log_verbose("\n\nRO collation:");
3667     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3668         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3669     }
3670     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3671         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3672     }
3673     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3674         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3675     }
3676
3677     for (j=4; j<8; j++) {
3678         tLen = u_strlen(tData[j]);
3679         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3680         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3681         for(i = 0; i<rLen; i++) {
3682             log_verbose(" %02X", resColl[i]);
3683         }
3684     }
3685     ucol_close(coll);
3686
3687     /* Test the precomposed Greek character with 3 combining marks. */
3688     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3689     ruleLen = u_strlen(rule);
3690     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3691     if (U_FAILURE(status)) {
3692         log_err("ucol_openRules failed with %s\n", u_errorName(status));
3693         return;
3694     }
3695     sLen = u_strlen(tailorData[0]);
3696     for (j=1; j<6; j++) {
3697         tLen = u_strlen(tailorData[j]);
3698         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3699             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3700         }
3701     }
3702     /* Test getSortKey. */
3703     tLen = u_strlen(tailorData[0]);
3704     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3705     for (j=0; j<6; j++) {
3706         tLen = u_strlen(tailorData[j]);
3707         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3708         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3709             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3710             for(i = 0; i<rLen; i++) {
3711                 log_err(" %02X", resColl[i]);
3712             }
3713         }
3714     }
3715     ucol_close(coll);
3716
3717     log_verbose("\n\nTailoring test for s with caron:");
3718     ruleLen = u_strlen(rule2);
3719     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3720     tLen = u_strlen(tailorData2[0]);
3721     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3722     for (j=1; j<3; j++) {
3723         tLen = u_strlen(tailorData2[j]);
3724         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3725         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3726             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3727             for(i = 0; i<rLen; i++) {
3728                 log_err(" %02X", resColl[i]);
3729             }
3730         }
3731     }
3732     ucol_close(coll);
3733
3734     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3735     ruleLen = u_strlen(rule3);
3736     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3737     tLen = u_strlen(tailorData3[3]);
3738     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3739     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3740     for(i = 0; i<kLen; i++) {
3741         log_verbose(" %02X", expColl[i]);
3742     }
3743     for (j=4; j<6; j++) {
3744         tLen = u_strlen(tailorData3[j]);
3745         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3746
3747         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3748             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3749             for(i = 0; i<rLen; i++) {
3750                 log_err(" %02X", resColl[i]);
3751             }
3752         }
3753
3754         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3755          for(i = 0; i<rLen; i++) {
3756              log_verbose(" %02X", resColl[i]);
3757          }
3758     }
3759     ucol_close(coll);
3760 }
3761
3762 static void
3763 TestTailor6179(void)
3764 {
3765     UErrorCode status = U_ZERO_ERROR;
3766     int32_t i;
3767     UCollator *coll =NULL;
3768     uint8_t  resColl[100];
3769     int32_t  rLen, tLen, ruleLen;
3770     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3771     static const UChar rule1[]={
3772             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3773             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3774             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3775             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3776     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3777     static const UChar rule2[]={
3778             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3779             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3780             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3781             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3782             0x3C,0x3C,0x20,0x62,0};
3783
3784     static const UChar tData1[][4]={
3785         {0x61, 0},
3786         {0x62, 0},
3787         { 0xFDD0,0x009E, 0}
3788     };
3789     static const UChar tData2[][4]={
3790         {0x61, 0},
3791         {0x62, 0},
3792         { 0xFDD0,0x009E, 0}
3793      };
3794
3795     /*
3796      * These values from FractionalUCA.txt will change,
3797      * and need to be updated here.
3798      * TODO: Make this not check for particular sort keys.
3799      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3800      */
3801     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3802     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3803     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3804     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3805
3806     UParseError parseError;
3807
3808     /* Test [Last Primary ignorable] */
3809
3810     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3811     ruleLen = u_strlen(rule1);
3812     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3813     if (U_FAILURE(status)) {
3814         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3815         return;
3816     }
3817     tLen = u_strlen(tData1[0]);
3818     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3819     if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3820         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3821         for(i = 0; i<rLen; i++) {
3822             log_err(" %02X", resColl[i]);
3823         }
3824         log_err("\n");
3825     }
3826     tLen = u_strlen(tData1[1]);
3827     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3828     if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3829         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3830         for(i = 0; i<rLen; i++) {
3831             log_err(" %02X", resColl[i]);
3832         }
3833         log_err("\n");
3834     }
3835     ucol_close(coll);
3836
3837
3838     /* Test [Last Secondary ignorable] */
3839     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3840     ruleLen = u_strlen(rule2);
3841     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3842     if (U_FAILURE(status)) {
3843         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3844         log_info("  offset=%d  \"%s\" | \"%s\"\n",
3845                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3846         return;
3847     }
3848     tLen = u_strlen(tData2[0]);
3849     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3850     if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3851         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3852         for(i = 0; i<rLen; i++) {
3853             log_err(" %02X", resColl[i]);
3854         }
3855         log_err("\n");
3856     }
3857     tLen = u_strlen(tData2[1]);
3858     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3859     if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3860       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3861       for(i = 0; i<rLen; i++) {
3862         log_err(" %02X", resColl[i]);
3863       }
3864       log_err("\n");
3865     }
3866     ucol_close(coll);
3867 }
3868
3869 static void
3870 TestUCAPrecontext(void)
3871 {
3872     UErrorCode status = U_ZERO_ERROR;
3873     int32_t i, j;
3874     UCollator *coll =NULL;
3875     uint8_t  resColl[100], prevColl[100];
3876     int32_t  rLen, tLen, ruleLen;
3877     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3878     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3879     /* & l middle-dot << a  a is an expansion. */
3880
3881     UChar tData1[][20]={
3882             { 0xb7, 0},  /* standalone middle dot(0xb7) */
3883             { 0x387, 0}, /* standalone middle dot(0x387) */
3884             { 0x61, 0},  /* a */
3885             { 0x6C, 0},  /* l */
3886             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3887             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3888             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3889             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3890             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3891             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3892             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3893      };
3894
3895     log_verbose("\n\nEN collation:");
3896     coll = ucol_open("en", &status);
3897     if (U_FAILURE(status)) {
3898         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3899         return;
3900     }
3901     for (j=0; j<11; j++) {
3902         tLen = u_strlen(tData1[j]);
3903         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3904         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3905             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3906                     j, tData1[j]);
3907         }
3908         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3909         for(i = 0; i<rLen; i++) {
3910             log_verbose(" %02X", resColl[i]);
3911         }
3912         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3913      }
3914      ucol_close(coll);
3915
3916
3917      log_verbose("\n\nJA collation:");
3918      coll = ucol_open("ja", &status);
3919      if (U_FAILURE(status)) {
3920          log_err("Tailoring test: &z <<a|- failed!");
3921          return;
3922      }
3923      for (j=0; j<11; j++) {
3924          tLen = u_strlen(tData1[j]);
3925          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3926          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3927              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3928                      j, tData1[j]);
3929          }
3930          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3931          for(i = 0; i<rLen; i++) {
3932              log_verbose(" %02X", resColl[i]);
3933          }
3934          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3935       }
3936       ucol_close(coll);
3937
3938
3939       log_verbose("\n\nTailoring test: & middle dot < a ");
3940       ruleLen = u_strlen(rule1);
3941       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3942       if (U_FAILURE(status)) {
3943           log_err("Tailoring test: & middle dot < a failed!");
3944           return;
3945       }
3946       for (j=0; j<11; j++) {
3947           tLen = u_strlen(tData1[j]);
3948           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3949           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3950               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3951                       j, tData1[j]);
3952           }
3953           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3954           for(i = 0; i<rLen; i++) {
3955               log_verbose(" %02X", resColl[i]);
3956           }
3957           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3958        }
3959        ucol_close(coll);
3960
3961
3962        log_verbose("\n\nTailoring test: & l middle-dot << a ");
3963        ruleLen = u_strlen(rule2);
3964        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3965        if (U_FAILURE(status)) {
3966            log_err("Tailoring test: & l middle-dot << a failed!");
3967            return;
3968        }
3969        for (j=0; j<11; j++) {
3970            tLen = u_strlen(tData1[j]);
3971            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3972            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3973                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3974                        j, tData1[j]);
3975            }
3976            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3977                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3978                        j, tData1[j]);
3979            }
3980            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3981            for(i = 0; i<rLen; i++) {
3982                log_verbose(" %02X", resColl[i]);
3983            }
3984            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3985         }
3986         ucol_close(coll);
3987 }
3988
3989 static void
3990 TestOutOfBuffer5468(void)
3991 {
3992     static const char *test = "\\u4e00";
3993     UChar ustr[256];
3994     int32_t ustr_length = u_unescape(test, ustr, 256);
3995     unsigned char shortKeyBuf[1];
3996     int32_t sortkey_length;
3997     UErrorCode status = U_ZERO_ERROR;
3998     static UCollator *coll = NULL;
3999
4000     coll = ucol_open("root", &status);
4001     if(U_FAILURE(status)) {
4002       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4003       return;
4004     }
4005     ucol_setStrength(coll, UCOL_PRIMARY);
4006     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4007     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4008     if (U_FAILURE(status)) {
4009       log_err("Failed setting atributes\n");
4010       return;
4011     }
4012
4013     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4014     if (sortkey_length != 4) {
4015         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4016     }
4017     log_verbose("length of sortKey is %d", sortkey_length);
4018     ucol_close(coll);
4019 }
4020
4021 #define TSKC_DATA_SIZE 5
4022 #define TSKC_BUF_SIZE  50
4023 static void
4024 TestSortKeyConsistency(void)
4025 {
4026     UErrorCode icuRC = U_ZERO_ERROR;
4027     UCollator* ucol;
4028     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4029
4030     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4031     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4032     int32_t i, j, i2;
4033
4034     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4035     if (U_FAILURE(icuRC))
4036     {
4037         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4038         return;
4039     }
4040
4041     for (i = 0; i < TSKC_DATA_SIZE; i++)
4042     {
4043         UCharIterator uiter;
4044         uint32_t state[2] = { 0, 0 };
4045         int32_t dataLen = i+1;
4046         for (j=0; j<TSKC_BUF_SIZE; j++)
4047             bufFull[i][j] = bufPart[i][j] = 0;
4048
4049         /* Full sort key */
4050         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4051
4052         /* Partial sort key */
4053         uiter_setString(&uiter, data, dataLen);
4054         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4055         if (U_FAILURE(icuRC))
4056         {
4057             log_err("ucol_nextSortKeyPart failed\n");
4058             ucol_close(ucol);
4059             return;
4060         }
4061
4062         for (i2=0; i2<i; i2++)
4063         {
4064             UBool fullMatch = TRUE;
4065             UBool partMatch = TRUE;
4066             for (j=0; j<TSKC_BUF_SIZE; j++)
4067             {
4068                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4069                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4070             }
4071             if (fullMatch != partMatch) {
4072                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4073                                   : "partial key was consistent, but full key changed\n");
4074                 ucol_close(ucol);
4075                 return;
4076             }
4077         }
4078     }
4079
4080     /*=============================================*/
4081    ucol_close(ucol);
4082 }
4083
4084 /* ticket: 6101 */
4085 static void TestCroatianSortKey(void) {
4086     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4087     UErrorCode status = U_ZERO_ERROR;
4088     UCollator *ucol;
4089     UCharIterator iter;
4090
4091     static const UChar text[] = { 0x0044, 0xD81A };
4092
4093     size_t length = UPRV_LENGTHOF(text);
4094
4095     uint8_t textSortKey[32];
4096     size_t lenSortKey = 32;
4097     size_t actualSortKeyLen;
4098     uint32_t uStateInfo[2] = { 0, 0 };
4099
4100     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4101     if (U_FAILURE(status)) {
4102         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4103         return;
4104     }
4105
4106     uiter_setString(&iter, text, length);
4107
4108     actualSortKeyLen = ucol_nextSortKeyPart(
4109         ucol, &iter, (uint32_t*)uStateInfo,
4110         textSortKey, lenSortKey, &status
4111         );
4112
4113     if (actualSortKeyLen == lenSortKey) {
4114         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4115     }
4116
4117     ucol_close(ucol);
4118 }
4119
4120 /* ticket: 6140 */
4121 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4122  * they are both Hiragana and Katakana
4123  */
4124 #define SORTKEYLEN 50
4125 static void TestHiragana(void) {
4126     UErrorCode status = U_ZERO_ERROR;
4127     UCollator* ucol;
4128     UCollationResult strcollresult;
4129     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4130     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4131     int32_t data1Len = UPRV_LENGTHOF(data1);
4132     int32_t data2Len = UPRV_LENGTHOF(data2);
4133     int32_t i, j;
4134     uint8_t sortKey1[SORTKEYLEN];
4135     uint8_t sortKey2[SORTKEYLEN];
4136
4137     UCharIterator uiter1;
4138     UCharIterator uiter2;
4139     uint32_t state1[2] = { 0, 0 };
4140     uint32_t state2[2] = { 0, 0 };
4141     int32_t keySize1;
4142     int32_t keySize2;
4143
4144     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4145             &status);
4146     if (U_FAILURE(status)) {
4147         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4148         return;
4149     }
4150
4151     /* Start of full sort keys */
4152     /* Full sort key1 */
4153     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4154     /* Full sort key2 */
4155     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4156     if (keySize1 == keySize2) {
4157         for (i = 0; i < keySize1; i++) {
4158             if (sortKey1[i] != sortKey2[i]) {
4159                 log_err("Full sort keys are different. Should be equal.");
4160             }
4161         }
4162     } else {
4163         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4164     }
4165     /* End of full sort keys */
4166
4167     /* Start of partial sort keys */
4168     /* Partial sort key1 */
4169     uiter_setString(&uiter1, data1, data1Len);
4170     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4171     /* Partial sort key2 */
4172     uiter_setString(&uiter2, data2, data2Len);
4173     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4174     if (U_SUCCESS(status) && keySize1 == keySize2) {
4175         for (j = 0; j < keySize1; j++) {
4176             if (sortKey1[j] != sortKey2[j]) {
4177                 log_err("Partial sort keys are different. Should be equal");
4178             }
4179         }
4180     } else {
4181         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4182     }
4183     /* End of partial sort keys */
4184
4185     /* Start of strcoll */
4186     /* Use ucol_strcoll() to determine ordering */
4187     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4188     if (strcollresult != UCOL_EQUAL) {
4189         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4190     }
4191
4192     ucol_close(ucol);
4193 }
4194
4195 /* Convenient struct for running collation tests */
4196 typedef struct {
4197   const UChar source[MAX_TOKEN_LEN];  /* String on left */
4198   const UChar target[MAX_TOKEN_LEN];  /* String on right */
4199   UCollationResult result;            /* -1, 0 or +1, depending on collation */
4200 } OneTestCase;
4201
4202 /*
4203  * Utility function to test one collation test case.
4204  * @param testcases Array of test cases.
4205  * @param n_testcases Size of the array testcases.
4206  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4207  * @param n_rules Size of the array str_rules.
4208  */
4209 static void doTestOneTestCase(const OneTestCase testcases[],
4210                               int n_testcases,
4211                               const char* str_rules[],
4212                               int n_rules)
4213 {
4214   int rule_no, testcase_no;
4215   UChar rule[500];
4216   int32_t length = 0;
4217   UErrorCode status = U_ZERO_ERROR;
4218   UParseError parse_error;
4219   UCollator  *myCollation;
4220
4221   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4222
4223     length = u_unescape(str_rules[rule_no], rule, 500);
4224     if (length == 0) {
4225         log_err("ERROR: The rule cannot be unescaped: %s\n");
4226         return;
4227     }
4228     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4229     if(U_FAILURE(status)){
4230         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4231         log_info("  offset=%d  \"%s\" | \"%s\"\n",
4232                  parse_error.offset,
4233                  aescstrdup(parse_error.preContext, -1),
4234                  aescstrdup(parse_error.postContext, -1));
4235         return;
4236     }
4237     log_verbose("Testing the <<* syntax\n");
4238     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4239     ucol_setStrength(myCollation, UCOL_TERTIARY);
4240     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4241       doTest(myCollation,
4242              testcases[testcase_no].source,
4243              testcases[testcase_no].target,
4244              testcases[testcase_no].result
4245              );
4246     }
4247     ucol_close(myCollation);
4248   }
4249 }
4250
4251 const static OneTestCase rangeTestcases[] = {
4252   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
4253   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
4254   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
4255
4256   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
4257   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
4258   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
4259   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
4260   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
4261
4262   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
4263   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
4264   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
4265   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
4266
4267   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
4268   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
4269   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
4270   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
4271   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
4272   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
4273   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
4274   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
4275 };
4276
4277 static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4278
4279 const static OneTestCase rangeTestcasesSupplemental[] = {
4280   { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
4281   { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
4282   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
4283   { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4284   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4285   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4286   { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4287 };
4288
4289 static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4290
4291 const static OneTestCase rangeTestcasesQwerty[] = {
4292   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
4293   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
4294
4295   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
4296   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
4297
4298   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
4299   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
4300
4301   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
4302   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
4303
4304   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4305     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
4306   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4307     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
4308 };
4309
4310 static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4311
4312 static void TestSameStrengthList(void)
4313 {
4314   const char* strRules[] = {
4315     /* Normal */
4316     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4317
4318     /* Lists */
4319     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4320   };
4321   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4322 }
4323
4324 static void TestSameStrengthListQuoted(void)
4325 {
4326   const char* strRules[] = {
4327     /* Lists with quoted characters */
4328     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4329     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4330
4331     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4332     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4333
4334     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4335     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4336   };
4337   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4338 }
4339
4340 static void TestSameStrengthListSupplemental(void)
4341 {
4342   const char* strRules[] = {
4343     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4344     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4345     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4346     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4347   };
4348   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4349 }
4350
4351 static void TestSameStrengthListQwerty(void)
4352 {
4353   const char* strRules[] = {
4354     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4355     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4356     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4357     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4358     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4359
4360     /* Quoted characters also will work if two quoted characters are not consecutive.  */
4361     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4362
4363     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4364     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4365
4366  };
4367   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4368 }
4369
4370 static void TestSameStrengthListQuotedQwerty(void)
4371 {
4372   const char* strRules[] = {
4373     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4374     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4375     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4376
4377     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4378     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4379    };
4380   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4381 }
4382
4383 static void TestSameStrengthListRanges(void)
4384 {
4385   const char* strRules[] = {
4386     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4387   };
4388   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4389 }
4390
4391 static void TestSameStrengthListSupplementalRanges(void)
4392 {
4393   const char* strRules[] = {
4394     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4395     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4396   };
4397   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4398 }
4399
4400 static void TestSpecialCharacters(void)
4401 {
4402   const char* strRules[] = {
4403     /* Normal */
4404     "&';'<'+'<','<'-'<'&'<'*'",
4405
4406     /* List */
4407     "&';'<*'+,-&*'",
4408
4409     /* Range */
4410     "&';'<*'+'-'-&*'",
4411   };
4412
4413   const static OneTestCase specialCharacterStrings[] = {
4414     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4415     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4416     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4417     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4418   };
4419   doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4420 }
4421
4422 static void TestPrivateUseCharacters(void)
4423 {
4424   const char* strRules[] = {
4425     /* Normal */
4426     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4427     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4428   };
4429
4430   const static OneTestCase privateUseCharacterStrings[] = {
4431     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4432     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4433     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4434     { {0xe2da}, {0xe2db}, UCOL_LESS },
4435     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4436     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4437   };
4438   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4439 }
4440
4441 static void TestPrivateUseCharactersInList(void)
4442 {
4443   const char* strRules[] = {
4444     /* List */
4445     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4446     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4447     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4448   };
4449
4450   const static OneTestCase privateUseCharacterStrings[] = {
4451     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4452     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4453     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4454     { {0xe2da}, {0xe2db}, UCOL_LESS },
4455     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4456     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4457   };
4458   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4459 }
4460
4461 static void TestPrivateUseCharactersInRange(void)
4462 {
4463   const char* strRules[] = {
4464     /* Range */
4465     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4466     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4467     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4468   };
4469
4470   const static OneTestCase privateUseCharacterStrings[] = {
4471     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4472     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4473     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4474     { {0xe2da}, {0xe2db}, UCOL_LESS },
4475     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4476     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4477   };
4478   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4479 }
4480
4481 static void TestInvalidListsAndRanges(void)
4482 {
4483   const char* invalidRules[] = {
4484     /* Range not in starred expression */
4485     "&\\ufffe<\\uffff-\\U00010002",
4486
4487     /* Range without start */
4488     "&a<*-c",
4489
4490     /* Range without end */
4491     "&a<*b-",
4492
4493     /* More than one hyphen */
4494     "&a<*b-g-l",
4495
4496     /* Range in the wrong order */
4497     "&a<*k-b",
4498
4499   };
4500
4501   UChar rule[500];
4502   UErrorCode status = U_ZERO_ERROR;
4503   UParseError parse_error;
4504   int n_rules = UPRV_LENGTHOF(invalidRules);
4505   int rule_no;
4506   int length;
4507   UCollator  *myCollation;
4508
4509   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4510
4511     length = u_unescape(invalidRules[rule_no], rule, 500);
4512     if (length == 0) {
4513         log_err("ERROR: The rule cannot be unescaped: %s\n");
4514         return;
4515     }
4516     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4517     (void)myCollation;      /* Suppress set but not used warning. */
4518     if(!U_FAILURE(status)){
4519       log_err("ERROR: Could not cause a failure as expected: \n");
4520     }
4521     status = U_ZERO_ERROR;
4522   }
4523 }
4524
4525 /*
4526  * This test ensures that characters placed before a character in a different script have the same lead byte
4527  * in their collation key before and after script reordering.
4528  */
4529 static void TestBeforeRuleWithScriptReordering(void)
4530 {
4531     UParseError error;
4532     UErrorCode status = U_ZERO_ERROR;
4533     UCollator  *myCollation;
4534     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4535     UChar rules[500];
4536     uint32_t rulesLength = 0;
4537     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4538     UCollationResult collResult;
4539
4540     uint8_t baseKey[256];
4541     uint32_t baseKeyLength;
4542     uint8_t beforeKey[256];
4543     uint32_t beforeKeyLength;
4544
4545     UChar base[] = { 0x03b1 }; /* base */
4546     int32_t baseLen = UPRV_LENGTHOF(base);
4547
4548     UChar before[] = { 0x0e01 }; /* ko kai */
4549     int32_t beforeLen = UPRV_LENGTHOF(before);
4550
4551     /*UChar *data[] = { before, base };
4552     genericRulesStarter(srules, data, 2);*/
4553
4554     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4555
4556     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4557     (void)baseKeyLength;
4558
4559     /* build collator */
4560     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4561
4562     rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4563     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4564     if(U_FAILURE(status)) {
4565         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4566         return;
4567     }
4568
4569     /* check collation results - before rule applied but not script reordering */
4570     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4571     if (collResult != UCOL_GREATER) {
4572         log_err("Collation result not correct before script reordering = %d\n", collResult);
4573     }
4574
4575     /* check the lead byte of the collation keys before script reordering */
4576     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4577     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4578     if (baseKey[0] != beforeKey[0]) {
4579       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4580    }
4581
4582     /* reorder the scripts */
4583     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4584     if(U_FAILURE(status)) {
4585         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4586         return;
4587     }
4588
4589     /* check collation results - before rule applied and after script reordering */
4590     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4591     if (collResult != UCOL_GREATER) {
4592         log_err("Collation result not correct after script reordering = %d\n", collResult);
4593     }
4594
4595     /* check the lead byte of the collation keys after script reordering */
4596     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4597     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4598     if (baseKey[0] != beforeKey[0]) {
4599         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4600     }
4601
4602     ucol_close(myCollation);
4603 }
4604
4605 /*
4606  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4607  */
4608 static void TestNonLeadBytesDuringCollationReordering(void)
4609 {
4610     UErrorCode status = U_ZERO_ERROR;
4611     UCollator  *myCollation;
4612     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4613
4614     uint8_t baseKey[256];
4615     uint32_t baseKeyLength;
4616     uint8_t reorderKey[256];
4617     uint32_t reorderKeyLength;
4618
4619     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4620
4621     uint32_t i;
4622
4623
4624     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4625
4626     /* build collator tertiary */
4627     myCollation = ucol_open("", &status);
4628     ucol_setStrength(myCollation, UCOL_TERTIARY);
4629     if(U_FAILURE(status)) {
4630         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4631         return;
4632     }
4633     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4634
4635     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4636     if(U_FAILURE(status)) {
4637         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4638         return;
4639     }
4640     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4641
4642     if (baseKeyLength != reorderKeyLength) {
4643         log_err("Key lengths not the same during reordering.\n");
4644         return;
4645     }
4646
4647     for (i = 1; i < baseKeyLength; i++) {
4648         if (baseKey[i] != reorderKey[i]) {
4649             log_err("Collation key bytes not the same at position %d.\n", i);
4650             return;
4651         }
4652     }
4653     ucol_close(myCollation);
4654
4655     /* build collator quaternary */
4656     myCollation = ucol_open("", &status);
4657     ucol_setStrength(myCollation, UCOL_QUATERNARY);
4658     if(U_FAILURE(status)) {
4659         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4660         return;
4661     }
4662     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4663
4664     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4665     if(U_FAILURE(status)) {
4666         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4667         return;
4668     }
4669     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4670
4671     if (baseKeyLength != reorderKeyLength) {
4672         log_err("Key lengths not the same during reordering.\n");
4673         return;
4674     }
4675
4676     for (i = 1; i < baseKeyLength; i++) {
4677         if (baseKey[i] != reorderKey[i]) {
4678             log_err("Collation key bytes not the same at position %d.\n", i);
4679             return;
4680         }
4681     }
4682     ucol_close(myCollation);
4683 }
4684
4685 /*
4686  * Test reordering API.
4687  */
4688 static void TestReorderingAPI(void)
4689 {
4690     UErrorCode status = U_ZERO_ERROR;
4691     UCollator  *myCollation;
4692     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4693     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4694     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4695     int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4696     UCollationResult collResult;
4697     int32_t retrievedReorderCodesLength;
4698     int32_t retrievedReorderCodes[10];
4699     UChar greekString[] = { 0x03b1 };
4700     UChar punctuationString[] = { 0x203e };
4701     int loopIndex;
4702
4703     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4704
4705     /* build collator tertiary */
4706     myCollation = ucol_open("", &status);
4707     ucol_setStrength(myCollation, UCOL_TERTIARY);
4708     if(U_FAILURE(status)) {
4709         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4710         return;
4711     }
4712
4713     /* set the reorderding */
4714     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4715     if (U_FAILURE(status)) {
4716         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4717         return;
4718     }
4719
4720     /* get the reordering */
4721     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4722     if (status != U_BUFFER_OVERFLOW_ERROR) {
4723         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4724         return;
4725     }
4726     status = U_ZERO_ERROR;
4727     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4728         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4729         return;
4730     }
4731     /* now let's really get it */
4732     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4733     if (U_FAILURE(status)) {
4734         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4735         return;
4736     }
4737     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4738         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4739         return;
4740     }
4741     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4742         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4743             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4744             return;
4745         }
4746     }
4747     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4748     if (collResult != UCOL_LESS) {
4749         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4750         return;
4751     }
4752
4753     /* clear the reordering */
4754     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4755     if (U_FAILURE(status)) {
4756         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4757         return;
4758     }
4759
4760     /* get the reordering again */
4761     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4762     if (retrievedReorderCodesLength != 0) {
4763         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4764         return;
4765     }
4766
4767     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4768     if (collResult != UCOL_GREATER) {
4769         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4770         return;
4771     }
4772
4773     /* clear the reordering using [NONE] */
4774     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4775     if (U_FAILURE(status)) {
4776         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4777         return;
4778     }
4779
4780     /* get the reordering again */
4781     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4782     if (retrievedReorderCodesLength != 0) {
4783         log_err_status(status,
4784                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4785                        retrievedReorderCodesLength);
4786         return;
4787     }
4788
4789     /* test for error condition on duplicate reorder codes */
4790     ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4791     if (!U_FAILURE(status)) {
4792         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4793         return;
4794     }
4795
4796     status = U_ZERO_ERROR;
4797     /* test for reorder codes after a reset code */
4798     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4799     if (!U_FAILURE(status)) {
4800         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4801         return;
4802     }
4803
4804     ucol_close(myCollation);
4805 }
4806
4807 /*
4808  * Test reordering API.
4809  */
4810 static void TestReorderingAPIWithRuleCreatedCollator(void)
4811 {
4812     UErrorCode status = U_ZERO_ERROR;
4813     UCollator  *myCollation;
4814     UChar rules[90];
4815     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4816     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4817     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4818     UCollationResult collResult;
4819     int32_t retrievedReorderCodesLength;
4820     int32_t retrievedReorderCodes[10];
4821     static const UChar greekString[] = { 0x03b1 };
4822     static const UChar punctuationString[] = { 0x203e };
4823     static const UChar hanString[] = { 0x65E5, 0x672C };
4824     int loopIndex;
4825
4826     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4827
4828     /* build collator from rules */
4829     u_uastrcpy(rules, "[reorder Hani Grek]");
4830     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4831     if(U_FAILURE(status)) {
4832         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4833         return;
4834     }
4835
4836     /* get the reordering */
4837     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4838     if (U_FAILURE(status)) {
4839         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4840         return;
4841     }
4842     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4843         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4844         return;
4845     }
4846     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4847         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4848             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4849             return;
4850         }
4851     }
4852     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4853     if (collResult != UCOL_GREATER) {
4854         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4855         return;
4856     }
4857
4858     /* set the reordering */
4859     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4860     if (U_FAILURE(status)) {
4861         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4862         return;
4863     }
4864
4865     /* get the reordering */
4866     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4867     if (status != U_BUFFER_OVERFLOW_ERROR) {
4868         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4869         return;
4870     }
4871     status = U_ZERO_ERROR;
4872     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4873         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4874         return;
4875     }
4876     /* now let's really get it */
4877     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4878     if (U_FAILURE(status)) {
4879         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4880         return;
4881     }
4882     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4883         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4884         return;
4885     }
4886     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4887         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4888             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4889             return;
4890         }
4891     }
4892     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4893     if (collResult != UCOL_LESS) {
4894         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4895         return;
4896     }
4897
4898     /* clear the reordering */
4899     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4900     if (U_FAILURE(status)) {
4901         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4902         return;
4903     }
4904
4905     /* get the reordering again */
4906     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4907     if (retrievedReorderCodesLength != 0) {
4908         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4909         return;
4910     }
4911
4912     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4913     if (collResult != UCOL_GREATER) {
4914         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4915         return;
4916     }
4917
4918     /* reset the reordering */
4919     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4920     if (U_FAILURE(status)) {
4921         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4922         return;
4923     }
4924     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4925     if (U_FAILURE(status)) {
4926         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4927         return;
4928     }
4929     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4930         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4931         return;
4932     }
4933     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4934         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4935             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4936             return;
4937         }
4938     }
4939
4940     ucol_close(myCollation);
4941 }
4942
4943 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4944     int32_t i;
4945     for (i = 0; i < length; ++i) {
4946         if (expectedScript == scripts[i]) { return TRUE; }
4947     }
4948     return FALSE;
4949 }
4950
4951 static void TestEquivalentReorderingScripts(void) {
4952     // Beginning with ICU 55, collation reordering moves single scripts
4953     // rather than groups of scripts,
4954     // except where scripts share a range and sort primary-equal.
4955     UErrorCode status = U_ZERO_ERROR;
4956     int32_t equivalentScripts[100];
4957     int32_t length;
4958     int i;
4959     int32_t prevScript;
4960     /* These scripts are expected to be equivalent. */
4961     static const int32_t expectedScripts[] = {
4962         USCRIPT_HIRAGANA,
4963         USCRIPT_KATAKANA,
4964         USCRIPT_KATAKANA_OR_HIRAGANA
4965     };
4966
4967     equivalentScripts[0] = 0;
4968     length = ucol_getEquivalentReorderCodes(
4969             USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4970     if (U_FAILURE(status)) {
4971         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4972         return;
4973     }
4974     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4975         log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4976                 "length expected 1, was = %d; expected [%d] was [%d]\n",
4977                 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4978     }
4979
4980     length = ucol_getEquivalentReorderCodes(
4981             USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4982     if (U_FAILURE(status)) {
4983         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4984         return;
4985     }
4986     if (length != UPRV_LENGTHOF(expectedScripts)) {
4987         log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4988                 "expected %d, was = %d\n",
4989                 UPRV_LENGTHOF(expectedScripts), length);
4990     }
4991     prevScript = -1;
4992     for (i = 0; i < length; ++i) {
4993         int32_t script = equivalentScripts[i];
4994         if (script <= prevScript) {
4995             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4996         }
4997         prevScript = script;
4998     }
4999     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5000         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5001             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5002                     expectedScripts[i]);
5003         }
5004     }
5005
5006     length = ucol_getEquivalentReorderCodes(
5007             USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5008     if (U_FAILURE(status)) {
5009         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5010         return;
5011     }
5012     if (length != UPRV_LENGTHOF(expectedScripts)) {
5013         log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5014                 "expected %d, was = %d\n",
5015                 UPRV_LENGTHOF(expectedScripts), length);
5016     }
5017     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5018         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5019             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5020                     expectedScripts[i]);
5021         }
5022     }
5023
5024     length = ucol_getEquivalentReorderCodes(
5025             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5026     if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5027         log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5028                 "expected %d, was = %d\n",
5029                 UPRV_LENGTHOF(expectedScripts), length);
5030     }
5031
5032     length = ucol_getEquivalentReorderCodes(
5033             USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5034     if (U_FAILURE(status) || length != 3) {
5035         log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5036                 "expected 3, was = %d\n", length);
5037     }
5038     length = ucol_getEquivalentReorderCodes(
5039             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5040     if (U_FAILURE(status) || length != 3) {
5041         log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5042                 "expected 3, was = %d\n", length);
5043     }
5044     length = ucol_getEquivalentReorderCodes(
5045             USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5046     if (U_FAILURE(status) || length != 3) {
5047         log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5048                 "expected 3, was = %d\n", length);
5049     }
5050
5051     length = ucol_getEquivalentReorderCodes(
5052             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5053     if (U_FAILURE(status) || length != 2) {
5054         log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5055                 "expected 2, was = %d\n", length);
5056     }
5057     length = ucol_getEquivalentReorderCodes(
5058             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5059     if (U_FAILURE(status) || length != 2) {
5060         log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5061                 "expected 2, was = %d\n", length);
5062     }
5063 }
5064
5065 static void TestReorderingAcrossCloning(void)
5066 {
5067     UErrorCode status = U_ZERO_ERROR;
5068     UCollator  *myCollation;
5069     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5070     UCollator *clonedCollation;
5071     int32_t retrievedReorderCodesLength;
5072     int32_t retrievedReorderCodes[10];
5073     int loopIndex;
5074
5075     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5076
5077     /* build collator tertiary */
5078     myCollation = ucol_open("", &status);
5079     ucol_setStrength(myCollation, UCOL_TERTIARY);
5080     if(U_FAILURE(status)) {
5081         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5082         return;
5083     }
5084
5085     /* set the reorderding */
5086     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5087     if (U_FAILURE(status)) {
5088         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5089         return;
5090     }
5091
5092     /* clone the collator */
5093     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5094     if (U_FAILURE(status)) {
5095         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5096         return;
5097     }
5098
5099     /* get the reordering */
5100     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5101     if (U_FAILURE(status)) {
5102         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5103         return;
5104     }
5105     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5106         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5107         return;
5108     }
5109     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5110         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5111             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5112             return;
5113         }
5114     }
5115
5116     /*uprv_free(buffer);*/
5117     ucol_close(myCollation);
5118     ucol_close(clonedCollation);
5119 }
5120
5121 /*
5122  * Utility function to test one collation reordering test case set.
5123  * @param testcases Array of test cases.
5124  * @param n_testcases Size of the array testcases.
5125  * @param reorderTokens Array of reordering codes.
5126  * @param reorderTokensLen Size of the array reorderTokens.
5127  */
5128 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5129 {
5130     uint32_t testCaseNum;
5131     UErrorCode status = U_ZERO_ERROR;
5132     UCollator  *myCollation;
5133
5134     myCollation = ucol_open("", &status);
5135     if (U_FAILURE(status)) {
5136         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5137         return;
5138     }
5139     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5140     if(U_FAILURE(status)) {
5141         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5142         return;
5143     }
5144
5145     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5146         doTest(myCollation,
5147             testCases[testCaseNum].source,
5148             testCases[testCaseNum].target,
5149             testCases[testCaseNum].result
5150         );
5151     }
5152     ucol_close(myCollation);
5153 }
5154
5155 static void TestGreekFirstReorder(void)
5156 {
5157     const char* strRules[] = {
5158         "[reorder Grek]"
5159     };
5160
5161     const int32_t apiRules[] = {
5162         USCRIPT_GREEK
5163     };
5164
5165     const static OneTestCase privateUseCharacterStrings[] = {
5166         { {0x0391}, {0x0391}, UCOL_EQUAL },
5167         { {0x0041}, {0x0391}, UCOL_GREATER },
5168         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5169         { {0x0060}, {0x0391}, UCOL_LESS },
5170         { {0x0391}, {0xe2dc}, UCOL_LESS },
5171         { {0x0391}, {0x0060}, UCOL_GREATER },
5172     };
5173
5174     /* Test rules creation */
5175     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5176
5177     /* Test collation reordering API */
5178     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5179 }
5180
5181 static void TestGreekLastReorder(void)
5182 {
5183     const char* strRules[] = {
5184         "[reorder Zzzz Grek]"
5185     };
5186
5187     const int32_t apiRules[] = {
5188         USCRIPT_UNKNOWN, USCRIPT_GREEK
5189     };
5190
5191     const static OneTestCase privateUseCharacterStrings[] = {
5192         { {0x0391}, {0x0391}, UCOL_EQUAL },
5193         { {0x0041}, {0x0391}, UCOL_LESS },
5194         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5195         { {0x0060}, {0x0391}, UCOL_LESS },
5196         { {0x0391}, {0xe2dc}, UCOL_GREATER },
5197     };
5198
5199     /* Test rules creation */
5200     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5201
5202     /* Test collation reordering API */
5203     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5204 }
5205
5206 static void TestNonScriptReorder(void)
5207 {
5208     const char* strRules[] = {
5209         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5210     };
5211
5212     const int32_t apiRules[] = {
5213         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5214         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5215         UCOL_REORDER_CODE_CURRENCY
5216     };
5217
5218     const static OneTestCase privateUseCharacterStrings[] = {
5219         { {0x0391}, {0x0041}, UCOL_LESS },
5220         { {0x0041}, {0x0391}, UCOL_GREATER },
5221         { {0x0060}, {0x0041}, UCOL_LESS },
5222         { {0x0060}, {0x0391}, UCOL_GREATER },
5223         { {0x0024}, {0x0041}, UCOL_GREATER },
5224     };
5225
5226     /* Test rules creation */
5227     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5228
5229     /* Test collation reordering API */
5230     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5231 }
5232
5233 static void TestHaniReorder(void)
5234 {
5235     const char* strRules[] = {
5236         "[reorder Hani]"
5237     };
5238     const int32_t apiRules[] = {
5239         USCRIPT_HAN
5240     };
5241
5242     const static OneTestCase privateUseCharacterStrings[] = {
5243         { {0x4e00}, {0x0041}, UCOL_LESS },
5244         { {0x4e00}, {0x0060}, UCOL_GREATER },
5245         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5246         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5247         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5248         { {0xfa27}, {0x0041}, UCOL_LESS },
5249         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5250     };
5251
5252     /* Test rules creation */
5253     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5254
5255     /* Test collation reordering API */
5256     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5257 }
5258
5259 static void TestHaniReorderWithOtherRules(void)
5260 {
5261     const char* strRules[] = {
5262         "[reorder Hani] &b<a"
5263     };
5264     /*const int32_t apiRules[] = {
5265         USCRIPT_HAN
5266     };*/
5267
5268     const static OneTestCase privateUseCharacterStrings[] = {
5269         { {0x4e00}, {0x0041}, UCOL_LESS },
5270         { {0x4e00}, {0x0060}, UCOL_GREATER },
5271         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5272         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5273         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5274         { {0xfa27}, {0x0041}, UCOL_LESS },
5275         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5276         { {0x0062}, {0x0061}, UCOL_LESS },
5277     };
5278
5279     /* Test rules creation */
5280     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5281 }
5282
5283 static void TestMultipleReorder(void)
5284 {
5285     const char* strRules[] = {
5286         "[reorder Grek Zzzz DIGIT Latn Hani]"
5287     };
5288
5289     const int32_t apiRules[] = {
5290         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5291     };
5292
5293     const static OneTestCase collationTestCases[] = {
5294         { {0x0391}, {0x0041}, UCOL_LESS},
5295         { {0x0031}, {0x0041}, UCOL_LESS},
5296         { {0x0041}, {0x4e00}, UCOL_LESS},
5297     };
5298
5299     /* Test rules creation */
5300     doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5301
5302     /* Test collation reordering API */
5303     doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5304 }
5305
5306 /*
5307  * Test that covers issue reported in ticket 8814
5308  */
5309 static void TestReorderWithNumericCollation(void)
5310 {
5311     UErrorCode status = U_ZERO_ERROR;
5312     UCollator  *myCollation;
5313     UCollator  *myReorderCollation;
5314     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5315     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5316     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5317     UChar fortyS[] = { 0x0053 };
5318     UChar fortyThreeP[] = { 0x0050 };
5319     uint8_t fortyS_sortKey[128];
5320     int32_t fortyS_sortKey_Length;
5321     uint8_t fortyThreeP_sortKey[128];
5322     int32_t fortyThreeP_sortKey_Length;
5323     uint8_t fortyS_sortKey_reorder[128];
5324     int32_t fortyS_sortKey_reorder_Length;
5325     uint8_t fortyThreeP_sortKey_reorder[128];
5326     int32_t fortyThreeP_sortKey_reorder_Length;
5327     UCollationResult collResult;
5328     UCollationResult collResultReorder;
5329
5330     log_verbose("Testing reordering with and without numeric collation\n");
5331
5332     /* build collator tertiary with numeric */
5333     myCollation = ucol_open("", &status);
5334     /*
5335     ucol_setStrength(myCollation, UCOL_TERTIARY);
5336     */
5337     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5338     if(U_FAILURE(status)) {
5339         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5340         return;
5341     }
5342
5343     /* build collator tertiary with numeric and reordering */
5344     myReorderCollation = ucol_open("", &status);
5345     /*
5346     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5347     */
5348     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5349     ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5350     if(U_FAILURE(status)) {
5351         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5352         return;
5353     }
5354
5355     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5356     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5357     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5358     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5359
5360     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5361         log_err_status(status, "ERROR: couldn't generate sort keys\n");
5362         return;
5363     }
5364     collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5365     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5366     /*
5367     fprintf(stderr, "\tcollResult = %x\n", collResult);
5368     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5369     fprintf(stderr, "\nfortyS\n");
5370     for (i = 0; i < fortyS_sortKey_Length; i++) {
5371         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5372     }
5373     fprintf(stderr, "\nfortyThreeP\n");
5374     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5375         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5376     }
5377     */
5378     if (collResult != collResultReorder) {
5379         log_err_status(status, "ERROR: collation results should have been the same.\n");
5380         return;
5381     }
5382
5383     ucol_close(myCollation);
5384     ucol_close(myReorderCollation);
5385 }
5386
/*
 * Compare two zero-terminated byte arrays byte by byte, as unsigned values.
 * @param a First array; must end with a 0 byte.
 * @param b Second array; must end with a 0 byte.
 * @return 0 if both arrays are identical up to and including the terminating
 *         0, -1 if a is smaller at the first differing byte, 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  while (*a == *b) {
    if (*a == 0) {
      /* reached the terminator of both arrays without a difference */
      return 0;
    }
    ++a;
    ++b;
  }
  if (*a < *b) {
    return -1;
  }
  return 1;
}
5396
5397 static void TestImportRulesDeWithPhonebook(void)
5398 {
5399   const char* normalRules[] = {
5400     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5401     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5402     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5403   };
5404   const OneTestCase normalTests[] = {
5405     { {0x00e6}, {0x00c6}, UCOL_LESS},
5406     { {0x00fc}, {0x00dc}, UCOL_GREATER},
5407   };
5408
5409   const char* importRules[] = {
5410     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5411     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5412     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5413   };
5414   const OneTestCase importTests[] = {
5415     { {0x00e6}, {0x00c6}, UCOL_LESS},
5416     { {0x00fc}, {0x00dc}, UCOL_LESS},
5417   };
5418
5419   doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5420   doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5421 }
5422
5423 #if 0
5424 static void TestImportRulesFiWithEor(void)
5425 {
5426   /* DUCET. */
5427   const char* defaultRules[] = {
5428     "&a<b",                                    /* Dummy rule. */
5429   };
5430
5431   const OneTestCase defaultTests[] = {
5432     { {0x0110}, {0x00F0}, UCOL_LESS},
5433     { {0x00a3}, {0x00a5}, UCOL_LESS},
5434     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
5435   };
5436
5437   /* European Ordering rules: ignore currency characters. */
5438   const char* eorRules[] = {
5439     "[import root-u-co-eor]",
5440   };
5441
5442   const OneTestCase eorTests[] = {
5443     { {0x0110}, {0x00F0}, UCOL_LESS},
5444     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
5445     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
5446   };
5447
5448   const char* fiStdRules[] = {
5449     "[import fi-u-co-standard]",
5450   };
5451
5452   const OneTestCase fiStdTests[] = {
5453     { {0x0110}, {0x00F0}, UCOL_GREATER},
5454     { {0x00a3}, {0x00a5}, UCOL_LESS},
5455     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
5456   };
5457
5458   /* Both European Ordering Rules and Fi Standard Rules. */
5459   const char* eorFiStdRules[] = {
5460     "[import root-u-co-eor][import fi-u-co-standard]",
5461   };
5462
5463   /* This is essentially same as the one before once fi.txt is updated with import. */
5464   const char* fiEorRules[] = {
5465     "[import fi-u-co-eor]",
5466   };
5467
5468   const OneTestCase fiEorTests[] = {
5469     { {0x0110}, {0x00F0}, UCOL_GREATER},
5470     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
5471     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
5472   };
5473
5474   doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
5475   doTestOneTestCase(eorTests, UPRV_LENGTHOF(eorTests), eorRules, UPRV_LENGTHOF(eorRules));
5476   doTestOneTestCase(fiStdTests, UPRV_LENGTHOF(fiStdTests), fiStdRules, UPRV_LENGTHOF(fiStdRules));
5477   doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), eorFiStdRules, UPRV_LENGTHOF(eorFiStdRules));
5478
5479   log_knownIssue("8962", NULL);
5480   /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
5481         eor{
5482             Sequence{
5483                 "[import root-u-co-eor][import fi-u-co-standard]"
5484             }
5485             Version{"21.0"}
5486         }
5487   */
5488   /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */
5489
5490 }
5491 #endif
5492
5493 #if 0
5494 /*
5495  * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
5496  * the resource files are built with -includeUnihanColl option.
5497  * TODO: Uncomment this function and make it work when unihan rules are built by default.
5498  */
5499 static void TestImportRulesCJKWithUnihan(void)
5500 {
5501   /* DUCET. */
5502   const char* defaultRules[] = {
5503     "&a<b",                                    /* Dummy rule. */
5504   };
5505
5506   const OneTestCase defaultTests[] = {
5507     { {0x3402}, {0x4e1e}, UCOL_GREATER},
5508   };
5509
5510   /* European Ordering rules: ignore currency characters. */
5511   const char* unihanRules[] = {
5512     "[import ko-u-co-unihan]",
5513   };
5514
5515   const OneTestCase unihanTests[] = {
5516     { {0x3402}, {0x4e1e}, UCOL_LESS},
5517   };
5518
5519   doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
5520   doTestOneTestCase(unihanTests, UPRV_LENGTHOF(unihanTests), unihanRules, UPRV_LENGTHOF(unihanRules));
5521
5522 }
5523 #endif
5524
5525 static void TestImport(void)
5526 {
5527     UCollator* vicoll;
5528     UCollator* escoll;
5529     UCollator* viescoll;
5530     UCollator* importviescoll;
5531     UParseError error;
5532     UErrorCode status = U_ZERO_ERROR;
5533     UChar* virules;
5534     int32_t viruleslength;
5535     UChar* esrules;
5536     int32_t esruleslength;
5537     UChar* viesrules;
5538     int32_t viesruleslength;
5539     char srules[500] = "[import vi][import es]";
5540     UChar rules[500];
5541     uint32_t length = 0;
5542     int32_t itemCount;
5543     int32_t i, k;
5544     UChar32 start;
5545     UChar32 end;
5546     UChar str[500];
5547     int32_t strLength;
5548
5549     uint8_t sk1[500];
5550     uint8_t sk2[500];
5551
5552     UBool b;
5553     USet* tailoredSet;
5554     USet* importTailoredSet;
5555
5556
5557     vicoll = ucol_open("vi", &status);
5558     if(U_FAILURE(status)){
5559         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5560         return;
5561     }
5562
5563     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5564     if(viruleslength == 0) {
5565         log_data_err("missing vi tailoring rule string\n");
5566         ucol_close(vicoll);
5567         return;
5568     }
5569     escoll = ucol_open("es", &status);
5570     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5571     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5572     viesrules[0] = 0;
5573     u_strcat(viesrules, virules);
5574     u_strcat(viesrules, esrules);
5575     viesruleslength = viruleslength + esruleslength;
5576     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5577
5578     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5579     length = u_unescape(srules, rules, 500);
5580     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5581     if(U_FAILURE(status)){
5582         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5583         return;
5584     }
5585
5586     tailoredSet = ucol_getTailoredSet(viescoll, &status);
5587     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5588
5589     if(!uset_equals(tailoredSet, importTailoredSet)){
5590         log_err("Tailored sets not equal");
5591     }
5592
5593     uset_close(importTailoredSet);
5594
5595     itemCount = uset_getItemCount(tailoredSet);
5596
5597     for( i = 0; i < itemCount; i++){
5598         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5599         if(strLength < 2){
5600             for (; start <= end; start++){
5601                 k = 0;
5602                 U16_APPEND(str, k, 500, start, b);
5603                 (void)b;    /* Suppress set but not used warning. */
5604                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5605                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5606                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5607                     log_err("Sort key for %s not equal\n", str);
5608                     break;
5609                 }
5610             }
5611         }else{
5612             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5613             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5614             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5615                 log_err("ZZSort key for %s not equal\n", str);
5616                 break;
5617             }
5618
5619         }
5620     }
5621
5622     uset_close(tailoredSet);
5623
5624     uprv_free(viesrules);
5625
5626     ucol_close(vicoll);
5627     ucol_close(escoll);
5628     ucol_close(viescoll);
5629     ucol_close(importviescoll);
5630 }
5631
5632 static void TestImportWithType(void)
5633 {
5634     UCollator* vicoll;
5635     UCollator* decoll;
5636     UCollator* videcoll;
5637     UCollator* importvidecoll;
5638     UParseError error;
5639     UErrorCode status = U_ZERO_ERROR;
5640     const UChar* virules;
5641     int32_t viruleslength;
5642     const UChar* derules;
5643     int32_t deruleslength;
5644     UChar* viderules;
5645     int32_t videruleslength;
5646     const char srules[500] = "[import vi][import de-u-co-phonebk]";
5647     UChar rules[500];
5648     uint32_t length = 0;
5649     int32_t itemCount;
5650     int32_t i, k;
5651     UChar32 start;
5652     UChar32 end;
5653     UChar str[500];
5654     int32_t strLength;
5655
5656     uint8_t sk1[500];
5657     uint8_t sk2[500];
5658
5659     USet* tailoredSet;
5660     USet* importTailoredSet;
5661
5662     vicoll = ucol_open("vi", &status);
5663     if(U_FAILURE(status)){
5664         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5665         return;
5666     }
5667     virules = ucol_getRules(vicoll, &viruleslength);
5668     if(viruleslength == 0) {
5669         log_data_err("missing vi tailoring rule string\n");
5670         ucol_close(vicoll);
5671         return;
5672     }
5673     /* decoll = ucol_open("de@collation=phonebook", &status); */
5674     decoll = ucol_open("de-u-co-phonebk", &status);
5675     if(U_FAILURE(status)){
5676         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5677         return;
5678     }
5679
5680
5681     derules = ucol_getRules(decoll, &deruleslength);
5682     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5683     viderules[0] = 0;
5684     u_strcat(viderules, virules);
5685     u_strcat(viderules, derules);
5686     videruleslength = viruleslength + deruleslength;
5687     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5688
5689     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5690     length = u_unescape(srules, rules, 500);
5691     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5692     if(U_FAILURE(status)){
5693         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5694         return;
5695     }
5696
5697     tailoredSet = ucol_getTailoredSet(videcoll, &status);
5698     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5699
5700     if(!uset_equals(tailoredSet, importTailoredSet)){
5701         log_err("Tailored sets not equal");
5702     }
5703
5704     uset_close(importTailoredSet);
5705
5706     itemCount = uset_getItemCount(tailoredSet);
5707
5708     for( i = 0; i < itemCount; i++){
5709         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5710         if(strLength < 2){
5711             for (; start <= end; start++){
5712                 k = 0;
5713                 U16_APPEND_UNSAFE(str, k, start);
5714                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5715                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5716                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5717                     log_err("Sort key for %s not equal\n", str);
5718                     break;
5719                 }
5720             }
5721         }else{
5722             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5723             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5724             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5725                 log_err("Sort key for %s not equal\n", str);
5726                 break;
5727             }
5728
5729         }
5730     }
5731
5732     uset_close(tailoredSet);
5733
5734     uprv_free(viderules);
5735
5736     ucol_close(videcoll);
5737     ucol_close(importvidecoll);
5738     ucol_close(vicoll);
5739     ucol_close(decoll);
5740 }
5741
/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
/*
 * ASCII text stored as UTF-16 code units (nine rows of 16 plus a final row of 11).
 * The array has no terminating NUL; longUpperStrItems records its length with UPRV_LENGTHOF.
 */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
5755
/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
/*
 * Each row is one 25-unit repetition. The vowels are precomposed Latin-1 letters:
 * acute (U+00C1 U+00C9 U+00CD U+00D3 U+00DA), then grave (U+00C0 U+00C8 U+00CC U+00D2 U+00D9),
 * then circumflex (U+00C2 U+00CA). The array has no terminating NUL.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
5764
/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
/*
 * Each row is one 27-unit repetition (26 upper-case ASCII letters followed by a space).
 * The array has no terminating NUL; longUpperStrItems records its length with UPRV_LENGTHOF.
 */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
5780
/* One entry of the longUpperStrItems table: a UTF-16 string and its explicit length. */
typedef struct {
    const UChar * longUpperStrPtr;  /* string data; NULL marks the end of the table */
    int32_t       longUpperStrLen;  /* number of UChar code units at longUpperStrPtr */
} LongUpperStrItem;
5785
/*
 * String pointers must be in reverse collation order of the corresponding strings:
 * TestCaseLevelBufferOverflow expects each entry's sort key to compare less than
 * the sort key of the entry before it. The { NULL, 0 } entry terminates the table.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
    { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
    { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
    { NULL,          0                           }
};
5793
/* Capacity, in bytes, of each sort key buffer used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5795
5796 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5797 static void TestCaseLevelBufferOverflow(void)
5798 {
5799     UErrorCode status = U_ZERO_ERROR;
5800     UCollator * ucol = ucol_open("root", &status);
5801     if ( U_SUCCESS(status) ) {
5802         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5803         if ( U_SUCCESS(status) ) {
5804             const LongUpperStrItem * itemPtr;
5805             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5806             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5807                 int32_t sortKeyLen;
5808                 if (itemPtr > longUpperStrItems) {
5809                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5810                 }
5811                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5812                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5813                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5814                     break;
5815                 }
5816                 if ( itemPtr > longUpperStrItems ) {
5817                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5818                     if (compareResult >= 0) {
5819                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5820                     }
5821                 }
5822             }
5823         } else {
5824             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5825         }
5826         ucol_close(ucol);
5827     } else {
5828         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5829     }
5830 }
5831
/* Test for #10595 */
/* Input for TestNextSortKeyPartJaIdentical: five UTF-16 code units followed by a terminating 0. */
static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
/* Size, in bytes, of the buffer handed to each ucol_nextSortKeyPart call in that test. */
#define KEY_PART_SIZE 16
5835
5836 static void TestNextSortKeyPartJaIdentical(void)
5837 {
5838     UErrorCode status = U_ZERO_ERROR;
5839     UCollator *coll;
5840     uint8_t keyPart[KEY_PART_SIZE];
5841     UCharIterator iter;
5842     uint32_t state[2] = {0, 0};
5843     int32_t keyPartLen;
5844
5845     coll = ucol_open("ja", &status);
5846     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5847     if (U_FAILURE(status)) {
5848         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5849         return;
5850     }
5851
5852     uiter_setString(&iter, testJapaneseName, 5);
5853     keyPartLen = KEY_PART_SIZE;
5854     while (keyPartLen == KEY_PART_SIZE) {
5855         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5856         if (U_FAILURE(status)) {
5857             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5858             break;
5859         }
5860     }
5861
5862     ucol_close(coll);
5863 }
5864
/* Registers test function x under the path "tscoll/cmsccoll/<function name>" (name produced by stringifying x). */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Adds the collation tests of this file to the test tree passed in root.
 * Entries left commented out are tests that are currently not registered;
 * the trailing comments give the reason where one was recorded.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Script and collation reordering tests. */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
5971
5972 #endif /* #if !UCONFIG_NO_COLLATION */