icuSources/test/cintltst/cmsccoll.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 2001-2006, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /*******************************************************************************
   7 *
   8 * File cmsccoll.C
   9 *
  10 *******************************************************************************/
  11 /**
  12  * These are the tests specific to ICU 1.8 and above, that I didn't know where
  13  * to fit.
  14  */
  15
  16 #include <stdio.h>
  17
  18 #include "unicode/utypes.h"
  19
  20 #if !UCONFIG_NO_COLLATION
  21
  22 #include "unicode/ucol.h"
  23 #include "unicode/ucoleitr.h"
  24 #include "unicode/uloc.h"
  25 #include "cintltst.h"
  26 #include "ccolltst.h"
  27 #include "callcoll.h"
  28 #include "unicode/ustring.h"
  29 #include "string.h"
  30 #include "ucol_imp.h"
  31 #include "ucol_tok.h"
  32 #include "cmemory.h"
  33 #include "cstring.h"
  34 #include "uassert.h"
  35 #include "unicode/parseerr.h"
  36 #include "unicode/ucnv.h"
  37 #include "uparse.h"
  38
  39 #define LEN(a) (sizeof(a)/sizeof(a[0]))
  40
  41 #define MAX_TOKEN_LEN 16
  42
  43 typedef int tst_strcoll(void *collator, const int object,
  44                         const UChar *source, const int sLen,
  45                         const UChar *target, const int tLen);
  46
  47
  48
  49 const static char cnt1[][10] = {
  50
  51   "AA",
  52   "AC",
  53   "AZ",
  54   "AQ",
  55   "AB",
  56   "ABZ",
  57   "ABQ",
  58   "Z",
  59   "ABC",
  60   "Q",
  61   "B"
  62 };
  63
  64 const static char cnt2[][10] = {
  65   "DA",
  66   "DAD",
  67   "DAZ",
  68   "MAR",
  69   "Z",
  70   "DAVIS",
  71   "MARK",
  72   "DAV",
  73   "DAVI"
  74 };
  75
  76 static void IncompleteCntTest(void)
  77 {
  78   UErrorCode status = U_ZERO_ERROR;
  79   UChar temp[90];
  80   UChar t1[90];
  81   UChar t2[90];
  82
  83   UCollator *coll =  NULL;
  84   uint32_t i = 0, j = 0;
  85   uint32_t size = 0;
  86
  87   u_uastrcpy(temp, " & Z < ABC < Q < B");
  88
  89   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
  90
  91   if(U_SUCCESS(status)) {
  92     size = sizeof(cnt1)/sizeof(cnt1[0]);
  93     for(i = 0; i < size-1; i++) {
  94       for(j = i+1; j < size; j++) {
  95         UCollationElements *iter;
  96         u_uastrcpy(t1, cnt1[i]);
  97         u_uastrcpy(t2, cnt1[j]);
  98         doTest(coll, t1, t2, UCOL_LESS);
  99         /* synwee : added collation element iterator test */
 100         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 101         if (U_FAILURE(status)) {
 102           log_err("Creation of iterator failed\n");
 103           break;
 104         }
 105         backAndForth(iter);
 106         ucol_closeElements(iter);
 107       }
 108     }
 109   }
 110
 111   ucol_close(coll);
 112
 113
 114   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
 115   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
 116
 117   if(U_SUCCESS(status)) {
 118     size = sizeof(cnt2)/sizeof(cnt2[0]);
 119     for(i = 0; i < size-1; i++) {
 120       for(j = i+1; j < size; j++) {
 121         UCollationElements *iter;
 122         u_uastrcpy(t1, cnt2[i]);
 123         u_uastrcpy(t2, cnt2[j]);
 124         doTest(coll, t1, t2, UCOL_LESS);
 125
 126         /* synwee : added collation element iterator test */
 127         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 128         if (U_FAILURE(status)) {
 129           log_err("Creation of iterator failed\n");
 130           break;
 131         }
 132         backAndForth(iter);
 133         ucol_closeElements(iter);
 134       }
 135     }
 136   }
 137
 138   ucol_close(coll);
 139
 140
 141 }
 142
 143 const static char shifted[][20] = {
 144   "black bird",
 145   "black-bird",
 146   "blackbird",
 147   "black Bird",
 148   "black-Bird",
 149   "blackBird",
 150   "black birds",
 151   "black-birds",
 152   "blackbirds"
 153 };
 154
 155 const static UCollationResult shiftedTert[] = {
 156   0,
 157   UCOL_EQUAL,
 158   UCOL_EQUAL,
 159   UCOL_LESS,
 160   UCOL_EQUAL,
 161   UCOL_EQUAL,
 162   UCOL_LESS,
 163   UCOL_EQUAL,
 164   UCOL_EQUAL
 165 };
 166
 167 const static char nonignorable[][20] = {
 168   "black bird",
 169   "black Bird",
 170   "black birds",
 171   "black-bird",
 172   "black-Bird",
 173   "black-birds",
 174   "blackbird",
 175   "blackBird",
 176   "blackbirds"
 177 };
 178
 179 static void BlackBirdTest(void) {
 180   UErrorCode status = U_ZERO_ERROR;
 181   UChar t1[90];
 182   UChar t2[90];
 183
 184   uint32_t i = 0, j = 0;
 185   uint32_t size = 0;
 186   UCollator *coll = ucol_open("en_US", &status);
 187
 188   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
 189   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
 190
 191   if(U_SUCCESS(status)) {
 192     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
 193     for(i = 0; i < size-1; i++) {
 194       for(j = i+1; j < size; j++) {
 195         u_uastrcpy(t1, nonignorable[i]);
 196         u_uastrcpy(t2, nonignorable[j]);
 197         doTest(coll, t1, t2, UCOL_LESS);
 198       }
 199     }
 200   }
 201
 202   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
 203   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
 204
 205   if(U_SUCCESS(status)) {
 206     size = sizeof(shifted)/sizeof(shifted[0]);
 207     for(i = 0; i < size-1; i++) {
 208       for(j = i+1; j < size; j++) {
 209         u_uastrcpy(t1, shifted[i]);
 210         u_uastrcpy(t2, shifted[j]);
 211         doTest(coll, t1, t2, UCOL_LESS);
 212       }
 213     }
 214   }
 215
 216   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
 217   if(U_SUCCESS(status)) {
 218     size = sizeof(shifted)/sizeof(shifted[0]);
 219     for(i = 1; i < size; i++) {
 220       u_uastrcpy(t1, shifted[i-1]);
 221       u_uastrcpy(t2, shifted[i]);
 222       doTest(coll, t1, t2, shiftedTert[i]);
 223     }
 224   }
 225
 226   ucol_close(coll);
 227 }
 228
 229 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
 230     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
 231     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
 232     {0x0041/*'A'*/, 0x0300, 0x0000},
 233     {0x00C0, 0x0301, 0x0000},
 234     /* this would work with forced normalization */
 235     {0x00C0, 0x0316, 0x0000}
 236 };
 237
 238 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
 239     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 240     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
 241     {0x00C0, 0},
 242     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 243     /* this would work with forced normalization */
 244     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
 245 };
 246
 247 const static UCollationResult results[] = {
 248     UCOL_GREATER,
 249     UCOL_EQUAL,
 250     UCOL_EQUAL,
 251     UCOL_GREATER,
 252     UCOL_EQUAL
 253 };
 254
 255 static void FunkyATest(void)
 256 {
 257
 258     int32_t i;
 259     UErrorCode status = U_ZERO_ERROR;
 260     UCollator  *myCollation;
 261     myCollation = ucol_open("en_US", &status);
 262     if(U_FAILURE(status)){
 263         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 264         return;
 265     }
 266     log_verbose("Testing some A letters, for some reason\n");
 267     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 268     ucol_setStrength(myCollation, UCOL_TERTIARY);
 269     for (i = 0; i < 4 ; i++)
 270     {
 271         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
 272     }
 273     ucol_close(myCollation);
 274 }
 275
 276 UColAttributeValue caseFirst[] = {
 277     UCOL_OFF,
 278     UCOL_LOWER_FIRST,
 279     UCOL_UPPER_FIRST
 280 };
 281
 282
 283 UColAttributeValue alternateHandling[] = {
 284     UCOL_NON_IGNORABLE,
 285     UCOL_SHIFTED
 286 };
 287
 288 UColAttributeValue caseLevel[] = {
 289     UCOL_OFF,
 290     UCOL_ON
 291 };
 292
 293 UColAttributeValue strengths[] = {
 294     UCOL_PRIMARY,
 295     UCOL_SECONDARY,
 296     UCOL_TERTIARY,
 297     UCOL_QUATERNARY,
 298     UCOL_IDENTICAL
 299 };
 300
 301 #if 0
 302 static const char * strengthsC[] = {
 303     "UCOL_PRIMARY",
 304     "UCOL_SECONDARY",
 305     "UCOL_TERTIARY",
 306     "UCOL_QUATERNARY",
 307     "UCOL_IDENTICAL"
 308 };
 309
 310 static const char * caseFirstC[] = {
 311     "UCOL_OFF",
 312     "UCOL_LOWER_FIRST",
 313     "UCOL_UPPER_FIRST"
 314 };
 315
 316
 317 static const char * alternateHandlingC[] = {
 318     "UCOL_NON_IGNORABLE",
 319     "UCOL_SHIFTED"
 320 };
 321
 322 static const char * caseLevelC[] = {
 323     "UCOL_OFF",
 324     "UCOL_ON"
 325 };
 326
 327 /* not used currently - does not test only prints */
 328 static void PrintMarkDavis(void)
 329 {
 330   UErrorCode status = U_ZERO_ERROR;
 331   UChar m[256];
 332   uint8_t sortkey[256];
 333   UCollator *coll = ucol_open("en_US", &status);
 334   uint32_t h,i,j,k, sortkeysize;
 335   uint32_t sizem = 0;
 336   char buffer[512];
 337   uint32_t len = 512;
 338
 339   log_verbose("PrintMarkDavis");
 340
 341   u_uastrcpy(m, "Mark Davis");
 342   sizem = u_strlen(m);
 343
 344
 345   m[1] = 0xe4;
 346
 347   for(i = 0; i<sizem; i++) {
 348     fprintf(stderr, "\\u%04X ", m[i]);
 349   }
 350   fprintf(stderr, "\n");
 351
 352   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
 353     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
 354     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
 355
 356     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
 357       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
 358       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
 359
 360       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
 361         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
 362         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
 363
 364         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
 365           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
 366           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
 367           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
 368           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
 369         }
 370
 371       }
 372
 373     }
 374
 375   }
 376 }
 377 #endif
 378
 379 static void BillFairmanTest(void) {
 380 /*
 381 ** check for actual locale via ICU resource bundles
 382 **
 383 ** lp points to the original locale ("fr_FR_....")
 384 */
 385
 386     UResourceBundle *lr,*cr;
 387     UErrorCode              lec = U_ZERO_ERROR;
 388     const char *lp = "fr_FR_you_ll_never_find_this_locale";
 389
 390     log_verbose("BillFairmanTest\n");
 391
 392     lr = ures_open(NULL,lp,&lec);
 393     if (lr) {
 394         cr = ures_getByKey(lr,"collations",0,&lec);
 395         if (cr) {
 396             lp = ures_getLocale(cr,&lec);
 397             if (lp) {
 398                 if (U_SUCCESS(lec)) {
 399                     if(strcmp(lp, "fr") != 0) {
 400                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
 401                     }
 402                 }
 403             }
 404             ures_close(cr);
 405         }
 406         ures_close(lr);
 407     }
 408 }
 409
 410 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
 411     UChar source[256] = { '\0'};
 412     UChar target[256] = { '\0'};
 413     UChar preP = 0x31a3;
 414     UChar preQ = 0x310d;
 415 /*
 416     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
 417     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
 418 */
 419     /*log_verbose("Testing primary\n");*/
 420
 421     doTest(col, p, q, UCOL_LESS);
 422 /*
 423     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
 424
 425     if(result!=UCOL_LESS){
 426        aescstrdup(p,utfSource,256);
 427        aescstrdup(q,utfTarget,256);
 428        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
 429     }
 430 */
 431     source[0] = preP;
 432     u_strcpy(source+1,p);
 433     target[0] = preQ;
 434     u_strcpy(target+1,q);
 435     doTest(col, source, target, UCOL_LESS);
 436 /*
 437     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
 438 */
 439 }
 440
 441 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
 442     UChar source[256] = { '\0'};
 443     UChar target[256] = { '\0'};
 444
 445     /*log_verbose("Testing secondary\n");*/
 446
 447     doTest(col, p, q, UCOL_LESS);
 448 /*
 449     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
 450 */
 451     source[0] = 0x0053;
 452     u_strcpy(source+1,p);
 453     target[0]= 0x0073;
 454     u_strcpy(target+1,q);
 455
 456     doTest(col, source, target, UCOL_LESS);
 457 /*
 458     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
 459 */
 460
 461
 462     u_strcpy(source,p);
 463     source[u_strlen(p)] = 0x62;
 464     source[u_strlen(p)+1] = 0;
 465
 466
 467     u_strcpy(target,q);
 468     target[u_strlen(q)] = 0x61;
 469     target[u_strlen(q)+1] = 0;
 470
 471     doTest(col, source, target, UCOL_GREATER);
 472
 473 /*
 474     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
 475 */
 476 }
 477
 478 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
 479     UChar source[256] = { '\0'};
 480     UChar target[256] = { '\0'};
 481
 482     /*log_verbose("Testing tertiary\n");*/
 483
 484     doTest(col, p, q, UCOL_LESS);
 485 /*
 486     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
 487 */
 488     source[0] = 0x0020;
 489     u_strcpy(source+1,p);
 490     target[0]= 0x002D;
 491     u_strcpy(target+1,q);
 492
 493     doTest(col, source, target, UCOL_LESS);
 494 /*
 495     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
 496 */
 497
 498     u_strcpy(source,p);
 499     source[u_strlen(p)] = 0xE0;
 500     source[u_strlen(p)+1] = 0;
 501
 502     u_strcpy(target,q);
 503     target[u_strlen(q)] = 0x61;
 504     target[u_strlen(q)+1] = 0;
 505
 506     doTest(col, source, target, UCOL_GREATER);
 507
 508 /*
 509     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
 510 */
 511 }
 512
 513 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
 514 /*
 515     UChar source[256] = { '\0'};
 516     UChar target[256] = { '\0'};
 517 */
 518
 519     doTest(col, p, q, UCOL_EQUAL);
 520 /*
 521     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
 522 */
 523 }
 524
 525 static void testCollator(UCollator *coll, UErrorCode *status) {
 526   const UChar *rules = NULL, *current = NULL;
 527   int32_t ruleLen = 0;
 528   uint32_t strength = 0;
 529   uint32_t chOffset = 0; uint32_t chLen = 0;
 530   uint32_t exOffset = 0; uint32_t exLen = 0;
 531   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
 532   uint32_t firstEx = 0;
 533 /*  uint32_t rExpsLen = 0; */
 534   uint32_t firstLen = 0;
 535   UBool varT = FALSE; UBool top_ = TRUE;
 536   uint16_t specs = 0;
 537   UBool startOfRules = TRUE;
 538   UBool lastReset = FALSE;
 539   UBool before = FALSE;
 540   uint32_t beforeStrength = 0;
 541   UColTokenParser src;
 542   UColOptionSet opts;
 543
 544   UChar first[256];
 545   UChar second[256];
 546   UChar tempB[256];
 547   uint32_t tempLen;
 548   UChar *rulesCopy = NULL;
 549   UParseError parseError;
 550   src.opts = &opts;
 551
 552   rules = ucol_getRules(coll, &ruleLen);
 553   if(U_SUCCESS(*status) && ruleLen > 0) {
 554     rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
 555     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
 556     src.current = src.source = rulesCopy;
 557     src.end = rulesCopy+ruleLen;
 558     src.extraCurrent = src.end;
 559     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
 560     *first = *second = 0;
 561
 562     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
 563       strength = src.parsedToken.strength;
 564       chOffset = src.parsedToken.charsOffset;
 565       chLen = src.parsedToken.charsLen;
 566       exOffset = src.parsedToken.extensionOffset;
 567       exLen = src.parsedToken.extensionLen;
 568       prefixOffset = src.parsedToken.prefixOffset;
 569       prefixLen = src.parsedToken.prefixLen;
 570       specs = src.parsedToken.flags;
 571
 572       startOfRules = FALSE;
 573       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
 574       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
 575       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
 576         second[0] = 0;
 577       } else {
 578         u_strncpy(second,rulesCopy+chOffset, chLen);
 579         second[chLen] = 0;
 580
 581         if(exLen > 0 && firstEx == 0) {
 582           u_strncat(first, rulesCopy+exOffset, exLen);
 583           first[firstLen+exLen] = 0;
 584         }
 585
 586         if(lastReset == TRUE && prefixLen != 0) {
 587           u_strncpy(first+prefixLen, first, firstLen);
 588           u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
 589           first[firstLen+prefixLen] = 0;
 590           firstLen = firstLen+prefixLen;
 591         }
 592
 593         if(before == TRUE) { /* swap first and second */
 594           u_strcpy(tempB, first);
 595           u_strcpy(first, second);
 596           u_strcpy(second, tempB);
 597
 598           tempLen = firstLen;
 599           firstLen = chLen;
 600           chLen = tempLen;
 601
 602           tempLen = firstEx;
 603           firstEx = exLen;
 604           exLen = tempLen;
 605           if(beforeStrength < strength) {
 606             strength = beforeStrength;
 607           }
 608         }
 609       }
 610       lastReset = FALSE;
 611
 612       switch(strength){
 613       case UCOL_IDENTICAL:
 614           testEquality(coll,first,second);
 615           break;
 616       case UCOL_PRIMARY:
 617           testPrimary(coll,first,second);
 618           break;
 619       case UCOL_SECONDARY:
 620           testSecondary(coll,first,second);
 621           break;
 622       case UCOL_TERTIARY:
 623           testTertiary(coll,first,second);
 624           break;
 625       case UCOL_TOK_RESET:
 626         lastReset = TRUE;
 627         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
 628         if(before) {
 629           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
 630         }
 631         break;
 632       default:
 633           break;
 634       }
 635
 636       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
 637         before = FALSE;
 638       } else {
 639         firstLen = chLen;
 640         firstEx = exLen;
 641         u_strcpy(first, second);
 642       }
 643     }
 644     free(rulesCopy);
 645   }
 646 }
 647
 648 static int ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
 649   UCollator *UCA = (UCollator *)collator;
 650   return ucol_strcoll(UCA, source, sLen, target, tLen);
 651 }
 652
 653 /*
 654 static int winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
 655 #ifdef U_WINDOWS
 656   LCID lcid = (LCID)collator;
 657   return CompareString(lcid, 0, source, sLen, target, tLen);
 658 #else
 659   return 0;
 660 #endif
 661 }
 662 */
 663
 664 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
 665                                      UChar s1, UChar s2,
 666                                      const UChar *s, const uint32_t sLen,
 667                                      const UChar *t, const uint32_t tLen) {
 668   UChar source[256] = {0};
 669   UChar target[256] = {0};
 670
 671   source[0] = s1;
 672   u_strcpy(source+1, s);
 673   target[0] = s2;
 674   u_strcpy(target+1, t);
 675
 676   return func(collator, opts, source, sLen+1, target, tLen+1);
 677 }
 678
 679 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
 680                                    UChar s1, UChar s2,
 681                                    const UChar *s, const uint32_t sLen,
 682                                    const UChar *t, const uint32_t tLen) {
 683   UChar source[256] = {0};
 684   UChar target[256] = {0};
 685
 686   u_strcpy(source, s);
 687   source[sLen] = s1;
 688   u_strcpy(target, t);
 689   target[tLen] = s2;
 690
 691   return func(collator, opts, source, sLen+1, target, tLen+1);
 692 }
 693
 694 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
 695                               const UChar *s, const uint32_t sLen,
 696                               const UChar *t, const uint32_t tLen,
 697                               UCollationResult result) {
 698   /*UChar fPrimary = 0x6d;*/
 699   /*UChar sPrimary = 0x6e;*/
 700   UChar fSecondary = 0x310d;
 701   UChar sSecondary = 0x31a3;
 702   UChar fTertiary = 0x310f;
 703   UChar sTertiary = 0x31b7;
 704
 705   UCollationResult oposite;
 706   if(result == UCOL_EQUAL) {
 707     return UCOL_IDENTICAL;
 708   } else if(result == UCOL_GREATER) {
 709     oposite = UCOL_LESS;
 710   } else {
 711     oposite = UCOL_GREATER;
 712   }
 713
 714   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
 715     return UCOL_PRIMARY;
 716   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
 717     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
 718     return UCOL_SECONDARY;
 719   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
 720     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
 721     return UCOL_TERTIARY;
 722   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
 723     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
 724     return UCOL_QUATERNARY;
 725   } else {
 726     return UCOL_IDENTICAL;
 727   }
 728 }
 729
 730 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
 731   uint32_t i = 0;
 732
 733   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
 734     buffer[0] = '=';
 735     buffer[1] = '=';
 736     buffer[2] = '\0';
 737   } else if(res == UCOL_GREATER) {
 738     for(i = 0; i<strength+1; i++) {
 739       buffer[i] = '>';
 740     }
 741     buffer[strength+1] = '\0';
 742   } else {
 743     for(i = 0; i<strength+1; i++) {
 744       buffer[i] = '<';
 745     }
 746     buffer[strength+1] = '\0';
 747   }
 748
 749   return buffer;
 750 }
 751
 752
 753
 754 static void logFailure (const char *platform, const char *test,
 755                         const UChar *source, const uint32_t sLen,
 756                         const UChar *target, const uint32_t tLen,
 757                         UCollationResult realRes, uint32_t realStrength,
 758                         UCollationResult expRes, uint32_t expStrength, UBool error) {
 759
 760   uint32_t i = 0;
 761
 762   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
 763   static int32_t maxOutputLength = 0;
 764   int32_t outputLength;
 765
 766   *sEsc = *tEsc = *s = *t = 0;
 767   if(error == TRUE) {
 768     log_err("Difference between expected and generated order. Run test with -v for more info\n");
 769   } else if(VERBOSITY == 0) {
 770     return;
 771   }
 772   for(i = 0; i<sLen; i++) {
 773     sprintf(b, "%04X", source[i]);
 774     strcat(sEsc, "\\u");
 775     strcat(sEsc, b);
 776     strcat(s, b);
 777     strcat(s, " ");
 778     if(source[i] < 0x80) {
 779       sprintf(b, "(%c)", source[i]);
 780       strcat(sEsc, b);
 781     }
 782   }
 783   for(i = 0; i<tLen; i++) {
 784     sprintf(b, "%04X", target[i]);
 785     strcat(tEsc, "\\u");
 786     strcat(tEsc, b);
 787     strcat(t, b);
 788     strcat(t, " ");
 789     if(target[i] < 0x80) {
 790       sprintf(b, "(%c)", target[i]);
 791       strcat(tEsc, b);
 792     }
 793   }
 794 /*
 795   strcpy(output, "[[ ");
 796   strcat(output, sEsc);
 797   strcat(output, getRelationSymbol(expRes, expStrength, relation));
 798   strcat(output, tEsc);
 799
 800   strcat(output, " : ");
 801
 802   strcat(output, sEsc);
 803   strcat(output, getRelationSymbol(realRes, realStrength, relation));
 804   strcat(output, tEsc);
 805   strcat(output, " ]] ");
 806
 807   log_verbose("%s", output);
 808 */
 809
 810
 811   strcpy(output, "DIFF: ");
 812
 813   strcat(output, s);
 814   strcat(output, " : ");
 815   strcat(output, t);
 816
 817   strcat(output, test);
 818   strcat(output, ": ");
 819
 820   strcat(output, sEsc);
 821   strcat(output, getRelationSymbol(expRes, expStrength, relation));
 822   strcat(output, tEsc);
 823
 824   strcat(output, " ");
 825
 826   strcat(output, platform);
 827   strcat(output, ": ");
 828
 829   strcat(output, sEsc);
 830   strcat(output, getRelationSymbol(realRes, realStrength, relation));
 831   strcat(output, tEsc);
 832
 833   outputLength = (int32_t)strlen(output);
 834   if(outputLength > maxOutputLength) {
 835     maxOutputLength = outputLength;
 836     U_ASSERT(outputLength < sizeof(output));
 837   }
 838
 839   log_verbose("%s\n", output);
 840
 841 }
 842
 843 /*
 844 static void printOutRules(const UChar *rules) {
 845   uint32_t len = u_strlen(rules);
 846   uint32_t i = 0;
 847   char toPrint;
 848   uint32_t line = 0;
 849
 850   fprintf(stdout, "Rules:");
 851
 852   for(i = 0; i<len; i++) {
 853     if(rules[i]<0x7f && rules[i]>=0x20) {
 854       toPrint = (char)rules[i];
 855       if(toPrint == '&') {
 856         line = 1;
 857         fprintf(stdout, "\n&");
 858       } else if(toPrint == ';') {
 859         fprintf(stdout, "<<");
 860         line+=2;
 861       } else if(toPrint == ',') {
 862         fprintf(stdout, "<<<");
 863         line+=3;
 864       } else {
 865         fprintf(stdout, "%c", toPrint);
 866         line++;
 867       }
 868     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
 869       fprintf(stdout, "\\u%04X", rules[i]);
 870       line+=6;
 871     }
 872     if(line>72) {
 873       fprintf(stdout, "\n");
 874       line = 0;
 875     }
 876   }
 877
 878   log_verbose("\n");
 879
 880 }
 881 */
 882
 883 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
 884   uint32_t diffs = 0;
 885   UCollationResult realResult;
 886   uint32_t realStrength;
 887
 888   uint32_t sLen = u_strlen(first);
 889   uint32_t tLen = u_strlen(second);
 890
 891   realResult = func(collator, opts, first, sLen, second, tLen);
 892   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
 893
 894   if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
 895     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
 896     diffs++;
 897   } else if(realResult != UCOL_LESS || realStrength != strength) {
 898     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
 899     diffs++;
 900   }
 901   return diffs;
 902 }
 903
 904
 905 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
 906   const UChar *rules = NULL, *current = NULL;
 907   int32_t ruleLen = 0;
 908   uint32_t strength = 0;
 909   uint32_t chOffset = 0; uint32_t chLen = 0;
 910   uint32_t exOffset = 0; uint32_t exLen = 0;
 911   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
 912 /*  uint32_t rExpsLen = 0; */
 913   uint32_t firstLen = 0, secondLen = 0;
 914   UBool varT = FALSE; UBool top_ = TRUE;
 915   uint16_t specs = 0;
 916   UBool startOfRules = TRUE;
 917   UColTokenParser src;
 918   UColOptionSet opts;
 919
 920   UChar first[256];
 921   UChar second[256];
 922   UChar *rulesCopy = NULL;
 923
 924   uint32_t UCAdiff = 0;
 925   uint32_t Windiff = 1;
 926   UParseError parseError;
 927
 928   src.opts = &opts;
 929
 930   rules = ucol_getRules(coll, &ruleLen);
 931
 932   /*printOutRules(rules);*/
 933
 934   if(U_SUCCESS(*status) && ruleLen > 0) {
 935     rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
 936     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
 937     src.current = src.source = rulesCopy;
 938     src.end = rulesCopy+ruleLen;
 939     src.extraCurrent = src.end;
 940     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
 941     *first = *second = 0;
 942
 943     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
 944       strength = src.parsedToken.strength;
 945       chOffset = src.parsedToken.charsOffset;
 946       chLen = src.parsedToken.charsLen;
 947       exOffset = src.parsedToken.extensionOffset;
 948       exLen = src.parsedToken.extensionLen;
 949       prefixOffset = src.parsedToken.prefixOffset;
 950       prefixLen = src.parsedToken.prefixLen;
 951       specs = src.parsedToken.flags;
 952
 953       startOfRules = FALSE;
 954       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
 955       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
 956
 957       u_strncpy(second,rulesCopy+chOffset, chLen);
 958       second[chLen] = 0;
 959       secondLen = chLen;
 960
 961       if(exLen > 0) {
 962         u_strncat(first, rulesCopy+exOffset, exLen);
 963         first[firstLen+exLen] = 0;
 964         firstLen += exLen;
 965       }
 966
 967       if(strength != UCOL_TOK_RESET) {
 968         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
 969           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
 970           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
 971         }
 972       }
 973
 974
 975       firstLen = chLen;
 976       u_strcpy(first, second);
 977
 978     }
 979     if(UCAdiff != 0 && Windiff != 0) {
 980       log_verbose("\n");
 981     }
 982     if(UCAdiff == 0) {
 983       log_verbose("No immediate difference with %s!\n", refName);
 984     }
 985     if(Windiff == 0) {
 986       log_verbose("No immediate difference with Win32!\n");
 987     }
 988     free(rulesCopy);
 989   }
 990 }
 991
 992 /*
 993  * Takes two CEs (lead and continuation) and
 994  * compares them as CEs should be compared:
 995  * primary vs. primary, secondary vs. secondary
 996  * tertiary vs. tertiary
 997  */
 998 static int32_t compareCEs(uint32_t s1, uint32_t s2,
 999                    uint32_t t1, uint32_t t2) {
1000   uint32_t s = 0, t = 0;
1001   if(s1 == t1 && s2 == t2) {
1002     return 0;
1003   }
1004   s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
1005   t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
1006   if(s < t) {
1007     return -1;
1008   } else if(s > t) {
1009     return 1;
1010   } else {
1011     s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
1012     t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
1013     if(s < t) {
1014       return -1;
1015     } else if(s > t) {
1016       return 1;
1017     } else {
1018       s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
1019       t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
1020       if(s < t) {
1021         return -1;
1022       } else {
1023         return 1;
1024       }
1025     }
1026   }
1027 }
1028
/*
 * Boundary CEs recorded for one indirect position.  Each boundary is kept
 * as a pair of 32-bit values; setIndirectBoundaries() stores element [0] of
 * its input arrays in the plain field and element [1] in the "Cont" field.
 */
typedef struct {
  uint32_t startCE;      /* first value of the start pair */
  uint32_t startContCE;  /* second value of the start pair */
  uint32_t limitCE;      /* first value of the limit pair; 0 when no limit was given */
  uint32_t limitContCE;  /* second value of the limit pair; 0 when no limit was given */
} indirectBoundaries;
1035
/* These values are used for finding CE values for indirect positioning.  */
/* Indirect positioning is a mechanism for allowing resets on symbolic    */
/* values. It only works for resets, and you cannot tailor indirect       */
/* names. An indirect name can define either an anchor point or a range.  */
/* An anchor point behaves in exactly the same way as a code point in a   */
/* reset would, except that it cannot be tailored. A range (we currently  */
/* only know of the [top] range) explicitly sets the upper bound for      */
/* generated CEs, thus allowing better control over how many CEs can be   */
/* squeezed into the range without a performance penalty.                 */
/* In that respect, we use [top] for tailoring of locales that use CJK    */
/* characters. Other indirect values are currently a pure convenience:    */
/* they can be used to ensure that the CEs will always be positioned in   */
/* the same place relative to a point with known properties (e.g. first   */
/* primary ignorable).                                                    */
1050 static indirectBoundaries ucolIndirectBoundaries[15];
1051 static UBool indirectBoundariesSet = FALSE;
1052 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1053
1054   /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1055   /* to initalize here. */
1056   ucolIndirectBoundaries[indexR].startCE = start[0];
1057   ucolIndirectBoundaries[indexR].startContCE = start[1];
1058   if(end) {
1059     ucolIndirectBoundaries[indexR].limitCE = end[0];
1060     ucolIndirectBoundaries[indexR].limitContCE = end[1];
1061   } else {
1062     ucolIndirectBoundaries[indexR].limitCE = 0;
1063     ucolIndirectBoundaries[indexR].limitContCE = 0;
1064   }
1065 }
1066
1067 static void testCEs(UCollator *coll, UErrorCode *status) {
1068
1069   const UChar *rules = NULL, *current = NULL;
1070   int32_t ruleLen = 0;
1071
1072   uint32_t strength = 0;
1073   uint32_t maxStrength = UCOL_IDENTICAL;
1074   uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1075   uint32_t lastCE;
1076   uint32_t lastContCE;
1077
1078   int32_t result = 0;
1079   uint32_t chOffset = 0; uint32_t chLen = 0;
1080   uint32_t exOffset = 0; uint32_t exLen = 0;
1081   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1082   uint32_t oldOffset = 0;
1083
1084   /* uint32_t rExpsLen = 0; */
1085   /* uint32_t firstLen = 0; */
1086   uint16_t specs = 0;
1087   UBool varT = FALSE; UBool top_ = TRUE;
1088   UBool startOfRules = TRUE;
1089   UBool before = FALSE;
1090   UColTokenParser src;
1091   UColOptionSet opts;
1092   UParseError parseError;
1093   UChar *rulesCopy = NULL;
1094   collIterate c;
1095   UCollator *UCA = ucol_open("root", status);
1096   UCAConstants *consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1097   uint32_t UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0], /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1], */
1098            UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0], UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1099
1100   baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1101
1102   src.opts = &opts;
1103
1104   rules = ucol_getRules(coll, &ruleLen);
1105
1106   src.invUCA = ucol_initInverseUCA(status);
1107
1108   if(indirectBoundariesSet == FALSE) {
1109     /* UCOL_RESET_TOP_VALUE */
1110     setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1111     /* UCOL_FIRST_PRIMARY_IGNORABLE */
1112     setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1113     /* UCOL_LAST_PRIMARY_IGNORABLE */
1114     setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1115     /* UCOL_FIRST_SECONDARY_IGNORABLE */
1116     setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1117     /* UCOL_LAST_SECONDARY_IGNORABLE */
1118     setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1119     /* UCOL_FIRST_TERTIARY_IGNORABLE */
1120     setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1121     /* UCOL_LAST_TERTIARY_IGNORABLE */
1122     setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1123     /* UCOL_FIRST_VARIABLE */
1124     setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1125     /* UCOL_LAST_VARIABLE */
1126     setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1127     /* UCOL_FIRST_NON_VARIABLE */
1128     setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1129     /* UCOL_LAST_NON_VARIABLE */
1130     setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1131     /* UCOL_FIRST_IMPLICIT */
1132     setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1133     /* UCOL_LAST_IMPLICIT */
1134     setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1135     /* UCOL_FIRST_TRAILING */
1136     setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1137     /* UCOL_LAST_TRAILING */
1138     setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1139     ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1140     indirectBoundariesSet = TRUE;
1141   }
1142
1143
1144   if(U_SUCCESS(*status) && ruleLen > 0) {
1145     rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1146     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1147     src.current = src.source = rulesCopy;
1148     src.end = rulesCopy+ruleLen;
1149     src.extraCurrent = src.end;
1150     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1151
1152     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1153       strength = src.parsedToken.strength;
1154       chOffset = src.parsedToken.charsOffset;
1155       chLen = src.parsedToken.charsLen;
1156       exOffset = src.parsedToken.extensionOffset;
1157       exLen = src.parsedToken.extensionLen;
1158       prefixOffset = src.parsedToken.prefixOffset;
1159       prefixLen = src.parsedToken.prefixLen;
1160       specs = src.parsedToken.flags;
1161
1162       startOfRules = FALSE;
1163       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1164       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1165
1166       uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
1167
1168       currCE = ucol_getNextCE(coll, &c, status);
1169       if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
1170         log_verbose("Thai prevowel detected. Will pick next CE\n");
1171         currCE = ucol_getNextCE(coll, &c, status);
1172       }
1173
1174       currContCE = ucol_getNextCE(coll, &c, status);
1175       if(!isContinuation(currContCE)) {
1176         currContCE = 0;
1177       }
1178
1179       /* we need to repack CEs here */
1180
1181       if(strength == UCOL_TOK_RESET) {
1182         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1183         if(top_ == TRUE) {
1184           int32_t index = src.parsedToken.indirectIndex;
1185
1186           nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
1187           nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
1188         } else {
1189           nextCE = baseCE = currCE;
1190           nextContCE = baseContCE = currContCE;
1191         }
1192         maxStrength = UCOL_IDENTICAL;
1193       } else {
1194         if(strength < maxStrength) {
1195           maxStrength = strength;
1196           if(baseCE == UCOL_RESET_TOP_VALUE) {
1197               log_verbose("Resetting to [top]\n");
1198               nextCE = UCOL_NEXT_TOP_VALUE;
1199               nextContCE = UCOL_NEXT_TOP_CONT;
1200           } else {
1201             result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1202           }
1203           if(result < 0) {
1204             if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
1205               log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
1206               return;
1207             } else {
1208               log_err("couldn't find the CE\n");
1209               return;
1210             }
1211           }
1212         }
1213
1214         currCE &= 0xFFFFFF3F;
1215         currContCE &= 0xFFFFFFBF;
1216
1217         if(maxStrength == UCOL_IDENTICAL) {
1218           if(baseCE != currCE || baseContCE != currContCE) {
1219             log_err("current CE  (initial strength UCOL_EQUAL)\n");
1220           }
1221         } else {
1222           if(strength == UCOL_IDENTICAL) {
1223             if(lastCE != currCE || lastContCE != currContCE) {
1224               log_err("current CE  (initial strength UCOL_EQUAL)\n");
1225             }
1226           } else {
1227             if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1228             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1229               log_err("current CE is not less than base CE\n");
1230             }
1231             if(!before) {
1232               if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1233               /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1234                 log_err("sequence of generated CEs is broken\n");
1235               }
1236             } else {
1237               before = FALSE;
1238               if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1239               /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1240                 log_err("sequence of generated CEs is broken\n");
1241               }
1242             }
1243           }
1244         }
1245
1246       }
1247
1248       oldOffset = chOffset;
1249       lastCE = currCE & 0xFFFFFF3F;
1250       lastContCE = currContCE & 0xFFFFFFBF;
1251     }
1252     free(rulesCopy);
1253   }
1254   ucol_close(UCA);
1255 }
1256
#if 0
/* Disabled: these locales are now picked from index RB, so this */
/* hard-coded list is no longer compiled.                        */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1271
/*
 * Hand-written tailorings; RamsRulesTest() unescapes each one and opens a
 * collator from it with ucol_openRules().  The comment in front of a rule
 * shows the earlier spelling that the rule replaced.
 */
static const char *rulesToTest[] = {
  /* Funky fa rule */
  "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

  /* disabled: "& Z < p, P" */

  /* Cui Mins rules */
  /* was: "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
  "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
  /* was: "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
  "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
  /* was: "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
  "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
  /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
  "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
  /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
  "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
  /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
  "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
  /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
  "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
};
1285
1286
1287 static void TestCollations(void) {
1288   int32_t noOfLoc = uloc_countAvailable();
1289   int32_t i = 0, j = 0;
1290
1291   UErrorCode status = U_ZERO_ERROR;
1292   char cName[256];
1293   UChar name[256];
1294   int32_t nameSize;
1295
1296
1297   const char *locName = NULL;
1298   UCollator *coll = NULL;
1299   UCollator *UCA = ucol_open("", &status);
1300   UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1301   ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1302
1303   for(i = 0; i<noOfLoc; i++) {
1304     status = U_ZERO_ERROR;
1305     locName = uloc_getAvailable(i);
1306     if(uprv_strcmp("ja", locName) == 0) {
1307       log_verbose("Don't know how to test prefixes\n");
1308       continue;
1309     }
1310     if(hasCollationElements(locName)) {
1311         nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1312         for(j = 0; j<nameSize; j++) {
1313           cName[j] = (char)name[j];
1314         }
1315         cName[nameSize] = 0;
1316         log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1317         coll = ucol_open(locName, &status);
1318         if(U_SUCCESS(status)) {
1319           testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1320           ucol_close(coll);
1321         } else {
1322           log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1323           status = U_ZERO_ERROR;
1324         }
1325     }
1326   }
1327   ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1328   ucol_close(UCA);
1329 }
1330
1331 static void RamsRulesTest(void) {
1332   UErrorCode status = U_ZERO_ERROR;
1333   int32_t i = 0;
1334   UCollator *coll = NULL;
1335   UChar rule[2048];
1336   uint32_t ruleLen;
1337   int32_t noOfLoc = uloc_countAvailable();
1338   const char *locName = NULL;
1339
1340   log_verbose("RamsRulesTest\n");
1341
1342   for(i = 0; i<noOfLoc; i++) {
1343     status = U_ZERO_ERROR;
1344     locName = uloc_getAvailable(i);
1345     if(hasCollationElements(locName)) {
1346       if (uprv_strcmp("ja", locName)==0) {
1347         log_verbose("Don't know how to test Japanese because of prefixes\n");
1348         continue;
1349       }
1350       if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1351         log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1352         continue;
1353       }
1354       log_verbose("Testing locale %s\n", locName);
1355       coll = ucol_open(locName, &status);
1356       if(U_SUCCESS(status)) {
1357         if(coll->image->jamoSpecial == TRUE) {
1358           log_err("%s has special JAMOs\n", locName);
1359         }
1360         ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1361         testCollator(coll, &status);
1362         testCEs(coll, &status);
1363         ucol_close(coll);
1364       }
1365     }
1366   }
1367
1368   for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1369     log_verbose("Testing rule: %s\n", rulesToTest[i]);
1370     ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1371     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1372     if(U_SUCCESS(status)) {
1373       testCollator(coll, &status);
1374       testCEs(coll, &status);
1375       ucol_close(coll);
1376     }
1377   }
1378
1379 }
1380
1381 static void IsTailoredTest(void) {
1382   UErrorCode status = U_ZERO_ERROR;
1383   uint32_t i = 0;
1384   UCollator *coll = NULL;
1385   UChar rule[2048];
1386   UChar tailored[2048];
1387   UChar notTailored[2048];
1388   uint32_t ruleLen, tailoredLen, notTailoredLen;
1389
1390   log_verbose("IsTailoredTest\n");
1391
1392   u_uastrcpy(rule, "&Z < A, B, C;c < d");
1393   ruleLen = u_strlen(rule);
1394
1395   u_uastrcpy(tailored, "ABCcd");
1396   tailoredLen = u_strlen(tailored);
1397
1398   u_uastrcpy(notTailored, "ZabD");
1399   notTailoredLen = u_strlen(notTailored);
1400
1401   coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1402   if(U_SUCCESS(status)) {
1403     for(i = 0; i<tailoredLen; i++) {
1404       if(!ucol_isTailored(coll, tailored[i], &status)) {
1405         log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1406       }
1407     }
1408     for(i = 0; i<notTailoredLen; i++) {
1409       if(ucol_isTailored(coll, notTailored[i], &status)) {
1410         log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1411       }
1412     }
1413     ucol_close(coll);
1414   }
1415 }
1416
1417
/*
 * Escaped test strings for TestChMove(), listed in the order in which the
 * "cs" collator is expected to sort them: every entry has to compare less
 * than every entry after it.
 */
static const char chTest[][20] = {
  /* c */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  /* c with U+030C */
  "c\\u030C", "C\\u030C",
  /* h */
  "h", "H",
  "ha", "Ha", "harly",
  "hb", "HB",
  "hx", "HX",
  "hy", "HY",
  /* ch */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i */
  "i", "I", "iarly",
  /* r, and r with U+030C */
  "r", "R",
  "r\\u030C", "R\\u030C",
  /* s, and s with U+030C */
  "s", "S",
  "s\\u030C", "S\\u030C",
  /* z, and z with U+030C */
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1437
1438 static void TestChMove(void) {
1439   UChar t1[256] = {0};
1440   UChar t2[256] = {0};
1441
1442   uint32_t i = 0, j = 0;
1443   uint32_t size = 0;
1444   UErrorCode status = U_ZERO_ERROR;
1445
1446   UCollator *coll = ucol_open("cs", &status);
1447
1448   if(U_SUCCESS(status)) {
1449     size = sizeof(chTest)/sizeof(chTest[0]);
1450     for(i = 0; i < size-1; i++) {
1451       for(j = i+1; j < size; j++) {
1452         u_unescape(chTest[i], t1, 256);
1453         u_unescape(chTest[j], t2, 256);
1454         doTest(coll, t1, t2, UCOL_LESS);
1455       }
1456     }
1457   }
1458   else {
1459     log_err("Can't open collator");
1460   }
1461   ucol_close(coll);
1462 }
1463
1464
1465
1466
/*
 * Escaped strings in the order expected under the rule
 * "&U+4E00 < a <<< A < b <<< B".
 * NOTE(review): no live code in TestImplicitTailoring() reads this table;
 * the same strings are listed inline there.  Confirm before removing it.
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1475
1476
/*
 * Feeds a small table of tailorings that reset on U+4E00 / U+4E01, each
 * with the strings in their expected order, to genericRulesStarter().
 *
 * An earlier, hand-rolled version of this test opened the rule of the
 * second table entry itself and compared every pair of impTest strings
 * with doTest(); the table entry covers the same rule and data.
 */
static void TestImplicitTailoring(void) {
  static struct {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } tests[] = {
      {
        "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e",
        { "d", "e", "b", "c", "\\u4e00"},
        5
      },
      {
        "&\\u4e00 < a <<< A < b <<< B",
        { "\\u4e00", "a", "A", "b", "B", "\\u4e01"},
        6
      },
      {
        "&[before 1]\\u4e00 < \\u4e01 < \\u4e02",
        { "\\u4e01", "\\u4e02", "\\u4e00"},
        3
      },
      {
        "&[before 1]\\u4e01 < \\u4e02 < \\u4e03",
        { "\\u4e02", "\\u4e03", "\\u4e01"},
        3
      }
  };

  uint32_t noOfTests = sizeof(tests)/sizeof(tests[0]);
  uint32_t t = 0;

  while(t < noOfTests) {
      genericRulesStarter(tests[t].rules, tests[t].data, tests[t].len);
      t++;
  }
}
1526
1527 static void TestFCDProblem(void) {
1528   UChar t1[256] = {0};
1529   UChar t2[256] = {0};
1530
1531   const char *s1 = "\\u0430\\u0306\\u0325";
1532   const char *s2 = "\\u04D1\\u0325";
1533
1534   UErrorCode status = U_ZERO_ERROR;
1535   UCollator *coll = ucol_open("", &status);
1536   u_unescape(s1, t1, 256);
1537   u_unescape(s2, t2, 256);
1538
1539   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1540   doTest(coll, t1, t2, UCOL_EQUAL);
1541
1542   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1543   doTest(coll, t1, t2, UCOL_EQUAL);
1544
1545   ucol_close(coll);
1546 }
1547
/* Capacity, in UChars, of each buffer in a tester record. */
#define NORM_BUFFER_TEST_LEN 32
/*
 * One record collected by TestComposeDecompose(): a code point and the
 * two strings that are later compared for equality.
 */
typedef struct {
  UChar32 u;                        /* the code point the record belongs to */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* output of unorm_normalize(UNORM_NFC); replaced by the code point itself when that differs from its NFD */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* output of unorm_normalize(UNORM_NFD) */
} tester;
1554
1555 static void TestComposeDecompose(void) {
1556     int32_t noOfLoc;
1557     int32_t i = 0, j = 0;
1558
1559     UErrorCode status = U_ZERO_ERROR;
1560
1561     const char *locName = NULL;
1562
1563     uint32_t nfcSize;
1564     uint32_t nfdSize;
1565     tester **t;
1566     uint32_t noCases = 0;
1567     UCollator *coll = NULL;
1568     UChar32 u = 0;
1569     UChar comp[NORM_BUFFER_TEST_LEN];
1570     uint32_t len = 0;
1571     UCollationElements *iter;
1572
1573     noOfLoc = uloc_countAvailable();
1574
1575     t = malloc(0x30000 * sizeof(tester *));
1576     t[0] = (tester *)malloc(sizeof(tester));
1577     log_verbose("Testing UCA extensively\n");
1578     coll = ucol_open("", &status);
1579     if(status == U_FILE_ACCESS_ERROR) {
1580       log_data_err("Is your data around?\n");
1581       return;
1582     } else if(U_FAILURE(status)) {
1583       log_err("Error opening collator\n");
1584       return;
1585     }
1586
1587
1588     for(u = 0; u < 0x30000; u++) {
1589       len = 0;
1590       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1591         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1592         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1593
1594         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1595           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1596             t[noCases]->u = u;
1597             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1598               u_strncpy(t[noCases]->NFC, comp, len);
1599               t[noCases]->NFC[len] = 0;
1600             }
1601             noCases++;
1602             t[noCases] = (tester *)malloc(sizeof(tester));
1603             uprv_memset(t[noCases], 0, sizeof(tester));
1604         }
1605     }
1606
1607     for(u=0; u<(UChar32)noCases; u++) {
1608       if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1609         log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1610         doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1611       }
1612     }
1613     /*
1614     for(u = 0; u < 0x30000; u++) {
1615       if(!(u&0xFFFF)) {
1616         log_verbose("%08X ", u);
1617       }
1618       uprv_memset(t[noCases], 0, sizeof(tester));
1619       t[noCases]->u = u;
1620       len = 0;
1621       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1622       comp[len] = 0;
1623       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1624       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1625       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1626       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1627     }
1628     */
1629
1630     ucol_close(coll);
1631
1632     log_verbose("Testing locales, number of cases = %i\n", noCases);
1633     for(i = 0; i<noOfLoc; i++) {
1634         status = U_ZERO_ERROR;
1635         locName = uloc_getAvailable(i);
1636         if(hasCollationElements(locName)) {
1637             char cName[256];
1638             UChar name[256];
1639             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1640
1641             for(j = 0; j<nameSize; j++) {
1642                 cName[j] = (char)name[j];
1643             }
1644             cName[nameSize] = 0;
1645             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1646
1647             coll = ucol_open(locName, &status);
1648             ucol_setStrength(coll, UCOL_IDENTICAL);
1649             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1650
1651             for(u=0; u<(UChar32)noCases; u++) {
1652               if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1653                 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1654                 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1655                 log_verbose("Testing NFC\n");
1656                 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1657                   backAndForth(iter);
1658                 log_verbose("Testing NFD\n");
1659                   ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1660                   backAndForth(iter);
1661               }
1662             }
1663             ucol_closeElements(iter);
1664             ucol_close(coll);
1665         }
1666     }
1667     for(u = 0; u <= (UChar32)noCases; u++) {
1668         free(t[u]);
1669     }
1670     free(t);
1671 }
1672
1673 static void TestEmptyRule(void) {
1674   UErrorCode status = U_ZERO_ERROR;
1675   UChar rulez[] = { 0 };
1676   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1677
1678   ucol_close(coll);
1679 }
1680
1681 static void TestUCARules(void) {
1682   UErrorCode status = U_ZERO_ERROR;
1683   UChar b[256];
1684   UChar *rules = b;
1685   uint32_t ruleLen = 0;
1686   UCollator *UCAfromRules = NULL;
1687   UCollator *coll = ucol_open("", &status);
1688   if(status == U_FILE_ACCESS_ERROR) {
1689     log_data_err("Is your data around?\n");
1690     return;
1691   } else if(U_FAILURE(status)) {
1692     log_err("Error opening collator\n");
1693     return;
1694   }
1695   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1696
1697   log_verbose("TestUCARules\n");
1698   if(ruleLen > 256) {
1699     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1700     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1701   }
1702   log_verbose("Rules length is %d\n", ruleLen);
1703   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1704   if(U_SUCCESS(status)) {
1705     ucol_close(UCAfromRules);
1706   } else {
1707     log_verbose("Unable to create a collator from UCARules!\n");
1708   }
1709 /*
1710   u_unescape(blah, b, 256);
1711   ucol_getSortKey(coll, b, 1, res, 256);
1712 */
1713   ucol_close(coll);
1714   if(rules != b) {
1715     free(rules);
1716   }
1717 }
1718
1719
1720 /* Pinyin tonal order */
1721 /*
1722     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1723           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1724     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1725     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1726     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1727     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1728       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1729 .. (\u00fc)
1730
1731 However, in testing we got the following order:
1732     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1733           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1734     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1735 .. (\u0113)
1736     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1737     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1738     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1739 .. (\u01d8)
1740       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1741 */
1742
/*
 * Tailors the four toned forms of a, e, i, o and u in front of the base
 * letter with [before 1] resets, puts the toned forms of u-diaeresis after
 * u, and lets genericRulesStarter() check the resulting order.
 */
static void TestBefore(void) {
  /* strings in their expected order */
  static const char *data[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  /* one reset per vowel, plus the trailing chain after u */
  const char *rules =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(rules, data, sizeof(data)/sizeof(data[0]));
}
1761
#if 0
/* Disabled: superseded by TestBeforePinyin. */
/* Passed the strings below, in their expected order, to the "zh" locale test. */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1777
#if 0
/* Disabled: superseded by the changes to the lv locale. */
/* Passed the strings below, in their expected order, to the "lv" locale test. */
static void TestJ831(void) {
  const static char *data[] = {
    "I",
      "i",
      "Y",
      "y"
  };
  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1790
/*
 * Checks one list of strings, given in its expected order, twice: against
 * the "fr" locale and against a collator built from the rule
 * "[backwards 2]&A<<\u00e6/e<<<\u00c6/E" (shown here in escaped form).
 */
static void TestJ815(void) {
  static const char *data[] = {
    "aa", "Aa",
    "ab", "Ab",
    "ad", "Ad",
    "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af",
    "b", "B"
  };
  const char *rules = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

  genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
  genericRulesStarter(rules, data, sizeof(data)/sizeof(data[0]));
}
1811
1812
1813 /*
1814 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1815 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1816 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1817 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1818 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1819 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1820 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1821 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1822 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1823 */
1824 static void TestRedundantRules(void) {
1825   int32_t i;
1826
1827   struct {
1828       const char *rules;
1829       const char *expectedRules;
1830       const char *testdata[8];
1831       uint32_t testdatalen;
1832   } tests[] = {
1833     /* this test conflicts with positioning of CODAN placeholder */
1834        /*{
1835         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1836         "&\\u2089<<<x",
1837         {"\\u2089", "x"}, 2
1838        }, */
1839     /* this test conflicts with the [before x] syntax tightening */
1840       /*{
1841         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1842         "&\\u0252<<<x",
1843         {"\\u0252", "x"}, 2
1844       }, */
1845     /* this test conflicts with the [before x] syntax tightening */
1846       /*{
1847          "& a < b <<< c << d <<< e& [before 1] e <<< x",
1848          "& a <<< x < b <<< c << d <<< e",
1849         {"a", "x", "b", "c", "d", "e"}, 6
1850       }, */
1851       {
1852         "& a < b < c < d& [before 1] c < m",
1853         "& a < b < m < c < d",
1854         {"a", "b", "m", "c", "d"}, 5
1855       },
1856       {
1857         "& a < b <<< c << d <<< e& [before 3] e <<< x",
1858         "& a < b <<< c << d <<< x <<< e",
1859         {"a", "b", "c", "d", "x", "e"}, 6
1860       },
1861     /* this test conflicts with the [before x] syntax tightening */
1862       /* {
1863         "& a < b <<< c << d <<< e& [before 2] e <<< x",
1864         "& a < b <<< c <<< x << d <<< e",
1865         {"a", "b", "c", "x", "d", "e"},, 6
1866       }, */
1867       {
1868         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1869         "& a < b <<< c << d <<< e <<< f < x < g",
1870         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1871       },
1872       {
1873         "& a <<< b << c < d& a < m",
1874         "& a <<< b << c < m < d",
1875         {"a", "b", "c", "m", "d"}, 5
1876       },
1877       {
1878         "&a<b<<b\\u0301 &z<b",
1879         "&a<b\\u0301 &z<b",
1880         {"a", "b\\u0301", "z", "b"}, 4
1881       },
1882       {
1883         "&z<m<<<q<<<m",
1884         "&z<q<<<m",
1885         {"z", "q", "m"},3
1886       },
1887       {
1888         "&z<<<m<q<<<m",
1889         "&z<q<<<m",
1890         {"z", "q", "m"}, 3
1891       },
1892       {
1893         "& a < b < c < d& r < c",
1894         "& a < b < d& r < c",
1895         {"a", "b", "d"}, 3
1896       },
1897       {
1898         "& a < b < c < d& r < c",
1899         "& a < b < d& r < c",
1900         {"r", "c"}, 2
1901       },
1902       {
1903         "& a < b < c < d& c < m",
1904         "& a < b < c < m < d",
1905         {"a", "b", "c", "m", "d"}, 5
1906       },
1907       {
1908         "& a < b < c < d& a < m",
1909         "& a < m < b < c < d",
1910         {"a", "m", "b", "c", "d"}, 5
1911       }
1912   };
1913
1914
1915   UCollator *credundant = NULL;
1916   UCollator *cresulting = NULL;
1917   UErrorCode status = U_ZERO_ERROR;
1918   UChar rlz[2048] = { 0 };
1919   uint32_t rlen = 0;
1920
1921   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
1922     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
1923     rlen = u_unescape(tests[i].rules, rlz, 2048);
1924
1925     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1926     if(status == U_FILE_ACCESS_ERROR) {
1927       log_data_err("Is your data around?\n");
1928       return;
1929     } else if(U_FAILURE(status)) {
1930       log_err("Error opening collator\n");
1931       return;
1932     }
1933
1934     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
1935     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1936
1937     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
1938
1939     ucol_close(credundant);
1940     ucol_close(cresulting);
1941
1942     log_verbose("testing using data\n");
1943
1944     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
1945   }
1946
1947 }
1948
1949 static void TestExpansionSyntax(void) {
1950   int32_t i;
1951
1952   const static char *rules[] = {
1953     "&AE <<< a << b <<< c &d <<< f",
1954     "&AE <<< a <<< b << c << d < e < f <<< g",
1955     "&AE <<< B <<< C / D <<< F"
1956   };
1957
1958   const static char *expectedRules[] = {
1959     "&A <<< a / E << b / E <<< c /E  &d <<< f",
1960     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
1961     "&A <<< B / E <<< C / ED <<< F / E"
1962   };
1963
1964   const static char *testdata[][8] = {
1965     {"AE", "a", "b", "c"},
1966     {"AE", "a", "b", "c", "d", "e", "f", "g"},
1967     {"AE", "B", "C"} /* / ED <<< F / E"},*/
1968   };
1969
1970   const static uint32_t testdatalen[] = {
1971       4,
1972       8,
1973       3
1974   };
1975
1976
1977
1978   UCollator *credundant = NULL;
1979   UCollator *cresulting = NULL;
1980   UErrorCode status = U_ZERO_ERROR;
1981   UChar rlz[2048] = { 0 };
1982   uint32_t rlen = 0;
1983
1984   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
1985     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
1986     rlen = u_unescape(rules[i], rlz, 2048);
1987
1988     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
1989     if(status == U_FILE_ACCESS_ERROR) {
1990       log_data_err("Is your data around?\n");
1991       return;
1992     } else if(U_FAILURE(status)) {
1993       log_err("Error opening collator\n");
1994       return;
1995     }
1996     rlen = u_unescape(expectedRules[i], rlz, 2048);
1997     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1998
1999     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2000     /* as a hard error test, but only in information mode */
2001     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2002
2003     ucol_close(credundant);
2004     ucol_close(cresulting);
2005
2006     log_verbose("testing using data\n");
2007
2008     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2009   }
2010 }
2011
2012 static void TestCase(void)
2013 {
2014     const static UChar gRules[MAX_TOKEN_LEN] =
2015     /*" & 0 < 1,\u2461<a,A"*/
2016     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2017
2018     const static UChar testCase[][MAX_TOKEN_LEN] =
2019     {
2020         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2021         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2022         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2023         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2024     };
2025
2026     const static UCollationResult caseTestResults[][9] =
2027     {
2028             { UCOL_LESS, UCOL_LESS, UCOL_LESS, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_LESS },
2029             { UCOL_GREATER, UCOL_LESS, UCOL_LESS, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_GREATER },
2030             { UCOL_LESS, UCOL_LESS, UCOL_LESS, 0, UCOL_GREATER, UCOL_LESS, 0, 0, UCOL_LESS },
2031             { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, 0, UCOL_LESS, UCOL_LESS, 0, 0, UCOL_GREATER }
2032
2033     };
2034
2035     const static UColAttributeValue caseTestAttributes[][2] =
2036     {
2037             { UCOL_LOWER_FIRST, UCOL_OFF},
2038             { UCOL_UPPER_FIRST, UCOL_OFF},
2039             { UCOL_LOWER_FIRST, UCOL_ON},
2040             { UCOL_UPPER_FIRST, UCOL_ON}
2041
2042     };
2043     int32_t i,j,k;
2044     UErrorCode status = U_ZERO_ERROR;
2045     UCollationElements *iter;
2046     UCollator  *myCollation;
2047     myCollation = ucol_open("en_US", &status);
2048
2049     if(U_FAILURE(status)){
2050         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2051         return;
2052     }
2053     log_verbose("Testing different case settings\n");
2054     ucol_setStrength(myCollation, UCOL_TERTIARY);
2055
2056     for(k = 0; k<4; k++) {
2057       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2058       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2059       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2060       for (i = 0; i < 3 ; i++) {
2061         for(j = i+1; j<4; j++) {
2062           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2063         }
2064       }
2065     }
2066     ucol_close(myCollation);
2067
2068     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2069     if(U_FAILURE(status)){
2070         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2071         return;
2072     }
2073     log_verbose("Testing different case settings with custom rules\n");
2074     ucol_setStrength(myCollation, UCOL_TERTIARY);
2075
2076     for(k = 0; k<4; k++) {
2077       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2078       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2079       for (i = 0; i < 3 ; i++) {
2080         for(j = i+1; j<4; j++) {
2081           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2082           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2083           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2084           backAndForth(iter);
2085           ucol_closeElements(iter);
2086           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2087           backAndForth(iter);
2088           ucol_closeElements(iter);
2089         }
2090       }
2091     }
2092     ucol_close(myCollation);
2093     {
2094       const static char *lowerFirst[] = {
2095         "h",
2096         "H",
2097         "ch",
2098         "Ch",
2099         "CH",
2100         "cha",
2101         "chA",
2102         "Cha",
2103         "ChA",
2104         "CHa",
2105         "CHA",
2106         "i",
2107         "I"
2108       };
2109
2110       const static char *upperFirst[] = {
2111         "H",
2112         "h",
2113         "CH",
2114         "Ch",
2115         "ch",
2116         "CHA",
2117         "CHa",
2118         "ChA",
2119         "Cha",
2120         "chA",
2121         "cha",
2122         "I",
2123         "i"
2124       };
2125       log_verbose("mixed case test\n");
2126       log_verbose("lower first, case level off\n");
2127       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2128       log_verbose("upper first, case level off\n");
2129       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2130       log_verbose("lower first, case level on\n");
2131       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2132       log_verbose("upper first, case level on\n");
2133       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2134     }
2135
2136 }
2137
2138 static void TestIncrementalNormalize(void) {
2139
2140     /*UChar baseA     =0x61;*/
2141     UChar baseA     =0x41;
2142 /*    UChar baseB     = 0x42;*/
2143     UChar ccMix[]   = {0x316, 0x321, 0x300};
2144     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2145     /*
2146         0x316 is combining grave accent below, cc=220
2147         0x321 is combining palatalized hook below, cc=202
2148         0x300 is combining grave accent, cc=230
2149     */
2150
2151     /*int          maxSLen   = 2000;*/
2152     int          maxSLen   = 64000;
2153     int          sLen;
2154     int          i;
2155
2156     UCollator        *coll;
2157     UErrorCode       status = U_ZERO_ERROR;
2158     UCollationResult result;
2159
2160     int32_t myQ = QUICK;
2161
2162     if(QUICK < 0) {
2163       QUICK = 1;
2164     }
2165
2166     {
2167         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2168         /*          most buffers along the way.*/
2169         UChar            *strA;
2170         UChar            *strB;
2171
2172         strA = malloc((maxSLen+1) * sizeof(UChar));
2173         strB = malloc((maxSLen+1) * sizeof(UChar));
2174
2175         coll = ucol_open("en_US", &status);
2176         if(status == U_FILE_ACCESS_ERROR) {
2177           log_data_err("Is your data around?\n");
2178           return;
2179         } else if(U_FAILURE(status)) {
2180           log_err("Error opening collator\n");
2181           return;
2182         }
2183         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2184
2185         /*for (sLen = 257; sLen<maxSLen; sLen++) {*/
2186         /*for (sLen = 4; sLen<maxSLen; sLen++) {*/
2187         /*for (sLen = 1000; sLen<1001; sLen++) {*/
2188         for (sLen = 500; sLen<501; sLen++) {
2189         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2190             strA[0] = baseA;
2191             strB[0] = baseA;
2192             for (i=1; i<=sLen-1; i++) {
2193                 strA[i] = ccMix[i % 3];
2194                 strB[sLen-i] = ccMix[i % 3];
2195             }
2196             strA[sLen]   = 0;
2197             strB[sLen]   = 0;
2198
2199             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2200             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2201             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2202             doTest(coll, strA, strB, UCOL_EQUAL);
2203         }
2204         free(strA);
2205         free(strB);
2206     }
2207
2208     QUICK = myQ;
2209
2210
2211     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2212     /*         of the string.  Checks a couple of edge cases.*/
2213
2214     {
2215         UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2216         UChar strB[] = {0x41, 0xc0, 0x316, 0};
2217         ucol_setStrength(coll, UCOL_TERTIARY);
2218         doTest(coll, strA, strB, UCOL_EQUAL);
2219     }
2220
2221     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2222
2223     {
2224       /* New UCA  3.1.1.
2225        * test below used a code point from Desseret, which sorts differently
2226        * than d800 dc00
2227        */
2228         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2229         UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2230         UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2231         ucol_setStrength(coll, UCOL_TERTIARY);
2232         doTest(coll, strA, strB, UCOL_GREATER);
2233     }
2234
2235     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2236
2237     {
2238         UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2239         UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2240         char  sortKeyA[50];
2241         char  sortKeyAz[50];
2242         char  sortKeyB[50];
2243         char  sortKeyBz[50];
2244         int   r;
2245
2246         /* there used to be -3 here. Hmmmm.... */
2247         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2248         result = ucol_strcoll(coll, strA, 3, strB, 3);
2249         if (result != UCOL_GREATER) {
2250             log_err("ERROR 1 in test 4\n");
2251         }
2252         result = ucol_strcoll(coll, strA, -1, strB, -1);
2253         if (result != UCOL_EQUAL) {
2254             log_err("ERROR 2 in test 4\n");
2255         }
2256
2257         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2258         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2259         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2260         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2261
2262         r = strcmp(sortKeyA, sortKeyAz);
2263         if (r <= 0) {
2264             log_err("Error 3 in test 4\n");
2265         }
2266         r = strcmp(sortKeyA, sortKeyB);
2267         if (r <= 0) {
2268             log_err("Error 4 in test 4\n");
2269         }
2270         r = strcmp(sortKeyAz, sortKeyBz);
2271         if (r != 0) {
2272             log_err("Error 5 in test 4\n");
2273         }
2274
2275         ucol_setStrength(coll, UCOL_IDENTICAL);
2276         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2277         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2278         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2279         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2280
2281         r = strcmp(sortKeyA, sortKeyAz);
2282         if (r <= 0) {
2283             log_err("Error 6 in test 4\n");
2284         }
2285         r = strcmp(sortKeyA, sortKeyB);
2286         if (r <= 0) {
2287             log_err("Error 7 in test 4\n");
2288         }
2289         r = strcmp(sortKeyAz, sortKeyBz);
2290         if (r != 0) {
2291             log_err("Error 8 in test 4\n");
2292         }
2293         ucol_setStrength(coll, UCOL_TERTIARY);
2294     }
2295
2296
2297     /*  Test 5:  Null characters in non-normal source strings.*/
2298
2299     {
2300         UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2301         UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2302         char  sortKeyA[50];
2303         char  sortKeyAz[50];
2304         char  sortKeyB[50];
2305         char  sortKeyBz[50];
2306         int   r;
2307
2308         result = ucol_strcoll(coll, strA, 6, strB, 6);
2309         if (result != UCOL_GREATER) {
2310             log_err("ERROR 1 in test 5\n");
2311         }
2312         result = ucol_strcoll(coll, strA, -1, strB, -1);
2313         if (result != UCOL_EQUAL) {
2314             log_err("ERROR 2 in test 5\n");
2315         }
2316
2317         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2318         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2319         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2320         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2321
2322         r = strcmp(sortKeyA, sortKeyAz);
2323         if (r <= 0) {
2324             log_err("Error 3 in test 5\n");
2325         }
2326         r = strcmp(sortKeyA, sortKeyB);
2327         if (r <= 0) {
2328             log_err("Error 4 in test 5\n");
2329         }
2330         r = strcmp(sortKeyAz, sortKeyBz);
2331         if (r != 0) {
2332             log_err("Error 5 in test 5\n");
2333         }
2334
2335         ucol_setStrength(coll, UCOL_IDENTICAL);
2336         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2337         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2338         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2339         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2340
2341         r = strcmp(sortKeyA, sortKeyAz);
2342         if (r <= 0) {
2343             log_err("Error 6 in test 5\n");
2344         }
2345         r = strcmp(sortKeyA, sortKeyB);
2346         if (r <= 0) {
2347             log_err("Error 7 in test 5\n");
2348         }
2349         r = strcmp(sortKeyAz, sortKeyBz);
2350         if (r != 0) {
2351             log_err("Error 8 in test 5\n");
2352         }
2353         ucol_setStrength(coll, UCOL_TERTIARY);
2354     }
2355
2356
2357     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2358
2359     {
2360         UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2361         UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2362
2363         result = ucol_strcoll(coll, strA, 5, strB, 5);
2364         if (result != UCOL_LESS) {
2365             log_err("Error 1 in test 6\n");
2366         }
2367         result = ucol_strcoll(coll, strA, -1, strB, -1);
2368         if (result != UCOL_EQUAL) {
2369             log_err("Error 2 in test 6\n");
2370         }
2371     }
2372
2373     ucol_close(coll);
2374 }
2375
2376
2377
2378 #if 0
2379 static void TestGetCaseBit(void) {
2380   static const char *caseBitData[] = {
2381     "a", "A", "ch", "Ch", "CH",
2382       "\\uFF9E", "\\u0009"
2383   };
2384
2385   static const uint8_t results[] = {
2386     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
2387       UCOL_UPPER_CASE, UCOL_LOWER_CASE
2388   };
2389
2390   uint32_t i, blen = 0;
2391   UChar b[256] = {0};
2392   UErrorCode status = U_ZERO_ERROR;
2393   UCollator *UCA = ucol_open("", &status);
2394   uint8_t res = 0;
2395
2396   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
2397     blen = u_unescape(caseBitData[i], b, 256);
2398     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
2399     if(results[i] != res) {
2400       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
2401     }
2402   }
2403 }
2404 #endif
2405
2406 static void TestHangulTailoring(void) {
2407     static const char *koreanData[] = {
2408         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2409             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2410             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2411             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2412             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2413             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2414     };
2415
2416     const char *rules =
2417         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2418         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2419         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2420         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2421         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2422         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2423
2424
2425   UErrorCode status = U_ZERO_ERROR;
2426   UChar rlz[2048] = { 0 };
2427   uint32_t rlen = u_unescape(rules, rlz, 2048);
2428
2429   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2430   if(status == U_FILE_ACCESS_ERROR) {
2431     log_data_err("Is your data around?\n");
2432     return;
2433   } else if(U_FAILURE(status)) {
2434     log_err("Error opening collator\n");
2435     return;
2436   }
2437
2438   log_verbose("Using start of korean rules\n");
2439
2440   if(U_SUCCESS(status)) {
2441     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2442   } else {
2443     log_err("Unable to open collator with rules %s\n", rules);
2444   }
2445
2446   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2447   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2448   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2449
2450   ucol_close(coll);
2451
2452   log_verbose("Using ko__LOTUS locale\n");
2453   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2454 }
2455
2456 static void TestCompressOverlap(void) {
2457     UChar       secstr[150];
2458     UChar       tertstr[150];
2459     UErrorCode  status = U_ZERO_ERROR;
2460     UCollator  *coll;
2461     char        result[200];
2462     uint32_t    resultlen;
2463     int         count = 0;
2464     char       *tempptr;
2465
2466     coll = ucol_open("", &status);
2467
2468     if (U_FAILURE(status)) {
2469         log_err("Collator can't be created\n");
2470         return;
2471     }
2472     while (count < 149) {
2473         secstr[count] = 0x0020; /* [06, 05, 05] */
2474         tertstr[count] = 0x0020;
2475         count ++;
2476     }
2477
2478     /* top down compression ----------------------------------- */
2479     secstr[count] = 0x0332; /* [, 87, 05] */
2480     tertstr[count] = 0x3000; /* [06, 05, 07] */
2481
2482     /* no compression secstr should have 150 secondary bytes, tertstr should
2483     have 150 tertiary bytes.
2484     with correct overlapping compression, secstr should have 4 secondary
2485     bytes, tertstr should have > 2 tertiary bytes */
2486     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2487     tempptr = uprv_strchr(result, 1) + 1;
2488     while (*(tempptr + 1) != 1) {
2489         /* the last secondary collation element is not checked since it is not
2490         part of the compression */
2491         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2492             log_err("Secondary compression overlapped\n");
2493         }
2494         tempptr ++;
2495     }
2496
2497     /* tertiary top/bottom/common for en_US is similar to the secondary
2498     top/bottom/common */
2499     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2500     tempptr = uprv_strrchr(result, 1) + 1;
2501     while (*(tempptr + 1) != 0) {
2502         /* the last secondary collation element is not checked since it is not
2503         part of the compression */
2504         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2505             log_err("Tertiary compression overlapped\n");
2506         }
2507         tempptr ++;
2508     }
2509
2510     /* bottom up compression ------------------------------------- */
2511     secstr[count] = 0;
2512     tertstr[count] = 0;
2513     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2514     tempptr = uprv_strchr(result, 1) + 1;
2515     while (*(tempptr + 1) != 1) {
2516         /* the last secondary collation element is not checked since it is not
2517         part of the compression */
2518         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2519             log_err("Secondary compression overlapped\n");
2520         }
2521         tempptr ++;
2522     }
2523
2524     /* tertiary top/bottom/common for en_US is similar to the secondary
2525     top/bottom/common */
2526     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2527     tempptr = uprv_strrchr(result, 1) + 1;
2528     while (*(tempptr + 1) != 0) {
2529         /* the last secondary collation element is not checked since it is not
2530         part of the compression */
2531         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2532             log_err("Tertiary compression overlapped\n");
2533         }
2534         tempptr ++;
2535     }
2536
2537     ucol_close(coll);
2538 }
2539
2540 static void TestCyrillicTailoring(void) {
2541   static const char *test[] = {
2542     "\\u0410b",
2543       "\\u0410\\u0306a",
2544       "\\u04d0A"
2545   };
2546
2547     /* Russian overrides contractions, so this test is not valid anymore */
2548     /*genericLocaleStarter("ru", test, 3);*/
2549
2550     genericLocaleStarter("root", test, 3);
2551     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2552     genericRulesStarter("&Z < \\u0410", test, 3);
2553     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2554     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2555     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2556     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2557 }
2558
/**
 * Runs two ordered string lists against a rule string that applies
 * [suppressContractions] to the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {

  /* The one rule string shared by both runs. */
  static const char suppressCyrillic[] = "[suppressContractions [\\u0400-\\u047f]]";

  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };

  genericRulesStarter(suppressCyrillic, testNoCont, LEN(testNoCont));
  genericRulesStarter(suppressCyrillic, testNoCont2, LEN(testNoCont2));
}
2575
2576 static void TestContraction(void) {
2577     const static char *testrules[] = {
2578         "&A = AB / B",
2579         "&A = A\\u0306/\\u0306",
2580         "&c = ch / h"
2581     };
2582     const static UChar testdata[][2] = {
2583         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2584         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2585         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2586     };
2587     const static UChar testdata2[][2] = {
2588         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2589         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2590         {0x0063 /* 'c' */, 0x006C /* 'l' */}
2591     };
2592     const static char *testrules3[] = {
2593         "&z < xyz &xyzw << B",
2594         "&z < xyz &xyz << B / w",
2595         "&z < ch &achm << B",
2596         "&z < ch &a << B / chm",
2597         "&\\ud800\\udc00w << B",
2598         "&\\ud800\\udc00 << B / w",
2599         "&a\\ud800\\udc00m << B",
2600         "&a << B / \\ud800\\udc00m",
2601     };
2602
2603     UErrorCode  status   = U_ZERO_ERROR;
2604     UCollator  *coll;
2605     UChar       rule[256] = {0};
2606     uint32_t    rlen     = 0;
2607     int         i;
2608
2609     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2610         UCollationElements *iter1;
2611         int j = 0;
2612         log_verbose("Rule %s for testing\n", testrules[i]);
2613         rlen = u_unescape(testrules[i], rule, 32);
2614         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2615         if (U_FAILURE(status)) {
2616             log_err("Collator creation failed %s\n", testrules[i]);
2617             return;
2618         }
2619         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2620         if (U_FAILURE(status)) {
2621             log_err("Collation iterator creation failed\n");
2622             return;
2623         }
2624         while (j < 2) {
2625             UCollationElements *iter2 = ucol_openElements(coll,
2626                                                          &(testdata[i][j]),
2627                                                          1, &status);
2628             uint32_t ce;
2629             if (U_FAILURE(status)) {
2630                 log_err("Collation iterator creation failed\n");
2631                 return;
2632             }
2633             ce = ucol_next(iter2, &status);
2634             while (ce != UCOL_NULLORDER) {
2635                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2636                     log_err("Collation elements in contraction split does not match\n");
2637                     return;
2638                 }
2639                 ce = ucol_next(iter2, &status);
2640             }
2641             j ++;
2642             ucol_closeElements(iter2);
2643         }
2644         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2645             log_err("Collation elements not exhausted\n");
2646             return;
2647         }
2648         ucol_closeElements(iter1);
2649         ucol_close(coll);
2650     }
2651
2652     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2653     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2654     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2655         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2656                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2657                 testdata2[1][1]);
2658         return;
2659     }
2660     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2661         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2662                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2663                 testdata2[2][1]);
2664         return;
2665     }
2666     ucol_close(coll);
2667
2668     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2669         UCollator          *coll1,
2670                            *coll2;
2671         UCollationElements *iter1,
2672                            *iter2;
2673         UChar               ch = 0x0042 /* 'B' */;
2674         uint32_t            ce;
2675         rlen = u_unescape(testrules3[i], rule, 32);
2676         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2677         rlen = u_unescape(testrules3[i + 1], rule, 32);
2678         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2679         if (U_FAILURE(status)) {
2680             log_err("Collator creation failed %s\n", testrules[i]);
2681             return;
2682         }
2683         iter1 = ucol_openElements(coll1, &ch, 1, &status);
2684         iter2 = ucol_openElements(coll2, &ch, 1, &status);
2685         if (U_FAILURE(status)) {
2686             log_err("Collation iterator creation failed\n");
2687             return;
2688         }
2689         ce = ucol_next(iter1, &status);
2690         if (U_FAILURE(status)) {
2691             log_err("Retrieving ces failed\n");
2692             return;
2693         }
2694         while (ce != UCOL_NULLORDER) {
2695             if (ce != (uint32_t)ucol_next(iter2, &status)) {
2696                 log_err("CEs does not match\n");
2697                 return;
2698             }
2699             ce = ucol_next(iter1, &status);
2700             if (U_FAILURE(status)) {
2701                 log_err("Retrieving ces failed\n");
2702                 return;
2703             }
2704         }
2705         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2706             log_err("CEs not exhausted\n");
2707             return;
2708         }
2709         ucol_closeElements(iter1);
2710         ucol_closeElements(iter2);
2711         ucol_close(coll1);
2712         ucol_close(coll2);
2713     }
2714 }
2715
2716 static void TestExpansion(void) {
2717     const static char *testrules[] = {
2718         "&J << K / B & K << M",
2719         "&J << K / B << M"
2720     };
2721     const static UChar testdata[][3] = {
2722         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2723         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2724         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2725         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2726         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2727         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2728     };
2729
2730     UErrorCode  status   = U_ZERO_ERROR;
2731     UCollator  *coll;
2732     UChar       rule[256] = {0};
2733     uint32_t    rlen     = 0;
2734     int         i;
2735
2736     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2737         int j = 0;
2738         log_verbose("Rule %s for testing\n", testrules[i]);
2739         rlen = u_unescape(testrules[i], rule, 32);
2740         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2741         if (U_FAILURE(status)) {
2742             log_err("Collator creation failed %s\n", testrules[i]);
2743             return;
2744         }
2745
2746         for (j = 0; j < 5; j ++) {
2747             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2748         }
2749         ucol_close(coll);
2750     }
2751 }
2752
#if 0
/* This test exercises the current limitations of the engine.   */
/* It always fails, so it is compiled out by default.           */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rules = "&a=b/c&d=c/e";
    static const char *orderA[] = {"add","b","adf"};
    static const char *orderB[] = {"aa","b","af"};

    log_verbose("recursive expansions\n");
    genericRulesStarter(rules, orderA, sizeof(orderA)/sizeof(orderA[0]));
    genericRulesStarter(rules, orderB, sizeof(orderB)/sizeof(orderB[0]));
  }

  /* contractions spanning expansions */
  {
    static const char *rules = "&a<<<c/e&g<<<eh";
    static const char *orderA[] = {"ad","c","af","f","ch","h"};
    static const char *orderB[] = {"ad","c","ch","af","f","h"};

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rules, orderA, sizeof(orderA)/sizeof(orderA[0]));
    genericRulesStarter(rules, orderB, sizeof(orderB)/sizeof(orderB[0]));
  }

  /* normalization: nulls in contractions */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* both orders, first with decomposition on, then with it off */
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(orderA[0]),
                                   attrs, normOn, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(orderB[0]),
                                   attrs, normOn, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(orderA[0]),
                                   attrs, normOff, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(orderB[0]),
                                   attrs, normOff, sizeof(attrs)/sizeof(attrs[0]));
  }

  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the section above -   */
  /* possibly a copy/paste placeholder; confirm the intended test data. */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(orderA[0]),
                                   attrs, normOn, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(orderB[0]),
                                   attrs, normOn, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(orderA[0]),
                                   attrs, normOff, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(orderB[0]),
                                   attrs, normOff, sizeof(attrs)/sizeof(attrs[0]));
  }

  /* variable top */
  {
    /* alternative rules that were tried and left disabled: */
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *rules = "&\\u2010<x<[variable top]=z";
    static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue shifted[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue nonIgnorable[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rules, orderC, sizeof(orderC)/sizeof(orderC[0]),
                                   attrs, shifted, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(orderA[0]),
                                   attrs, shifted, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(orderB[0]),
                                   attrs, shifted, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(orderA[0]),
                                   attrs, nonIgnorable, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(orderB[0]),
                                   attrs, nonIgnorable, sizeof(attrs)/sizeof(attrs[0]));
  }

  /* case level */
  {
    static const char *rules = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *orderA[] = {"c","CH","Ch","cH","ch"};
    static const char *orderB[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};
    /* The UCOL_OFF variant of this section was never enabled: */
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/

    log_verbose("case level\n");
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(orderA[0]),
                                   attrs, upperFirst, sizeof(attrs)/sizeof(attrs[0]));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(orderB[0]),
                                   attrs, upperFirst, sizeof(attrs)/sizeof(attrs[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }
}
#endif
2844
2845 static void TestBocsuCoverage(void) {
2846   UErrorCode status = U_ZERO_ERROR;
2847   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2848   UChar       test[256] = {0};
2849   uint32_t    tlen     = u_unescape(testString, test, 32);
2850   uint8_t key[256]     = {0};
2851   uint32_t klen         = 0;
2852
2853   UCollator *coll = ucol_open("", &status);
2854   if(U_SUCCESS(status)) {
2855   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2856
2857   klen = ucol_getSortKey(coll, test, tlen, key, 256);
2858
2859   ucol_close(coll);
2860   } else {
2861     log_data_err("Couldn't open UCA\n");
2862   }
2863 }
2864
2865 static void TestVariableTopSetting(void) {
2866   UErrorCode status = U_ZERO_ERROR;
2867   const UChar *current = NULL;
2868   uint32_t varTopOriginal = 0, varTop1, varTop2;
2869   UCollator *coll = ucol_open("", &status);
2870   if(U_SUCCESS(status)) {
2871
2872   uint32_t strength = 0;
2873   uint16_t specs = 0;
2874   uint32_t chOffset = 0;
2875   uint32_t chLen = 0;
2876   uint32_t exOffset = 0;
2877   uint32_t exLen = 0;
2878   uint32_t oldChOffset = 0;
2879   uint32_t oldChLen = 0;
2880   uint32_t oldExOffset = 0;
2881   uint32_t oldExLen = 0;
2882   uint32_t prefixOffset = 0;
2883   uint32_t prefixLen = 0;
2884
2885   UBool startOfRules = TRUE;
2886   UColTokenParser src;
2887   UColOptionSet opts;
2888
2889   UChar *rulesCopy = NULL;
2890   uint32_t rulesLen;
2891
2892   UCollationResult result;
2893
2894   UChar first[256] = { 0 };
2895   UChar second[256] = { 0 };
2896   UParseError parseError;
2897   int32_t myQ = QUICK;
2898
2899   src.opts = &opts;
2900
2901   if(QUICK <= 0) {
2902     QUICK = 1;
2903   }
2904
2905   /* this test will fail when normalization is turned on */
2906   /* therefore we always turn off exhaustive mode for it */
2907   if(1) { /* QUICK > 0*/
2908     log_verbose("Slide variable top over UCARules\n");
2909     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2910     rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
2911     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
2912
2913     if(U_SUCCESS(status) && rulesLen > 0) {
2914       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2915       src.current = src.source = rulesCopy;
2916       src.end = rulesCopy+rulesLen;
2917       src.extraCurrent = src.end;
2918       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2919
2920       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
2921         strength = src.parsedToken.strength;
2922         chOffset = src.parsedToken.charsOffset;
2923         chLen = src.parsedToken.charsLen;
2924         exOffset = src.parsedToken.extensionOffset;
2925         exLen = src.parsedToken.extensionLen;
2926         prefixOffset = src.parsedToken.prefixOffset;
2927         prefixLen = src.parsedToken.prefixLen;
2928         specs = src.parsedToken.flags;
2929
2930         startOfRules = FALSE;
2931         if(0) {
2932           log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
2933         }
2934         if(strength == UCOL_PRIMARY) {
2935           status = U_ZERO_ERROR;
2936           varTopOriginal = ucol_getVariableTop(coll, &status);
2937           varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
2938           if(U_FAILURE(status)) {
2939             char buffer[256];
2940             char *buf = buffer;
2941             uint32_t i = 0, j;
2942             uint32_t CE = UCOL_NO_MORE_CES;
2943
2944             /* before we start screaming, let's see if there is a problem with the rules */
2945             collIterate s;
2946             uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
2947
2948             CE = ucol_getNextCE(coll, &s, &status);
2949
2950             for(i = 0; i < oldChLen; i++) {
2951               j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
2952               buf += j;
2953             }
2954             if(status == U_PRIMARY_TOO_LONG_ERROR) {
2955               log_verbose("= Expected failure for %s =", buffer);
2956             } else {
2957               if(s.pos == s.endp) {
2958                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
2959                   oldChOffset, u_errorName(status), buffer);
2960               } else {
2961                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
2962                   buffer);
2963               }
2964             }
2965           }
2966           varTop2 = ucol_getVariableTop(coll, &status);
2967           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
2968             log_err("cannot retrieve set varTop value!\n");
2969             continue;
2970           }
2971
2972           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
2973
2974             u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
2975             u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
2976             u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
2977             first[2*oldChLen+chLen] = 0;
2978
2979             if(oldExLen == 0) {
2980               u_strncpy(second, rulesCopy+chOffset, chLen);
2981               second[chLen] = 0;
2982             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
2983               u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
2984               u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
2985               u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
2986               second[2*oldExLen+chLen] = 0;
2987             }
2988             result = ucol_strcoll(coll, first, -1, second, -1);
2989             if(result == UCOL_EQUAL) {
2990               doTest(coll, first, second, UCOL_EQUAL);
2991             } else {
2992               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
2993             }
2994           }
2995         }
2996         if(strength != UCOL_TOK_RESET) {
2997           oldChOffset = chOffset;
2998           oldChLen = chLen;
2999           oldExOffset = exOffset;
3000           oldExLen = exLen;
3001         }
3002       }
3003       status = U_ZERO_ERROR;
3004     }
3005     else {
3006       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3007       return;
3008     }
3009     if (U_FAILURE(status)) {
3010         log_err("Error parsing rules %s\n", u_errorName(status));
3011         return;
3012     }
3013     status = U_ZERO_ERROR;
3014   }
3015
3016   QUICK = myQ;
3017
3018   log_verbose("Testing setting variable top to contractions\n");
3019   {
3020     /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3021     /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3022     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3023     while(*conts != 0) {
3024       if(*(conts+2) == 0) {
3025         varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3026       } else {
3027         varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3028       }
3029       if(U_FAILURE(status)) {
3030         log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3031           *conts, *(conts+1), *(conts+2));
3032         status = U_ZERO_ERROR;
3033       }
3034       conts+=3;
3035     }
3036
3037     status = U_ZERO_ERROR;
3038
3039     first[0] = 0x0040;
3040     first[1] = 0x0050;
3041     first[2] = 0x0000;
3042
3043     ucol_setVariableTop(coll, first, -1, &status);
3044
3045     if(U_SUCCESS(status)) {
3046       log_err("Invalid contraction succeded in setting variable top!\n");
3047     }
3048
3049   }
3050
3051   log_verbose("Test restoring variable top\n");
3052
3053   status = U_ZERO_ERROR;
3054   ucol_restoreVariableTop(coll, varTopOriginal, &status);
3055   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3056     log_err("Couldn't restore old variable top\n");
3057   }
3058
3059   log_verbose("Testing calling with error set\n");
3060
3061   status = U_INTERNAL_PROGRAM_ERROR;
3062   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3063   varTop2 = ucol_getVariableTop(coll, &status);
3064   ucol_restoreVariableTop(coll, varTop2, &status);
3065   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3066   varTop2 = ucol_getVariableTop(NULL, &status);
3067   ucol_restoreVariableTop(NULL, varTop2, &status);
3068   if(status != U_INTERNAL_PROGRAM_ERROR) {
3069     log_err("Bad reaction to passed error!\n");
3070   }
3071   free(rulesCopy);
3072   ucol_close(coll);
3073   } else {
3074     log_data_err("Couldn't open UCA collator\n");
3075   }
3076
3077 }
3078
3079 static void TestNonChars(void) {
3080   static const char *test[] = {
3081     "\\u0000",
3082     "\\uFFFE", "\\uFFFF",
3083       "\\U0001FFFE", "\\U0001FFFF",
3084       "\\U0002FFFE", "\\U0002FFFF",
3085       "\\U0003FFFE", "\\U0003FFFF",
3086       "\\U0004FFFE", "\\U0004FFFF",
3087       "\\U0005FFFE", "\\U0005FFFF",
3088       "\\U0006FFFE", "\\U0006FFFF",
3089       "\\U0007FFFE", "\\U0007FFFF",
3090       "\\U0008FFFE", "\\U0008FFFF",
3091       "\\U0009FFFE", "\\U0009FFFF",
3092       "\\U000AFFFE", "\\U000AFFFF",
3093       "\\U000BFFFE", "\\U000BFFFF",
3094       "\\U000CFFFE", "\\U000CFFFF",
3095       "\\U000DFFFE", "\\U000DFFFF",
3096       "\\U000EFFFE", "\\U000EFFFF",
3097       "\\U000FFFFE", "\\U000FFFFF",
3098       "\\U0010FFFE", "\\U0010FFFF"
3099   };
3100   UErrorCode status = U_ZERO_ERROR;
3101   UCollator *coll = ucol_open("en_US", &status);
3102
3103   log_verbose("Test non characters\n");
3104
3105   if(U_SUCCESS(status)) {
3106     genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
3107   } else {
3108     log_err("Unable to open collator\n");
3109   }
3110
3111   ucol_close(coll);
3112 }
3113
3114 static void TestExtremeCompression(void) {
3115   static char *test[4];
3116   int32_t j = 0, i = 0;
3117
3118   for(i = 0; i<4; i++) {
3119     test[i] = (char *)malloc(2048*sizeof(char));
3120   }
3121
3122   for(j = 20; j < 500; j++) {
3123     for(i = 0; i<4; i++) {
3124       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3125       test[i][j-1] = (char)('a'+i);
3126       test[i][j] = 0;
3127     }
3128     genericLocaleStarter("en_US", (const char **)test, 4);
3129   }
3130
3131
3132   for(i = 0; i<4; i++) {
3133     free(test[i]);
3134   }
3135 }
3136
#if 0
/*
 * Older, disabled variant of TestExtremeCompression(); it has the same
 * name as the enabled function above and therefore cannot be switched on
 * next to it.  It fills four buffers for every length in [10, 2048) and
 * calls genericLocaleStarter() once, on the strings left by the last
 * iteration.
 *
 * NOTE(review): the first loop fills j-2 characters but writes the
 * distinguishing letter at index j-1 - confirm whether j-1 was meant.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  /* the collator is opened but not used by the code below */
  UCollator *coll = ucol_open("en_US", &status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
  ucol_close(coll);
}
#endif
3166
/*
 * Tailors a handful of supplementary characters (written as surrogate
 * pairs), alone and followed by Latin letters, and passes the list below
 * to genericRulesStarter() together with the tailoring.
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *ordered[] = {
    "z","\\ud900\\udc25",  "\\ud805\\udc50",
       "\\ud800\\udc00y",  "\\ud800\\udc00r",
       "\\ud800\\udc00f",  "\\ud800\\udc00",
       "\\ud800\\udc00c", "\\ud800\\udc00b",
       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
       "\\ud800\\udc00a",
       "c", "b"
  };

  /* all 14 entries take part */
  genericRulesStarter(tailoring, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
3187
3188 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3189 static void TestPrefix(void) {
3190   uint32_t i;
3191
3192   static struct {
3193     const char *rules;
3194     const char *data[50];
3195     const uint32_t len;
3196   } tests[] = {
3197     { "&z <<< z|a",
3198       {"zz", "za"}, 2 },
3199
3200     { "&z <<< z|   a",
3201       {"zz", "za"}, 2 },
3202     { "[strength I]"
3203       "&a=\\ud900\\udc25"
3204       "&z<<<\\ud900\\udc25|a",
3205       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3206   };
3207
3208
3209   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3210     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3211   }
3212 }
3213
3214 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3215 /* JIS X 4061 collation order implementation                                   */
3216 static void TestNewJapanese(void) {
3217
3218   static const char *test1[] = {
3219       "\\u30b7\\u30e3\\u30fc\\u30ec",
3220       "\\u30b7\\u30e3\\u30a4",
3221       "\\u30b7\\u30e4\\u30a3",
3222       "\\u30b7\\u30e3\\u30ec",
3223       "\\u3061\\u3087\\u3053",
3224       "\\u3061\\u3088\\u3053",
3225       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3226       "\\u3066\\u30fc\\u305f",
3227       "\\u30c6\\u30fc\\u30bf",
3228       "\\u30c6\\u30a7\\u30bf",
3229       "\\u3066\\u3048\\u305f",
3230       "\\u3067\\u30fc\\u305f",
3231       "\\u30c7\\u30fc\\u30bf",
3232       "\\u30c7\\u30a7\\u30bf",
3233       "\\u3067\\u3048\\u305f",
3234       "\\u3066\\u30fc\\u305f\\u30fc",
3235       "\\u30c6\\u30fc\\u30bf\\u30a1",
3236       "\\u30c6\\u30a7\\u30bf\\u30fc",
3237       "\\u3066\\u3047\\u305f\\u3041",
3238       "\\u3066\\u3048\\u305f\\u30fc",
3239       "\\u3067\\u30fc\\u305f\\u30fc",
3240       "\\u30c7\\u30fc\\u30bf\\u30a1",
3241       "\\u3067\\u30a7\\u305f\\u30a1",
3242       "\\u30c7\\u3047\\u30bf\\u3041",
3243       "\\u30c7\\u30a8\\u30bf\\u30a2",
3244       "\\u3072\\u3086",
3245       "\\u3073\\u3085\\u3042",
3246       "\\u3074\\u3085\\u3042",
3247       "\\u3073\\u3085\\u3042\\u30fc",
3248       "\\u30d3\\u30e5\\u30a2\\u30fc",
3249       "\\u3074\\u3085\\u3042\\u30fc",
3250       "\\u30d4\\u30e5\\u30a2\\u30fc",
3251       "\\u30d2\\u30e5\\u30a6",
3252       "\\u30d2\\u30e6\\u30a6",
3253       "\\u30d4\\u30e5\\u30a6\\u30a2",
3254       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3255       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3256       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3257       "\\u3072\\u3085\\u3093",
3258       "\\u3074\\u3085\\u3093",
3259       "\\u3075\\u30fc\\u308a",
3260       "\\u30d5\\u30fc\\u30ea",
3261       "\\u3075\\u3045\\u308a",
3262       "\\u3075\\u30a5\\u308a",
3263       "\\u3075\\u30a5\\u30ea",
3264       "\\u30d5\\u30a6\\u30ea",
3265       "\\u3076\\u30fc\\u308a",
3266       "\\u30d6\\u30fc\\u30ea",
3267       "\\u3076\\u3045\\u308a",
3268       "\\u30d6\\u30a5\\u308a",
3269       "\\u3077\\u3046\\u308a",
3270       "\\u30d7\\u30a6\\u30ea",
3271       "\\u3075\\u30fc\\u308a\\u30fc",
3272       "\\u30d5\\u30a5\\u30ea\\u30fc",
3273       "\\u3075\\u30a5\\u308a\\u30a3",
3274       "\\u30d5\\u3045\\u308a\\u3043",
3275       "\\u30d5\\u30a6\\u30ea\\u30fc",
3276       "\\u3075\\u3046\\u308a\\u3043",
3277       "\\u30d6\\u30a6\\u30ea\\u30a4",
3278       "\\u3077\\u30fc\\u308a\\u30fc",
3279       "\\u3077\\u30a5\\u308a\\u30a4",
3280       "\\u3077\\u3046\\u308a\\u30fc",
3281       "\\u30d7\\u30a6\\u30ea\\u30a4",
3282       "\\u30d5\\u30fd",
3283       "\\u3075\\u309e",
3284       "\\u3076\\u309d",
3285       "\\u3076\\u3075",
3286       "\\u3076\\u30d5",
3287       "\\u30d6\\u3075",
3288       "\\u30d6\\u30d5",
3289       "\\u3076\\u309e",
3290       "\\u3076\\u3077",
3291       "\\u30d6\\u3077",
3292       "\\u3077\\u309d",
3293       "\\u30d7\\u30fd",
3294       "\\u3077\\u3075",
3295 };
3296
3297   static const char *test2[] = {
3298     "\\u306f\\u309d", /* H\\u309d */
3299     "\\u30cf\\u30fd", /* K\\u30fd */
3300     "\\u306f\\u306f", /* HH */
3301     "\\u306f\\u30cf", /* HK */
3302     "\\u30cf\\u30cf", /* KK */
3303     "\\u306f\\u309e", /* H\\u309e */
3304     "\\u30cf\\u30fe", /* K\\u30fe */
3305     "\\u306f\\u3070", /* HH\\u309b */
3306     "\\u30cf\\u30d0", /* KK\\u309b */
3307     "\\u306f\\u3071", /* HH\\u309c */
3308     "\\u30cf\\u3071", /* KH\\u309c */
3309     "\\u30cf\\u30d1", /* KK\\u309c */
3310     "\\u3070\\u309d", /* H\\u309b\\u309d */
3311     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3312     "\\u3070\\u306f", /* H\\u309bH */
3313     "\\u30d0\\u30cf", /* K\\u309bK */
3314     "\\u3070\\u309e", /* H\\u309b\\u309e */
3315     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3316     "\\u3070\\u3070", /* H\\u309bH\\u309b */
3317     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3318     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3319     "\\u3070\\u3071", /* H\\u309bH\\u309c */
3320     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3321     "\\u3071\\u309d", /* H\\u309c\\u309d */
3322     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3323     "\\u3071\\u306f", /* H\\u309cH */
3324     "\\u30d1\\u30cf", /* K\\u309cK */
3325     "\\u3071\\u3070", /* H\\u309cH\\u309b */
3326     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3327     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3328     "\\u3071\\u3071", /* H\\u309cH\\u309c */
3329     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3330   };
3331   /*
3332   static const char *test3[] = {
3333     "\\u221er\\u221e",
3334     "\\u221eR#",
3335     "\\u221et\\u221e",
3336     "#r\\u221e",
3337     "#R#",
3338     "#t%",
3339     "#T%",
3340     "8t\\u221e",
3341     "8T\\u221e",
3342     "8t#",
3343     "8T#",
3344     "8t%",
3345     "8T%",
3346     "8t8",
3347     "8T8",
3348     "\\u03c9r\\u221e",
3349     "\\u03a9R%",
3350     "rr\\u221e",
3351     "rR\\u221e",
3352     "Rr\\u221e",
3353     "RR\\u221e",
3354     "RT%",
3355     "rt8",
3356     "tr\\u221e",
3357     "tr8",
3358     "TR8",
3359     "tt8",
3360     "\\u30b7\\u30e3\\u30fc\\u30ec",
3361   };
3362   */
3363   static const UColAttribute att[] = { UCOL_STRENGTH };
3364   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3365
3366   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3367   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3368
3369   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3370   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3371   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3372   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3373   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3374 }
3375
3376 static void TestStrCollIdenticalPrefix(void) {
3377   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3378   const char* test[] = {
3379     "ab\\ud9b0\\udc70",
3380     "ab\\ud9b0\\udc71"
3381   };
3382   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3383 }
3384 /* Contractions should have all their canonically equivalent */
3385 /* strings included */
3386 static void TestContractionClosure(void) {
3387   static struct {
3388     const char *rules;
3389     const char *data[50];
3390     const uint32_t len;
3391   } tests[] = {
3392     {   "&b=\\u00e4\\u00e4",
3393       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3394     {   "&b=\\u00C5",
3395       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3396   };
3397   uint32_t i;
3398
3399
3400   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3401     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3402   }
3403 }
3404
3405 /* This tests also fails*/
3406 static void TestBeforePrefixFailure(void) {
3407   static struct {
3408     const char *rules;
3409     const char *data[50];
3410     const uint32_t len;
3411   } tests[] = {
3412     { "&g <<< a"
3413       "&[before 3]\\uff41 <<< x",
3414       {"x", "\\uff41"}, 2 },
3415     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3416         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3417         "&[before 3]\\u30a7<<<\\u30a9",
3418       {"\\u30a9", "\\u30a7"}, 2 },
3419     {   "&[before 3]\\u30a7<<<\\u30a9"
3420         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3421         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3422       {"\\u30a9", "\\u30a7"}, 2 },
3423   };
3424   uint32_t i;
3425
3426
3427   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3428     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3429   }
3430
3431 #if 0
3432   const char* rule1 =
3433         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3434         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3435         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3436   const char* rule2 =
3437         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3438         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3439         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3440   const char* test[] = {
3441       "\\u30c6\\u30fc\\u30bf",
3442       "\\u30c6\\u30a7\\u30bf",
3443   };
3444   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3445   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3446 /* this piece of code should be in some sort of verbose mode     */
3447 /* it gets the collation elements for elements and prints them   */
3448 /* This is useful when trying to see whether the problem is      */
3449   {
3450     UErrorCode status = U_ZERO_ERROR;
3451     uint32_t i = 0;
3452     UCollationElements *it = NULL;
3453     uint32_t CE;
3454     UChar string[256];
3455     uint32_t uStringLen;
3456     UCollator *coll = NULL;
3457
3458     uStringLen = u_unescape(rule1, string, 256);
3459
3460     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3461
3462     /*coll = ucol_open("ja_JP_JIS", &status);*/
3463     it = ucol_openElements(coll, string, 0, &status);
3464
3465     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3466       log_verbose("%s\n", test[i]);
3467       uStringLen = u_unescape(test[i], string, 256);
3468       ucol_setText(it, string, uStringLen, &status);
3469
3470       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3471         log_verbose("%08X\n", CE);
3472       }
3473       log_verbose("\n");
3474
3475     }
3476
3477     ucol_closeElements(it);
3478     ucol_close(coll);
3479   }
3480 #endif
3481 }
3482
3483 static void TestPrefixCompose(void) {
3484   const char* rule1 =
3485         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3486   /*
3487   const char* test[] = {
3488       "\\u30c6\\u30fc\\u30bf",
3489       "\\u30c6\\u30a7\\u30bf",
3490   };
3491   */
3492   {
3493     UErrorCode status = U_ZERO_ERROR;
3494     /*uint32_t i = 0;*/
3495     /*UCollationElements *it = NULL;*/
3496 /*    uint32_t CE;*/
3497     UChar string[256];
3498     uint32_t uStringLen;
3499     UCollator *coll = NULL;
3500
3501     uStringLen = u_unescape(rule1, string, 256);
3502
3503     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3504     ucol_close(coll);
3505   }
3506
3507
3508 }
3509
3510 /*
3511 [last variable] last variable value
3512 [last primary ignorable] largest CE for primary ignorable
3513 [last secondary ignorable] largest CE for secondary ignorable
3514 [last tertiary ignorable] largest CE for tertiary ignorable
3515 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3516 */
3517
3518 static void TestRuleOptions(void) {
3519   /* values here are hardcoded and are correct for the current UCA
3520    * when the UCA changes, one might be forced to change these
3521    * values. (\\u02d0, \\U00010FFFC etc...)
3522    */
3523   static struct {
3524     const char *rules;
3525     const char *data[50];
3526     const uint32_t len;
3527   } tests[] = {
3528     /* - all befores here amount to zero */
3529     { "&[before 3][first tertiary ignorable]<<<a",
3530         { "\\u0000", "a"}, 2
3531     }, /* you cannot go before first tertiary ignorable */
3532
3533     { "&[before 3][last tertiary ignorable]<<<a",
3534         { "\\u0000", "a"}, 2
3535     }, /* you cannot go before last tertiary ignorable */
3536
3537     { "&[before 3][first secondary ignorable]<<<a",
3538         { "\\u0000", "a"}, 2
3539     }, /* you cannot go before first secondary ignorable */
3540
3541     { "&[before 3][last secondary ignorable]<<<a",
3542         { "\\u0000", "a"}, 2
3543     }, /* you cannot go before first secondary ignorable */
3544
3545     /* 'normal' befores */
3546
3547     { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3548         {  "c", "b", "\\u0332", "a" }, 4
3549     },
3550
3551     /* we don't have a code point that corresponds to
3552      * the last primary ignorable
3553      */
3554     { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3555         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3556     },
3557
3558     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3559         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
3560     },
3561
3562     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3563         {  "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
3564     },
3565
3566     { "&[first regular]<a"
3567       "&[before 1][first regular]<b",
3568       { "b", "\\u02d0", "a", "\\u02d1"}, 4
3569     },
3570
3571     { "&[before 1][last regular]<b"
3572       "&[last regular]<a",
3573         { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
3574     },
3575
3576     { "&[before 1][first implicit]<b"
3577       "&[first implicit]<a",
3578         { "b", "\\u4e00", "a", "\\u4e01"}, 4
3579     },
3580
3581     { "&[before 1][last implicit]<b"
3582       "&[last implicit]<a",
3583         { "b", "\\U0010FFFD", "a" }, 3
3584     },
3585
3586     { "&[last variable]<z"
3587       "&[last primary ignorable]<x"
3588       "&[last secondary ignorable]<<y"
3589       "&[last tertiary ignorable]<<<w"
3590       "&[top]<u",
3591       {"\\ufffb",  "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
3592     }
3593
3594   };
3595   uint32_t i;
3596
3597
3598   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3599     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3600   }
3601 }
3602
3603
3604 static void TestOptimize(void) {
3605   /* this is not really a test - just trying out
3606    * whether copying of UCA contents will fail
3607    * Cannot really test, since the functionality
3608    * remains the same.
3609    */
3610   static struct {
3611     const char *rules;
3612     const char *data[50];
3613     const uint32_t len;
3614   } tests[] = {
3615     /* - all befores here amount to zero */
3616     { "[optimize [\\uAC00-\\uD7FF]]",
3617     { "a", "b"}, 2}
3618   };
3619   uint32_t i;
3620
3621   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3622     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3623   }
3624 }
3625
3626 /*
3627 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3628 weiv    ucol_strcollIter?
3629 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3630 weiv    these are the input strings?
3631 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3632 weiv    will check - could be a problem with utf-8 iterator
3633 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3634 weiv    hmmm
3635 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3636 weiv    that doesn't sound right
3637 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3638 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3639 cycheng@ca.ibm.c... yes
3640 weiv    and then do the comparison
3641 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3642 weiv    utf-16 strings look like a little endian ones in the example you sent me
3643 weiv    It could be a bug - let me try to test it out
3644 cycheng@ca.ibm.c... ok
3645 cycheng@ca.ibm.c... we can wait till the conf. call
3646 cycheng@ca.ibm.c... next weke
3647 weiv    that would be great
3648 weiv    hmmm
3649 weiv    I might be wrong
3650 weiv    let me play with it some more
3651 cycheng@ca.ibm.c... ok
3652 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3653 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3654 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3655 weiv    ok
3656 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3657 weiv    thanks
3658 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3659 */
/*
 * Disabled reproduction of the report quoted in the comment above: one pair
 * of strings is compared through UTF-16BE character iterators and through
 * UTF-8 character iterators, with normalization switched on, and an error
 * is logged when the two comparison results differ.
 * The whole function is compiled out by the #if 0 below.
 */
#if 0
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* raw big-endian UTF-16 bytes: D800 0021 and FFFC 0062 */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the byte sequences from the report for the UTF-8 iterator */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  /* every string is exactly 4 bytes long in both encodings */
  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  /* both iterator flavours are expected to give the same ordering */
  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3700
3701 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3702 static void Alexis2(void) {
3703   UErrorCode status = U_ZERO_ERROR;
3704   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3705   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3706   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3707   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3708
3709   UConverter *conv = NULL;
3710
3711   UCharIterator U16BEItS, U16BEItT;
3712   UCharIterator U8ItS, U8ItT;
3713
3714   UCollationResult resU16, resU16BE, resU8;
3715
3716   const char* pairs[][2] = {
3717     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3718     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3719     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3720     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3721     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3722     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3723     { "\\u0020", "\\u0020\\u0000"}
3724 /*
3725 5F20 (my result here)
3726 5F204E008E3F
3727 5F20 (your result here)
3728 */
3729   };
3730
3731   int32_t i = 0;
3732
3733   UCollator *coll = ucol_open("", &status);
3734   if(status == U_FILE_ACCESS_ERROR) {
3735     log_data_err("Is your data around?\n");
3736     return;
3737   } else if(U_FAILURE(status)) {
3738     log_err("Error opening collator\n");
3739     return;
3740   }
3741   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3742   conv = ucnv_open("UTF16BE", &status);
3743   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3744     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3745     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3746
3747     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3748
3749     log_verbose("Result of strcoll is %i\n", resU16);
3750
3751     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3752     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3753
3754     /* use the original sizes, as the result from converter is in bytes */
3755     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3756     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3757
3758     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3759
3760     log_verbose("Result of U16BE is %i\n", resU16BE);
3761
3762     if(resU16 != resU16BE) {
3763       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3764     }
3765
3766     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3767     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3768
3769     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3770     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3771
3772     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3773
3774     if(resU16 != resU8) {
3775       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3776     }
3777
3778   }
3779
3780   ucol_close(coll);
3781   ucnv_close(conv);
3782 }
3783
3784 static void TestHebrewUCA(void) {
3785   UErrorCode status = U_ZERO_ERROR;
3786   const char *first[] = {
3787     "d790d6b8d79cd795d6bcd7a9",
3788     "d790d79cd79ed7a7d799d799d7a1",
3789     "d790d6b4d79ed795d6bcd7a9",
3790   };
3791
3792   char utf8String[3][256];
3793   UChar utf16String[3][256];
3794
3795   int32_t i = 0, j = 0;
3796   int32_t sizeUTF8[3];
3797   int32_t sizeUTF16[3];
3798
3799   UCollator *coll = ucol_open("", &status);
3800   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3801
3802   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3803     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3804     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3805     log_verbose("%i: ");
3806     for(j = 0; j < sizeUTF16[i]; j++) {
3807       /*log_verbose("\\u%04X", utf16String[i][j]);*/
3808       log_verbose("%04X", utf16String[i][j]);
3809     }
3810     log_verbose("\n");
3811   }
3812   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3813     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3814       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3815     }
3816   }
3817
3818   ucol_close(coll);
3819
3820 }
3821
3822 static void TestPartialSortKeyTermination(void) {
3823   const char* cases[] = {
3824     "\\u1234\\u1234\\udc00",
3825     "\\udc00\\ud800\\ud800"
3826   };
3827
3828   int32_t i = sizeof(UCollator);
3829
3830   UErrorCode status = U_ZERO_ERROR;
3831
3832   UCollator *coll = ucol_open("", &status);
3833
3834   UCharIterator iter;
3835
3836   UChar currCase[256];
3837   int32_t length = 0;
3838   int32_t pKeyLen = 0;
3839
3840   uint8_t key[256];
3841
3842   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3843     uint32_t state[2] = {0, 0};
3844     length = u_unescape(cases[i], currCase, 256);
3845     uiter_setString(&iter, currCase, length);
3846     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3847
3848     log_verbose("Done\n");
3849
3850   }
3851   ucol_close(coll);
3852 }
3853
3854 static void TestSettings(void) {
3855   const char* cases[] = {
3856     "apple",
3857       "Apple"
3858   };
3859
3860   const char* locales[] = {
3861     "",
3862       "en"
3863   };
3864
3865   UErrorCode status = U_ZERO_ERROR;
3866
3867   int32_t i = 0, j = 0;
3868
3869   UChar source[256], target[256];
3870   int32_t sLen = 0, tLen = 0;
3871
3872   UCollator *collateObject = NULL;
3873   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
3874     collateObject = ucol_open(locales[i], &status);
3875     ucol_setStrength(collateObject, UCOL_PRIMARY);
3876     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
3877     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
3878       sLen = u_unescape(cases[j-1], source, 256);
3879       source[sLen] = 0;
3880       tLen = u_unescape(cases[j], target, 256);
3881       source[tLen] = 0;
3882       doTest(collateObject, source, target, UCOL_EQUAL);
3883     }
3884     ucol_close(collateObject);
3885   }
3886 }
3887
3888 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
3889   UErrorCode status = U_ZERO_ERROR;
3890   int32_t errorNo = 0;
3891   /*const UChar *sourceRules = NULL;*/
3892   /*int32_t sourceRulesLen = 0;*/
3893   UColAttributeValue french = UCOL_OFF;
3894   int32_t cloneSize = 0;
3895
3896   if(!ucol_equals(source, target)) {
3897     log_err("Same collators, different address not equal\n");
3898     errorNo++;
3899   }
3900   ucol_close(target);
3901   if(uprv_strcmp(ucol_getLocale(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocale(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
3902     /* currently, safeClone is implemented through getRules/openRules
3903      * so it is the same as the test below - I will comment that test out.
3904      */
3905     /* real thing */
3906     target = ucol_safeClone(source, NULL, &cloneSize, &status);
3907     if(U_FAILURE(status)) {
3908       log_err("Error creating clone\n");
3909       errorNo++;
3910       return errorNo;
3911     }
3912     if(!ucol_equals(source, target)) {
3913       log_err("Collator different from it's clone\n");
3914       errorNo++;
3915     }
3916     french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
3917     if(french == UCOL_ON) {
3918       ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
3919     } else {
3920       ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
3921     }
3922     if(U_FAILURE(status)) {
3923       log_err("Error setting attributes\n");
3924       errorNo++;
3925       return errorNo;
3926     }
3927     if(ucol_equals(source, target)) {
3928       log_err("Collators same even when options changed\n");
3929       errorNo++;
3930     }
3931     ucol_close(target);
3932     /* commented out since safeClone uses exactly the same technique */
3933     /*
3934     sourceRules = ucol_getRules(source, &sourceRulesLen);
3935     target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3936     if(U_FAILURE(status)) {
3937       log_err("Error instantiating target from rules\n");
3938       errorNo++;
3939       return errorNo;
3940     }
3941     if(!ucol_equals(source, target)) {
3942       log_err("Collator different from collator that was created from the same rules\n");
3943       errorNo++;
3944     }
3945     ucol_close(target);
3946     */
3947   }
3948   return errorNo;
3949 }
3950
3951
3952 static void TestEquals(void) {
3953   /* ucol_equals is not currently a public API. There is a chance that it will become
3954    * something like this, but currently it is only used by RuleBasedCollator::operator==
3955    */
3956   /* test whether the two collators instantiated from the same locale are equal */
3957   UErrorCode status = U_ZERO_ERROR;
3958   UParseError parseError;
3959   int32_t noOfLoc = uloc_countAvailable();
3960   const char *locName = NULL;
3961   UCollator *source = NULL, *target = NULL;
3962   int32_t i = 0;
3963
3964   const char* rules[] = {
3965     "&l < lj <<< Lj <<< LJ",
3966       "&n < nj <<< Nj <<< NJ",
3967       "&ae <<< \\u00e4",
3968       "&AE <<< \\u00c4"
3969   };
3970   /*
3971   const char* badRules[] = {
3972     "&l <<< Lj",
3973       "&n < nj <<< nJ <<< NJ",
3974       "&a <<< \\u00e4",
3975       "&AE <<< \\u00c4 <<< x"
3976   };
3977   */
3978
3979   UChar sourceRules[1024], targetRules[1024];
3980   int32_t sourceRulesSize = 0, targetRulesSize = 0;
3981   int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
3982
3983   for(i = 0; i < rulesSize; i++) {
3984     sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
3985     targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
3986   }
3987
3988   source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3989   if(status == U_FILE_ACCESS_ERROR) {
3990     log_data_err("Is your data around?\n");
3991     return;
3992   } else if(U_FAILURE(status)) {
3993     log_err("Error opening collator\n");
3994     return;
3995   }
3996   target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3997   if(!ucol_equals(source, target)) {
3998     log_err("Equivalent collators not equal!\n");
3999   }
4000   ucol_close(source);
4001   ucol_close(target);
4002
4003   source = ucol_open("root", &status);
4004   target = ucol_open("root", &status);
4005   log_verbose("Testing root\n");
4006   if(!ucol_equals(source, source)) {
4007     log_err("Same collator not equal\n");
4008   }
4009   if(TestEqualsForCollator(locName, source, target)) {
4010     log_err("Errors for root\n", locName);
4011   }
4012   ucol_close(source);
4013
4014   for(i = 0; i<noOfLoc; i++) {
4015     status = U_ZERO_ERROR;
4016     locName = uloc_getAvailable(i);
4017     /*if(hasCollationElements(locName)) {*/
4018       log_verbose("Testing equality for locale %s\n", locName);
4019       source = ucol_open(locName, &status);
4020       target = ucol_open(locName, &status);
4021       if(TestEqualsForCollator(locName, source, target)) {
4022         log_err("Errors for locale %s\n", locName);
4023       }
4024       ucol_close(source);
4025     /*}*/
4026   }
4027 }
4028
4029 static void TestJ2726(void) {
4030   UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4031   UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4032   UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4033   UErrorCode status = U_ZERO_ERROR;
4034   UCollator *coll = ucol_open("en", &status);
4035   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4036   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4037   doTest(coll, a, aSpace, UCOL_EQUAL);
4038   doTest(coll, aSpace, a, UCOL_EQUAL);
4039   doTest(coll, a, spaceA, UCOL_EQUAL);
4040   doTest(coll, spaceA, a, UCOL_EQUAL);
4041   doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4042   doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4043   ucol_close(coll);
4044 }
4045
4046 static void NullRule(void) {
4047   UChar r[3] = {0};
4048   UErrorCode status = U_ZERO_ERROR;
4049   UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4050   if(U_SUCCESS(status)) {
4051     log_err("This should have been an error!\n");
4052     ucol_close(coll);
4053   } else {
4054     status = U_ZERO_ERROR;
4055   }
4056   coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4057   if(U_FAILURE(status)) {
4058     log_err("Empty rules should have produced a valid collator\n");
4059   } else {
4060     ucol_close(coll);
4061   }
4062 }
4063
4064 /**
4065  * Test for CollationElementIterator previous and next for the whole set of
4066  * unicode characters with normalization on.
4067  */
4068 static void TestNumericCollation(void)
4069 {
4070     UErrorCode status = U_ZERO_ERROR;
4071
4072     const static char *basicTestStrings[]={
4073     "hello1",
4074     "hello2",
4075     "hello2002",
4076     "hello2003",
4077     "hello123456",
4078     "hello1234567",
4079     "hello10000000",
4080     "hello100000000",
4081     "hello1000000000",
4082     "hello10000000000",
4083     };
4084
4085     const static char *preZeroTestStrings[]={
4086     "avery10000",
4087     "avery010000",
4088     "avery0010000",
4089     "avery00010000",
4090     "avery000010000",
4091     "avery0000010000",
4092     "avery00000010000",
4093     "avery000000010000",
4094     };
4095
4096     const static char *thirtyTwoBitNumericStrings[]={
4097     "avery42949672960",
4098     "avery42949672961",
4099     "avery42949672962",
4100     "avery429496729610"
4101     };
4102
4103     const static char *supplementaryDigits[] = {
4104       "\\uD835\\uDFCE", /* 0 */
4105       "\\uD835\\uDFCF", /* 1 */
4106       "\\uD835\\uDFD0", /* 2 */
4107       "\\uD835\\uDFD1", /* 3 */
4108       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4109       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4110       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4111       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4112       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4113       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4114     };
4115
4116     const static char *foreignDigits[] = {
4117       "\\u0661",
4118         "\\u0662",
4119         "\\u0663",
4120       "\\u0661\\u0660",
4121       "\\u0661\\u0662",
4122       "\\u0661\\u0663",
4123       "\\u0662\\u0660",
4124       "\\u0662\\u0662",
4125       "\\u0662\\u0663",
4126       "\\u0663\\u0660",
4127       "\\u0663\\u0662",
4128       "\\u0663\\u0663"
4129     };
4130
4131     const static char *evenZeroes[] = {
4132       "2000",
4133       "2001",
4134         "2002",
4135         "2003"
4136     };
4137
4138     UColAttribute att = UCOL_NUMERIC_COLLATION;
4139     UColAttributeValue val = UCOL_ON;
4140
4141     /* Open our collator. */
4142     UCollator* coll = ucol_open("root", &status);
4143     if (U_FAILURE(status)){
4144         log_err("ERROR: in using ucol_open()\n %s\n",
4145               myErrorName(status));
4146         return;
4147     }
4148     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4149     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4150     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4151     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4152     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4153
4154     /* Setting up our collator to do digits. */
4155     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4156     if (U_FAILURE(status)){
4157         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4158               myErrorName(status));
4159         return;
4160     }
4161
4162     /*
4163        Testing that prepended zeroes still yield the correct collation behavior.
4164        We expect that every element in our strings array will be equal.
4165     */
4166     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4167
4168     ucol_close(coll);
4169 }
4170
4171 static void TestTibetanConformance(void)
4172 {
4173     const char* test[] = {
4174         "\\u0FB2\\u0591\\u0F71\\u0061",
4175         "\\u0FB2\\u0F71\\u0061"
4176     };
4177
4178     UErrorCode status = U_ZERO_ERROR;
4179     UCollator *coll = ucol_open("", &status);
4180     UChar source[100];
4181     UChar target[100];
4182     int result;
4183     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4184     if (U_SUCCESS(status)) {
4185         u_unescape(test[0], source, 100);
4186         u_unescape(test[1], target, 100);
4187         doTest(coll, source, target, UCOL_EQUAL);
4188         result = ucol_strcoll(coll, source, -1,   target, -1);
4189         log_verbose("result %d\n", result);
4190         if (UCOL_EQUAL != result) {
4191             log_err("Tibetan comparison error\n");
4192         }
4193     }
4194     ucol_close(coll);
4195
4196     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4197 }
4198
/* Hands a two-string ordering to the generic locale starter for the
 * "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
4203
/* Largest raw value fed to the implicit-CE round-trip test. */
#define TST_UCOL_MAX_INPUT 0x220001
/* Bit masks.  No trailing semicolon, so they can be used inside any
 * expression and not only as the last token of a statement. */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4208
4209
4210 static void showImplicit(UChar32 i) {
4211     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4212         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4213     }
4214 }
4215
4216 static void TestImplicitGeneration(void) {
4217     UErrorCode status = U_ZERO_ERROR;
4218     UChar32 last = 0;
4219     UChar32 current;
4220     UChar32 i = 0, j = 0;
4221     UChar32 roundtrip = 0;
4222     UChar32 lastBottom = 0;
4223     UChar32 currentBottom = 0;
4224     UChar32 lastTop = 0;
4225     UChar32 currentTop = 0;
4226
4227     UCollator *coll = ucol_open("root", &status);
4228     if(U_FAILURE(status)) {
4229         log_err("Couldn't open UCA\n");
4230         return;
4231     }
4232
4233     uprv_uca_getRawFromImplicit(0xE20303E7);
4234
4235     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4236         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4237
4238         /* check that it round-trips AND that all intervening ones are illegal*/
4239         roundtrip = uprv_uca_getRawFromImplicit(current);
4240         if (roundtrip != i) {
4241             log_err("No roundtrip %08X\n", i);
4242         }
4243         if (last != 0) {
4244             for (j = last + 1; j < current; ++j) {
4245                 roundtrip = uprv_uca_getRawFromImplicit(j);
4246                 /* raise an error if it *doesn't* find an error*/
4247                 if (roundtrip != -1) {
4248                     log_err("Fails to recognize illegal %08X\n", j);
4249                 }
4250             }
4251         }
4252         /* now do other consistency checks*/
4253         lastBottom = last & bottomByte;
4254         currentBottom = current & bottomByte;
4255         lastTop = last & topByte;
4256         currentTop = current & topByte;
4257
4258         /* print out some values for spot-checking*/
4259         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4260             showImplicit(i-3);
4261             showImplicit(i-2);
4262             showImplicit(i-1);
4263             showImplicit(i);
4264             showImplicit(i+1);
4265             showImplicit(i+2);
4266         }
4267         last = current;
4268
4269         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4270             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4271         }
4272     }
4273     showImplicit(TST_UCOL_MAX_INPUT-2);
4274     showImplicit(TST_UCOL_MAX_INPUT-1);
4275     showImplicit(TST_UCOL_MAX_INPUT);
4276     ucol_close(coll);
4277 }
4278
4279 /**
4280  * Iterate through the given iterator, checking to see that all the strings
4281  * in the expected array are present.
4282  * @param expected array of strings we expect to see, or NULL
4283  * @param expectedCount number of elements of expected, or 0
4284  */
4285 static int32_t checkUEnumeration(const char* msg,
4286                                  UEnumeration* iter,
4287                                  const char** expected,
4288                                  int32_t expectedCount) {
4289     UErrorCode ec = U_ZERO_ERROR;
4290     int32_t i = 0, n, j, bit;
4291     int32_t seenMask = 0;
4292
4293     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4294     n = uenum_count(iter, &ec);
4295     if (!assertSuccess("count", &ec)) return -1;
4296     log_verbose("%s = [", msg);
4297     for (;; ++i) {
4298         const char* s = uenum_next(iter, NULL, &ec);
4299         if (!assertSuccess("snext", &ec) || s == NULL) break;
4300         if (i != 0) log_verbose(",");
4301         log_verbose("%s", s);
4302         /* check expected list */
4303         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4304             if ((seenMask&bit) == 0 &&
4305                 uprv_strcmp(s, expected[j]) == 0) {
4306                 seenMask |= bit;
4307                 break;
4308             }
4309         }
4310     }
4311     log_verbose("] (%d)\n", i);
4312     assertTrue("count verified", i==n);
4313     /* did we see all expected strings? */
4314     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4315         if ((seenMask&bit)!=0) {
4316             log_verbose("Ok: \"%s\" seen\n", expected[j]);
4317         } else {
4318             log_err("FAIL: \"%s\" not seen\n", expected[j]);
4319         }
4320     }
4321     return n;
4322 }
4323
4324 /**
4325  * Test new API added for separate collation tree.
4326  */
4327 static void TestSeparateTrees(void) {
4328     UErrorCode ec = U_ZERO_ERROR;
4329     UEnumeration *e = NULL;
4330     int32_t n = -1;
4331     UBool isAvailable;
4332     char loc[256];
4333
4334     static const char* AVAIL[] = { "en", "de" };
4335
4336     static const char* KW[] = { "collation" };
4337
4338     static const char* KWVAL[] = { "phonebook", "stroke" };
4339
4340 #if !UCONFIG_NO_SERVICE
4341     e = ucol_openAvailableLocales(&ec);
4342     assertSuccess("ucol_openAvailableLocales", &ec);
4343     assertTrue("ucol_openAvailableLocales!=0", e!=0);
4344     n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4345     /* Don't need to check n because we check list */
4346     uenum_close(e);
4347 #endif
4348
4349     e = ucol_getKeywords(&ec);
4350     assertSuccess("ucol_getKeywords", &ec);
4351     assertTrue("ucol_getKeywords!=0", e!=0);
4352     n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4353     /* Don't need to check n because we check list */
4354     uenum_close(e);
4355
4356     e = ucol_getKeywordValues(KW[0], &ec);
4357     assertSuccess("ucol_getKeywordValues", &ec);
4358     assertTrue("ucol_getKeywordValues!=0", e!=0);
4359     n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4360     /* Don't need to check n because we check list */
4361     uenum_close(e);
4362
4363     /* Try setting a warning before calling ucol_getKeywordValues */
4364     ec = U_USING_FALLBACK_WARNING;
4365     e = ucol_getKeywordValues(KW[0], &ec);
4366     assertSuccess("ucol_getKeywordValues [with warning code set]", &ec);
4367     assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4368     n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4369     /* Don't need to check n because we check list */
4370     uenum_close(e);
4371
4372     /*
4373 U_DRAFT int32_t U_EXPORT2
4374 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4375                              const char* locale, UBool* isAvailable,
4376                              UErrorCode* status);
4377 }
4378 */
4379     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr",
4380                                      &isAvailable, &ec);
4381     assertSuccess("getFunctionalEquivalent", &ec);
4382     assertEquals("getFunctionalEquivalent(fr)", "fr", loc);
4383     assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
4384                isAvailable == TRUE);
4385
4386     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR",
4387                                      &isAvailable, &ec);
4388     assertSuccess("getFunctionalEquivalent", &ec);
4389     assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc);
4390     assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
4391                isAvailable == TRUE);
4392 }
4393
/*
 * Pinyin tone-mark tailoring expressed with "[before 2]" resets.
 * Supersedes TestJ784.
 *
 * Two string lists are each handed first to genericRulesStarter() together
 * with the rule string below, and then to genericLocaleStarter() for the
 * "zh" locale.  What those helpers verify about the list order is defined
 * where they are implemented, not here.
 */
static void TestBeforePinyin(void) {
    /* Adjacent literals concatenate into one rule string. */
    static const char pinyinRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Short syllables; note that "lan " carries a trailing space. */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Tone-marked and plain 'a' in both cases, with and without a trailing x. */
    static const char *toneAndCase[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    const int32_t syllableCount = (int32_t)(sizeof syllables / sizeof syllables[0]);
    const int32_t toneAndCaseCount = (int32_t)(sizeof toneAndCase / sizeof toneAndCase[0]);

    genericRulesStarter(pinyinRules, syllables, syllableCount);
    genericLocaleStarter("zh", syllables, syllableCount);
    genericRulesStarter(pinyinRules, toneAndCase, toneAndCaseCount);
    genericLocaleStarter("zh", toneAndCase, toneAndCaseCount);
}
4454
4455 static void TestBeforeTightening(void) {
4456     struct {
4457         const char *rules;
4458         UErrorCode expectedStatus;
4459     } tests[] = {
4460         { "&[before 1]a<x", U_ZERO_ERROR },
4461         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4462         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4463         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4464         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4465         { "&[before 2]a<<x",U_ZERO_ERROR },
4466         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4467         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4468         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4469         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4470         { "&[before 3]a<<<x",U_ZERO_ERROR },
4471         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4472         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4473     };
4474
4475     int32_t i = 0;
4476
4477     UErrorCode status = U_ZERO_ERROR;
4478     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4479     uint32_t rlen = 0;
4480
4481     UCollator *coll = NULL;
4482
4483
4484     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4485         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4486         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4487         if(status != tests[i].expectedStatus) {
4488             log_err("Opening a collator with rules %s returned error code %s, expected %s\n",
4489                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4490         }
4491         ucol_close(coll);
4492         status = U_ZERO_ERROR;
4493     }
4494
4495 }
4496
4497 #if 0
4498 &m < a
4499 &[before 1] a < x <<< X << q <<< Q < z
4500 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4501
4502 &m < a
4503 &[before 2] a << x <<< X << q <<< Q < z
4504 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4505
4506 &m < a
4507 &[before 3] a <<< x <<< X << q <<< Q < z
4508 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4509
4510
4511 &m << a
4512 &[before 1] a < x <<< X << q <<< Q < z
4513 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4514
4515 &m << a
4516 &[before 2] a << x <<< X << q <<< Q < z
4517 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4518
4519 &m << a
4520 &[before 3] a <<< x <<< X << q <<< Q < z
4521 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4522
4523
4524 &m <<< a
4525 &[before 1] a < x <<< X << q <<< Q < z
4526 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4527
4528 &m <<< a
4529 &[before 2] a << x <<< X << q <<< Q < z
4530 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4531
4532 &m <<< a
4533 &[before 3] a <<< x <<< X << q <<< Q < z
4534 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4535
4536
4537 &[before 1] s < x <<< X << q <<< Q < z
4538 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4539
4540 &[before 2] s << x <<< X << q <<< Q < z
4541 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4542
4543 &[before 3] s <<< x <<< X << q <<< Q < z
4544 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4545
4546
4547 &[before 1] \u24DC < x <<< X << q <<< Q < z
4548 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4549
4550 &[before 2] \u24DC << x <<< X << q <<< Q < z
4551 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4552
4553 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4554 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4555 #endif
4556
4557
4558 #if 0
4559 /* requires features not yet supported */
/*
 * Table of "[before N]" tailorings, each paired with a list of strings and
 * the number of entries in that list.  Every row is passed to
 * genericRulesStarter().  This function sits inside an "#if 0" region, so it
 * is currently not compiled.
 */
static void TestMoreBefore(void) {
    struct {
        const char* rule;
        const char* expectedOrder[20];
        int32_t count;
    } cases[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };
    const int32_t caseCount = (int32_t)(sizeof cases / sizeof cases[0]);
    int32_t row;

    /* Rows are processed in table order. */
    for (row = 0; row < caseCount; ++row) {
        genericRulesStarter(cases[row].rule, cases[row].expectedOrder, cases[row].count);
    }
}
4604 #endif
4605
4606 static void TestTailorNULL( void ) {
4607     const static char* rule = "&a <<< '\\u0000'";
4608     UErrorCode status = U_ZERO_ERROR;
4609     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4610     uint32_t rlen = 0;
4611     UChar a = 1, null = 0;
4612     UCollationResult res = UCOL_EQUAL;
4613
4614     UCollator *coll = NULL;
4615
4616
4617     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4618     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4619
4620     if(U_FAILURE(status)) {
4621         log_err("Could not open default collator!\n");
4622     } else {
4623         res = ucol_strcoll(coll, &a, 1, &null, 1);
4624
4625         if(res != UCOL_LESS) {
4626             log_err("NULL was not tailored properly!\n");
4627         }
4628     }
4629
4630     ucol_close(coll);
4631 }
4632
4633 static void
4634 TestThaiSortKey(void)
4635 {
4636   UChar yamakan = 0x0E4E;
4637   UErrorCode status = U_ZERO_ERROR;
4638   uint8_t key[256];
4639   int32_t keyLen = 0;
4640   /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
4641   /* since it stays in the same relative position. This should be addressed in CLDR */
4642   /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
4643   /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
4644   /* UCA 5.0 moves Yammakan */
4645   uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 };
4646   UCollator *coll = ucol_open("th", &status);
4647   if(U_FAILURE(status)) {
4648     log_err("Could not open a collator, exiting (%s)\n", u_errorName(status));
4649     return;
4650   }
4651
4652   keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
4653   if(strcmp((char *)key, (char *)expectedKey)) {
4654     log_err("Yammakan key is different from ICU 34!\n");
4655   }
4656
4657   ucol_close(coll);
4658 }
4659
4660 static void
4661 TestUpperFirstQuaternary(void)
4662 {
4663   const char* tests[] = { "B", "b", "Bb", "bB" };
4664   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4665   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4666   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4667 }
4668
4669 static void
4670 TestJ4960(void)
4671 {
4672   const char* tests[] = { "\\u00e2T", "aT" };
4673   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4674   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4675   const char* tests2[] = { "a", "A" };
4676   const char* rule = "&[first tertiary ignorable]=A=a";
4677   UColAttribute att2[] = { UCOL_CASE_LEVEL };
4678   UColAttributeValue attVals2[] = { UCOL_ON };
4679   /* Test whether we correctly ignore primary ignorables on case level when */
4680   /* we have only primary & case level */
4681   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4682   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4683   /* and case level */
4684   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4685   /* Test whether completely ignorable letters have case level info (they shouldn't) */
4686   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4687 }
4688
4689 static void
4690 TestJ5223(void)
4691 {
4692   static const char *test = "this is a test string";
4693   UChar ustr[256];
4694   int32_t ustr_length = u_unescape(test, ustr, 256);
4695   unsigned char sortkey[256];
4696   int32_t sortkey_length;
4697   UErrorCode status = U_ZERO_ERROR;
4698   static UCollator *coll = NULL;
4699   coll = ucol_open("root", &status);
4700   if(U_FAILURE(status)) {
4701     log_err("Couldn't open UCA\n");
4702     return;
4703   }
4704   ucol_setStrength(coll, UCOL_PRIMARY);
4705   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4706   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4707   if (U_FAILURE(status)) {
4708     log_err("Failed setting atributes\n");
4709     return;
4710   }
4711   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4712   if (sortkey_length > 256) return;
4713
4714   /* we mark the position where the null byte should be written in advance */
4715   sortkey[sortkey_length-1] = 0xAA;
4716
4717   /* we set the buffer size one byte higher than needed */
4718   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4719     sortkey_length+1);
4720
4721   /* no error occurs (for me) */
4722   if (sortkey[sortkey_length-1] == 0xAA) {
4723     log_err("Hit bug at first try\n");
4724   }
4725
4726   /* we mark the position where the null byte should be written again */
4727   sortkey[sortkey_length-1] = 0xAA;
4728
4729   /* this time we set the buffer size to the exact amount needed */
4730   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4731     sortkey_length);
4732
4733   /* now the trailing null byte is not written */
4734   if (sortkey[sortkey_length-1] == 0xAA) {
4735     log_err("Hit bug at second try\n");
4736   }
4737
4738   ucol_close(coll);
4739 }
4740
/*
 * Regression test for the Thai partial sort key problem (J5232).
 * Two escaped Thai strings that differ only in the code unit before the
 * last one (0e47 versus 0e48) are passed to genericLocaleStarter() for the
 * "th" locale.
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };
    const int32_t pairCount = (int32_t)(sizeof thaiPair / sizeof thaiPair[0]);

    genericLocaleStarter("th", thaiPair, pairCount);
}
4752
4753
4754
/*
 * Registers test function x through addTest() under the path
 * "tscoll/cmsccoll/<x>", where <x> is the stringized function name.
 * The expansion refers to a variable named `root`, which must be in scope
 * at the point of use.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Registers the tests of this file under `root`, one TEST() expansion per
 * test function, in the order listed.  Entries that are wrapped in comments
 * are not registered; where a reason is known it is given next to the entry.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/ /* the function itself is compiled out with #if 0 */
    TEST(TestTailorNULL);
    TEST(TestThaiSortKey);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
}
4824
4825 #endif /* #if !UCONFIG_NO_COLLATION */
4826