icuSources/test/cintltst/citertst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2011, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File CITERTST.C
   9 *
  10 * Modification History:
  11 * Date      Name               Description
  12 *           Madhu Katragadda   Ported for C API
  13 * 02/19/01  synwee             Modified test case for new collation iterator
  14 *********************************************************************************/
  15 /*
  16  * Collation Iterator tests.
  17  * (Let me reiterate my position...)
  18  */
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_COLLATION
  23
  24 #include "unicode/ucol.h"
  25 #include "unicode/ucoleitr.h"
  26 #include "unicode/uloc.h"
  27 #include "unicode/uchar.h"
  28 #include "unicode/ustring.h"
  29 #include "unicode/putil.h"
  30 #include "callcoll.h"
  31 #include "cmemory.h"
  32 #include "cintltst.h"
  33 #include "citertst.h"
  34 #include "ccolltst.h"
  35 #include "filestrm.h"
  36 #include "cstring.h"
  37 #include "ucol_imp.h"
  38 #include "ucol_tok.h"
  39 #include "uparse.h"
  40 #include <stdio.h>
  41
  42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
  43
  44 void addCollIterTest(TestNode** root)
  45 {
  46     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
  47     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
  48     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
  49     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
  50     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
  51     addTest(root, &TestNormalizedUnicodeChar,
  52                                 "tscoll/citertst/TestNormalizedUnicodeChar");
  53     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
  54     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
  55     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
  56     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
  57     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
  58     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
  59     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
  60     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
  61     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
  62     addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
  63 }
  64
  65 /* The locales we support */
  66
  67 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
  68
  69 static void TestBug672() {
  70     UErrorCode  status = U_ZERO_ERROR;
  71     UChar       pattern[20];
  72     UChar       text[50];
  73     int         i;
  74     int         result[3][3];
  75
  76     u_uastrcpy(pattern, "resume");
  77     u_uastrcpy(text, "Time to resume updating my resume.");
  78
  79     for (i = 0; i < 3; ++ i) {
  80         UCollator          *coll = ucol_open(LOCALES[i], &status);
  81         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
  82                                                      &status);
  83         UCollationElements *titer = ucol_openElements(coll, text, -1,
  84                                                      &status);
  85         if (U_FAILURE(status)) {
  86             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
  87                     myErrorName(status));
  88             return;
  89         }
  90
  91         log_verbose("locale tested %s\n", LOCALES[i]);
  92
  93         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
  94                U_SUCCESS(status)) {
  95         }
  96         if (U_FAILURE(status)) {
  97             log_err("ERROR: reversing collation iterator :%s\n",
  98                     myErrorName(status));
  99             return;
 100         }
 101         ucol_reset(pitr);
 102
 103         ucol_setOffset(titer, u_strlen(pattern), &status);
 104         if (U_FAILURE(status)) {
 105             log_err("ERROR: setting offset in collator :%s\n",
 106                     myErrorName(status));
 107             return;
 108         }
 109         result[i][0] = ucol_getOffset(titer);
 110         log_verbose("Text iterator set to offset %d\n", result[i][0]);
 111
 112         /* Use previous() */
 113         ucol_previous(titer, &status);
 114         result[i][1] = ucol_getOffset(titer);
 115         log_verbose("Current offset %d after previous\n", result[i][1]);
 116
 117         /* Add one to index */
 118         log_verbose("Adding one to current offset...\n");
 119         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
 120         if (U_FAILURE(status)) {
 121             log_err("ERROR: setting offset in collator :%s\n",
 122                     myErrorName(status));
 123             return;
 124         }
 125         result[i][2] = ucol_getOffset(titer);
 126         log_verbose("Current offset in text = %d\n", result[i][2]);
 127         ucol_closeElements(pitr);
 128         ucol_closeElements(titer);
 129         ucol_close(coll);
 130     }
 131
 132     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
 133         uprv_memcmp(result[1], result[2], 3) != 0) {
 134         log_err("ERROR: Different locales have different offsets at the same character\n");
 135     }
 136 }
 137
 138
 139
 140 /*  Running this test with normalization enabled showed up a bug in the incremental
 141     normalization code. */
 142 static void TestBug672Normalize() {
 143     UErrorCode  status = U_ZERO_ERROR;
 144     UChar       pattern[20];
 145     UChar       text[50];
 146     int         i;
 147     int         result[3][3];
 148
 149     u_uastrcpy(pattern, "resume");
 150     u_uastrcpy(text, "Time to resume updating my resume.");
 151
 152     for (i = 0; i < 3; ++ i) {
 153         UCollator          *coll = ucol_open(LOCALES[i], &status);
 154         UCollationElements *pitr = NULL;
 155         UCollationElements *titer = NULL;
 156
 157         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 158
 159         pitr = ucol_openElements(coll, pattern, -1, &status);
 160         titer = ucol_openElements(coll, text, -1, &status);
 161         if (U_FAILURE(status)) {
 162             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
 163                     myErrorName(status));
 164             return;
 165         }
 166
 167         log_verbose("locale tested %s\n", LOCALES[i]);
 168
 169         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
 170                U_SUCCESS(status)) {
 171         }
 172         if (U_FAILURE(status)) {
 173             log_err("ERROR: reversing collation iterator :%s\n",
 174                     myErrorName(status));
 175             return;
 176         }
 177         ucol_reset(pitr);
 178
 179         ucol_setOffset(titer, u_strlen(pattern), &status);
 180         if (U_FAILURE(status)) {
 181             log_err("ERROR: setting offset in collator :%s\n",
 182                     myErrorName(status));
 183             return;
 184         }
 185         result[i][0] = ucol_getOffset(titer);
 186         log_verbose("Text iterator set to offset %d\n", result[i][0]);
 187
 188         /* Use previous() */
 189         ucol_previous(titer, &status);
 190         result[i][1] = ucol_getOffset(titer);
 191         log_verbose("Current offset %d after previous\n", result[i][1]);
 192
 193         /* Add one to index */
 194         log_verbose("Adding one to current offset...\n");
 195         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
 196         if (U_FAILURE(status)) {
 197             log_err("ERROR: setting offset in collator :%s\n",
 198                     myErrorName(status));
 199             return;
 200         }
 201         result[i][2] = ucol_getOffset(titer);
 202         log_verbose("Current offset in text = %d\n", result[i][2]);
 203         ucol_closeElements(pitr);
 204         ucol_closeElements(titer);
 205         ucol_close(coll);
 206     }
 207
 208     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
 209         uprv_memcmp(result[1], result[2], 3) != 0) {
 210         log_err("ERROR: Different locales have different offsets at the same character\n");
 211     }
 212 }
 213
 214
 215
 216
 217 /**
 218  * Test for CollationElementIterator previous and next for the whole set of
 219  * unicode characters.
 220  */
 221 static void TestUnicodeChar()
 222 {
 223     UChar source[0x100];
 224     UCollator *en_us;
 225     UCollationElements *iter;
 226     UErrorCode status = U_ZERO_ERROR;
 227     UChar codepoint;
 228
 229     UChar *test;
 230     en_us = ucol_open("en_US", &status);
 231     if (U_FAILURE(status)){
 232        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
 233               myErrorName(status));
 234        return;
 235     }
 236
 237     for (codepoint = 1; codepoint < 0xFFFE;)
 238     {
 239       test = source;
 240
 241       while (codepoint % 0xFF != 0)
 242       {
 243         if (u_isdefined(codepoint))
 244           *(test ++) = codepoint;
 245         codepoint ++;
 246       }
 247
 248       if (u_isdefined(codepoint))
 249         *(test ++) = codepoint;
 250
 251       if (codepoint != 0xFFFF)
 252         codepoint ++;
 253
 254       *test = 0;
 255       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
 256       if(U_FAILURE(status)){
 257           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 258               myErrorName(status));
 259           ucol_close(en_us);
 260           return;
 261       }
 262       /* A basic test to see if it's working at all */
 263       log_verbose("codepoint testing %x\n", codepoint);
 264       backAndForth(iter);
 265       ucol_closeElements(iter);
 266
 267       /* null termination test */
 268       iter=ucol_openElements(en_us, source, -1, &status);
 269       if(U_FAILURE(status)){
 270           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 271               myErrorName(status));
 272           ucol_close(en_us);
 273           return;
 274       }
 275       /* A basic test to see if it's working at all */
 276       backAndForth(iter);
 277       ucol_closeElements(iter);
 278     }
 279
 280     ucol_close(en_us);
 281 }
 282
 283 /**
 284  * Test for CollationElementIterator previous and next for the whole set of
 285  * unicode characters with normalization on.
 286  */
 287 static void TestNormalizedUnicodeChar()
 288 {
 289     UChar source[0x100];
 290     UCollator *th_th;
 291     UCollationElements *iter;
 292     UErrorCode status = U_ZERO_ERROR;
 293     UChar codepoint;
 294
 295     UChar *test;
 296     /* thai should have normalization on */
 297     th_th = ucol_open("th_TH", &status);
 298     if (U_FAILURE(status)){
 299         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
 300               myErrorName(status));
 301         return;
 302     }
 303
 304     for (codepoint = 1; codepoint < 0xFFFE;)
 305     {
 306       test = source;
 307
 308       while (codepoint % 0xFF != 0)
 309       {
 310         if (u_isdefined(codepoint))
 311           *(test ++) = codepoint;
 312         codepoint ++;
 313       }
 314
 315       if (u_isdefined(codepoint))
 316         *(test ++) = codepoint;
 317
 318       if (codepoint != 0xFFFF)
 319         codepoint ++;
 320
 321       *test = 0;
 322       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
 323       if(U_FAILURE(status)){
 324           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 325               myErrorName(status));
 326             ucol_close(th_th);
 327           return;
 328       }
 329
 330       backAndForth(iter);
 331       ucol_closeElements(iter);
 332
 333       iter=ucol_openElements(th_th, source, -1, &status);
 334       if(U_FAILURE(status)){
 335           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 336               myErrorName(status));
 337             ucol_close(th_th);
 338           return;
 339       }
 340
 341       backAndForth(iter);
 342       ucol_closeElements(iter);
 343     }
 344
 345     ucol_close(th_th);
 346 }
 347
 348 /**
 349 * Test the incremental normalization
 350 */
 351 static void TestNormalization()
 352 {
 353           UErrorCode          status = U_ZERO_ERROR;
 354     const char               *str    =
 355                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
 356           UCollator          *coll;
 357           UChar               rule[50];
 358           int                 rulelen = u_unescape(str, rule, 50);
 359           int                 count = 0;
 360     const char                *testdata[] =
 361                         {"\\u1ED9", "o\\u0323\\u0302",
 362                         "\\u0300\\u0315", "\\u0315\\u0300",
 363                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
 364                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
 365                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
 366                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
 367                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
 368     int32_t   srclen;
 369     UChar source[10];
 370     UCollationElements *iter;
 371
 372     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
 373     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 374     if (U_FAILURE(status)){
 375         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
 376               myErrorName(status));
 377         return;
 378     }
 379
 380     srclen = u_unescape(testdata[0], source, 10);
 381     iter = ucol_openElements(coll, source, srclen, &status);
 382     backAndForth(iter);
 383     ucol_closeElements(iter);
 384
 385     srclen = u_unescape(testdata[1], source, 10);
 386     iter = ucol_openElements(coll, source, srclen, &status);
 387     backAndForth(iter);
 388     ucol_closeElements(iter);
 389
 390     while (count < 12) {
 391         srclen = u_unescape(testdata[count], source, 10);
 392         iter = ucol_openElements(coll, source, srclen, &status);
 393
 394         if (U_FAILURE(status)){
 395             log_err("ERROR: in creation of collator element iterator\n %s\n",
 396                   myErrorName(status));
 397             return;
 398         }
 399         backAndForth(iter);
 400         ucol_closeElements(iter);
 401
 402         iter = ucol_openElements(coll, source, -1, &status);
 403
 404         if (U_FAILURE(status)){
 405             log_err("ERROR: in creation of collator element iterator\n %s\n",
 406                   myErrorName(status));
 407             return;
 408         }
 409         backAndForth(iter);
 410         ucol_closeElements(iter);
 411         count ++;
 412     }
 413     ucol_close(coll);
 414 }
 415
 416 /**
 417  * Test for CollationElementIterator.previous()
 418  *
 419  * @bug 4108758 - Make sure it works with contracting characters
 420  *
 421  */
 422 static void TestPrevious()
 423 {
 424     UCollator *coll=NULL;
 425     UChar rule[50];
 426     UChar *source;
 427     UCollator *c1, *c2, *c3;
 428     UCollationElements *iter;
 429     UErrorCode status = U_ZERO_ERROR;
 430     UChar test1[50];
 431     UChar test2[50];
 432
 433     u_uastrcpy(test1, "What subset of all possible test cases?");
 434     u_uastrcpy(test2, "has the highest probability of detecting");
 435     coll = ucol_open("en_US", &status);
 436
 437     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
 438     log_verbose("English locale testing back and forth\n");
 439     if(U_FAILURE(status)){
 440         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 441             myErrorName(status));
 442         ucol_close(coll);
 443         return;
 444     }
 445     /* A basic test to see if it's working at all */
 446     backAndForth(iter);
 447     ucol_closeElements(iter);
 448     ucol_close(coll);
 449
 450     /* Test with a contracting character sequence */
 451     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
 452     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
 453
 454     log_verbose("Contraction rule testing back and forth with no normalization\n");
 455
 456     if (c1 == NULL || U_FAILURE(status))
 457     {
 458         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
 459             myErrorName(status));
 460         return;
 461     }
 462     source=(UChar*)malloc(sizeof(UChar) * 20);
 463     u_uastrcpy(source, "abchdcba");
 464     iter=ucol_openElements(c1, source, u_strlen(source), &status);
 465     if(U_FAILURE(status)){
 466         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 467             myErrorName(status));
 468         return;
 469     }
 470     backAndForth(iter);
 471     ucol_closeElements(iter);
 472     ucol_close(c1);
 473
 474     /* Test with an expanding character sequence */
 475     u_uastrcpy(rule, "&a < b < c/abd < d");
 476     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
 477     log_verbose("Expansion rule testing back and forth with no normalization\n");
 478     if (c2 == NULL || U_FAILURE(status))
 479     {
 480         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
 481             myErrorName(status));
 482         return;
 483     }
 484     u_uastrcpy(source, "abcd");
 485     iter=ucol_openElements(c2, source, u_strlen(source), &status);
 486     if(U_FAILURE(status)){
 487         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 488             myErrorName(status));
 489         return;
 490     }
 491     backAndForth(iter);
 492     ucol_closeElements(iter);
 493     ucol_close(c2);
 494     /* Now try both */
 495     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
 496     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
 497     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
 498
 499     if (c3 == NULL || U_FAILURE(status))
 500     {
 501         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
 502             myErrorName(status));
 503         return;
 504     }
 505     u_uastrcpy(source, "abcdbchdc");
 506     iter=ucol_openElements(c3, source, u_strlen(source), &status);
 507     if(U_FAILURE(status)){
 508         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 509             myErrorName(status));
 510         return;
 511     }
 512     backAndForth(iter);
 513     ucol_closeElements(iter);
 514     ucol_close(c3);
 515     source[0] = 0x0e41;
 516     source[1] = 0x0e02;
 517     source[2] = 0x0e41;
 518     source[3] = 0x0e02;
 519     source[4] = 0x0e27;
 520     source[5] = 0x61;
 521     source[6] = 0x62;
 522     source[7] = 0x63;
 523     source[8] = 0;
 524
 525     coll = ucol_open("th_TH", &status);
 526     log_verbose("Thai locale testing back and forth with normalization\n");
 527     iter=ucol_openElements(coll, source, u_strlen(source), &status);
 528     if(U_FAILURE(status)){
 529         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 530             myErrorName(status));
 531         return;
 532     }
 533     backAndForth(iter);
 534     ucol_closeElements(iter);
 535     ucol_close(coll);
 536
 537     /* prev test */
 538     source[0] = 0x0061;
 539     source[1] = 0x30CF;
 540     source[2] = 0x3099;
 541     source[3] = 0x30FC;
 542     source[4] = 0;
 543
 544     coll = ucol_open("ja_JP", &status);
 545     log_verbose("Japanese locale testing back and forth with normalization\n");
 546     iter=ucol_openElements(coll, source, u_strlen(source), &status);
 547     if(U_FAILURE(status)){
 548         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 549             myErrorName(status));
 550         return;
 551     }
 552     backAndForth(iter);
 553     ucol_closeElements(iter);
 554     ucol_close(coll);
 555
 556     free(source);
 557 }
 558
 559 /**
 560  * Test for getOffset() and setOffset()
 561  */
 562 static void TestOffset()
 563 {
 564     UErrorCode status= U_ZERO_ERROR;
 565     UCollator *en_us=NULL;
 566     UCollationElements *iter, *pristine;
 567     int32_t offset;
 568     OrderAndOffset *orders;
 569     int32_t orderLength=0;
 570     int     count = 0;
 571     UChar test1[50];
 572     UChar test2[50];
 573
 574     u_uastrcpy(test1, "What subset of all possible test cases?");
 575     u_uastrcpy(test2, "has the highest probability of detecting");
 576     en_us = ucol_open("en_US", &status);
 577     log_verbose("Testing getOffset and setOffset for collations\n");
 578     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
 579     if(U_FAILURE(status)){
 580         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 581             myErrorName(status));
 582         ucol_close(en_us);
 583         return;
 584     }
 585
 586     /* testing boundaries */
 587     ucol_setOffset(iter, 0, &status);
 588     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
 589         log_err("Error: After setting offset to 0, we should be at the end "
 590                 "of the backwards iteration");
 591     }
 592     ucol_setOffset(iter, u_strlen(test1), &status);
 593     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
 594         log_err("Error: After setting offset to end of the string, we should "
 595                 "be at the end of the backwards iteration");
 596     }
 597
 598     /* Run all the way through the iterator, then get the offset */
 599
 600     orders = getOrders(iter, &orderLength);
 601
 602     offset = ucol_getOffset(iter);
 603
 604     if (offset != u_strlen(test1))
 605     {
 606         log_err("offset at end != length %d vs %d\n", offset,
 607             u_strlen(test1) );
 608     }
 609
 610     /* Now set the offset back to the beginning and see if it works */
 611     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
 612     if(U_FAILURE(status)){
 613         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 614             myErrorName(status));
 615     ucol_close(en_us);
 616         return;
 617     }
 618     status = U_ZERO_ERROR;
 619
 620     ucol_setOffset(iter, 0, &status);
 621     if (U_FAILURE(status))
 622     {
 623         log_err("setOffset failed. %s\n",    myErrorName(status));
 624     }
 625     else
 626     {
 627         assertEqual(iter, pristine);
 628     }
 629
 630     ucol_closeElements(pristine);
 631     ucol_closeElements(iter);
 632     free(orders);
 633
 634     /* testing offsets in normalization buffer */
 635     test1[0] = 0x61;
 636     test1[1] = 0x300;
 637     test1[2] = 0x316;
 638     test1[3] = 0x62;
 639     test1[4] = 0;
 640     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 641     iter = ucol_openElements(en_us, test1, 4, &status);
 642     if(U_FAILURE(status)){
 643         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 644             myErrorName(status));
 645         ucol_close(en_us);
 646         return;
 647     }
 648
 649     count = 0;
 650     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
 651         U_SUCCESS(status)) {
 652         switch (count) {
 653         case 0:
 654             if (ucol_getOffset(iter) != 1) {
 655                 log_err("ERROR: Offset of iteration should be 1\n");
 656             }
 657             break;
 658         case 3:
 659             if (ucol_getOffset(iter) != 4) {
 660                 log_err("ERROR: Offset of iteration should be 4\n");
 661             }
 662             break;
 663         default:
 664             if (ucol_getOffset(iter) != 3) {
 665                 log_err("ERROR: Offset of iteration should be 3\n");
 666             }
 667         }
 668         count ++;
 669     }
 670
 671     ucol_reset(iter);
 672     count = 0;
 673     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
 674         U_SUCCESS(status)) {
 675         switch (count) {
 676         case 0:
 677         case 1:
 678             if (ucol_getOffset(iter) != 3) {
 679                 log_err("ERROR: Offset of iteration should be 3\n");
 680             }
 681             break;
 682         case 2:
 683             if (ucol_getOffset(iter) != 1) {
 684                 log_err("ERROR: Offset of iteration should be 1\n");
 685             }
 686             break;
 687         default:
 688             if (ucol_getOffset(iter) != 0) {
 689                 log_err("ERROR: Offset of iteration should be 0\n");
 690             }
 691         }
 692         count ++;
 693     }
 694
 695     if(U_FAILURE(status)){
 696         log_err("ERROR: in iterating collation elements %s\n",
 697             myErrorName(status));
 698     }
 699
 700     ucol_closeElements(iter);
 701     ucol_close(en_us);
 702 }
 703
 704 /**
 705  * Test for setText()
 706  */
 707 static void TestSetText()
 708 {
 709     int32_t c,i;
 710     UErrorCode status = U_ZERO_ERROR;
 711     UCollator *en_us=NULL;
 712     UCollationElements *iter1, *iter2;
 713     UChar test1[50];
 714     UChar test2[50];
 715
 716     u_uastrcpy(test1, "What subset of all possible test cases?");
 717     u_uastrcpy(test2, "has the highest probability of detecting");
 718     en_us = ucol_open("en_US", &status);
 719     log_verbose("testing setText for Collation elements\n");
 720     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
 721     if(U_FAILURE(status)){
 722         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
 723             myErrorName(status));
 724     ucol_close(en_us);
 725         return;
 726     }
 727     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
 728     if(U_FAILURE(status)){
 729         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
 730             myErrorName(status));
 731     ucol_close(en_us);
 732         return;
 733     }
 734
 735     /* Run through the second iterator just to exercise it */
 736     c = ucol_next(iter2, &status);
 737     i = 0;
 738
 739     while ( ++i < 10 && (c != UCOL_NULLORDER))
 740     {
 741         if (U_FAILURE(status))
 742         {
 743             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
 744             ucol_closeElements(iter2);
 745             ucol_closeElements(iter1);
 746     ucol_close(en_us);
 747             return;
 748         }
 749
 750         c = ucol_next(iter2, &status);
 751     }
 752
 753     /* Now set it to point to the same string as the first iterator */
 754     ucol_setText(iter2, test1, u_strlen(test1), &status);
 755     if (U_FAILURE(status))
 756     {
 757         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
 758     }
 759     else
 760     {
 761         assertEqual(iter1, iter2);
 762     }
 763
 764     /* Now set it to point to a null string with fake length*/
 765     ucol_setText(iter2, NULL, 2, &status);
 766     if (U_FAILURE(status))
 767     {
 768         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
 769     }
 770     else
 771     {
 772         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
 773             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
 774         }
 775     }
 776
 777     ucol_closeElements(iter2);
 778     ucol_closeElements(iter1);
 779     ucol_close(en_us);
 780 }
 781
 782 /** @bug 4108762
 783  * Test for getMaxExpansion()
 784  */
 785 static void TestMaxExpansion()
 786 {
 787     UErrorCode          status = U_ZERO_ERROR;
 788     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
 789     UChar               ch     = 0;
 790     UChar32             unassigned = 0xEFFFD;
 791     UChar               supplementary[2];
 792     uint32_t            stringOffset = 0;
 793     UBool               isError = FALSE;
 794     uint32_t            sorder = 0;
 795     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
 796     uint32_t            temporder = 0;
 797
 798     UChar rule[256];
 799     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
 800     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
 801         UCOL_DEFAULT_STRENGTH,NULL, &status);
 802     if(U_SUCCESS(status) && coll) {
 803       iter = ucol_openElements(coll, &ch, 1, &status);
 804
 805       while (ch < 0xFFFF && U_SUCCESS(status)) {
 806           int      count = 1;
 807           uint32_t order;
 808           int32_t  size = 0;
 809
 810           ch ++;
 811
 812           ucol_setText(iter, &ch, 1, &status);
 813           order = ucol_previous(iter, &status);
 814
 815           /* thai management */
 816           if (order == 0)
 817               order = ucol_previous(iter, &status);
 818
 819           while (U_SUCCESS(status) &&
 820               ucol_previous(iter, &status) != UCOL_NULLORDER) {
 821               count ++;
 822           }
 823
 824           size = ucol_getMaxExpansion(iter, order);
 825           if (U_FAILURE(status) || size < count) {
 826               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 827                   ch, count);
 828           }
 829       }
 830
 831       /* testing for exact max expansion */
 832       ch = 0;
 833       while (ch < 0x61) {
 834           uint32_t order;
 835           int32_t  size;
 836           ucol_setText(iter, &ch, 1, &status);
 837           order = ucol_previous(iter, &status);
 838           size  = ucol_getMaxExpansion(iter, order);
 839           if (U_FAILURE(status) || size != 1) {
 840               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 841                   ch, 1);
 842           }
 843           ch ++;
 844       }
 845
 846       ch = 0x63;
 847       ucol_setText(iter, &ch, 1, &status);
 848       temporder = ucol_previous(iter, &status);
 849
 850       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
 851           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
 852                   ch, 3);
 853       }
 854
 855       ch = 0x64;
 856       ucol_setText(iter, &ch, 1, &status);
 857       temporder = ucol_previous(iter, &status);
 858
 859       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
 860           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
 861                   ch, 3);
 862       }
 863
 864       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
 865       ucol_setText(iter, supplementary, 2, &status);
 866       sorder = ucol_previous(iter, &status);
 867
 868       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
 869           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 870                   ch, 2);
 871       }
 872
 873       /* testing jamo */
 874       ch = 0x1165;
 875
 876       ucol_setText(iter, &ch, 1, &status);
 877       temporder = ucol_previous(iter, &status);
 878       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
 879           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
 880                   ch, 3);
 881       }
 882
 883       ucol_closeElements(iter);
 884       ucol_close(coll);
 885
 886       /* testing special jamo &a<\u1160 */
 887       rule[0] = 0x26;
 888       rule[1] = 0x71;
 889       rule[2] = 0x3c;
 890       rule[3] = 0x1165;
 891       rule[4] = 0x2f;
 892       rule[5] = 0x71;
 893       rule[6] = 0x71;
 894       rule[7] = 0x71;
 895       rule[8] = 0x71;
 896       rule[9] = 0;
 897
 898       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
 899           UCOL_DEFAULT_STRENGTH,NULL, &status);
 900       iter = ucol_openElements(coll, &ch, 1, &status);
 901
 902       temporder = ucol_previous(iter, &status);
 903       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
 904           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
 905                   ch, 5);
 906       }
 907
 908       ucol_closeElements(iter);
 909       ucol_close(coll);
 910     } else {
 911       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
 912     }
 913
 914 }
 915
 916
 917 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
 918 {
 919     int32_t c1, c2;
 920     int32_t count = 0;
 921     UErrorCode status = U_ZERO_ERROR;
 922
 923     do
 924     {
 925         c1 = ucol_next(i1, &status);
 926         c2 = ucol_next(i2, &status);
 927
 928         if (c1 != c2)
 929         {
 930             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
 931             break;
 932         }
 933
 934         count += 1;
 935     }
 936     while (c1 != UCOL_NULLORDER);
 937 }
 938
 939 /**
 940  * Testing iterators with extremely small buffers
 941  */
 942 static void TestSmallBuffer()
 943 {
 944     UErrorCode          status = U_ZERO_ERROR;
 945     UCollator          *coll;
 946     UCollationElements *testiter,
 947                        *iter;
 948     int32_t             count = 0;
 949     OrderAndOffset     *testorders,
 950                        *orders;
 951
 952     UChar teststr[500];
 953     UChar str[] = {0x300, 0x31A, 0};
 954     /*
 955     creating a long string of decomposable characters,
 956     since by default the writable buffer is of size 256
 957     */
 958     while (count < 500) {
 959         if ((count & 1) == 0) {
 960             teststr[count ++] = 0x300;
 961         }
 962         else {
 963             teststr[count ++] = 0x31A;
 964         }
 965     }
 966
 967     coll = ucol_open("th_TH", &status);
 968     if(U_SUCCESS(status) && coll) {
 969       testiter = ucol_openElements(coll, teststr, 500, &status);
 970       iter = ucol_openElements(coll, str, 2, &status);
 971
 972       orders     = getOrders(iter, &count);
 973       if (count != 2) {
 974           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
 975       }
 976
 977       /*
 978       this will rearrange the string data to 250 characters of 0x300 first then
 979       250 characters of 0x031A
 980       */
 981       testorders = getOrders(testiter, &count);
 982
 983       if (count != 500) {
 984           log_err("Error decomposition does not give the right sized collation elements\n");
 985       }
 986
 987       while (count != 0) {
 988           /* UCA collation element for 0x0F76 */
 989           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
 990               (count <= 250 && testorders[-- count].order != orders[0].order)) {
 991               log_err("Error decomposition does not give the right collation element at %d count\n", count);
 992               break;
 993           }
 994       }
 995
 996       free(testorders);
 997       free(orders);
 998
 999       ucol_reset(testiter);
1000
1001       /* ensures closing of elements done properly to clear writable buffer */
1002       ucol_next(testiter, &status);
1003       ucol_next(testiter, &status);
1004       ucol_closeElements(testiter);
1005       ucol_closeElements(iter);
1006       ucol_close(coll);
1007     } else {
1008       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1009     }
1010 }
1011
/**
* Converts a single hexadecimal digit character to its numeric value
* (snippet of code from genuca).
* @param hex character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are recognized
* @return value in the range 0..15, or 0 for any other character
*/
static int32_t hex2num(char hex) {
    int32_t value = 0;

    if ('0' <= hex && hex <= '9') {
        value = hex - '0';
    } else if ('a' <= hex && hex <= 'f') {
        value = 10 + (hex - 'a');
    } else if ('A' <= hex && hex <= 'F') {
        value = 10 + (hex - 'A');
    }
    return value;
}
1026
1027 /**
1028 * Getting codepoints from a string
1029 * @param str character string contain codepoints seperated by space and ended
1030 *        by a semicolon
1031 * @param codepoints array for storage, assuming size > 5
1032 * @return position at the end of the codepoint section
1033 */
1034 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1035     UErrorCode errorCode = U_ZERO_ERROR;
1036     char *semi = uprv_strchr(str, ';');
1037     char *pipe = uprv_strchr(str, '|');
1038     char *s;
1039     *codepoints = 0;
1040     *contextCPs = 0;
1041     if(semi == NULL) {
1042         log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1043         return str;
1044     }
1045     if(pipe != NULL) {
1046         int32_t contextLength;
1047         *pipe = 0;
1048         contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1049         *pipe = '|';
1050         if(U_FAILURE(errorCode)) {
1051             log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
1052             return str;
1053         }
1054         /* prepend the precontext string to the codepoints */
1055         u_memcpy(codepoints, contextCPs, contextLength);
1056         codepoints += contextLength;
1057         /* start of the code point string */
1058         s = pipe + 1;
1059     } else {
1060         s = str;
1061     }
1062     u_parseString(s, codepoints, 99, NULL, &errorCode);
1063     if(U_FAILURE(errorCode)) {
1064         log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1065         return str;
1066     }
1067     return semi + 1;
1068 }
1069
1070 /**
1071 * Sniplets of code from genuca
1072 */
1073 static int32_t
1074 readElement(char **from, char *to, char separator, UErrorCode *status)
1075 {
1076     if (U_SUCCESS(*status)) {
1077         char    buffer[1024];
1078         int32_t i = 0;
1079         while (**from != separator) {
1080             if (**from != ' ') {
1081                 *(buffer+i++) = **from;
1082             }
1083             (*from)++;
1084         }
1085         (*from)++;
1086         *(buffer + i) = 0;
1087         strcpy(to, buffer);
1088         return i/2;
1089     }
1090
1091     return 0;
1092 }
1093
1094 /**
1095 * Sniplets of code from genuca
1096 */
1097 static uint32_t
1098 getSingleCEValue(char *primary, char *secondary, char *tertiary,
1099                           UErrorCode *status)
1100 {
1101     if (U_SUCCESS(*status)) {
1102         uint32_t  value    = 0;
1103         char      primsave = '\0';
1104         char      secsave  = '\0';
1105         char      tersave  = '\0';
1106         char     *primend  = primary+4;
1107         char     *secend   = secondary+2;
1108         char     *terend   = tertiary+2;
1109         uint32_t  primvalue;
1110         uint32_t  secvalue;
1111         uint32_t  tervalue;
1112
1113         if (uprv_strlen(primary) > 4) {
1114             primsave = *primend;
1115             *primend = '\0';
1116         }
1117
1118         if (uprv_strlen(secondary) > 2) {
1119             secsave = *secend;
1120             *secend = '\0';
1121         }
1122
1123         if (uprv_strlen(tertiary) > 2) {
1124             tersave = *terend;
1125             *terend = '\0';
1126         }
1127
1128         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1129         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1130         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1131         if(primvalue <= 0xFF) {
1132           primvalue <<= 8;
1133         }
1134
1135         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1136            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1137            | (tervalue & UCOL_TERTIARYORDERMASK);
1138
1139         if(primsave!='\0') {
1140             *primend = primsave;
1141         }
1142         if(secsave!='\0') {
1143             *secend = secsave;
1144         }
1145         if(tersave!='\0') {
1146             *terend = tersave;
1147         }
1148         return value;
1149     }
1150     return 0;
1151 }
1152
1153 /**
1154 * Getting collation elements generated from a string
1155 * @param str character string contain collation elements contained in [] and
1156 *        seperated by space
1157 * @param ce array for storage, assuming size > 20
1158 * @param status error status
1159 * @return position at the end of the codepoint section
1160 */
1161 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1162     char       *pStartCP     = uprv_strchr(str, '[');
1163     int         count        = 0;
1164     char       *pEndCP;
1165     char        primary[100];
1166     char        secondary[100];
1167     char        tertiary[100];
1168
1169     while (*pStartCP == '[') {
1170         uint32_t primarycount   = 0;
1171         uint32_t secondarycount = 0;
1172         uint32_t tertiarycount  = 0;
1173         uint32_t CEi = 1;
1174         pEndCP = strchr(pStartCP, ']');
1175         if(pEndCP == NULL) {
1176             break;
1177         }
1178         pStartCP ++;
1179
1180         primarycount   = readElement(&pStartCP, primary, ',', status);
1181         secondarycount = readElement(&pStartCP, secondary, ',', status);
1182         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
1183
1184         /* I want to get the CEs entered right here, including continuation */
1185         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1186         if (U_FAILURE(*status)) {
1187             break;
1188         }
1189
1190         while (2 * CEi < primarycount || CEi < secondarycount ||
1191                CEi < tertiarycount) {
1192             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1193             if (2 * CEi < primarycount) {
1194                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1195                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1196             }
1197
1198             if (2 * CEi + 1 < primarycount) {
1199                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1200                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1201             }
1202
1203             if (CEi < secondarycount) {
1204                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1205                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1206             }
1207
1208             if (CEi < tertiarycount) {
1209                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1210                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1211             }
1212
1213             CEi ++;
1214             ces[count ++] = value;
1215         }
1216
1217       pStartCP = pEndCP + 1;
1218     }
1219     ces[count] = 0;
1220     return pStartCP;
1221 }
1222
1223 /**
1224 * Getting the FractionalUCA.txt file stream
1225 */
1226 static FileStream * getFractionalUCA(void)
1227 {
1228     char        newPath[256];
1229     char        backupPath[256];
1230     FileStream *result = NULL;
1231
1232     /* Look inside ICU_DATA first */
1233     uprv_strcpy(newPath, ctest_dataSrcDir());
1234     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1235     uprv_strcat(newPath, "FractionalUCA.txt");
1236
1237     /* As a fallback, try to guess where the source data was located
1238      *   at the time ICU was built, and look there.
1239      */
1240 #if defined (U_TOPSRCDIR)
1241     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
1242 #else
1243     {
1244         UErrorCode errorCode = U_ZERO_ERROR;
1245         strcpy(backupPath, loadTestData(&errorCode));
1246         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1247     }
1248 #endif
1249     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1250
1251     result = T_FileStream_open(newPath, "rb");
1252
1253     if (result == NULL) {
1254         result = T_FileStream_open(backupPath, "rb");
1255         if (result == NULL) {
1256             log_err("Failed to open either %s or %s\n", newPath, backupPath);
1257         }
1258     }
1259     return result;
1260 }
1261
1262 /**
1263 * Testing the CEs returned by the iterator
1264 */
1265 static void TestCEs() {
1266     FileStream *file = NULL;
1267     char        line[2048];
1268     char       *str;
1269     UChar       codepoints[10];
1270     uint32_t    ces[20];
1271     UErrorCode  status = U_ZERO_ERROR;
1272     UCollator          *coll = ucol_open("", &status);
1273     uint32_t lineNo = 0;
1274     UChar       contextCPs[5];
1275
1276     if (U_FAILURE(status)) {
1277         log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
1278         return;
1279     }
1280
1281     file = getFractionalUCA();
1282
1283     if (file == NULL) {
1284         log_err("*** unable to open input FractionalUCA.txt file ***\n");
1285         return;
1286     }
1287
1288
1289     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1290         int                 count = 0;
1291         UCollationElements *iter;
1292         int32_t            preContextCeLen=0;
1293         lineNo++;
1294         /* skip this line if it is empty or a comment or is a return value
1295         or start of some variable section */
1296         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1297             line[0] == 0x000D || line[0] == '[') {
1298             continue;
1299         }
1300
1301         str = getCodePoints(line, codepoints, contextCPs);
1302
1303         /* these are 'fake' codepoints in the fractional UCA, and are used just
1304          * for positioning of indirect values. They should not go through this
1305          * test.
1306          */
1307         if(*codepoints == 0xFDD0) {
1308           continue;
1309         }
1310         if (*contextCPs != 0) {
1311             iter = ucol_openElements(coll, contextCPs, -1, &status);
1312             if (U_FAILURE(status)) {
1313                 log_err("Error in opening collation elements\n");
1314                 break;
1315             }
1316             while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1317                 preContextCeLen++;
1318             }
1319             ucol_closeElements(iter);
1320         }
1321
1322         getCEs(str, ces+preContextCeLen, &status);
1323         if (U_FAILURE(status)) {
1324             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1325             break;
1326         }
1327         iter = ucol_openElements(coll, codepoints, -1, &status);
1328         if (U_FAILURE(status)) {
1329             log_err("Error in opening collation elements\n");
1330             break;
1331         }
1332         for (;;) {
1333             uint32_t ce = (uint32_t)ucol_next(iter, &status);
1334             if (ce == 0xFFFFFFFF) {
1335                 ce = 0;
1336             }
1337             /* we now unconditionally reorder Thai/Lao prevowels, so this
1338              * test would fail if we don't skip here.
1339              */
1340             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1341               continue;
1342             }
1343             if (ce != ces[count] || U_FAILURE(status)) {
1344                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1345                 break;
1346             }
1347             if (ces[count] == 0) {
1348                 break;
1349             }
1350             count ++;
1351         }
1352         ucol_closeElements(iter);
1353     }
1354
1355     T_FileStream_close(file);
1356     ucol_close(coll);
1357 }
1358
1359 /**
1360 * Testing the discontigous contractions
1361 */
1362 static void TestDiscontiguos() {
1363     const char               *rulestr    =
1364                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1365           UChar               rule[50];
1366           int                 rulelen = u_unescape(rulestr, rule, 50);
1367     const char               *src[] = {
1368      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1369     /* base character blocked */
1370      "XD\\u0300", "XD\\u0300\\u0315",
1371     /* non blocking combining character */
1372      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1373      /* blocking combining character */
1374      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1375      /* contraction prefix */
1376      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1377      "X\\u0300\\u031A\\u0315",
1378      /* ends not with a contraction character */
1379      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1380      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1381     };
1382     const char               *tgt[] = {
1383      /* non blocking combining character */
1384      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1385     /* base character blocked */
1386      "X D \\u0300", "X D \\u0300\\u0315",
1387     /* non blocking combining character */
1388      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1389      /* blocking combining character */
1390      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1391      /* contraction prefix */
1392      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1393      "X\\u0300 \\u031A \\u0315",
1394      /* ends not with a contraction character */
1395      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1396      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1397     };
1398           int                 size   = 20;
1399           UCollator          *coll;
1400           UErrorCode          status    = U_ZERO_ERROR;
1401           int                 count     = 0;
1402           UCollationElements *iter;
1403           UCollationElements *resultiter;
1404
1405     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1406     iter       = ucol_openElements(coll, rule, 1, &status);
1407     resultiter = ucol_openElements(coll, rule, 1, &status);
1408
1409     if (U_FAILURE(status)) {
1410         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1411         return;
1412     }
1413
1414     while (count < size) {
1415         UChar  str[20];
1416         UChar  tstr[20];
1417         int    strLen = u_unescape(src[count], str, 20);
1418         UChar *s;
1419
1420         ucol_setText(iter, str, strLen, &status);
1421         if (U_FAILURE(status)) {
1422             log_err("Error opening collation iterator\n");
1423             return;
1424         }
1425
1426         u_unescape(tgt[count], tstr, 20);
1427         s = tstr;
1428
1429         log_verbose("count %d\n", count);
1430
1431         for (;;) {
1432             uint32_t  ce;
1433             UChar    *e = u_strchr(s, 0x20);
1434             if (e == 0) {
1435                 e = u_strchr(s, 0);
1436             }
1437             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1438             ce = ucol_next(resultiter, &status);
1439             if (U_FAILURE(status)) {
1440                 log_err("Error manipulating collation iterator\n");
1441                 return;
1442             }
1443             while (ce != UCOL_NULLORDER) {
1444                 if (ce != (uint32_t)ucol_next(iter, &status) ||
1445                     U_FAILURE(status)) {
1446                     log_err("Discontiguos contraction test mismatch\n");
1447                     return;
1448                 }
1449                 ce = ucol_next(resultiter, &status);
1450                 if (U_FAILURE(status)) {
1451                     log_err("Error getting next collation element\n");
1452                     return;
1453                 }
1454             }
1455             s = e + 1;
1456             if (*e == 0) {
1457                 break;
1458             }
1459         }
1460         ucol_reset(iter);
1461         backAndForth(iter);
1462         count ++;
1463     }
1464     ucol_closeElements(resultiter);
1465     ucol_closeElements(iter);
1466     ucol_close(coll);
1467 }
1468
1469 static void TestCEBufferOverflow()
1470 {
1471     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1472     UErrorCode          status = U_ZERO_ERROR;
1473     UChar               rule[10];
1474     UCollator          *coll;
1475     UCollationElements *iter;
1476
1477     u_uastrcpy(rule, "&z < AB");
1478     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1479     if (U_FAILURE(status)) {
1480         log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
1481         return;
1482     }
1483
1484     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1485     test. this will cause an overflow in getPrev */
1486     str[0] = 0x0041;    /* 'A' */
1487     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1488     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1489     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
1490     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1491                              &status);
1492     if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1493         status == U_BUFFER_OVERFLOW_ERROR) {
1494         log_err("CE buffer should not overflow with long string of trail surrogates\n");
1495     }
1496     ucol_closeElements(iter);
1497     ucol_close(coll);
1498 }
1499
1500 /**
1501 * Checking collation element validity.
1502 */
1503 #define MAX_CODEPOINTS_TO_SHOW 10
1504 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
1505     int i, lengthToUse = length;
1506     if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
1507         lengthToUse = MAX_CODEPOINTS_TO_SHOW;
1508     }
1509     for (i = 0; i < lengthToUse; ++i) {
1510         int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
1511         if (bytesWritten <= 0) {
1512             break;
1513         }
1514         codepointText += bytesWritten;
1515     }
1516     if (i < length) {
1517         sprintf(codepointText, " ...");
1518     }
1519 }
1520
/**
 * Iterates forward over the collation elements of a string and checks each
 * one against a set of well-formedness rules; the first violation is logged
 * and ends the check.
 * @param coll collator whose elements are checked
 * @param codepoints string to iterate over
 * @param length number of code units in codepoints
 * @return TRUE if the iteration reached UCOL_NULLORDER without a violation,
 *         FALSE otherwise (the locale and the code points are logged then)
 */
static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
                             int length)
{
    UErrorCode          status = U_ZERO_ERROR;
    UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
                                                  &status);
    UBool result = FALSE;
    /*
     * Per-level "done" flags: set when a CE carries a zero weight on that
     * level (for primaries also when the second byte is zero); a later
     * continuation CE with a non-zero weight on a done level is reported.
     */
    UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
    const char * collLocale;

    if (U_FAILURE(status)) {
        log_err("Error creating iterator for testing validity\n");
        return FALSE;
    }
    /* the locale name is only used for the failure message below */
    collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
    if (U_FAILURE(status) || collLocale==NULL) {
        status = U_ZERO_ERROR;
        collLocale = "?";
    }

    /* every 'break' in this loop leaves result == FALSE */
    for (;;) {
        uint32_t ce = ucol_next(iter, &status);
        uint32_t primary, p1, p2, secondary, tertiary;
        if (ce == UCOL_NULLORDER) {
            /* end of the string reached without a violation */
            result = TRUE;
            break;
        }
        if (ce == 0) {
            /* a zero CE carries no weights to validate */
            continue;
        }
        if (ce == 0x02000202) {
            /* special CE for merge-sort character */
            if (*codepoints == 0xFFFE /* && length == 1 */) {
                /*
                 * Note: We should check for length==1 but the token parser appears
                 * to give us trailing NUL characters.
                 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
                 *                     rather than the internal collation rule parser
                 */
                continue;
            } else {
                log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
                        (int)*codepoints, (int)length);
                break;
            }
        }
        /* split the CE into its weights; p1/p2 are the first/second primary byte */
        primary   = UCOL_PRIMARYORDER(ce);
        p1 = primary >> 8;
        p2 = primary & 0xFF;
        secondary = UCOL_SECONDARYORDER(ce);
        tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;

        if (!isContinuation(ce)) {
            /* first CE of a sequence: the done flags are set afresh below */
            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
                log_err("Empty CE %08lX except for case bits\n", (long)ce);
                break;
            }
            if (p1 == 0) {
                if (p2 != 0) {
                    log_err("Primary 00 xx in %08lX\n", (long)ce);
                    break;
                }
                primaryDone = TRUE;
            } else {
                if (p1 <= 2 || p1 >= 0xF0) {
                    /* Primary first bytes F0..FF are specials. */
                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
                    break;
                }
                if (p2 == 0) {
                    primaryDone = TRUE;
                } else {
                    if (p2 <= 3 || p2 >= 0xFF) {
                        /* Primary second bytes 03 and FF are sort key compression terminators. */
                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
                        break;
                    }
                    primaryDone = FALSE;
                }
            }
            if (secondary == 0) {
                if (primary != 0) {
                    log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
                    break;
                }
                secondaryDone = TRUE;
            } else {
                if (secondary <= 2 ||
                    (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
                ) {
                    /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
                    break;
                }
                secondaryDone = FALSE;
            }
            if (tertiary == 0) {
                /* We know that ce != 0. */
                log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
                break;
            }
            if (tertiary <= 2) {
                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
                break;
            }
            tertiaryDone = FALSE;
        } else {
            /*
             * continuation CE: a level may only carry a weight here if it was
             * not marked done by an earlier CE; a zero weight marks it done
             */
            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
                log_err("Empty continuation %08lX\n", (long)ce);
                break;
            }
            if (primaryDone && primary != 0) {
                log_err("Primary was done but continues in %08lX\n", (long)ce);
                break;
            }
            if (p1 == 0) {
                if (p2 != 0) {
                    log_err("Primary 00 xx in %08lX\n", (long)ce);
                    break;
                }
                primaryDone = TRUE;
            } else {
                /* unlike in a first CE, no upper bounds are checked on continuation bytes */
                if (p1 <= 2) {
                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
                    break;
                }
                if (p2 == 0) {
                    primaryDone = TRUE;
                } else {
                    if (p2 <= 3) {
                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
                        break;
                    }
                }
            }
            if (secondaryDone && secondary != 0) {
                log_err("Secondary was done but continues in %08lX\n", (long)ce);
                break;
            }
            if (secondary == 0) {
                secondaryDone = TRUE;
            } else {
                if (secondary <= 2) {
                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
                    break;
                }
            }
            if (tertiaryDone && tertiary != 0) {
                log_err("Tertiary was done but continues in %08lX\n", (long)ce);
                break;
            }
            if (tertiary == 0) {
                tertiaryDone = TRUE;
            } else if (tertiary <= 2) {
                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
                break;
            }
        }
    }
    if (!result) {
        /* sized for MAX_CODEPOINTS_TO_SHOW groups of " XXXX" plus " ..." and the NUL */
        char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
        showCodepoints(codepoints, length, codepointText);
        log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
    }
    ucol_closeElements(iter);
    return result;
}
1688
1689 static void TestCEValidity()
1690 {
1691     /* testing UCA collation elements */
1692     UErrorCode  status      = U_ZERO_ERROR;
1693     /* en_US has no tailorings */
1694     UCollator  *coll        = ucol_open("root", &status);
1695     /* tailored locales */
1696     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1697     const char *loc;
1698     FileStream *file = NULL;
1699     char        line[2048];
1700     UChar       codepoints[11];
1701     int         count = 0;
1702     int         maxCount = 0;
1703     UChar       contextCPs[3];
1704     UChar32     c;
1705     UParseError parseError;
1706     if (U_FAILURE(status)) {
1707         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1708         return;
1709     }
1710     log_verbose("Testing UCA elements\n");
1711     file = getFractionalUCA();
1712     if (file == NULL) {
1713         log_err("Fractional UCA data can not be opened\n");
1714         return;
1715     }
1716
1717     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1718         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1719             line[0] == 0x000D || line[0] == '[') {
1720             continue;
1721         }
1722
1723         getCodePoints(line, codepoints, contextCPs);
1724         checkCEValidity(coll, codepoints, u_strlen(codepoints));
1725     }
1726
1727     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1728     for (c = 0; c <= 0xffff; ++c) {
1729         if (u_isdefined(c)) {
1730             codepoints[0] = (UChar)c;
1731             checkCEValidity(coll, codepoints, 1);
1732         }
1733     }
1734     for (; c <= 0x10ffff; ++c) {
1735         if (u_isdefined(c)) {
1736             int32_t i = 0;
1737             U16_APPEND_UNSAFE(codepoints, i, c);
1738             checkCEValidity(coll, codepoints, i);
1739         }
1740     }
1741
1742     ucol_close(coll);
1743
1744     /* testing tailored collation elements */
1745     log_verbose("Testing tailored elements\n");
1746     if(getTestOption(QUICK_OPTION)) {
1747         maxCount = sizeof(locale)/sizeof(locale[0]);
1748     } else {
1749         maxCount = uloc_countAvailable();
1750     }
1751     while (count < maxCount) {
1752         const UChar *rules = NULL,
1753                     *current = NULL;
1754         UChar *rulesCopy = NULL;
1755         int32_t ruleLen = 0;
1756
1757         uint32_t chOffset = 0;
1758         uint32_t chLen = 0;
1759         uint32_t exOffset = 0;
1760         uint32_t exLen = 0;
1761         uint32_t prefixOffset = 0;
1762         uint32_t prefixLen = 0;
1763         UBool    startOfRules = TRUE;
1764         UColOptionSet opts;
1765
1766         UColTokenParser src;
1767         uint32_t strength = 0;
1768         uint16_t specs = 0;
1769         if(getTestOption(QUICK_OPTION)) {
1770             loc = locale[count];
1771         } else {
1772             loc = uloc_getAvailable(count);
1773             if(!hasCollationElements(loc)) {
1774                 count++;
1775                 continue;
1776             }
1777         }
1778
1779         uprv_memset(&src, 0, sizeof(UColTokenParser));
1780
1781         log_verbose("Testing CEs for %s\n", loc);
1782
1783         coll      = ucol_open(loc, &status);
1784         if (U_FAILURE(status)) {
1785             log_err("%s collator creation failed\n", loc);
1786             return;
1787         }
1788
1789         src.opts = &opts;
1790         rules = ucol_getRules(coll, &ruleLen);
1791
1792         if (ruleLen > 0) {
1793             rulesCopy = (UChar *)uprv_malloc((ruleLen +
1794                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1795             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1796             src.current = src.source = rulesCopy;
1797             src.end = rulesCopy + ruleLen;
1798             src.extraCurrent = src.end;
1799             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1800
1801                 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1802                    the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1803             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1804               strength = src.parsedToken.strength;
1805               chOffset = src.parsedToken.charsOffset;
1806               chLen = src.parsedToken.charsLen;
1807               exOffset = src.parsedToken.extensionOffset;
1808               exLen = src.parsedToken.extensionLen;
1809               prefixOffset = src.parsedToken.prefixOffset;
1810               prefixLen = src.parsedToken.prefixLen;
1811               specs = src.parsedToken.flags;
1812
1813                 startOfRules = FALSE;
1814                 uprv_memcpy(codepoints, src.source + chOffset,
1815                                                        chLen * sizeof(UChar));
1816                 codepoints[chLen] = 0;
1817                 checkCEValidity(coll, codepoints, chLen);
1818             }
1819             uprv_free(src.source);
1820         }
1821
1822         ucol_close(coll);
1823         count ++;
1824     }
1825     T_FileStream_close(file);
1826 }
1827
1828 static void printSortKeyError(const UChar   *codepoints, int length,
1829                                     uint8_t *sortkey, int sklen)
1830 {
1831     int count = 0;
1832     log_err("Sortkey not valid for ");
1833     while (length > 0) {
1834         log_err("0x%04x ", *codepoints);
1835         length --;
1836         codepoints ++;
1837     }
1838     log_err("\nSortkey : ");
1839     while (count < sklen) {
1840         log_err("0x%02x ", sortkey[count]);
1841         count ++;
1842     }
1843     log_err("\n");
1844 }
1845
1846 /**
1847 * Checking sort key validity for all levels
1848 */
1849 static UBool checkSortKeyValidity(UCollator *coll,
1850                                   const UChar *codepoints,
1851                                   int length)
1852 {
1853     UErrorCode status  = U_ZERO_ERROR;
1854     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1855                                       UCOL_TERTIARY, UCOL_QUATERNARY,
1856                                       UCOL_IDENTICAL};
1857     int        strengthlen = 5;
1858     int        strengthIndex = 0;
1859     int        caselevel   = 0;
1860
1861     while (caselevel < 1) {
1862         if (caselevel == 0) {
1863             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1864         }
1865         else {
1866             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1867         }
1868
1869         while (strengthIndex < strengthlen) {
1870             int        count01 = 0;
1871             uint32_t   count   = 0;
1872             uint8_t    sortkey[128];
1873             uint32_t   sklen;
1874
1875             ucol_setStrength(coll, strength[strengthIndex]);
1876             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1877             while (sortkey[count] != 0) {
1878                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
1879                     printSortKeyError(codepoints, length, sortkey, sklen);
1880                     return FALSE;
1881                 }
1882                 if (sortkey[count] == 1) {
1883                     count01 ++;
1884                 }
1885                 count ++;
1886             }
1887
1888             if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
1889                 printSortKeyError(codepoints, length, sortkey, sklen);
1890                 return FALSE;
1891             }
1892             strengthIndex ++;
1893         }
1894         caselevel ++;
1895     }
1896     return TRUE;
1897 }
1898
1899 static void TestSortKeyValidity(void)
1900 {
1901     /* testing UCA collation elements */
1902     UErrorCode  status      = U_ZERO_ERROR;
1903     /* en_US has no tailorings */
1904     UCollator  *coll        = ucol_open("en_US", &status);
1905     /* tailored locales */
1906     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1907     FileStream *file = NULL;
1908     char        line[2048];
1909     UChar       codepoints[10];
1910     int         count = 0;
1911     UChar       contextCPs[5];
1912     UParseError parseError;
1913     if (U_FAILURE(status)) {
1914         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1915         return;
1916     }
1917     log_verbose("Testing UCA elements\n");
1918     file = getFractionalUCA();
1919     if (file == NULL) {
1920         log_err("Fractional UCA data can not be opened\n");
1921         return;
1922     }
1923
1924     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1925         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1926             line[0] == 0x000D || line[0] == '[') {
1927             continue;
1928         }
1929
1930         getCodePoints(line, codepoints, contextCPs);
1931         if(codepoints[0] == 0xFFFE) {
1932             /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
1933             continue;
1934         }
1935         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1936     }
1937
1938     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1939     codepoints[0] = 0;
1940
1941     while (codepoints[0] < 0xFFFF) {
1942         if (u_isdefined((UChar32)codepoints[0])) {
1943             checkSortKeyValidity(coll, codepoints, 1);
1944         }
1945         codepoints[0] ++;
1946     }
1947
1948     ucol_close(coll);
1949
1950     /* testing tailored collation elements */
1951     log_verbose("Testing tailored elements\n");
1952     while (count < 5) {
1953         const UChar *rules = NULL,
1954                     *current = NULL;
1955         UChar *rulesCopy = NULL;
1956         int32_t ruleLen = 0;
1957
1958         uint32_t chOffset = 0;
1959         uint32_t chLen = 0;
1960         uint32_t exOffset = 0;
1961         uint32_t exLen = 0;
1962         uint32_t prefixOffset = 0;
1963         uint32_t prefixLen = 0;
1964         UBool    startOfRules = TRUE;
1965         UColOptionSet opts;
1966
1967         UColTokenParser src;
1968         uint32_t strength = 0;
1969         uint16_t specs = 0;
1970
1971         uprv_memset(&src, 0, sizeof(UColTokenParser));
1972
1973         coll      = ucol_open(locale[count], &status);
1974         if (U_FAILURE(status)) {
1975             log_err("%s collator creation failed\n", locale[count]);
1976             return;
1977         }
1978
1979         src.opts = &opts;
1980         rules = ucol_getRules(coll, &ruleLen);
1981
1982         if (ruleLen > 0) {
1983             rulesCopy = (UChar *)uprv_malloc((ruleLen +
1984                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1985             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1986             src.current = src.source = rulesCopy;
1987             src.end = rulesCopy + ruleLen;
1988             src.extraCurrent = src.end;
1989             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1990
1991                 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1992                    the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1993             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1994                 strength = src.parsedToken.strength;
1995                 chOffset = src.parsedToken.charsOffset;
1996                 chLen = src.parsedToken.charsLen;
1997                 exOffset = src.parsedToken.extensionOffset;
1998                 exLen = src.parsedToken.extensionLen;
1999                 prefixOffset = src.parsedToken.prefixOffset;
2000                 prefixLen = src.parsedToken.prefixLen;
2001                 specs = src.parsedToken.flags;
2002
2003                 startOfRules = FALSE;
2004                 uprv_memcpy(codepoints, src.source + chOffset,
2005                                                        chLen * sizeof(UChar));
2006                 codepoints[chLen] = 0;
2007                 if(codepoints[0] == 0xFFFE) {
2008                     /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
2009                     continue;
2010                 }
2011                 checkSortKeyValidity(coll, codepoints, chLen);
2012             }
2013             uprv_free(src.source);
2014         }
2015
2016         ucol_close(coll);
2017         count ++;
2018     }
2019     T_FileStream_close(file);
2020 }
2021
2022 /**
2023 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
2024 * normalization on AND jamo tailoring, among other things.
2025 */
2026 static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
2027     0x0020, 0xAC00,                 /* simple LV Hangul */
2028     0x0020, 0xAC01,                 /* simple LVT Hangul */
2029     0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
2030     0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
2031     0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
2032     0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
2033     0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
2034     0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
2035     0x0020, 0x00E6,                 /* small letter ae, expands */
2036     0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
2037     0x0020
2038 };
2039 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
2040
/*
 * Expected ucol_getOffset() values for TestSearchCollatorElements: one entry
 * per iterator step, read just before each ucol_next() call (the table is
 * walked in reverse for ucol_previous()).  The last entry belongs to the call
 * that ends the iteration.
 * The rows appear to be laid out to parallel the rows of tsceText: the
 * offset for the leading U+0020 first, then the offsets for the text after it.
 */
static const int32_t rootStandardOffsets[] = {
    0,      1, 2,
    2,      3, 4, 4,
    4,      5, 6, 6,
    6,      7, 8, 8,
    8,      9, 10, 11,
    12,     13, 14, 15,
    16,     17, 18, 19,
    20,     21, 22, 23,
    24,     25, 26, 26, 26,
    26,     27, 28, 28,
    28,
    29
};
enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets) / sizeof(*rootStandardOffsets) };
2056
/*
 * Expected ucol_getOffset() values for a search collator that has jamo
 * tailorings; same layout as rootStandardOffsets, with additional entries
 * in some rows.  It is only referenced from the disabled (#else) entry of
 * tsceItems.
 */
static const int32_t rootSearchOffsets[] = {
    0,      1, 2,
    2,      3, 4, 4,
    4,      5, 6, 6, 6,
    6,      7, 8, 8, 8, 8, 8, 8,
    8,      9, 10, 11,
    12,     13, 14, 15,
    16,     17, 18, 19, 20,
    20,     21, 22, 22, 23, 23, 23, 24,
    24,     25, 26, 26, 26,
    26,     27, 28, 28,
    28,
    29
};
enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets) / sizeof(*rootSearchOffsets) };
2072
/* One TestSearchCollatorElements case: a collator locale and its expected offsets. */
typedef struct {
    const char *locale;         /* locale ID passed to ucol_open(); NULL ends the item list */
    const int32_t *offsets;     /* expected ucol_getOffset() value for each iterator step */
    int32_t offsetsLen;         /* number of entries in offsets */
} TSCEItem;
2078
2079 static const TSCEItem tsceItems[] = {
2080     { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
2081 #if 1
2082     /* No jamo tailorings in Apple version of search collator currently */
2083     { "root@collation=search", rootStandardOffsets, kLen_rootStandardOffsets },
2084 #else
2085     /* Use this when we do have jamo tailorings */
2086     { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
2087 #endif
2088     { NULL,                    NULL,                0                        }
2089 };
2090
2091 static void TestSearchCollatorElements(void)
2092 {
2093     const TSCEItem * tsceItemPtr;
2094     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
2095         UErrorCode status = U_ZERO_ERROR;
2096         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
2097         if ( U_SUCCESS(status) ) {
2098             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
2099             if ( U_SUCCESS(status) ) {
2100                 int32_t offset, element;
2101                 const int32_t * nextOffsetPtr;
2102                 const int32_t * limitOffsetPtr;
2103
2104                 nextOffsetPtr = tsceItemPtr->offsets;
2105                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2106                 do {
2107                     offset = ucol_getOffset(uce);
2108                     element = ucol_next(uce, &status);
2109                     if ( element == 0 ) {
2110                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
2111                     }
2112                     if ( nextOffsetPtr < limitOffsetPtr ) {
2113                         if (offset != *nextOffsetPtr) {
2114                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
2115                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
2116                             nextOffsetPtr = limitOffsetPtr;
2117                             break;
2118                         }
2119                         nextOffsetPtr++;
2120                     } else {
2121                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
2122                     }
2123                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2124                 if ( nextOffsetPtr < limitOffsetPtr ) {
2125                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
2126                 }
2127
2128                 ucol_setOffset(uce, kLen_tsceText, &status);
2129                 status = U_ZERO_ERROR;
2130                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2131                 limitOffsetPtr = tsceItemPtr->offsets;
2132                 do {
2133                     offset = ucol_getOffset(uce);
2134                     element = ucol_previous(uce, &status);
2135                     if ( element == 0 ) {
2136                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
2137                     }
2138                     if ( nextOffsetPtr > limitOffsetPtr ) {
2139                         nextOffsetPtr--;
2140                         if (offset != *nextOffsetPtr) {
2141                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
2142                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
2143                             nextOffsetPtr = limitOffsetPtr;
2144                             break;
2145                         }
2146                    } else {
2147                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
2148                     }
2149                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2150                 if ( nextOffsetPtr > limitOffsetPtr ) {
2151                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
2152                 }
2153
2154                 ucol_closeElements(uce);
2155             } else {
2156                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2157             }
2158             ucol_close(ucol);
2159         } else {
2160             log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2161         }
2162     }
2163 }
2164
2165 #endif /* #if !UCONFIG_NO_COLLATION */