icuSources/test/cintltst/citertst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2003, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File CITERTST.C
   9 *
  10 * Modification History:
  11 * Date      Name               Description
  12 *           Madhu Katragadda   Ported for C API
  13 * 02/19/01  synwee             Modified test case for new collation iterator
  14 *********************************************************************************/
  15 /*
  16  * Collation Iterator tests.
  17  * (Let me reiterate my position...)
  18  */
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_COLLATION
  23
  24 #include "unicode/ucol.h"
  25 #include "unicode/uloc.h"
  26 #include "unicode/uchar.h"
  27 #include "unicode/ustring.h"
  28 #include "cmemory.h"
  29 #include "cintltst.h"
  30 #include "citertst.h"
  31 #include "ccolltst.h"
  32 #include "filestrm.h"
  33 #include "cstring.h"
  34 #include "ucol_imp.h"
  35 #include "ucol_tok.h"
  36 #include <stdio.h>
  37
  38 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
  39
  40 void addCollIterTest(TestNode** root)
  41 {
  42     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
  43     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
  44     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
  45     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
  46     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
  47     addTest(root, &TestNormalizedUnicodeChar,
  48                                 "tscoll/citertst/TestNormalizedUnicodeChar");
  49     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
  50     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
  51     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
  52     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
  53     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
  54     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
  55     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
  56     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
  57     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
  58 }
  59
/*
 * The locales we support: TestBug672 and TestBug672Normalize open one
 * collator per entry and expect identical iterator offsets from all of them.
 */

static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
  63
  64 static void TestBug672() {
  65     UErrorCode  status = U_ZERO_ERROR;
  66     UChar       pattern[20];
  67     UChar       text[50];
  68     int         i;
  69     int         result[3][3];
  70
  71     u_uastrcpy(pattern, "resume");
  72     u_uastrcpy(text, "Time to resume updating my resume.");
  73
  74     for (i = 0; i < 3; ++ i) {
  75         UCollator          *coll = ucol_open(LOCALES[i], &status);
  76         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
  77                                                      &status);
  78         UCollationElements *titer = ucol_openElements(coll, text, -1,
  79                                                      &status);
  80         if (U_FAILURE(status)) {
  81             log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
  82                     myErrorName(status));
  83             return;
  84         }
  85
  86         log_verbose("locale tested %s\n", LOCALES[i]);
  87
  88         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
  89                U_SUCCESS(status)) {
  90         }
  91         if (U_FAILURE(status)) {
  92             log_err("ERROR: reversing collation iterator :%s\n",
  93                     myErrorName(status));
  94             return;
  95         }
  96         ucol_reset(pitr);
  97
  98         ucol_setOffset(titer, u_strlen(pattern), &status);
  99         if (U_FAILURE(status)) {
 100             log_err("ERROR: setting offset in collator :%s\n",
 101                     myErrorName(status));
 102             return;
 103         }
 104         result[i][0] = ucol_getOffset(titer);
 105         log_verbose("Text iterator set to offset %d\n", result[i][0]);
 106
 107         /* Use previous() */
 108         ucol_previous(titer, &status);
 109         result[i][1] = ucol_getOffset(titer);
 110         log_verbose("Current offset %d after previous\n", result[i][1]);
 111
 112         /* Add one to index */
 113         log_verbose("Adding one to current offset...\n");
 114         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
 115         if (U_FAILURE(status)) {
 116             log_err("ERROR: setting offset in collator :%s\n",
 117                     myErrorName(status));
 118             return;
 119         }
 120         result[i][2] = ucol_getOffset(titer);
 121         log_verbose("Current offset in text = %d\n", result[i][2]);
 122         ucol_closeElements(pitr);
 123         ucol_closeElements(titer);
 124         ucol_close(coll);
 125     }
 126
 127     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
 128         uprv_memcmp(result[1], result[2], 3) != 0) {
 129         log_err("ERROR: Different locales have different offsets at the same character\n");
 130     }
 131 }
 132
 133
 134
 135 /*  Running this test with normalization enabled showed up a bug in the incremental
 136     normalization code. */
 137 static void TestBug672Normalize() {
 138     UErrorCode  status = U_ZERO_ERROR;
 139     UChar       pattern[20];
 140     UChar       text[50];
 141     int         i;
 142     int         result[3][3];
 143
 144     u_uastrcpy(pattern, "resume");
 145     u_uastrcpy(text, "Time to resume updating my resume.");
 146
 147     for (i = 0; i < 3; ++ i) {
 148         UCollator          *coll = ucol_open(LOCALES[i], &status);
 149         UCollationElements *pitr = NULL;
 150         UCollationElements *titer = NULL;
 151
 152         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 153
 154         pitr = ucol_openElements(coll, pattern, -1, &status);
 155         titer = ucol_openElements(coll, text, -1, &status);
 156         if (U_FAILURE(status)) {
 157             log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
 158                     myErrorName(status));
 159             return;
 160         }
 161
 162         log_verbose("locale tested %s\n", LOCALES[i]);
 163
 164         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
 165                U_SUCCESS(status)) {
 166         }
 167         if (U_FAILURE(status)) {
 168             log_err("ERROR: reversing collation iterator :%s\n",
 169                     myErrorName(status));
 170             return;
 171         }
 172         ucol_reset(pitr);
 173
 174         ucol_setOffset(titer, u_strlen(pattern), &status);
 175         if (U_FAILURE(status)) {
 176             log_err("ERROR: setting offset in collator :%s\n",
 177                     myErrorName(status));
 178             return;
 179         }
 180         result[i][0] = ucol_getOffset(titer);
 181         log_verbose("Text iterator set to offset %d\n", result[i][0]);
 182
 183         /* Use previous() */
 184         ucol_previous(titer, &status);
 185         result[i][1] = ucol_getOffset(titer);
 186         log_verbose("Current offset %d after previous\n", result[i][1]);
 187
 188         /* Add one to index */
 189         log_verbose("Adding one to current offset...\n");
 190         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
 191         if (U_FAILURE(status)) {
 192             log_err("ERROR: setting offset in collator :%s\n",
 193                     myErrorName(status));
 194             return;
 195         }
 196         result[i][2] = ucol_getOffset(titer);
 197         log_verbose("Current offset in text = %d\n", result[i][2]);
 198         ucol_closeElements(pitr);
 199         ucol_closeElements(titer);
 200         ucol_close(coll);
 201     }
 202
 203     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
 204         uprv_memcmp(result[1], result[2], 3) != 0) {
 205         log_err("ERROR: Different locales have different offsets at the same character\n");
 206     }
 207 }
 208
 209
 210
 211
 212 /**
 213  * Test for CollationElementIterator previous and next for the whole set of
 214  * unicode characters.
 215  */
 216 static void TestUnicodeChar()
 217 {
 218     UChar source[0x100];
 219     UCollator *en_us;
 220     UCollationElements *iter;
 221     UErrorCode status = U_ZERO_ERROR;
 222     UChar codepoint;
 223
 224     UChar *test;
 225     en_us = ucol_open("en_US", &status);
 226     if (U_FAILURE(status)){
 227        log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
 228               myErrorName(status));
 229        return;
 230     }
 231
 232     for (codepoint = 1; codepoint < 0xFFFE;)
 233     {
 234       test = source;
 235
 236       while (codepoint % 0xFF != 0)
 237       {
 238         if (u_isdefined(codepoint))
 239           *(test ++) = codepoint;
 240         codepoint ++;
 241       }
 242
 243       if (u_isdefined(codepoint))
 244         *(test ++) = codepoint;
 245
 246       if (codepoint != 0xFFFF)
 247         codepoint ++;
 248
 249       *test = 0;
 250       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
 251       if(U_FAILURE(status)){
 252           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 253               myErrorName(status));
 254           ucol_close(en_us);
 255           return;
 256       }
 257       /* A basic test to see if it's working at all */
 258       log_verbose("codepoint testing %x\n", codepoint);
 259       backAndForth(iter);
 260       ucol_closeElements(iter);
 261
 262       /* null termination test */
 263       iter=ucol_openElements(en_us, source, -1, &status);
 264       if(U_FAILURE(status)){
 265           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 266               myErrorName(status));
 267           ucol_close(en_us);
 268           return;
 269       }
 270       /* A basic test to see if it's working at all */
 271       backAndForth(iter);
 272       ucol_closeElements(iter);
 273     }
 274
 275     ucol_close(en_us);
 276 }
 277
 278 /**
 279  * Test for CollationElementIterator previous and next for the whole set of
 280  * unicode characters with normalization on.
 281  */
 282 static void TestNormalizedUnicodeChar()
 283 {
 284     UChar source[0x100];
 285     UCollator *th_th;
 286     UCollationElements *iter;
 287     UErrorCode status = U_ZERO_ERROR;
 288     UChar codepoint;
 289
 290     UChar *test;
 291     /* thai should have normalization on */
 292     th_th = ucol_open("th_TH", &status);
 293     if (U_FAILURE(status)){
 294         log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
 295               myErrorName(status));
 296         return;
 297     }
 298
 299     for (codepoint = 1; codepoint < 0xFFFE;)
 300     {
 301       test = source;
 302
 303       while (codepoint % 0xFF != 0)
 304       {
 305         if (u_isdefined(codepoint))
 306           *(test ++) = codepoint;
 307         codepoint ++;
 308       }
 309
 310       if (u_isdefined(codepoint))
 311         *(test ++) = codepoint;
 312
 313       if (codepoint != 0xFFFF)
 314         codepoint ++;
 315
 316       *test = 0;
 317       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
 318       if(U_FAILURE(status)){
 319           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 320               myErrorName(status));
 321             ucol_close(th_th);
 322           return;
 323       }
 324
 325       backAndForth(iter);
 326       ucol_closeElements(iter);
 327
 328       iter=ucol_openElements(th_th, source, -1, &status);
 329       if(U_FAILURE(status)){
 330           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 331               myErrorName(status));
 332             ucol_close(th_th);
 333           return;
 334       }
 335
 336       backAndForth(iter);
 337       ucol_closeElements(iter);
 338     }
 339
 340     ucol_close(th_th);
 341 }
 342
 343 /**
 344 * Test the incremental normalization
 345 */
 346 static void TestNormalization()
 347 {
 348           UErrorCode          status = U_ZERO_ERROR;
 349     const char               *str    =
 350                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
 351           UCollator          *coll;
 352           UChar               rule[50];
 353           int                 rulelen = u_unescape(str, rule, 50);
 354           int                 count = 0;
 355     const char                *testdata[] =
 356                         {"\\u1ED9", "o\\u0323\\u0302",
 357                         "\\u0300\\u0315", "\\u0315\\u0300",
 358                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
 359                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
 360                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
 361                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
 362                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
 363     int32_t   srclen;
 364     UChar source[10];
 365     UCollationElements *iter;
 366
 367     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
 368     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 369     if (U_FAILURE(status)){
 370         log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
 371               myErrorName(status));
 372         return;
 373     }
 374
 375     srclen = u_unescape(testdata[0], source, 10);
 376     iter = ucol_openElements(coll, source, srclen, &status);
 377     backAndForth(iter);
 378     ucol_closeElements(iter);
 379
 380     srclen = u_unescape(testdata[1], source, 10);
 381     iter = ucol_openElements(coll, source, srclen, &status);
 382     backAndForth(iter);
 383     ucol_closeElements(iter);
 384
 385     while (count < 12) {
 386         srclen = u_unescape(testdata[count], source, 10);
 387         iter = ucol_openElements(coll, source, srclen, &status);
 388
 389         if (U_FAILURE(status)){
 390             log_err("ERROR: in creation of collator element iterator\n %s\n",
 391                   myErrorName(status));
 392             return;
 393         }
 394         backAndForth(iter);
 395         ucol_closeElements(iter);
 396
 397         iter = ucol_openElements(coll, source, -1, &status);
 398
 399         if (U_FAILURE(status)){
 400             log_err("ERROR: in creation of collator element iterator\n %s\n",
 401                   myErrorName(status));
 402             return;
 403         }
 404         backAndForth(iter);
 405         ucol_closeElements(iter);
 406         count ++;
 407     }
 408     ucol_close(coll);
 409 }
 410
 411 /**
 412  * Test for CollationElementIterator.previous()
 413  *
 414  * @bug 4108758 - Make sure it works with contracting characters
 415  *
 416  */
 417 static void TestPrevious()
 418 {
 419     UCollator *coll=NULL;
 420     UChar rule[50];
 421     UChar *source;
 422     UCollator *c1, *c2, *c3;
 423     UCollationElements *iter;
 424     UErrorCode status = U_ZERO_ERROR;
 425
 426     test1=(UChar*)malloc(sizeof(UChar) * 50);
 427     test2=(UChar*)malloc(sizeof(UChar) * 50);
 428     u_uastrcpy(test1, "What subset of all possible test cases?");
 429     u_uastrcpy(test2, "has the highest probability of detecting");
 430     coll = ucol_open("en_US", &status);
 431
 432     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
 433     log_verbose("English locale testing back and forth\n");
 434     if(U_FAILURE(status)){
 435         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 436             myErrorName(status));
 437         ucol_close(coll);
 438         return;
 439     }
 440     /* A basic test to see if it's working at all */
 441     backAndForth(iter);
 442     ucol_closeElements(iter);
 443     ucol_close(coll);
 444
 445     /* Test with a contracting character sequence */
 446     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
 447     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
 448
 449     log_verbose("Contraction rule testing back and forth with no normalization\n");
 450
 451     if (c1 == NULL || U_FAILURE(status))
 452     {
 453         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
 454             myErrorName(status));
 455         return;
 456     }
 457     source=(UChar*)malloc(sizeof(UChar) * 20);
 458     u_uastrcpy(source, "abchdcba");
 459     iter=ucol_openElements(c1, source, u_strlen(source), &status);
 460     if(U_FAILURE(status)){
 461         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 462             myErrorName(status));
 463         return;
 464     }
 465     backAndForth(iter);
 466     ucol_closeElements(iter);
 467     ucol_close(c1);
 468
 469     /* Test with an expanding character sequence */
 470     u_uastrcpy(rule, "&a < b < c/abd < d");
 471     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
 472     log_verbose("Expansion rule testing back and forth with no normalization\n");
 473     if (c2 == NULL || U_FAILURE(status))
 474     {
 475         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
 476             myErrorName(status));
 477         return;
 478     }
 479     u_uastrcpy(source, "abcd");
 480     iter=ucol_openElements(c2, source, u_strlen(source), &status);
 481     if(U_FAILURE(status)){
 482         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 483             myErrorName(status));
 484         return;
 485     }
 486     backAndForth(iter);
 487     ucol_closeElements(iter);
 488     ucol_close(c2);
 489     /* Now try both */
 490     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
 491     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
 492     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
 493
 494     if (c3 == NULL || U_FAILURE(status))
 495     {
 496         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
 497             myErrorName(status));
 498         return;
 499     }
 500     u_uastrcpy(source, "abcdbchdc");
 501     iter=ucol_openElements(c3, source, u_strlen(source), &status);
 502     if(U_FAILURE(status)){
 503         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 504             myErrorName(status));
 505         return;
 506     }
 507     backAndForth(iter);
 508     ucol_closeElements(iter);
 509     ucol_close(c3);
 510     source[0] = 0x0e41;
 511     source[1] = 0x0e02;
 512     source[2] = 0x0e41;
 513     source[3] = 0x0e02;
 514     source[4] = 0x0e27;
 515     source[5] = 0x61;
 516     source[6] = 0x62;
 517     source[7] = 0x63;
 518     source[8] = 0;
 519
 520     coll = ucol_open("th_TH", &status);
 521     log_verbose("Thai locale testing back and forth with normalization\n");
 522     iter=ucol_openElements(coll, source, u_strlen(source), &status);
 523     if(U_FAILURE(status)){
 524         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 525             myErrorName(status));
 526         return;
 527     }
 528     backAndForth(iter);
 529     ucol_closeElements(iter);
 530     ucol_close(coll);
 531
 532     /* prev test */
 533     source[0] = 0x0061;
 534     source[1] = 0x30CF;
 535     source[2] = 0x3099;
 536     source[3] = 0x30FC;
 537     source[4] = 0;
 538
 539     coll = ucol_open("ja_JP", &status);
 540     log_verbose("Japanese locale testing back and forth with normalization\n");
 541     iter=ucol_openElements(coll, source, u_strlen(source), &status);
 542     if(U_FAILURE(status)){
 543         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 544             myErrorName(status));
 545         return;
 546     }
 547     backAndForth(iter);
 548     ucol_closeElements(iter);
 549     ucol_close(coll);
 550
 551     free(source);
 552     free(test1);
 553     free(test2);
 554 }
 555
 556 /**
 557  * Test for getOffset() and setOffset()
 558  */
 559 static void TestOffset()
 560 {
 561     UErrorCode status= U_ZERO_ERROR;
 562     UCollator *en_us=NULL;
 563     UCollationElements *iter, *pristine;
 564     int32_t offset;
 565     int32_t *orders;
 566     int32_t orderLength=0;
 567     int     count = 0;
 568     test1=(UChar*)malloc(sizeof(UChar) * 50);
 569     test2=(UChar*)malloc(sizeof(UChar) * 50);
 570     u_uastrcpy(test1, "What subset of all possible test cases?");
 571     u_uastrcpy(test2, "has the highest probability of detecting");
 572     en_us = ucol_open("en_US", &status);
 573     log_verbose("Testing getOffset and setOffset for CollationElements\n");
 574     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
 575     if(U_FAILURE(status)){
 576         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 577             myErrorName(status));
 578         ucol_close(en_us);
 579         return;
 580     }
 581     /* Run all the way through the iterator, then get the offset */
 582
 583     orders = getOrders(iter, &orderLength);
 584
 585     offset = ucol_getOffset(iter);
 586
 587     if (offset != u_strlen(test1))
 588     {
 589         log_err("offset at end != length %d vs %d\n", offset,
 590             u_strlen(test1) );
 591     }
 592
 593     /* Now set the offset back to the beginning and see if it works */
 594     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
 595     if(U_FAILURE(status)){
 596         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 597             myErrorName(status));
 598     ucol_close(en_us);
 599         return;
 600     }
 601     status = U_ZERO_ERROR;
 602
 603     ucol_setOffset(iter, 0, &status);
 604     if (U_FAILURE(status))
 605     {
 606         log_err("setOffset failed. %s\n",    myErrorName(status));
 607     }
 608     else
 609     {
 610         assertEqual(iter, pristine);
 611     }
 612
 613     ucol_closeElements(pristine);
 614     ucol_closeElements(iter);
 615     free(orders);
 616
 617     /* testing offsets in normalization buffer */
 618     test1[0] = 0x61;
 619     test1[1] = 0x300;
 620     test1[2] = 0x316;
 621     test1[3] = 0x62;
 622     test1[4] = 0;
 623     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 624     iter = ucol_openElements(en_us, test1, 4, &status);
 625     if(U_FAILURE(status)){
 626         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 627             myErrorName(status));
 628         ucol_close(en_us);
 629         return;
 630     }
 631
 632     count = 0;
 633     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
 634         U_SUCCESS(status)) {
 635         switch (count) {
 636         case 0:
 637             if (ucol_getOffset(iter) != 1) {
 638                 log_err("ERROR: Offset of iteration should be 0\n");
 639             }
 640             break;
 641         case 3:
 642             if (ucol_getOffset(iter) != 4) {
 643                 log_err("ERROR: Offset of iteration should be 4\n");
 644             }
 645             break;
 646         default:
 647             if (ucol_getOffset(iter) != 3) {
 648                 log_err("ERROR: Offset of iteration should be 3\n");
 649             }
 650         }
 651         count ++;
 652     }
 653
 654     ucol_reset(iter);
 655     count = 0;
 656     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
 657         U_SUCCESS(status)) {
 658         switch (count) {
 659         case 0:
 660             if (ucol_getOffset(iter) != 3) {
 661                 log_err("ERROR: Offset of iteration should be 3\n");
 662             }
 663             break;
 664         default:
 665             if (ucol_getOffset(iter) != 0) {
 666                 log_err("ERROR: Offset of iteration should be 0\n");
 667             }
 668         }
 669         count ++;
 670     }
 671
 672     if(U_FAILURE(status)){
 673         log_err("ERROR: in iterating collation elements %s\n",
 674             myErrorName(status));
 675     }
 676
 677     ucol_closeElements(iter);
 678     ucol_close(en_us);
 679     free(test1);
 680     free(test2);
 681 }
 682
 683 /**
 684  * Test for setText()
 685  */
 686 static void TestSetText()
 687 {
 688     int32_t c,i;
 689     UErrorCode status = U_ZERO_ERROR;
 690     UCollator *en_us=NULL;
 691     UCollationElements *iter1, *iter2;
 692     test1=(UChar*)malloc(sizeof(UChar) * 50);
 693     test2=(UChar*)malloc(sizeof(UChar) * 50);
 694     u_uastrcpy(test1, "What subset of all possible test cases?");
 695     u_uastrcpy(test2, "has the highest probability of detecting");
 696     en_us = ucol_open("en_US", &status);
 697     log_verbose("testing setText for Collation elements\n");
 698     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
 699     if(U_FAILURE(status)){
 700         log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
 701             myErrorName(status));
 702     ucol_close(en_us);
 703         return;
 704     }
 705     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
 706     if(U_FAILURE(status)){
 707         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
 708             myErrorName(status));
 709     ucol_close(en_us);
 710         return;
 711     }
 712
 713     /* Run through the second iterator just to exercise it */
 714     c = ucol_next(iter2, &status);
 715     i = 0;
 716
 717     while ( ++i < 10 && (c != UCOL_NULLORDER))
 718     {
 719         if (U_FAILURE(status))
 720         {
 721             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
 722             ucol_closeElements(iter2);
 723             ucol_closeElements(iter1);
 724     ucol_close(en_us);
 725             return;
 726         }
 727
 728         c = ucol_next(iter2, &status);
 729     }
 730
 731     /* Now set it to point to the same string as the first iterator */
 732     ucol_setText(iter2, test1, u_strlen(test1), &status);
 733     if (U_FAILURE(status))
 734     {
 735         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
 736     }
 737     else
 738     {
 739         assertEqual(iter1, iter2);
 740     }
 741
 742     /* Now set it to point to a null string with fake length*/
 743     ucol_setText(iter2, NULL, 2, &status);
 744     if (U_FAILURE(status))
 745     {
 746         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
 747     }
 748     else
 749     {
 750         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
 751             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
 752         }
 753     }
 754
 755     ucol_closeElements(iter2);
 756     ucol_closeElements(iter1);
 757     ucol_close(en_us);
 758     free(test1);
 759     free(test2);
 760 }
 761
 762
 763
 764 static void backAndForth(UCollationElements *iter)
 765 {
 766     /* Run through the iterator forwards and stick it into an array */
 767     int32_t index, o;
 768     UErrorCode status = U_ZERO_ERROR;
 769     int32_t orderLength = 0;
 770     int32_t *orders;
 771     orders= getOrders(iter, &orderLength);
 772
 773
 774     /* Now go through it backwards and make sure we get the same values */
 775     index = orderLength;
 776     ucol_reset(iter);
 777
 778     /* synwee : changed */
 779     while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
 780     {
 781       if (o != orders[-- index])
 782       {
 783         if (o == 0)
 784           index ++;
 785         else
 786         {
 787           while (index > 0 && orders[-- index] == 0)
 788           {
 789           }
 790           if (o != orders[index])
 791           {
 792             log_err("Mismatch at index : 0x%x\n", index);
 793             return;
 794           }
 795
 796         }
 797       }
 798     }
 799
 800     while (index != 0 && orders[index - 1] == 0) {
 801       index --;
 802     }
 803
 804     if (index != 0)
 805     {
 806         log_err("Didn't get back to beginning - index is %d\n", index);
 807
 808         ucol_reset(iter);
 809         log_err("\nnext: ");
 810         if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER)
 811         {
 812             log_err("Error at %x\n", o);
 813         }
 814         log_err("\nprev: ");
 815         if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
 816         {
 817             log_err("Error at %x\n", o);
 818         }
 819         log_verbose("\n");
 820     }
 821
 822     free(orders);
 823 }
 824
 825 /** @bug 4108762
 826  * Test for getMaxExpansion()
 827  */
 828 static void TestMaxExpansion()
 829 {
 830     UErrorCode          status = U_ZERO_ERROR;
 831     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
 832     UChar               ch     = 0;
 833     UChar               supplementary[2] = {0xD800, 0xDC00};
 834     uint32_t            sorder = 0;
 835     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
 836     uint32_t            temporder = 0;
 837
 838     UChar rule[256];
 839     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
 840     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
 841         UCOL_DEFAULT_STRENGTH,NULL, &status);
 842     if(U_SUCCESS(status) && coll) {
 843       iter = ucol_openElements(coll, &ch, 1, &status);
 844
 845       while (ch < 0xFFFF && U_SUCCESS(status)) {
 846           int      count = 1;
 847           uint32_t order;
 848           int32_t  size = 0;
 849
 850           ch ++;
 851
 852           ucol_setText(iter, &ch, 1, &status);
 853           order = ucol_previous(iter, &status);
 854
 855           /* thai management */
 856           if (order == 0)
 857               order = ucol_previous(iter, &status);
 858
 859           while (U_SUCCESS(status) &&
 860               ucol_previous(iter, &status) != UCOL_NULLORDER) {
 861               count ++;
 862           }
 863
 864           size = ucol_getMaxExpansion(iter, order);
 865           if (U_FAILURE(status) || size < count) {
 866               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 867                   ch, count);
 868           }
 869       }
 870
 871       /* testing for exact max expansion */
 872       ch = 0;
 873       while (ch < 0x61) {
 874           uint32_t order;
 875           int32_t  size;
 876           ucol_setText(iter, &ch, 1, &status);
 877           order = ucol_previous(iter, &status);
 878           size  = ucol_getMaxExpansion(iter, order);
 879           if (U_FAILURE(status) || size != 1) {
 880               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 881                   ch, 1);
 882           }
 883           ch ++;
 884       }
 885
 886       ch = 0x63;
 887       ucol_setText(iter, &ch, 1, &status);
 888       temporder = ucol_previous(iter, &status);
 889
 890       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
 891           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
 892                   ch, 3);
 893       }
 894
 895       ch = 0x64;
 896       ucol_setText(iter, &ch, 1, &status);
 897       temporder = ucol_previous(iter, &status);
 898
 899       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
 900           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
 901                   ch, 3);
 902       }
 903
 904       ucol_setText(iter, supplementary, 2, &status);
 905       sorder = ucol_previous(iter, &status);
 906
 907       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
 908           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 909                   ch, 2);
 910       }
 911
 912       /* testing jamo */
 913       ch = 0x1165;
 914
 915       ucol_setText(iter, &ch, 1, &status);
 916       temporder = ucol_previous(iter, &status);
 917       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
 918           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
 919                   ch, 3);
 920       }
 921
 922       ucol_closeElements(iter);
 923       ucol_close(coll);
 924
 925       /* testing special jamo &a<\u1160 */
 926       rule[0] = 0x26;
 927       rule[1] = 0x71;
 928       rule[2] = 0x3c;
 929       rule[3] = 0x1165;
 930       rule[4] = 0x2f;
 931       rule[5] = 0x71;
 932       rule[6] = 0x71;
 933       rule[7] = 0x71;
 934       rule[8] = 0x71;
 935       rule[9] = 0;
 936
 937       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
 938           UCOL_DEFAULT_STRENGTH,NULL, &status);
 939       iter = ucol_openElements(coll, &ch, 1, &status);
 940
 941       temporder = ucol_previous(iter, &status);
 942       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
 943           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
 944                   ch, 5);
 945       }
 946
 947       ucol_closeElements(iter);
 948       ucol_close(coll);
 949     } else {
 950       log_data_err("Couldn't open collator\n");
 951     }
 952
 953 }
 954
 955 /**
 956  * Return an integer array containing all of the collation orders
 957  * returned by calls to next on the specified iterator
 958  */
 959 static int32_t* getOrders(UCollationElements *iter, int32_t *orderLength)
 960 {
 961     UErrorCode status;
 962     int32_t order;
 963     int32_t maxSize = 100;
 964     int32_t size = 0;
 965     int32_t *temp;
 966     int32_t *orders =(int32_t*)malloc(sizeof(int32_t) * maxSize);
 967     status= U_ZERO_ERROR;
 968
 969
 970     while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
 971     {
 972         if (size == maxSize)
 973         {
 974             maxSize *= 2;
 975             temp = (int32_t*)malloc(sizeof(int32_t) * maxSize);
 976
 977             memcpy(temp, orders, size * sizeof(int32_t));
 978             free(orders);
 979             orders = temp;
 980
 981         }
 982
 983         orders[size++] = order;
 984     }
 985
 986     if (maxSize > size)
 987     {
 988         if (size == 0) {
 989             size = 1;
 990             temp = (int32_t*)malloc(sizeof(int32_t) * size);
 991             temp[0] = 0;
 992         }
 993         else {
 994             temp = (int32_t*)malloc(sizeof(int32_t) * size);
 995             memcpy(temp, orders, size * sizeof(int32_t));
 996         }
 997
 998         free(orders);
 999         orders = temp;
1000     }
1001
1002     *orderLength = size;
1003     return orders;
1004 }
1005
1006
1007 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
1008 {
1009     int32_t c1, c2;
1010     int32_t count = 0;
1011     UErrorCode status = U_ZERO_ERROR;
1012
1013     do
1014     {
1015         c1 = ucol_next(i1, &status);
1016         c2 = ucol_next(i2, &status);
1017
1018         if (c1 != c2)
1019         {
1020             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
1021             break;
1022         }
1023
1024         count += 1;
1025     }
1026     while (c1 != UCOL_NULLORDER);
1027 }
1028
1029 /**
1030  * Testing iterators with extremely small buffers
1031  */
1032 static void TestSmallBuffer()
1033 {
1034     UErrorCode          status = U_ZERO_ERROR;
1035     UCollator          *coll;
1036     UCollationElements *testiter,
1037                        *iter;
1038     int32_t             count = 0;
1039     int32_t            *testorders,
1040                        *orders;
1041
1042     UChar teststr[500];
1043     UChar str[] = {0x300, 0x31A, 0};
1044     /*
1045     creating a long string of decomposable characters,
1046     since by default the writable buffer is of size 256
1047     */
1048     while (count < 500) {
1049         if ((count & 1) == 0) {
1050             teststr[count ++] = 0x300;
1051         }
1052         else {
1053             teststr[count ++] = 0x31A;
1054         }
1055     }
1056
1057     coll = ucol_open("th_TH", &status);
1058     if(U_SUCCESS(status) && coll) {
1059       testiter = ucol_openElements(coll, teststr, 500, &status);
1060       iter = ucol_openElements(coll, str, 2, &status);
1061
1062       orders     = getOrders(iter, &count);
1063       if (count != 2) {
1064           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
1065       }
1066
1067       /*
1068       this will rearrange the string data to 250 characters of 0x300 first then
1069       250 characters of 0x031A
1070       */
1071       testorders = getOrders(testiter, &count);
1072
1073       if (count != 500) {
1074           log_err("Error decomposition does not give the right sized collation elements\n");
1075       }
1076
1077       while (count != 0) {
1078           /* UCA collation element for 0x0F76 */
1079           if ((count > 250 && testorders[-- count] != orders[1]) ||
1080               (count <= 250 && testorders[-- count] != orders[0])) {
1081               log_err("Error decomposition does not give the right collation element at %d count\n", count);
1082               break;
1083           }
1084       }
1085
1086       free(testorders);
1087       free(orders);
1088
1089       ucol_reset(testiter);
1090       /* ensures that the writable buffer was cleared */
1091       if (testiter->iteratordata_.writableBuffer !=
1092           testiter->iteratordata_.stackWritableBuffer) {
1093           log_err("Error Writable buffer in collation element iterator not reset\n");
1094       }
1095
1096       /* ensures closing of elements done properly to clear writable buffer */
1097       ucol_next(testiter, &status);
1098       ucol_next(testiter, &status);
1099       ucol_closeElements(testiter);
1100       ucol_closeElements(iter);
1101       ucol_close(coll);
1102     } else {
1103       log_data_err("Couldn't open collator\n");
1104     }
1105 }
1106
/**
 * Converts one hexadecimal digit character to its numeric value.
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; any other character yields 0.
 * (Snippet of code taken from genuca.)
 */
static int32_t hex2num(char hex) {
    int32_t value = 0;

    if ('0' <= hex && hex <= '9') {
        value = hex - '0';
    } else if ('a' <= hex && hex <= 'f') {
        value = 10 + (hex - 'a');
    } else if ('A' <= hex && hex <= 'F') {
        value = 10 + (hex - 'A');
    }
    return value;
}
1121
1122 /**
1123 * Getting codepoints from a string
1124 * @param str character string contain codepoints seperated by space and ended
1125 *        by a semicolon
1126 * @param codepoints array for storage, assuming size > 5
1127 * @return position at the end of the codepoint section
1128 */
1129 static char * getCodePoints(char *str, UChar *codepoints) {
1130     char *pStartCP = str;
1131     char *pEndCP   = str + 4;
1132
1133     *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1134                           (hex2num(*(pStartCP + 1)) << 8) |
1135                           (hex2num(*(pStartCP + 2)) << 4) |
1136                           (hex2num(*(pStartCP + 3))));
1137     codepoints ++;
1138     while (*pEndCP != ';') {
1139         pStartCP = pEndCP + 1;
1140         *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1141                           (hex2num(*(pStartCP + 1)) << 8) |
1142                           (hex2num(*(pStartCP + 2)) << 4) |
1143                           (hex2num(*(pStartCP + 3))));
1144         codepoints ++;
1145         pEndCP = pStartCP + 4;
1146     }
1147     *codepoints = 0;
1148     return pEndCP + 1;
1149 }
1150
1151 /**
1152 * Sniplets of code from genuca
1153 */
1154 static int32_t
1155 readElement(char **from, char *to, char separator, UErrorCode *status)
1156 {
1157     if (U_SUCCESS(*status)) {
1158         char    buffer[1024];
1159         int32_t i = 0;
1160         while (**from != separator) {
1161             if (**from != ' ') {
1162                 *(buffer+i++) = **from;
1163             }
1164             (*from)++;
1165         }
1166         (*from)++;
1167         *(buffer + i) = 0;
1168         strcpy(to, buffer);
1169         return i/2;
1170     }
1171
1172     return 0;
1173 }
1174
1175 /**
1176 * Sniplets of code from genuca
1177 */
1178 static uint32_t
1179 getSingleCEValue(char *primary, char *secondary, char *tertiary,
1180                           UErrorCode *status)
1181 {
1182     if (U_SUCCESS(*status)) {
1183         uint32_t  value    = 0;
1184         char      primsave = '\0';
1185         char      secsave  = '\0';
1186         char      tersave  = '\0';
1187         char     *primend  = primary+4;
1188         char     *secend   = secondary+2;
1189         char     *terend   = tertiary+2;
1190         uint32_t  primvalue;
1191         uint32_t  secvalue;
1192         uint32_t  tervalue;
1193
1194         if (uprv_strlen(primary) > 4) {
1195             primsave = *primend;
1196             *primend = '\0';
1197         }
1198
1199         if (uprv_strlen(secondary) > 2) {
1200             secsave = *secend;
1201             *secend = '\0';
1202         }
1203
1204         if (uprv_strlen(tertiary) > 2) {
1205             tersave = *terend;
1206             *terend = '\0';
1207         }
1208
1209         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1210         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1211         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1212         if(primvalue <= 0xFF) {
1213           primvalue <<= 8;
1214         }
1215
1216         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1217            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1218            | (tervalue & UCOL_TERTIARYORDERMASK);
1219
1220         if(primsave!='\0') {
1221             *primend = primsave;
1222         }
1223         if(secsave!='\0') {
1224             *secend = secsave;
1225         }
1226         if(tersave!='\0') {
1227             *terend = tersave;
1228         }
1229         return value;
1230     }
1231     return 0;
1232 }
1233
1234 /**
1235 * Getting collation elements generated from a string
1236 * @param str character string contain collation elements contained in [] and
1237 *        seperated by space
1238 * @param ce array for storage, assuming size > 20
1239 * @param status error status
1240 * @return position at the end of the codepoint section
1241 */
1242 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1243     char       *pStartCP     = uprv_strchr(str, '[');
1244     int         count        = 0;
1245     char       *pEndCP;
1246     char        primary[100];
1247     char        secondary[100];
1248     char        tertiary[100];
1249
1250     while (*pStartCP == '[') {
1251         uint32_t primarycount   = 0;
1252         uint32_t secondarycount = 0;
1253         uint32_t tertiarycount  = 0;
1254         uint32_t CEi = 1;
1255         pEndCP = strchr(pStartCP, ']');
1256         if(pEndCP == NULL) {
1257             break;
1258         }
1259         pStartCP ++;
1260
1261         primarycount   = readElement(&pStartCP, primary, ',', status);
1262         secondarycount = readElement(&pStartCP, secondary, ',', status);
1263         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
1264
1265         /* I want to get the CEs entered right here, including continuation */
1266         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1267         if (U_FAILURE(*status)) {
1268             break;
1269         }
1270
1271         while (2 * CEi < primarycount || CEi < secondarycount ||
1272                CEi < tertiarycount) {
1273             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1274             if (2 * CEi < primarycount) {
1275                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1276                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1277             }
1278
1279             if (2 * CEi + 1 < primarycount) {
1280                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1281                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1282             }
1283
1284             if (CEi < secondarycount) {
1285                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1286                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1287             }
1288
1289             if (CEi < tertiarycount) {
1290                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1291                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1292             }
1293
1294             CEi ++;
1295             ces[count ++] = value;
1296         }
1297
1298       pStartCP = pEndCP + 1;
1299     }
1300     ces[count] = 0;
1301     return pStartCP;
1302 }
1303
1304 /**
1305 * Getting the FractionalUCA.txt file stream
1306 */
1307 static FileStream * getFractionalUCA(void)
1308 {
1309     char        newPath[256];
1310     char        backupPath[256];
1311     FileStream *result = NULL;
1312
1313     /* Look inside ICU_DATA first */
1314     uprv_strcpy(newPath, u_getDataDirectory());
1315     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1316     uprv_strcat(newPath, "FractionalUCA.txt");
1317
1318     /* As a fallback, try to guess where the source data was located
1319      *   at the time ICU was built, and look there.
1320      */
1321 #if defined (U_TOPSRCDIR)
1322     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
1323 #else
1324     {
1325         UErrorCode errorCode = U_ZERO_ERROR;
1326         strcpy(backupPath, loadTestData(&errorCode));
1327         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1328     }
1329 #endif
1330     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1331
1332     result = T_FileStream_open(newPath, "rb");
1333
1334     if (result == NULL) {
1335         result = T_FileStream_open(backupPath, "rb");
1336         if (result == NULL) {
1337             log_err("Failed to open either %s or %s\n", newPath, backupPath);
1338         }
1339     }
1340     return result;
1341 }
1342
1343 /**
1344 * Testing the CEs returned by the iterator
1345 */
1346 static void TestCEs() {
1347     FileStream *file = NULL;
1348     char        line[1024];
1349     char       *str;
1350     UChar       codepoints[5];
1351     uint32_t    ces[20];
1352     UErrorCode  status = U_ZERO_ERROR;
1353     UCollator          *coll = ucol_open("", &status);
1354     uint32_t lineNo = 0;
1355
1356     if (U_FAILURE(status)) {
1357         log_err("Error in opening root collator\n");
1358         return;
1359     }
1360
1361     file = getFractionalUCA();
1362
1363     if (file == NULL) {
1364         log_err("*** unable to open input FractionalUCA.txt file ***\n");
1365         return;
1366     }
1367
1368
1369     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1370         int                 count = 0;
1371         UCollationElements *iter;
1372         lineNo++;
1373         /* skip this line if it is empty or a comment or is a return value
1374         or start of some variable section */
1375         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1376             line[0] == 0x000D || line[0] == '[') {
1377             continue;
1378         }
1379
1380         str = getCodePoints(line, codepoints);
1381
1382         /* these are 'fake' codepoints in the fractional UCA, and are used just
1383          * for positioning of indirect values. They should not go through this
1384          * test.
1385          */
1386         if(*codepoints == 0xFDD0) {
1387           continue;
1388         }
1389
1390         getCEs(str, ces, &status);
1391         if (U_FAILURE(status)) {
1392             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1393             break;
1394         }
1395         iter = ucol_openElements(coll, codepoints, -1, &status);
1396         if (U_FAILURE(status)) {
1397             log_err("Error in opening collation elements\n");
1398             break;
1399         }
1400         for (;;) {
1401             uint32_t ce = (uint32_t)ucol_next(iter, &status);
1402             if (ce == 0xFFFFFFFF) {
1403                 ce = 0;
1404             }
1405             /* we now unconditionally reorder Thai/Lao prevowels, so this
1406              * test would fail if we don't skip here.
1407              */
1408             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1409               continue;
1410             }
1411             if (ce != ces[count] || U_FAILURE(status)) {
1412                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1413                 break;
1414             }
1415             if (ces[count] == 0) {
1416                 break;
1417             }
1418             count ++;
1419         }
1420         ucol_closeElements(iter);
1421     }
1422
1423     T_FileStream_close(file);
1424     ucol_close(coll);
1425 }
1426
1427 /**
1428 * Testing the discontigous contractions
1429 */
1430 static void TestDiscontiguos() {
1431     const char               *rulestr    =
1432                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1433           UChar               rule[50];
1434           int                 rulelen = u_unescape(rulestr, rule, 50);
1435     const char               *src[] = {
1436      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1437     /* base character blocked */
1438      "XD\\u0300", "XD\\u0300\\u0315",
1439     /* non blocking combining character */
1440      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1441      /* blocking combining character */
1442      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1443      /* contraction prefix */
1444      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1445      "X\\u0300\\u031A\\u0315",
1446      /* ends not with a contraction character */
1447      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1448      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1449     };
1450     const char               *tgt[] = {
1451      /* non blocking combining character */
1452      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1453     /* base character blocked */
1454      "X D \\u0300", "X D \\u0300\\u0315",
1455     /* non blocking combining character */
1456      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1457      /* blocking combining character */
1458      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1459      /* contraction prefix */
1460      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1461      "X\\u0300 \\u031A \\u0315",
1462      /* ends not with a contraction character */
1463      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1464      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1465     };
1466           int                 size   = 20;
1467           UCollator          *coll;
1468           UErrorCode          status    = U_ZERO_ERROR;
1469           int                 count     = 0;
1470           UCollationElements *iter;
1471           UCollationElements *resultiter;
1472
1473     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1474     iter       = ucol_openElements(coll, rule, 1, &status);
1475     resultiter = ucol_openElements(coll, rule, 1, &status);
1476
1477     if (U_FAILURE(status)) {
1478         log_err("Error opening collation rules\n");
1479         return;
1480     }
1481
1482     while (count < size) {
1483         UChar  str[20];
1484         UChar  tstr[20];
1485         int    strLen = u_unescape(src[count], str, 20);
1486         UChar *s;
1487
1488         ucol_setText(iter, str, strLen, &status);
1489         if (U_FAILURE(status)) {
1490             log_err("Error opening collation iterator\n");
1491             return;
1492         }
1493
1494         u_unescape(tgt[count], tstr, 20);
1495         s = tstr;
1496
1497         log_verbose("count %d\n", count);
1498
1499         for (;;) {
1500             uint32_t  ce;
1501             UChar    *e = u_strchr(s, 0x20);
1502             if (e == 0) {
1503                 e = u_strchr(s, 0);
1504             }
1505             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1506             ce = ucol_next(resultiter, &status);
1507             if (U_FAILURE(status)) {
1508                 log_err("Error manipulating collation iterator\n");
1509                 return;
1510             }
1511             while (ce != UCOL_NULLORDER) {
1512                 if (ce != (uint32_t)ucol_next(iter, &status) ||
1513                     U_FAILURE(status)) {
1514                     log_err("Discontiguos contraction test mismatch\n");
1515                     return;
1516                 }
1517                 ce = ucol_next(resultiter, &status);
1518                 if (U_FAILURE(status)) {
1519                     log_err("Error getting next collation element\n");
1520                     return;
1521                 }
1522             }
1523             s = e + 1;
1524             if (*e == 0) {
1525                 break;
1526             }
1527         }
1528         ucol_reset(iter);
1529         backAndForth(iter);
1530         count ++;
1531     }
1532     ucol_closeElements(resultiter);
1533     ucol_closeElements(iter);
1534     ucol_close(coll);
1535 }
1536
1537 static void TestCEBufferOverflow()
1538 {
1539     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1540     UErrorCode          status = U_ZERO_ERROR;
1541     UChar               rule[10];
1542     UCollator          *coll;
1543     UCollationElements *iter;
1544
1545     u_uastrcpy(rule, "&z < AB");
1546     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1547     if (U_FAILURE(status)) {
1548         log_err("Rule based collator not created for testing ce buffer overflow\n");
1549         return;
1550     }
1551
1552     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1553     test. this will cause an overflow in getPrev */
1554     str[0] = 0x0041;    /* 'A' */
1555     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1556     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1557     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
1558     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1559                              &status);
1560     if (ucol_previous(iter, &status) != UCOL_NULLORDER ||
1561         status != U_BUFFER_OVERFLOW_ERROR) {
1562         log_err("CE buffer expected to overflow with long string of trail surrogates\n");
1563     }
1564     ucol_closeElements(iter);
1565     ucol_close(coll);
1566 }
1567
1568 /**
1569 * Byte bounds checks. Checks if each byte in data is between upper and lower
1570 * inclusive.
1571 */
1572 static UBool checkByteBounds(uint32_t data, char upper, char lower)
1573 {
1574     int count = 4;
1575     while (count > 0) {
1576         char b = (char)(data & 0xFF);
1577         if (b > upper || b < lower) {
1578             return FALSE;
1579         }
1580         data = data >> 8;
1581         count --;
1582     }
1583     return TRUE;
1584 }
1585
/**
* Determines the case of a string of code points.
* If it consists of multiple code points it has to be treated as a
* contraction.
* This function is compiled out (#if 0) and kept for reference only.
* @param s   code units to examine
* @param len number of code units in s
* @return UCOL_LOWER_CASE, UCOL_UPPER_CASE or UCOL_MIXED_CASE
*/
#if 0
static uint8_t getCase(const UChar *s, uint32_t len) {
    UBool       lower = FALSE;
    UBool       upper = FALSE;
    UBool       title = FALSE;
    UErrorCode  status = U_ZERO_ERROR;
    UChar       str[256];
    const UChar      *ps = s;

    /* an empty string counts as lower case */
    if (len == 0) {
        return UCOL_LOWER_CASE;
    }

    /* record which kinds of cased characters occur anywhere in the string */
    while (len > 0) {
        UChar c = *ps ++;

        if (u_islower(c)) {
            lower = TRUE;
        }
        if (u_isupper(c)) {
            upper = TRUE;
        }
        if (u_istitle(c)) {
            title = TRUE;
        }

        len --;
    }
    /* only lower case characters, or no cased characters at all */
    if ((lower && !upper && !title) || (!lower && !upper && !title)){
        return UCOL_LOWER_CASE;
    }
    if (upper && !lower && !title) {
        return UCOL_UPPER_CASE;
    }
    /* mix of cases here */
    /* len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
    if (U_FAILURE(status)) {
        log_err("Error normalizing data string\n");
        return UCOL_LOWER_CASE;
    }*/

    /* NOTE(review): len has been counted down to 0 by the loop above, so
       (title && len >= 2) cannot be true here - confirm before re-enabling */
    if ((title && len >= 2) || (lower && upper)) {
        return UCOL_MIXED_CASE;
    }
    if (u_isupper(s[0])) {
        return UCOL_UPPER_CASE;
    }
    return UCOL_LOWER_CASE;
}
#endif
1640
1641 /**
1642 * Checking collation element validity given the boundary arguments.
1643 */
1644 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1645                              int length, uint32_t primarymax,
1646                              uint32_t secondarymax)
1647 {
1648     UErrorCode          status = U_ZERO_ERROR;
1649     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
1650                                                   &status);
1651     uint32_t            ce;
1652     UBool               first  = TRUE;
1653 /*
1654     UBool               upper  = FALSE;
1655     UBool               lower  = FALSE;
1656 */
1657
1658     if (U_FAILURE(status)) {
1659         log_err("Error creating iterator for testing validity\n");
1660     }
1661
1662     ce = ucol_next(iter, &status);
1663
1664     while (ce != UCOL_NULLORDER) {
1665        if (ce != 0) {
1666            uint32_t primary   = UCOL_PRIMARYORDER(ce);
1667            uint32_t secondary = UCOL_SECONDARYORDER(ce);
1668            uint32_t tertiary  = UCOL_TERTIARYORDER(ce);
1669 /*           uint32_t scasebits = tertiary & 0xC0;*/
1670
1671            if ((tertiary == 0 && secondary != 0) ||
1672                (tertiary < 0xC0 && secondary == 0 && primary != 0)) {
1673                /* n-1th level is not zero when the nth level is
1674                   except for continuations, this is wrong */
1675                log_err("Lower level weight not 0 when high level weight is 0\n");
1676                goto fail;
1677            }
1678            else {
1679                /* checks if any byte is illegal ie = 01 02 03. */
1680                if (checkByteBounds(ce, 0x3, 0x1)) {
1681                    log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
1682                    goto fail;
1683                }
1684            }
1685            if ((primary != 0 && primary < primarymax) || (primary >= 0xFF00 && !isContinuation(ce))) {
1686                log_err("UCA primary weight out of bounds\n");
1687                goto fail;
1688            }
1689            /* case matching not done since data generated by ken */
1690            if (first) {
1691                if (secondary >= 6 && secondary <= secondarymax) {
1692                    log_err("Secondary weight out of range\n");
1693                    goto fail;
1694                }
1695                first = FALSE;
1696            }
1697        }
1698        ce   = ucol_next(iter, &status);
1699    }
1700    ucol_closeElements(iter);
1701    return TRUE;
1702 fail :
1703    ucol_closeElements(iter);
1704    return FALSE;
1705 }
1706
1707 static void TestCEValidity()
1708 {
1709     /* testing UCA collation elements */
1710     UErrorCode  status      = U_ZERO_ERROR;
1711     /* en_US has no tailorings */
1712     UCollator  *coll        = ucol_open("en_US", &status);
1713     /* tailored locales */
1714     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1715     FileStream *file = getFractionalUCA();
1716     char        line[1024];
1717     UChar       codepoints[10];
1718     int         count = 0;
1719     UParseError parseError;
1720     if (U_FAILURE(status)) {
1721         log_err("en_US collator creation failed\n");
1722         return;
1723     }
1724     log_verbose("Testing UCA elements\n");
1725     if (file == NULL) {
1726         log_err("Fractional UCA data can not be opened\n");
1727         return;
1728     }
1729
1730     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1731         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1732             line[0] == 0x000D || line[0] == '[') {
1733             continue;
1734         }
1735
1736         getCodePoints(line, codepoints);
1737         checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
1738     }
1739
1740     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1741     codepoints[0] = 0;
1742     while (codepoints[0] < 0xFFFF) {
1743         if (u_isdefined((UChar32)codepoints[0])) {
1744             checkCEValidity(coll, codepoints, 1, 5, 86);
1745         }
1746         codepoints[0] ++;
1747     }
1748
1749     ucol_close(coll);
1750
1751     /* testing tailored collation elements */
1752     log_verbose("Testing tailored elements\n");
1753     while (count < 5) {
1754         const UChar *rules = NULL,
1755                     *current = NULL;
1756         UChar *rulesCopy = NULL;
1757         int32_t ruleLen = 0;
1758
1759         uint32_t chOffset = 0;
1760         uint32_t chLen = 0;
1761         uint32_t exOffset = 0;
1762         uint32_t exLen = 0;
1763         uint32_t prefixOffset = 0;
1764         uint32_t prefixLen = 0;
1765         UBool    startOfRules = TRUE;
1766         UColOptionSet opts;
1767
1768         UColTokenParser src;
1769         uint32_t strength = 0;
1770         uint16_t specs = 0;
1771
1772         coll      = ucol_open(locale[count], &status);
1773         if (U_FAILURE(status)) {
1774             log_err("%s collator creation failed\n", locale[count]);
1775             return;
1776         }
1777
1778         src.opts = &opts;
1779         rules = ucol_getRules(coll, &ruleLen);
1780
1781         if (ruleLen > 0) {
1782             rulesCopy = (UChar *)malloc((ruleLen +
1783                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1784             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1785             src.current = src.source = rulesCopy;
1786             src.end = rulesCopy + ruleLen;
1787             src.extraCurrent = src.end;
1788             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1789
1790             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1791               strength = src.parsedToken.strength;
1792               chOffset = src.parsedToken.charsOffset;
1793               chLen = src.parsedToken.charsLen;
1794               exOffset = src.parsedToken.extensionOffset;
1795               exLen = src.parsedToken.extensionLen;
1796               prefixOffset = src.parsedToken.prefixOffset;
1797               prefixLen = src.parsedToken.prefixLen;
1798               specs = src.parsedToken.flags;
1799
1800                 startOfRules = FALSE;
1801                 uprv_memcpy(codepoints, src.source + chOffset,
1802                                                        chLen * sizeof(UChar));
1803                 codepoints[chLen] = 0;
1804                 checkCEValidity(coll, codepoints, chLen, 4, 85);
1805             }
1806             free(rulesCopy);
1807         }
1808
1809         ucol_close(coll);
1810         count ++;
1811     }
1812     T_FileStream_close(file);
1813 }
1814
1815 static void printSortKeyError(const UChar   *codepoints, int length,
1816                                     uint8_t *sortkey, int sklen)
1817 {
1818     int count = 0;
1819     log_err("Sortkey not valid for ");
1820     while (length > 0) {
1821         log_err("0x%04x ", *codepoints);
1822         length --;
1823         codepoints ++;
1824     }
1825     log_err("\nSortkey : ");
1826     while (count < sklen) {
1827         log_err("0x%02x ", sortkey[count]);
1828         count ++;
1829     }
1830     log_err("\n");
1831 }
1832
1833 /**
1834 * Checking sort key validity for all levels
1835 */
1836 static UBool checkSortKeyValidity(UCollator *coll,
1837                                   const UChar *codepoints,
1838                                   int length)
1839 {
1840     UErrorCode status  = U_ZERO_ERROR;
1841     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1842                                       UCOL_TERTIARY, UCOL_QUATERNARY,
1843                                       UCOL_IDENTICAL};
1844     int        strengthlen = 5;
1845     int        index       = 0;
1846     int        caselevel   = 0;
1847
1848     while (caselevel < 1) {
1849         if (caselevel == 0) {
1850             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1851         }
1852         else {
1853             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1854         }
1855
1856         while (index < strengthlen) {
1857             int        count01 = 0;
1858             uint32_t   count   = 0;
1859             uint8_t    sortkey[128];
1860             uint32_t   sklen;
1861
1862             ucol_setStrength(coll, strength[index]);
1863             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1864             while (sortkey[count] != 0) {
1865                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && index != 4)) {
1866                     printSortKeyError(codepoints, length, sortkey, sklen);
1867                     return FALSE;
1868                 }
1869                 if (sortkey[count] == 1) {
1870                     count01 ++;
1871                 }
1872                 count ++;
1873             }
1874
1875             if (count + 1 != sklen || (count01 != index + caselevel)) {
1876                 printSortKeyError(codepoints, length, sortkey, sklen);
1877                 return FALSE;
1878             }
1879             index ++;
1880         }
1881         caselevel ++;
1882     }
1883     return TRUE;
1884 }
1885
1886 static void TestSortKeyValidity(void)
1887 {
1888     /* testing UCA collation elements */
1889     UErrorCode  status      = U_ZERO_ERROR;
1890     /* en_US has no tailorings */
1891     UCollator  *coll        = ucol_open("en_US", &status);
1892     /* tailored locales */
1893     char        locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
1894     FileStream *file = getFractionalUCA();
1895     char        line[1024];
1896     UChar       codepoints[10];
1897     int         count = 0;
1898     UParseError parseError;
1899     if (U_FAILURE(status)) {
1900         log_err("en_US collator creation failed\n");
1901         return;
1902     }
1903     log_verbose("Testing UCA elements\n");
1904     if (file == NULL) {
1905         log_err("Fractional UCA data can not be opened\n");
1906         return;
1907     }
1908
1909     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1910         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1911             line[0] == 0x000D || line[0] == '[') {
1912             continue;
1913         }
1914
1915         getCodePoints(line, codepoints);
1916         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1917     }
1918
1919     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1920     codepoints[0] = 0;
1921
1922     while (codepoints[0] < 0xFFFF) {
1923         if (u_isdefined((UChar32)codepoints[0])) {
1924             checkSortKeyValidity(coll, codepoints, 1);
1925         }
1926         codepoints[0] ++;
1927     }
1928
1929     ucol_close(coll);
1930
1931     /* testing tailored collation elements */
1932     log_verbose("Testing tailored elements\n");
1933     while (count < 5) {
1934         const UChar *rules = NULL,
1935                     *current = NULL;
1936         UChar *rulesCopy = NULL;
1937         int32_t ruleLen = 0;
1938
1939         uint32_t chOffset = 0;
1940         uint32_t chLen = 0;
1941         uint32_t exOffset = 0;
1942         uint32_t exLen = 0;
1943         uint32_t prefixOffset = 0;
1944         uint32_t prefixLen = 0;
1945         UBool    startOfRules = TRUE;
1946         UColOptionSet opts;
1947
1948         UColTokenParser src;
1949         uint32_t strength = 0;
1950         uint16_t specs = 0;
1951
1952         coll      = ucol_open(locale[count], &status);
1953         if (U_FAILURE(status)) {
1954             log_err("%s collator creation failed\n", locale[count]);
1955             return;
1956         }
1957
1958         src.opts = &opts;
1959         rules = ucol_getRules(coll, &ruleLen);
1960
1961         if (ruleLen > 0) {
1962             rulesCopy = (UChar *)malloc((ruleLen +
1963                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1964             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1965             src.current = src.source = rulesCopy;
1966             src.end = rulesCopy + ruleLen;
1967             src.extraCurrent = src.end;
1968             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1969
1970             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1971                 strength = src.parsedToken.strength;
1972                 chOffset = src.parsedToken.charsOffset;
1973                 chLen = src.parsedToken.charsLen;
1974                 exOffset = src.parsedToken.extensionOffset;
1975                 exLen = src.parsedToken.extensionLen;
1976                 prefixOffset = src.parsedToken.prefixOffset;
1977                 prefixLen = src.parsedToken.prefixLen;
1978                 specs = src.parsedToken.flags;
1979
1980                 startOfRules = FALSE;
1981                 uprv_memcpy(codepoints, src.source + chOffset,
1982                                                        chLen * sizeof(UChar));
1983                 codepoints[chLen] = 0;
1984                 checkSortKeyValidity(coll, codepoints, chLen);
1985             }
1986             free(rulesCopy);
1987         }
1988
1989         ucol_close(coll);
1990         count ++;
1991     }
1992     T_FileStream_close(file);
1993 }
1994
1995 #endif /* #if !UCONFIG_NO_COLLATION */