icuSources/test/cintltst/citertst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2004, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File CITERTST.C
   9 *
  10 * Modification History:
  11 * Date      Name               Description
  12 *           Madhu Katragadda   Ported for C API
  13 * 02/19/01  synwee             Modified test case for new collation iterator
  14 *********************************************************************************/
  15 /*
  16  * Collation Iterator tests.
  17  * (Let me reiterate my position...)
  18  */
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_COLLATION
  23
  24 #include "unicode/ucol.h"
  25 #include "unicode/uloc.h"
  26 #include "unicode/uchar.h"
  27 #include "unicode/ustring.h"
  28 #include "unicode/putil.h"
  29 #include "callcoll.h"
  30 #include "cmemory.h"
  31 #include "cintltst.h"
  32 #include "citertst.h"
  33 #include "ccolltst.h"
  34 #include "filestrm.h"
  35 #include "cstring.h"
  36 #include "ucol_imp.h"
  37 #include "ucol_tok.h"
  38 #include <stdio.h>
  39
  40 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
  41
  42 void addCollIterTest(TestNode** root)
  43 {
  44     addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
  45     addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
  46     addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
  47     addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
  48     addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
  49     addTest(root, &TestNormalizedUnicodeChar,
  50                                 "tscoll/citertst/TestNormalizedUnicodeChar");
  51     addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
  52     addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
  53     addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
  54     addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
  55     addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
  56     addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
  57     addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
  58     addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
  59     addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
  60 }
  61
  62 /* The locales we support */
  63
  64 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
  65
  66 static void TestBug672() {
  67     UErrorCode  status = U_ZERO_ERROR;
  68     UChar       pattern[20];
  69     UChar       text[50];
  70     int         i;
  71     int         result[3][3];
  72
  73     u_uastrcpy(pattern, "resume");
  74     u_uastrcpy(text, "Time to resume updating my resume.");
  75
  76     for (i = 0; i < 3; ++ i) {
  77         UCollator          *coll = ucol_open(LOCALES[i], &status);
  78         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
  79                                                      &status);
  80         UCollationElements *titer = ucol_openElements(coll, text, -1,
  81                                                      &status);
  82         if (U_FAILURE(status)) {
  83             log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
  84                     myErrorName(status));
  85             return;
  86         }
  87
  88         log_verbose("locale tested %s\n", LOCALES[i]);
  89
  90         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
  91                U_SUCCESS(status)) {
  92         }
  93         if (U_FAILURE(status)) {
  94             log_err("ERROR: reversing collation iterator :%s\n",
  95                     myErrorName(status));
  96             return;
  97         }
  98         ucol_reset(pitr);
  99
 100         ucol_setOffset(titer, u_strlen(pattern), &status);
 101         if (U_FAILURE(status)) {
 102             log_err("ERROR: setting offset in collator :%s\n",
 103                     myErrorName(status));
 104             return;
 105         }
 106         result[i][0] = ucol_getOffset(titer);
 107         log_verbose("Text iterator set to offset %d\n", result[i][0]);
 108
 109         /* Use previous() */
 110         ucol_previous(titer, &status);
 111         result[i][1] = ucol_getOffset(titer);
 112         log_verbose("Current offset %d after previous\n", result[i][1]);
 113
 114         /* Add one to index */
 115         log_verbose("Adding one to current offset...\n");
 116         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
 117         if (U_FAILURE(status)) {
 118             log_err("ERROR: setting offset in collator :%s\n",
 119                     myErrorName(status));
 120             return;
 121         }
 122         result[i][2] = ucol_getOffset(titer);
 123         log_verbose("Current offset in text = %d\n", result[i][2]);
 124         ucol_closeElements(pitr);
 125         ucol_closeElements(titer);
 126         ucol_close(coll);
 127     }
 128
 129     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
 130         uprv_memcmp(result[1], result[2], 3) != 0) {
 131         log_err("ERROR: Different locales have different offsets at the same character\n");
 132     }
 133 }
 134
 135
 136
 137 /*  Running this test with normalization enabled showed up a bug in the incremental
 138     normalization code. */
 139 static void TestBug672Normalize() {
 140     UErrorCode  status = U_ZERO_ERROR;
 141     UChar       pattern[20];
 142     UChar       text[50];
 143     int         i;
 144     int         result[3][3];
 145
 146     u_uastrcpy(pattern, "resume");
 147     u_uastrcpy(text, "Time to resume updating my resume.");
 148
 149     for (i = 0; i < 3; ++ i) {
 150         UCollator          *coll = ucol_open(LOCALES[i], &status);
 151         UCollationElements *pitr = NULL;
 152         UCollationElements *titer = NULL;
 153
 154         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 155
 156         pitr = ucol_openElements(coll, pattern, -1, &status);
 157         titer = ucol_openElements(coll, text, -1, &status);
 158         if (U_FAILURE(status)) {
 159             log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
 160                     myErrorName(status));
 161             return;
 162         }
 163
 164         log_verbose("locale tested %s\n", LOCALES[i]);
 165
 166         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
 167                U_SUCCESS(status)) {
 168         }
 169         if (U_FAILURE(status)) {
 170             log_err("ERROR: reversing collation iterator :%s\n",
 171                     myErrorName(status));
 172             return;
 173         }
 174         ucol_reset(pitr);
 175
 176         ucol_setOffset(titer, u_strlen(pattern), &status);
 177         if (U_FAILURE(status)) {
 178             log_err("ERROR: setting offset in collator :%s\n",
 179                     myErrorName(status));
 180             return;
 181         }
 182         result[i][0] = ucol_getOffset(titer);
 183         log_verbose("Text iterator set to offset %d\n", result[i][0]);
 184
 185         /* Use previous() */
 186         ucol_previous(titer, &status);
 187         result[i][1] = ucol_getOffset(titer);
 188         log_verbose("Current offset %d after previous\n", result[i][1]);
 189
 190         /* Add one to index */
 191         log_verbose("Adding one to current offset...\n");
 192         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
 193         if (U_FAILURE(status)) {
 194             log_err("ERROR: setting offset in collator :%s\n",
 195                     myErrorName(status));
 196             return;
 197         }
 198         result[i][2] = ucol_getOffset(titer);
 199         log_verbose("Current offset in text = %d\n", result[i][2]);
 200         ucol_closeElements(pitr);
 201         ucol_closeElements(titer);
 202         ucol_close(coll);
 203     }
 204
 205     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
 206         uprv_memcmp(result[1], result[2], 3) != 0) {
 207         log_err("ERROR: Different locales have different offsets at the same character\n");
 208     }
 209 }
 210
 211
 212
 213
 214 /**
 215  * Test for CollationElementIterator previous and next for the whole set of
 216  * unicode characters.
 217  */
 218 static void TestUnicodeChar()
 219 {
 220     UChar source[0x100];
 221     UCollator *en_us;
 222     UCollationElements *iter;
 223     UErrorCode status = U_ZERO_ERROR;
 224     UChar codepoint;
 225
 226     UChar *test;
 227     en_us = ucol_open("en_US", &status);
 228     if (U_FAILURE(status)){
 229        log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
 230               myErrorName(status));
 231        return;
 232     }
 233
 234     for (codepoint = 1; codepoint < 0xFFFE;)
 235     {
 236       test = source;
 237
 238       while (codepoint % 0xFF != 0)
 239       {
 240         if (u_isdefined(codepoint))
 241           *(test ++) = codepoint;
 242         codepoint ++;
 243       }
 244
 245       if (u_isdefined(codepoint))
 246         *(test ++) = codepoint;
 247
 248       if (codepoint != 0xFFFF)
 249         codepoint ++;
 250
 251       *test = 0;
 252       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
 253       if(U_FAILURE(status)){
 254           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 255               myErrorName(status));
 256           ucol_close(en_us);
 257           return;
 258       }
 259       /* A basic test to see if it's working at all */
 260       log_verbose("codepoint testing %x\n", codepoint);
 261       backAndForth(iter);
 262       ucol_closeElements(iter);
 263
 264       /* null termination test */
 265       iter=ucol_openElements(en_us, source, -1, &status);
 266       if(U_FAILURE(status)){
 267           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 268               myErrorName(status));
 269           ucol_close(en_us);
 270           return;
 271       }
 272       /* A basic test to see if it's working at all */
 273       backAndForth(iter);
 274       ucol_closeElements(iter);
 275     }
 276
 277     ucol_close(en_us);
 278 }
 279
 280 /**
 281  * Test for CollationElementIterator previous and next for the whole set of
 282  * unicode characters with normalization on.
 283  */
 284 static void TestNormalizedUnicodeChar()
 285 {
 286     UChar source[0x100];
 287     UCollator *th_th;
 288     UCollationElements *iter;
 289     UErrorCode status = U_ZERO_ERROR;
 290     UChar codepoint;
 291
 292     UChar *test;
 293     /* thai should have normalization on */
 294     th_th = ucol_open("th_TH", &status);
 295     if (U_FAILURE(status)){
 296         log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
 297               myErrorName(status));
 298         return;
 299     }
 300
 301     for (codepoint = 1; codepoint < 0xFFFE;)
 302     {
 303       test = source;
 304
 305       while (codepoint % 0xFF != 0)
 306       {
 307         if (u_isdefined(codepoint))
 308           *(test ++) = codepoint;
 309         codepoint ++;
 310       }
 311
 312       if (u_isdefined(codepoint))
 313         *(test ++) = codepoint;
 314
 315       if (codepoint != 0xFFFF)
 316         codepoint ++;
 317
 318       *test = 0;
 319       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
 320       if(U_FAILURE(status)){
 321           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 322               myErrorName(status));
 323             ucol_close(th_th);
 324           return;
 325       }
 326
 327       backAndForth(iter);
 328       ucol_closeElements(iter);
 329
 330       iter=ucol_openElements(th_th, source, -1, &status);
 331       if(U_FAILURE(status)){
 332           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 333               myErrorName(status));
 334             ucol_close(th_th);
 335           return;
 336       }
 337
 338       backAndForth(iter);
 339       ucol_closeElements(iter);
 340     }
 341
 342     ucol_close(th_th);
 343 }
 344
 345 /**
 346 * Test the incremental normalization
 347 */
 348 static void TestNormalization()
 349 {
 350           UErrorCode          status = U_ZERO_ERROR;
 351     const char               *str    =
 352                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
 353           UCollator          *coll;
 354           UChar               rule[50];
 355           int                 rulelen = u_unescape(str, rule, 50);
 356           int                 count = 0;
 357     const char                *testdata[] =
 358                         {"\\u1ED9", "o\\u0323\\u0302",
 359                         "\\u0300\\u0315", "\\u0315\\u0300",
 360                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
 361                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
 362                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
 363                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
 364                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
 365     int32_t   srclen;
 366     UChar source[10];
 367     UCollationElements *iter;
 368
 369     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
 370     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 371     if (U_FAILURE(status)){
 372         log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
 373               myErrorName(status));
 374         return;
 375     }
 376
 377     srclen = u_unescape(testdata[0], source, 10);
 378     iter = ucol_openElements(coll, source, srclen, &status);
 379     backAndForth(iter);
 380     ucol_closeElements(iter);
 381
 382     srclen = u_unescape(testdata[1], source, 10);
 383     iter = ucol_openElements(coll, source, srclen, &status);
 384     backAndForth(iter);
 385     ucol_closeElements(iter);
 386
 387     while (count < 12) {
 388         srclen = u_unescape(testdata[count], source, 10);
 389         iter = ucol_openElements(coll, source, srclen, &status);
 390
 391         if (U_FAILURE(status)){
 392             log_err("ERROR: in creation of collator element iterator\n %s\n",
 393                   myErrorName(status));
 394             return;
 395         }
 396         backAndForth(iter);
 397         ucol_closeElements(iter);
 398
 399         iter = ucol_openElements(coll, source, -1, &status);
 400
 401         if (U_FAILURE(status)){
 402             log_err("ERROR: in creation of collator element iterator\n %s\n",
 403                   myErrorName(status));
 404             return;
 405         }
 406         backAndForth(iter);
 407         ucol_closeElements(iter);
 408         count ++;
 409     }
 410     ucol_close(coll);
 411 }
 412
 413 /**
 414  * Test for CollationElementIterator.previous()
 415  *
 416  * @bug 4108758 - Make sure it works with contracting characters
 417  *
 418  */
 419 static void TestPrevious()
 420 {
 421     UCollator *coll=NULL;
 422     UChar rule[50];
 423     UChar *source;
 424     UCollator *c1, *c2, *c3;
 425     UCollationElements *iter;
 426     UErrorCode status = U_ZERO_ERROR;
 427
 428     test1=(UChar*)malloc(sizeof(UChar) * 50);
 429     test2=(UChar*)malloc(sizeof(UChar) * 50);
 430     u_uastrcpy(test1, "What subset of all possible test cases?");
 431     u_uastrcpy(test2, "has the highest probability of detecting");
 432     coll = ucol_open("en_US", &status);
 433
 434     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
 435     log_verbose("English locale testing back and forth\n");
 436     if(U_FAILURE(status)){
 437         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 438             myErrorName(status));
 439         ucol_close(coll);
 440         return;
 441     }
 442     /* A basic test to see if it's working at all */
 443     backAndForth(iter);
 444     ucol_closeElements(iter);
 445     ucol_close(coll);
 446
 447     /* Test with a contracting character sequence */
 448     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
 449     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
 450
 451     log_verbose("Contraction rule testing back and forth with no normalization\n");
 452
 453     if (c1 == NULL || U_FAILURE(status))
 454     {
 455         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
 456             myErrorName(status));
 457         return;
 458     }
 459     source=(UChar*)malloc(sizeof(UChar) * 20);
 460     u_uastrcpy(source, "abchdcba");
 461     iter=ucol_openElements(c1, source, u_strlen(source), &status);
 462     if(U_FAILURE(status)){
 463         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 464             myErrorName(status));
 465         return;
 466     }
 467     backAndForth(iter);
 468     ucol_closeElements(iter);
 469     ucol_close(c1);
 470
 471     /* Test with an expanding character sequence */
 472     u_uastrcpy(rule, "&a < b < c/abd < d");
 473     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
 474     log_verbose("Expansion rule testing back and forth with no normalization\n");
 475     if (c2 == NULL || U_FAILURE(status))
 476     {
 477         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
 478             myErrorName(status));
 479         return;
 480     }
 481     u_uastrcpy(source, "abcd");
 482     iter=ucol_openElements(c2, source, u_strlen(source), &status);
 483     if(U_FAILURE(status)){
 484         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 485             myErrorName(status));
 486         return;
 487     }
 488     backAndForth(iter);
 489     ucol_closeElements(iter);
 490     ucol_close(c2);
 491     /* Now try both */
 492     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
 493     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
 494     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
 495
 496     if (c3 == NULL || U_FAILURE(status))
 497     {
 498         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
 499             myErrorName(status));
 500         return;
 501     }
 502     u_uastrcpy(source, "abcdbchdc");
 503     iter=ucol_openElements(c3, source, u_strlen(source), &status);
 504     if(U_FAILURE(status)){
 505         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 506             myErrorName(status));
 507         return;
 508     }
 509     backAndForth(iter);
 510     ucol_closeElements(iter);
 511     ucol_close(c3);
 512     source[0] = 0x0e41;
 513     source[1] = 0x0e02;
 514     source[2] = 0x0e41;
 515     source[3] = 0x0e02;
 516     source[4] = 0x0e27;
 517     source[5] = 0x61;
 518     source[6] = 0x62;
 519     source[7] = 0x63;
 520     source[8] = 0;
 521
 522     coll = ucol_open("th_TH", &status);
 523     log_verbose("Thai locale testing back and forth with normalization\n");
 524     iter=ucol_openElements(coll, source, u_strlen(source), &status);
 525     if(U_FAILURE(status)){
 526         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 527             myErrorName(status));
 528         return;
 529     }
 530     backAndForth(iter);
 531     ucol_closeElements(iter);
 532     ucol_close(coll);
 533
 534     /* prev test */
 535     source[0] = 0x0061;
 536     source[1] = 0x30CF;
 537     source[2] = 0x3099;
 538     source[3] = 0x30FC;
 539     source[4] = 0;
 540
 541     coll = ucol_open("ja_JP", &status);
 542     log_verbose("Japanese locale testing back and forth with normalization\n");
 543     iter=ucol_openElements(coll, source, u_strlen(source), &status);
 544     if(U_FAILURE(status)){
 545         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 546             myErrorName(status));
 547         return;
 548     }
 549     backAndForth(iter);
 550     ucol_closeElements(iter);
 551     ucol_close(coll);
 552
 553     free(source);
 554     free(test1);
 555     free(test2);
 556 }
 557
 558 /**
 559  * Test for getOffset() and setOffset()
 560  */
 561 static void TestOffset()
 562 {
 563     UErrorCode status= U_ZERO_ERROR;
 564     UCollator *en_us=NULL;
 565     UCollationElements *iter, *pristine;
 566     int32_t offset;
 567     int32_t *orders;
 568     int32_t orderLength=0;
 569     int     count = 0;
 570     test1=(UChar*)malloc(sizeof(UChar) * 50);
 571     test2=(UChar*)malloc(sizeof(UChar) * 50);
 572     u_uastrcpy(test1, "What subset of all possible test cases?");
 573     u_uastrcpy(test2, "has the highest probability of detecting");
 574     en_us = ucol_open("en_US", &status);
 575     log_verbose("Testing getOffset and setOffset for collations\n");
 576     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
 577     if(U_FAILURE(status)){
 578         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 579             myErrorName(status));
 580         ucol_close(en_us);
 581         return;
 582     }
 583
 584     /* testing boundaries */
 585     ucol_setOffset(iter, 0, &status);
 586     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
 587         log_err("Error: After setting offset to 0, we should be at the end "
 588                 "of the backwards iteration");
 589     }
 590     ucol_setOffset(iter, u_strlen(test1), &status);
 591     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
 592         log_err("Error: After setting offset to end of the string, we should "
 593                 "be at the end of the backwards iteration");
 594     }
 595
 596     /* Run all the way through the iterator, then get the offset */
 597
 598     orders = getOrders(iter, &orderLength);
 599
 600     offset = ucol_getOffset(iter);
 601
 602     if (offset != u_strlen(test1))
 603     {
 604         log_err("offset at end != length %d vs %d\n", offset,
 605             u_strlen(test1) );
 606     }
 607
 608     /* Now set the offset back to the beginning and see if it works */
 609     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
 610     if(U_FAILURE(status)){
 611         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 612             myErrorName(status));
 613     ucol_close(en_us);
 614         return;
 615     }
 616     status = U_ZERO_ERROR;
 617
 618     ucol_setOffset(iter, 0, &status);
 619     if (U_FAILURE(status))
 620     {
 621         log_err("setOffset failed. %s\n",    myErrorName(status));
 622     }
 623     else
 624     {
 625         assertEqual(iter, pristine);
 626     }
 627
 628     ucol_closeElements(pristine);
 629     ucol_closeElements(iter);
 630     free(orders);
 631
 632     /* testing offsets in normalization buffer */
 633     test1[0] = 0x61;
 634     test1[1] = 0x300;
 635     test1[2] = 0x316;
 636     test1[3] = 0x62;
 637     test1[4] = 0;
 638     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 639     iter = ucol_openElements(en_us, test1, 4, &status);
 640     if(U_FAILURE(status)){
 641         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
 642             myErrorName(status));
 643         ucol_close(en_us);
 644         return;
 645     }
 646
 647     count = 0;
 648     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
 649         U_SUCCESS(status)) {
 650         switch (count) {
 651         case 0:
 652             if (ucol_getOffset(iter) != 1) {
 653                 log_err("ERROR: Offset of iteration should be 0\n");
 654             }
 655             break;
 656         case 3:
 657             if (ucol_getOffset(iter) != 4) {
 658                 log_err("ERROR: Offset of iteration should be 4\n");
 659             }
 660             break;
 661         default:
 662             if (ucol_getOffset(iter) != 3) {
 663                 log_err("ERROR: Offset of iteration should be 3\n");
 664             }
 665         }
 666         count ++;
 667     }
 668
 669     ucol_reset(iter);
 670     count = 0;
 671     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
 672         U_SUCCESS(status)) {
 673         switch (count) {
 674         case 0:
 675             if (ucol_getOffset(iter) != 3) {
 676                 log_err("ERROR: Offset of iteration should be 3\n");
 677             }
 678             break;
 679         default:
 680             if (ucol_getOffset(iter) != 0) {
 681                 log_err("ERROR: Offset of iteration should be 0\n");
 682             }
 683         }
 684         count ++;
 685     }
 686
 687     if(U_FAILURE(status)){
 688         log_err("ERROR: in iterating collation elements %s\n",
 689             myErrorName(status));
 690     }
 691
 692     ucol_closeElements(iter);
 693     ucol_close(en_us);
 694     free(test1);
 695     free(test2);
 696 }
 697
 698 /**
 699  * Test for setText()
 700  */
 701 static void TestSetText()
 702 {
 703     int32_t c,i;
 704     UErrorCode status = U_ZERO_ERROR;
 705     UCollator *en_us=NULL;
 706     UCollationElements *iter1, *iter2;
 707     test1=(UChar*)malloc(sizeof(UChar) * 50);
 708     test2=(UChar*)malloc(sizeof(UChar) * 50);
 709     u_uastrcpy(test1, "What subset of all possible test cases?");
 710     u_uastrcpy(test2, "has the highest probability of detecting");
 711     en_us = ucol_open("en_US", &status);
 712     log_verbose("testing setText for Collation elements\n");
 713     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
 714     if(U_FAILURE(status)){
 715         log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
 716             myErrorName(status));
 717     ucol_close(en_us);
 718         return;
 719     }
 720     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
 721     if(U_FAILURE(status)){
 722         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
 723             myErrorName(status));
 724     ucol_close(en_us);
 725         return;
 726     }
 727
 728     /* Run through the second iterator just to exercise it */
 729     c = ucol_next(iter2, &status);
 730     i = 0;
 731
 732     while ( ++i < 10 && (c != UCOL_NULLORDER))
 733     {
 734         if (U_FAILURE(status))
 735         {
 736             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
 737             ucol_closeElements(iter2);
 738             ucol_closeElements(iter1);
 739     ucol_close(en_us);
 740             return;
 741         }
 742
 743         c = ucol_next(iter2, &status);
 744     }
 745
 746     /* Now set it to point to the same string as the first iterator */
 747     ucol_setText(iter2, test1, u_strlen(test1), &status);
 748     if (U_FAILURE(status))
 749     {
 750         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
 751     }
 752     else
 753     {
 754         assertEqual(iter1, iter2);
 755     }
 756
 757     /* Now set it to point to a null string with fake length*/
 758     ucol_setText(iter2, NULL, 2, &status);
 759     if (U_FAILURE(status))
 760     {
 761         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
 762     }
 763     else
 764     {
 765         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
 766             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
 767         }
 768     }
 769
 770     ucol_closeElements(iter2);
 771     ucol_closeElements(iter1);
 772     ucol_close(en_us);
 773     free(test1);
 774     free(test2);
 775 }
 776
 777 /** @bug 4108762
 778  * Test for getMaxExpansion()
 779  */
 780 static void TestMaxExpansion()
 781 {
 782     UErrorCode          status = U_ZERO_ERROR;
 783     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
 784     UChar               ch     = 0;
 785     UChar32             unassigned = 0xEFFFD;
 786     UChar               supplementary[2];
 787     uint32_t            index = 0;
 788     UBool               isError = FALSE;
 789     uint32_t            sorder = 0;
 790     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
 791     uint32_t            temporder = 0;
 792
 793     UChar rule[256];
 794     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
 795     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
 796         UCOL_DEFAULT_STRENGTH,NULL, &status);
 797     if(U_SUCCESS(status) && coll) {
 798       iter = ucol_openElements(coll, &ch, 1, &status);
 799
 800       while (ch < 0xFFFF && U_SUCCESS(status)) {
 801           int      count = 1;
 802           uint32_t order;
 803           int32_t  size = 0;
 804
 805           ch ++;
 806
 807           ucol_setText(iter, &ch, 1, &status);
 808           order = ucol_previous(iter, &status);
 809
 810           /* thai management */
 811           if (order == 0)
 812               order = ucol_previous(iter, &status);
 813
 814           while (U_SUCCESS(status) &&
 815               ucol_previous(iter, &status) != UCOL_NULLORDER) {
 816               count ++;
 817           }
 818
 819           size = ucol_getMaxExpansion(iter, order);
 820           if (U_FAILURE(status) || size < count) {
 821               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 822                   ch, count);
 823           }
 824       }
 825
 826       /* testing for exact max expansion */
 827       ch = 0;
 828       while (ch < 0x61) {
 829           uint32_t order;
 830           int32_t  size;
 831           ucol_setText(iter, &ch, 1, &status);
 832           order = ucol_previous(iter, &status);
 833           size  = ucol_getMaxExpansion(iter, order);
 834           if (U_FAILURE(status) || size != 1) {
 835               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 836                   ch, 1);
 837           }
 838           ch ++;
 839       }
 840
 841       ch = 0x63;
 842       ucol_setText(iter, &ch, 1, &status);
 843       temporder = ucol_previous(iter, &status);
 844
 845       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
 846           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
 847                   ch, 3);
 848       }
 849
 850       ch = 0x64;
 851       ucol_setText(iter, &ch, 1, &status);
 852       temporder = ucol_previous(iter, &status);
 853
 854       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
 855           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
 856                   ch, 3);
 857       }
 858
 859       U16_APPEND(supplementary, index, 2, unassigned, isError);
 860       ucol_setText(iter, supplementary, 2, &status);
 861       sorder = ucol_previous(iter, &status);
 862
 863       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
 864           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
 865                   ch, 2);
 866       }
 867
 868       /* testing jamo */
 869       ch = 0x1165;
 870
 871       ucol_setText(iter, &ch, 1, &status);
 872       temporder = ucol_previous(iter, &status);
 873       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
 874           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
 875                   ch, 3);
 876       }
 877
 878       ucol_closeElements(iter);
 879       ucol_close(coll);
 880
 881       /* testing special jamo &a<\u1160 */
 882       rule[0] = 0x26;
 883       rule[1] = 0x71;
 884       rule[2] = 0x3c;
 885       rule[3] = 0x1165;
 886       rule[4] = 0x2f;
 887       rule[5] = 0x71;
 888       rule[6] = 0x71;
 889       rule[7] = 0x71;
 890       rule[8] = 0x71;
 891       rule[9] = 0;
 892
 893       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
 894           UCOL_DEFAULT_STRENGTH,NULL, &status);
 895       iter = ucol_openElements(coll, &ch, 1, &status);
 896
 897       temporder = ucol_previous(iter, &status);
 898       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
 899           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
 900                   ch, 5);
 901       }
 902
 903       ucol_closeElements(iter);
 904       ucol_close(coll);
 905     } else {
 906       log_data_err("Couldn't open collator\n");
 907     }
 908
 909 }
 910
 911
 912 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
 913 {
 914     int32_t c1, c2;
 915     int32_t count = 0;
 916     UErrorCode status = U_ZERO_ERROR;
 917
 918     do
 919     {
 920         c1 = ucol_next(i1, &status);
 921         c2 = ucol_next(i2, &status);
 922
 923         if (c1 != c2)
 924         {
 925             log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
 926             break;
 927         }
 928
 929         count += 1;
 930     }
 931     while (c1 != UCOL_NULLORDER);
 932 }
 933
 934 /**
 935  * Testing iterators with extremely small buffers
 936  */
 937 static void TestSmallBuffer()
 938 {
 939     UErrorCode          status = U_ZERO_ERROR;
 940     UCollator          *coll;
 941     UCollationElements *testiter,
 942                        *iter;
 943     int32_t             count = 0;
 944     int32_t            *testorders,
 945                        *orders;
 946
 947     UChar teststr[500];
 948     UChar str[] = {0x300, 0x31A, 0};
 949     /*
 950     creating a long string of decomposable characters,
 951     since by default the writable buffer is of size 256
 952     */
 953     while (count < 500) {
 954         if ((count & 1) == 0) {
 955             teststr[count ++] = 0x300;
 956         }
 957         else {
 958             teststr[count ++] = 0x31A;
 959         }
 960     }
 961
 962     coll = ucol_open("th_TH", &status);
 963     if(U_SUCCESS(status) && coll) {
 964       testiter = ucol_openElements(coll, teststr, 500, &status);
 965       iter = ucol_openElements(coll, str, 2, &status);
 966
 967       orders     = getOrders(iter, &count);
 968       if (count != 2) {
 969           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
 970       }
 971
 972       /*
 973       this will rearrange the string data to 250 characters of 0x300 first then
 974       250 characters of 0x031A
 975       */
 976       testorders = getOrders(testiter, &count);
 977
 978       if (count != 500) {
 979           log_err("Error decomposition does not give the right sized collation elements\n");
 980       }
 981
 982       while (count != 0) {
 983           /* UCA collation element for 0x0F76 */
 984           if ((count > 250 && testorders[-- count] != orders[1]) ||
 985               (count <= 250 && testorders[-- count] != orders[0])) {
 986               log_err("Error decomposition does not give the right collation element at %d count\n", count);
 987               break;
 988           }
 989       }
 990
 991       free(testorders);
 992       free(orders);
 993
 994       ucol_reset(testiter);
 995       /* ensures that the writable buffer was cleared */
 996       if (testiter->iteratordata_.writableBuffer !=
 997           testiter->iteratordata_.stackWritableBuffer) {
 998           log_err("Error Writable buffer in collation element iterator not reset\n");
 999       }
1000
1001       /* ensures closing of elements done properly to clear writable buffer */
1002       ucol_next(testiter, &status);
1003       ucol_next(testiter, &status);
1004       ucol_closeElements(testiter);
1005       ucol_closeElements(iter);
1006       ucol_close(coll);
1007     } else {
1008       log_data_err("Couldn't open collator\n");
1009     }
1010 }
1011
/**
 * Converts a single hexadecimal digit character to its numeric value
 * (snippet of code from genuca).
 * @param hex character to convert
 * @return 0 to 15 for '0'-'9', 'a'-'f' and 'A'-'F'; 0 for every other
 *         character
 */
static int32_t hex2num(char hex) {
    int32_t value = 0;

    if ('0' <= hex && hex <= '9') {
        value = hex - '0';
    }
    if ('a' <= hex && hex <= 'f') {
        value = hex - 'a' + 10;
    }
    if ('A' <= hex && hex <= 'F') {
        value = hex - 'A' + 10;
    }
    return value;
}
1026
1027 /**
1028 * Getting codepoints from a string
1029 * @param str character string contain codepoints seperated by space and ended
1030 *        by a semicolon
1031 * @param codepoints array for storage, assuming size > 5
1032 * @return position at the end of the codepoint section
1033 */
1034 static char * getCodePoints(char *str, UChar *codepoints) {
1035     char *pStartCP = str;
1036     char *pEndCP   = str + 4;
1037
1038     *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1039                           (hex2num(*(pStartCP + 1)) << 8) |
1040                           (hex2num(*(pStartCP + 2)) << 4) |
1041                           (hex2num(*(pStartCP + 3))));
1042     codepoints ++;
1043     while (*pEndCP != ';') {
1044         pStartCP = pEndCP + 1;
1045         *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1046                           (hex2num(*(pStartCP + 1)) << 8) |
1047                           (hex2num(*(pStartCP + 2)) << 4) |
1048                           (hex2num(*(pStartCP + 3))));
1049         codepoints ++;
1050         pEndCP = pStartCP + 4;
1051     }
1052     *codepoints = 0;
1053     return pEndCP + 1;
1054 }
1055
1056 /**
1057 * Sniplets of code from genuca
1058 */
1059 static int32_t
1060 readElement(char **from, char *to, char separator, UErrorCode *status)
1061 {
1062     if (U_SUCCESS(*status)) {
1063         char    buffer[1024];
1064         int32_t i = 0;
1065         while (**from != separator) {
1066             if (**from != ' ') {
1067                 *(buffer+i++) = **from;
1068             }
1069             (*from)++;
1070         }
1071         (*from)++;
1072         *(buffer + i) = 0;
1073         strcpy(to, buffer);
1074         return i/2;
1075     }
1076
1077     return 0;
1078 }
1079
1080 /**
1081 * Sniplets of code from genuca
1082 */
1083 static uint32_t
1084 getSingleCEValue(char *primary, char *secondary, char *tertiary,
1085                           UErrorCode *status)
1086 {
1087     if (U_SUCCESS(*status)) {
1088         uint32_t  value    = 0;
1089         char      primsave = '\0';
1090         char      secsave  = '\0';
1091         char      tersave  = '\0';
1092         char     *primend  = primary+4;
1093         char     *secend   = secondary+2;
1094         char     *terend   = tertiary+2;
1095         uint32_t  primvalue;
1096         uint32_t  secvalue;
1097         uint32_t  tervalue;
1098
1099         if (uprv_strlen(primary) > 4) {
1100             primsave = *primend;
1101             *primend = '\0';
1102         }
1103
1104         if (uprv_strlen(secondary) > 2) {
1105             secsave = *secend;
1106             *secend = '\0';
1107         }
1108
1109         if (uprv_strlen(tertiary) > 2) {
1110             tersave = *terend;
1111             *terend = '\0';
1112         }
1113
1114         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1115         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1116         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1117         if(primvalue <= 0xFF) {
1118           primvalue <<= 8;
1119         }
1120
1121         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1122            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1123            | (tervalue & UCOL_TERTIARYORDERMASK);
1124
1125         if(primsave!='\0') {
1126             *primend = primsave;
1127         }
1128         if(secsave!='\0') {
1129             *secend = secsave;
1130         }
1131         if(tersave!='\0') {
1132             *terend = tersave;
1133         }
1134         return value;
1135     }
1136     return 0;
1137 }
1138
1139 /**
1140 * Getting collation elements generated from a string
1141 * @param str character string contain collation elements contained in [] and
1142 *        seperated by space
1143 * @param ce array for storage, assuming size > 20
1144 * @param status error status
1145 * @return position at the end of the codepoint section
1146 */
1147 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1148     char       *pStartCP     = uprv_strchr(str, '[');
1149     int         count        = 0;
1150     char       *pEndCP;
1151     char        primary[100];
1152     char        secondary[100];
1153     char        tertiary[100];
1154
1155     while (*pStartCP == '[') {
1156         uint32_t primarycount   = 0;
1157         uint32_t secondarycount = 0;
1158         uint32_t tertiarycount  = 0;
1159         uint32_t CEi = 1;
1160         pEndCP = strchr(pStartCP, ']');
1161         if(pEndCP == NULL) {
1162             break;
1163         }
1164         pStartCP ++;
1165
1166         primarycount   = readElement(&pStartCP, primary, ',', status);
1167         secondarycount = readElement(&pStartCP, secondary, ',', status);
1168         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
1169
1170         /* I want to get the CEs entered right here, including continuation */
1171         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1172         if (U_FAILURE(*status)) {
1173             break;
1174         }
1175
1176         while (2 * CEi < primarycount || CEi < secondarycount ||
1177                CEi < tertiarycount) {
1178             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1179             if (2 * CEi < primarycount) {
1180                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1181                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1182             }
1183
1184             if (2 * CEi + 1 < primarycount) {
1185                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1186                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1187             }
1188
1189             if (CEi < secondarycount) {
1190                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1191                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1192             }
1193
1194             if (CEi < tertiarycount) {
1195                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1196                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1197             }
1198
1199             CEi ++;
1200             ces[count ++] = value;
1201         }
1202
1203       pStartCP = pEndCP + 1;
1204     }
1205     ces[count] = 0;
1206     return pStartCP;
1207 }
1208
1209 /**
1210 * Getting the FractionalUCA.txt file stream
1211 */
1212 static FileStream * getFractionalUCA(void)
1213 {
1214     char        newPath[256];
1215     char        backupPath[256];
1216     FileStream *result = NULL;
1217
1218     /* Look inside ICU_DATA first */
1219     uprv_strcpy(newPath, ctest_dataSrcDir());
1220     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1221     uprv_strcat(newPath, "FractionalUCA.txt");
1222
1223     /* As a fallback, try to guess where the source data was located
1224      *   at the time ICU was built, and look there.
1225      */
1226 #if defined (U_TOPSRCDIR)
1227     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
1228 #else
1229     {
1230         UErrorCode errorCode = U_ZERO_ERROR;
1231         strcpy(backupPath, loadTestData(&errorCode));
1232         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1233     }
1234 #endif
1235     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1236
1237     result = T_FileStream_open(newPath, "rb");
1238
1239     if (result == NULL) {
1240         result = T_FileStream_open(backupPath, "rb");
1241         if (result == NULL) {
1242             log_err("Failed to open either %s or %s\n", newPath, backupPath);
1243         }
1244     }
1245     return result;
1246 }
1247
1248 /**
1249 * Testing the CEs returned by the iterator
1250 */
1251 static void TestCEs() {
1252     FileStream *file = NULL;
1253     char        line[1024];
1254     char       *str;
1255     UChar       codepoints[5];
1256     uint32_t    ces[20];
1257     UErrorCode  status = U_ZERO_ERROR;
1258     UCollator          *coll = ucol_open("", &status);
1259     uint32_t lineNo = 0;
1260
1261     if (U_FAILURE(status)) {
1262         log_err("Error in opening root collator\n");
1263         return;
1264     }
1265
1266     file = getFractionalUCA();
1267
1268     if (file == NULL) {
1269         log_err("*** unable to open input FractionalUCA.txt file ***\n");
1270         return;
1271     }
1272
1273
1274     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1275         int                 count = 0;
1276         UCollationElements *iter;
1277         lineNo++;
1278         /* skip this line if it is empty or a comment or is a return value
1279         or start of some variable section */
1280         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1281             line[0] == 0x000D || line[0] == '[') {
1282             continue;
1283         }
1284
1285         str = getCodePoints(line, codepoints);
1286
1287         /* these are 'fake' codepoints in the fractional UCA, and are used just
1288          * for positioning of indirect values. They should not go through this
1289          * test.
1290          */
1291         if(*codepoints == 0xFDD0) {
1292           continue;
1293         }
1294
1295         getCEs(str, ces, &status);
1296         if (U_FAILURE(status)) {
1297             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1298             break;
1299         }
1300         iter = ucol_openElements(coll, codepoints, -1, &status);
1301         if (U_FAILURE(status)) {
1302             log_err("Error in opening collation elements\n");
1303             break;
1304         }
1305         for (;;) {
1306             uint32_t ce = (uint32_t)ucol_next(iter, &status);
1307             if (ce == 0xFFFFFFFF) {
1308                 ce = 0;
1309             }
1310             /* we now unconditionally reorder Thai/Lao prevowels, so this
1311              * test would fail if we don't skip here.
1312              */
1313             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1314               continue;
1315             }
1316             if (ce != ces[count] || U_FAILURE(status)) {
1317                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1318                 break;
1319             }
1320             if (ces[count] == 0) {
1321                 break;
1322             }
1323             count ++;
1324         }
1325         ucol_closeElements(iter);
1326     }
1327
1328     T_FileStream_close(file);
1329     ucol_close(coll);
1330 }
1331
1332 /**
1333 * Testing the discontigous contractions
1334 */
1335 static void TestDiscontiguos() {
1336     const char               *rulestr    =
1337                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1338           UChar               rule[50];
1339           int                 rulelen = u_unescape(rulestr, rule, 50);
1340     const char               *src[] = {
1341      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1342     /* base character blocked */
1343      "XD\\u0300", "XD\\u0300\\u0315",
1344     /* non blocking combining character */
1345      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1346      /* blocking combining character */
1347      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1348      /* contraction prefix */
1349      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1350      "X\\u0300\\u031A\\u0315",
1351      /* ends not with a contraction character */
1352      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1353      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1354     };
1355     const char               *tgt[] = {
1356      /* non blocking combining character */
1357      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1358     /* base character blocked */
1359      "X D \\u0300", "X D \\u0300\\u0315",
1360     /* non blocking combining character */
1361      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1362      /* blocking combining character */
1363      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1364      /* contraction prefix */
1365      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1366      "X\\u0300 \\u031A \\u0315",
1367      /* ends not with a contraction character */
1368      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1369      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1370     };
1371           int                 size   = 20;
1372           UCollator          *coll;
1373           UErrorCode          status    = U_ZERO_ERROR;
1374           int                 count     = 0;
1375           UCollationElements *iter;
1376           UCollationElements *resultiter;
1377
1378     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1379     iter       = ucol_openElements(coll, rule, 1, &status);
1380     resultiter = ucol_openElements(coll, rule, 1, &status);
1381
1382     if (U_FAILURE(status)) {
1383         log_err("Error opening collation rules\n");
1384         return;
1385     }
1386
1387     while (count < size) {
1388         UChar  str[20];
1389         UChar  tstr[20];
1390         int    strLen = u_unescape(src[count], str, 20);
1391         UChar *s;
1392
1393         ucol_setText(iter, str, strLen, &status);
1394         if (U_FAILURE(status)) {
1395             log_err("Error opening collation iterator\n");
1396             return;
1397         }
1398
1399         u_unescape(tgt[count], tstr, 20);
1400         s = tstr;
1401
1402         log_verbose("count %d\n", count);
1403
1404         for (;;) {
1405             uint32_t  ce;
1406             UChar    *e = u_strchr(s, 0x20);
1407             if (e == 0) {
1408                 e = u_strchr(s, 0);
1409             }
1410             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1411             ce = ucol_next(resultiter, &status);
1412             if (U_FAILURE(status)) {
1413                 log_err("Error manipulating collation iterator\n");
1414                 return;
1415             }
1416             while (ce != UCOL_NULLORDER) {
1417                 if (ce != (uint32_t)ucol_next(iter, &status) ||
1418                     U_FAILURE(status)) {
1419                     log_err("Discontiguos contraction test mismatch\n");
1420                     return;
1421                 }
1422                 ce = ucol_next(resultiter, &status);
1423                 if (U_FAILURE(status)) {
1424                     log_err("Error getting next collation element\n");
1425                     return;
1426                 }
1427             }
1428             s = e + 1;
1429             if (*e == 0) {
1430                 break;
1431             }
1432         }
1433         ucol_reset(iter);
1434         backAndForth(iter);
1435         count ++;
1436     }
1437     ucol_closeElements(resultiter);
1438     ucol_closeElements(iter);
1439     ucol_close(coll);
1440 }
1441
1442 static void TestCEBufferOverflow()
1443 {
1444     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1445     UErrorCode          status = U_ZERO_ERROR;
1446     UChar               rule[10];
1447     UCollator          *coll;
1448     UCollationElements *iter;
1449
1450     u_uastrcpy(rule, "&z < AB");
1451     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1452     if (U_FAILURE(status)) {
1453         log_err("Rule based collator not created for testing ce buffer overflow\n");
1454         return;
1455     }
1456
1457     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1458     test. this will cause an overflow in getPrev */
1459     str[0] = 0x0041;    /* 'A' */
1460     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1461     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1462     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
1463     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1464                              &status);
1465     if (ucol_previous(iter, &status) != UCOL_NULLORDER ||
1466         status != U_BUFFER_OVERFLOW_ERROR) {
1467         log_err("CE buffer expected to overflow with long string of trail surrogates\n");
1468     }
1469     ucol_closeElements(iter);
1470     ucol_close(coll);
1471 }
1472
1473 /**
1474 * Byte bounds checks. Checks if each byte in data is between upper and lower
1475 * inclusive.
1476 */
1477 static UBool checkByteBounds(uint32_t data, char upper, char lower)
1478 {
1479     int count = 4;
1480     while (count > 0) {
1481         char b = (char)(data & 0xFF);
1482         if (b > upper || b < lower) {
1483             return FALSE;
1484         }
1485         data = data >> 8;
1486         count --;
1487     }
1488     return TRUE;
1489 }
1490
/**
* Determines case of the string of codepoints.
* If it is a multiple codepoints it has to treated as a contraction.
* This function is currently compiled out (#if 0).
* @param s code units to inspect
* @param len number of code units in s
* @return UCOL_LOWER_CASE, UCOL_UPPER_CASE or UCOL_MIXED_CASE
*/
#if 0
static uint8_t getCase(const UChar *s, uint32_t len) {
    UBool       lower = FALSE;
    UBool       upper = FALSE;
    UBool       title = FALSE;
    uint32_t    i;

    if (len == 0) {
        return UCOL_LOWER_CASE;
    }

    /* len is left untouched: it used to be counted down to 0 here, which
       made the "len >= 2" test below impossible to satisfy */
    for (i = 0; i < len; i ++) {
        UChar c = s[i];

        if (u_islower(c)) {
            lower = TRUE;
        }
        if (u_isupper(c)) {
            upper = TRUE;
        }
        if (u_istitle(c)) {
            title = TRUE;
        }
    }
    if ((lower && !upper && !title) || (!lower && !upper && !title)){
        return UCOL_LOWER_CASE;
    }
    if (upper && !lower && !title) {
        return UCOL_UPPER_CASE;
    }
    /* mix of cases here */
    /* Disabled NFKD normalization step; re-enabling it needs a UErrorCode
       status and a UChar str[256] buffer:
    len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
    if (U_FAILURE(status)) {
        log_err("Error normalizing data string\n");
        return UCOL_LOWER_CASE;
    }*/

    if ((title && len >= 2) || (lower && upper)) {
        return UCOL_MIXED_CASE;
    }
    if (u_isupper(s[0])) {
        return UCOL_UPPER_CASE;
    }
    return UCOL_LOWER_CASE;
}
#endif
1545
1546 /**
1547 * Checking collation element validity given the boundary arguments.
1548 */
1549 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1550                              int length, uint32_t primarymax,
1551                              uint32_t secondarymax)
1552 {
1553     UErrorCode          status = U_ZERO_ERROR;
1554     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
1555                                                   &status);
1556     uint32_t            ce;
1557     UBool               first  = TRUE;
1558 /*
1559     UBool               upper  = FALSE;
1560     UBool               lower  = FALSE;
1561 */
1562
1563     if (U_FAILURE(status)) {
1564         log_err("Error creating iterator for testing validity\n");
1565     }
1566
1567     ce = ucol_next(iter, &status);
1568
1569     while (ce != UCOL_NULLORDER) {
1570        if (ce != 0) {
1571            uint32_t primary   = UCOL_PRIMARYORDER(ce);
1572            uint32_t secondary = UCOL_SECONDARYORDER(ce);
1573            uint32_t tertiary  = UCOL_TERTIARYORDER(ce);
1574 /*           uint32_t scasebits = tertiary & 0xC0;*/
1575
1576            if ((tertiary == 0 && secondary != 0) ||
1577                (tertiary < 0xC0 && secondary == 0 && primary != 0)) {
1578                /* n-1th level is not zero when the nth level is
1579                   except for continuations, this is wrong */
1580                log_err("Lower level weight not 0 when high level weight is 0\n");
1581                goto fail;
1582            }
1583            else {
1584                /* checks if any byte is illegal ie = 01 02 03. */
1585                if (checkByteBounds(ce, 0x3, 0x1)) {
1586                    log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
1587                    goto fail;
1588                }
1589            }
1590            if ((primary != 0 && primary < primarymax)
1591                || ((primary & 0xFF) == 0xFF) || (((primary>>8) & 0xFF) == 0xFF)
1592                || ((primary & 0xFF) && ((primary & 0xFF) <= 0x03))
1593                || (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 0x03)
1594                || (primary >= 0xFE00 && !isContinuation(ce))) {
1595                log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
1596                    primary, codepoints[0]);
1597                goto fail;
1598            }
1599            /* case matching not done since data generated by ken */
1600            if (first) {
1601                if (secondary >= 6 && secondary <= secondarymax) {
1602                    log_err("Secondary weight out of range\n");
1603                    goto fail;
1604                }
1605                first = FALSE;
1606            }
1607        }
1608        ce   = ucol_next(iter, &status);
1609    }
1610    ucol_closeElements(iter);
1611    return TRUE;
1612 fail :
1613    ucol_closeElements(iter);
1614    return FALSE;
1615 }
1616
1617 static void TestCEValidity()
1618 {
1619     /* testing UCA collation elements */
1620     UErrorCode  status      = U_ZERO_ERROR;
1621     /* en_US has no tailorings */
1622     UCollator  *coll        = ucol_open("root", &status);
1623     /* tailored locales */
1624     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1625     const char *loc;
1626     FileStream *file = getFractionalUCA();
1627     char        line[1024];
1628     UChar       codepoints[10];
1629     int         count = 0;
1630     int         maxCount = 0;
1631     UParseError parseError;
1632     if (U_FAILURE(status)) {
1633         log_err("en_US collator creation failed\n");
1634         return;
1635     }
1636     log_verbose("Testing UCA elements\n");
1637     if (file == NULL) {
1638         log_err("Fractional UCA data can not be opened\n");
1639         return;
1640     }
1641
1642     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1643         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1644             line[0] == 0x000D || line[0] == '[') {
1645             continue;
1646         }
1647
1648         getCodePoints(line, codepoints);
1649         checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
1650     }
1651
1652     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1653     codepoints[0] = 0;
1654     while (codepoints[0] < 0xFFFF) {
1655         if (u_isdefined((UChar32)codepoints[0])) {
1656             checkCEValidity(coll, codepoints, 1, 5, 86);
1657         }
1658         codepoints[0] ++;
1659     }
1660
1661     ucol_close(coll);
1662
1663     /* testing tailored collation elements */
1664     log_verbose("Testing tailored elements\n");
1665     if(QUICK) {
1666         maxCount = sizeof(locale)/sizeof(locale[0]);
1667     } else {
1668         maxCount = uloc_countAvailable();
1669     }
1670     while (count < maxCount) {
1671         const UChar *rules = NULL,
1672                     *current = NULL;
1673         UChar *rulesCopy = NULL;
1674         int32_t ruleLen = 0;
1675
1676         uint32_t chOffset = 0;
1677         uint32_t chLen = 0;
1678         uint32_t exOffset = 0;
1679         uint32_t exLen = 0;
1680         uint32_t prefixOffset = 0;
1681         uint32_t prefixLen = 0;
1682         UBool    startOfRules = TRUE;
1683         UColOptionSet opts;
1684
1685         UColTokenParser src;
1686         uint32_t strength = 0;
1687         uint16_t specs = 0;
1688         if(QUICK) {
1689             loc = locale[count];
1690         } else {
1691             loc = uloc_getAvailable(count);
1692             if(!hasCollationElements(loc)) {
1693                 count++;
1694                 continue;
1695             }
1696         }
1697
1698         log_verbose("Testing CEs for %s\n", loc);
1699
1700         coll      = ucol_open(loc, &status);
1701         if (U_FAILURE(status)) {
1702             log_err("%s collator creation failed\n", loc);
1703             return;
1704         }
1705
1706         src.opts = &opts;
1707         rules = ucol_getRules(coll, &ruleLen);
1708
1709         if (ruleLen > 0) {
1710             rulesCopy = (UChar *)malloc((ruleLen +
1711                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1712             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1713             src.current = src.source = rulesCopy;
1714             src.end = rulesCopy + ruleLen;
1715             src.extraCurrent = src.end;
1716             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1717
1718             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1719               strength = src.parsedToken.strength;
1720               chOffset = src.parsedToken.charsOffset;
1721               chLen = src.parsedToken.charsLen;
1722               exOffset = src.parsedToken.extensionOffset;
1723               exLen = src.parsedToken.extensionLen;
1724               prefixOffset = src.parsedToken.prefixOffset;
1725               prefixLen = src.parsedToken.prefixLen;
1726               specs = src.parsedToken.flags;
1727
1728                 startOfRules = FALSE;
1729                 uprv_memcpy(codepoints, src.source + chOffset,
1730                                                        chLen * sizeof(UChar));
1731                 codepoints[chLen] = 0;
1732                 checkCEValidity(coll, codepoints, chLen, 4, 85);
1733             }
1734             free(rulesCopy);
1735         }
1736
1737         ucol_close(coll);
1738         count ++;
1739     }
1740     T_FileStream_close(file);
1741 }
1742
1743 static void printSortKeyError(const UChar   *codepoints, int length,
1744                                     uint8_t *sortkey, int sklen)
1745 {
1746     int count = 0;
1747     log_err("Sortkey not valid for ");
1748     while (length > 0) {
1749         log_err("0x%04x ", *codepoints);
1750         length --;
1751         codepoints ++;
1752     }
1753     log_err("\nSortkey : ");
1754     while (count < sklen) {
1755         log_err("0x%02x ", sortkey[count]);
1756         count ++;
1757     }
1758     log_err("\n");
1759 }
1760
1761 /**
1762 * Checking sort key validity for all levels
1763 */
1764 static UBool checkSortKeyValidity(UCollator *coll,
1765                                   const UChar *codepoints,
1766                                   int length)
1767 {
1768     UErrorCode status  = U_ZERO_ERROR;
1769     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1770                                       UCOL_TERTIARY, UCOL_QUATERNARY,
1771                                       UCOL_IDENTICAL};
1772     int        strengthlen = 5;
1773     int        index       = 0;
1774     int        caselevel   = 0;
1775
1776     while (caselevel < 1) {
1777         if (caselevel == 0) {
1778             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1779         }
1780         else {
1781             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1782         }
1783
1784         while (index < strengthlen) {
1785             int        count01 = 0;
1786             uint32_t   count   = 0;
1787             uint8_t    sortkey[128];
1788             uint32_t   sklen;
1789
1790             ucol_setStrength(coll, strength[index]);
1791             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1792             while (sortkey[count] != 0) {
1793                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && index != 4)) {
1794                     printSortKeyError(codepoints, length, sortkey, sklen);
1795                     return FALSE;
1796                 }
1797                 if (sortkey[count] == 1) {
1798                     count01 ++;
1799                 }
1800                 count ++;
1801             }
1802
1803             if (count + 1 != sklen || (count01 != index + caselevel)) {
1804                 printSortKeyError(codepoints, length, sortkey, sklen);
1805                 return FALSE;
1806             }
1807             index ++;
1808         }
1809         caselevel ++;
1810     }
1811     return TRUE;
1812 }
1813
1814 static void TestSortKeyValidity(void)
1815 {
1816     /* testing UCA collation elements */
1817     UErrorCode  status      = U_ZERO_ERROR;
1818     /* en_US has no tailorings */
1819     UCollator  *coll        = ucol_open("en_US", &status);
1820     /* tailored locales */
1821     char        locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
1822     FileStream *file = getFractionalUCA();
1823     char        line[1024];
1824     UChar       codepoints[10];
1825     int         count = 0;
1826     UParseError parseError;
1827     if (U_FAILURE(status)) {
1828         log_err("en_US collator creation failed\n");
1829         return;
1830     }
1831     log_verbose("Testing UCA elements\n");
1832     if (file == NULL) {
1833         log_err("Fractional UCA data can not be opened\n");
1834         return;
1835     }
1836
1837     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1838         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1839             line[0] == 0x000D || line[0] == '[') {
1840             continue;
1841         }
1842
1843         getCodePoints(line, codepoints);
1844         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1845     }
1846
1847     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1848     codepoints[0] = 0;
1849
1850     while (codepoints[0] < 0xFFFF) {
1851         if (u_isdefined((UChar32)codepoints[0])) {
1852             checkSortKeyValidity(coll, codepoints, 1);
1853         }
1854         codepoints[0] ++;
1855     }
1856
1857     ucol_close(coll);
1858
1859     /* testing tailored collation elements */
1860     log_verbose("Testing tailored elements\n");
1861     while (count < 5) {
1862         const UChar *rules = NULL,
1863                     *current = NULL;
1864         UChar *rulesCopy = NULL;
1865         int32_t ruleLen = 0;
1866
1867         uint32_t chOffset = 0;
1868         uint32_t chLen = 0;
1869         uint32_t exOffset = 0;
1870         uint32_t exLen = 0;
1871         uint32_t prefixOffset = 0;
1872         uint32_t prefixLen = 0;
1873         UBool    startOfRules = TRUE;
1874         UColOptionSet opts;
1875
1876         UColTokenParser src;
1877         uint32_t strength = 0;
1878         uint16_t specs = 0;
1879
1880         coll      = ucol_open(locale[count], &status);
1881         if (U_FAILURE(status)) {
1882             log_err("%s collator creation failed\n", locale[count]);
1883             return;
1884         }
1885
1886         src.opts = &opts;
1887         rules = ucol_getRules(coll, &ruleLen);
1888
1889         if (ruleLen > 0) {
1890             rulesCopy = (UChar *)malloc((ruleLen +
1891                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1892             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1893             src.current = src.source = rulesCopy;
1894             src.end = rulesCopy + ruleLen;
1895             src.extraCurrent = src.end;
1896             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1897
1898             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1899                 strength = src.parsedToken.strength;
1900                 chOffset = src.parsedToken.charsOffset;
1901                 chLen = src.parsedToken.charsLen;
1902                 exOffset = src.parsedToken.extensionOffset;
1903                 exLen = src.parsedToken.extensionLen;
1904                 prefixOffset = src.parsedToken.prefixOffset;
1905                 prefixLen = src.parsedToken.prefixLen;
1906                 specs = src.parsedToken.flags;
1907
1908                 startOfRules = FALSE;
1909                 uprv_memcpy(codepoints, src.source + chOffset,
1910                                                        chLen * sizeof(UChar));
1911                 codepoints[chLen] = 0;
1912                 checkSortKeyValidity(coll, codepoints, chLen);
1913             }
1914             free(rulesCopy);
1915         }
1916
1917         ucol_close(coll);
1918         count ++;
1919     }
1920     T_FileStream_close(file);
1921 }
1922
1923 #endif /* #if !UCONFIG_NO_COLLATION */