icuSources/test/cintltst/cucdtst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2011, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /*******************************************************************************
   7 *
   8 * File CUCDTST.C
   9 *
  10 * Modification History:
  11 *        Name                     Description
  12 *     Madhu Katragadda            Ported for C API, added tests for string functions
  13 ********************************************************************************
  14 */
  15
  16 #include <string.h>
  17 #include <math.h>
  18 #include <stdlib.h>
  19
  20 #include "unicode/utypes.h"
  21 #include "unicode/uchar.h"
  22 #include "unicode/putil.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/uloc.h"
  25 #include "unicode/unorm2.h"
  26
  27 #include "cintltst.h"
  28 #include "putilimp.h"
  29 #include "uparse.h"
  30 #include "ucase.h"
  31 #include "ubidi_props.h"
  32 #include "uprops.h"
  33 #include "uset_imp.h"
  34 #include "usc_impl.h"
  35 #include "udatamem.h" /* for testing ucase_openBinary() */
  36 #include "cucdapi.h"
  37
  38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  39
  40 /* prototypes --------------------------------------------------------------- */
  41
  42 static void TestUpperLower(void);
  43 static void TestLetterNumber(void);
  44 static void TestMisc(void);
  45 static void TestPOSIX(void);
  46 static void TestControlPrint(void);
  47 static void TestIdentifier(void);
  48 static void TestUnicodeData(void);
  49 static void TestCodeUnit(void);
  50 static void TestCodePoint(void);
  51 static void TestCharLength(void);
  52 static void TestCharNames(void);
  53 static void TestMirroring(void);
  54 static void TestUScriptRunAPI(void);
  55 static void TestAdditionalProperties(void);
  56 static void TestNumericProperties(void);
  57 static void TestPropertyNames(void);
  58 static void TestPropertyValues(void);
  59 static void TestConsistency(void);
  60 static void TestUCase(void);
  61 static void TestUBiDiProps(void);
  62 static void TestCaseFolding(void);
  63
  64 /* internal methods used */
  65 static int32_t MakeProp(char* str);
  66 static int32_t MakeDir(char* str);
  67
  68 /* helpers ------------------------------------------------------------------ */
  69
  70 static void
  71 parseUCDFile(const char *filename,
  72              char *fields[][2], int32_t fieldCount,
  73              UParseLineFn *lineFn, void *context,
  74              UErrorCode *pErrorCode) {
  75     char path[256];
  76     char backupPath[256];
  77
  78     if(U_FAILURE(*pErrorCode)) {
  79         return;
  80     }
  81
  82     /* Look inside ICU_DATA first */
  83     strcpy(path, u_getDataDirectory());
  84     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
  85     strcat(path, filename);
  86
  87     /* As a fallback, try to guess where the source data was located
  88      *    at the time ICU was built, and look there.
  89      */
  90     strcpy(backupPath, ctest_dataSrcDir());
  91     strcat(backupPath, U_FILE_SEP_STRING);
  92     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
  93     strcat(backupPath, filename);
  94
  95     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
  96     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
  97         *pErrorCode=U_ZERO_ERROR;
  98         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
  99     }
 100     if(U_FAILURE(*pErrorCode)) {
 101         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
 102     }
 103 }
 104
 105 /* test data ---------------------------------------------------------------- */
 106
 107 static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
 108 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
 109 static const int32_t tagValues[] =
 110     {
 111     /* Mn */ U_NON_SPACING_MARK,
 112     /* Mc */ U_COMBINING_SPACING_MARK,
 113     /* Me */ U_ENCLOSING_MARK,
 114     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
 115     /* Nl */ U_LETTER_NUMBER,
 116     /* No */ U_OTHER_NUMBER,
 117     /* Zs */ U_SPACE_SEPARATOR,
 118     /* Zl */ U_LINE_SEPARATOR,
 119     /* Zp */ U_PARAGRAPH_SEPARATOR,
 120     /* Cc */ U_CONTROL_CHAR,
 121     /* Cf */ U_FORMAT_CHAR,
 122     /* Cs */ U_SURROGATE,
 123     /* Co */ U_PRIVATE_USE_CHAR,
 124     /* Cn */ U_UNASSIGNED,
 125     /* Lu */ U_UPPERCASE_LETTER,
 126     /* Ll */ U_LOWERCASE_LETTER,
 127     /* Lt */ U_TITLECASE_LETTER,
 128     /* Lm */ U_MODIFIER_LETTER,
 129     /* Lo */ U_OTHER_LETTER,
 130     /* Pc */ U_CONNECTOR_PUNCTUATION,
 131     /* Pd */ U_DASH_PUNCTUATION,
 132     /* Ps */ U_START_PUNCTUATION,
 133     /* Pe */ U_END_PUNCTUATION,
 134     /* Po */ U_OTHER_PUNCTUATION,
 135     /* Sm */ U_MATH_SYMBOL,
 136     /* Sc */ U_CURRENCY_SYMBOL,
 137     /* Sk */ U_MODIFIER_SYMBOL,
 138     /* So */ U_OTHER_SYMBOL,
 139     /* Pi */ U_INITIAL_PUNCTUATION,
 140     /* Pf */ U_FINAL_PUNCTUATION
 141     };
 142
 143 static const char dirStrings[][5] = {
 144     "L",
 145     "R",
 146     "EN",
 147     "ES",
 148     "ET",
 149     "AN",
 150     "CS",
 151     "B",
 152     "S",
 153     "WS",
 154     "ON",
 155     "LRE",
 156     "LRO",
 157     "AL",
 158     "RLE",
 159     "RLO",
 160     "PDF",
 161     "NSM",
 162     "BN"
 163 };
 164
 165 void addUnicodeTest(TestNode** root);
 166
 167 void addUnicodeTest(TestNode** root)
 168 {
 169     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
 170     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
 171     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
 172     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
 173     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
 174     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
 175     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
 176     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
 177     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
 178     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
 179     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
 180     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
 181     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
 182     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
 183     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
 184     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
 185     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
 186     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
 187     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
 188     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
 189     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
 190     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
 191     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
 192     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
 193     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
 194 }
 195
 196 /*==================================================== */
 197 /* test u_toupper() and u_tolower()                    */
 198 /*==================================================== */
 199 static void TestUpperLower()
 200 {
 201     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
 202     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
 203     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
 204     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 205     int32_t i;
 206
 207     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
 208     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 209
 210 /*
 211 Checks LetterLike Symbols which were previously a source of confusion
 212 [Bertrand A. D. 02/04/98]
 213 */
 214     for (i=0x2100;i<0x2138;i++)
 215     {
 216         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
 217         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
 218         {
 219             if (i != (int)u_tolower(i)) /* itself */
 220                 log_err("Failed case conversion with itself: U+%04x\n", i);
 221             if (i != (int)u_toupper(i))
 222                 log_err("Failed case conversion with itself: U+%04x\n", i);
 223         }
 224     }
 225
 226     for(i=0; i < u_strlen(upper); i++){
 227         if(u_tolower(upper[i]) != lower[i]){
 228             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
 229         }
 230     }
 231
 232     log_verbose("testing upper lower\n");
 233     for (i = 0; i < 21; i++) {
 234
 235         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
 236         {
 237             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
 238         }
 239         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
 240          {
 241             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
 242         }
 243         else if (upperTest[i] != u_tolower(lowerTest[i]))
 244         {
 245             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
 246         }
 247         else if (lowerTest[i] != u_toupper(upperTest[i]))
 248          {
 249             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
 250         }
 251         else if (upperTest[i] != u_tolower(upperTest[i]))
 252         {
 253             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
 254         }
 255         else if (lowerTest[i] != u_toupper(lowerTest[i]))
 256         {
 257             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
 258         }
 259     }
 260     log_verbose("done testing upper lower\n");
 261
 262     log_verbose("testing u_istitle\n");
 263     {
 264         static const UChar expected[] = {
 265             0x1F88,
 266             0x1F89,
 267             0x1F8A,
 268             0x1F8B,
 269             0x1F8C,
 270             0x1F8D,
 271             0x1F8E,
 272             0x1F8F,
 273             0x1F88,
 274             0x1F89,
 275             0x1F8A,
 276             0x1F8B,
 277             0x1F8C,
 278             0x1F8D,
 279             0x1F8E,
 280             0x1F8F,
 281             0x1F98,
 282             0x1F99,
 283             0x1F9A,
 284             0x1F9B,
 285             0x1F9C,
 286             0x1F9D,
 287             0x1F9E,
 288             0x1F9F,
 289             0x1F98,
 290             0x1F99,
 291             0x1F9A,
 292             0x1F9B,
 293             0x1F9C,
 294             0x1F9D,
 295             0x1F9E,
 296             0x1F9F,
 297             0x1FA8,
 298             0x1FA9,
 299             0x1FAA,
 300             0x1FAB,
 301             0x1FAC,
 302             0x1FAD,
 303             0x1FAE,
 304             0x1FAF,
 305             0x1FA8,
 306             0x1FA9,
 307             0x1FAA,
 308             0x1FAB,
 309             0x1FAC,
 310             0x1FAD,
 311             0x1FAE,
 312             0x1FAF,
 313             0x1FBC,
 314             0x1FBC,
 315             0x1FCC,
 316             0x1FCC,
 317             0x1FFC,
 318             0x1FFC,
 319         };
 320         int32_t num = sizeof(expected)/sizeof(expected[0]);
 321         for(i=0; i<num; i++){
 322             if(!u_istitle(expected[i])){
 323                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
 324             }
 325         }
 326
 327     }
 328 }
 329
 330 /* compare two sets and verify that their difference or intersection is empty */
 331 static UBool
 332 showADiffB(const USet *a, const USet *b,
 333            const char *a_name, const char *b_name,
 334            UBool expect, UBool diffIsError) {
 335     USet *aa;
 336     int32_t i, start, end, length;
 337     UErrorCode errorCode;
 338
 339     /*
 340      * expect:
 341      * TRUE  -> a-b should be empty, that is, b should contain all of a
 342      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
 343      */
 344     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
 345         return TRUE;
 346     }
 347
 348     /* clone a to aa because a is const */
 349     aa=uset_open(1, 0);
 350     if(aa==NULL) {
 351         /* unusual problem - out of memory? */
 352         return FALSE;
 353     }
 354     uset_addAll(aa, a);
 355
 356     /* compute the set in question */
 357     if(expect) {
 358         /* a-b */
 359         uset_removeAll(aa, b);
 360     } else {
 361         /* a&b */
 362         uset_retainAll(aa, b);
 363     }
 364
 365     /* aa is not empty because of the initial tests above; show its contents */
 366     errorCode=U_ZERO_ERROR;
 367     i=0;
 368     for(;;) {
 369         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
 370         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 371             break; /* done */
 372         }
 373         if(U_FAILURE(errorCode)) {
 374             log_err("error comparing %s with %s at difference item %d: %s\n",
 375                 a_name, b_name, i, u_errorName(errorCode));
 376             break;
 377         }
 378         if(length!=0) {
 379             break; /* done with code points, got a string or -1 */
 380         }
 381
 382         if(diffIsError) {
 383             if(expect) {
 384                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 385             } else {
 386                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 387             }
 388         } else {
 389             if(expect) {
 390                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 391             } else {
 392                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 393             }
 394         }
 395
 396         ++i;
 397     }
 398
 399     uset_close(aa);
 400     return FALSE;
 401 }
 402
 403 static UBool
 404 showAMinusB(const USet *a, const USet *b,
 405             const char *a_name, const char *b_name,
 406             UBool diffIsError) {
 407     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
 408 }
 409
 410 static UBool
 411 showAIntersectB(const USet *a, const USet *b,
 412                 const char *a_name, const char *b_name,
 413                 UBool diffIsError) {
 414     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
 415 }
 416
 417 static UBool
 418 compareUSets(const USet *a, const USet *b,
 419              const char *a_name, const char *b_name,
 420              UBool diffIsError) {
 421     /*
 422      * Use an arithmetic & not a logical && so that both branches
 423      * are always taken and all differences are shown.
 424      */
 425     return
 426         showAMinusB(a, b, a_name, b_name, diffIsError) &
 427         showAMinusB(b, a, b_name, a_name, diffIsError);
 428 }
 429
 430 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
 431 static void TestLetterNumber()
 432 {
 433     UChar i = 0x0000;
 434
 435     log_verbose("Testing for isalpha\n");
 436     for (i = 0x0041; i < 0x005B; i++) {
 437         if (!u_isalpha(i))
 438         {
 439             log_err("Failed isLetter test at  %.4X\n", i);
 440         }
 441     }
 442     for (i = 0x0660; i < 0x066A; i++) {
 443         if (u_isalpha(i))
 444         {
 445             log_err("Failed isLetter test with numbers at %.4X\n", i);
 446         }
 447     }
 448
 449     log_verbose("Testing for isdigit\n");
 450     for (i = 0x0660; i < 0x066A; i++) {
 451         if (!u_isdigit(i))
 452         {
 453             log_verbose("Failed isNumber test at %.4X\n", i);
 454         }
 455     }
 456
 457     log_verbose("Testing for isalnum\n");
 458     for (i = 0x0041; i < 0x005B; i++) {
 459         if (!u_isalnum(i))
 460         {
 461             log_err("Failed isAlNum test at  %.4X\n", i);
 462         }
 463     }
 464     for (i = 0x0660; i < 0x066A; i++) {
 465         if (!u_isalnum(i))
 466         {
 467             log_err("Failed isAlNum test at  %.4X\n", i);
 468         }
 469     }
 470
 471     {
 472         /*
 473          * The following checks work only starting from Unicode 4.0.
 474          * Check the version number here.
 475          */
 476         static UVersionInfo u401={ 4, 0, 1, 0 };
 477         UVersionInfo version;
 478         u_getUnicodeVersion(version);
 479         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
 480             return;
 481         }
 482     }
 483
 484     {
 485         /*
 486          * Sanity check:
 487          * Verify that exactly the digit characters have decimal digit values.
 488          * This assumption is used in the implementation of u_digit()
 489          * (which checks nt=de)
 490          * compared with the parallel java.lang.Character.digit()
 491          * (which checks Nd).
 492          *
 493          * This was not true in Unicode 3.2 and earlier.
 494          * Unicode 4.0 fixed discrepancies.
 495          * Unicode 4.0.1 re-introduced problems in this area due to an
 496          * unintentionally incomplete last-minute change.
 497          */
 498         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
 499         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 500
 501         USet *digits, *decimalValues;
 502         UErrorCode errorCode;
 503
 504         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
 505         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 506         errorCode=U_ZERO_ERROR;
 507         digits=uset_openPattern(digitsPattern, 6, &errorCode);
 508         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
 509
 510         if(U_SUCCESS(errorCode)) {
 511             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
 512         }
 513
 514         uset_close(digits);
 515         uset_close(decimalValues);
 516     }
 517 }
 518
 519 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
 520                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
 521                                 UBool expected) {
 522     int32_t i;
 523     for (i = 0; i < sampleCharsLength; ++i) {
 524         UBool result = propFn(sampleChars[i]);
 525         if (result != expected) {
 526             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
 527                     propName, sampleChars[i], result);
 528         }
 529     }
 530 }
 531
 532 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
 533 static void TestMisc()
 534 {
 535     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
 536     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
 537     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
 538     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
 539     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
 540     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
 541 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
 542     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
 543     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
 544     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
 545     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
 546
 547     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
 548
 549     uint32_t mask;
 550
 551     int32_t i;
 552     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
 553     UVersionInfo realVersion;
 554
 555     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
 556
 557     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
 558     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
 559
 560     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 561                         sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
 562     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 563                         sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
 564
 565     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 566                         sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
 567     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 568                         sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
 569
 570     testSampleCharProps(u_isdefined, "u_isdefined",
 571                         sampleDefined, LENGTHOF(sampleDefined), TRUE);
 572     testSampleCharProps(u_isdefined, "u_isdefined",
 573                         sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
 574
 575     testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
 576     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
 577
 578     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
 579     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
 580
 581     for (i = 0; i < LENGTHOF(sampleDigits); i++) {
 582         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
 583             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
 584                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
 585         }
 586     }
 587
 588     /* Tests the ICU version #*/
 589     u_getVersion(realVersion);
 590     u_versionToString(realVersion, icuVersion);
 591     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
 592     {
 593         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
 594     }
 595 #if defined(ICU_VERSION)
 596     /* test only happens where we have configure.in with VERSION - sanity check. */
 597     if(strcmp(U_ICU_VERSION, ICU_VERSION))
 598     {
 599         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
 600     }
 601 #endif
 602
 603     /* test U_GC_... */
 604     if(
 605         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
 606         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
 607         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
 608         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
 609         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
 610         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
 611     ) {
 612         log_err("error: U_GET_GC_MASK does not work properly\n");
 613     }
 614
 615     mask=0;
 616     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
 617
 618     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
 619     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
 620     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
 621     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
 622     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
 623
 624     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
 625     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
 626     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
 627
 628     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
 629     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
 630     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
 631
 632     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
 633     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
 634     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
 635
 636     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
 637     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
 638     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
 639     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
 640
 641     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
 642     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
 643     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
 644     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
 645     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
 646
 647     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
 648     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
 649     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
 650     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
 651
 652     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
 653     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
 654
 655     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 656         log_err("error: problems with U_GC_XX_MASK constants\n");
 657     }
 658
 659     mask=0;
 660     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
 661     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
 662     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
 663     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
 664     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
 665     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
 666     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
 667
 668     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 669         log_err("error: problems with U_GC_Y_MASK constants\n");
 670     }
 671     {
 672         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
 673         for(i=0; i<10; i++){
 674             if(digit[i]!=u_forDigit(i,10)){
 675                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
 676             }
 677         }
 678     }
 679
 680     /* test u_digit() */
 681     {
 682         static const struct {
 683             UChar32 c;
 684             int8_t radix, value;
 685         } data[]={
 686             /* base 16 */
 687             { 0x0031, 16, 1 },
 688             { 0x0038, 16, 8 },
 689             { 0x0043, 16, 12 },
 690             { 0x0066, 16, 15 },
 691             { 0x00e4, 16, -1 },
 692             { 0x0662, 16, 2 },
 693             { 0x06f5, 16, 5 },
 694             { 0xff13, 16, 3 },
 695             { 0xff41, 16, 10 },
 696
 697             /* base 8 */
 698             { 0x0031, 8, 1 },
 699             { 0x0038, 8, -1 },
 700             { 0x0043, 8, -1 },
 701             { 0x0066, 8, -1 },
 702             { 0x00e4, 8, -1 },
 703             { 0x0662, 8, 2 },
 704             { 0x06f5, 8, 5 },
 705             { 0xff13, 8, 3 },
 706             { 0xff41, 8, -1 },
 707
 708             /* base 36 */
 709             { 0x5a, 36, 35 },
 710             { 0x7a, 36, 35 },
 711             { 0xff3a, 36, 35 },
 712             { 0xff5a, 36, 35 },
 713
 714             /* wrong radix values */
 715             { 0x0031, 1, -1 },
 716             { 0xff3a, 37, -1 }
 717         };
 718
 719         for(i=0; i<LENGTHOF(data); ++i) {
 720             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
 721                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
 722                         data[i].c,
 723                         data[i].radix,
 724                         u_digit(data[i].c, data[i].radix),
 725                         data[i].value);
 726             }
 727         }
 728     }
 729 }
 730
 731 /* test C/POSIX-style functions --------------------------------------------- */
 732
 733 /* bit flags */
 734 #define ISAL     1
 735 #define ISLO     2
 736 #define ISUP     4
 737
 738 #define ISDI     8
 739 #define ISXD  0x10
 740
 741 #define ISAN  0x20
 742
 743 #define ISPU  0x40
 744 #define ISGR  0x80
 745 #define ISPR 0x100
 746
 747 #define ISSP 0x200
 748 #define ISBL 0x400
 749 #define ISCN 0x800
 750
 751 /* C/POSIX-style functions, in the same order as the bit flags */
 752 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
 753
 754 static const struct {
 755     IsPOSIXClass *fn;
 756     const char *name;
 757 } posixClasses[]={
 758     { u_isalpha, "isalpha" },
 759     { u_islower, "islower" },
 760     { u_isupper, "isupper" },
 761     { u_isdigit, "isdigit" },
 762     { u_isxdigit, "isxdigit" },
 763     { u_isalnum, "isalnum" },
 764     { u_ispunct, "ispunct" },
 765     { u_isgraph, "isgraph" },
 766     { u_isprint, "isprint" },
 767     { u_isspace, "isspace" },
 768     { u_isblank, "isblank" },
 769     { u_iscntrl, "iscntrl" }
 770 };
 771
 772 static const struct {
 773     UChar32 c;
 774     uint32_t posixResults;
 775 } posixData[]={
 776     { 0x0008,                                                        ISCN },    /* backspace */
 777     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
 778     { 0x000a,                                              ISSP|     ISCN },    /* LF */
 779     { 0x000c,                                              ISSP|     ISCN },    /* FF */
 780     { 0x000d,                                              ISSP|     ISCN },    /* CR */
 781     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
 782     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
 783     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
 784     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
 785     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
 786     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
 787     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
 788     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
 789     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
 790     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
 791     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
 792     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
 793     { 0x0600,                                                        ISCN },    /* arabic number sign */
 794     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
 795     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
 796     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
 797     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
 798     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
 799     { 0x200b,                                                        ISCN },    /* ZWSP */
 800   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
 801     { 0x200e,                                                        ISCN },    /* LRM */
 802     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
 803     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
 804     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
 805     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
 806     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
 807     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
 808     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
 809     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
 810 };
 811
 812 static void
 813 TestPOSIX() {
 814     uint32_t mask;
 815     int32_t cl, i;
 816     UBool expect;
 817
 818     mask=1;
 819     for(cl=0; cl<12; ++cl) {
 820         for(i=0; i<LENGTHOF(posixData); ++i) {
 821             expect=(UBool)((posixData[i].posixResults&mask)!=0);
 822             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
 823                 log_err("u_%s(U+%04x)=%s is wrong\n",
 824                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
 825             }
 826         }
 827         mask<<=1;
 828     }
 829 }
 830
 831 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
 832 static void TestControlPrint()
 833 {
 834     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
 835     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
 836     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
 837     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
 838     UChar32 c;
 839
 840     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
 841     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
 842
 843     testSampleCharProps(u_isprint, "u_isprint",
 844                         samplePrintable, LENGTHOF(samplePrintable), TRUE);
 845     testSampleCharProps(u_isprint, "u_isprint",
 846                         sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
 847
 848     /* test all ISO 8 controls */
 849     for(c=0; c<=0x9f; ++c) {
 850         if(c==0x20) {
 851             /* skip ASCII graphic characters and continue with DEL */
 852             c=0x7f;
 853         }
 854         if(!u_iscntrl(c)) {
 855             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
 856         }
 857         if(!u_isISOControl(c)) {
 858             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
 859         }
 860         if(u_isprint(c)) {
 861             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
 862         }
 863     }
 864
 865     /* test all Latin-1 graphic characters */
 866     for(c=0x20; c<=0xff; ++c) {
 867         if(c==0x7f) {
 868             c=0xa0;
 869         } else if(c==0xad) {
 870             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
 871             ++c;
 872         }
 873         if(!u_isprint(c)) {
 874             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
 875         }
 876     }
 877 }
 878
 879 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
 880 static void TestIdentifier()
 881 {
 882     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
 883     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
 884     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
 885     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
 886     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
 887     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
 888     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
 889     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
 890     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
 891     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
 892
 893     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 894                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
 895     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 896                         sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
 897
 898     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 899                         sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
 900     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 901                         sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
 902
 903     /* IDPart should imply IDStart */
 904     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 905                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
 906
 907     testSampleCharProps(u_isIDStart, "u_isIDStart",
 908                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
 909     testSampleCharProps(u_isIDStart, "u_isIDStart",
 910                         sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
 911
 912     testSampleCharProps(u_isIDPart, "u_isIDPart",
 913                         sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
 914     testSampleCharProps(u_isIDPart, "u_isIDPart",
 915                         sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
 916
 917     /* IDPart should imply IDStart */
 918     testSampleCharProps(u_isIDPart, "u_isIDPart",
 919                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
 920
 921     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 922                         sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
 923     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 924                         sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
 925 }
 926
 927 /* for each line of UnicodeData.txt, check some of the properties */
 928 typedef struct UnicodeDataContext {
 929 #if UCONFIG_NO_NORMALIZATION
 930     const void *dummy;
 931 #else
 932     const UNormalizer2 *nfc;
 933     const UNormalizer2 *nfkc;
 934 #endif
 935 } UnicodeDataContext;
 936
 937 /*
 938  * ### TODO
 939  * This test fails incorrectly if the First or Last code point of a repetitive area
 940  * is overridden, which is allowed and is encouraged for the PUAs.
 941  * Currently, this means that both area First/Last and override lines are
 942  * tested against the properties from the API,
 943  * and the area boundary will not match and cause an error.
 944  *
 945  * This function should detect area boundaries and skip them for the test of individual
 946  * code points' properties.
 947  * Then it should check that the areas contain all the same properties except where overridden.
 948  * For this, it would have had to set a flag for which code points were listed explicitly.
 949  */
 950 static void U_CALLCONV
 951 unicodeDataLineFn(void *context,
 952                   char *fields[][2], int32_t fieldCount,
 953                   UErrorCode *pErrorCode)
 954 {
 955     char buffer[100];
 956     const char *d;
 957     char *end;
 958     uint32_t value;
 959     UChar32 c;
 960     int32_t i;
 961     int8_t type;
 962     int32_t dt;
 963     UChar dm[32], s[32];
 964     int32_t dmLength, length;
 965
 966 #if !UCONFIG_NO_NORMALIZATION
 967     const UNormalizer2 *nfc, *nfkc;
 968 #endif
 969
 970     /* get the character code, field 0 */
 971     c=strtoul(fields[0][0], &end, 16);
 972     if(end<=fields[0][0] || end!=fields[0][1]) {
 973         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
 974         return;
 975     }
 976     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
 977         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
 978         return;
 979     }
 980
 981     /* get general category, field 2 */
 982     *fields[2][1]=0;
 983     type = (int8_t)tagValues[MakeProp(fields[2][0])];
 984     if(u_charType(c)!=type) {
 985         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
 986     }
 987     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
 988         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
 989     }
 990
 991     /* get canonical combining class, field 3 */
 992     value=strtoul(fields[3][0], &end, 10);
 993     if(end<=fields[3][0] || end!=fields[3][1]) {
 994         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
 995         return;
 996     }
 997     if(value>255) {
 998         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
 999         return;
1000     }
1001 #if !UCONFIG_NO_NORMALIZATION
1002     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1003         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1004     }
1005     nfkc=((UnicodeDataContext *)context)->nfkc;
1006     if(value!=unorm2_getCombiningClass(nfkc, c)) {
1007         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1008     }
1009 #endif
1010
1011     /* get BiDi category, field 4 */
1012     *fields[4][1]=0;
1013     i=MakeDir(fields[4][0]);
1014     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1015         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1016     }
1017
1018     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1019     d=NULL;
1020     if(fields[5][0]==fields[5][1]) {
1021         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1022         if(c==0xac00 || c==0xd7a3) {
1023             dt=U_DT_CANONICAL;
1024         } else {
1025             dt=U_DT_NONE;
1026         }
1027     } else {
1028         d=fields[5][0];
1029         *fields[5][1]=0;
1030         dt=UCHAR_INVALID_CODE;
1031         if(*d=='<') {
1032             end=strchr(++d, '>');
1033             if(end!=NULL) {
1034                 *end=0;
1035                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1036                 d=u_skipWhitespace(end+1);
1037             }
1038         } else {
1039             dt=U_DT_CANONICAL;
1040         }
1041     }
1042     if(dt>U_DT_NONE) {
1043         if(c==0xac00) {
1044             dm[0]=0x1100;
1045             dm[1]=0x1161;
1046             dm[2]=0;
1047             dmLength=2;
1048         } else if(c==0xd7a3) {
1049             dm[0]=0xd788;
1050             dm[1]=0x11c2;
1051             dm[2]=0;
1052             dmLength=2;
1053         } else {
1054             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1055         }
1056     } else {
1057         dmLength=-1;
1058     }
1059     if(dt<0 || U_FAILURE(*pErrorCode)) {
1060         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1061         return;
1062     }
1063 #if !UCONFIG_NO_NORMALIZATION
1064     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1065     if(i!=dt) {
1066         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1067     }
1068     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1069     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1070     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1071         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1072                 "or the Decomposition_Mapping is different (%s)\n",
1073                 c, length, dmLength, u_errorName(*pErrorCode));
1074         return;
1075     }
1076     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1077     if(dt!=U_DT_CANONICAL) {
1078         dmLength=-1;
1079     }
1080     nfc=((UnicodeDataContext *)context)->nfc;
1081     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1082     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1083         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1084                 "or the Decomposition_Mapping is different (%s)\n",
1085                 c, length, dmLength, u_errorName(*pErrorCode));
1086         return;
1087     }
1088     /* recompose */
1089     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1090         UChar32 a, b, composite;
1091         i=0;
1092         U16_NEXT(dm, i, dmLength, a);
1093         U16_NEXT(dm, i, dmLength, b);
1094         /* i==dmLength */
1095         composite=unorm2_composePair(nfc, a, b);
1096         if(composite!=c) {
1097             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1098                     (long)c, (long)a, (long)b, (long)composite);
1099         }
1100         /*
1101          * Note: NFKC has fewer round-trip mappings than NFC,
1102          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1103          */
1104     }
1105 #endif
1106
1107     /* get ISO Comment, field 11 */
1108     *fields[11][1]=0;
1109     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1110     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1111         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1112             c, u_errorName(*pErrorCode),
1113             U_FAILURE(*pErrorCode) ? buffer : "[error]",
1114             fields[11][0]);
1115     }
1116
1117     /* get uppercase mapping, field 12 */
1118     if(fields[12][0]!=fields[12][1]) {
1119         value=strtoul(fields[12][0], &end, 16);
1120         if(end!=fields[12][1]) {
1121             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1122             return;
1123         }
1124         if((UChar32)value!=u_toupper(c)) {
1125             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1126         }
1127     } else {
1128         /* no case mapping: the API must map the code point to itself */
1129         if(c!=u_toupper(c)) {
1130             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1131         }
1132     }
1133
1134     /* get lowercase mapping, field 13 */
1135     if(fields[13][0]!=fields[13][1]) {
1136         value=strtoul(fields[13][0], &end, 16);
1137         if(end!=fields[13][1]) {
1138             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1139             return;
1140         }
1141         if((UChar32)value!=u_tolower(c)) {
1142             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1143         }
1144     } else {
1145         /* no case mapping: the API must map the code point to itself */
1146         if(c!=u_tolower(c)) {
1147             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1148         }
1149     }
1150
1151     /* get titlecase mapping, field 14 */
1152     if(fields[14][0]!=fields[14][1]) {
1153         value=strtoul(fields[14][0], &end, 16);
1154         if(end!=fields[14][1]) {
1155             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1156             return;
1157         }
1158         if((UChar32)value!=u_totitle(c)) {
1159             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1160         }
1161     } else {
1162         /* no case mapping: the API must map the code point to itself */
1163         if(c!=u_totitle(c)) {
1164             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1165         }
1166     }
1167 }
1168
1169 static UBool U_CALLCONV
1170 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1171     static const UChar32 test[][2]={
1172         {0x41, U_UPPERCASE_LETTER},
1173         {0x308, U_NON_SPACING_MARK},
1174         {0xfffe, U_GENERAL_OTHER_TYPES},
1175         {0xe0041, U_FORMAT_CHAR},
1176         {0xeffff, U_UNASSIGNED}
1177     };
1178
1179     int32_t i, count;
1180
1181     if(0!=strcmp((const char *)context, "a1")) {
1182         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1183         return FALSE;
1184     }
1185
1186     count=LENGTHOF(test);
1187     for(i=0; i<count; ++i) {
1188         if(start<=test[i][0] && test[i][0]<limit) {
1189             if(type!=(UCharCategory)test[i][1]) {
1190                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1191                         start, limit, (long)type, test[i][0], test[i][1]);
1192             }
1193             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1194             return i==(count-1) ? FALSE : TRUE;
1195         }
1196     }
1197
1198     if(start>test[count-1][0]) {
1199         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1200                 start, limit, (long)type);
1201         return FALSE;
1202     }
1203
1204     return TRUE;
1205 }
1206
1207 static UBool U_CALLCONV
1208 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1209     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1210     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1211         { 0x0590, U_LEFT_TO_RIGHT },
1212         { 0x0600, U_RIGHT_TO_LEFT },
1213         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1214         { 0x08A0, U_RIGHT_TO_LEFT },
1215         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1216         { 0xFB1D, U_LEFT_TO_RIGHT },
1217         { 0xFB50, U_RIGHT_TO_LEFT },
1218         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1219         { 0xFE70, U_LEFT_TO_RIGHT },
1220         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1221         { 0x10800, U_LEFT_TO_RIGHT },
1222         { 0x11000, U_RIGHT_TO_LEFT },
1223         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1224         { 0x1EE00, U_RIGHT_TO_LEFT },
1225         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1226         { 0x1F000, U_RIGHT_TO_LEFT },
1227         { 0x110000, U_LEFT_TO_RIGHT }
1228     };
1229
1230     UChar32 c;
1231     int32_t i;
1232     UCharDirection shouldBeDir;
1233
1234     /*
1235      * LineBreak.txt specifies:
1236      *   #  - Assigned characters that are not listed explicitly are given the value
1237      *   #    "AL".
1238      *   #  - Unassigned characters are given the value "XX".
1239      *
1240      * PUA characters are listed explicitly with "XX".
1241      * Verify that no assigned character has "XX".
1242      */
1243     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1244         c=start;
1245         while(c<limit) {
1246             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1247                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1248             }
1249             ++c;
1250         }
1251     }
1252
1253     /*
1254      * Verify default Bidi classes.
1255      * For recent Unicode versions, see UCD.html.
1256      *
1257      * For older Unicode versions:
1258      * See table 3-7 "Bidirectional Character Types" in UAX #9.
1259      * http://www.unicode.org/reports/tr9/
1260      *
1261      * See also DerivedBidiClass.txt for Cn code points!
1262      *
1263      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1264      * changed some default values.
1265      * In particular, non-characters and unassigned Default Ignorable Code Points
1266      * change from L to BN.
1267      *
1268      * UCD.html version 4.0.1 does not yet reflect these changes.
1269      */
1270     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1271         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1272         c=start;
1273         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1274             if((int32_t)c<defaultBidi[i][0]) {
1275                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1276                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1277                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1278                     } else {
1279                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1280                     }
1281
1282                     if( u_charDirection(c)!=shouldBeDir ||
1283                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1284                     ) {
1285                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1286                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1287                     }
1288                     ++c;
1289                 }
1290             }
1291         }
1292     }
1293
1294     return TRUE;
1295 }
1296
1297 /* tests for several properties */
1298 static void TestUnicodeData()
1299 {
1300     UVersionInfo expectVersionArray;
1301     UVersionInfo versionArray;
1302     char *fields[15][2];
1303     UErrorCode errorCode;
1304     UChar32 c;
1305     int8_t type;
1306
1307     UnicodeDataContext context;
1308
1309     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1310     u_getUnicodeVersion(versionArray);
1311     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1312     {
1313         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1314         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1315     }
1316
1317 #if defined(ICU_UNICODE_VERSION)
1318     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1319     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1320     {
1321          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1322     }
1323 #endif
1324
1325     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1326         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1327     }
1328
1329     errorCode=U_ZERO_ERROR;
1330 #if !UCONFIG_NO_NORMALIZATION
1331     context.nfc=unorm2_getNFCInstance(&errorCode);
1332     context.nfkc=unorm2_getNFKCInstance(&errorCode);
1333     if(U_FAILURE(errorCode)) {
1334         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1335         return;
1336     }
1337 #endif
1338     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1339     if(U_FAILURE(errorCode)) {
1340         return; /* if we couldn't parse UnicodeData.txt, we should return */
1341     }
1342
1343     /* sanity check on repeated properties */
1344     for(c=0xfffe; c<=0x10ffff;) {
1345         type=u_charType(c);
1346         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1347             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1348         }
1349         if(type!=U_UNASSIGNED) {
1350             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1351         }
1352         if((c&0xffff)==0xfffe) {
1353             ++c;
1354         } else {
1355             c+=0xffff;
1356         }
1357     }
1358
1359     /* test that PUA is not "unassigned" */
1360     for(c=0xe000; c<=0x10fffd;) {
1361         type=u_charType(c);
1362         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1363             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1364         }
1365         if(type==U_UNASSIGNED) {
1366             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1367         } else if(type!=U_PRIVATE_USE_CHAR) {
1368             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1369         }
1370         if(c==0xf8ff) {
1371             c=0xf0000;
1372         } else if(c==0xffffd) {
1373             c=0x100000;
1374         } else {
1375             ++c;
1376         }
1377     }
1378
1379     /* test u_enumCharTypes() */
1380     u_enumCharTypes(enumTypeRange, "a1");
1381
1382     /* check default properties */
1383     u_enumCharTypes(enumDefaultsRange, NULL);
1384 }
1385
1386 static void TestCodeUnit(){
1387     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1388
1389     int32_t i;
1390
1391     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1392         UChar c=codeunit[i];
1393         if(i<4){
1394             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1395                 log_err("ERROR: U+%04x is a single", c);
1396             }
1397
1398         }
1399         if(i >= 4 && i< 8){
1400             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1401                 log_err("ERROR: U+%04x is a first surrogate", c);
1402             }
1403         }
1404         if(i >= 8 && i< 12){
1405             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1406                 log_err("ERROR: U+%04x is a second surrogate", c);
1407             }
1408         }
1409     }
1410
1411 }
1412
1413 static void TestCodePoint(){
1414     const UChar32 codePoint[]={
1415         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1416         0xd800,
1417         0xdbff,
1418         0xdc00,
1419         0xdfff,
1420         0xdc04,
1421         0xd821,
1422         /*not a surrogate, valid, isUnicodeChar , not Error*/
1423         0x20ac,
1424         0xd7ff,
1425         0xe000,
1426         0xe123,
1427         0x0061,
1428         0xe065,
1429         0x20402,
1430         0x24506,
1431         0x23456,
1432         0x20402,
1433         0x10402,
1434         0x23456,
1435         /*not a surrogate, not valid, isUnicodeChar, isError */
1436         0x0015,
1437         0x009f,
1438         /*not a surrogate, not valid, not isUnicodeChar, isError */
1439         0xffff,
1440         0xfffe,
1441     };
1442     int32_t i;
1443     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1444         UChar32 c=codePoint[i];
1445         if(i<6){
1446             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1447                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1448             }
1449             if(UTF_IS_VALID(c)){
1450                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1451             }
1452             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1453                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1454             }
1455             if(UTF_IS_ERROR(c)){
1456                 log_err("ERROR: isError() failed for U+%04x\n", c);
1457             }
1458         }else if(i >=6 && i<18){
1459             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1460                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1461             }
1462             if(!UTF_IS_VALID(c)){
1463                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1464             }
1465             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1466                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1467             }
1468             if(UTF_IS_ERROR(c)){
1469                 log_err("ERROR: isError() failed for U+%04x\n", c);
1470             }
1471         }else if(i >=18 && i<20){
1472             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1473                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1474             }
1475             if(UTF_IS_VALID(c)){
1476                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1477             }
1478             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1479                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1480             }
1481             if(!UTF_IS_ERROR(c)){
1482                 log_err("ERROR: isError() failed for U+%04x\n", c);
1483             }
1484         }
1485         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1486             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1487                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1488             }
1489             if(UTF_IS_VALID(c)){
1490                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1491             }
1492             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1493                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1494             }
1495             if(!UTF_IS_ERROR(c)){
1496                 log_err("ERROR: isError() failed for U+%04x\n", c);
1497             }
1498         }
1499     }
1500
1501     if(
1502         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1503         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1504         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1505         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1506     ) {
1507         log_err("error with U_IS_BMP()\n");
1508     }
1509
1510     if(
1511         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1512         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1513         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1514         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1515     ) {
1516         log_err("error with U_IS_SUPPLEMENTARY()\n");
1517     }
1518 }
1519
1520 static void TestCharLength()
1521 {
1522     const int32_t codepoint[]={
1523         1, 0x0061,
1524         1, 0xe065,
1525         1, 0x20ac,
1526         2, 0x20402,
1527         2, 0x23456,
1528         2, 0x24506,
1529         2, 0x20402,
1530         2, 0x10402,
1531         1, 0xd7ff,
1532         1, 0xe000
1533     };
1534
1535     int32_t i;
1536     UBool multiple;
1537     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1538         UChar32 c=codepoint[i+1];
1539         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1540             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1541         }
1542         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1543         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1544             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1545         }
1546     }
1547 }
1548
1549 /*internal functions ----*/
1550 static int32_t MakeProp(char* str)
1551 {
1552     int32_t result = 0;
1553     char* matchPosition =0;
1554
1555     matchPosition = strstr(tagStrings, str);
1556     if (matchPosition == 0)
1557     {
1558         log_err("unrecognized type letter ");
1559         log_err(str);
1560     }
1561     else
1562         result = (int32_t)((matchPosition - tagStrings) / 2);
1563     return result;
1564 }
1565
1566 static int32_t MakeDir(char* str)
1567 {
1568     int32_t pos = 0;
1569     for (pos = 0; pos < 19; pos++) {
1570         if (strcmp(str, dirStrings[pos]) == 0) {
1571             return pos;
1572         }
1573     }
1574     return -1;
1575 }
1576
1577 /* test u_charName() -------------------------------------------------------- */
1578
/*
 * Expected character names, used by enumCharNamesFn() and TestCharNames().
 * An empty string means "no name of this kind expected"; alias may also be
 * NULL, which the users of this table treat like an empty string.
 */
static const struct {
    uint32_t code;          /* code point */
    const char *name;       /* expected result for U_UNICODE_CHAR_NAME */
    const char *oldName;    /* expected result for U_UNICODE_10_CHAR_NAME */
    const char *extName;    /* expected result for U_EXTENDED_CHAR_NAME */
    const char *alias;      /* expected result for U_CHAR_NAME_ALIAS, or NULL */
} names[]={
    { 0x0061, "LATIN SMALL LETTER A", "",
              "LATIN SMALL LETTER A",
              NULL },
    { 0x01a2, "LATIN CAPITAL LETTER OI", "",
              "LATIN CAPITAL LETTER OI",
              "LATIN CAPITAL LETTER GHA" },
    { 0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
              NULL },
    { 0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
              "TIBETAN MARK BKA- SHOG GI MGO RGYAN" },
    { 0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401", NULL },
    { 0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED", NULL },
    { 0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA", NULL },
    { 0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH", NULL },
    { 0xd800, "", "", "<lead surrogate-D800>", NULL },
    { 0xdc00, "", "", "<trail surrogate-DC00>", NULL },
    { 0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS", NULL },
    { 0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN", NULL },
    { 0xffff, "", "", "<noncharacter-FFFF>", NULL },
    { 0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS" },
    { 0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456", NULL }
};
1606
1607 static UBool
1608 enumCharNamesFn(void *context,
1609                 UChar32 code, UCharNameChoice nameChoice,
1610                 const char *name, int32_t length) {
1611     int32_t *pCount=(int32_t *)context;
1612     const char *expected;
1613     int i;
1614
1615     if(length<=0 || length!=(int32_t)strlen(name)) {
1616         /* should not be called with an empty string or invalid length */
1617         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1618         return TRUE;
1619     }
1620
1621     ++*pCount;
1622     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1623         if(code==(UChar32)names[i].code) {
1624             switch (nameChoice) {
1625                 case U_EXTENDED_CHAR_NAME:
1626                     if(0!=strcmp(name, names[i].extName)) {
1627                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1628                     }
1629                     break;
1630                 case U_UNICODE_CHAR_NAME:
1631                     if(0!=strcmp(name, names[i].name)) {
1632                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1633                     }
1634                     break;
1635                 case U_UNICODE_10_CHAR_NAME:
1636                     expected=names[i].oldName;
1637                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
1638                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1639                     }
1640                     break;
1641                 case U_CHAR_NAME_ALIAS:
1642                     expected=names[i].alias;
1643                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1644                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1645                     }
1646                     break;
1647                 case U_CHAR_NAME_CHOICE_COUNT:
1648                     break;
1649             }
1650             break;
1651         }
1652     }
1653     return TRUE;
1654 }
1655
/* Context passed to enumExtCharNamesFn() while enumerating extended names. */
struct enumExtCharNamesContext {
    /* running count of names; handed on to enumCharNamesFn() as its counter */
    uint32_t length;

    /* code point seen by the previous callback; TestCharNames() starts it at -1 */
    int32_t last;
};
1660
1661 static UBool
1662 enumExtCharNamesFn(void *context,
1663                 UChar32 code, UCharNameChoice nameChoice,
1664                 const char *name, int32_t length) {
1665     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1666
1667     if (ecncp->last != (int32_t) code - 1) {
1668         if (ecncp->last < 0) {
1669             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1670         } else {
1671             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1672         }
1673     }
1674     ecncp->last = (int32_t) code;
1675
1676     if (!*name) {
1677         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1678     }
1679
1680     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1681 }
1682
1683 /**
1684  * This can be made more efficient by moving it into putil.c and having
1685  * it directly access the ebcdic translation tables.
1686  * TODO: If we get this method in putil.c, then delete it from here.
1687  */
1688 static UChar
1689 u_charToUChar(char c) {
1690     UChar uc;
1691     u_charsToUChars(&c, &uc, 1);
1692     return uc;
1693 }
1694
1695 static void
1696 TestCharNames() {
1697     static char name[80];
1698     UErrorCode errorCode=U_ZERO_ERROR;
1699     struct enumExtCharNamesContext extContext;
1700     const char *expected;
1701     int32_t length;
1702     UChar32 c;
1703     int32_t i;
1704
1705     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1706     length=uprv_getMaxCharNameLength();
1707     if(length==0) {
1708         /* no names data available */
1709         return;
1710     }
1711     if(length<83) { /* Unicode 3.2 max char name length */
1712         log_err("uprv_getMaxCharNameLength()=%d is too short");
1713     }
1714     /* ### TODO same tests for max ISO comment length as for max name length */
1715
1716     log_verbose("Testing u_charName()\n");
1717     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1718         /* modern Unicode character name */
1719         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1720         if(U_FAILURE(errorCode)) {
1721             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1722             return;
1723         }
1724         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1725             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1726         }
1727
1728         /* find the modern name */
1729         if (*names[i].name) {
1730             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1731             if(U_FAILURE(errorCode)) {
1732                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1733                 return;
1734             }
1735             if(c!=(UChar32)names[i].code) {
1736                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1737             }
1738         }
1739
1740         /* Unicode 1.0 character name */
1741         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1742         if(U_FAILURE(errorCode)) {
1743             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1744             return;
1745         }
1746         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1747             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1748         }
1749
1750         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1751         if(names[i].oldName[0]!=0 /* && length>0 */) {
1752             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1753             if(U_FAILURE(errorCode)) {
1754                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1755                 return;
1756             }
1757             if(c!=(UChar32)names[i].code) {
1758                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1759             }
1760         }
1761
1762         /* Unicode character name alias */
1763         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1764         if(U_FAILURE(errorCode)) {
1765             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1766             return;
1767         }
1768         expected=names[i].alias;
1769         if(expected==NULL) {
1770             expected="";
1771         }
1772         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1773             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1774                     names[i].code, name, length, expected);
1775         }
1776
1777         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1778         if(expected[0]!=0 /* && length>0 */) {
1779             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1780             if(U_FAILURE(errorCode)) {
1781                 log_err("u_charFromName(%s - alias) error %s\n",
1782                         expected, u_errorName(errorCode));
1783                 return;
1784             }
1785             if(c!=(UChar32)names[i].code) {
1786                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1787                         expected, c, names[i].code);
1788             }
1789         }
1790     }
1791
1792     /* test u_enumCharNames() */
1793     length=0;
1794     errorCode=U_ZERO_ERROR;
1795     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1796     if(U_FAILURE(errorCode) || length<94140) {
1797         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1798     }
1799
1800     extContext.length = 0;
1801     extContext.last = -1;
1802     errorCode=U_ZERO_ERROR;
1803     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1804     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1805         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1806     }
1807
1808     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1809     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1810         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1811     }
1812
1813     /* Test getCharNameCharacters */
1814     if(!getTestOption(QUICK_OPTION)) {
1815         enum { BUFSIZE = 256 };
1816         UErrorCode ec = U_ZERO_ERROR;
1817         char buf[BUFSIZE];
1818         int32_t maxLength;
1819         UChar32 cp;
1820         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1821         int32_t l1, l2;
1822         UBool map[256];
1823         UBool ok;
1824
1825         USet* set = uset_open(1, 0); /* empty set */
1826         USet* dumb = uset_open(1, 0); /* empty set */
1827
1828         /*
1829          * uprv_getCharNameCharacters() will likely return more lowercase
1830          * letters than actual character names contain because
1831          * it includes all the characters in lowercased names of
1832          * general categories, for the full possible set of extended names.
1833          */
1834         {
1835             USetAdder sa={
1836                 NULL,
1837                 uset_add,
1838                 uset_addRange,
1839                 uset_addString,
1840                 NULL /* don't need remove() */
1841             };
1842             sa.set=set;
1843             uprv_getCharNameCharacters(&sa);
1844         }
1845
1846         /* build set the dumb (but sure-fire) way */
1847         for (i=0; i<256; ++i) {
1848             map[i] = FALSE;
1849         }
1850
1851         maxLength=0;
1852         for (cp=0; cp<0x110000; ++cp) {
1853             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1854                                      buf, BUFSIZE, &ec);
1855             if (U_FAILURE(ec)) {
1856                 log_err("FAIL: u_charName failed when it shouldn't\n");
1857                 uset_close(set);
1858                 uset_close(dumb);
1859                 return;
1860             }
1861             if(len>maxLength) {
1862                 maxLength=len;
1863             }
1864
1865             for (i=0; i<len; ++i) {
1866                 if (!map[(uint8_t) buf[i]]) {
1867                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1868                     map[(uint8_t) buf[i]] = TRUE;
1869                 }
1870             }
1871
1872             /* test for leading/trailing whitespace */
1873             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1874                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1875             }
1876         }
1877
1878         if(map[(uint8_t)'\t']) {
1879             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1880         }
1881
1882         length=uprv_getMaxCharNameLength();
1883         if(length!=maxLength) {
1884             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1885                     length, maxLength);
1886         }
1887
1888         /* compare the sets.  Where is my uset_equals?!! */
1889         ok=TRUE;
1890         for(i=0; i<256; ++i) {
1891             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1892                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1893                     /* ignore lowercase a-z that are in set but not in dumb */
1894                     ok=TRUE;
1895                 } else {
1896                     ok=FALSE;
1897                     break;
1898                 }
1899             }
1900         }
1901
1902         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1903         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1904         if (U_FAILURE(ec)) {
1905             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1906             uset_close(set);
1907             uset_close(dumb);
1908             return;
1909         }
1910
1911         if (l1 >= BUFSIZE) {
1912             l1 = BUFSIZE-1;
1913             pat[l1] = 0;
1914         }
1915         if (l2 >= BUFSIZE) {
1916             l2 = BUFSIZE-1;
1917             dumbPat[l2] = 0;
1918         }
1919
1920         if (!ok) {
1921             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1922                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1923         } else if(getTestOption(VERBOSITY_OPTION)) {
1924             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1925         }
1926
1927         uset_close(set);
1928         uset_close(dumb);
1929     }
1930
1931     /* ### TODO: test error cases and other interesting things */
1932 }
1933
1934 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
1935
1936 static void
1937 TestMirroring() {
1938     USet *set;
1939     UErrorCode errorCode;
1940
1941     UChar32 start, end, c2, c3;
1942     int32_t i;
1943
1944     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1945
1946     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1947
1948     log_verbose("Testing u_isMirrored()\n");
1949     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1950          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1951         )
1952     ) {
1953         log_err("u_isMirrored() does not work correctly\n");
1954     }
1955
1956     log_verbose("Testing u_charMirror()\n");
1957     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1958          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1959          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1960          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1961          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1962          )
1963     ) {
1964         log_err("u_charMirror() does not work correctly\n");
1965     }
1966
1967     /* verify that Bidi_Mirroring_Glyph roundtrips */
1968     errorCode=U_ZERO_ERROR;
1969     set=uset_openPattern(mirroredPattern, 17, &errorCode);
1970
1971     if (U_FAILURE(errorCode)) {
1972         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1973     } else {
1974         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1975             do {
1976                 c2=u_charMirror(start);
1977                 c3=u_charMirror(c2);
1978                 if(c3!=start) {
1979                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1980                 }
1981             } while(++start<=end);
1982         }
1983     }
1984
1985     uset_close(set);
1986 }
1987
1988
1989 struct RunTestData
1990 {
1991     const char *runText;
1992     UScriptCode runCode;
1993 };
1994
1995 typedef struct RunTestData RunTestData;
1996
1997 static void
1998 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
1999                 const char *prefix)
2000 {
2001     int32_t run, runStart, runLimit;
2002     UScriptCode runCode;
2003
2004     /* iterate over all the runs */
2005     run = 0;
2006     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2007         if (runStart != runStarts[run]) {
2008             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2009                 prefix, run, runStarts[run], runStart);
2010         }
2011
2012         if (runLimit != runStarts[run + 1]) {
2013             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2014                 prefix, run, runStarts[run + 1], runLimit);
2015         }
2016
2017         if (runCode != testData[run].runCode) {
2018             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2019                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2020         }
2021
2022         run += 1;
2023
2024         /* stop when we've seen all the runs we expect to see */
2025         if (run >= nRuns) {
2026             break;
2027         }
2028     }
2029
2030     /* Complain if we didn't see then number of runs we expected */
2031     if (run != nRuns) {
2032         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2033     }
2034 }
2035
2036 static void
2037 TestUScriptRunAPI()
2038 {
2039     static const RunTestData testData1[] = {
2040         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2041         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2042         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2043         {"English (", USCRIPT_LATIN},
2044         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2045         {") ", USCRIPT_LATIN},
2046         {"\\u6F22\\u5B75", USCRIPT_HAN},
2047         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2048         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2049         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2050     };
2051
2052     static const RunTestData testData2[] = {
2053        {"((((((((((abc))))))))))", USCRIPT_LATIN}
2054     };
2055
2056     static const struct {
2057       const RunTestData *testData;
2058       int32_t nRuns;
2059     } testDataEntries[] = {
2060         {testData1, LENGTHOF(testData1)},
2061         {testData2, LENGTHOF(testData2)}
2062     };
2063
2064     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
2065     int32_t testEntry;
2066
2067     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2068         UChar testString[1024];
2069         int32_t runStarts[256];
2070         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2071         const RunTestData *testData = testDataEntries[testEntry].testData;
2072
2073         int32_t run, stringLimit;
2074         UScriptRun *scriptRun = NULL;
2075         UErrorCode err;
2076
2077         /*
2078          * Fill in the test string and the runStarts array.
2079          */
2080         stringLimit = 0;
2081         for (run = 0; run < nTestRuns; run += 1) {
2082             runStarts[run] = stringLimit;
2083             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2084             /*stringLimit -= 1;*/
2085         }
2086
2087         /* The limit of the last run */
2088         runStarts[nTestRuns] = stringLimit;
2089
2090         /*
2091          * Make sure that calling uscript_OpenRun with a NULL text pointer
2092          * and a non-zero text length returns the correct error.
2093          */
2094         err = U_ZERO_ERROR;
2095         scriptRun = uscript_openRun(NULL, stringLimit, &err);
2096
2097         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2098             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2099         }
2100
2101         if (scriptRun != NULL) {
2102             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2103             uscript_closeRun(scriptRun);
2104         }
2105
2106         /*
2107          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2108          * and a zero text length returns the correct error.
2109          */
2110         err = U_ZERO_ERROR;
2111         scriptRun = uscript_openRun(testString, 0, &err);
2112
2113         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2114             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2115         }
2116
2117         if (scriptRun != NULL) {
2118             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2119             uscript_closeRun(scriptRun);
2120         }
2121
2122         /*
2123          * Make sure that calling uscript_openRun with a NULL text pointer
2124          * and a zero text length doesn't return an error.
2125          */
2126         err = U_ZERO_ERROR;
2127         scriptRun = uscript_openRun(NULL, 0, &err);
2128
2129         if (U_FAILURE(err)) {
2130             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2131         }
2132
2133         /* Make sure that the empty iterator doesn't find any runs */
2134         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2135             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2136         }
2137
2138         /*
2139          * Make sure that calling uscript_setRunText with a NULL text pointer
2140          * and a non-zero text length returns the correct error.
2141          */
2142         err = U_ZERO_ERROR;
2143         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2144
2145         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2146             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2147         }
2148
2149         /*
2150          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2151          * and a zero text length returns the correct error.
2152          */
2153         err = U_ZERO_ERROR;
2154         uscript_setRunText(scriptRun, testString, 0, &err);
2155
2156         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2157             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2158         }
2159
2160         /*
2161          * Now call uscript_setRunText on the empty iterator
2162          * and make sure that it works.
2163          */
2164         err = U_ZERO_ERROR;
2165         uscript_setRunText(scriptRun, testString, stringLimit, &err);
2166
2167         if (U_FAILURE(err)) {
2168             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2169         } else {
2170             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2171         }
2172
2173         uscript_closeRun(scriptRun);
2174
2175         /*
2176          * Now open an interator over the testString
2177          * using uscript_openRun and make sure that it works
2178          */
2179         scriptRun = uscript_openRun(testString, stringLimit, &err);
2180
2181         if (U_FAILURE(err)) {
2182             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2183         } else {
2184             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2185         }
2186
2187         /* Now reset the iterator, and make sure
2188          * that it still works.
2189          */
2190         uscript_resetRun(scriptRun);
2191
2192         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2193
2194         /* Close the iterator */
2195         uscript_closeRun(scriptRun);
2196     }
2197 }
2198
2199 /* test additional, non-core properties */
2200 static void
2201 TestAdditionalProperties() {
2202     /* test data for u_charAge() */
2203     static const struct {
2204         UChar32 c;
2205         UVersionInfo version;
2206     } charAges[]={
2207         {0x41,    { 1, 1, 0, 0 }},
2208         {0xffff,  { 1, 1, 0, 0 }},
2209         {0x20ab,  { 2, 0, 0, 0 }},
2210         {0x2fffe, { 2, 0, 0, 0 }},
2211         {0x20ac,  { 2, 1, 0, 0 }},
2212         {0xfb1d,  { 3, 0, 0, 0 }},
2213         {0x3f4,   { 3, 1, 0, 0 }},
2214         {0x10300, { 3, 1, 0, 0 }},
2215         {0x220,   { 3, 2, 0, 0 }},
2216         {0xff60,  { 3, 2, 0, 0 }}
2217     };
2218
2219     /* test data for u_hasBinaryProperty() */
2220     static const int32_t
2221     props[][3]={ /* code point, property, value */
2222         { 0x0627, UCHAR_ALPHABETIC, TRUE },
2223         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2224         { 0x2028, UCHAR_ALPHABETIC, FALSE },
2225
2226         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2227         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2228
2229         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2230         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2231
2232         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2233         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2234
2235         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2236         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2237         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2238         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2239         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2240
2241         { 0x058a, UCHAR_DASH, TRUE },
2242         { 0x007e, UCHAR_DASH, FALSE },
2243
2244         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2245         { 0x3000, UCHAR_DIACRITIC, FALSE },
2246
2247         { 0x0e46, UCHAR_EXTENDER, TRUE },
2248         { 0x0020, UCHAR_EXTENDER, FALSE },
2249
2250 #if !UCONFIG_NO_NORMALIZATION
2251         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2252         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2253         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2254
2255         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2256         { 0x0308, UCHAR_NFD_INERT, FALSE },
2257
2258         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2259         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2260
2261         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2262         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2263         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2264         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2265         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2266         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2267
2268         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2269         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2270
2271         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2272         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2273         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2274         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2275         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2276         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2277 #endif
2278
2279         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2280         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2281         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2282
2283         { 0x30fb, UCHAR_HYPHEN, TRUE },
2284         { 0xfe58, UCHAR_HYPHEN, FALSE },
2285
2286         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2287         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2288         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2289
2290         { 0x2172, UCHAR_ID_START, TRUE },
2291         { 0x007a, UCHAR_ID_START, TRUE },
2292         { 0x0039, UCHAR_ID_START, FALSE },
2293
2294         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2295         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2296         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2297
2298         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2299         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2300
2301         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2302         { 0x0345, UCHAR_LOWERCASE, TRUE },
2303         { 0x0030, UCHAR_LOWERCASE, FALSE },
2304
2305         { 0x1d7a9, UCHAR_MATH, TRUE },
2306         { 0x2135, UCHAR_MATH, TRUE },
2307         { 0x0062, UCHAR_MATH, FALSE },
2308
2309         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2310         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2311         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2312
2313         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2314         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2315         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2316
2317         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2318         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2319
2320         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2321         { 0x2162, UCHAR_UPPERCASE, TRUE },
2322         { 0x0345, UCHAR_UPPERCASE, FALSE },
2323
2324         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2325         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2326         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2327
2328         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2329         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2330         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2331
2332         { 0x16ee, UCHAR_XID_START, TRUE },
2333         { 0x23456, UCHAR_XID_START, TRUE },
2334         { 0x1d1aa, UCHAR_XID_START, FALSE },
2335
2336         /*
2337          * Version break:
2338          * The following properties are only supported starting with the
2339          * Unicode version indicated in the second field.
2340          */
2341         { -1, 0x320, 0 },
2342
2343         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2344         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2345         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2346
2347         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2348         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2349         { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2350         { 0xe0100, UCHAR_DEPRECATED, FALSE },
2351
2352         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2353         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2354         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2355         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2356
2357         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2358         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2359         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2360         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2361
2362         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2363         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2364
2365         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2366         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2367
2368         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2369         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2370
2371         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2372         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2373
2374         { 0x2e9b, UCHAR_RADICAL, TRUE },
2375         { 0x4e00, UCHAR_RADICAL, FALSE },
2376
2377         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2378         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2379
2380         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2381         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2382
2383         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2384
2385         { 0x002e, UCHAR_S_TERM, TRUE },
2386         { 0x0061, UCHAR_S_TERM, FALSE },
2387
2388         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2389         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2390         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2391         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2392
2393         /* enum/integer type properties */
2394
2395         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2396         /* test default Bidi classes for unassigned code points */
2397         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2398         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2399         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2400         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2401         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2402         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2403         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2404         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2405         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2406         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2407         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2408
2409         { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2410         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2411         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2412         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2413         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2414         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2415         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2416         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2417
2418         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2419         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2420         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2421         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2422         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2423         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2424         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2425         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2426         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2427         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2428         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2429
2430         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2431         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2432
2433         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2434         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2435         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2436         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2437         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2438         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2439         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2440         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2441         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2442
2443         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2444         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2445         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2446         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2447         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2448         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2449         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2450         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2451         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2452         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2453         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2454         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2455         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2456         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2457         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2458         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2459         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2460
2461         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2462         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2463         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2464
2465         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2466         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2467         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2468         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2469         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2470
2471         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2472         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2473         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2474         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2475         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2476         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2477         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2478         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2479
2480         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2481         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2482         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2483         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2484         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2485         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2486         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2487         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2488         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2489         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2490         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2491         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2492         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2493         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2494         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2495         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2496
2497         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2498
2499         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2500
2501         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2502         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2503         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2504         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2505         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2506         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2507         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2508
2509         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2510         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2511         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2512         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2513
2514         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2515         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2516         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2517         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2518         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2519         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2520
2521         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2522         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2523         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2524         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2525
2526         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2527         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2528         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2529         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2530         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2531         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2532         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2533
2534         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2535         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2536         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2537         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2538
2539         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2540         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2541         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2542         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2543
2544         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2545         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2546         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2547         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2548         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2549
2550         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2551
2552         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2553
2554         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2555         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2556         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2557
2558         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2559         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2560         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2561         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2562         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2563
2564         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2565         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2566         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2567
2568         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2569         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2570         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2571         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2572
2573         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2574         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2575         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2576         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2577         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2578         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2579
2580         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2581         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2582         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2583         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2584
2585         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2586         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2587         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2588         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2589
2590         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2591         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2592         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2593         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2594
2595         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2596
2597         /* unassigned code points in new default Bidi R blocks */
2598         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2599         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2600
2601         /* test some script codes >127 */
2602         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2603         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2604         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2605
2606         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2607
2608         /* value changed in Unicode 6.0 */
2609         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2610
2611         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2612
2613         /* unassigned code points in new/changed default Bidi AL blocks */
2614         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2615         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2616
2617         /* undefined UProperty values */
2618         { 0x61, 0x4a7, 0 },
2619         { 0x234bc, 0x15ed, 0 }
2620     };
2621
2622     UVersionInfo version;
2623     UChar32 c;
2624     int32_t i, result, uVersion;
2625     UProperty which;
2626
2627     /* what is our Unicode version? */
2628     u_getUnicodeVersion(version);
2629     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2630
2631     u_charAge(0x20, version);
2632     if(version[0]==0) {
2633         /* no additional properties available */
2634         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2635         return;
2636     }
2637
2638     /* test u_charAge() */
2639     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2640         u_charAge(charAges[i].c, version);
2641         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2642             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2643                 charAges[i].c,
2644                 version[0], version[1], version[2], version[3],
2645                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2646         }
2647     }
2648
2649     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2650         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2651         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2652         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2653         u_getIntPropertyMinValue(0x2345)!=0
2654     ) {
2655         log_err("error: u_getIntPropertyMinValue() wrong\n");
2656     }
2657     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2658         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2659     }
2660     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2661         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2662     }
2663     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2664         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2665     }
2666     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2667         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2668     }
2669     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2670         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2671     }
2672     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2673         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2674     }
2675     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2676         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2677     }
2678     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2679         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2680     }
2681     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2682         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2683     }
2684     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2685         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2686     }
2687     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2688         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2689     }
2690     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2691         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2692     }
2693     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2694         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2695     }
2696     /*JB#2410*/
2697     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2698         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2699     }
2700     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2701         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2702     }
2703     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2704         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2705     }
2706     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2707         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2708     }
2709     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2710         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2711     }
2712
2713     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2714     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2715         const char *whichName;
2716
2717         if(props[i][0]<0) {
2718             /* Unicode version break */
2719             if(uVersion<props[i][1]) {
2720                 break; /* do not test properties that are not yet supported */
2721             } else {
2722                 continue; /* skip this row */
2723             }
2724         }
2725
2726         c=(UChar32)props[i][0];
2727         which=(UProperty)props[i][1];
2728         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2729
2730         if(which<UCHAR_INT_START) {
2731             result=u_hasBinaryProperty(c, which);
2732             if(result!=props[i][2]) {
2733                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2734                         c, whichName, result, i);
2735             }
2736         }
2737
2738         result=u_getIntPropertyValue(c, which);
2739         if(result!=props[i][2]) {
2740             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2741                     c, whichName, result, props[i][2], i);
2742         }
2743
2744         /* test separate functions, too */
2745         switch((UProperty)props[i][1]) {
2746         case UCHAR_ALPHABETIC:
2747             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2748                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2749                         props[i][0], result, i);
2750             }
2751             break;
2752         case UCHAR_LOWERCASE:
2753             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2754                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2755                         props[i][0], result, i);
2756             }
2757             break;
2758         case UCHAR_UPPERCASE:
2759             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2760                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2761                         props[i][0], result, i);
2762             }
2763             break;
2764         case UCHAR_WHITE_SPACE:
2765             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2766                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2767                         props[i][0], result, i);
2768             }
2769             break;
2770         default:
2771             break;
2772         }
2773     }
2774 }
2775
2776 static void
2777 TestNumericProperties(void) {
2778     /* see UnicodeData.txt, DerivedNumericValues.txt */
2779     static const struct {
2780         UChar32 c;
2781         int32_t type;
2782         double numValue;
2783     } values[]={
2784         { 0x0F33, U_NT_NUMERIC, -1./2. },
2785         { 0x0C66, U_NT_DECIMAL, 0 },
2786         { 0x96f6, U_NT_NUMERIC, 0 },
2787         { 0xa833, U_NT_NUMERIC, 1./16. },
2788         { 0x2152, U_NT_NUMERIC, 1./10. },
2789         { 0x2151, U_NT_NUMERIC, 1./9. },
2790         { 0x1245f, U_NT_NUMERIC, 1./8. },
2791         { 0x2150, U_NT_NUMERIC, 1./7. },
2792         { 0x2159, U_NT_NUMERIC, 1./6. },
2793         { 0x09f6, U_NT_NUMERIC, 3./16. },
2794         { 0x2155, U_NT_NUMERIC, 1./5. },
2795         { 0x00BD, U_NT_NUMERIC, 1./2. },
2796         { 0x0031, U_NT_DECIMAL, 1. },
2797         { 0x4e00, U_NT_NUMERIC, 1. },
2798         { 0x58f1, U_NT_NUMERIC, 1. },
2799         { 0x10320, U_NT_NUMERIC, 1. },
2800         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2801         { 0x00B2, U_NT_DIGIT, 2. },
2802         { 0x5f10, U_NT_NUMERIC, 2. },
2803         { 0x1813, U_NT_DECIMAL, 3. },
2804         { 0x5f0e, U_NT_NUMERIC, 3. },
2805         { 0x2173, U_NT_NUMERIC, 4. },
2806         { 0x8086, U_NT_NUMERIC, 4. },
2807         { 0x278E, U_NT_DIGIT, 5. },
2808         { 0x1D7F2, U_NT_DECIMAL, 6. },
2809         { 0x247A, U_NT_DIGIT, 7. },
2810         { 0x7396, U_NT_NUMERIC, 9. },
2811         { 0x1372, U_NT_NUMERIC, 10. },
2812         { 0x216B, U_NT_NUMERIC, 12. },
2813         { 0x16EE, U_NT_NUMERIC, 17. },
2814         { 0x249A, U_NT_NUMERIC, 19. },
2815         { 0x303A, U_NT_NUMERIC, 30. },
2816         { 0x5345, U_NT_NUMERIC, 30. },
2817         { 0x32B2, U_NT_NUMERIC, 37. },
2818         { 0x1375, U_NT_NUMERIC, 40. },
2819         { 0x10323, U_NT_NUMERIC, 50. },
2820         { 0x0BF1, U_NT_NUMERIC, 100. },
2821         { 0x964c, U_NT_NUMERIC, 100. },
2822         { 0x217E, U_NT_NUMERIC, 500. },
2823         { 0x2180, U_NT_NUMERIC, 1000. },
2824         { 0x4edf, U_NT_NUMERIC, 1000. },
2825         { 0x2181, U_NT_NUMERIC, 5000. },
2826         { 0x137C, U_NT_NUMERIC, 10000. },
2827         { 0x4e07, U_NT_NUMERIC, 10000. },
2828         { 0x4ebf, U_NT_NUMERIC, 100000000. },
2829         { 0x5146, U_NT_NUMERIC, 1000000000000. },
2830         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2831         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2832         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2833         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2834         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2835         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2836         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2837         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2838     };
2839
2840     double nv;
2841     UChar32 c;
2842     int32_t i, type;
2843
2844     for(i=0; i<LENGTHOF(values); ++i) {
2845         c=values[i].c;
2846         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2847         nv=u_getNumericValue(c);
2848
2849         if(type!=values[i].type) {
2850             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2851         }
2852         if(0.000001 <= fabs(nv - values[i].numValue)) {
2853             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2854         }
2855     }
2856 }
2857
2858 /**
2859  * Test the property names and property value names API.
2860  */
2861 static void
2862 TestPropertyNames(void) {
2863     int32_t p, v, choice=0, rev;
2864     UBool atLeastSomething = FALSE;
2865
2866     for (p=0; ; ++p) {
2867         UProperty propEnum = (UProperty)p;
2868         UBool sawProp = FALSE;
2869         if(p > 10 && !atLeastSomething) {
2870           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2871           return;
2872         }
2873
2874         for (choice=0; ; ++choice) {
2875             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2876             if (name) {
2877                 if (!sawProp)
2878                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2879                 log_verbose("%d=\"%s\"", choice, name);
2880                 sawProp = TRUE;
2881                 atLeastSomething = TRUE;
2882
2883                 /* test reverse mapping */
2884                 rev = u_getPropertyEnum(name);
2885                 if (rev != p) {
2886                     log_err("Property round-trip failure: %d -> %s -> %d\n",
2887                             p, name, rev);
2888                 }
2889             }
2890             if (!name && choice>0) break;
2891         }
2892         if (sawProp) {
2893             /* looks like a valid property; check the values */
2894             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2895             int32_t max = 0;
2896             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2897                 max = 255;
2898             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2899                 /* it's far too slow to iterate all the way up to
2900                    the real max, U_GC_P_MASK */
2901                 max = U_GC_NL_MASK;
2902             } else if (p == UCHAR_BLOCK) {
2903                 /* UBlockCodes, unlike other values, start at 1 */
2904                 max = 1;
2905             }
2906             log_verbose("\n");
2907             for (v=-1; ; ++v) {
2908                 UBool sawValue = FALSE;
2909                 for (choice=0; ; ++choice) {
2910                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2911                     if (vname) {
2912                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2913                         log_verbose("%d=\"%s\"", choice, vname);
2914                         sawValue = TRUE;
2915
2916                         /* test reverse mapping */
2917                         rev = u_getPropertyValueEnum(propEnum, vname);
2918                         if (rev != v) {
2919                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2920                                     pname, v, vname, rev);
2921                         }
2922                     }
2923                     if (!vname && choice>0) break;
2924                 }
2925                 if (sawValue) {
2926                     log_verbose("\n");
2927                 }
2928                 if (!sawValue && v>=max) break;
2929             }
2930         }
2931         if (!sawProp) {
2932             if (p>=UCHAR_STRING_LIMIT) {
2933                 break;
2934             } else if (p>=UCHAR_DOUBLE_LIMIT) {
2935                 p = UCHAR_STRING_START - 1;
2936             } else if (p>=UCHAR_MASK_LIMIT) {
2937                 p = UCHAR_DOUBLE_START - 1;
2938             } else if (p>=UCHAR_INT_LIMIT) {
2939                 p = UCHAR_MASK_START - 1;
2940             } else if (p>=UCHAR_BINARY_LIMIT) {
2941                 p = UCHAR_INT_START - 1;
2942             }
2943         }
2944     }
2945 }
2946
2947 /**
2948  * Test the property values API.  See JB#2410.
2949  */
2950 static void
2951 TestPropertyValues(void) {
2952     int32_t i, p, min, max;
2953     UErrorCode ec;
2954
2955     /* Min should be 0 for everything. */
2956     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2957     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2958         UProperty propEnum = (UProperty)p;
2959         min = u_getIntPropertyMinValue(propEnum);
2960         if (min != 0) {
2961             if (p == UCHAR_BLOCK) {
2962                 /* This is okay...for now.  See JB#2487.
2963                    TODO Update this for JB#2487. */
2964             } else {
2965                 const char* name;
2966                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2967                 if (name == NULL)
2968                     name = "<ERROR>";
2969                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2970                         name, min);
2971             }
2972         }
2973     }
2974
2975     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2976         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2977         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2978     }
2979
2980     /* Max should be -1 for invalid properties. */
2981     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
2982     if (max != -1) {
2983         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2984                 max);
2985     }
2986
2987     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
2988     for (i=0; i<2; ++i) {
2989         int32_t script;
2990         const char* desc;
2991         ec = U_ZERO_ERROR;
2992         switch (i) {
2993         case 0:
2994             script = uscript_getScript(-1, &ec);
2995             desc = "uscript_getScript(-1)";
2996             break;
2997         case 1:
2998             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
2999             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3000             break;
3001         default:
3002             log_err("Internal test error. Too many scripts\n");
3003             return;
3004         }
3005         /* We don't explicitly test ec.  It should be U_FAILURE but it
3006            isn't documented as such. */
3007         if (script != (int32_t)USCRIPT_INVALID_CODE) {
3008             log_err("FAIL: %s = %d, exp. 0\n",
3009                     desc, script);
3010         }
3011     }
3012 }
3013
3014 /* various tests for consistency of UCD data and API behavior */
3015 static void
3016 TestConsistency() {
3017     char buffer[300];
3018     USet *set1, *set2, *set3, *set4;
3019     UErrorCode errorCode;
3020
3021     UChar32 start, end;
3022     int32_t i, length;
3023
3024     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3025     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3026     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3027     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3028     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3029
3030     U_STRING_DECL(mathBlocksPattern,
3031         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3032         1+32+46+46+45+43+1+1); /* +1 for NUL */
3033     U_STRING_DECL(mathPattern, "[:Math:]", 8);
3034     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3035     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3036     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3037
3038     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3039     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3040     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3041     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3042     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3043
3044     U_STRING_INIT(mathBlocksPattern,
3045         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3046         1+32+46+46+45+43+1+1); /* +1 for NUL */
3047     U_STRING_INIT(mathPattern, "[:Math:]", 8);
3048     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3049     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3050     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3051
3052     /*
3053      * It used to be that UCD.html and its precursors said
3054      * "Those dashes used to mark connections between pieces of words,
3055      *  plus the Katakana middle dot."
3056      *
3057      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3058      * but not from Hyphen.
3059      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3060      * Therefore, do not show errors when testing the Hyphen property.
3061      */
3062     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3063                 "known to the UTC and not considered errors.\n");
3064
3065     errorCode=U_ZERO_ERROR;
3066     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3067     set2=uset_openPattern(dashPattern, 8, &errorCode);
3068     if(U_SUCCESS(errorCode)) {
3069         /* remove the Katakana middle dot(s) from set1 */
3070         uset_remove(set1, 0x30fb);
3071         uset_remove(set1, 0xff65); /* halfwidth variant */
3072         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3073     } else {
3074         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3075     }
3076
3077     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3078     set3=uset_openPattern(formatPattern, 6, &errorCode);
3079     set4=uset_openPattern(alphaPattern, 14, &errorCode);
3080     if(U_SUCCESS(errorCode)) {
3081         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3082         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3083         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3084     } else {
3085         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3086     }
3087
3088     uset_close(set1);
3089     uset_close(set2);
3090     uset_close(set3);
3091     uset_close(set4);
3092
3093     /*
3094      * Check that each lowercase character has "small" in its name
3095      * and not "capital".
3096      * There are some such characters, some of which seem odd.
3097      * Use the verbose flag to see these notices.
3098      */
3099     errorCode=U_ZERO_ERROR;
3100     set1=uset_openPattern(lowerPattern, 13, &errorCode);
3101     if(U_SUCCESS(errorCode)) {
3102         for(i=0;; ++i) {
3103             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3104             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3105                 break; /* done */
3106             }
3107             if(U_FAILURE(errorCode)) {
3108                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3109                         i, u_errorName(errorCode));
3110                 break;
3111             }
3112             if(length!=0) {
3113                 break; /* done with code points, got a string or -1 */
3114             }
3115
3116             while(start<=end) {
3117                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3118                 if(U_FAILURE(errorCode)) {
3119                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3120                     errorCode=U_ZERO_ERROR;
3121                 }
3122                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3123                     strstr(buffer, "SMALL CAPITAL")==NULL
3124                 ) {
3125                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3126                 }
3127                 ++start;
3128             }
3129         }
3130     } else {
3131         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3132     }
3133     uset_close(set1);
3134
3135     /* verify that all assigned characters in Math blocks are exactly Math characters */
3136     errorCode=U_ZERO_ERROR;
3137     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3138     set2=uset_openPattern(mathPattern, 8, &errorCode);
3139     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3140     if(U_SUCCESS(errorCode)) {
3141         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3142         uset_complement(set3);      /* assigned characters */
3143         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3144         compareUSets(set1, set2,
3145                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
3146                      TRUE);
3147     } else {
3148         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3149     }
3150     uset_close(set1);
3151     uset_close(set2);
3152     uset_close(set3);
3153
3154     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3155     errorCode=U_ZERO_ERROR;
3156     set1=uset_openPattern(unknownPattern, 14, &errorCode);
3157     set2=uset_openPattern(reservedPattern, 20, &errorCode);
3158     if(U_SUCCESS(errorCode)) {
3159         compareUSets(set1, set2,
3160                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3161                      TRUE);
3162     } else {
3163         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3164     }
3165     uset_close(set1);
3166     uset_close(set2);
3167 }
3168
3169 /*
3170  * Starting with ICU4C 3.4, the core Unicode properties files
3171  * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
3172  * are hardcoded in the common DLL and therefore not included
3173  * in the data package any more.
 * Tests requiring these files are disabled so that
3175  * we need not jump through hoops (like adding snapshots of these files
3176  * to testdata).
3177  * See Jitterbug 4497.
3178  */
3179 #define HARDCODED_DATA_4497 1
3180
3181 /* API coverage for ucase.c */
3182 static void TestUCase() {
3183 #if !HARDCODED_DATA_4497
3184     UDataMemory *pData;
3185     UCaseProps *csp;
3186     const UCaseProps *ccsp;
3187     UErrorCode errorCode;
3188
3189     /* coverage for ucase_openBinary() */
3190     errorCode=U_ZERO_ERROR;
3191     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3192     if(U_FAILURE(errorCode)) {
3193         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3194                     u_errorName(errorCode));
3195         return;
3196     }
3197
3198     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3199     if(U_FAILURE(errorCode)) {
3200         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3201                 u_errorName(errorCode));
3202         udata_close(pData);
3203         return;
3204     }
3205
3206     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3207         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3208     }
3209
3210     ucase_close(csp);
3211     udata_close(pData);
3212
3213     /* coverage for ucase_getDummy() */
3214     errorCode=U_ZERO_ERROR;
3215     ccsp=ucase_getDummy(&errorCode);
3216     if(ucase_tolower(ccsp, 0x41)!=0x41) {
3217         log_err("ucase_tolower(dummy, A)!=A\n");
3218     }
3219 #endif
3220 }
3221
3222 /* API coverage for ubidi_props.c */
3223 static void TestUBiDiProps() {
3224 #if !HARDCODED_DATA_4497
3225     UDataMemory *pData;
3226     UBiDiProps *bdp;
3227     const UBiDiProps *cbdp;
3228     UErrorCode errorCode;
3229
3230     /* coverage for ubidi_openBinary() */
3231     errorCode=U_ZERO_ERROR;
3232     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3233     if(U_FAILURE(errorCode)) {
3234         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3235                     u_errorName(errorCode));
3236         return;
3237     }
3238
3239     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3240     if(U_FAILURE(errorCode)) {
3241         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3242                 u_errorName(errorCode));
3243         udata_close(pData);
3244         return;
3245     }
3246
3247     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3248         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3249     }
3250
3251     ubidi_closeProps(bdp);
3252     udata_close(pData);
3253
3254     /* coverage for ubidi_getDummy() */
3255     errorCode=U_ZERO_ERROR;
3256     cbdp=ubidi_getDummy(&errorCode);
3257     if(ubidi_getClass(cbdp, 0x20)!=0) {
3258         log_err("ubidi_getClass(dummy, space)!=0\n");
3259     }
3260 #endif
3261 }
3262
3263 /* test case folding, compare return values with CaseFolding.txt ------------ */
3264
/*
 * Bit flags recording which kinds of case folding have already been
 * tested for a character; CF_ALL is the union of the three kinds.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3272
3273 static void
3274 testFold(UChar32 c, int which,
3275          UChar32 simple, UChar32 turkic,
3276          const UChar *full, int32_t fullLength,
3277          const UChar *turkicFull, int32_t turkicFullLength) {
3278     UChar s[2], t[32];
3279     UChar32 c2;
3280     int32_t length, length2;
3281
3282     UErrorCode errorCode=U_ZERO_ERROR;
3283
3284     length=0;
3285     U16_APPEND_UNSAFE(s, length, c);
3286
3287     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3288         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3289     }
3290     if((which&CF_FULL)!=0) {
3291         length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3292         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3293             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3294         }
3295     }
3296     if((which&CF_TURKIC)!=0) {
3297         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3298             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3299         }
3300
3301         length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3302         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3303             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3304         }
3305     }
3306 }
3307
3308 /* test that c case-folds to itself */
3309 static void
3310 testFoldToSelf(UChar32 c, int which) {
3311     UChar s[2];
3312     int32_t length;
3313
3314     length=0;
3315     U16_APPEND_UNSAFE(s, length, c);
3316     testFold(c, which, c, c, s, length, s, length);
3317 }
3318
3319 struct CaseFoldingData {
3320     USet *notSeen;
3321     UChar32 prev, prevSimple;
3322     UChar prevFull[32];
3323     int32_t prevFullLength;
3324     int which;
3325 };
3326 typedef struct CaseFoldingData CaseFoldingData;
3327
3328 static void U_CALLCONV
3329 caseFoldingLineFn(void *context,
3330                   char *fields[][2], int32_t fieldCount,
3331                   UErrorCode *pErrorCode) {
3332     CaseFoldingData *pData=(CaseFoldingData *)context;
3333     char *end;
3334     UChar full[32];
3335     UChar32 c, prev, simple;
3336     int32_t count;
3337     int which;
3338     char status;
3339
3340     /* get code point */
3341     const char *s=u_skipWhitespace(fields[0][0]);
3342     if(0==strncmp(s, "0000..10FFFF", 12)) {
3343         /*
3344          * Ignore the line
3345          * # @missing: 0000..10FFFF; C; <code point>
3346          * because maps-to-self is already our default, and this line breaks this parser.
3347          */
3348         return;
3349     }
3350     c=(UChar32)strtoul(s, &end, 16);
3351     end=(char *)u_skipWhitespace(end);
3352     if(end<=fields[0][0] || end!=fields[0][1]) {
3353         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3354         *pErrorCode=U_PARSE_ERROR;
3355         return;
3356     }
3357
3358     /* get the status of this mapping */
3359     status=*u_skipWhitespace(fields[1][0]);
3360     if(status!='C' && status!='S' && status!='F' && status!='T') {
3361         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3362         *pErrorCode=U_PARSE_ERROR;
3363         return;
3364     }
3365
3366     /* get the mapping */
3367     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3368     if(U_FAILURE(*pErrorCode)) {
3369         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3370         return;
3371     }
3372
3373     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3374     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3375         simple=c;
3376     }
3377
3378     if(c!=(prev=pData->prev)) {
3379         /*
3380          * Test remaining mappings for the previous code point.
3381          * If a turkic folding was not mentioned, then it should fold the same
3382          * as the regular simple case folding.
3383          */
3384         UChar prevString[2];
3385         int32_t length;
3386
3387         length=0;
3388         U16_APPEND_UNSAFE(prevString, length, prev);
3389         testFold(prev, (~pData->which)&CF_ALL,
3390                  prev, pData->prevSimple,
3391                  prevString, length,
3392                  pData->prevFull, pData->prevFullLength);
3393         pData->prev=pData->prevSimple=c;
3394         length=0;
3395         U16_APPEND_UNSAFE(pData->prevFull, length, c);
3396         pData->prevFullLength=length;
3397         pData->which=0;
3398     }
3399
3400     /*
3401      * Turn the status into a bit set of case foldings to test.
3402      * Remember non-Turkic case foldings as defaults for Turkic mode.
3403      */
3404     switch(status) {
3405     case 'C':
3406         which=CF_SIMPLE|CF_FULL;
3407         pData->prevSimple=simple;
3408         u_memcpy(pData->prevFull, full, count);
3409         pData->prevFullLength=count;
3410         break;
3411     case 'S':
3412         which=CF_SIMPLE;
3413         pData->prevSimple=simple;
3414         break;
3415     case 'F':
3416         which=CF_FULL;
3417         u_memcpy(pData->prevFull, full, count);
3418         pData->prevFullLength=count;
3419         break;
3420     case 'T':
3421         which=CF_TURKIC;
3422         break;
3423     default:
3424         which=0;
3425         break; /* won't happen because of test above */
3426     }
3427
3428     testFold(c, which, simple, simple, full, count, full, count);
3429
3430     /* remember which case foldings of c have been tested */
3431     pData->which|=which;
3432
3433     /* remove c from the set of ones not mentioned in CaseFolding.txt */
3434     uset_remove(pData->notSeen, c);
3435 }
3436
3437 static void
3438 TestCaseFolding() {
3439     CaseFoldingData data={ NULL };
3440     char *fields[3][2];
3441     UErrorCode errorCode;
3442
3443     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3444
3445     errorCode=U_ZERO_ERROR;
3446     /* test BMP & plane 1 - nothing interesting above */
3447     data.notSeen=uset_open(0, 0x1ffff);
3448     data.prevFullLength=1; /* length of full case folding of U+0000 */
3449
3450     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3451     if(U_SUCCESS(errorCode)) {
3452         int32_t i, start, end;
3453
3454         /* add a pseudo-last line to finish testing of the actual last one */
3455         fields[0][0]=lastLine;
3456         fields[0][1]=lastLine+6;
3457         fields[1][0]=lastLine+7;
3458         fields[1][1]=lastLine+9;
3459         fields[2][0]=lastLine+10;
3460         fields[2][1]=lastLine+17;
3461         caseFoldingLineFn(&data, fields, 3, &errorCode);
3462
3463         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3464         for(i=0;
3465             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3466                 U_SUCCESS(errorCode);
3467             ++i
3468         ) {
3469             do {
3470                 testFoldToSelf(start, CF_ALL);
3471             } while(++start<=end);
3472         }
3473     }
3474
3475     uset_close(data.notSeen);
3476 }