icuSources/test/cintltst/cucdtst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2013, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /*******************************************************************************
   7 *
   8 * File CUCDTST.C
   9 *
  10 * Modification History:
  11 *        Name                     Description
  12 *     Madhu Katragadda            Ported for C API, added tests for string functions
  13 ********************************************************************************
  14 */
  15
  16 #include <string.h>
  17 #include <math.h>
  18 #include <stdlib.h>
  19
  20 #include "unicode/utypes.h"
  21 #include "unicode/uchar.h"
  22 #include "unicode/putil.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/uloc.h"
  25 #include "unicode/unorm2.h"
  26
  27 #include "cintltst.h"
  28 #include "putilimp.h"
  29 #include "uparse.h"
  30 #include "ucase.h"
  31 #include "ubidi_props.h"
  32 #include "uprops.h"
  33 #include "uset_imp.h"
  34 #include "usc_impl.h"
  35 #include "udatamem.h" /* for testing ucase_openBinary() */
  36 #include "cucdapi.h"
  37
/*
 * Number of elements in an array, as an int32_t.
 * Relies on sizeof, so the argument must be a real array object,
 * not a pointer or an array-typed function parameter.
 */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  39
  40 /* prototypes --------------------------------------------------------------- */
  41
/* Test functions local to this file; addUnicodeTest() registers them by name. */
static void TestUpperLower(void);
static void TestLetterNumber(void);
static void TestMisc(void);
static void TestPOSIX(void);
static void TestControlPrint(void);
static void TestIdentifier(void);
static void TestUnicodeData(void);
static void TestCodeUnit(void);
static void TestCodePoint(void);
static void TestCharLength(void);
static void TestCharNames(void);
static void TestMirroring(void);
static void TestUScriptRunAPI(void);
static void TestAdditionalProperties(void);
static void TestNumericProperties(void);
static void TestPropertyNames(void);
static void TestPropertyValues(void);
static void TestConsistency(void);
static void TestUCase(void);
static void TestUBiDiProps(void);
static void TestCaseFolding(void);

/* internal methods used */
/* NOTE(review): presumably these map the abbreviations in tagStrings/dirStrings to their values - confirm at the definitions. */
static int32_t MakeProp(char* str);
static int32_t MakeDir(char* str);
  67
  68 /* helpers ------------------------------------------------------------------ */
  69
  70 static void
  71 parseUCDFile(const char *filename,
  72              char *fields[][2], int32_t fieldCount,
  73              UParseLineFn *lineFn, void *context,
  74              UErrorCode *pErrorCode) {
  75     char path[256];
  76     char backupPath[256];
  77
  78     if(U_FAILURE(*pErrorCode)) {
  79         return;
  80     }
  81
  82     /* Look inside ICU_DATA first */
  83     strcpy(path, u_getDataDirectory());
  84     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
  85     strcat(path, filename);
  86
  87     /* As a fallback, try to guess where the source data was located
  88      *    at the time ICU was built, and look there.
  89      */
  90     strcpy(backupPath, ctest_dataSrcDir());
  91     strcat(backupPath, U_FILE_SEP_STRING);
  92     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
  93     strcat(backupPath, filename);
  94
  95     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
  96     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
  97         *pErrorCode=U_ZERO_ERROR;
  98         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
  99     }
 100     if(U_FAILURE(*pErrorCode)) {
 101         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
 102     }
 103 }
 104
 105 /* test data ---------------------------------------------------------------- */
 106
/* NOTE(review): judging by the name, the last code point covered by the parsed data file; its use is outside this section - confirm. */
static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
/*
 * Two-letter general category abbreviations, concatenated without separators.
 * The n-th abbreviation (characters 2n and 2n+1) is listed in the same order
 * as the n-th entry of tagValues[] below.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
/* General category constants, parallel to the abbreviations in tagStrings. */
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
 142
/*
 * Short names of bidirectional classes; each entry holds at most four
 * characters plus the terminating NUL.
 * NOTE(review): presumably the array index equals the matching UCharDirection
 * value - confirm against the code that looks names up in this table.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN"
};
 164
 165 void addUnicodeTest(TestNode** root);
 166
 167 void addUnicodeTest(TestNode** root)
 168 {
 169     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
 170     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
 171     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
 172     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
 173     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
 174     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
 175     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
 176     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
 177     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
 178     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
 179     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
 180     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
 181     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
 182     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
 183     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
 184     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
 185     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
 186     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
 187     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
 188     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
 189     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
 190     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
 191     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
 192     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
 193     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
 194     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
 195 }
 196
 197 /*==================================================== */
 198 /* test u_toupper() and u_tolower()                    */
 199 /*==================================================== */
 200 static void TestUpperLower()
 201 {
 202     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
 203     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
 204     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
 205     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 206     int32_t i;
 207
 208     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
 209     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 210
 211 /*
 212 Checks LetterLike Symbols which were previously a source of confusion
 213 [Bertrand A. D. 02/04/98]
 214 */
 215     for (i=0x2100;i<0x2138;i++)
 216     {
 217         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
 218         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
 219         {
 220             if (i != (int)u_tolower(i)) /* itself */
 221                 log_err("Failed case conversion with itself: U+%04x\n", i);
 222             if (i != (int)u_toupper(i))
 223                 log_err("Failed case conversion with itself: U+%04x\n", i);
 224         }
 225     }
 226
 227     for(i=0; i < u_strlen(upper); i++){
 228         if(u_tolower(upper[i]) != lower[i]){
 229             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
 230         }
 231     }
 232
 233     log_verbose("testing upper lower\n");
 234     for (i = 0; i < 21; i++) {
 235
 236         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
 237         {
 238             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
 239         }
 240         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
 241          {
 242             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
 243         }
 244         else if (upperTest[i] != u_tolower(lowerTest[i]))
 245         {
 246             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
 247         }
 248         else if (lowerTest[i] != u_toupper(upperTest[i]))
 249          {
 250             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
 251         }
 252         else if (upperTest[i] != u_tolower(upperTest[i]))
 253         {
 254             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
 255         }
 256         else if (lowerTest[i] != u_toupper(lowerTest[i]))
 257         {
 258             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
 259         }
 260     }
 261     log_verbose("done testing upper lower\n");
 262
 263     log_verbose("testing u_istitle\n");
 264     {
 265         static const UChar expected[] = {
 266             0x1F88,
 267             0x1F89,
 268             0x1F8A,
 269             0x1F8B,
 270             0x1F8C,
 271             0x1F8D,
 272             0x1F8E,
 273             0x1F8F,
 274             0x1F88,
 275             0x1F89,
 276             0x1F8A,
 277             0x1F8B,
 278             0x1F8C,
 279             0x1F8D,
 280             0x1F8E,
 281             0x1F8F,
 282             0x1F98,
 283             0x1F99,
 284             0x1F9A,
 285             0x1F9B,
 286             0x1F9C,
 287             0x1F9D,
 288             0x1F9E,
 289             0x1F9F,
 290             0x1F98,
 291             0x1F99,
 292             0x1F9A,
 293             0x1F9B,
 294             0x1F9C,
 295             0x1F9D,
 296             0x1F9E,
 297             0x1F9F,
 298             0x1FA8,
 299             0x1FA9,
 300             0x1FAA,
 301             0x1FAB,
 302             0x1FAC,
 303             0x1FAD,
 304             0x1FAE,
 305             0x1FAF,
 306             0x1FA8,
 307             0x1FA9,
 308             0x1FAA,
 309             0x1FAB,
 310             0x1FAC,
 311             0x1FAD,
 312             0x1FAE,
 313             0x1FAF,
 314             0x1FBC,
 315             0x1FBC,
 316             0x1FCC,
 317             0x1FCC,
 318             0x1FFC,
 319             0x1FFC,
 320         };
 321         int32_t num = sizeof(expected)/sizeof(expected[0]);
 322         for(i=0; i<num; i++){
 323             if(!u_istitle(expected[i])){
 324                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
 325             }
 326         }
 327
 328     }
 329 }
 330
 331 /* compare two sets and verify that their difference or intersection is empty */
 332 static UBool
 333 showADiffB(const USet *a, const USet *b,
 334            const char *a_name, const char *b_name,
 335            UBool expect, UBool diffIsError) {
 336     USet *aa;
 337     int32_t i, start, end, length;
 338     UErrorCode errorCode;
 339
 340     /*
 341      * expect:
 342      * TRUE  -> a-b should be empty, that is, b should contain all of a
 343      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
 344      */
 345     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
 346         return TRUE;
 347     }
 348
 349     /* clone a to aa because a is const */
 350     aa=uset_open(1, 0);
 351     if(aa==NULL) {
 352         /* unusual problem - out of memory? */
 353         return FALSE;
 354     }
 355     uset_addAll(aa, a);
 356
 357     /* compute the set in question */
 358     if(expect) {
 359         /* a-b */
 360         uset_removeAll(aa, b);
 361     } else {
 362         /* a&b */
 363         uset_retainAll(aa, b);
 364     }
 365
 366     /* aa is not empty because of the initial tests above; show its contents */
 367     errorCode=U_ZERO_ERROR;
 368     i=0;
 369     for(;;) {
 370         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
 371         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 372             break; /* done */
 373         }
 374         if(U_FAILURE(errorCode)) {
 375             log_err("error comparing %s with %s at difference item %d: %s\n",
 376                 a_name, b_name, i, u_errorName(errorCode));
 377             break;
 378         }
 379         if(length!=0) {
 380             break; /* done with code points, got a string or -1 */
 381         }
 382
 383         if(diffIsError) {
 384             if(expect) {
 385                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 386             } else {
 387                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 388             }
 389         } else {
 390             if(expect) {
 391                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 392             } else {
 393                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 394             }
 395         }
 396
 397         ++i;
 398     }
 399
 400     uset_close(aa);
 401     return FALSE;
 402 }
 403
 404 static UBool
 405 showAMinusB(const USet *a, const USet *b,
 406             const char *a_name, const char *b_name,
 407             UBool diffIsError) {
 408     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
 409 }
 410
 411 static UBool
 412 showAIntersectB(const USet *a, const USet *b,
 413                 const char *a_name, const char *b_name,
 414                 UBool diffIsError) {
 415     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
 416 }
 417
 418 static UBool
 419 compareUSets(const USet *a, const USet *b,
 420              const char *a_name, const char *b_name,
 421              UBool diffIsError) {
 422     /*
 423      * Use an arithmetic & not a logical && so that both branches
 424      * are always taken and all differences are shown.
 425      */
 426     return
 427         showAMinusB(a, b, a_name, b_name, diffIsError) &
 428         showAMinusB(b, a, b_name, a_name, diffIsError);
 429 }
 430
 431 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
 432 static void TestLetterNumber()
 433 {
 434     UChar i = 0x0000;
 435
 436     log_verbose("Testing for isalpha\n");
 437     for (i = 0x0041; i < 0x005B; i++) {
 438         if (!u_isalpha(i))
 439         {
 440             log_err("Failed isLetter test at  %.4X\n", i);
 441         }
 442     }
 443     for (i = 0x0660; i < 0x066A; i++) {
 444         if (u_isalpha(i))
 445         {
 446             log_err("Failed isLetter test with numbers at %.4X\n", i);
 447         }
 448     }
 449
 450     log_verbose("Testing for isdigit\n");
 451     for (i = 0x0660; i < 0x066A; i++) {
 452         if (!u_isdigit(i))
 453         {
 454             log_verbose("Failed isNumber test at %.4X\n", i);
 455         }
 456     }
 457
 458     log_verbose("Testing for isalnum\n");
 459     for (i = 0x0041; i < 0x005B; i++) {
 460         if (!u_isalnum(i))
 461         {
 462             log_err("Failed isAlNum test at  %.4X\n", i);
 463         }
 464     }
 465     for (i = 0x0660; i < 0x066A; i++) {
 466         if (!u_isalnum(i))
 467         {
 468             log_err("Failed isAlNum test at  %.4X\n", i);
 469         }
 470     }
 471
 472     {
 473         /*
 474          * The following checks work only starting from Unicode 4.0.
 475          * Check the version number here.
 476          */
 477         static UVersionInfo u401={ 4, 0, 1, 0 };
 478         UVersionInfo version;
 479         u_getUnicodeVersion(version);
 480         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
 481             return;
 482         }
 483     }
 484
 485     {
 486         /*
 487          * Sanity check:
 488          * Verify that exactly the digit characters have decimal digit values.
 489          * This assumption is used in the implementation of u_digit()
 490          * (which checks nt=de)
 491          * compared with the parallel java.lang.Character.digit()
 492          * (which checks Nd).
 493          *
 494          * This was not true in Unicode 3.2 and earlier.
 495          * Unicode 4.0 fixed discrepancies.
 496          * Unicode 4.0.1 re-introduced problems in this area due to an
 497          * unintentionally incomplete last-minute change.
 498          */
 499         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
 500         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 501
 502         USet *digits, *decimalValues;
 503         UErrorCode errorCode;
 504
 505         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
 506         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 507         errorCode=U_ZERO_ERROR;
 508         digits=uset_openPattern(digitsPattern, 6, &errorCode);
 509         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
 510
 511         if(U_SUCCESS(errorCode)) {
 512             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
 513         }
 514
 515         uset_close(digits);
 516         uset_close(decimalValues);
 517     }
 518 }
 519
 520 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
 521                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
 522                                 UBool expected) {
 523     int32_t i;
 524     for (i = 0; i < sampleCharsLength; ++i) {
 525         UBool result = propFn(sampleChars[i]);
 526         if (result != expected) {
 527             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
 528                     propName, sampleChars[i], result);
 529         }
 530     }
 531 }
 532
 533 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
 534 static void TestMisc()
 535 {
 536     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
 537     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
 538     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
 539     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
 540     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
 541     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
 542 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
 543     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
 544     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
 545     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
 546     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
 547
 548     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
 549
 550     uint32_t mask;
 551
 552     int32_t i;
 553     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
 554     UVersionInfo realVersion;
 555
 556     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
 557
 558     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
 559     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
 560
 561     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 562                         sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
 563     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 564                         sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
 565
 566     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 567                         sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
 568     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 569                         sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
 570
 571     testSampleCharProps(u_isdefined, "u_isdefined",
 572                         sampleDefined, LENGTHOF(sampleDefined), TRUE);
 573     testSampleCharProps(u_isdefined, "u_isdefined",
 574                         sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
 575
 576     testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
 577     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
 578
 579     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
 580     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
 581
 582     for (i = 0; i < LENGTHOF(sampleDigits); i++) {
 583         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
 584             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
 585                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
 586         }
 587     }
 588
 589     /* Tests the ICU version #*/
 590     u_getVersion(realVersion);
 591     u_versionToString(realVersion, icuVersion);
 592     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
 593     {
 594         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
 595     }
 596 #if defined(ICU_VERSION)
 597     /* test only happens where we have configure.in with VERSION - sanity check. */
 598     if(strcmp(U_ICU_VERSION, ICU_VERSION))
 599     {
 600         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
 601     }
 602 #endif
 603
 604     /* test U_GC_... */
 605     if(
 606         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
 607         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
 608         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
 609         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
 610         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
 611         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
 612     ) {
 613         log_err("error: U_GET_GC_MASK does not work properly\n");
 614     }
 615
 616     mask=0;
 617     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
 618
 619     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
 620     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
 621     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
 622     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
 623     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
 624
 625     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
 626     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
 627     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
 628
 629     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
 630     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
 631     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
 632
 633     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
 634     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
 635     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
 636
 637     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
 638     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
 639     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
 640     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
 641
 642     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
 643     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
 644     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
 645     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
 646     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
 647
 648     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
 649     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
 650     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
 651     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
 652
 653     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
 654     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
 655
 656     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 657         log_err("error: problems with U_GC_XX_MASK constants\n");
 658     }
 659
 660     mask=0;
 661     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
 662     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
 663     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
 664     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
 665     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
 666     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
 667     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
 668
 669     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 670         log_err("error: problems with U_GC_Y_MASK constants\n");
 671     }
 672     {
 673         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
 674         for(i=0; i<10; i++){
 675             if(digit[i]!=u_forDigit(i,10)){
 676                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
 677             }
 678         }
 679     }
 680
 681     /* test u_digit() */
 682     {
 683         static const struct {
 684             UChar32 c;
 685             int8_t radix, value;
 686         } data[]={
 687             /* base 16 */
 688             { 0x0031, 16, 1 },
 689             { 0x0038, 16, 8 },
 690             { 0x0043, 16, 12 },
 691             { 0x0066, 16, 15 },
 692             { 0x00e4, 16, -1 },
 693             { 0x0662, 16, 2 },
 694             { 0x06f5, 16, 5 },
 695             { 0xff13, 16, 3 },
 696             { 0xff41, 16, 10 },
 697
 698             /* base 8 */
 699             { 0x0031, 8, 1 },
 700             { 0x0038, 8, -1 },
 701             { 0x0043, 8, -1 },
 702             { 0x0066, 8, -1 },
 703             { 0x00e4, 8, -1 },
 704             { 0x0662, 8, 2 },
 705             { 0x06f5, 8, 5 },
 706             { 0xff13, 8, 3 },
 707             { 0xff41, 8, -1 },
 708
 709             /* base 36 */
 710             { 0x5a, 36, 35 },
 711             { 0x7a, 36, 35 },
 712             { 0xff3a, 36, 35 },
 713             { 0xff5a, 36, 35 },
 714
 715             /* wrong radix values */
 716             { 0x0031, 1, -1 },
 717             { 0xff3a, 37, -1 }
 718         };
 719
 720         for(i=0; i<LENGTHOF(data); ++i) {
 721             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
 722                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
 723                         data[i].c,
 724                         data[i].radix,
 725                         u_digit(data[i].c, data[i].radix),
 726                         data[i].value);
 727             }
 728         }
 729     }
 730 }
 731
 732 /* test C/POSIX-style functions --------------------------------------------- */
 733
/* bit flags */
/*
 * One bit per C/POSIX-style classification function. Bit n belongs to
 * posixClasses[n]: TestPOSIX() starts with mask 1 and shifts it left once
 * per class. The flags are combined in posixData[].posixResults.
 */
#define ISAL     1
#define ISLO     2
#define ISUP     4

#define ISDI     8
#define ISXD  0x10

#define ISAN  0x20

#define ISPU  0x40
#define ISGR  0x80
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800
 751
/* C/POSIX-style functions, in the same order as the bit flags */
/* Signature shared by all classification functions listed in posixClasses[]. */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/*
 * Function under test plus the name used in TestPOSIX() failure messages
 * (printed with a "u_" prefix). The entry at index n is checked against
 * the flag with value 1<<n.
 */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
 772
/*
 * Expected classification per sample code point: posixResults has the flag
 * of each class set for which the matching function must return TRUE;
 * TestPOSIX() expects FALSE from every function whose flag is absent.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
 812
 813 static void
 814 TestPOSIX() {
 815     uint32_t mask;
 816     int32_t cl, i;
 817     UBool expect;
 818
 819     mask=1;
 820     for(cl=0; cl<12; ++cl) {
 821         for(i=0; i<LENGTHOF(posixData); ++i) {
 822             expect=(UBool)((posixData[i].posixResults&mask)!=0);
 823             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
 824                 log_err("u_%s(U+%04x)=%s is wrong\n",
 825                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
 826             }
 827         }
 828         mask<<=1;
 829     }
 830 }
 831
 832 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
 833 static void TestControlPrint()
 834 {
 835     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
 836     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
 837     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
 838     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
 839     UChar32 c;
 840
 841     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
 842     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
 843
 844     testSampleCharProps(u_isprint, "u_isprint",
 845                         samplePrintable, LENGTHOF(samplePrintable), TRUE);
 846     testSampleCharProps(u_isprint, "u_isprint",
 847                         sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
 848
 849     /* test all ISO 8 controls */
 850     for(c=0; c<=0x9f; ++c) {
 851         if(c==0x20) {
 852             /* skip ASCII graphic characters and continue with DEL */
 853             c=0x7f;
 854         }
 855         if(!u_iscntrl(c)) {
 856             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
 857         }
 858         if(!u_isISOControl(c)) {
 859             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
 860         }
 861         if(u_isprint(c)) {
 862             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
 863         }
 864     }
 865
 866     /* test all Latin-1 graphic characters */
 867     for(c=0x20; c<=0xff; ++c) {
 868         if(c==0x7f) {
 869             c=0xa0;
 870         } else if(c==0xad) {
 871             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
 872             ++c;
 873         }
 874         if(!u_isprint(c)) {
 875             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
 876         }
 877     }
 878 }
 879
 880 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
 881 static void TestIdentifier()
 882 {
 883     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
 884     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
 885     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
 886     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
 887     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
 888     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
 889     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
 890     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
 891     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
 892     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
 893
 894     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 895                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
 896     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 897                         sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
 898
 899     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 900                         sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
 901     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 902                         sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
 903
 904     /* IDPart should imply IDStart */
 905     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 906                         sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
 907
 908     testSampleCharProps(u_isIDStart, "u_isIDStart",
 909                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
 910     testSampleCharProps(u_isIDStart, "u_isIDStart",
 911                         sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
 912
 913     testSampleCharProps(u_isIDPart, "u_isIDPart",
 914                         sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
 915     testSampleCharProps(u_isIDPart, "u_isIDPart",
 916                         sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
 917
 918     /* IDPart should imply IDStart */
 919     testSampleCharProps(u_isIDPart, "u_isIDPart",
 920                         sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
 921
 922     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 923                         sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
 924     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 925                         sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
 926 }
 927
 928 /* for each line of UnicodeData.txt, check some of the properties */
 929 typedef struct UnicodeDataContext {
 930 #if UCONFIG_NO_NORMALIZATION
 931     const void *dummy;
 932 #else
 933     const UNormalizer2 *nfc;
 934     const UNormalizer2 *nfkc;
 935 #endif
 936 } UnicodeDataContext;
 937
 938 /*
 939  * ### TODO
 940  * This test fails incorrectly if the First or Last code point of a repetitive area
 941  * is overridden, which is allowed and is encouraged for the PUAs.
 942  * Currently, this means that both area First/Last and override lines are
 943  * tested against the properties from the API,
 944  * and the area boundary will not match and cause an error.
 945  *
 946  * This function should detect area boundaries and skip them for the test of individual
 947  * code points' properties.
 948  * Then it should check that the areas contain all the same properties except where overridden.
 949  * For this, it would have had to set a flag for which code points were listed explicitly.
 950  */
 951 static void U_CALLCONV
 952 unicodeDataLineFn(void *context,
 953                   char *fields[][2], int32_t fieldCount,
 954                   UErrorCode *pErrorCode)
 955 {
 956     char buffer[100];
 957     const char *d;
 958     char *end;
 959     uint32_t value;
 960     UChar32 c;
 961     int32_t i;
 962     int8_t type;
 963     int32_t dt;
 964     UChar dm[32], s[32];
 965     int32_t dmLength, length;
 966
 967 #if !UCONFIG_NO_NORMALIZATION
 968     const UNormalizer2 *nfc, *nfkc;
 969 #endif
 970
 971     /* get the character code, field 0 */
 972     c=strtoul(fields[0][0], &end, 16);
 973     if(end<=fields[0][0] || end!=fields[0][1]) {
 974         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
 975         return;
 976     }
 977     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
 978         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
 979         return;
 980     }
 981
 982     /* get general category, field 2 */
 983     *fields[2][1]=0;
 984     type = (int8_t)tagValues[MakeProp(fields[2][0])];
 985     if(u_charType(c)!=type) {
 986         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
 987     }
 988     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
 989         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
 990     }
 991
 992     /* get canonical combining class, field 3 */
 993     value=strtoul(fields[3][0], &end, 10);
 994     if(end<=fields[3][0] || end!=fields[3][1]) {
 995         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
 996         return;
 997     }
 998     if(value>255) {
 999         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1000         return;
1001     }
1002 #if !UCONFIG_NO_NORMALIZATION
1003     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1004         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1005     }
1006     nfkc=((UnicodeDataContext *)context)->nfkc;
1007     if(value!=unorm2_getCombiningClass(nfkc, c)) {
1008         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1009     }
1010 #endif
1011
1012     /* get BiDi category, field 4 */
1013     *fields[4][1]=0;
1014     i=MakeDir(fields[4][0]);
1015     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1016         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1017     }
1018
1019     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1020     d=NULL;
1021     if(fields[5][0]==fields[5][1]) {
1022         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1023         if(c==0xac00 || c==0xd7a3) {
1024             dt=U_DT_CANONICAL;
1025         } else {
1026             dt=U_DT_NONE;
1027         }
1028     } else {
1029         d=fields[5][0];
1030         *fields[5][1]=0;
1031         dt=UCHAR_INVALID_CODE;
1032         if(*d=='<') {
1033             end=strchr(++d, '>');
1034             if(end!=NULL) {
1035                 *end=0;
1036                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1037                 d=u_skipWhitespace(end+1);
1038             }
1039         } else {
1040             dt=U_DT_CANONICAL;
1041         }
1042     }
1043     if(dt>U_DT_NONE) {
1044         if(c==0xac00) {
1045             dm[0]=0x1100;
1046             dm[1]=0x1161;
1047             dm[2]=0;
1048             dmLength=2;
1049         } else if(c==0xd7a3) {
1050             dm[0]=0xd788;
1051             dm[1]=0x11c2;
1052             dm[2]=0;
1053             dmLength=2;
1054         } else {
1055             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1056         }
1057     } else {
1058         dmLength=-1;
1059     }
1060     if(dt<0 || U_FAILURE(*pErrorCode)) {
1061         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1062         return;
1063     }
1064 #if !UCONFIG_NO_NORMALIZATION
1065     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1066     if(i!=dt) {
1067         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1068     }
1069     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1070     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1071     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1072         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1073                 "or the Decomposition_Mapping is different (%s)\n",
1074                 c, length, dmLength, u_errorName(*pErrorCode));
1075         return;
1076     }
1077     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1078     if(dt!=U_DT_CANONICAL) {
1079         dmLength=-1;
1080     }
1081     nfc=((UnicodeDataContext *)context)->nfc;
1082     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1083     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1084         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1085                 "or the Decomposition_Mapping is different (%s)\n",
1086                 c, length, dmLength, u_errorName(*pErrorCode));
1087         return;
1088     }
1089     /* recompose */
1090     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1091         UChar32 a, b, composite;
1092         i=0;
1093         U16_NEXT(dm, i, dmLength, a);
1094         U16_NEXT(dm, i, dmLength, b);
1095         /* i==dmLength */
1096         composite=unorm2_composePair(nfc, a, b);
1097         if(composite!=c) {
1098             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1099                     (long)c, (long)a, (long)b, (long)composite);
1100         }
1101         /*
1102          * Note: NFKC has fewer round-trip mappings than NFC,
1103          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1104          */
1105     }
1106 #endif
1107
1108     /* get ISO Comment, field 11 */
1109     *fields[11][1]=0;
1110     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1111     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1112         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1113             c, u_errorName(*pErrorCode),
1114             U_FAILURE(*pErrorCode) ? buffer : "[error]",
1115             fields[11][0]);
1116     }
1117
1118     /* get uppercase mapping, field 12 */
1119     if(fields[12][0]!=fields[12][1]) {
1120         value=strtoul(fields[12][0], &end, 16);
1121         if(end!=fields[12][1]) {
1122             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1123             return;
1124         }
1125         if((UChar32)value!=u_toupper(c)) {
1126             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1127         }
1128     } else {
1129         /* no case mapping: the API must map the code point to itself */
1130         if(c!=u_toupper(c)) {
1131             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1132         }
1133     }
1134
1135     /* get lowercase mapping, field 13 */
1136     if(fields[13][0]!=fields[13][1]) {
1137         value=strtoul(fields[13][0], &end, 16);
1138         if(end!=fields[13][1]) {
1139             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1140             return;
1141         }
1142         if((UChar32)value!=u_tolower(c)) {
1143             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1144         }
1145     } else {
1146         /* no case mapping: the API must map the code point to itself */
1147         if(c!=u_tolower(c)) {
1148             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1149         }
1150     }
1151
1152     /* get titlecase mapping, field 14 */
1153     if(fields[14][0]!=fields[14][1]) {
1154         value=strtoul(fields[14][0], &end, 16);
1155         if(end!=fields[14][1]) {
1156             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1157             return;
1158         }
1159         if((UChar32)value!=u_totitle(c)) {
1160             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1161         }
1162     } else {
1163         /* no case mapping: the API must map the code point to itself */
1164         if(c!=u_totitle(c)) {
1165             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1166         }
1167     }
1168 }
1169
1170 static UBool U_CALLCONV
1171 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1172     static const UChar32 test[][2]={
1173         {0x41, U_UPPERCASE_LETTER},
1174         {0x308, U_NON_SPACING_MARK},
1175         {0xfffe, U_GENERAL_OTHER_TYPES},
1176         {0xe0041, U_FORMAT_CHAR},
1177         {0xeffff, U_UNASSIGNED}
1178     };
1179
1180     int32_t i, count;
1181
1182     if(0!=strcmp((const char *)context, "a1")) {
1183         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1184         return FALSE;
1185     }
1186
1187     count=LENGTHOF(test);
1188     for(i=0; i<count; ++i) {
1189         if(start<=test[i][0] && test[i][0]<limit) {
1190             if(type!=(UCharCategory)test[i][1]) {
1191                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1192                         start, limit, (long)type, test[i][0], test[i][1]);
1193             }
1194             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1195             return i==(count-1) ? FALSE : TRUE;
1196         }
1197     }
1198
1199     if(start>test[count-1][0]) {
1200         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1201                 start, limit, (long)type);
1202         return FALSE;
1203     }
1204
1205     return TRUE;
1206 }
1207
1208 static UBool U_CALLCONV
1209 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1210     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1211     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1212         { 0x0590, U_LEFT_TO_RIGHT },
1213         { 0x0600, U_RIGHT_TO_LEFT },
1214         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1215         { 0x08A0, U_RIGHT_TO_LEFT },
1216         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1217         { 0xFB1D, U_LEFT_TO_RIGHT },
1218         { 0xFB50, U_RIGHT_TO_LEFT },
1219         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1220         { 0xFE70, U_LEFT_TO_RIGHT },
1221         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1222         { 0x10800, U_LEFT_TO_RIGHT },
1223         { 0x11000, U_RIGHT_TO_LEFT },
1224         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1225         { 0x1EE00, U_RIGHT_TO_LEFT },
1226         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1227         { 0x1F000, U_RIGHT_TO_LEFT },
1228         { 0x110000, U_LEFT_TO_RIGHT }
1229     };
1230
1231     UChar32 c;
1232     int32_t i;
1233     UCharDirection shouldBeDir;
1234
1235     /*
1236      * LineBreak.txt specifies:
1237      *   #  - Assigned characters that are not listed explicitly are given the value
1238      *   #    "AL".
1239      *   #  - Unassigned characters are given the value "XX".
1240      *
1241      * PUA characters are listed explicitly with "XX".
1242      * Verify that no assigned character has "XX".
1243      */
1244     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1245         c=start;
1246         while(c<limit) {
1247             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1248                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1249             }
1250             ++c;
1251         }
1252     }
1253
1254     /*
1255      * Verify default Bidi classes.
1256      * For recent Unicode versions, see UCD.html.
1257      *
1258      * For older Unicode versions:
1259      * See table 3-7 "Bidirectional Character Types" in UAX #9.
1260      * http://www.unicode.org/reports/tr9/
1261      *
1262      * See also DerivedBidiClass.txt for Cn code points!
1263      *
1264      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1265      * changed some default values.
1266      * In particular, non-characters and unassigned Default Ignorable Code Points
1267      * change from L to BN.
1268      *
1269      * UCD.html version 4.0.1 does not yet reflect these changes.
1270      */
1271     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1272         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1273         c=start;
1274         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1275             if((int32_t)c<defaultBidi[i][0]) {
1276                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1277                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1278                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1279                     } else {
1280                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1281                     }
1282
1283                     if( u_charDirection(c)!=shouldBeDir ||
1284                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1285                     ) {
1286                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1287                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1288                     }
1289                     ++c;
1290                 }
1291             }
1292         }
1293     }
1294
1295     return TRUE;
1296 }
1297
1298 /* tests for several properties */
1299 static void TestUnicodeData()
1300 {
1301     UVersionInfo expectVersionArray;
1302     UVersionInfo versionArray;
1303     char *fields[15][2];
1304     UErrorCode errorCode;
1305     UChar32 c;
1306     int8_t type;
1307
1308     UnicodeDataContext context;
1309
1310     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1311     u_getUnicodeVersion(versionArray);
1312     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1313     {
1314         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1315         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1316     }
1317
1318 #if defined(ICU_UNICODE_VERSION)
1319     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1320     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1321     {
1322          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1323     }
1324 #endif
1325
1326     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1327         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1328     }
1329
1330     errorCode=U_ZERO_ERROR;
1331 #if !UCONFIG_NO_NORMALIZATION
1332     context.nfc=unorm2_getNFCInstance(&errorCode);
1333     context.nfkc=unorm2_getNFKCInstance(&errorCode);
1334     if(U_FAILURE(errorCode)) {
1335         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1336         return;
1337     }
1338 #endif
1339     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1340     if(U_FAILURE(errorCode)) {
1341         return; /* if we couldn't parse UnicodeData.txt, we should return */
1342     }
1343
1344     /* sanity check on repeated properties */
1345     for(c=0xfffe; c<=0x10ffff;) {
1346         type=u_charType(c);
1347         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1348             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1349         }
1350         if(type!=U_UNASSIGNED) {
1351             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1352         }
1353         if((c&0xffff)==0xfffe) {
1354             ++c;
1355         } else {
1356             c+=0xffff;
1357         }
1358     }
1359
1360     /* test that PUA is not "unassigned" */
1361     for(c=0xe000; c<=0x10fffd;) {
1362         type=u_charType(c);
1363         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1364             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1365         }
1366         if(type==U_UNASSIGNED) {
1367             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1368         } else if(type!=U_PRIVATE_USE_CHAR) {
1369             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1370         }
1371         if(c==0xf8ff) {
1372             c=0xf0000;
1373         } else if(c==0xffffd) {
1374             c=0x100000;
1375         } else {
1376             ++c;
1377         }
1378     }
1379
1380     /* test u_enumCharTypes() */
1381     u_enumCharTypes(enumTypeRange, "a1");
1382
1383     /* check default properties */
1384     u_enumCharTypes(enumDefaultsRange, NULL);
1385 }
1386
1387 static void TestCodeUnit(){
1388     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1389
1390     int32_t i;
1391
1392     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1393         UChar c=codeunit[i];
1394         if(i<4){
1395             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1396                 log_err("ERROR: U+%04x is a single", c);
1397             }
1398
1399         }
1400         if(i >= 4 && i< 8){
1401             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1402                 log_err("ERROR: U+%04x is a first surrogate", c);
1403             }
1404         }
1405         if(i >= 8 && i< 12){
1406             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1407                 log_err("ERROR: U+%04x is a second surrogate", c);
1408             }
1409         }
1410     }
1411
1412 }
1413
1414 static void TestCodePoint(){
1415     const UChar32 codePoint[]={
1416         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1417         0xd800,
1418         0xdbff,
1419         0xdc00,
1420         0xdfff,
1421         0xdc04,
1422         0xd821,
1423         /*not a surrogate, valid, isUnicodeChar , not Error*/
1424         0x20ac,
1425         0xd7ff,
1426         0xe000,
1427         0xe123,
1428         0x0061,
1429         0xe065,
1430         0x20402,
1431         0x24506,
1432         0x23456,
1433         0x20402,
1434         0x10402,
1435         0x23456,
1436         /*not a surrogate, not valid, isUnicodeChar, isError */
1437         0x0015,
1438         0x009f,
1439         /*not a surrogate, not valid, not isUnicodeChar, isError */
1440         0xffff,
1441         0xfffe,
1442     };
1443     int32_t i;
1444     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1445         UChar32 c=codePoint[i];
1446         if(i<6){
1447             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1448                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1449             }
1450             if(UTF_IS_VALID(c)){
1451                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1452             }
1453             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1454                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1455             }
1456             if(UTF_IS_ERROR(c)){
1457                 log_err("ERROR: isError() failed for U+%04x\n", c);
1458             }
1459         }else if(i >=6 && i<18){
1460             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1461                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1462             }
1463             if(!UTF_IS_VALID(c)){
1464                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1465             }
1466             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1467                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1468             }
1469             if(UTF_IS_ERROR(c)){
1470                 log_err("ERROR: isError() failed for U+%04x\n", c);
1471             }
1472         }else if(i >=18 && i<20){
1473             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1474                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1475             }
1476             if(UTF_IS_VALID(c)){
1477                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1478             }
1479             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1480                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1481             }
1482             if(!UTF_IS_ERROR(c)){
1483                 log_err("ERROR: isError() failed for U+%04x\n", c);
1484             }
1485         }
1486         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1487             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1488                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1489             }
1490             if(UTF_IS_VALID(c)){
1491                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1492             }
1493             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1494                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1495             }
1496             if(!UTF_IS_ERROR(c)){
1497                 log_err("ERROR: isError() failed for U+%04x\n", c);
1498             }
1499         }
1500     }
1501
1502     if(
1503         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1504         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1505         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1506         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1507     ) {
1508         log_err("error with U_IS_BMP()\n");
1509     }
1510
1511     if(
1512         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1513         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1514         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1515         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1516     ) {
1517         log_err("error with U_IS_SUPPLEMENTARY()\n");
1518     }
1519 }
1520
1521 static void TestCharLength()
1522 {
1523     const int32_t codepoint[]={
1524         1, 0x0061,
1525         1, 0xe065,
1526         1, 0x20ac,
1527         2, 0x20402,
1528         2, 0x23456,
1529         2, 0x24506,
1530         2, 0x20402,
1531         2, 0x10402,
1532         1, 0xd7ff,
1533         1, 0xe000
1534     };
1535
1536     int32_t i;
1537     UBool multiple;
1538     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1539         UChar32 c=codepoint[i+1];
1540         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1541             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1542         }
1543         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1544         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1545             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1546         }
1547     }
1548 }
1549
1550 /*internal functions ----*/
1551 static int32_t MakeProp(char* str)
1552 {
1553     int32_t result = 0;
1554     char* matchPosition =0;
1555
1556     matchPosition = strstr(tagStrings, str);
1557     if (matchPosition == 0)
1558     {
1559         log_err("unrecognized type letter ");
1560         log_err(str);
1561     }
1562     else
1563         result = (int32_t)((matchPosition - tagStrings) / 2);
1564     return result;
1565 }
1566
1567 static int32_t MakeDir(char* str)
1568 {
1569     int32_t pos = 0;
1570     for (pos = 0; pos < 19; pos++) {
1571         if (strcmp(str, dirStrings[pos]) == 0) {
1572             return pos;
1573         }
1574     }
1575     return -1;
1576 }
1577
1578 /* test u_charName() -------------------------------------------------------- */
1579
/*
 * Expected character names, one row per tested code point.
 * name/oldName/extName/alias are compared against the results for
 * U_UNICODE_CHAR_NAME, U_UNICODE_10_CHAR_NAME, U_EXTENDED_CHAR_NAME and
 * U_CHAR_NAME_ALIAS respectively; alias is NULL where no alias is listed.
 */
static const struct {
    uint32_t code;
    const char *name;
    const char *oldName;
    const char *extName;
    const char *alias;
} names[]={
    {
        0x0061,
        "LATIN SMALL LETTER A", "",
        "LATIN SMALL LETTER A",
        NULL
    },
    {
        0x01a2,
        "LATIN CAPITAL LETTER OI", "",
        "LATIN CAPITAL LETTER OI",
        "LATIN CAPITAL LETTER GHA"
    },
    {
        0x0284,
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
        NULL
    },
    {
        0x0fd0,
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
        "TIBETAN MARK BKA- SHOG GI MGO RGYAN"
    },
    {
        0x3401,
        "CJK UNIFIED IDEOGRAPH-3401", "",
        "CJK UNIFIED IDEOGRAPH-3401",
        NULL
    },
    {
        0x7fed,
        "CJK UNIFIED IDEOGRAPH-7FED", "",
        "CJK UNIFIED IDEOGRAPH-7FED",
        NULL
    },
    {
        0xac00,
        "HANGUL SYLLABLE GA", "",
        "HANGUL SYLLABLE GA",
        NULL
    },
    {
        0xd7a3,
        "HANGUL SYLLABLE HIH", "",
        "HANGUL SYLLABLE HIH",
        NULL
    },
    {
        0xd800,
        "", "",
        "<lead surrogate-D800>",
        NULL
    },
    {
        0xdc00,
        "", "",
        "<trail surrogate-DC00>",
        NULL
    },
    {
        0xff08,
        "FULLWIDTH LEFT PARENTHESIS", "",
        "FULLWIDTH LEFT PARENTHESIS",
        NULL
    },
    {
        0xffe5,
        "FULLWIDTH YEN SIGN", "",
        "FULLWIDTH YEN SIGN",
        NULL
    },
    {
        0xffff,
        "", "",
        "<noncharacter-FFFF>",
        NULL
    },
    {
        0x1d0c5,
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
        "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"
    },
    {
        0x23456,
        "CJK UNIFIED IDEOGRAPH-23456", "",
        "CJK UNIFIED IDEOGRAPH-23456",
        NULL
    }
};
1607
1608 static UBool
1609 enumCharNamesFn(void *context,
1610                 UChar32 code, UCharNameChoice nameChoice,
1611                 const char *name, int32_t length) {
1612     int32_t *pCount=(int32_t *)context;
1613     const char *expected;
1614     int i;
1615
1616     if(length<=0 || length!=(int32_t)strlen(name)) {
1617         /* should not be called with an empty string or invalid length */
1618         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1619         return TRUE;
1620     }
1621
1622     ++*pCount;
1623     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1624         if(code==(UChar32)names[i].code) {
1625             switch (nameChoice) {
1626                 case U_EXTENDED_CHAR_NAME:
1627                     if(0!=strcmp(name, names[i].extName)) {
1628                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1629                     }
1630                     break;
1631                 case U_UNICODE_CHAR_NAME:
1632                     if(0!=strcmp(name, names[i].name)) {
1633                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1634                     }
1635                     break;
1636                 case U_UNICODE_10_CHAR_NAME:
1637                     expected=names[i].oldName;
1638                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
1639                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1640                     }
1641                     break;
1642                 case U_CHAR_NAME_ALIAS:
1643                     expected=names[i].alias;
1644                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1645                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1646                     }
1647                     break;
1648                 case U_CHAR_NAME_CHOICE_COUNT:
1649                     break;
1650             }
1651             break;
1652         }
1653     }
1654     return TRUE;
1655 }
1656
1657 struct enumExtCharNamesContext {
1658     uint32_t length;
1659     int32_t last;
1660 };
1661
1662 static UBool
1663 enumExtCharNamesFn(void *context,
1664                 UChar32 code, UCharNameChoice nameChoice,
1665                 const char *name, int32_t length) {
1666     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1667
1668     if (ecncp->last != (int32_t) code - 1) {
1669         if (ecncp->last < 0) {
1670             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1671         } else {
1672             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1673         }
1674     }
1675     ecncp->last = (int32_t) code;
1676
1677     if (!*name) {
1678         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1679     }
1680
1681     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1682 }
1683
1684 /**
1685  * This can be made more efficient by moving it into putil.c and having
1686  * it directly access the ebcdic translation tables.
1687  * TODO: If we get this method in putil.c, then delete it from here.
1688  */
1689 static UChar
1690 u_charToUChar(char c) {
1691     UChar uc;
1692     u_charsToUChars(&c, &uc, 1);
1693     return uc;
1694 }
1695
1696 static void
1697 TestCharNames() {
1698     static char name[80];
1699     UErrorCode errorCode=U_ZERO_ERROR;
1700     struct enumExtCharNamesContext extContext;
1701     const char *expected;
1702     int32_t length;
1703     UChar32 c;
1704     int32_t i;
1705
1706     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1707     length=uprv_getMaxCharNameLength();
1708     if(length==0) {
1709         /* no names data available */
1710         return;
1711     }
1712     if(length<83) { /* Unicode 3.2 max char name length */
1713         log_err("uprv_getMaxCharNameLength()=%d is too short");
1714     }
1715     /* ### TODO same tests for max ISO comment length as for max name length */
1716
1717     log_verbose("Testing u_charName()\n");
1718     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1719         /* modern Unicode character name */
1720         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1721         if(U_FAILURE(errorCode)) {
1722             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1723             return;
1724         }
1725         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1726             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1727         }
1728
1729         /* find the modern name */
1730         if (*names[i].name) {
1731             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1732             if(U_FAILURE(errorCode)) {
1733                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1734                 return;
1735             }
1736             if(c!=(UChar32)names[i].code) {
1737                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1738             }
1739         }
1740
1741         /* Unicode 1.0 character name */
1742         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1743         if(U_FAILURE(errorCode)) {
1744             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1745             return;
1746         }
1747         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1748             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1749         }
1750
1751         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1752         if(names[i].oldName[0]!=0 /* && length>0 */) {
1753             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1754             if(U_FAILURE(errorCode)) {
1755                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1756                 return;
1757             }
1758             if(c!=(UChar32)names[i].code) {
1759                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1760             }
1761         }
1762
1763         /* Unicode character name alias */
1764         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1765         if(U_FAILURE(errorCode)) {
1766             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1767             return;
1768         }
1769         expected=names[i].alias;
1770         if(expected==NULL) {
1771             expected="";
1772         }
1773         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1774             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1775                     names[i].code, name, length, expected);
1776         }
1777
1778         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1779         if(expected[0]!=0 /* && length>0 */) {
1780             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1781             if(U_FAILURE(errorCode)) {
1782                 log_err("u_charFromName(%s - alias) error %s\n",
1783                         expected, u_errorName(errorCode));
1784                 return;
1785             }
1786             if(c!=(UChar32)names[i].code) {
1787                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1788                         expected, c, names[i].code);
1789             }
1790         }
1791     }
1792
1793     /* test u_enumCharNames() */
1794     length=0;
1795     errorCode=U_ZERO_ERROR;
1796     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1797     if(U_FAILURE(errorCode) || length<94140) {
1798         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1799     }
1800
1801     extContext.length = 0;
1802     extContext.last = -1;
1803     errorCode=U_ZERO_ERROR;
1804     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1805     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1806         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1807     }
1808
1809     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1810     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1811         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1812     }
1813
1814     /* Test getCharNameCharacters */
1815     if(!getTestOption(QUICK_OPTION)) {
1816         enum { BUFSIZE = 256 };
1817         UErrorCode ec = U_ZERO_ERROR;
1818         char buf[BUFSIZE];
1819         int32_t maxLength;
1820         UChar32 cp;
1821         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1822         int32_t l1, l2;
1823         UBool map[256];
1824         UBool ok;
1825
1826         USet* set = uset_open(1, 0); /* empty set */
1827         USet* dumb = uset_open(1, 0); /* empty set */
1828
1829         /*
1830          * uprv_getCharNameCharacters() will likely return more lowercase
1831          * letters than actual character names contain because
1832          * it includes all the characters in lowercased names of
1833          * general categories, for the full possible set of extended names.
1834          */
1835         {
1836             USetAdder sa={
1837                 NULL,
1838                 uset_add,
1839                 uset_addRange,
1840                 uset_addString,
1841                 NULL /* don't need remove() */
1842             };
1843             sa.set=set;
1844             uprv_getCharNameCharacters(&sa);
1845         }
1846
1847         /* build set the dumb (but sure-fire) way */
1848         for (i=0; i<256; ++i) {
1849             map[i] = FALSE;
1850         }
1851
1852         maxLength=0;
1853         for (cp=0; cp<0x110000; ++cp) {
1854             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1855                                      buf, BUFSIZE, &ec);
1856             if (U_FAILURE(ec)) {
1857                 log_err("FAIL: u_charName failed when it shouldn't\n");
1858                 uset_close(set);
1859                 uset_close(dumb);
1860                 return;
1861             }
1862             if(len>maxLength) {
1863                 maxLength=len;
1864             }
1865
1866             for (i=0; i<len; ++i) {
1867                 if (!map[(uint8_t) buf[i]]) {
1868                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1869                     map[(uint8_t) buf[i]] = TRUE;
1870                 }
1871             }
1872
1873             /* test for leading/trailing whitespace */
1874             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1875                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1876             }
1877         }
1878
1879         if(map[(uint8_t)'\t']) {
1880             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1881         }
1882
1883         length=uprv_getMaxCharNameLength();
1884         if(length!=maxLength) {
1885             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1886                     length, maxLength);
1887         }
1888
1889         /* compare the sets.  Where is my uset_equals?!! */
1890         ok=TRUE;
1891         for(i=0; i<256; ++i) {
1892             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1893                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1894                     /* ignore lowercase a-z that are in set but not in dumb */
1895                     ok=TRUE;
1896                 } else {
1897                     ok=FALSE;
1898                     break;
1899                 }
1900             }
1901         }
1902
1903         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1904         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1905         if (U_FAILURE(ec)) {
1906             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1907             uset_close(set);
1908             uset_close(dumb);
1909             return;
1910         }
1911
1912         if (l1 >= BUFSIZE) {
1913             l1 = BUFSIZE-1;
1914             pat[l1] = 0;
1915         }
1916         if (l2 >= BUFSIZE) {
1917             l2 = BUFSIZE-1;
1918             dumbPat[l2] = 0;
1919         }
1920
1921         if (!ok) {
1922             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1923                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1924         } else if(getTestOption(VERBOSITY_OPTION)) {
1925             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1926         }
1927
1928         uset_close(set);
1929         uset_close(dumb);
1930     }
1931
1932     /* ### TODO: test error cases and other interesting things */
1933 }
1934
1935 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
1936
1937 static void
1938 TestMirroring() {
1939     USet *set;
1940     UErrorCode errorCode;
1941
1942     UChar32 start, end, c2, c3;
1943     int32_t i;
1944
1945     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1946
1947     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1948
1949     log_verbose("Testing u_isMirrored()\n");
1950     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1951          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1952         )
1953     ) {
1954         log_err("u_isMirrored() does not work correctly\n");
1955     }
1956
1957     log_verbose("Testing u_charMirror()\n");
1958     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1959          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1960          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1961          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1962          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1963          )
1964     ) {
1965         log_err("u_charMirror() does not work correctly\n");
1966     }
1967
1968     /* verify that Bidi_Mirroring_Glyph roundtrips */
1969     errorCode=U_ZERO_ERROR;
1970     set=uset_openPattern(mirroredPattern, 17, &errorCode);
1971
1972     if (U_FAILURE(errorCode)) {
1973         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1974     } else {
1975         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1976             do {
1977                 c2=u_charMirror(start);
1978                 c3=u_charMirror(c2);
1979                 if(c3!=start) {
1980                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1981                 }
1982             } while(++start<=end);
1983         }
1984     }
1985
1986     uset_close(set);
1987 }
1988
1989
1990 struct RunTestData
1991 {
1992     const char *runText;
1993     UScriptCode runCode;
1994 };
1995
1996 typedef struct RunTestData RunTestData;
1997
1998 static void
1999 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2000                 const char *prefix)
2001 {
2002     int32_t run, runStart, runLimit;
2003     UScriptCode runCode;
2004
2005     /* iterate over all the runs */
2006     run = 0;
2007     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2008         if (runStart != runStarts[run]) {
2009             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2010                 prefix, run, runStarts[run], runStart);
2011         }
2012
2013         if (runLimit != runStarts[run + 1]) {
2014             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2015                 prefix, run, runStarts[run + 1], runLimit);
2016         }
2017
2018         if (runCode != testData[run].runCode) {
2019             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2020                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2021         }
2022
2023         run += 1;
2024
2025         /* stop when we've seen all the runs we expect to see */
2026         if (run >= nRuns) {
2027             break;
2028         }
2029     }
2030
2031     /* Complain if we didn't see then number of runs we expected */
2032     if (run != nRuns) {
2033         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2034     }
2035 }
2036
2037 static void
2038 TestUScriptRunAPI()
2039 {
2040     static const RunTestData testData1[] = {
2041         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2042         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2043         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2044         {"English (", USCRIPT_LATIN},
2045         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2046         {") ", USCRIPT_LATIN},
2047         {"\\u6F22\\u5B75", USCRIPT_HAN},
2048         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2049         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2050         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2051     };
2052
2053     static const RunTestData testData2[] = {
2054        {"((((((((((abc))))))))))", USCRIPT_LATIN}
2055     };
2056
2057     static const struct {
2058       const RunTestData *testData;
2059       int32_t nRuns;
2060     } testDataEntries[] = {
2061         {testData1, LENGTHOF(testData1)},
2062         {testData2, LENGTHOF(testData2)}
2063     };
2064
2065     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
2066     int32_t testEntry;
2067
2068     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2069         UChar testString[1024];
2070         int32_t runStarts[256];
2071         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2072         const RunTestData *testData = testDataEntries[testEntry].testData;
2073
2074         int32_t run, stringLimit;
2075         UScriptRun *scriptRun = NULL;
2076         UErrorCode err;
2077
2078         /*
2079          * Fill in the test string and the runStarts array.
2080          */
2081         stringLimit = 0;
2082         for (run = 0; run < nTestRuns; run += 1) {
2083             runStarts[run] = stringLimit;
2084             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2085             /*stringLimit -= 1;*/
2086         }
2087
2088         /* The limit of the last run */
2089         runStarts[nTestRuns] = stringLimit;
2090
2091         /*
2092          * Make sure that calling uscript_OpenRun with a NULL text pointer
2093          * and a non-zero text length returns the correct error.
2094          */
2095         err = U_ZERO_ERROR;
2096         scriptRun = uscript_openRun(NULL, stringLimit, &err);
2097
2098         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2099             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2100         }
2101
2102         if (scriptRun != NULL) {
2103             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2104             uscript_closeRun(scriptRun);
2105         }
2106
2107         /*
2108          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2109          * and a zero text length returns the correct error.
2110          */
2111         err = U_ZERO_ERROR;
2112         scriptRun = uscript_openRun(testString, 0, &err);
2113
2114         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2115             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2116         }
2117
2118         if (scriptRun != NULL) {
2119             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2120             uscript_closeRun(scriptRun);
2121         }
2122
2123         /*
2124          * Make sure that calling uscript_openRun with a NULL text pointer
2125          * and a zero text length doesn't return an error.
2126          */
2127         err = U_ZERO_ERROR;
2128         scriptRun = uscript_openRun(NULL, 0, &err);
2129
2130         if (U_FAILURE(err)) {
2131             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2132         }
2133
2134         /* Make sure that the empty iterator doesn't find any runs */
2135         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2136             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2137         }
2138
2139         /*
2140          * Make sure that calling uscript_setRunText with a NULL text pointer
2141          * and a non-zero text length returns the correct error.
2142          */
2143         err = U_ZERO_ERROR;
2144         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2145
2146         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2147             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2148         }
2149
2150         /*
2151          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2152          * and a zero text length returns the correct error.
2153          */
2154         err = U_ZERO_ERROR;
2155         uscript_setRunText(scriptRun, testString, 0, &err);
2156
2157         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2158             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2159         }
2160
2161         /*
2162          * Now call uscript_setRunText on the empty iterator
2163          * and make sure that it works.
2164          */
2165         err = U_ZERO_ERROR;
2166         uscript_setRunText(scriptRun, testString, stringLimit, &err);
2167
2168         if (U_FAILURE(err)) {
2169             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2170         } else {
2171             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2172         }
2173
2174         uscript_closeRun(scriptRun);
2175
2176         /*
2177          * Now open an interator over the testString
2178          * using uscript_openRun and make sure that it works
2179          */
2180         scriptRun = uscript_openRun(testString, stringLimit, &err);
2181
2182         if (U_FAILURE(err)) {
2183             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2184         } else {
2185             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2186         }
2187
2188         /* Now reset the iterator, and make sure
2189          * that it still works.
2190          */
2191         uscript_resetRun(scriptRun);
2192
2193         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2194
2195         /* Close the iterator */
2196         uscript_closeRun(scriptRun);
2197     }
2198 }
2199
2200 /* test additional, non-core properties */
2201 static void
2202 TestAdditionalProperties() {
2203     /* test data for u_charAge() */
2204     static const struct {
2205         UChar32 c;
2206         UVersionInfo version;
2207     } charAges[]={
2208         {0x41,    { 1, 1, 0, 0 }},
2209         {0xffff,  { 1, 1, 0, 0 }},
2210         {0x20ab,  { 2, 0, 0, 0 }},
2211         {0x2fffe, { 2, 0, 0, 0 }},
2212         {0x20ac,  { 2, 1, 0, 0 }},
2213         {0xfb1d,  { 3, 0, 0, 0 }},
2214         {0x3f4,   { 3, 1, 0, 0 }},
2215         {0x10300, { 3, 1, 0, 0 }},
2216         {0x220,   { 3, 2, 0, 0 }},
2217         {0xff60,  { 3, 2, 0, 0 }}
2218     };
2219
2220     /* test data for u_hasBinaryProperty() */
2221     static const int32_t
2222     props[][3]={ /* code point, property, value */
2223         { 0x0627, UCHAR_ALPHABETIC, TRUE },
2224         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2225         { 0x2028, UCHAR_ALPHABETIC, FALSE },
2226
2227         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2228         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2229
2230         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2231         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2232
2233         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2234         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2235
2236         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2237         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2238         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2239         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2240         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2241
2242         { 0x058a, UCHAR_DASH, TRUE },
2243         { 0x007e, UCHAR_DASH, FALSE },
2244
2245         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2246         { 0x3000, UCHAR_DIACRITIC, FALSE },
2247
2248         { 0x0e46, UCHAR_EXTENDER, TRUE },
2249         { 0x0020, UCHAR_EXTENDER, FALSE },
2250
2251 #if !UCONFIG_NO_NORMALIZATION
2252         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2253         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2254         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2255
2256         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2257         { 0x0308, UCHAR_NFD_INERT, FALSE },
2258
2259         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2260         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2261
2262         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2263         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2264         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2265         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2266         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2267         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2268
2269         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2270         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2271
2272         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2273         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2274         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2275         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2276         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2277         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2278 #endif
2279
2280         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2281         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2282         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2283
2284         { 0x30fb, UCHAR_HYPHEN, TRUE },
2285         { 0xfe58, UCHAR_HYPHEN, FALSE },
2286
2287         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2288         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2289         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2290
2291         { 0x2172, UCHAR_ID_START, TRUE },
2292         { 0x007a, UCHAR_ID_START, TRUE },
2293         { 0x0039, UCHAR_ID_START, FALSE },
2294
2295         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2296         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2297         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2298
2299         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2300         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2301
2302         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2303         { 0x0345, UCHAR_LOWERCASE, TRUE },
2304         { 0x0030, UCHAR_LOWERCASE, FALSE },
2305
2306         { 0x1d7a9, UCHAR_MATH, TRUE },
2307         { 0x2135, UCHAR_MATH, TRUE },
2308         { 0x0062, UCHAR_MATH, FALSE },
2309
2310         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2311         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2312         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2313
2314         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2315         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2316         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2317
2318         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2319         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2320
2321         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2322         { 0x2162, UCHAR_UPPERCASE, TRUE },
2323         { 0x0345, UCHAR_UPPERCASE, FALSE },
2324
2325         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2326         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2327         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2328
2329         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2330         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2331         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2332
2333         { 0x16ee, UCHAR_XID_START, TRUE },
2334         { 0x23456, UCHAR_XID_START, TRUE },
2335         { 0x1d1aa, UCHAR_XID_START, FALSE },
2336
2337         /*
2338          * Version break:
2339          * The following properties are only supported starting with the
2340          * Unicode version indicated in the second field.
2341          */
2342         { -1, 0x320, 0 },
2343
2344         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2345         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2346         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2347
2348         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2349         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2350         { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2351         { 0xe0100, UCHAR_DEPRECATED, FALSE },
2352
2353         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2354         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2355         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2356         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2357
2358         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2359         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2360         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2361         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2362
2363         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2364         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2365
2366         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2367         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2368
2369         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2370         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2371
2372         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2373         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2374
2375         { 0x2e9b, UCHAR_RADICAL, TRUE },
2376         { 0x4e00, UCHAR_RADICAL, FALSE },
2377
2378         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2379         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2380
2381         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2382         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2383
2384         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2385
2386         { 0x002e, UCHAR_S_TERM, TRUE },
2387         { 0x0061, UCHAR_S_TERM, FALSE },
2388
2389         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2390         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2391         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2392         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2393
2394         /* enum/integer type properties */
2395
2396         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2397         /* test default Bidi classes for unassigned code points */
2398         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2399         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2400         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2401         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2402         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2403         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2404         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2405         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2406         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2407         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2408         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2409
2410         { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2411         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2412         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2413         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2414         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2415         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2416         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2417         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2418
2419         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2420         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2421         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2422         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2423         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2424         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2425         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2426         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2427         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2428         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2429         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2430
2431         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2432         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2433
2434         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2435         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2436         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2437         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2438         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2439         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2440         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2441         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2442         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2443
2444         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2445         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2446         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2447         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2448         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2449         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2450         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2451         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2452         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2453         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2454         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2455         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2456         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2457         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2458         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2459         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2460         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2461
2462         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2463         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2464         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2465
2466         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2467         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2468         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2469         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2470         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2471
2472         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2473         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2474         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2475         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2476         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2477         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2478         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2479         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2480
2481         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2482         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2483         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2484         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2485         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2486         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2487         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2488         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2489         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2490         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2491         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2492         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2493         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2494         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2495         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2496         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2497
2498         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2499
2500         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2501
2502         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2503         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2504         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2505         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2506         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2507         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2508         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2509
2510         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2511         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2512         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2513         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2514
2515         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2516         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2517         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2518         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2519         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2520         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2521
2522         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2523         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2524         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2525         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2526
2527         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2528         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2529         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2530         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2531         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2532         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2533         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2534
2535         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2536         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2537         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2538         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2539
2540         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2541         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2542         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2543         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2544
2545         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2546         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2547         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2548         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2549         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2550
2551         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2552
2553         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2554
2555         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2556         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2557         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2558
2559         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2560         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2561         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2562         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2563         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2564
2565         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2566         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2567         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2568
2569         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2570         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2571         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2572         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2573
2574         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2575         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2576         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2577         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2578         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2579         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2580
2581         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2582         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2583         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2584         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2585
2586         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2587         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2588         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2589         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2590
2591         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2592         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2593         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2594         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2595
2596         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2597
2598         /* unassigned code points in new default Bidi R blocks */
2599         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2600         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2601
2602         /* test some script codes >127 */
2603         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2604         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2605         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2606
2607         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2608
2609         /* value changed in Unicode 6.0 */
2610         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2611
2612         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2613
2614         /* unassigned code points in new/changed default Bidi AL blocks */
2615         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2616         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2617
2618         /* undefined UProperty values */
2619         { 0x61, 0x4a7, 0 },
2620         { 0x234bc, 0x15ed, 0 }
2621     };
2622
2623     UVersionInfo version;
2624     UChar32 c;
2625     int32_t i, result, uVersion;
2626     UProperty which;
2627
2628     /* what is our Unicode version? */
2629     u_getUnicodeVersion(version);
2630     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2631
2632     u_charAge(0x20, version);
2633     if(version[0]==0) {
2634         /* no additional properties available */
2635         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2636         return;
2637     }
2638
2639     /* test u_charAge() */
2640     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2641         u_charAge(charAges[i].c, version);
2642         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2643             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2644                 charAges[i].c,
2645                 version[0], version[1], version[2], version[3],
2646                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2647         }
2648     }
2649
2650     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2651         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2652         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2653         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2654         u_getIntPropertyMinValue(0x2345)!=0
2655     ) {
2656         log_err("error: u_getIntPropertyMinValue() wrong\n");
2657     }
2658     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2659         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2660     }
2661     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2662         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2663     }
2664     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2665         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2666     }
2667     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2668         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2669     }
2670     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2671         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2672     }
2673     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2674         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2675     }
2676     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2677         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2678     }
2679     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2680         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2681     }
2682     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2683         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2684     }
2685     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2686         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2687     }
2688     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2689         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2690     }
2691     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2692         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2693     }
2694     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2695         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2696     }
2697     /*JB#2410*/
2698     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2699         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2700     }
2701     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2702         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2703     }
2704     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2705         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2706     }
2707     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2708         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2709     }
2710     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2711         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2712     }
2713
2714     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2715     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2716         const char *whichName;
2717
2718         if(props[i][0]<0) {
2719             /* Unicode version break */
2720             if(uVersion<props[i][1]) {
2721                 break; /* do not test properties that are not yet supported */
2722             } else {
2723                 continue; /* skip this row */
2724             }
2725         }
2726
2727         c=(UChar32)props[i][0];
2728         which=(UProperty)props[i][1];
2729         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2730
2731         if(which<UCHAR_INT_START) {
2732             result=u_hasBinaryProperty(c, which);
2733             if(result!=props[i][2]) {
2734                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2735                         c, whichName, result, i);
2736             }
2737         }
2738
2739         result=u_getIntPropertyValue(c, which);
2740         if(result!=props[i][2]) {
2741             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2742                     c, whichName, result, props[i][2], i);
2743         }
2744
2745         /* test separate functions, too */
2746         switch((UProperty)props[i][1]) {
2747         case UCHAR_ALPHABETIC:
2748             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2749                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2750                         props[i][0], result, i);
2751             }
2752             break;
2753         case UCHAR_LOWERCASE:
2754             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2755                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2756                         props[i][0], result, i);
2757             }
2758             break;
2759         case UCHAR_UPPERCASE:
2760             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2761                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2762                         props[i][0], result, i);
2763             }
2764             break;
2765         case UCHAR_WHITE_SPACE:
2766             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2767                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2768                         props[i][0], result, i);
2769             }
2770             break;
2771         default:
2772             break;
2773         }
2774     }
2775 }
2776
2777 static void
2778 TestNumericProperties(void) {
2779     /* see UnicodeData.txt, DerivedNumericValues.txt */
2780     static const struct {
2781         UChar32 c;
2782         int32_t type;
2783         double numValue;
2784     } values[]={
2785         { 0x12456, U_NT_NUMERIC, -1. },
2786         { 0x12457, U_NT_NUMERIC, -1. },
2787         { 0x0F33, U_NT_NUMERIC, -1./2. },
2788         { 0x0C66, U_NT_DECIMAL, 0 },
2789         { 0x96f6, U_NT_NUMERIC, 0 },
2790         { 0xa833, U_NT_NUMERIC, 1./16. },
2791         { 0x2152, U_NT_NUMERIC, 1./10. },
2792         { 0x2151, U_NT_NUMERIC, 1./9. },
2793         { 0x1245f, U_NT_NUMERIC, 1./8. },
2794         { 0x2150, U_NT_NUMERIC, 1./7. },
2795         { 0x2159, U_NT_NUMERIC, 1./6. },
2796         { 0x09f6, U_NT_NUMERIC, 3./16. },
2797         { 0x2155, U_NT_NUMERIC, 1./5. },
2798         { 0x00BD, U_NT_NUMERIC, 1./2. },
2799         { 0x0031, U_NT_DECIMAL, 1. },
2800         { 0x4e00, U_NT_NUMERIC, 1. },
2801         { 0x58f1, U_NT_NUMERIC, 1. },
2802         { 0x10320, U_NT_NUMERIC, 1. },
2803         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2804         { 0x00B2, U_NT_DIGIT, 2. },
2805         { 0x5f10, U_NT_NUMERIC, 2. },
2806         { 0x1813, U_NT_DECIMAL, 3. },
2807         { 0x5f0e, U_NT_NUMERIC, 3. },
2808         { 0x2173, U_NT_NUMERIC, 4. },
2809         { 0x8086, U_NT_NUMERIC, 4. },
2810         { 0x278E, U_NT_DIGIT, 5. },
2811         { 0x1D7F2, U_NT_DECIMAL, 6. },
2812         { 0x247A, U_NT_DIGIT, 7. },
2813         { 0x7396, U_NT_NUMERIC, 9. },
2814         { 0x1372, U_NT_NUMERIC, 10. },
2815         { 0x216B, U_NT_NUMERIC, 12. },
2816         { 0x16EE, U_NT_NUMERIC, 17. },
2817         { 0x249A, U_NT_NUMERIC, 19. },
2818         { 0x303A, U_NT_NUMERIC, 30. },
2819         { 0x5345, U_NT_NUMERIC, 30. },
2820         { 0x32B2, U_NT_NUMERIC, 37. },
2821         { 0x1375, U_NT_NUMERIC, 40. },
2822         { 0x10323, U_NT_NUMERIC, 50. },
2823         { 0x0BF1, U_NT_NUMERIC, 100. },
2824         { 0x964c, U_NT_NUMERIC, 100. },
2825         { 0x217E, U_NT_NUMERIC, 500. },
2826         { 0x2180, U_NT_NUMERIC, 1000. },
2827         { 0x4edf, U_NT_NUMERIC, 1000. },
2828         { 0x2181, U_NT_NUMERIC, 5000. },
2829         { 0x137C, U_NT_NUMERIC, 10000. },
2830         { 0x4e07, U_NT_NUMERIC, 10000. },
2831         { 0x12432, U_NT_NUMERIC, 216000. },
2832         { 0x12433, U_NT_NUMERIC, 432000. },
2833         { 0x4ebf, U_NT_NUMERIC, 100000000. },
2834         { 0x5146, U_NT_NUMERIC, 1000000000000. },
2835         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2836         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2837         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2838         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2839         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2840         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2841         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2842         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2843     };
2844
2845     double nv;
2846     UChar32 c;
2847     int32_t i, type;
2848
2849     for(i=0; i<LENGTHOF(values); ++i) {
2850         c=values[i].c;
2851         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2852         nv=u_getNumericValue(c);
2853
2854         if(type!=values[i].type) {
2855             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2856         }
2857         if(0.000001 <= fabs(nv - values[i].numValue)) {
2858             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2859         }
2860     }
2861 }
2862
2863 /**
2864  * Test the property names and property value names API.
2865  */
2866 static void
2867 TestPropertyNames(void) {
2868     int32_t p, v, choice=0, rev;
2869     UBool atLeastSomething = FALSE;
2870
2871     for (p=0; ; ++p) {
2872         UProperty propEnum = (UProperty)p;
2873         UBool sawProp = FALSE;
2874         if(p > 10 && !atLeastSomething) {
2875           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2876           return;
2877         }
2878
2879         for (choice=0; ; ++choice) {
2880             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2881             if (name) {
2882                 if (!sawProp)
2883                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2884                 log_verbose("%d=\"%s\"", choice, name);
2885                 sawProp = TRUE;
2886                 atLeastSomething = TRUE;
2887
2888                 /* test reverse mapping */
2889                 rev = u_getPropertyEnum(name);
2890                 if (rev != p) {
2891                     log_err("Property round-trip failure: %d -> %s -> %d\n",
2892                             p, name, rev);
2893                 }
2894             }
2895             if (!name && choice>0) break;
2896         }
2897         if (sawProp) {
2898             /* looks like a valid property; check the values */
2899             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2900             int32_t max = 0;
2901             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2902                 max = 255;
2903             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2904                 /* it's far too slow to iterate all the way up to
2905                    the real max, U_GC_P_MASK */
2906                 max = U_GC_NL_MASK;
2907             } else if (p == UCHAR_BLOCK) {
2908                 /* UBlockCodes, unlike other values, start at 1 */
2909                 max = 1;
2910             }
2911             log_verbose("\n");
2912             for (v=-1; ; ++v) {
2913                 UBool sawValue = FALSE;
2914                 for (choice=0; ; ++choice) {
2915                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2916                     if (vname) {
2917                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2918                         log_verbose("%d=\"%s\"", choice, vname);
2919                         sawValue = TRUE;
2920
2921                         /* test reverse mapping */
2922                         rev = u_getPropertyValueEnum(propEnum, vname);
2923                         if (rev != v) {
2924                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2925                                     pname, v, vname, rev);
2926                         }
2927                     }
2928                     if (!vname && choice>0) break;
2929                 }
2930                 if (sawValue) {
2931                     log_verbose("\n");
2932                 }
2933                 if (!sawValue && v>=max) break;
2934             }
2935         }
2936         if (!sawProp) {
2937             if (p>=UCHAR_STRING_LIMIT) {
2938                 break;
2939             } else if (p>=UCHAR_DOUBLE_LIMIT) {
2940                 p = UCHAR_STRING_START - 1;
2941             } else if (p>=UCHAR_MASK_LIMIT) {
2942                 p = UCHAR_DOUBLE_START - 1;
2943             } else if (p>=UCHAR_INT_LIMIT) {
2944                 p = UCHAR_MASK_START - 1;
2945             } else if (p>=UCHAR_BINARY_LIMIT) {
2946                 p = UCHAR_INT_START - 1;
2947             }
2948         }
2949     }
2950 }
2951
2952 /**
2953  * Test the property values API.  See JB#2410.
2954  */
2955 static void
2956 TestPropertyValues(void) {
2957     int32_t i, p, min, max;
2958     UErrorCode ec;
2959
2960     /* Min should be 0 for everything. */
2961     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2962     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2963         UProperty propEnum = (UProperty)p;
2964         min = u_getIntPropertyMinValue(propEnum);
2965         if (min != 0) {
2966             if (p == UCHAR_BLOCK) {
2967                 /* This is okay...for now.  See JB#2487.
2968                    TODO Update this for JB#2487. */
2969             } else {
2970                 const char* name;
2971                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2972                 if (name == NULL)
2973                     name = "<ERROR>";
2974                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2975                         name, min);
2976             }
2977         }
2978     }
2979
2980     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2981         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2982         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2983     }
2984
2985     /* Max should be -1 for invalid properties. */
2986     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
2987     if (max != -1) {
2988         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2989                 max);
2990     }
2991
2992     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
2993     for (i=0; i<2; ++i) {
2994         int32_t script;
2995         const char* desc;
2996         ec = U_ZERO_ERROR;
2997         switch (i) {
2998         case 0:
2999             script = uscript_getScript(-1, &ec);
3000             desc = "uscript_getScript(-1)";
3001             break;
3002         case 1:
3003             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3004             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3005             break;
3006         default:
3007             log_err("Internal test error. Too many scripts\n");
3008             return;
3009         }
3010         /* We don't explicitly test ec.  It should be U_FAILURE but it
3011            isn't documented as such. */
3012         if (script != (int32_t)USCRIPT_INVALID_CODE) {
3013             log_err("FAIL: %s = %d, exp. 0\n",
3014                     desc, script);
3015         }
3016     }
3017 }
3018
3019 /* various tests for consistency of UCD data and API behavior */
3020 static void
3021 TestConsistency() {
3022     char buffer[300];
3023     USet *set1, *set2, *set3, *set4;
3024     UErrorCode errorCode;
3025
3026     UChar32 start, end;
3027     int32_t i, length;
3028
3029     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3030     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3031     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3032     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3033     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3034
3035     U_STRING_DECL(mathBlocksPattern,
3036         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3037         1+32+46+46+45+43+1+1); /* +1 for NUL */
3038     U_STRING_DECL(mathPattern, "[:Math:]", 8);
3039     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3040     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3041     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3042
3043     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3044     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3045     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3046     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3047     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3048
3049     U_STRING_INIT(mathBlocksPattern,
3050         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3051         1+32+46+46+45+43+1+1); /* +1 for NUL */
3052     U_STRING_INIT(mathPattern, "[:Math:]", 8);
3053     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3054     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3055     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3056
3057     /*
3058      * It used to be that UCD.html and its precursors said
3059      * "Those dashes used to mark connections between pieces of words,
3060      *  plus the Katakana middle dot."
3061      *
3062      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3063      * but not from Hyphen.
3064      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3065      * Therefore, do not show errors when testing the Hyphen property.
3066      */
3067     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3068                 "known to the UTC and not considered errors.\n");
3069
3070     errorCode=U_ZERO_ERROR;
3071     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3072     set2=uset_openPattern(dashPattern, 8, &errorCode);
3073     if(U_SUCCESS(errorCode)) {
3074         /* remove the Katakana middle dot(s) from set1 */
3075         uset_remove(set1, 0x30fb);
3076         uset_remove(set1, 0xff65); /* halfwidth variant */
3077         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3078     } else {
3079         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3080     }
3081
3082     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3083     set3=uset_openPattern(formatPattern, 6, &errorCode);
3084     set4=uset_openPattern(alphaPattern, 14, &errorCode);
3085     if(U_SUCCESS(errorCode)) {
3086         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3087         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3088         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3089     } else {
3090         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3091     }
3092
3093     uset_close(set1);
3094     uset_close(set2);
3095     uset_close(set3);
3096     uset_close(set4);
3097
3098     /*
3099      * Check that each lowercase character has "small" in its name
3100      * and not "capital".
3101      * There are some such characters, some of which seem odd.
3102      * Use the verbose flag to see these notices.
3103      */
3104     errorCode=U_ZERO_ERROR;
3105     set1=uset_openPattern(lowerPattern, 13, &errorCode);
3106     if(U_SUCCESS(errorCode)) {
3107         for(i=0;; ++i) {
3108             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3109             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3110                 break; /* done */
3111             }
3112             if(U_FAILURE(errorCode)) {
3113                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3114                         i, u_errorName(errorCode));
3115                 break;
3116             }
3117             if(length!=0) {
3118                 break; /* done with code points, got a string or -1 */
3119             }
3120
3121             while(start<=end) {
3122                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3123                 if(U_FAILURE(errorCode)) {
3124                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3125                     errorCode=U_ZERO_ERROR;
3126                 }
3127                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3128                     strstr(buffer, "SMALL CAPITAL")==NULL
3129                 ) {
3130                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3131                 }
3132                 ++start;
3133             }
3134         }
3135     } else {
3136         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3137     }
3138     uset_close(set1);
3139
3140     /* verify that all assigned characters in Math blocks are exactly Math characters */
3141     errorCode=U_ZERO_ERROR;
3142     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3143     set2=uset_openPattern(mathPattern, 8, &errorCode);
3144     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3145     if(U_SUCCESS(errorCode)) {
3146         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3147         uset_complement(set3);      /* assigned characters */
3148         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3149         compareUSets(set1, set2,
3150                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
3151                      TRUE);
3152     } else {
3153         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3154     }
3155     uset_close(set1);
3156     uset_close(set2);
3157     uset_close(set3);
3158
3159     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3160     errorCode=U_ZERO_ERROR;
3161     set1=uset_openPattern(unknownPattern, 14, &errorCode);
3162     set2=uset_openPattern(reservedPattern, 20, &errorCode);
3163     if(U_SUCCESS(errorCode)) {
3164         compareUSets(set1, set2,
3165                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3166                      TRUE);
3167     } else {
3168         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3169     }
3170     uset_close(set1);
3171     uset_close(set2);
3172 }
3173
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 *
 * While this macro is non-zero, code guarded by
 * "#if !HARDCODED_DATA_4497" is compiled out.
 */
#define HARDCODED_DATA_4497 1
3185
3186 /* API coverage for ucase.c */
3187 static void TestUCase() {
3188 #if !HARDCODED_DATA_4497
3189     UDataMemory *pData;
3190     UCaseProps *csp;
3191     const UCaseProps *ccsp;
3192     UErrorCode errorCode;
3193
3194     /* coverage for ucase_openBinary() */
3195     errorCode=U_ZERO_ERROR;
3196     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3197     if(U_FAILURE(errorCode)) {
3198         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3199                     u_errorName(errorCode));
3200         return;
3201     }
3202
3203     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3204     if(U_FAILURE(errorCode)) {
3205         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3206                 u_errorName(errorCode));
3207         udata_close(pData);
3208         return;
3209     }
3210
3211     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3212         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3213     }
3214
3215     ucase_close(csp);
3216     udata_close(pData);
3217
3218     /* coverage for ucase_getDummy() */
3219     errorCode=U_ZERO_ERROR;
3220     ccsp=ucase_getDummy(&errorCode);
3221     if(ucase_tolower(ccsp, 0x41)!=0x41) {
3222         log_err("ucase_tolower(dummy, A)!=A\n");
3223     }
3224 #endif
3225 }
3226
3227 /* API coverage for ubidi_props.c */
3228 static void TestUBiDiProps() {
3229 #if !HARDCODED_DATA_4497
3230     UDataMemory *pData;
3231     UBiDiProps *bdp;
3232     const UBiDiProps *cbdp;
3233     UErrorCode errorCode;
3234
3235     /* coverage for ubidi_openBinary() */
3236     errorCode=U_ZERO_ERROR;
3237     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3238     if(U_FAILURE(errorCode)) {
3239         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3240                     u_errorName(errorCode));
3241         return;
3242     }
3243
3244     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3245     if(U_FAILURE(errorCode)) {
3246         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3247                 u_errorName(errorCode));
3248         udata_close(pData);
3249         return;
3250     }
3251
3252     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3253         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3254     }
3255
3256     ubidi_closeProps(bdp);
3257     udata_close(pData);
3258
3259     /* coverage for ubidi_getDummy() */
3260     errorCode=U_ZERO_ERROR;
3261     cbdp=ubidi_getDummy(&errorCode);
3262     if(ubidi_getClass(cbdp, 0x20)!=0) {
3263         log_err("ubidi_getClass(dummy, space)!=0\n");
3264     }
3265 #endif
3266 }
3267
/* test case folding, compare return values with CaseFolding.txt ------------ */

/*
 * Flags naming the kinds of case folding.
 * They are OR-ed together to record which foldings of a character
 * have already been tested, and to select which ones to test.
 */
enum {
    CF_SIMPLE=0x1,
    CF_FULL=0x2,
    CF_TURKIC=0x4,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC  /* all three flags */
};
3277
3278 static void
3279 testFold(UChar32 c, int which,
3280          UChar32 simple, UChar32 turkic,
3281          const UChar *full, int32_t fullLength,
3282          const UChar *turkicFull, int32_t turkicFullLength) {
3283     UChar s[2], t[32];
3284     UChar32 c2;
3285     int32_t length, length2;
3286
3287     UErrorCode errorCode=U_ZERO_ERROR;
3288
3289     length=0;
3290     U16_APPEND_UNSAFE(s, length, c);
3291
3292     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3293         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3294     }
3295     if((which&CF_FULL)!=0) {
3296         length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3297         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3298             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3299         }
3300     }
3301     if((which&CF_TURKIC)!=0) {
3302         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3303             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3304         }
3305
3306         length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3307         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3308             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3309         }
3310     }
3311 }
3312
3313 /* test that c case-folds to itself */
3314 static void
3315 testFoldToSelf(UChar32 c, int which) {
3316     UChar s[2];
3317     int32_t length;
3318
3319     length=0;
3320     U16_APPEND_UNSAFE(s, length, c);
3321     testFold(c, which, c, c, s, length, s, length);
3322 }
3323
3324 struct CaseFoldingData {
3325     USet *notSeen;
3326     UChar32 prev, prevSimple;
3327     UChar prevFull[32];
3328     int32_t prevFullLength;
3329     int which;
3330 };
3331 typedef struct CaseFoldingData CaseFoldingData;
3332
3333 static void U_CALLCONV
3334 caseFoldingLineFn(void *context,
3335                   char *fields[][2], int32_t fieldCount,
3336                   UErrorCode *pErrorCode) {
3337     CaseFoldingData *pData=(CaseFoldingData *)context;
3338     char *end;
3339     UChar full[32];
3340     UChar32 c, prev, simple;
3341     int32_t count;
3342     int which;
3343     char status;
3344
3345     /* get code point */
3346     const char *s=u_skipWhitespace(fields[0][0]);
3347     if(0==strncmp(s, "0000..10FFFF", 12)) {
3348         /*
3349          * Ignore the line
3350          * # @missing: 0000..10FFFF; C; <code point>
3351          * because maps-to-self is already our default, and this line breaks this parser.
3352          */
3353         return;
3354     }
3355     c=(UChar32)strtoul(s, &end, 16);
3356     end=(char *)u_skipWhitespace(end);
3357     if(end<=fields[0][0] || end!=fields[0][1]) {
3358         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3359         *pErrorCode=U_PARSE_ERROR;
3360         return;
3361     }
3362
3363     /* get the status of this mapping */
3364     status=*u_skipWhitespace(fields[1][0]);
3365     if(status!='C' && status!='S' && status!='F' && status!='T') {
3366         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3367         *pErrorCode=U_PARSE_ERROR;
3368         return;
3369     }
3370
3371     /* get the mapping */
3372     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3373     if(U_FAILURE(*pErrorCode)) {
3374         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3375         return;
3376     }
3377
3378     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3379     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3380         simple=c;
3381     }
3382
3383     if(c!=(prev=pData->prev)) {
3384         /*
3385          * Test remaining mappings for the previous code point.
3386          * If a turkic folding was not mentioned, then it should fold the same
3387          * as the regular simple case folding.
3388          */
3389         UChar prevString[2];
3390         int32_t length;
3391
3392         length=0;
3393         U16_APPEND_UNSAFE(prevString, length, prev);
3394         testFold(prev, (~pData->which)&CF_ALL,
3395                  prev, pData->prevSimple,
3396                  prevString, length,
3397                  pData->prevFull, pData->prevFullLength);
3398         pData->prev=pData->prevSimple=c;
3399         length=0;
3400         U16_APPEND_UNSAFE(pData->prevFull, length, c);
3401         pData->prevFullLength=length;
3402         pData->which=0;
3403     }
3404
3405     /*
3406      * Turn the status into a bit set of case foldings to test.
3407      * Remember non-Turkic case foldings as defaults for Turkic mode.
3408      */
3409     switch(status) {
3410     case 'C':
3411         which=CF_SIMPLE|CF_FULL;
3412         pData->prevSimple=simple;
3413         u_memcpy(pData->prevFull, full, count);
3414         pData->prevFullLength=count;
3415         break;
3416     case 'S':
3417         which=CF_SIMPLE;
3418         pData->prevSimple=simple;
3419         break;
3420     case 'F':
3421         which=CF_FULL;
3422         u_memcpy(pData->prevFull, full, count);
3423         pData->prevFullLength=count;
3424         break;
3425     case 'T':
3426         which=CF_TURKIC;
3427         break;
3428     default:
3429         which=0;
3430         break; /* won't happen because of test above */
3431     }
3432
3433     testFold(c, which, simple, simple, full, count, full, count);
3434
3435     /* remember which case foldings of c have been tested */
3436     pData->which|=which;
3437
3438     /* remove c from the set of ones not mentioned in CaseFolding.txt */
3439     uset_remove(pData->notSeen, c);
3440 }
3441
3442 static void
3443 TestCaseFolding() {
3444     CaseFoldingData data={ NULL };
3445     char *fields[3][2];
3446     UErrorCode errorCode;
3447
3448     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3449
3450     errorCode=U_ZERO_ERROR;
3451     /* test BMP & plane 1 - nothing interesting above */
3452     data.notSeen=uset_open(0, 0x1ffff);
3453     data.prevFullLength=1; /* length of full case folding of U+0000 */
3454
3455     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3456     if(U_SUCCESS(errorCode)) {
3457         int32_t i, start, end;
3458
3459         /* add a pseudo-last line to finish testing of the actual last one */
3460         fields[0][0]=lastLine;
3461         fields[0][1]=lastLine+6;
3462         fields[1][0]=lastLine+7;
3463         fields[1][1]=lastLine+9;
3464         fields[2][0]=lastLine+10;
3465         fields[2][1]=lastLine+17;
3466         caseFoldingLineFn(&data, fields, 3, &errorCode);
3467
3468         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3469         for(i=0;
3470             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3471                 U_SUCCESS(errorCode);
3472             ++i
3473         ) {
3474             do {
3475                 testFoldToSelf(start, CF_ALL);
3476             } while(++start<=end);
3477         }
3478     }
3479
3480     uset_close(data.notSeen);
3481 }