icuSources/test/cintltst/cucdtst.c

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 1997-2016, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8 /*******************************************************************************
   9 *
  10 * File CUCDTST.C
  11 *
  12 * Modification History:
  13 *        Name                     Description
  14 *     Madhu Katragadda            Ported for C API, added tests for string functions
  15 ********************************************************************************
  16 */
  17
  18 #include <string.h>
  19 #include <math.h>
  20 #include <stdlib.h>
  21
  22 #include "unicode/utypes.h"
  23 #include "unicode/uchar.h"
  24 #include "unicode/putil.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/uloc.h"
  27 #include "unicode/unorm2.h"
  28 #include "unicode/utf16.h"
  29 #include "unicode/utf_old.h"
  30 #include "cintltst.h"
  31 #include "putilimp.h"
  32 #include "uparse.h"
  33 #include "ucase.h"
  34 #include "ubidi_props.h"
  35 #include "uprops.h"
  36 #include "uset_imp.h"
  37 #include "usc_impl.h"
  38 #include "udatamem.h"
  39 #include "cucdapi.h"
  40 #include "cmemory.h"
  41
  42 /* prototypes --------------------------------------------------------------- */
  43
  44 static void TestUpperLower(void);
  45 static void TestLetterNumber(void);
  46 static void TestMisc(void);
  47 static void TestPOSIX(void);
  48 static void TestControlPrint(void);
  49 static void TestIdentifier(void);
  50 static void TestUnicodeData(void);
  51 static void TestCodeUnit(void);
  52 static void TestCodePoint(void);
  53 static void TestCharLength(void);
  54 static void TestCharNames(void);
  55 static void TestUCharFromNameUnderflow(void);
  56 static void TestMirroring(void);
  57 static void TestUScriptRunAPI(void);
  58 static void TestAdditionalProperties(void);
  59 static void TestNumericProperties(void);
  60 static void TestPropertyNames(void);
  61 static void TestPropertyValues(void);
  62 static void TestConsistency(void);
  63 static void TestCaseFolding(void);
  64 static void TestBinaryCharacterPropertiesAPI(void);
  65 static void TestIntCharacterPropertiesAPI(void);
  66
  67 /* internal methods used */
  68 static int32_t MakeProp(char* str);
  69 static int32_t MakeDir(char* str);
  70
  71 /* helpers ------------------------------------------------------------------ */
  72
  73 static void
  74 parseUCDFile(const char *filename,
  75              char *fields[][2], int32_t fieldCount,
  76              UParseLineFn *lineFn, void *context,
  77              UErrorCode *pErrorCode) {
  78     char path[256];
  79     char backupPath[256];
  80
  81     if(U_FAILURE(*pErrorCode)) {
  82         return;
  83     }
  84
  85     /* Look inside ICU_DATA first */
  86     strcpy(path, u_getDataDirectory());
  87     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
  88     strcat(path, filename);
  89
  90     /* As a fallback, try to guess where the source data was located
  91      *    at the time ICU was built, and look there.
  92      */
  93     strcpy(backupPath, ctest_dataSrcDir());
  94     strcat(backupPath, U_FILE_SEP_STRING);
  95     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
  96     strcat(backupPath, filename);
  97
  98     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
  99     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
 100         *pErrorCode=U_ZERO_ERROR;
 101         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
 102     }
 103     if(U_FAILURE(*pErrorCode)) {
 104         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
 105     }
 106 }
 107
 108 /* test data ---------------------------------------------------------------- */
 109
/*
 * Parallel lookup tables for general-category labels.
 *
 * tagStrings packs the two-character labels back to back with no separators;
 * tagValues holds one U_... category constant per label, in the same order
 * (each entry is annotated with the label it belongs to).
 * NOTE(review): presumably consumed by MakeProp(), which is defined outside
 * this excerpt - confirm there before reordering either table.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
 144
/*
 * Table of short direction labels; each row is a NUL-terminated string of
 * at most four characters (row size is 5 bytes).
 * NOTE(review): presumably the row index is meant to match a bidi direction
 * enum value and the table is consumed by MakeDir(), which is defined outside
 * this excerpt - confirm there before reordering.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI",
    "LRI",
    "RLI",
    "PDI"
};
 171
 172 void addUnicodeTest(TestNode** root);
 173
 174 void addUnicodeTest(TestNode** root)
 175 {
 176     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
 177     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
 178     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
 179     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
 180     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
 181     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
 182     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
 183     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
 184     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
 185     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
 186     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
 187     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
 188     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
 189     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
 190     addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
 191     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
 192     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
 193     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
 194     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
 195     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
 196     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
 197     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
 198     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
 199     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
 200     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
 201     addTest(root, &TestBinaryCharacterPropertiesAPI,
 202             "tsutil/cucdtst/TestBinaryCharacterPropertiesAPI");
 203     addTest(root, &TestIntCharacterPropertiesAPI,
 204             "tsutil/cucdtst/TestIntCharacterPropertiesAPI");
 205 }
 206
 207 /*==================================================== */
 208 /* test u_toupper() and u_tolower()                    */
 209 /*==================================================== */
 210 static void TestUpperLower()
 211 {
 212     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
 213     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
 214     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
 215     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 216     int32_t i;
 217
 218     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
 219     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 220
 221 /*
 222 Checks LetterLike Symbols which were previously a source of confusion
 223 [Bertrand A. D. 02/04/98]
 224 */
 225     for (i=0x2100;i<0x2138;i++)
 226     {
 227         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
 228         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
 229         {
 230             if (i != (int)u_tolower(i)) /* itself */
 231                 log_err("Failed case conversion with itself: U+%04x\n", i);
 232             if (i != (int)u_toupper(i))
 233                 log_err("Failed case conversion with itself: U+%04x\n", i);
 234         }
 235     }
 236
 237     for(i=0; i < u_strlen(upper); i++){
 238         if(u_tolower(upper[i]) != lower[i]){
 239             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
 240         }
 241     }
 242
 243     log_verbose("testing upper lower\n");
 244     for (i = 0; i < 21; i++) {
 245
 246         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
 247         {
 248             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
 249         }
 250         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
 251          {
 252             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
 253         }
 254         else if (upperTest[i] != u_tolower(lowerTest[i]))
 255         {
 256             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
 257         }
 258         else if (lowerTest[i] != u_toupper(upperTest[i]))
 259          {
 260             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
 261         }
 262         else if (upperTest[i] != u_tolower(upperTest[i]))
 263         {
 264             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
 265         }
 266         else if (lowerTest[i] != u_toupper(lowerTest[i]))
 267         {
 268             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
 269         }
 270     }
 271     log_verbose("done testing upper lower\n");
 272
 273     log_verbose("testing u_istitle\n");
 274     {
 275         static const UChar expected[] = {
 276             0x1F88,
 277             0x1F89,
 278             0x1F8A,
 279             0x1F8B,
 280             0x1F8C,
 281             0x1F8D,
 282             0x1F8E,
 283             0x1F8F,
 284             0x1F88,
 285             0x1F89,
 286             0x1F8A,
 287             0x1F8B,
 288             0x1F8C,
 289             0x1F8D,
 290             0x1F8E,
 291             0x1F8F,
 292             0x1F98,
 293             0x1F99,
 294             0x1F9A,
 295             0x1F9B,
 296             0x1F9C,
 297             0x1F9D,
 298             0x1F9E,
 299             0x1F9F,
 300             0x1F98,
 301             0x1F99,
 302             0x1F9A,
 303             0x1F9B,
 304             0x1F9C,
 305             0x1F9D,
 306             0x1F9E,
 307             0x1F9F,
 308             0x1FA8,
 309             0x1FA9,
 310             0x1FAA,
 311             0x1FAB,
 312             0x1FAC,
 313             0x1FAD,
 314             0x1FAE,
 315             0x1FAF,
 316             0x1FA8,
 317             0x1FA9,
 318             0x1FAA,
 319             0x1FAB,
 320             0x1FAC,
 321             0x1FAD,
 322             0x1FAE,
 323             0x1FAF,
 324             0x1FBC,
 325             0x1FBC,
 326             0x1FCC,
 327             0x1FCC,
 328             0x1FFC,
 329             0x1FFC,
 330         };
 331         int32_t num = UPRV_LENGTHOF(expected);
 332         for(i=0; i<num; i++){
 333             if(!u_istitle(expected[i])){
 334                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
 335             }
 336         }
 337
 338     }
 339 }
 340
 341 /* compare two sets and verify that their difference or intersection is empty */
 342 static UBool
 343 showADiffB(const USet *a, const USet *b,
 344            const char *a_name, const char *b_name,
 345            UBool expect, UBool diffIsError) {
 346     USet *aa;
 347     int32_t i, start, end, length;
 348     UErrorCode errorCode;
 349
 350     /*
 351      * expect:
 352      * TRUE  -> a-b should be empty, that is, b should contain all of a
 353      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
 354      */
 355     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
 356         return TRUE;
 357     }
 358
 359     /* clone a to aa because a is const */
 360     aa=uset_open(1, 0);
 361     if(aa==NULL) {
 362         /* unusual problem - out of memory? */
 363         return FALSE;
 364     }
 365     uset_addAll(aa, a);
 366
 367     /* compute the set in question */
 368     if(expect) {
 369         /* a-b */
 370         uset_removeAll(aa, b);
 371     } else {
 372         /* a&b */
 373         uset_retainAll(aa, b);
 374     }
 375
 376     /* aa is not empty because of the initial tests above; show its contents */
 377     errorCode=U_ZERO_ERROR;
 378     i=0;
 379     for(;;) {
 380         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
 381         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 382             break; /* done */
 383         }
 384         if(U_FAILURE(errorCode)) {
 385             log_err("error comparing %s with %s at difference item %d: %s\n",
 386                 a_name, b_name, i, u_errorName(errorCode));
 387             break;
 388         }
 389         if(length!=0) {
 390             break; /* done with code points, got a string or -1 */
 391         }
 392
 393         if(diffIsError) {
 394             if(expect) {
 395                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 396             } else {
 397                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 398             }
 399         } else {
 400             if(expect) {
 401                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 402             } else {
 403                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 404             }
 405         }
 406
 407         ++i;
 408     }
 409
 410     uset_close(aa);
 411     return FALSE;
 412 }
 413
 414 static UBool
 415 showAMinusB(const USet *a, const USet *b,
 416             const char *a_name, const char *b_name,
 417             UBool diffIsError) {
 418     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
 419 }
 420
 421 static UBool
 422 showAIntersectB(const USet *a, const USet *b,
 423                 const char *a_name, const char *b_name,
 424                 UBool diffIsError) {
 425     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
 426 }
 427
 428 static UBool
 429 compareUSets(const USet *a, const USet *b,
 430              const char *a_name, const char *b_name,
 431              UBool diffIsError) {
 432     /*
 433      * Use an arithmetic & not a logical && so that both branches
 434      * are always taken and all differences are shown.
 435      */
 436     return
 437         showAMinusB(a, b, a_name, b_name, diffIsError) &
 438         showAMinusB(b, a, b_name, a_name, diffIsError);
 439 }
 440
 441 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
 442 static void TestLetterNumber()
 443 {
 444     UChar i = 0x0000;
 445
 446     log_verbose("Testing for isalpha\n");
 447     for (i = 0x0041; i < 0x005B; i++) {
 448         if (!u_isalpha(i))
 449         {
 450             log_err("Failed isLetter test at  %.4X\n", i);
 451         }
 452     }
 453     for (i = 0x0660; i < 0x066A; i++) {
 454         if (u_isalpha(i))
 455         {
 456             log_err("Failed isLetter test with numbers at %.4X\n", i);
 457         }
 458     }
 459
 460     log_verbose("Testing for isdigit\n");
 461     for (i = 0x0660; i < 0x066A; i++) {
 462         if (!u_isdigit(i))
 463         {
 464             log_verbose("Failed isNumber test at %.4X\n", i);
 465         }
 466     }
 467
 468     log_verbose("Testing for isalnum\n");
 469     for (i = 0x0041; i < 0x005B; i++) {
 470         if (!u_isalnum(i))
 471         {
 472             log_err("Failed isAlNum test at  %.4X\n", i);
 473         }
 474     }
 475     for (i = 0x0660; i < 0x066A; i++) {
 476         if (!u_isalnum(i))
 477         {
 478             log_err("Failed isAlNum test at  %.4X\n", i);
 479         }
 480     }
 481
 482     {
 483         /*
 484          * The following checks work only starting from Unicode 4.0.
 485          * Check the version number here.
 486          */
 487         static UVersionInfo u401={ 4, 0, 1, 0 };
 488         UVersionInfo version;
 489         u_getUnicodeVersion(version);
 490         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
 491             return;
 492         }
 493     }
 494
 495     {
 496         /*
 497          * Sanity check:
 498          * Verify that exactly the digit characters have decimal digit values.
 499          * This assumption is used in the implementation of u_digit()
 500          * (which checks nt=de)
 501          * compared with the parallel java.lang.Character.digit()
 502          * (which checks Nd).
 503          *
 504          * This was not true in Unicode 3.2 and earlier.
 505          * Unicode 4.0 fixed discrepancies.
 506          * Unicode 4.0.1 re-introduced problems in this area due to an
 507          * unintentionally incomplete last-minute change.
 508          */
 509         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
 510         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 511
 512         USet *digits, *decimalValues;
 513         UErrorCode errorCode;
 514
 515         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
 516         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 517         errorCode=U_ZERO_ERROR;
 518         digits=uset_openPattern(digitsPattern, 6, &errorCode);
 519         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
 520
 521         if(U_SUCCESS(errorCode)) {
 522             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
 523         }
 524
 525         uset_close(digits);
 526         uset_close(decimalValues);
 527     }
 528 }
 529
 530 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
 531                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
 532                                 UBool expected) {
 533     int32_t i;
 534     for (i = 0; i < sampleCharsLength; ++i) {
 535         UBool result = propFn(sampleChars[i]);
 536         if (result != expected) {
 537             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
 538                     propName, sampleChars[i], result);
 539         }
 540     }
 541 }
 542
 543 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
 544 static void TestMisc()
 545 {
 546     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
 547     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
 548     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
 549     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
 550     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
 551     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
 552 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
 553     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
 554     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
 555     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
 556     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
 557
 558     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
 559
 560     uint32_t mask;
 561
 562     int32_t i;
 563     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
 564     UVersionInfo realVersion;
 565
 566     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
 567
 568     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
 569     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
 570
 571     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 572                         sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
 573     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 574                         sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
 575
 576     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 577                         sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE);
 578     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 579                         sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE);
 580
 581     testSampleCharProps(u_isdefined, "u_isdefined",
 582                         sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);
 583     testSampleCharProps(u_isdefined, "u_isdefined",
 584                         sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);
 585
 586     testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE);
 587     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE);
 588
 589     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE);
 590     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE);
 591
 592     for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
 593         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
 594             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
 595                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
 596         }
 597     }
 598
 599     /* Tests the ICU version #*/
 600     u_getVersion(realVersion);
 601     u_versionToString(realVersion, icuVersion);
 602     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
 603     {
 604         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
 605     }
 606 #if defined(ICU_VERSION)
 607     /* test only happens where we have configure.in with VERSION - sanity check. */
 608     if(strcmp(U_ICU_VERSION, ICU_VERSION))
 609     {
 610         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
 611     }
 612 #endif
 613
 614     /* test U_GC_... */
 615     if(
 616         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
 617         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
 618         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
 619         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
 620         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
 621         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
 622     ) {
 623         log_err("error: U_GET_GC_MASK does not work properly\n");
 624     }
 625
 626     mask=0;
 627     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
 628
 629     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
 630     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
 631     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
 632     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
 633     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
 634
 635     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
 636     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
 637     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
 638
 639     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
 640     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
 641     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
 642
 643     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
 644     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
 645     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
 646
 647     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
 648     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
 649     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
 650     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
 651
 652     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
 653     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
 654     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
 655     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
 656     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
 657
 658     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
 659     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
 660     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
 661     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
 662
 663     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
 664     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
 665
 666     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 667         log_err("error: problems with U_GC_XX_MASK constants\n");
 668     }
 669
 670     mask=0;
 671     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
 672     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
 673     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
 674     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
 675     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
 676     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
 677     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
 678
 679     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 680         log_err("error: problems with U_GC_Y_MASK constants\n");
 681     }
 682     {
 683         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
 684         for(i=0; i<10; i++){
 685             if(digit[i]!=u_forDigit(i,10)){
 686                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
 687             }
 688         }
 689     }
 690
 691     /* test u_digit() */
 692     {
 693         static const struct {
 694             UChar32 c;
 695             int8_t radix, value;
 696         } data[]={
 697             /* base 16 */
 698             { 0x0031, 16, 1 },
 699             { 0x0038, 16, 8 },
 700             { 0x0043, 16, 12 },
 701             { 0x0066, 16, 15 },
 702             { 0x00e4, 16, -1 },
 703             { 0x0662, 16, 2 },
 704             { 0x06f5, 16, 5 },
 705             { 0xff13, 16, 3 },
 706             { 0xff41, 16, 10 },
 707
 708             /* base 8 */
 709             { 0x0031, 8, 1 },
 710             { 0x0038, 8, -1 },
 711             { 0x0043, 8, -1 },
 712             { 0x0066, 8, -1 },
 713             { 0x00e4, 8, -1 },
 714             { 0x0662, 8, 2 },
 715             { 0x06f5, 8, 5 },
 716             { 0xff13, 8, 3 },
 717             { 0xff41, 8, -1 },
 718
 719             /* base 36 */
 720             { 0x5a, 36, 35 },
 721             { 0x7a, 36, 35 },
 722             { 0xff3a, 36, 35 },
 723             { 0xff5a, 36, 35 },
 724
 725             /* wrong radix values */
 726             { 0x0031, 1, -1 },
 727             { 0xff3a, 37, -1 }
 728         };
 729
 730         for(i=0; i<UPRV_LENGTHOF(data); ++i) {
 731             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
 732                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
 733                         data[i].c,
 734                         data[i].radix,
 735                         u_digit(data[i].c, data[i].radix),
 736                         data[i].value);
 737             }
 738         }
 739     }
 740 }
 741
 742 /* test C/POSIX-style functions --------------------------------------------- */
 743
/*
 * bit flags
 * One bit per C/POSIX-style classification function; bit n corresponds to
 * entry n of posixClasses[] below (ISAL -> isalpha, ... ISCN -> iscntrl).
 */
#define ISAL     1
#define ISLO     2
#define ISUP     4

#define ISDI     8
#define ISXD  0x10

#define ISAN  0x20

#define ISPU  0x40
#define ISGR  0x80
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800

/* C/POSIX-style functions, in the same order as the bit flags */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/* fn: function under test; name: suffix printed after "u_" in error messages */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
 782
/*
 * Expected results for the C/POSIX-style functions.
 * c: code point to test; posixResults: OR of the IS.. flags of all functions
 * that are expected to return TRUE for c (an absent flag means FALSE).
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
 822
 823 static void
 824 TestPOSIX() {
 825     uint32_t mask;
 826     int32_t cl, i;
 827     UBool expect;
 828
 829     mask=1;
 830     for(cl=0; cl<12; ++cl) {
 831         for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
 832             expect=(UBool)((posixData[i].posixResults&mask)!=0);
 833             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
 834                 log_err("u_%s(U+%04x)=%s is wrong\n",
 835                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
 836             }
 837         }
 838         mask<<=1;
 839     }
 840 }
 841
 842 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
 843 static void TestControlPrint()
 844 {
 845     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
 846     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
 847     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
 848     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
 849     UChar32 c;
 850
 851     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE);
 852     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE);
 853
 854     testSampleCharProps(u_isprint, "u_isprint",
 855                         samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);
 856     testSampleCharProps(u_isprint, "u_isprint",
 857                         sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE);
 858
 859     /* test all ISO 8 controls */
 860     for(c=0; c<=0x9f; ++c) {
 861         if(c==0x20) {
 862             /* skip ASCII graphic characters and continue with DEL */
 863             c=0x7f;
 864         }
 865         if(!u_iscntrl(c)) {
 866             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
 867         }
 868         if(!u_isISOControl(c)) {
 869             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
 870         }
 871         if(u_isprint(c)) {
 872             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
 873         }
 874     }
 875
 876     /* test all Latin-1 graphic characters */
 877     for(c=0x20; c<=0xff; ++c) {
 878         if(c==0x7f) {
 879             c=0xa0;
 880         } else if(c==0xad) {
 881             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
 882             ++c;
 883         }
 884         if(!u_isprint(c)) {
 885             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
 886         }
 887     }
 888 }
 889
 890 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
 891 static void TestIdentifier()
 892 {
 893     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
 894     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
 895     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
 896     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
 897     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
 898     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
 899     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
 900     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
 901     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
 902     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
 903
 904     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 905                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
 906     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 907                         sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE);
 908
 909     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 910                         sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE);
 911     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 912                         sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);
 913
 914     /* IDPart should imply IDStart */
 915     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 916                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
 917
 918     testSampleCharProps(u_isIDStart, "u_isIDStart",
 919                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
 920     testSampleCharProps(u_isIDStart, "u_isIDStart",
 921                         sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE);
 922
 923     testSampleCharProps(u_isIDPart, "u_isIDPart",
 924                         sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);
 925     testSampleCharProps(u_isIDPart, "u_isIDPart",
 926                         sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE);
 927
 928     /* IDPart should imply IDStart */
 929     testSampleCharProps(u_isIDPart, "u_isIDPart",
 930                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
 931
 932     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 933                         sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);
 934     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 935                         sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE);
 936 }
 937
 938 /* for each line of UnicodeData.txt, check some of the properties */
 939 typedef struct UnicodeDataContext {
 940 #if UCONFIG_NO_NORMALIZATION
 941     const void *dummy;
 942 #else
 943     const UNormalizer2 *nfc;
 944     const UNormalizer2 *nfkc;
 945 #endif
 946 } UnicodeDataContext;
 947
 948 /*
 949  * ### TODO
 950  * This test fails incorrectly if the First or Last code point of a repetitive area
 951  * is overridden, which is allowed and is encouraged for the PUAs.
 952  * Currently, this means that both area First/Last and override lines are
 953  * tested against the properties from the API,
 954  * and the area boundary will not match and cause an error.
 955  *
 956  * This function should detect area boundaries and skip them for the test of individual
 957  * code points' properties.
 958  * Then it should check that the areas contain all the same properties except where overridden.
 959  * For this, it would have had to set a flag for which code points were listed explicitly.
 960  */
 961 static void U_CALLCONV
 962 unicodeDataLineFn(void *context,
 963                   char *fields[][2], int32_t fieldCount,
 964                   UErrorCode *pErrorCode)
 965 {
 966     char buffer[100];
 967     const char *d;
 968     char *end;
 969     uint32_t value;
 970     UChar32 c;
 971     int32_t i;
 972     int8_t type;
 973     int32_t dt;
 974     UChar dm[32], s[32];
 975     int32_t dmLength, length;
 976
 977 #if !UCONFIG_NO_NORMALIZATION
 978     const UNormalizer2 *nfc, *nfkc;
 979 #endif
 980
 981     /* get the character code, field 0 */
 982     c=strtoul(fields[0][0], &end, 16);
 983     if(end<=fields[0][0] || end!=fields[0][1]) {
 984         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
 985         return;
 986     }
 987     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
 988         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
 989         return;
 990     }
 991
 992     /* get general category, field 2 */
 993     *fields[2][1]=0;
 994     type = (int8_t)tagValues[MakeProp(fields[2][0])];
 995     if(u_charType(c)!=type) {
 996         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
 997     }
 998     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
 999         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1000     }
1001
1002     /* get canonical combining class, field 3 */
1003     value=strtoul(fields[3][0], &end, 10);
1004     if(end<=fields[3][0] || end!=fields[3][1]) {
1005         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1006         return;
1007     }
1008     if(value>255) {
1009         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1010         return;
1011     }
1012 #if !UCONFIG_NO_NORMALIZATION
1013     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1014         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1015     }
1016     nfkc=((UnicodeDataContext *)context)->nfkc;
1017     if(value!=unorm2_getCombiningClass(nfkc, c)) {
1018         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1019     }
1020 #endif
1021
1022     /* get BiDi category, field 4 */
1023     *fields[4][1]=0;
1024     i=MakeDir(fields[4][0]);
1025     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1026         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1027     }
1028
1029     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1030     d=NULL;
1031     if(fields[5][0]==fields[5][1]) {
1032         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1033         if(c==0xac00 || c==0xd7a3) {
1034             dt=U_DT_CANONICAL;
1035         } else {
1036             dt=U_DT_NONE;
1037         }
1038     } else {
1039         d=fields[5][0];
1040         *fields[5][1]=0;
1041         dt=UCHAR_INVALID_CODE;
1042         if(*d=='<') {
1043             end=strchr(++d, '>');
1044             if(end!=NULL) {
1045                 *end=0;
1046                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1047                 d=u_skipWhitespace(end+1);
1048             }
1049         } else {
1050             dt=U_DT_CANONICAL;
1051         }
1052     }
1053     if(dt>U_DT_NONE) {
1054         if(c==0xac00) {
1055             dm[0]=0x1100;
1056             dm[1]=0x1161;
1057             dm[2]=0;
1058             dmLength=2;
1059         } else if(c==0xd7a3) {
1060             dm[0]=0xd788;
1061             dm[1]=0x11c2;
1062             dm[2]=0;
1063             dmLength=2;
1064         } else {
1065             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1066         }
1067     } else {
1068         dmLength=-1;
1069     }
1070     if(dt<0 || U_FAILURE(*pErrorCode)) {
1071         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1072         return;
1073     }
1074 #if !UCONFIG_NO_NORMALIZATION
1075     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1076     if(i!=dt) {
1077         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1078     }
1079     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1080     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1081     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1082         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1083                 "or the Decomposition_Mapping is different (%s)\n",
1084                 c, length, dmLength, u_errorName(*pErrorCode));
1085         return;
1086     }
1087     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1088     if(dt!=U_DT_CANONICAL) {
1089         dmLength=-1;
1090     }
1091     nfc=((UnicodeDataContext *)context)->nfc;
1092     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1093     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1094         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1095                 "or the Decomposition_Mapping is different (%s)\n",
1096                 c, length, dmLength, u_errorName(*pErrorCode));
1097         return;
1098     }
1099     /* recompose */
1100     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1101         UChar32 a, b, composite;
1102         i=0;
1103         U16_NEXT(dm, i, dmLength, a);
1104         U16_NEXT(dm, i, dmLength, b);
1105         /* i==dmLength */
1106         composite=unorm2_composePair(nfc, a, b);
1107         if(composite!=c) {
1108             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1109                     (long)c, (long)a, (long)b, (long)composite);
1110         }
1111         /*
1112          * Note: NFKC has fewer round-trip mappings than NFC,
1113          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1114          */
1115     }
1116 #endif
1117
1118     /* get ISO Comment, field 11 */
1119     *fields[11][1]=0;
1120     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1121     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1122         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1123             c, u_errorName(*pErrorCode),
1124             U_FAILURE(*pErrorCode) ? buffer : "[error]",
1125             fields[11][0]);
1126     }
1127
1128     /* get uppercase mapping, field 12 */
1129     if(fields[12][0]!=fields[12][1]) {
1130         value=strtoul(fields[12][0], &end, 16);
1131         if(end!=fields[12][1]) {
1132             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1133             return;
1134         }
1135         if((UChar32)value!=u_toupper(c)) {
1136             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1137         }
1138     } else {
1139         /* no case mapping: the API must map the code point to itself */
1140         if(c!=u_toupper(c)) {
1141             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1142         }
1143     }
1144
1145     /* get lowercase mapping, field 13 */
1146     if(fields[13][0]!=fields[13][1]) {
1147         value=strtoul(fields[13][0], &end, 16);
1148         if(end!=fields[13][1]) {
1149             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1150             return;
1151         }
1152         if((UChar32)value!=u_tolower(c)) {
1153             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1154         }
1155     } else {
1156         /* no case mapping: the API must map the code point to itself */
1157         if(c!=u_tolower(c)) {
1158             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1159         }
1160     }
1161
1162     /* get titlecase mapping, field 14 */
1163     if(fields[14][0]!=fields[14][1]) {
1164         value=strtoul(fields[14][0], &end, 16);
1165         if(end!=fields[14][1]) {
1166             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1167             return;
1168         }
1169         if((UChar32)value!=u_totitle(c)) {
1170             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1171         }
1172     } else {
1173         /* no case mapping: the API must map the code point to itself */
1174         if(c!=u_totitle(c)) {
1175             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1176         }
1177     }
1178 }
1179
1180 static UBool U_CALLCONV
1181 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1182     static const UChar32 test[][2]={
1183         {0x41, U_UPPERCASE_LETTER},
1184         {0x308, U_NON_SPACING_MARK},
1185         {0xfffe, U_GENERAL_OTHER_TYPES},
1186         {0xe0041, U_FORMAT_CHAR},
1187         {0xeffff, U_UNASSIGNED}
1188     };
1189
1190     int32_t i, count;
1191
1192     if(0!=strcmp((const char *)context, "a1")) {
1193         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1194         return FALSE;
1195     }
1196
1197     count=UPRV_LENGTHOF(test);
1198     for(i=0; i<count; ++i) {
1199         if(start<=test[i][0] && test[i][0]<limit) {
1200             if(type!=(UCharCategory)test[i][1]) {
1201                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1202                         start, limit, (long)type, test[i][0], test[i][1]);
1203             }
1204             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1205             return i==(count-1) ? FALSE : TRUE;
1206         }
1207     }
1208
1209     if(start>test[count-1][0]) {
1210         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1211                 start, limit, (long)type);
1212         return FALSE;
1213     }
1214
1215     return TRUE;
1216 }
1217
1218 static UBool U_CALLCONV
1219 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1220     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1221     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1222         { 0x0590, U_LEFT_TO_RIGHT },
1223         { 0x0600, U_RIGHT_TO_LEFT },
1224         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1225         { 0x0860, U_RIGHT_TO_LEFT },
1226         { 0x0870, U_RIGHT_TO_LEFT_ARABIC },  // Unicode 10 changes U+0860..U+086F from R to AL.
1227         { 0x08A0, U_RIGHT_TO_LEFT },
1228         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1229         { 0x20A0, U_LEFT_TO_RIGHT },
1230         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1231         { 0xFB1D, U_LEFT_TO_RIGHT },
1232         { 0xFB50, U_RIGHT_TO_LEFT },
1233         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1234         { 0xFE70, U_LEFT_TO_RIGHT },
1235         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1236
1237         { 0x10800, U_LEFT_TO_RIGHT },
1238         { 0x10D00, U_RIGHT_TO_LEFT },  // Unicode 11 changes U+10D00..U+10D3F from R to AL.
1239         { 0x10D40, U_RIGHT_TO_LEFT_ARABIC },
1240         { 0x10F30, U_RIGHT_TO_LEFT },  // Unicode 11 changes U+10F30..U+10F6F from R to AL.
1241         { 0x10F70, U_RIGHT_TO_LEFT_ARABIC },
1242         { 0x11000, U_RIGHT_TO_LEFT },
1243
1244         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1245         { 0x1EC70, U_RIGHT_TO_LEFT },  // Unicode 11 changes U+1EC70..U+1ECBF from R to AL.
1246         { 0x1ECC0, U_RIGHT_TO_LEFT_ARABIC },
1247         { 0x1ED00, U_RIGHT_TO_LEFT },  // Unicode 12 changes U+1ED00..U+1ED4F from R to AL.
1248         { 0x1ED50, U_RIGHT_TO_LEFT_ARABIC },
1249         { 0x1EE00, U_RIGHT_TO_LEFT },
1250         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1251         { 0x1F000, U_RIGHT_TO_LEFT },
1252         { 0x110000, U_LEFT_TO_RIGHT }
1253     };
1254
1255     UChar32 c;
1256     int32_t i;
1257     UCharDirection shouldBeDir;
1258
1259     /*
1260      * LineBreak.txt specifies:
1261      *   #  - Assigned characters that are not listed explicitly are given the value
1262      *   #    "AL".
1263      *   #  - Unassigned characters are given the value "XX".
1264      *
1265      * PUA characters are listed explicitly with "XX".
1266      * Verify that no assigned character has "XX".
1267      */
1268     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1269         c=start;
1270         while(c<limit) {
1271             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1272                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1273             }
1274             ++c;
1275         }
1276     }
1277
1278     /*
1279      * Verify default Bidi classes.
1280      * See DerivedBidiClass.txt, especially for unassigned code points.
1281      */
1282     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1283         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1284         c=start;
1285         for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
1286             if((int32_t)c<defaultBidi[i][0]) {
1287                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1288                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1289                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1290                     } else {
1291                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1292                     }
1293
1294                     if( u_charDirection(c)!=shouldBeDir ||
1295                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1296                     ) {
1297                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1298                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1299                     }
1300                     ++c;
1301                 }
1302             }
1303         }
1304     }
1305
1306     return TRUE;
1307 }
1308
1309 /* tests for several properties */
1310 static void TestUnicodeData()
1311 {
1312     UVersionInfo expectVersionArray;
1313     UVersionInfo versionArray;
1314     char *fields[15][2];
1315     UErrorCode errorCode;
1316     UChar32 c;
1317     int8_t type;
1318
1319     UnicodeDataContext context;
1320
1321     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1322     u_getUnicodeVersion(versionArray);
1323     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1324     {
1325         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1326         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1327     }
1328
1329 #if defined(ICU_UNICODE_VERSION)
1330     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1331     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1332     {
1333          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1334     }
1335 #endif
1336
1337     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1338         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1339     }
1340
1341     errorCode=U_ZERO_ERROR;
1342 #if !UCONFIG_NO_NORMALIZATION
1343     context.nfc=unorm2_getNFCInstance(&errorCode);
1344     context.nfkc=unorm2_getNFKCInstance(&errorCode);
1345     if(U_FAILURE(errorCode)) {
1346         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1347         return;
1348     }
1349 #endif
1350     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1351     if(U_FAILURE(errorCode)) {
1352         return; /* if we couldn't parse UnicodeData.txt, we should return */
1353     }
1354
1355     /* sanity check on repeated properties */
1356     for(c=0xfffe; c<=0x10ffff;) {
1357         type=u_charType(c);
1358         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1359             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1360         }
1361         if(type!=U_UNASSIGNED) {
1362             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1363         }
1364         if((c&0xffff)==0xfffe) {
1365             ++c;
1366         } else {
1367             c+=0xffff;
1368         }
1369     }
1370
1371     /* test that PUA is not "unassigned" */
1372     for(c=0xe000; c<=0x10fffd;) {
1373         type=u_charType(c);
1374         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1375             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1376         }
1377         if(type==U_UNASSIGNED) {
1378             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1379         } else if(type!=U_PRIVATE_USE_CHAR) {
1380             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1381         }
1382         if(c==0xf8ff) {
1383             c=0xf0000;
1384         } else if(c==0xffffd) {
1385             c=0x100000;
1386         } else {
1387             ++c;
1388         }
1389     }
1390
1391     /* test u_enumCharTypes() */
1392     u_enumCharTypes(enumTypeRange, "a1");
1393
1394     /* check default properties */
1395     u_enumCharTypes(enumDefaultsRange, NULL);
1396 }
1397
1398 static void TestCodeUnit(){
1399     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1400
1401     int32_t i;
1402
1403     for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
1404         UChar c=codeunit[i];
1405         if(i<4){
1406             if(!(U16_IS_SINGLE(c)) || (U16_IS_LEAD(c)) || (U16_IS_TRAIL(c)) ||
1407                     U16_IS_SURROGATE(c) || U_IS_SURROGATE(c)) {
1408                 log_err("ERROR: U+%04x is a single", c);
1409             }
1410
1411         }
1412         if(i >= 4 && i< 8){
1413             if(!(U16_IS_LEAD(c)) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c) ||
1414                     !U16_IS_SURROGATE(c) || !U_IS_SURROGATE(c)){
1415                 log_err("ERROR: U+%04x is a first surrogate", c);
1416             }
1417         }
1418         if(i >= 8 && i< 12){
1419             if(!(U16_IS_TRAIL(c)) || U16_IS_SINGLE(c) || U16_IS_LEAD(c) ||
1420                     !U16_IS_SURROGATE(c) || !U_IS_SURROGATE(c)){
1421                 log_err("ERROR: U+%04x is a second surrogate", c);
1422             }
1423         }
1424 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1425         if(i<4){
1426             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1427                 log_err("ERROR: U+%04x is a single", c);
1428             }
1429
1430         }
1431         if(i >= 4 && i< 8){
1432             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1433                 log_err("ERROR: U+%04x is a first surrogate", c);
1434             }
1435         }
1436         if(i >= 8 && i< 12){
1437             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1438                 log_err("ERROR: U+%04x is a second surrogate", c);
1439             }
1440         }
1441 #endif
1442     }
1443 }
1444
1445 static void TestCodePoint(){
1446     const UChar32 codePoint[]={
1447         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1448         0xd800,
1449         0xdbff,
1450         0xdc00,
1451         0xdfff,
1452         0xdc04,
1453         0xd821,
1454         /*not a surrogate, valid, isUnicodeChar , not Error*/
1455         0x20ac,
1456         0xd7ff,
1457         0xe000,
1458         0xe123,
1459         0x0061,
1460         0xe065,
1461         0x20402,
1462         0x24506,
1463         0x23456,
1464         0x20402,
1465         0x10402,
1466         0x23456,
1467         /*not a surrogate, not valid, isUnicodeChar, isError */
1468         0x0015,
1469         0x009f,
1470         /*not a surrogate, not valid, not isUnicodeChar, isError */
1471         0xffff,
1472         0xfffe,
1473     };
1474     int32_t i;
1475     for(i=0; i<UPRV_LENGTHOF(codePoint); i++) {
1476         UChar32 c=codePoint[i];
1477         if(i<6) {
1478             if(!U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)) {
1479                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1480             }
1481             if(U_IS_UNICODE_CHAR(c)) {
1482                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1483             }
1484         } else if(i >=6 && i<18) {
1485             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1486                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1487             }
1488             if(!U_IS_UNICODE_CHAR(c)) {
1489                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1490             }
1491         } else if(i >=18 && i<20) {
1492             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1493                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1494             }
1495             if(!U_IS_UNICODE_CHAR(c)) {
1496                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1497             }
1498         } else if(i >=18 && i<UPRV_LENGTHOF(codePoint)) {
1499             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1500                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1501             }
1502             if(U_IS_UNICODE_CHAR(c)) {
1503                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1504             }
1505         }
1506 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1507         if(i<6){
1508             if(!UTF_IS_SURROGATE(c)){
1509                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1510             }
1511             if(UTF_IS_VALID(c)){
1512                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1513             }
1514             if(UTF_IS_UNICODE_CHAR(c)){
1515                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1516             }
1517             if(UTF_IS_ERROR(c)){
1518                 log_err("ERROR: isError() failed for U+%04x\n", c);
1519             }
1520         }else if(i >=6 && i<18){
1521             if(UTF_IS_SURROGATE(c)){
1522                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1523             }
1524             if(!UTF_IS_VALID(c)){
1525                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1526             }
1527             if(!UTF_IS_UNICODE_CHAR(c)){
1528                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1529             }
1530             if(UTF_IS_ERROR(c)){
1531                 log_err("ERROR: isError() failed for U+%04x\n", c);
1532             }
1533         }else if(i >=18 && i<20){
1534             if(UTF_IS_SURROGATE(c)){
1535                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1536             }
1537             if(UTF_IS_VALID(c)){
1538                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1539             }
1540             if(!UTF_IS_UNICODE_CHAR(c)){
1541                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1542             }
1543             if(!UTF_IS_ERROR(c)){
1544                 log_err("ERROR: isError() failed for U+%04x\n", c);
1545             }
1546         }
1547         else if(i >=18 && i<UPRV_LENGTHOF(codePoint)){
1548             if(UTF_IS_SURROGATE(c)){
1549                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1550             }
1551             if(UTF_IS_VALID(c)){
1552                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1553             }
1554             if(UTF_IS_UNICODE_CHAR(c)){
1555                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1556             }
1557             if(!UTF_IS_ERROR(c)){
1558                 log_err("ERROR: isError() failed for U+%04x\n", c);
1559             }
1560         }
1561 #endif
1562     }
1563
1564     if(
1565         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1566         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1567         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1568         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1569     ) {
1570         log_err("error with U_IS_BMP()\n");
1571     }
1572
1573     if(
1574         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1575         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1576         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1577         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1578     ) {
1579         log_err("error with U_IS_SUPPLEMENTARY()\n");
1580     }
1581 }
1582
1583 static void TestCharLength()
1584 {
1585     const int32_t codepoint[]={
1586         1, 0x0061,
1587         1, 0xe065,
1588         1, 0x20ac,
1589         2, 0x20402,
1590         2, 0x23456,
1591         2, 0x24506,
1592         2, 0x20402,
1593         2, 0x10402,
1594         1, 0xd7ff,
1595         1, 0xe000
1596     };
1597
1598     int32_t i;
1599 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1600     UBool multiple;
1601 #endif
1602     for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
1603         UChar32 c=codepoint[i+1];
1604         if(
1605 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1606                 UTF_CHAR_LENGTH(c) != codepoint[i] ||
1607 #endif
1608                 U16_LENGTH(c) != codepoint[i]) {
1609             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1610         }
1611 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1612         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1613         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1614             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1615         }
1616 #endif
1617     }
1618 }
1619
1620 /*internal functions ----*/
1621 static int32_t MakeProp(char* str)
1622 {
1623     int32_t result = 0;
1624     char* matchPosition =0;
1625
1626     matchPosition = strstr(tagStrings, str);
1627     if (matchPosition == 0)
1628     {
1629         log_err("unrecognized type letter ");
1630         log_err(str);
1631     }
1632     else
1633         result = (int32_t)((matchPosition - tagStrings) / 2);
1634     return result;
1635 }
1636
1637 static int32_t MakeDir(char* str)
1638 {
1639     int32_t pos = 0;
1640     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1641         if (strcmp(str, dirStrings[pos]) == 0) {
1642             return pos;
1643         }
1644     }
1645     return -1;
1646 }
1647
1648 /* test u_charName() -------------------------------------------------------- */
1649
1650 static const struct {
1651     uint32_t code;
1652     const char *name, *oldName, *extName, *alias;
1653 } names[]={
1654     {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
1655     {0x01a2, "LATIN CAPITAL LETTER OI", "",
1656              "LATIN CAPITAL LETTER OI",
1657              "LATIN CAPITAL LETTER GHA"},
1658     {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
1659              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
1660     {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
1661              "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
1662              "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
1663     {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
1664     {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
1665     {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
1666     {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
1667     {0xd800, "", "", "<lead surrogate-D800>" },
1668     {0xdc00, "", "", "<trail surrogate-DC00>" },
1669     {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
1670     {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
1671     {0xffff, "", "", "<noncharacter-FFFF>" },
1672     {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
1673               "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
1674               "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
1675     {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
1676 };
1677
1678 static UBool
1679 enumCharNamesFn(void *context,
1680                 UChar32 code, UCharNameChoice nameChoice,
1681                 const char *name, int32_t length) {
1682     int32_t *pCount=(int32_t *)context;
1683     const char *expected;
1684     int i;
1685
1686     if(length<=0 || length!=(int32_t)strlen(name)) {
1687         /* should not be called with an empty string or invalid length */
1688         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1689         return TRUE;
1690     }
1691
1692     ++*pCount;
1693     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1694         if(code==(UChar32)names[i].code) {
1695             switch (nameChoice) {
1696                 case U_EXTENDED_CHAR_NAME:
1697                     if(0!=strcmp(name, names[i].extName)) {
1698                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1699                     }
1700                     break;
1701                 case U_UNICODE_CHAR_NAME:
1702                     if(0!=strcmp(name, names[i].name)) {
1703                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1704                     }
1705                     break;
1706                 case U_UNICODE_10_CHAR_NAME:
1707                     expected=names[i].oldName;
1708                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
1709                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1710                     }
1711                     break;
1712                 case U_CHAR_NAME_ALIAS:
1713                     expected=names[i].alias;
1714                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1715                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1716                     }
1717                     break;
1718                 case U_CHAR_NAME_CHOICE_COUNT:
1719                     break;
1720             }
1721             break;
1722         }
1723     }
1724     return TRUE;
1725 }
1726
/* State that TestCharNames() hands to enumExtCharNamesFn() through u_enumCharNames(). */
struct enumExtCharNamesContext {
    /* name counter; its address is passed on to enumCharNamesFn(), which increments it */
    uint32_t length;
    /* code point seen by the previous callback; TestCharNames() initializes it to -1 */
    int32_t last;
};
1731
1732 static UBool
1733 enumExtCharNamesFn(void *context,
1734                 UChar32 code, UCharNameChoice nameChoice,
1735                 const char *name, int32_t length) {
1736     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1737
1738     if (ecncp->last != (int32_t) code - 1) {
1739         if (ecncp->last < 0) {
1740             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1741         } else {
1742             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1743         }
1744     }
1745     ecncp->last = (int32_t) code;
1746
1747     if (!*name) {
1748         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1749     }
1750
1751     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1752 }
1753
1754 /**
1755  * This can be made more efficient by moving it into putil.c and having
1756  * it directly access the ebcdic translation tables.
1757  * TODO: If we get this method in putil.c, then delete it from here.
1758  */
1759 static UChar
1760 u_charToUChar(char c) {
1761     UChar uc;
1762     u_charsToUChars(&c, &uc, 1);
1763     return uc;
1764 }
1765
1766 static void
1767 TestCharNames() {
1768     static char name[80];
1769     UErrorCode errorCode=U_ZERO_ERROR;
1770     struct enumExtCharNamesContext extContext;
1771     const char *expected;
1772     int32_t length;
1773     UChar32 c;
1774     int32_t i;
1775
1776     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1777     length=uprv_getMaxCharNameLength();
1778     if(length==0) {
1779         /* no names data available */
1780         return;
1781     }
1782     if(length<83) { /* Unicode 3.2 max char name length */
1783         log_err("uprv_getMaxCharNameLength()=%d is too short");
1784     }
1785     /* ### TODO same tests for max ISO comment length as for max name length */
1786
1787     log_verbose("Testing u_charName()\n");
1788     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1789         /* modern Unicode character name */
1790         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1791         if(U_FAILURE(errorCode)) {
1792             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1793             return;
1794         }
1795         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1796             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1797         }
1798
1799         /* find the modern name */
1800         if (*names[i].name) {
1801             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1802             if(U_FAILURE(errorCode)) {
1803                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1804                 return;
1805             }
1806             if(c!=(UChar32)names[i].code) {
1807                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1808             }
1809         }
1810
1811         /* Unicode 1.0 character name */
1812         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1813         if(U_FAILURE(errorCode)) {
1814             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1815             return;
1816         }
1817         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1818             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1819         }
1820
1821         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1822         if(names[i].oldName[0]!=0 /* && length>0 */) {
1823             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1824             if(U_FAILURE(errorCode)) {
1825                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1826                 return;
1827             }
1828             if(c!=(UChar32)names[i].code) {
1829                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1830             }
1831         }
1832
1833         /* Unicode character name alias */
1834         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1835         if(U_FAILURE(errorCode)) {
1836             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1837             return;
1838         }
1839         expected=names[i].alias;
1840         if(expected==NULL) {
1841             expected="";
1842         }
1843         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1844             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1845                     names[i].code, name, length, expected);
1846         }
1847
1848         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1849         if(expected[0]!=0 /* && length>0 */) {
1850             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1851             if(U_FAILURE(errorCode)) {
1852                 log_err("u_charFromName(%s - alias) error %s\n",
1853                         expected, u_errorName(errorCode));
1854                 return;
1855             }
1856             if(c!=(UChar32)names[i].code) {
1857                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1858                         expected, c, names[i].code);
1859             }
1860         }
1861     }
1862
1863     /* test u_enumCharNames() */
1864     length=0;
1865     errorCode=U_ZERO_ERROR;
1866     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1867     if(U_FAILURE(errorCode) || length<94140) {
1868         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1869     }
1870
1871     extContext.length = 0;
1872     extContext.last = -1;
1873     errorCode=U_ZERO_ERROR;
1874     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1875     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1876         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1877     }
1878
1879     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1880     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1881         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1882     }
1883
1884     /* Test getCharNameCharacters */
1885     if(!getTestOption(QUICK_OPTION)) {
1886         enum { BUFSIZE = 256 };
1887         UErrorCode ec = U_ZERO_ERROR;
1888         char buf[BUFSIZE];
1889         int32_t maxLength;
1890         UChar32 cp;
1891         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1892         int32_t l1, l2;
1893         UBool map[256];
1894         UBool ok;
1895
1896         USet* set = uset_open(1, 0); /* empty set */
1897         USet* dumb = uset_open(1, 0); /* empty set */
1898
1899         /*
1900          * uprv_getCharNameCharacters() will likely return more lowercase
1901          * letters than actual character names contain because
1902          * it includes all the characters in lowercased names of
1903          * general categories, for the full possible set of extended names.
1904          */
1905         {
1906             USetAdder sa={
1907                 NULL,
1908                 uset_add,
1909                 uset_addRange,
1910                 uset_addString,
1911                 NULL /* don't need remove() */
1912             };
1913             sa.set=set;
1914             uprv_getCharNameCharacters(&sa);
1915         }
1916
1917         /* build set the dumb (but sure-fire) way */
1918         for (i=0; i<256; ++i) {
1919             map[i] = FALSE;
1920         }
1921
1922         maxLength=0;
1923         for (cp=0; cp<0x110000; ++cp) {
1924             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1925                                      buf, BUFSIZE, &ec);
1926             if (U_FAILURE(ec)) {
1927                 log_err("FAIL: u_charName failed when it shouldn't\n");
1928                 uset_close(set);
1929                 uset_close(dumb);
1930                 return;
1931             }
1932             if(len>maxLength) {
1933                 maxLength=len;
1934             }
1935
1936             for (i=0; i<len; ++i) {
1937                 if (!map[(uint8_t) buf[i]]) {
1938                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1939                     map[(uint8_t) buf[i]] = TRUE;
1940                 }
1941             }
1942
1943             /* test for leading/trailing whitespace */
1944             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1945                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1946             }
1947         }
1948
1949         if(map[(uint8_t)'\t']) {
1950             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1951         }
1952
1953         length=uprv_getMaxCharNameLength();
1954         if(length!=maxLength) {
1955             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1956                     length, maxLength);
1957         }
1958
1959         /* compare the sets.  Where is my uset_equals?!! */
1960         ok=TRUE;
1961         for(i=0; i<256; ++i) {
1962             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1963                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1964                     /* ignore lowercase a-z that are in set but not in dumb */
1965                     ok=TRUE;
1966                 } else {
1967                     ok=FALSE;
1968                     break;
1969                 }
1970             }
1971         }
1972
1973         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1974         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1975         if (U_FAILURE(ec)) {
1976             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1977             uset_close(set);
1978             uset_close(dumb);
1979             return;
1980         }
1981
1982         if (l1 >= BUFSIZE) {
1983             l1 = BUFSIZE-1;
1984             pat[l1] = 0;
1985         }
1986         if (l2 >= BUFSIZE) {
1987             l2 = BUFSIZE-1;
1988             dumbPat[l2] = 0;
1989         }
1990
1991         if (!ok) {
1992             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1993                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1994         } else if(getTestOption(VERBOSITY_OPTION)) {
1995             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1996         }
1997
1998         uset_close(set);
1999         uset_close(dumb);
2000     }
2001
2002     /* ### TODO: test error cases and other interesting things */
2003 }
2004
2005 static void
2006 TestUCharFromNameUnderflow() {
2007     // Ticket #10889: Underflow crash when there is no dash.
2008     const char *name="<NO BREAK SPACE>";
2009     UErrorCode errorCode=U_ZERO_ERROR;
2010     UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2011     if(U_SUCCESS(errorCode)) {
2012         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2013                 name, c, u_errorName(errorCode));
2014     }
2015
2016     // Test related edge cases.
2017     name="<-00a0>";
2018     errorCode=U_ZERO_ERROR;
2019     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2020     if(U_SUCCESS(errorCode)) {
2021         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2022                 name, c, u_errorName(errorCode));
2023     }
2024
2025     errorCode=U_ZERO_ERROR;
2026     name="<control->";
2027     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2028     if(U_SUCCESS(errorCode)) {
2029         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2030                 name, c, u_errorName(errorCode));
2031     }
2032
2033     errorCode=U_ZERO_ERROR;
2034     name="<control-111111>";
2035     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2036     if(U_SUCCESS(errorCode)) {
2037         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2038                 name, c, u_errorName(errorCode));
2039     }
2040
2041     // ICU-20292: integer overflow
2042     errorCode=U_ZERO_ERROR;
2043     name="<noncharacter-10010FFFF>";
2044     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2045     if(U_SUCCESS(errorCode)) {
2046         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2047                 name, c, u_errorName(errorCode));
2048     }
2049
2050     errorCode=U_ZERO_ERROR;
2051     name="<noncharacter-00010FFFF>";  // too many digits even if only leading 0s
2052     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2053     if(U_SUCCESS(errorCode)) {
2054         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2055                 name, c, u_errorName(errorCode));
2056     }
2057
2058     errorCode=U_ZERO_ERROR;
2059     name="<noncharacter-fFFf>>";
2060     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2061     if(U_SUCCESS(errorCode)) {
2062         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2063                 name, c, u_errorName(errorCode));
2064     }
2065 }
2066
2067 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
2068
2069 static void
2070 TestMirroring() {
2071     USet *set;
2072     UErrorCode errorCode;
2073
2074     UChar32 start, end, c2, c3;
2075     int32_t i;
2076
2077     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
2078
2079     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
2080
2081     log_verbose("Testing u_isMirrored()\n");
2082     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
2083          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
2084         )
2085     ) {
2086         log_err("u_isMirrored() does not work correctly\n");
2087     }
2088
2089     log_verbose("Testing u_charMirror()\n");
2090     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
2091          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
2092          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
2093          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2094          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
2095          )
2096     ) {
2097         log_err("u_charMirror() does not work correctly\n");
2098     }
2099
2100     /* verify that Bidi_Mirroring_Glyph roundtrips */
2101     errorCode=U_ZERO_ERROR;
2102     set=uset_openPattern(mirroredPattern, 17, &errorCode);
2103
2104     if (U_FAILURE(errorCode)) {
2105         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
2106     } else {
2107         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
2108             do {
2109                 c2=u_charMirror(start);
2110                 c3=u_charMirror(c2);
2111                 if(c3!=start) {
2112                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
2113                 }
2114                 c3=u_getBidiPairedBracket(start);
2115                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
2116                     if(c3!=start) {
2117                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
2118                                 (long)start);
2119                     }
2120                 } else {
2121                     if(c3!=c2) {
2122                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
2123                                 (long)start, (long)c2);
2124                     }
2125                 }
2126             } while(++start<=end);
2127         }
2128     }
2129
2130     uset_close(set);
2131 }
2132
2133
2134 struct RunTestData
2135 {
2136     const char *runText;
2137     UScriptCode runCode;
2138 };
2139
2140 typedef struct RunTestData RunTestData;
2141
2142 static void
2143 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2144                 const char *prefix)
2145 {
2146     int32_t run, runStart, runLimit;
2147     UScriptCode runCode;
2148
2149     /* iterate over all the runs */
2150     run = 0;
2151     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2152         if (runStart != runStarts[run]) {
2153             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2154                 prefix, run, runStarts[run], runStart);
2155         }
2156
2157         if (runLimit != runStarts[run + 1]) {
2158             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2159                 prefix, run, runStarts[run + 1], runLimit);
2160         }
2161
2162         if (runCode != testData[run].runCode) {
2163             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2164                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2165         }
2166
2167         run += 1;
2168
2169         /* stop when we've seen all the runs we expect to see */
2170         if (run >= nRuns) {
2171             break;
2172         }
2173     }
2174
2175     /* Complain if we didn't see then number of runs we expected */
2176     if (run != nRuns) {
2177         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2178     }
2179 }
2180
2181 static void
2182 TestUScriptRunAPI()
2183 {
2184     static const RunTestData testData1[] = {
2185         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2186         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2187         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2188         {"English (", USCRIPT_LATIN},
2189         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2190         {") ", USCRIPT_LATIN},
2191         {"\\u6F22\\u5B75", USCRIPT_HAN},
2192         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2193         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2194         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2195     };
2196
2197     static const RunTestData testData2[] = {
2198        {"((((((((((abc))))))))))", USCRIPT_LATIN}
2199     };
2200
2201     static const struct {
2202       const RunTestData *testData;
2203       int32_t nRuns;
2204     } testDataEntries[] = {
2205         {testData1, UPRV_LENGTHOF(testData1)},
2206         {testData2, UPRV_LENGTHOF(testData2)}
2207     };
2208
2209     static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
2210     int32_t testEntry;
2211
2212     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2213         UChar testString[1024];
2214         int32_t runStarts[256];
2215         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2216         const RunTestData *testData = testDataEntries[testEntry].testData;
2217
2218         int32_t run, stringLimit;
2219         UScriptRun *scriptRun = NULL;
2220         UErrorCode err;
2221
2222         /*
2223          * Fill in the test string and the runStarts array.
2224          */
2225         stringLimit = 0;
2226         for (run = 0; run < nTestRuns; run += 1) {
2227             runStarts[run] = stringLimit;
2228             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2229             /*stringLimit -= 1;*/
2230         }
2231
2232         /* The limit of the last run */
2233         runStarts[nTestRuns] = stringLimit;
2234
2235         /*
2236          * Make sure that calling uscript_OpenRun with a NULL text pointer
2237          * and a non-zero text length returns the correct error.
2238          */
2239         err = U_ZERO_ERROR;
2240         scriptRun = uscript_openRun(NULL, stringLimit, &err);
2241
2242         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2243             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2244         }
2245
2246         if (scriptRun != NULL) {
2247             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2248             uscript_closeRun(scriptRun);
2249         }
2250
2251         /*
2252          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2253          * and a zero text length returns the correct error.
2254          */
2255         err = U_ZERO_ERROR;
2256         scriptRun = uscript_openRun(testString, 0, &err);
2257
2258         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2259             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2260         }
2261
2262         if (scriptRun != NULL) {
2263             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2264             uscript_closeRun(scriptRun);
2265         }
2266
2267         /*
2268          * Make sure that calling uscript_openRun with a NULL text pointer
2269          * and a zero text length doesn't return an error.
2270          */
2271         err = U_ZERO_ERROR;
2272         scriptRun = uscript_openRun(NULL, 0, &err);
2273
2274         if (U_FAILURE(err)) {
2275             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2276         }
2277
2278         /* Make sure that the empty iterator doesn't find any runs */
2279         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2280             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2281         }
2282
2283         /*
2284          * Make sure that calling uscript_setRunText with a NULL text pointer
2285          * and a non-zero text length returns the correct error.
2286          */
2287         err = U_ZERO_ERROR;
2288         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2289
2290         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2291             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2292         }
2293
2294         /*
2295          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2296          * and a zero text length returns the correct error.
2297          */
2298         err = U_ZERO_ERROR;
2299         uscript_setRunText(scriptRun, testString, 0, &err);
2300
2301         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2302             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2303         }
2304
2305         /*
2306          * Now call uscript_setRunText on the empty iterator
2307          * and make sure that it works.
2308          */
2309         err = U_ZERO_ERROR;
2310         uscript_setRunText(scriptRun, testString, stringLimit, &err);
2311
2312         if (U_FAILURE(err)) {
2313             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2314         } else {
2315             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2316         }
2317
2318         uscript_closeRun(scriptRun);
2319
2320         /*
2321          * Now open an interator over the testString
2322          * using uscript_openRun and make sure that it works
2323          */
2324         scriptRun = uscript_openRun(testString, stringLimit, &err);
2325
2326         if (U_FAILURE(err)) {
2327             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2328         } else {
2329             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2330         }
2331
2332         /* Now reset the iterator, and make sure
2333          * that it still works.
2334          */
2335         uscript_resetRun(scriptRun);
2336
2337         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2338
2339         /* Close the iterator */
2340         uscript_closeRun(scriptRun);
2341     }
2342 }
2343
2344 /* test additional, non-core properties */
2345 static void
2346 TestAdditionalProperties() {
2347     /* test data for u_charAge() */
2348     static const struct {
2349         UChar32 c;
2350         UVersionInfo version;
2351     } charAges[]={
2352         {0x41,    { 1, 1, 0, 0 }},
2353         {0xffff,  { 1, 1, 0, 0 }},
2354         {0x20ab,  { 2, 0, 0, 0 }},
2355         {0x2fffe, { 2, 0, 0, 0 }},
2356         {0x20ac,  { 2, 1, 0, 0 }},
2357         {0xfb1d,  { 3, 0, 0, 0 }},
2358         {0x3f4,   { 3, 1, 0, 0 }},
2359         {0x10300, { 3, 1, 0, 0 }},
2360         {0x220,   { 3, 2, 0, 0 }},
2361         {0xff60,  { 3, 2, 0, 0 }}
2362     };
2363
2364     /* test data for u_hasBinaryProperty() */
2365     static const int32_t
2366     props[][3]={ /* code point, property, value */
2367         { 0x0627, UCHAR_ALPHABETIC, TRUE },
2368         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2369         { 0x2028, UCHAR_ALPHABETIC, FALSE },
2370
2371         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2372         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2373
2374         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2375         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2376
2377         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2378         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2379
2380         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2381         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2382         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2383         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2384         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2385
2386         { 0x058a, UCHAR_DASH, TRUE },
2387         { 0x007e, UCHAR_DASH, FALSE },
2388
2389         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2390         { 0x3000, UCHAR_DIACRITIC, FALSE },
2391
2392         { 0x0e46, UCHAR_EXTENDER, TRUE },
2393         { 0x0020, UCHAR_EXTENDER, FALSE },
2394
2395 #if !UCONFIG_NO_NORMALIZATION
2396         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2397         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2398         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2399
2400         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2401         { 0x0308, UCHAR_NFD_INERT, FALSE },
2402
2403         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2404         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2405
2406         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2407         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2408         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2409         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2410         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2411         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2412
2413         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2414         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2415
2416         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2417         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2418         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2419         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2420         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2421         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2422 #endif
2423
2424         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2425         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2426         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2427
2428         { 0x30fb, UCHAR_HYPHEN, TRUE },
2429         { 0xfe58, UCHAR_HYPHEN, FALSE },
2430
2431         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2432         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2433         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2434
2435         { 0x2172, UCHAR_ID_START, TRUE },
2436         { 0x007a, UCHAR_ID_START, TRUE },
2437         { 0x0039, UCHAR_ID_START, FALSE },
2438
2439         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2440         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2441         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2442
2443         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2444         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2445
2446         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2447         { 0x0345, UCHAR_LOWERCASE, TRUE },
2448         { 0x0030, UCHAR_LOWERCASE, FALSE },
2449
2450         { 0x1d7a9, UCHAR_MATH, TRUE },
2451         { 0x2135, UCHAR_MATH, TRUE },
2452         { 0x0062, UCHAR_MATH, FALSE },
2453
2454         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2455         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2456         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2457
2458         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2459         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2460         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2461
2462         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2463         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2464
2465         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2466         { 0x2162, UCHAR_UPPERCASE, TRUE },
2467         { 0x0345, UCHAR_UPPERCASE, FALSE },
2468
2469         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2470         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2471         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2472
2473         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2474         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2475         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2476
2477         { 0x16ee, UCHAR_XID_START, TRUE },
2478         { 0x23456, UCHAR_XID_START, TRUE },
2479         { 0x1d1aa, UCHAR_XID_START, FALSE },
2480
2481         /*
2482          * Version break:
2483          * The following properties are only supported starting with the
2484          * Unicode version indicated in the second field.
2485          */
2486         { -1, 0x320, 0 },
2487
2488         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2489         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2490         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2491
2492         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2493         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2494         { 0xe0001, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2495         { 0xe0100, UCHAR_DEPRECATED, FALSE },
2496
2497         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2498         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2499         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2500         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2501
2502         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2503         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2504         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2505         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2506
2507         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2508         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2509
2510         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2511         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2512
2513         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2514         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2515
2516         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2517         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2518
2519         { 0x2e9b, UCHAR_RADICAL, TRUE },
2520         { 0x4e00, UCHAR_RADICAL, FALSE },
2521
2522         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2523         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2524
2525         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2526         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2527
2528         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2529
2530         { 0x002e, UCHAR_S_TERM, TRUE },
2531         { 0x0061, UCHAR_S_TERM, FALSE },
2532
2533         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2534         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2535         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2536         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2537
2538         /* enum/integer type properties */
2539
2540         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2541         /* test default Bidi classes for unassigned code points */
2542         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2543         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2544         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2545         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2546         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2547         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2548         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2549         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2550         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2551         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2552         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2553
2554         { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2555         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2556         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2557         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2558         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2559         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2560         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2561
2562         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2563         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2564         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2565         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2566         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2567         { 0x0870, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2568         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2569         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2570         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2571         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2572         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2573
2574         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2575         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2576
2577         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2578         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2579         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2580         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2581         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2582         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2583         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2584         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2585         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2586
2587         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2588         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2589         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2590         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2591         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2592         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2593         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2594         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2595         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2596         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2597         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2598         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2599         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2600         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2601         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2602         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2603         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2604
2605         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2606         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2607         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2608
2609         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2610         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2611         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2612         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2613         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2614
2615         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2616         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2617         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2618         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2619         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2620         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2621         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2622         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2623
2624         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2625         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2626         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2627         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2628         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2629         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2630         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2631         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2632         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2633         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2634         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2635         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2636         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2637         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2638         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2639         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2640
2641         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2642
2643         /* UCHAR_SCRIPT tested in cucdapi.c TestUScriptCodeAPI() */
2644
2645         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2646         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2647         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2648         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2649         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2650         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2651         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2652
2653         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2654         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2655         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2656         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2657
2658         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2659         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2660         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2661         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2662         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2663         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2664
2665         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2666         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2667         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2668         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2669
2670         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2671         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2672         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2673         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2674         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2675         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2676         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2677
2678         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2679         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2680         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2681         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2682
2683         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2684         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2685         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2686         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2687
2688         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2689         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2690         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2691         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2692         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2693
2694         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2695
2696         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2697
2698         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2699         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2700         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2701
2702         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2703         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2704         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2705         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2706         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2707
2708         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2709         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2710         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2711
2712         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2713         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2714         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2715         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2716
2717         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2718         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2719         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2720         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2721         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2722         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2723
2724         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2725         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2726         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2727         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2728
2729         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2730         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2731         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2732         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2733
2734         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2735         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2736         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2737         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2738
2739         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2740
2741         /* unassigned code points in new default Bidi R blocks */
2742         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2743         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2744
2745         /* test some script codes >127 */
2746         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2747         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2748         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2749
2750         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2751
2752         /* value changed in Unicode 6.0 */
2753         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2754
2755         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2756
2757         /* unassigned code points in new/changed default Bidi AL blocks */
2758         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2759         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2760
2761         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2762
2763         /* unassigned code points in the currency symbols block now default to ET */
2764         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2765         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2766
2767         /* new property in Unicode 6.3 */
2768         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2769         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2770         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2771         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2772         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2773         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2774
2775         { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
2776
2777         /* new character range with Joining_Group values */
2778         { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2779         { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
2780         { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
2781         { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
2782         { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2783
2784         { -1, 0xa00, 0 },  // version break for Unicode 10
2785
2786         { 0x1F1E5, UCHAR_REGIONAL_INDICATOR, FALSE },
2787         { 0x1F1E7, UCHAR_REGIONAL_INDICATOR, TRUE },
2788         { 0x1F1FF, UCHAR_REGIONAL_INDICATOR, TRUE },
2789         { 0x1F200, UCHAR_REGIONAL_INDICATOR, FALSE },
2790
2791         { 0x0600, UCHAR_PREPENDED_CONCATENATION_MARK, TRUE },
2792         { 0x0606, UCHAR_PREPENDED_CONCATENATION_MARK, FALSE },
2793         { 0x110BD, UCHAR_PREPENDED_CONCATENATION_MARK, TRUE },
2794
2795         /* undefined UProperty values */
2796         { 0x61, 0x4a7, 0 },
2797         { 0x234bc, 0x15ed, 0 }
2798     };
2799
2800     UVersionInfo version;
2801     UChar32 c;
2802     int32_t i, result, uVersion;
2803     UProperty which;
2804
2805     /* what is our Unicode version? */
2806     u_getUnicodeVersion(version);
2807     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2808
2809     u_charAge(0x20, version);
2810     if(version[0]==0) {
2811         /* no additional properties available */
2812         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2813         return;
2814     }
2815
2816     /* test u_charAge() */
2817     for(i=0; i<UPRV_LENGTHOF(charAges); ++i) {
2818         u_charAge(charAges[i].c, version);
2819         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2820             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2821                 charAges[i].c,
2822                 version[0], version[1], version[2], version[3],
2823                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2824         }
2825     }
2826
2827     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2828         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2829         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2830         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2831         u_getIntPropertyMinValue(0x2345)!=0
2832     ) {
2833         log_err("error: u_getIntPropertyMinValue() wrong\n");
2834     }
2835     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2836         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2837     }
2838     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2839         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2840     }
2841     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2842         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2843     }
2844     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2845         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2846     }
2847     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2848         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2849     }
2850     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2851         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2852     }
2853     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2854         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2855     }
2856     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2857         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2858     }
2859     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2860         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2861     }
2862     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2863         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2864     }
2865     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2866         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2867     }
2868     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2869         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2870     }
2871     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2872         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2873     }
2874     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2875         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2876     }
2877     /*JB#2410*/
2878     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2879         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2880     }
2881     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2882         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2883     }
2884     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2885         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2886     }
2887     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2888         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2889     }
2890     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2891         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2892     }
2893
2894     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2895     for(i=0; i<UPRV_LENGTHOF(props); ++i) {
2896         const char *whichName;
2897
2898         if(props[i][0]<0) {
2899             /* Unicode version break */
2900             if(uVersion<props[i][1]) {
2901                 break; /* do not test properties that are not yet supported */
2902             } else {
2903                 continue; /* skip this row */
2904             }
2905         }
2906
2907         c=(UChar32)props[i][0];
2908         which=(UProperty)props[i][1];
2909         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2910
2911         if(which<UCHAR_INT_START) {
2912             result=u_hasBinaryProperty(c, which);
2913             if(result!=props[i][2]) {
2914                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2915                         c, whichName, result, i);
2916             }
2917         }
2918
2919         result=u_getIntPropertyValue(c, which);
2920         if(result!=props[i][2]) {
2921             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2922                     c, whichName, result, props[i][2], i);
2923         }
2924
2925         /* test separate functions, too */
2926         switch((UProperty)props[i][1]) {
2927         case UCHAR_ALPHABETIC:
2928             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2929                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2930                         props[i][0], result, i);
2931             }
2932             break;
2933         case UCHAR_LOWERCASE:
2934             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2935                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2936                         props[i][0], result, i);
2937             }
2938             break;
2939         case UCHAR_UPPERCASE:
2940             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2941                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2942                         props[i][0], result, i);
2943             }
2944             break;
2945         case UCHAR_WHITE_SPACE:
2946             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2947                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2948                         props[i][0], result, i);
2949             }
2950             break;
2951         default:
2952             break;
2953         }
2954     }
2955 }
2956
2957 static void
2958 TestNumericProperties(void) {
2959     /* see UnicodeData.txt, DerivedNumericValues.txt */
2960     static const struct {
2961         UChar32 c;
2962         int32_t type;
2963         double numValue;
2964     } values[]={
2965         { 0x0F33, U_NT_NUMERIC, -1./2. },
2966         { 0x0C66, U_NT_DECIMAL, 0 },
2967         { 0x96f6, U_NT_NUMERIC, 0 },
2968         { 0xa833, U_NT_NUMERIC, 1./16. },
2969         { 0x2152, U_NT_NUMERIC, 1./10. },
2970         { 0x2151, U_NT_NUMERIC, 1./9. },
2971         { 0x1245f, U_NT_NUMERIC, 1./8. },
2972         { 0x2150, U_NT_NUMERIC, 1./7. },
2973         { 0x2159, U_NT_NUMERIC, 1./6. },
2974         { 0x09f6, U_NT_NUMERIC, 3./16. },
2975         { 0x2155, U_NT_NUMERIC, 1./5. },
2976         { 0x00BD, U_NT_NUMERIC, 1./2. },
2977         { 0x0031, U_NT_DECIMAL, 1. },
2978         { 0x4e00, U_NT_NUMERIC, 1. },
2979         { 0x58f1, U_NT_NUMERIC, 1. },
2980         { 0x10320, U_NT_NUMERIC, 1. },
2981         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2982         { 0x00B2, U_NT_DIGIT, 2. },
2983         { 0x5f10, U_NT_NUMERIC, 2. },
2984         { 0x1813, U_NT_DECIMAL, 3. },
2985         { 0x5f0e, U_NT_NUMERIC, 3. },
2986         { 0x2173, U_NT_NUMERIC, 4. },
2987         { 0x8086, U_NT_NUMERIC, 4. },
2988         { 0x278E, U_NT_DIGIT, 5. },
2989         { 0x1D7F2, U_NT_DECIMAL, 6. },
2990         { 0x247A, U_NT_DIGIT, 7. },
2991         { 0x7396, U_NT_NUMERIC, 9. },
2992         { 0x1372, U_NT_NUMERIC, 10. },
2993         { 0x216B, U_NT_NUMERIC, 12. },
2994         { 0x16EE, U_NT_NUMERIC, 17. },
2995         { 0x249A, U_NT_NUMERIC, 19. },
2996         { 0x303A, U_NT_NUMERIC, 30. },
2997         { 0x5345, U_NT_NUMERIC, 30. },
2998         { 0x32B2, U_NT_NUMERIC, 37. },
2999         { 0x1375, U_NT_NUMERIC, 40. },
3000         { 0x10323, U_NT_NUMERIC, 50. },
3001         { 0x0BF1, U_NT_NUMERIC, 100. },
3002         { 0x964c, U_NT_NUMERIC, 100. },
3003         { 0x217E, U_NT_NUMERIC, 500. },
3004         { 0x2180, U_NT_NUMERIC, 1000. },
3005         { 0x4edf, U_NT_NUMERIC, 1000. },
3006         { 0x2181, U_NT_NUMERIC, 5000. },
3007         { 0x137C, U_NT_NUMERIC, 10000. },
3008         { 0x4e07, U_NT_NUMERIC, 10000. },
3009         { 0x12432, U_NT_NUMERIC, 216000. },
3010         { 0x12433, U_NT_NUMERIC, 432000. },
3011         { 0x4ebf, U_NT_NUMERIC, 100000000. },
3012         { 0x5146, U_NT_NUMERIC, 1000000000000. },
3013         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
3014         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
3015         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
3016         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
3017         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
3018         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
3019         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
3020         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
3021     };
3022
3023     double nv;
3024     UChar32 c;
3025     int32_t i, type;
3026
3027     for(i=0; i<UPRV_LENGTHOF(values); ++i) {
3028         c=values[i].c;
3029         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
3030         nv=u_getNumericValue(c);
3031
3032         if(type!=values[i].type) {
3033             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
3034         }
3035         if(0.000001 <= fabs(nv - values[i].numValue)) {
3036             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
3037         }
3038     }
3039 }
3040
3041 /**
3042  * Test the property names and property value names API.
3043  */
3044 static void
3045 TestPropertyNames(void) {
3046     int32_t p, v, choice=0, rev;
3047     UBool atLeastSomething = FALSE;
3048
3049     for (p=0; ; ++p) {
3050         UProperty propEnum = (UProperty)p;
3051         UBool sawProp = FALSE;
3052         if(p > 10 && !atLeastSomething) {
3053           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
3054           return;
3055         }
3056
3057         for (choice=0; ; ++choice) {
3058             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
3059             if (name) {
3060                 if (!sawProp)
3061                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
3062                 log_verbose("%d=\"%s\"", choice, name);
3063                 sawProp = TRUE;
3064                 atLeastSomething = TRUE;
3065
3066                 /* test reverse mapping */
3067                 rev = u_getPropertyEnum(name);
3068                 if (rev != p) {
3069                     log_err("Property round-trip failure: %d -> %s -> %d\n",
3070                             p, name, rev);
3071                 }
3072             }
3073             if (!name && choice>0) break;
3074         }
3075         if (sawProp) {
3076             /* looks like a valid property; check the values */
3077             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3078             int32_t max = 0;
3079             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
3080                 max = 255;
3081             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
3082                 /* it's far too slow to iterate all the way up to
3083                    the real max, U_GC_P_MASK */
3084                 max = U_GC_NL_MASK;
3085             } else if (p == UCHAR_BLOCK) {
3086                 /* UBlockCodes, unlike other values, start at 1 */
3087                 max = 1;
3088             }
3089             log_verbose("\n");
3090             for (v=-1; ; ++v) {
3091                 UBool sawValue = FALSE;
3092                 for (choice=0; ; ++choice) {
3093                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
3094                     if (vname) {
3095                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
3096                         log_verbose("%d=\"%s\"", choice, vname);
3097                         sawValue = TRUE;
3098
3099                         /* test reverse mapping */
3100                         rev = u_getPropertyValueEnum(propEnum, vname);
3101                         if (rev != v) {
3102                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
3103                                     pname, v, vname, rev);
3104                         }
3105                     }
3106                     if (!vname && choice>0) break;
3107                 }
3108                 if (sawValue) {
3109                     log_verbose("\n");
3110                 }
3111                 if (!sawValue && v>=max) break;
3112             }
3113         }
3114         if (!sawProp) {
3115             if (p>=UCHAR_STRING_LIMIT) {
3116                 break;
3117             } else if (p>=UCHAR_DOUBLE_LIMIT) {
3118                 p = UCHAR_STRING_START - 1;
3119             } else if (p>=UCHAR_MASK_LIMIT) {
3120                 p = UCHAR_DOUBLE_START - 1;
3121             } else if (p>=UCHAR_INT_LIMIT) {
3122                 p = UCHAR_MASK_START - 1;
3123             } else if (p>=UCHAR_BINARY_LIMIT) {
3124                 p = UCHAR_INT_START - 1;
3125             }
3126         }
3127     }
3128 }
3129
3130 /**
3131  * Test the property values API.  See JB#2410.
3132  */
3133 static void
3134 TestPropertyValues(void) {
3135     int32_t i, p, min, max;
3136     UErrorCode ec;
3137
3138     /* Min should be 0 for everything. */
3139     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
3140     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
3141         UProperty propEnum = (UProperty)p;
3142         min = u_getIntPropertyMinValue(propEnum);
3143         if (min != 0) {
3144             if (p == UCHAR_BLOCK) {
3145                 /* This is okay...for now.  See JB#2487.
3146                    TODO Update this for JB#2487. */
3147             } else {
3148                 const char* name;
3149                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3150                 if (name == NULL)
3151                     name = "<ERROR>";
3152                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3153                         name, min);
3154             }
3155         }
3156     }
3157
3158     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3159         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3160         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3161     }
3162
3163     /* Max should be -1 for invalid properties. */
3164     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3165     if (max != -1) {
3166         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3167                 max);
3168     }
3169
3170     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3171     for (i=0; i<2; ++i) {
3172         int32_t script;
3173         const char* desc;
3174         ec = U_ZERO_ERROR;
3175         switch (i) {
3176         case 0:
3177             script = uscript_getScript(-1, &ec);
3178             desc = "uscript_getScript(-1)";
3179             break;
3180         case 1:
3181             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3182             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3183             break;
3184         default:
3185             log_err("Internal test error. Too many scripts\n");
3186             return;
3187         }
3188         /* We don't explicitly test ec.  It should be U_FAILURE but it
3189            isn't documented as such. */
3190         if (script != (int32_t)USCRIPT_INVALID_CODE) {
3191             log_err("FAIL: %s = %d, exp. 0\n",
3192                     desc, script);
3193         }
3194     }
3195 }
3196
3197 /* various tests for consistency of UCD data and API behavior */
3198 static void
3199 TestConsistency() {
3200     char buffer[300];
3201     USet *set1, *set2, *set3, *set4;
3202     UErrorCode errorCode;
3203
3204     UChar32 start, end;
3205     int32_t i, length;
3206
3207     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3208     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3209     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3210     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3211     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3212
3213     U_STRING_DECL(mathBlocksPattern,
3214         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3215         214);
3216     U_STRING_DECL(mathPattern, "[:Math:]", 8);
3217     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3218     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3219     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3220
3221     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3222     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3223     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3224     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3225     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3226
3227     U_STRING_INIT(mathBlocksPattern,
3228         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3229         214);
3230     U_STRING_INIT(mathPattern, "[:Math:]", 8);
3231     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3232     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3233     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3234
3235     /*
3236      * It used to be that UCD.html and its precursors said
3237      * "Those dashes used to mark connections between pieces of words,
3238      *  plus the Katakana middle dot."
3239      *
3240      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3241      * but not from Hyphen.
3242      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3243      * Therefore, do not show errors when testing the Hyphen property.
3244      */
3245     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3246                 "known to the UTC and not considered errors.\n");
3247
3248     errorCode=U_ZERO_ERROR;
3249     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3250     set2=uset_openPattern(dashPattern, 8, &errorCode);
3251     if(U_SUCCESS(errorCode)) {
3252         /* remove the Katakana middle dot(s) from set1 */
3253         uset_remove(set1, 0x30fb);
3254         uset_remove(set1, 0xff65); /* halfwidth variant */
3255         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3256     } else {
3257         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3258     }
3259
3260     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3261     set3=uset_openPattern(formatPattern, 6, &errorCode);
3262     set4=uset_openPattern(alphaPattern, 14, &errorCode);
3263     if(U_SUCCESS(errorCode)) {
3264         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3265         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3266         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3267     } else {
3268         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3269     }
3270
3271     uset_close(set1);
3272     uset_close(set2);
3273     uset_close(set3);
3274     uset_close(set4);
3275
3276     /*
3277      * Check that each lowercase character has "small" in its name
3278      * and not "capital".
3279      * There are some such characters, some of which seem odd.
3280      * Use the verbose flag to see these notices.
3281      */
3282     errorCode=U_ZERO_ERROR;
3283     set1=uset_openPattern(lowerPattern, 13, &errorCode);
3284     if(U_SUCCESS(errorCode)) {
3285         for(i=0;; ++i) {
3286             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3287             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3288                 break; /* done */
3289             }
3290             if(U_FAILURE(errorCode)) {
3291                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3292                         i, u_errorName(errorCode));
3293                 break;
3294             }
3295             if(length!=0) {
3296                 break; /* done with code points, got a string or -1 */
3297             }
3298
3299             while(start<=end) {
3300                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3301                 if(U_FAILURE(errorCode)) {
3302                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3303                     errorCode=U_ZERO_ERROR;
3304                 }
3305                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3306                     strstr(buffer, "SMALL CAPITAL")==NULL
3307                 ) {
3308                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3309                 }
3310                 ++start;
3311             }
3312         }
3313     } else {
3314         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3315     }
3316     uset_close(set1);
3317
3318     /* verify that all assigned characters in Math blocks are exactly Math characters */
3319     errorCode=U_ZERO_ERROR;
3320     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3321     set2=uset_openPattern(mathPattern, 8, &errorCode);
3322     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3323     if(U_SUCCESS(errorCode)) {
3324         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3325         uset_complement(set3);      /* assigned characters */
3326         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3327         compareUSets(set1, set2,
3328                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
3329                      TRUE);
3330     } else {
3331         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3332     }
3333     uset_close(set1);
3334     uset_close(set2);
3335     uset_close(set3);
3336
3337     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3338     errorCode=U_ZERO_ERROR;
3339     set1=uset_openPattern(unknownPattern, 14, &errorCode);
3340     set2=uset_openPattern(reservedPattern, 20, &errorCode);
3341     if(U_SUCCESS(errorCode)) {
3342         compareUSets(set1, set2,
3343                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3344                      TRUE);
3345     } else {
3346         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3347     }
3348     uset_close(set1);
3349     uset_close(set2);
3350 }
3351
3352 /* test case folding, compare return values with CaseFolding.txt ------------ */
3353
/*
 * Bit flags, one per kind of case folding, used to record which foldings
 * of a character have been tested already. CF_ALL is the union of all of them.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3361
3362 static void
3363 testFold(UChar32 c, int which,
3364          UChar32 simple, UChar32 turkic,
3365          const UChar *full, int32_t fullLength,
3366          const UChar *turkicFull, int32_t turkicFullLength) {
3367     UChar s[2], t[32];
3368     UChar32 c2;
3369     int32_t length, length2;
3370
3371     UErrorCode errorCode=U_ZERO_ERROR;
3372
3373     length=0;
3374     U16_APPEND_UNSAFE(s, length, c);
3375
3376     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3377         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3378     }
3379     if((which&CF_FULL)!=0) {
3380         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
3381         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3382             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3383         }
3384     }
3385     if((which&CF_TURKIC)!=0) {
3386         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3387             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3388         }
3389
3390         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3391         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3392             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3393         }
3394     }
3395 }
3396
3397 /* test that c case-folds to itself */
3398 static void
3399 testFoldToSelf(UChar32 c, int which) {
3400     UChar s[2];
3401     int32_t length;
3402
3403     length=0;
3404     U16_APPEND_UNSAFE(s, length, c);
3405     testFold(c, which, c, c, s, length, s, length);
3406 }
3407
3408 struct CaseFoldingData {
3409     USet *notSeen;
3410     UChar32 prev, prevSimple;
3411     UChar prevFull[32];
3412     int32_t prevFullLength;
3413     int which;
3414 };
3415 typedef struct CaseFoldingData CaseFoldingData;
3416
3417 static void U_CALLCONV
3418 caseFoldingLineFn(void *context,
3419                   char *fields[][2], int32_t fieldCount,
3420                   UErrorCode *pErrorCode) {
3421     CaseFoldingData *pData=(CaseFoldingData *)context;
3422     char *end;
3423     UChar full[32];
3424     UChar32 c, prev, simple;
3425     int32_t count;
3426     int which;
3427     char status;
3428
3429     /* get code point */
3430     const char *s=u_skipWhitespace(fields[0][0]);
3431     if(0==strncmp(s, "0000..10FFFF", 12)) {
3432         /*
3433          * Ignore the line
3434          * # @missing: 0000..10FFFF; C; <code point>
3435          * because maps-to-self is already our default, and this line breaks this parser.
3436          */
3437         return;
3438     }
3439     c=(UChar32)strtoul(s, &end, 16);
3440     end=(char *)u_skipWhitespace(end);
3441     if(end<=fields[0][0] || end!=fields[0][1]) {
3442         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3443         *pErrorCode=U_PARSE_ERROR;
3444         return;
3445     }
3446
3447     /* get the status of this mapping */
3448     status=*u_skipWhitespace(fields[1][0]);
3449     if(status!='C' && status!='S' && status!='F' && status!='T') {
3450         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3451         *pErrorCode=U_PARSE_ERROR;
3452         return;
3453     }
3454
3455     /* get the mapping */
3456     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3457     if(U_FAILURE(*pErrorCode)) {
3458         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3459         return;
3460     }
3461
3462     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3463     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3464         simple=c;
3465     }
3466
3467     if(c!=(prev=pData->prev)) {
3468         /*
3469          * Test remaining mappings for the previous code point.
3470          * If a turkic folding was not mentioned, then it should fold the same
3471          * as the regular simple case folding.
3472          */
3473         UChar prevString[2];
3474         int32_t length;
3475
3476         length=0;
3477         U16_APPEND_UNSAFE(prevString, length, prev);
3478         testFold(prev, (~pData->which)&CF_ALL,
3479                  prev, pData->prevSimple,
3480                  prevString, length,
3481                  pData->prevFull, pData->prevFullLength);
3482         pData->prev=pData->prevSimple=c;
3483         length=0;
3484         U16_APPEND_UNSAFE(pData->prevFull, length, c);
3485         pData->prevFullLength=length;
3486         pData->which=0;
3487     }
3488
3489     /*
3490      * Turn the status into a bit set of case foldings to test.
3491      * Remember non-Turkic case foldings as defaults for Turkic mode.
3492      */
3493     switch(status) {
3494     case 'C':
3495         which=CF_SIMPLE|CF_FULL;
3496         pData->prevSimple=simple;
3497         u_memcpy(pData->prevFull, full, count);
3498         pData->prevFullLength=count;
3499         break;
3500     case 'S':
3501         which=CF_SIMPLE;
3502         pData->prevSimple=simple;
3503         break;
3504     case 'F':
3505         which=CF_FULL;
3506         u_memcpy(pData->prevFull, full, count);
3507         pData->prevFullLength=count;
3508         break;
3509     case 'T':
3510         which=CF_TURKIC;
3511         break;
3512     default:
3513         which=0;
3514         break; /* won't happen because of test above */
3515     }
3516
3517     testFold(c, which, simple, simple, full, count, full, count);
3518
3519     /* remember which case foldings of c have been tested */
3520     pData->which|=which;
3521
3522     /* remove c from the set of ones not mentioned in CaseFolding.txt */
3523     uset_remove(pData->notSeen, c);
3524 }
3525
3526 static void
3527 TestCaseFolding() {
3528     CaseFoldingData data={ NULL };
3529     char *fields[3][2];
3530     UErrorCode errorCode;
3531
3532     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3533
3534     errorCode=U_ZERO_ERROR;
3535     /* test BMP & plane 1 - nothing interesting above */
3536     data.notSeen=uset_open(0, 0x1ffff);
3537     data.prevFullLength=1; /* length of full case folding of U+0000 */
3538
3539     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3540     if(U_SUCCESS(errorCode)) {
3541         int32_t i, start, end;
3542
3543         /* add a pseudo-last line to finish testing of the actual last one */
3544         fields[0][0]=lastLine;
3545         fields[0][1]=lastLine+6;
3546         fields[1][0]=lastLine+7;
3547         fields[1][1]=lastLine+9;
3548         fields[2][0]=lastLine+10;
3549         fields[2][1]=lastLine+17;
3550         caseFoldingLineFn(&data, fields, 3, &errorCode);
3551
3552         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3553         for(i=0;
3554             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3555                 U_SUCCESS(errorCode);
3556             ++i
3557         ) {
3558             do {
3559                 testFoldToSelf(start, CF_ALL);
3560             } while(++start<=end);
3561         }
3562     }
3563
3564     uset_close(data.notSeen);
3565 }
3566
3567 static void TestBinaryCharacterPropertiesAPI() {
3568     // API test only. See intltest/ucdtest.cpp for functional test.
3569     UErrorCode errorCode = U_ZERO_ERROR;
3570     const USet *set = u_getBinaryPropertySet(-1, &errorCode);
3571     if (U_SUCCESS(errorCode)) {
3572         log_err("u_getBinaryPropertySet(-1) did not fail\n");
3573     }
3574     errorCode = U_ZERO_ERROR;
3575     set = u_getBinaryPropertySet(UCHAR_BINARY_LIMIT, &errorCode);
3576     if (U_SUCCESS(errorCode)) {
3577         log_err("u_getBinaryPropertySet(UCHAR_BINARY_LIMIT) did not fail\n");
3578     }
3579     errorCode = U_ZERO_ERROR;
3580     set = u_getBinaryPropertySet(UCHAR_WHITE_SPACE, &errorCode);
3581     if (!uset_contains(set, 0x20) || uset_contains(set, 0x61)) {
3582         log_err("u_getBinaryPropertySet(UCHAR_WHITE_SPACE) wrong contents\n");
3583     }
3584 }
3585
3586 static void TestIntCharacterPropertiesAPI() {
3587     // API test only. See intltest/ucdtest.cpp for functional test.
3588     UErrorCode errorCode = U_ZERO_ERROR;
3589     const UCPMap *map = u_getIntPropertyMap(UCHAR_INT_START - 1, &errorCode);
3590     if (U_SUCCESS(errorCode)) {
3591         log_err("u_getIntPropertyMap(UCHAR_INT_START - 1) did not fail\n");
3592     }
3593     errorCode = U_ZERO_ERROR;
3594     map = u_getIntPropertyMap(UCHAR_INT_LIMIT, &errorCode);
3595     if (U_SUCCESS(errorCode)) {
3596         log_err("u_getIntPropertyMap(UCHAR_INT_LIMIT) did not fail\n");
3597     }
3598     errorCode = U_ZERO_ERROR;
3599     map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &errorCode);
3600     if (ucpmap_get(map, 0x20) != U_SPACE_SEPARATOR || ucpmap_get(map, 0x23456) != U_OTHER_LETTER) {
3601         log_err("u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY) wrong contents\n");
3602     }
3603 }