icuSources/test/cintltst/cucdtst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2004, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File CUCDTST.C
   9 *
  10 * Modification History:
  11 *        Name                     Description
  12 *     Madhu Katragadda            Ported for C API, added tests for string functions
  13 *********************************************************************************
  14 */
  15
  16 #include <string.h>
  17 #include <math.h>
  18 #include <stdlib.h>
  19
  20 #include "unicode/utypes.h"
  21 #include "unicode/uchar.h"
  22 #include "unicode/putil.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/uloc.h"
  25
  26 #include "cintltst.h"
  27 #include "putilimp.h"
  28 #include "uparse.h"
  29 #include "ucase.h"
  30 #include "uprops.h"
  31 #include "uset_imp.h"
  32 #include "usc_impl.h"
  33 #include "unormimp.h"
  34 #include "udatamem.h" /* for testing ucase_openBinary() */
  35 #include "cucdapi.h"
  36
  37 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  38
  39 /* prototypes --------------------------------------------------------------- */
  40
  41 static void TestUpperLower(void);
  42 static void TestLetterNumber(void);
  43 static void TestMisc(void);
  44 static void TestPOSIX(void);
  45 static void TestControlPrint(void);
  46 static void TestIdentifier(void);
  47 static void TestUnicodeData(void);
  48 static void TestCodeUnit(void);
  49 static void TestCodePoint(void);
  50 static void TestCharLength(void);
  51 static void TestCharNames(void);
  52 static void TestMirroring(void);
  53 /*       void TestUScriptCodeAPI(void);*/    /* defined in cucdapi.h */
  54 static void TestUScriptRunAPI(void);
  55 static void TestAdditionalProperties(void);
  56 static void TestNumericProperties(void);
  57 static void TestPropertyNames(void);
  58 static void TestPropertyValues(void);
  59 static void TestConsistency(void);
  60 static void TestUCase(void);
  61
  62 /* internal methods used */
  63 static int32_t MakeProp(char* str);
  64 static int32_t MakeDir(char* str);
  65
  66 /* test data ---------------------------------------------------------------- */
  67
  68 static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
  69 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
  70 static const int32_t tagValues[] =
  71     {
  72     /* Mn */ U_NON_SPACING_MARK,
  73     /* Mc */ U_COMBINING_SPACING_MARK,
  74     /* Me */ U_ENCLOSING_MARK,
  75     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
  76     /* Nl */ U_LETTER_NUMBER,
  77     /* No */ U_OTHER_NUMBER,
  78     /* Zs */ U_SPACE_SEPARATOR,
  79     /* Zl */ U_LINE_SEPARATOR,
  80     /* Zp */ U_PARAGRAPH_SEPARATOR,
  81     /* Cc */ U_CONTROL_CHAR,
  82     /* Cf */ U_FORMAT_CHAR,
  83     /* Cs */ U_SURROGATE,
  84     /* Co */ U_PRIVATE_USE_CHAR,
  85     /* Cn */ U_UNASSIGNED,
  86     /* Lu */ U_UPPERCASE_LETTER,
  87     /* Ll */ U_LOWERCASE_LETTER,
  88     /* Lt */ U_TITLECASE_LETTER,
  89     /* Lm */ U_MODIFIER_LETTER,
  90     /* Lo */ U_OTHER_LETTER,
  91     /* Pc */ U_CONNECTOR_PUNCTUATION,
  92     /* Pd */ U_DASH_PUNCTUATION,
  93     /* Ps */ U_START_PUNCTUATION,
  94     /* Pe */ U_END_PUNCTUATION,
  95     /* Po */ U_OTHER_PUNCTUATION,
  96     /* Sm */ U_MATH_SYMBOL,
  97     /* Sc */ U_CURRENCY_SYMBOL,
  98     /* Sk */ U_MODIFIER_SYMBOL,
  99     /* So */ U_OTHER_SYMBOL,
 100     /* Pi */ U_INITIAL_PUNCTUATION,
 101     /* Pf */ U_FINAL_PUNCTUATION
 102     };
 103
 104 static const char dirStrings[][5] = {
 105     "L",
 106     "R",
 107     "EN",
 108     "ES",
 109     "ET",
 110     "AN",
 111     "CS",
 112     "B",
 113     "S",
 114     "WS",
 115     "ON",
 116     "LRE",
 117     "LRO",
 118     "AL",
 119     "RLE",
 120     "RLO",
 121     "PDF",
 122     "NSM",
 123     "BN"
 124 };
 125
 126 void addUnicodeTest(TestNode** root);
 127
 128 void addUnicodeTest(TestNode** root)
 129 {
 130     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
 131     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
 132     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
 133     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
 134     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
 135     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
 136     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
 137     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
 138     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
 139     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
 140     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
 141     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
 142     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
 143     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
 144     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
 145     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
 146     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
 147     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
 148     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
 149     addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
 150 }
 151
 152 /*==================================================== */
 153 /* test u_toupper() and u_tolower()                    */
 154 /*==================================================== */
 155 static void TestUpperLower()
 156 {
 157     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
 158     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
 159     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
 160     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 161     int32_t i;
 162
 163     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
 164     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 165
 166 /*
 167 Checks LetterLike Symbols which were previously a source of confusion
 168 [Bertrand A. D. 02/04/98]
 169 */
 170     for (i=0x2100;i<0x2138;i++)
 171     {
 172         if(i!=0x2126 && i!=0x212a && i!=0x212b)
 173         {
 174             if (i != (int)u_tolower(i)) /* itself */
 175                 log_err("Failed case conversion with itself: U+%04x\n", i);
 176             if (i != (int)u_toupper(i))
 177                 log_err("Failed case conversion with itself: U+%04x\n", i);
 178         }
 179     }
 180
 181     for(i=0; i < u_strlen(upper); i++){
 182         if(u_tolower(upper[i]) != lower[i]){
 183             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
 184         }
 185     }
 186
 187     log_verbose("testing upper lower\n");
 188     for (i = 0; i < 21; i++) {
 189
 190         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
 191         {
 192             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
 193         }
 194         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
 195          {
 196             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
 197         }
 198         else if (upperTest[i] != u_tolower(lowerTest[i]))
 199         {
 200             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
 201         }
 202         else if (lowerTest[i] != u_toupper(upperTest[i]))
 203          {
 204             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
 205         }
 206         else if (upperTest[i] != u_tolower(upperTest[i]))
 207         {
 208             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
 209         }
 210         else if (lowerTest[i] != u_toupper(lowerTest[i]))
 211         {
 212             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
 213         }
 214     }
 215     log_verbose("done testing upper lower\n");
 216
 217     log_verbose("testing u_istitle\n");
 218     {
 219         static const UChar expected[] = {
 220             0x1F88,
 221             0x1F89,
 222             0x1F8A,
 223             0x1F8B,
 224             0x1F8C,
 225             0x1F8D,
 226             0x1F8E,
 227             0x1F8F,
 228             0x1F88,
 229             0x1F89,
 230             0x1F8A,
 231             0x1F8B,
 232             0x1F8C,
 233             0x1F8D,
 234             0x1F8E,
 235             0x1F8F,
 236             0x1F98,
 237             0x1F99,
 238             0x1F9A,
 239             0x1F9B,
 240             0x1F9C,
 241             0x1F9D,
 242             0x1F9E,
 243             0x1F9F,
 244             0x1F98,
 245             0x1F99,
 246             0x1F9A,
 247             0x1F9B,
 248             0x1F9C,
 249             0x1F9D,
 250             0x1F9E,
 251             0x1F9F,
 252             0x1FA8,
 253             0x1FA9,
 254             0x1FAA,
 255             0x1FAB,
 256             0x1FAC,
 257             0x1FAD,
 258             0x1FAE,
 259             0x1FAF,
 260             0x1FA8,
 261             0x1FA9,
 262             0x1FAA,
 263             0x1FAB,
 264             0x1FAC,
 265             0x1FAD,
 266             0x1FAE,
 267             0x1FAF,
 268             0x1FBC,
 269             0x1FBC,
 270             0x1FCC,
 271             0x1FCC,
 272             0x1FFC,
 273             0x1FFC,
 274         };
 275         int32_t num = sizeof(expected)/sizeof(expected[0]);
 276         for(i=0; i<num; i++){
 277             if(!u_istitle(expected[i])){
 278                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
 279             }
 280         }
 281
 282     }
 283 }
 284
 285 /* compare two sets, which is not easy with the current (ICU 2.4) C API... */
 286
 287 static UBool
 288 showADiffB(const USet *a, const USet *b,
 289            const char *a_name, const char *b_name,
 290            UBool expect, UBool diffIsError) {
 291     int32_t i, start, end, length;
 292     UBool equal;
 293     UErrorCode errorCode;
 294
 295     errorCode=U_ZERO_ERROR;
 296     equal=TRUE;
 297     i=0;
 298     for(;;) {
 299         length=uset_getItem(a, i, &start, &end, NULL, 0, &errorCode);
 300         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 301             return equal; /* done */
 302         }
 303         if(U_FAILURE(errorCode)) {
 304             log_err("error comparing %s with %s at item %d: %s\n",
 305                 a_name, b_name, i, u_errorName(errorCode));
 306             return FALSE;
 307         }
 308         if(length!=0) {
 309             return equal; /* done with code points, got a string or -1 */
 310         }
 311
 312         if(expect!=uset_containsRange(b, start, end)) {
 313             equal=FALSE;
 314             while(start<=end) {
 315                 if(expect!=uset_contains(b, start)) {
 316                     if(diffIsError) {
 317                         if(expect) {
 318                             log_err("error: %s contains U+%04x but %s does not\n", a_name, start, b_name);
 319                         } else {
 320                             log_err("error: %s and %s both contain U+%04x but should not intersect\n", a_name, b_name, start);
 321                         }
 322                     } else {
 323                         if(expect) {
 324                             log_verbose("info: %s contains U+%04x but %s does not\n", a_name, start, b_name);
 325                         } else {
 326                             log_verbose("info: %s and %s both contain U+%04x but should not intersect\n", a_name, b_name, start);
 327                         }
 328                     }
 329                 }
 330                 ++start;
 331             }
 332         }
 333
 334         ++i;
 335     }
 336 }
 337
 338 static UBool
 339 showAMinusB(const USet *a, const USet *b,
 340             const char *a_name, const char *b_name,
 341             UBool diffIsError) {
 342     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
 343 }
 344
 345 static UBool
 346 showAIntersectB(const USet *a, const USet *b,
 347                 const char *a_name, const char *b_name,
 348                 UBool diffIsError) {
 349     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
 350 }
 351
 352 static UBool
 353 compareUSets(const USet *a, const USet *b,
 354              const char *a_name, const char *b_name,
 355              UBool diffIsError) {
 356     return
 357         showAMinusB(a, b, a_name, b_name, diffIsError) &&
 358         showAMinusB(b, a, b_name, a_name, diffIsError);
 359 }
 360
 361 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
 362 static void TestLetterNumber()
 363 {
 364     UChar i = 0x0000;
 365
 366     log_verbose("Testing for isalpha\n");
 367     for (i = 0x0041; i < 0x005B; i++) {
 368         if (!u_isalpha(i))
 369         {
 370             log_err("Failed isLetter test at  %.4X\n", i);
 371         }
 372     }
 373     for (i = 0x0660; i < 0x066A; i++) {
 374         if (u_isalpha(i))
 375         {
 376             log_err("Failed isLetter test with numbers at %.4X\n", i);
 377         }
 378     }
 379
 380     log_verbose("Testing for isdigit\n");
 381     for (i = 0x0660; i < 0x066A; i++) {
 382         if (!u_isdigit(i))
 383         {
 384             log_verbose("Failed isNumber test at %.4X\n", i);
 385         }
 386     }
 387
 388     log_verbose("Testing for isalnum\n");
 389     for (i = 0x0041; i < 0x005B; i++) {
 390         if (!u_isalnum(i))
 391         {
 392             log_err("Failed isAlNum test at  %.4X\n", i);
 393         }
 394     }
 395     for (i = 0x0660; i < 0x066A; i++) {
 396         if (!u_isalnum(i))
 397         {
 398             log_err("Failed isAlNum test at  %.4X\n", i);
 399         }
 400     }
 401
 402     {
 403         /*
 404          * The following checks work only starting from Unicode 4.0.
 405          * Check the version number here.
 406          */
 407         static UVersionInfo u401={ 4, 0, 1, 0 };
 408         UVersionInfo version;
 409         u_getUnicodeVersion(version);
 410         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
 411             return;
 412         }
 413     }
 414
 415     {
 416         /*
 417          * Sanity check:
 418          * Verify that exactly the digit characters have decimal digit values.
 419          * This assumption is used in the implementation of u_digit()
 420          * (which checks nt=de)
 421          * compared with the parallel java.lang.Character.digit()
 422          * (which checks Nd).
 423          *
 424          * This was not true in Unicode 3.2 and earlier.
 425          * Unicode 4.0 fixed discrepancies.
 426          * Unicode 4.0.1 re-introduced problems in this area due to an
 427          * unintentionally incomplete last-minute change.
 428          */
 429         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
 430         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 431
 432         USet *digits, *decimalValues;
 433         UErrorCode errorCode;
 434
 435         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
 436         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 437         errorCode=U_ZERO_ERROR;
 438         digits=uset_openPattern(digitsPattern, 6, &errorCode);
 439         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
 440
 441         if(U_SUCCESS(errorCode)) {
 442             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
 443         }
 444
 445         uset_close(digits);
 446         uset_close(decimalValues);
 447     }
 448 }
 449
 450 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
 451 static void TestMisc()
 452 {
 453     static const UChar sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
 454     static const UChar sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
 455     static const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6b };
 456     static const UChar sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
 457     static const UChar sampleBase[] = {0x0061, 0x0031, 0x03d2};
 458     static const UChar sampleNonBase[] = {0x002B, 0x0020, 0x203B};
 459 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
 460     static const UChar sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
 461     static const UChar sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
 462     static const UChar sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
 463     static const UChar sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f};
 464
 465
 466     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
 467
 468     uint32_t mask;
 469
 470     int32_t i;
 471     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
 472     UVersionInfo realVersion;
 473
 474     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
 475
 476     log_verbose("Testing for isspace and nonspaces\n");
 477     for (i = 0; i < 5; i++) {
 478         if (!(u_isspace(sampleSpaces[i])) ||
 479                 (u_isspace(sampleNonSpaces[i])))
 480         {
 481             log_err("Space char test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
 482         }
 483         if (!(u_isJavaSpaceChar(sampleSpaces[i])) ||
 484                 (u_isJavaSpaceChar(sampleNonSpaces[i])))
 485         {
 486             log_err("u_isJavaSpaceChar() test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
 487         }
 488     }
 489
 490     log_verbose("Testing for isspace and nonspaces\n");
 491     for (i = 0; i < 5; i++) {
 492         if (!(u_isWhitespace(sampleWhiteSpaces[i])) ||
 493                 (u_isWhitespace(sampleNonWhiteSpaces[i])))
 494         {
 495             log_err("White Space char test error : %lx or %lx \n", sampleWhiteSpaces[i], sampleNonWhiteSpaces[i]);
 496         }
 497     }
 498
 499     log_verbose("Testing for isdefined\n");
 500     for (i = 0; i < 3; i++) {
 501         if ((u_isdefined(sampleUndefined[i])) ||
 502                 !(u_isdefined(sampleDefined[i])))
 503         {
 504             log_err("Undefined char test error : U+%04x or U+%04x\n", (int32_t)sampleUndefined[i], (int32_t)sampleDefined[i]);
 505         }
 506     }
 507
 508     log_verbose("Testing for isbase\n");
 509     for (i = 0; i < 3; i++) {
 510         if ((u_isbase(sampleNonBase[i])) ||
 511                 !(u_isbase(sampleBase[i])))
 512         {
 513             log_err("Non-baseform char test error : U+%04x or U+%04x",(int32_t)sampleNonBase[i], (int32_t)sampleBase[i]);
 514         }
 515     }
 516
 517     log_verbose("Testing for isdigit \n");
 518     for (i = 0; i < 4; i++) {
 519         if ((u_isdigit(sampleDigits[i]) &&
 520             (u_charDigitValue(sampleDigits[i])!= sampleDigitValues[i])) ||
 521             (u_isdigit(sampleNonDigits[i]))) {
 522             log_err("Digit char test error : %lx   or   %lx\n", sampleDigits[i], sampleNonDigits[i]);
 523         }
 524     }
 525
 526     /* Tests the ICU version #*/
 527     u_getVersion(realVersion);
 528     u_versionToString(realVersion, icuVersion);
 529     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
 530     {
 531         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
 532     }
 533 #if defined(ICU_VERSION)
 534     /* test only happens where we have configure.in with VERSION - sanity check. */
 535     if(strcmp(U_ICU_VERSION, ICU_VERSION))
 536     {
 537         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
 538     }
 539 #endif
 540
 541     /* test U_GC_... */
 542     if(
 543         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
 544         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
 545         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
 546         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
 547         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
 548         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
 549     ) {
 550         log_err("error: U_GET_GC_MASK does not work properly\n");
 551     }
 552
 553     mask=0;
 554     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
 555
 556     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
 557     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
 558     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
 559     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
 560     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
 561
 562     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
 563     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
 564     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
 565
 566     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
 567     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
 568     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
 569
 570     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
 571     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
 572     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
 573
 574     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
 575     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
 576     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
 577     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
 578
 579     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
 580     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
 581     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
 582     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
 583     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
 584
 585     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
 586     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
 587     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
 588     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
 589
 590     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
 591     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
 592
 593     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 594         log_err("error: problems with U_GC_XX_MASK constants\n");
 595     }
 596
 597     mask=0;
 598     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
 599     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
 600     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
 601     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
 602     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
 603     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
 604     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
 605
 606     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 607         log_err("error: problems with U_GC_Y_MASK constants\n");
 608     }
 609     {
 610         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
 611         for(i=0; i<10; i++){
 612             if(digit[i]!=u_forDigit(i,10)){
 613                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
 614             }
 615         }
 616     }
 617
 618     /* test u_digit() */
 619     {
 620         static const struct {
 621             UChar32 c;
 622             int8_t radix, value;
 623         } data[]={
 624             /* base 16 */
 625             { 0x0031, 16, 1 },
 626             { 0x0038, 16, 8 },
 627             { 0x0043, 16, 12 },
 628             { 0x0066, 16, 15 },
 629             { 0x00e4, 16, -1 },
 630             { 0x0662, 16, 2 },
 631             { 0x06f5, 16, 5 },
 632             { 0xff13, 16, 3 },
 633             { 0xff41, 16, 10 },
 634
 635             /* base 8 */
 636             { 0x0031, 8, 1 },
 637             { 0x0038, 8, -1 },
 638             { 0x0043, 8, -1 },
 639             { 0x0066, 8, -1 },
 640             { 0x00e4, 8, -1 },
 641             { 0x0662, 8, 2 },
 642             { 0x06f5, 8, 5 },
 643             { 0xff13, 8, 3 },
 644             { 0xff41, 8, -1 },
 645
 646             /* base 36 */
 647             { 0x5a, 36, 35 },
 648             { 0x7a, 36, 35 },
 649             { 0xff3a, 36, 35 },
 650             { 0xff5a, 36, 35 },
 651
 652             /* wrong radix values */
 653             { 0x0031, 1, -1 },
 654             { 0xff3a, 37, -1 }
 655         };
 656
 657         for(i=0; i<LENGTHOF(data); ++i) {
 658             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
 659                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
 660                         data[i].c,
 661                         data[i].radix,
 662                         u_digit(data[i].c, data[i].radix),
 663                         data[i].value);
 664             }
 665         }
 666     }
 667 }
 668
 669 /* test C/POSIX-style functions --------------------------------------------- */
 670
 671 /* bit flags */
 672 #define ISAL     1
 673 #define ISLO     2
 674 #define ISUP     4
 675
 676 #define ISDI     8
 677 #define ISXD  0x10
 678
 679 #define ISAN  0x20
 680
 681 #define ISPU  0x40
 682 #define ISGR  0x80
 683 #define ISPR 0x100
 684
 685 #define ISSP 0x200
 686 #define ISBL 0x400
 687 #define ISCN 0x800
 688
 689 /* C/POSIX-style functions, in the same order as the bit flags */
 690 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
 691
 692 static const struct {
 693     IsPOSIXClass *fn;
 694     const char *name;
 695 } posixClasses[]={
 696     { u_isalpha, "isalpha" },
 697     { u_islower, "islower" },
 698     { u_isupper, "isupper" },
 699     { u_isdigit, "isdigit" },
 700     { u_isxdigit, "isxdigit" },
 701     { u_isalnum, "isalnum" },
 702     { u_ispunct, "ispunct" },
 703     { u_isgraph, "isgraph" },
 704     { u_isprint, "isprint" },
 705     { u_isspace, "isspace" },
 706     { u_isblank, "isblank" },
 707     { u_iscntrl, "iscntrl" }
 708 };
 709
 710 static const struct {
 711     UChar32 c;
 712     uint32_t posixResults;
 713 } posixData[]={
 714     { 0x0008,                                                        ISCN },    /* backspace */
 715     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
 716     { 0x000a,                                              ISSP|     ISCN },    /* LF */
 717     { 0x000c,                                              ISSP|     ISCN },    /* FF */
 718     { 0x000d,                                              ISSP|     ISCN },    /* CR */
 719     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
 720     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
 721     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
 722     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
 723     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
 724     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
 725     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
 726     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
 727     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
 728     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
 729     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
 730     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
 731     { 0x0600,                                                        ISCN },    /* arabic number sign */
 732     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
 733     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
 734     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
 735     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
 736     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
 737     { 0x200b,                                                        ISCN },    /* ZWSP */
 738   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
 739     { 0x200e,                                                        ISCN },    /* LRM */
 740     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
 741     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
 742     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
 743     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
 744     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
 745     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
 746     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
 747     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
 748 };
 749
 750 static void
 751 TestPOSIX() {
 752     uint32_t mask;
 753     int32_t cl, i;
 754     UBool expect;
 755
 756     mask=1;
 757     for(cl=0; cl<12; ++cl) {
 758         for(i=0; i<LENGTHOF(posixData); ++i) {
 759             expect=(UBool)((posixData[i].posixResults&mask)!=0);
 760             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
 761                 log_err("u_%s(U+%04x)=%s is wrong\n",
 762                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
 763             }
 764         }
 765         mask<<=1;
 766     }
 767 }
 768
 769 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
 770 static void TestControlPrint()
 771 {
 772     const UChar sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
 773     const UChar sampleNonControl[] = {0x61, 0x0031, 0x00e2};
 774     const UChar samplePrintable[] = {0x0042, 0x005f, 0x2014};
 775     const UChar sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
 776     UChar32 c;
 777     int i;
 778
 779     log_verbose("Testing for iscontrol\n");
 780     for (i = 0; i < LENGTHOF(sampleControl); i++) {
 781         if (!u_iscntrl(sampleControl[i]))
 782         {
 783             log_err("Control char test error : U+%04x should be control but is not\n", (int32_t)sampleControl[i]);
 784         }
 785     }
 786
 787     log_verbose("Testing for !iscontrol\n");
 788     for (i = 0; i < LENGTHOF(sampleNonControl); i++) {
 789         if (u_iscntrl(sampleNonControl[i]))
 790         {
 791             log_err("Control char test error : U+%04x should not be control but is\n", (int32_t)sampleNonControl[i]);
 792         }
 793     }
 794
 795     log_verbose("testing for isprintable\n");
 796     for (i = 0; i < 3; i++) {
 797         if (!u_isprint(samplePrintable[i]))
 798         {
 799             log_err("Printable char test error : U+%04x should be printable but is not\n", (int32_t)samplePrintable[i]);
 800         }
 801         if (u_isprint(sampleNonPrintable[i]))
 802         {
 803             log_err("Printable char test error : U+%04x should not be printable but is\n", (int32_t)sampleNonPrintable[i]);
 804         }
 805     }
 806
 807     /* test all ISO 8 controls */
 808     for(c=0; c<=0x9f; ++c) {
 809         if(c==0x20) {
 810             /* skip ASCII graphic characters and continue with DEL */
 811             c=0x7f;
 812         }
 813         if(!u_iscntrl(c)) {
 814             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
 815         }
 816         if(!u_isISOControl(c)) {
 817             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
 818         }
 819         if(u_isprint(c)) {
 820             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
 821         }
 822     }
 823
 824     /* test all Latin-1 graphic characters */
 825     for(c=0x20; c<=0xff; ++c) {
 826         if(c==0x7f) {
 827             c=0xa0;
 828         } else if(c==0xad) {
 829             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
 830             ++c;
 831         }
 832         if(!u_isprint(c)) {
 833             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
 834         }
 835     }
 836 }
 837
 838 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
 839 static void TestIdentifier()
 840 {
 841     const UChar sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
 842     const UChar sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
 843     const UChar sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
 844     const UChar sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
 845     const UChar sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
 846     const UChar sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
 847     const UChar sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
 848     const UChar sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
 849     const UChar sampleIDIgnore[] = {0x0006, 0x0010, 0x206b};
 850     const UChar sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
 851
 852     int i;
 853
 854     log_verbose("Testing sampleJavaID start \n");
 855     for (i = 0; i < 3; i++) {
 856         if (!(u_isJavaIDStart(sampleJavaIDStart[i])) ||
 857                 (u_isJavaIDStart(sampleNonJavaIDStart[i])))
 858             log_err("Java ID Start char test error : %lx or %lx\n",
 859             sampleJavaIDStart[i], sampleNonJavaIDStart[i]);
 860     }
 861
 862     log_verbose("Testing sampleJavaID part \n");
 863     for (i = 0; i < 3; i++) {
 864         if (!(u_isJavaIDPart(sampleJavaIDPart[i])) ||
 865                 (u_isJavaIDPart(sampleNonJavaIDPart[i])))
 866             log_err("Java ID Part char test error : %lx or %lx\n",
 867              sampleJavaIDPart[i], sampleNonJavaIDPart[i]);
 868     }
 869
 870     log_verbose("Testing sampleUnicodeID start \n");
 871     for (i = 0; i < 3; i++) {
 872         /* T_test_logln_ustr((int32_t)i); */
 873         if (!(u_isIDStart(sampleUnicodeIDStart[i])) ||
 874                 (u_isIDStart(sampleNonUnicodeIDStart[i])))
 875         {
 876             log_err("Unicode ID Start char test error : %lx  or  %lx\n", sampleUnicodeIDStart[i],
 877                                     sampleNonUnicodeIDStart[i]);
 878         }
 879     }
 880
 881     log_verbose("Testing sample unicode ID part \n");
 882     for (i = 2; i < 3; i++) {   /* nos *** starts with 2 instead of 0, until clarified */
 883         /* T_test_logln_ustr((int32_t)i); */
 884         if (!(u_isIDPart(sampleUnicodeIDPart[i])) ||
 885                 (u_isIDPart(sampleNonUnicodeIDPart[i])))
 886            {
 887             log_err("Unicode ID Part char test error : %lx  or  %lx", sampleUnicodeIDPart[i], sampleNonUnicodeIDPart[i]);
 888             }
 889     }
 890
 891     log_verbose("Testing  sampleId ignore\n");
 892     for (i = 0; i < 3; i++) {
 893         /*T_test_logln_ustr((int32_t)i); */
 894         if (!(u_isIDIgnorable(sampleIDIgnore[i])) ||
 895                 (u_isIDIgnorable(sampleNonIDIgnore[i])))
 896         {
 897             log_err("ID ignorable char test error : U+%04x  or  U+%04x\n", sampleIDIgnore[i], sampleNonIDIgnore[i]);
 898         }
 899     }
 900 }
 901
 902 /* for each line of UnicodeData.txt, check some of the properties */
 903 /*
 904  * ### TODO
 905  * This test fails incorrectly if the First or Last code point of a repetitive area
 906  * is overridden, which is allowed and is encouraged for the PUAs.
 907  * Currently, this means that both area First/Last and override lines are
 908  * tested against the properties from the API,
 909  * and the area boundary will not match and cause an error.
 910  *
 911  * This function should detect area boundaries and skip them for the test of individual
 912  * code points' properties.
 913  * Then it should check that the areas contain all the same properties except where overridden.
 914  * For this, it would have had to set a flag for which code points were listed explicitly.
 915  */
 916 static void U_CALLCONV
 917 unicodeDataLineFn(void *context,
 918                   char *fields[][2], int32_t fieldCount,
 919                   UErrorCode *pErrorCode)
 920 {
 921     char buffer[100];
 922     char *end;
 923     uint32_t value;
 924     UChar32 c;
 925     int32_t i;
 926     int8_t type;
 927
 928     /* get the character code, field 0 */
 929     c=strtoul(fields[0][0], &end, 16);
 930     if(end<=fields[0][0] || end!=fields[0][1]) {
 931         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
 932         return;
 933     }
 934     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
 935         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
 936         return;
 937     }
 938
 939     /* get general category, field 2 */
 940     *fields[2][1]=0;
 941     type = (int8_t)tagValues[MakeProp(fields[2][0])];
 942     if(u_charType(c)!=type) {
 943         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
 944     }
 945     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
 946         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
 947     }
 948
 949     /* get canonical combining class, field 3 */
 950     value=strtoul(fields[3][0], &end, 10);
 951     if(end<=fields[3][0] || end!=fields[3][1]) {
 952         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
 953         return;
 954     }
 955     if(value>255) {
 956         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
 957         return;
 958     }
 959 #if !UCONFIG_NO_NORMALIZATION
 960     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
 961         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
 962     }
 963 #endif
 964
 965     /* get BiDi category, field 4 */
 966     *fields[4][1]=0;
 967     i=MakeDir(fields[4][0]);
 968     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
 969         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
 970     }
 971
 972     /* get ISO Comment, field 11 */
 973     *fields[11][1]=0;
 974     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
 975     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
 976         log_err("error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
 977             c, u_errorName(*pErrorCode),
 978             U_FAILURE(*pErrorCode) ? buffer : "[error]",
 979             fields[11][0]);
 980     }
 981
 982     /* get uppercase mapping, field 12 */
 983     if(fields[12][0]!=fields[12][1]) {
 984         value=strtoul(fields[12][0], &end, 16);
 985         if(end!=fields[12][1]) {
 986             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
 987             return;
 988         }
 989         if((UChar32)value!=u_toupper(c)) {
 990             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
 991         }
 992     } else {
 993         /* no case mapping: the API must map the code point to itself */
 994         if(c!=u_toupper(c)) {
 995             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
 996         }
 997     }
 998
 999     /* get lowercase mapping, field 13 */
1000     if(fields[13][0]!=fields[13][1]) {
1001         value=strtoul(fields[13][0], &end, 16);
1002         if(end!=fields[13][1]) {
1003             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1004             return;
1005         }
1006         if((UChar32)value!=u_tolower(c)) {
1007             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1008         }
1009     } else {
1010         /* no case mapping: the API must map the code point to itself */
1011         if(c!=u_tolower(c)) {
1012             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1013         }
1014     }
1015
1016     /* get titlecase mapping, field 14 */
1017     if(fields[14][0]!=fields[14][1]) {
1018         value=strtoul(fields[14][0], &end, 16);
1019         if(end!=fields[14][1]) {
1020             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1021             return;
1022         }
1023         if((UChar32)value!=u_totitle(c)) {
1024             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1025         }
1026     } else {
1027         /* no case mapping: the API must map the code point to itself */
1028         if(c!=u_totitle(c)) {
1029             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1030         }
1031     }
1032 }
1033
1034 static UBool U_CALLCONV
1035 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1036     static const UChar32 test[][2]={
1037         {0x41, U_UPPERCASE_LETTER},
1038         {0x308, U_NON_SPACING_MARK},
1039         {0xfffe, U_GENERAL_OTHER_TYPES},
1040         {0xe0041, U_FORMAT_CHAR},
1041         {0xeffff, U_UNASSIGNED}
1042     };
1043
1044     int32_t i, count;
1045
1046     if(0!=strcmp((const char *)context, "a1")) {
1047         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1048         return FALSE;
1049     }
1050
1051     count=LENGTHOF(test);
1052     for(i=0; i<count; ++i) {
1053         if(start<=test[i][0] && test[i][0]<limit) {
1054             if(type!=(UCharCategory)test[i][1]) {
1055                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1056                         start, limit, (long)type, test[i][0], test[i][1]);
1057             }
1058             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1059             return i==(count-1) ? FALSE : TRUE;
1060         }
1061     }
1062
1063     if(start>test[count-1][0]) {
1064         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1065                 start, limit, (long)type);
1066         return FALSE;
1067     }
1068
1069     return TRUE;
1070 }
1071
1072 static UBool U_CALLCONV
1073 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1074     /* default Bidi classes for unassigned code points */
1075     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1076         { 0x0590, U_LEFT_TO_RIGHT },
1077         { 0x0600, U_RIGHT_TO_LEFT },
1078         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1079         { 0x0900, U_RIGHT_TO_LEFT },
1080         { 0xFB1D, U_LEFT_TO_RIGHT },
1081         { 0xFB50, U_RIGHT_TO_LEFT },
1082         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1083         { 0xFE70, U_LEFT_TO_RIGHT },
1084         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1085         { 0x10800, U_LEFT_TO_RIGHT },
1086         { 0x11000, U_RIGHT_TO_LEFT },
1087         { 0x110000, U_LEFT_TO_RIGHT }
1088     };
1089
1090     UChar32 c;
1091     int32_t i;
1092     UCharDirection shouldBeDir;
1093
1094     /*
1095      * LineBreak.txt specifies:
1096      *   #  - Assigned characters that are not listed explicitly are given the value
1097      *   #    "AL".
1098      *   #  - Unassigned characters are given the value "XX".
1099      *
1100      * PUA characters are listed explicitly with "XX".
1101      * Verify that no assigned character has "XX".
1102      */
1103     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1104         c=start;
1105         while(c<limit) {
1106             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1107                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1108             }
1109             ++c;
1110         }
1111     }
1112
1113     /*
1114      * Verify default Bidi classes.
1115      * For recent Unicode versions, see UCD.html.
1116      *
1117      * For older Unicode versions:
1118      * See table 3-7 "Bidirectional Character Types" in UAX #9.
1119      * http://www.unicode.org/reports/tr9/
1120      *
1121      * See also DerivedBidiClass.txt for Cn code points!
1122      *
1123      * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1124      * changed some default values.
1125      * In particular, non-characters and unassigned Default Ignorable Code Points
1126      * change from L to BN.
1127      *
1128      * UCD.html version 4.0.1 does not yet reflect these changes.
1129      */
1130     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1131         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1132         c=start;
1133         for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1134             if((int32_t)c<defaultBidi[i][0]) {
1135                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1136                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1137                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1138                     } else {
1139                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1140                     }
1141
1142                     if( u_charDirection(c)!=shouldBeDir ||
1143                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1144                     ) {
1145                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1146                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1147                     }
1148                     ++c;
1149                 }
1150             }
1151         }
1152     }
1153
1154     return TRUE;
1155 }
1156
1157 /* tests for several properties */
1158 static void TestUnicodeData()
1159 {
1160     char newPath[256];
1161     char backupPath[256];
1162     UVersionInfo expectVersionArray;
1163     UVersionInfo versionArray;
1164     char *fields[15][2];
1165     UErrorCode errorCode;
1166     UChar32 c;
1167     int8_t type;
1168
1169     /* Look inside ICU_DATA first */
1170     strcpy(newPath, u_getDataDirectory());
1171     strcat(newPath, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
1172
1173     /* As a fallback, try to guess where the source data was located
1174      *    at the time ICU was built, and look there.
1175      */
1176     strcpy(backupPath, ctest_dataSrcDir());
1177     strcat(backupPath, U_FILE_SEP_STRING);
1178     strcat(backupPath, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
1179
1180     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1181     u_getUnicodeVersion(versionArray);
1182     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1183     {
1184         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1185         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1186     }
1187
1188 #if defined(ICU_UNICODE_VERSION)
1189     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1190     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1191     {
1192          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1193     }
1194 #endif
1195
1196     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1197         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1198     }
1199
1200     errorCode=U_ZERO_ERROR;
1201     u_parseDelimitedFile(newPath, ';', fields, 15, unicodeDataLineFn, NULL, &errorCode);
1202     if(errorCode==U_FILE_ACCESS_ERROR) {
1203         errorCode=U_ZERO_ERROR;
1204         u_parseDelimitedFile(backupPath, ';', fields, 15, unicodeDataLineFn, NULL, &errorCode);
1205     }
1206     if(U_FAILURE(errorCode)) {
1207         log_err("error parsing UnicodeData.txt: %s\n", u_errorName(errorCode));
1208         return; /* if we couldn't parse UnicodeData.txt, we should return */
1209     }
1210
1211     /* sanity check on repeated properties */
1212     for(c=0xfffe; c<=0x10ffff;) {
1213         type=u_charType(c);
1214         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1215             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1216         }
1217         if(type!=U_UNASSIGNED) {
1218             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1219         }
1220         if((c&0xffff)==0xfffe) {
1221             ++c;
1222         } else {
1223             c+=0xffff;
1224         }
1225     }
1226
1227     /* test that PUA is not "unassigned" */
1228     for(c=0xe000; c<=0x10fffd;) {
1229         type=u_charType(c);
1230         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1231             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1232         }
1233         if(type==U_UNASSIGNED) {
1234             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1235         } else if(type!=U_PRIVATE_USE_CHAR) {
1236             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1237         }
1238         if(c==0xf8ff) {
1239             c=0xf0000;
1240         } else if(c==0xffffd) {
1241             c=0x100000;
1242         } else {
1243             ++c;
1244         }
1245     }
1246
1247     /* test u_enumCharTypes() */
1248     u_enumCharTypes(enumTypeRange, "a1");
1249
1250     /* check default properties */
1251     u_enumCharTypes(enumDefaultsRange, NULL);
1252 }
1253
1254 static void TestCodeUnit(){
1255     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1256
1257     int32_t i;
1258
1259     for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1260         UChar c=codeunit[i];
1261         if(i<4){
1262             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1263                 log_err("ERROR: U+%04x is a single", c);
1264             }
1265
1266         }
1267         if(i >= 4 && i< 8){
1268             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1269                 log_err("ERROR: U+%04x is a first surrogate", c);
1270             }
1271         }
1272         if(i >= 8 && i< 12){
1273             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1274                 log_err("ERROR: U+%04x is a second surrogate", c);
1275             }
1276         }
1277     }
1278
1279 }
1280
1281 static void TestCodePoint(){
1282     const UChar32 codePoint[]={
1283         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1284         0xd800,
1285         0xdbff,
1286         0xdc00,
1287         0xdfff,
1288         0xdc04,
1289         0xd821,
1290         /*not a surrogate, valid, isUnicodeChar , not Error*/
1291         0x20ac,
1292         0xd7ff,
1293         0xe000,
1294         0xe123,
1295         0x0061,
1296         0xe065,
1297         0x20402,
1298         0x24506,
1299         0x23456,
1300         0x20402,
1301         0x10402,
1302         0x23456,
1303         /*not a surrogate, not valid, isUnicodeChar, isError */
1304         0x0015,
1305         0x009f,
1306         /*not a surrogate, not valid, not isUnicodeChar, isError */
1307         0xffff,
1308         0xfffe,
1309     };
1310     int32_t i;
1311     for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1312         UChar32 c=codePoint[i];
1313         if(i<6){
1314             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1315                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1316             }
1317             if(UTF_IS_VALID(c)){
1318                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1319             }
1320             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1321                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1322             }
1323             if(UTF_IS_ERROR(c)){
1324                 log_err("ERROR: isError() failed for U+%04x\n", c);
1325             }
1326         }else if(i >=6 && i<18){
1327             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1328                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1329             }
1330             if(!UTF_IS_VALID(c)){
1331                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1332             }
1333             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1334                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1335             }
1336             if(UTF_IS_ERROR(c)){
1337                 log_err("ERROR: isError() failed for U+%04x\n", c);
1338             }
1339         }else if(i >=18 && i<20){
1340             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1341                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1342             }
1343             if(UTF_IS_VALID(c)){
1344                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1345             }
1346             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1347                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1348             }
1349             if(!UTF_IS_ERROR(c)){
1350                 log_err("ERROR: isError() failed for U+%04x\n", c);
1351             }
1352         }
1353         else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1354             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1355                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1356             }
1357             if(UTF_IS_VALID(c)){
1358                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1359             }
1360             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1361                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1362             }
1363             if(!UTF_IS_ERROR(c)){
1364                 log_err("ERROR: isError() failed for U+%04x\n", c);
1365             }
1366         }
1367     }
1368
1369     if(
1370         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1371         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1372         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1373         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1374     ) {
1375         log_err("error with U_IS_BMP()\n");
1376     }
1377
1378     if(
1379         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1380         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1381         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1382         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1383     ) {
1384         log_err("error with U_IS_SUPPLEMENTARY()\n");
1385     }
1386 }
1387
1388 static void TestCharLength()
1389 {
1390     const int32_t codepoint[]={
1391         1, 0x0061,
1392         1, 0xe065,
1393         1, 0x20ac,
1394         2, 0x20402,
1395         2, 0x23456,
1396         2, 0x24506,
1397         2, 0x20402,
1398         2, 0x10402,
1399         1, 0xd7ff,
1400         1, 0xe000
1401     };
1402
1403     int32_t i;
1404     UBool multiple;
1405     for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1406         UChar32 c=codepoint[i+1];
1407         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1408             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
1409         }
1410         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1411         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1412             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1413         }
1414     }
1415 }
1416
1417 /*internal functions ----*/
1418 static int32_t MakeProp(char* str)
1419 {
1420     int32_t result = 0;
1421     char* matchPosition =0;
1422
1423     matchPosition = strstr(tagStrings, str);
1424     if (matchPosition == 0)
1425     {
1426         log_err("unrecognized type letter ");
1427         log_err(str);
1428     }
1429     else
1430         result = (int32_t)((matchPosition - tagStrings) / 2);
1431     return result;
1432 }
1433
1434 static int32_t MakeDir(char* str)
1435 {
1436     int32_t pos = 0;
1437     for (pos = 0; pos < 19; pos++) {
1438         if (strcmp(str, dirStrings[pos]) == 0) {
1439             return pos;
1440         }
1441     }
1442     return -1;
1443 }
1444
1445 /* test u_charName() -------------------------------------------------------- */
1446
/*
 * Sample code points with the names expected for them.
 * The name enumeration callback enumCharNamesFn() compares
 *   name    with the U_UNICODE_CHAR_NAME result,
 *   oldName with the U_UNICODE_10_CHAR_NAME result, and
 *   extName with the U_EXTENDED_CHAR_NAME result.
 * An empty string means that no name of that kind is expected.
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName;
} names[]={
    {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
    {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
    {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
    {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
    {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
    /* surrogates and noncharacters only have an extended name */
    {0xd800, "", "", "<lead surrogate-D800>" },
    {0xdc00, "", "", "<trail surrogate-DC00>" },
    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" },
    {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
    {0xffff, "", "", "<noncharacter-FFFF>" },
    {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
};
1464
1465 static UBool
1466 enumCharNamesFn(void *context,
1467                 UChar32 code, UCharNameChoice nameChoice,
1468                 const char *name, int32_t length) {
1469     int32_t *pCount=(int32_t *)context;
1470     int i;
1471
1472     if(length<=0 || length!=(int32_t)strlen(name)) {
1473         /* should not be called with an empty string or invalid length */
1474         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1475         return TRUE;
1476     }
1477
1478     ++*pCount;
1479     for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1480         if(code==(UChar32)names[i].code) {
1481             switch (nameChoice) {
1482                 case U_EXTENDED_CHAR_NAME:
1483                     if(0!=strcmp(name, names[i].extName)) {
1484                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1485                     }
1486                     break;
1487                 case U_UNICODE_CHAR_NAME:
1488                     if(0!=strcmp(name, names[i].name)) {
1489                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1490                     }
1491                     break;
1492                 case U_UNICODE_10_CHAR_NAME:
1493                     if(names[i].oldName[0]==0 || 0!=strcmp(name, names[i].oldName)) {
1494                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, names[i].oldName);
1495                     }
1496                     break;
1497                 case U_CHAR_NAME_CHOICE_COUNT:
1498                     break;
1499             }
1500             break;
1501         }
1502     }
1503     return TRUE;
1504 }
1505
/*
 * State carried through the extended-name enumeration by enumExtCharNamesFn().
 */
struct enumExtCharNamesContext {
    /* counter handed to enumCharNamesFn(), which increments it once per accepted name */
    uint32_t length;
    /* code point seen in the previous callback; used to check that code points arrive consecutively */
    int32_t last;
};
1510
1511 static UBool
1512 enumExtCharNamesFn(void *context,
1513                 UChar32 code, UCharNameChoice nameChoice,
1514                 const char *name, int32_t length) {
1515     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1516
1517     if (ecncp->last != (int32_t) code - 1) {
1518         if (ecncp->last < 0) {
1519             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1520         } else {
1521             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1522         }
1523     }
1524     ecncp->last = (int32_t) code;
1525
1526     if (!*name) {
1527         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1528     }
1529
1530     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1531 }
1532
1533 /**
1534  * This can be made more efficient by moving it into putil.c and having
1535  * it directly access the ebcdic translation tables.
1536  * TODO: If we get this method in putil.c, then delete it from here.
1537  */
1538 static UChar
1539 u_charToUChar(char c) {
1540     UChar uc;
1541     u_charsToUChars(&c, &uc, 1);
1542     return uc;
1543 }
1544
1545 static void
1546 TestCharNames() {
1547     static char name[80];
1548     UErrorCode errorCode=U_ZERO_ERROR;
1549     struct enumExtCharNamesContext extContext;
1550     int32_t length;
1551     UChar32 c;
1552     int32_t i;
1553
1554     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1555     length=uprv_getMaxCharNameLength();
1556     if(length==0) {
1557         /* no names data available */
1558         return;
1559     }
1560     if(length<83) { /* Unicode 3.2 max char name length */
1561         log_err("uprv_getMaxCharNameLength()=%d is too short");
1562     }
1563     /* ### TODO same tests for max ISO comment length as for max name length */
1564
1565     log_verbose("Testing u_charName()\n");
1566     for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1567         /* modern Unicode character name */
1568         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1569         if(U_FAILURE(errorCode)) {
1570             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1571             return;
1572         }
1573         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1574             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1575         }
1576
1577         /* find the modern name */
1578         if (*names[i].name) {
1579             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1580             if(U_FAILURE(errorCode)) {
1581                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1582                 return;
1583             }
1584             if(c!=(UChar32)names[i].code) {
1585                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1586             }
1587         }
1588
1589         /* Unicode 1.0 character name */
1590         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1591         if(U_FAILURE(errorCode)) {
1592             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1593             return;
1594         }
1595         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1596             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1597         }
1598
1599         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1600         if(names[i].oldName[0]!=0 /* && length>0 */) {
1601             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1602             if(U_FAILURE(errorCode)) {
1603                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1604                 return;
1605             }
1606             if(c!=(UChar32)names[i].code) {
1607                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1608             }
1609         }
1610     }
1611
1612     /* test u_enumCharNames() */
1613     length=0;
1614     errorCode=U_ZERO_ERROR;
1615     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1616     if(U_FAILURE(errorCode) || length<94140) {
1617         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1618     }
1619
1620     extContext.length = 0;
1621     extContext.last = -1;
1622     errorCode=U_ZERO_ERROR;
1623     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1624     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1625         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1626     }
1627
1628     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1629     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1630         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1631     }
1632
1633     /* Test getCharNameCharacters */
1634     if(!QUICK) {
1635         enum { BUFSIZE = 256 };
1636         UErrorCode ec = U_ZERO_ERROR;
1637         char buf[BUFSIZE];
1638         int32_t maxLength;
1639         UChar32 cp;
1640         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1641         int32_t l1, l2;
1642         UBool map[256];
1643         UBool ok;
1644
1645         USet* set = uset_open(1, 0); /* empty set */
1646         USet* dumb = uset_open(1, 0); /* empty set */
1647
1648         /*
1649          * uprv_getCharNameCharacters() will likely return more lowercase
1650          * letters than actual character names contain because
1651          * it includes all the characters in lowercased names of
1652          * general categories, for the full possible set of extended names.
1653          */
1654         {
1655             USetAdder sa={
1656                 NULL,
1657                 uset_add,
1658                 uset_addRange,
1659                 uset_addString
1660             };
1661             sa.set=set;
1662             uprv_getCharNameCharacters(&sa);
1663         }
1664
1665         /* build set the dumb (but sure-fire) way */
1666         for (i=0; i<256; ++i) {
1667             map[i] = FALSE;
1668         }
1669
1670         maxLength=0;
1671         for (cp=0; cp<0x110000; ++cp) {
1672             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1673                                      buf, BUFSIZE, &ec);
1674             if (U_FAILURE(ec)) {
1675                 log_err("FAIL: u_charName failed when it shouldn't\n");
1676                 uset_close(set);
1677                 uset_close(dumb);
1678                 return;
1679             }
1680             if(len>maxLength) {
1681                 maxLength=len;
1682             }
1683
1684             for (i=0; i<len; ++i) {
1685                 if (!map[(uint8_t) buf[i]]) {
1686                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1687                     map[(uint8_t) buf[i]] = TRUE;
1688                 }
1689             }
1690
1691             /* test for leading/trailing whitespace */
1692             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1693                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1694             }
1695         }
1696
1697         if(map[(uint8_t)'\t']) {
1698             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1699         }
1700
1701         length=uprv_getMaxCharNameLength();
1702         if(length!=maxLength) {
1703             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1704                     length, maxLength);
1705         }
1706
1707         /* compare the sets.  Where is my uset_equals?!! */
1708         ok=TRUE;
1709         for(i=0; i<256; ++i) {
1710             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1711                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1712                     /* ignore lowercase a-z that are in set but not in dumb */
1713                     ok=TRUE;
1714                 } else {
1715                     ok=FALSE;
1716                     break;
1717                 }
1718             }
1719         }
1720
1721         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1722         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1723         if (U_FAILURE(ec)) {
1724             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1725             uset_close(set);
1726             uset_close(dumb);
1727             return;
1728         }
1729
1730         if (l1 >= BUFSIZE) {
1731             l1 = BUFSIZE-1;
1732             pat[l1] = 0;
1733         }
1734         if (l2 >= BUFSIZE) {
1735             l2 = BUFSIZE-1;
1736             dumbPat[l2] = 0;
1737         }
1738
1739         if (!ok) {
1740             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1741                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1742         } else if(VERBOSITY) {
1743             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1744         }
1745
1746         uset_close(set);
1747         uset_close(dumb);
1748     }
1749
1750     /* ### TODO: test error cases and other interesting things */
1751 }
1752
1753 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
1754
1755 static void
1756 TestMirroring() {
1757     log_verbose("Testing u_isMirrored()\n");
1758     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1759          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1760         )
1761     ) {
1762         log_err("u_isMirrored() does not work correctly\n");
1763     }
1764
1765     log_verbose("Testing u_charMirror()\n");
1766     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1767          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab
1768          )
1769     ) {
1770         log_err("u_charMirror() does not work correctly\n");
1771     }
1772 }
1773
1774
1775 struct RunTestData
1776 {
1777     const char *runText;
1778     UScriptCode runCode;
1779 };
1780
1781 typedef struct RunTestData RunTestData;
1782
1783 static void
1784 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
1785                 const char *prefix)
1786 {
1787     int32_t run, runStart, runLimit;
1788     UScriptCode runCode;
1789
1790     /* iterate over all the runs */
1791     run = 0;
1792     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
1793         if (runStart != runStarts[run]) {
1794             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
1795                 prefix, run, runStarts[run], runStart);
1796         }
1797
1798         if (runLimit != runStarts[run + 1]) {
1799             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
1800                 prefix, run, runStarts[run + 1], runLimit);
1801         }
1802
1803         if (runCode != testData[run].runCode) {
1804             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
1805                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
1806         }
1807
1808         run += 1;
1809
1810         /* stop when we've seen all the runs we expect to see */
1811         if (run >= nRuns) {
1812             break;
1813         }
1814     }
1815
1816     /* Complain if we didn't see then number of runs we expected */
1817     if (run != nRuns) {
1818         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
1819     }
1820 }
1821
1822 static void
1823 TestUScriptRunAPI()
1824 {
1825     static const RunTestData testData1[] = {
1826         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
1827         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
1828         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
1829         {"English (", USCRIPT_LATIN},
1830         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
1831         {") ", USCRIPT_LATIN},
1832         {"\\u6F22\\u5B75", USCRIPT_HAN},
1833         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
1834         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
1835         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
1836     };
1837
1838     static const RunTestData testData2[] = {
1839        {"((((((((((abc))))))))))", USCRIPT_LATIN}
1840     };
1841
1842     static const struct {
1843       const RunTestData *testData;
1844       int32_t nRuns;
1845     } testDataEntries[] = {
1846         {testData1, LENGTHOF(testData1)},
1847         {testData2, LENGTHOF(testData2)}
1848     };
1849
1850     static const int32_t nTestEntries = LENGTHOF(testDataEntries);
1851     int32_t testEntry;
1852
1853     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
1854         UChar testString[1024];
1855         int32_t runStarts[256];
1856         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
1857         const RunTestData *testData = testDataEntries[testEntry].testData;
1858
1859         int32_t run, stringLimit;
1860         UScriptRun *scriptRun = NULL;
1861         UErrorCode err;
1862
1863         /*
1864          * Fill in the test string and the runStarts array.
1865          */
1866         stringLimit = 0;
1867         for (run = 0; run < nTestRuns; run += 1) {
1868             runStarts[run] = stringLimit;
1869             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
1870             /*stringLimit -= 1;*/
1871         }
1872
1873         /* The limit of the last run */
1874         runStarts[nTestRuns] = stringLimit;
1875
1876         /*
1877          * Make sure that calling uscript_OpenRun with a NULL text pointer
1878          * and a non-zero text length returns the correct error.
1879          */
1880         err = U_ZERO_ERROR;
1881         scriptRun = uscript_openRun(NULL, stringLimit, &err);
1882
1883         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1884             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1885         }
1886
1887         if (scriptRun != NULL) {
1888             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
1889             uscript_closeRun(scriptRun);
1890         }
1891
1892         /*
1893          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
1894          * and a zero text length returns the correct error.
1895          */
1896         err = U_ZERO_ERROR;
1897         scriptRun = uscript_openRun(testString, 0, &err);
1898
1899         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1900             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1901         }
1902
1903         if (scriptRun != NULL) {
1904             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
1905             uscript_closeRun(scriptRun);
1906         }
1907
1908         /*
1909          * Make sure that calling uscript_openRun with a NULL text pointer
1910          * and a zero text length doesn't return an error.
1911          */
1912         err = U_ZERO_ERROR;
1913         scriptRun = uscript_openRun(NULL, 0, &err);
1914
1915         if (U_FAILURE(err)) {
1916             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
1917         }
1918
1919         /* Make sure that the empty iterator doesn't find any runs */
1920         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
1921             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
1922         }
1923
1924         /*
1925          * Make sure that calling uscript_setRunText with a NULL text pointer
1926          * and a non-zero text length returns the correct error.
1927          */
1928         err = U_ZERO_ERROR;
1929         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
1930
1931         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1932             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1933         }
1934
1935         /*
1936          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
1937          * and a zero text length returns the correct error.
1938          */
1939         err = U_ZERO_ERROR;
1940         uscript_setRunText(scriptRun, testString, 0, &err);
1941
1942         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1943             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1944         }
1945
1946         /*
1947          * Now call uscript_setRunText on the empty iterator
1948          * and make sure that it works.
1949          */
1950         err = U_ZERO_ERROR;
1951         uscript_setRunText(scriptRun, testString, stringLimit, &err);
1952
1953         if (U_FAILURE(err)) {
1954             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
1955         } else {
1956             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
1957         }
1958
1959         uscript_closeRun(scriptRun);
1960
1961         /*
1962          * Now open an interator over the testString
1963          * using uscript_openRun and make sure that it works
1964          */
1965         scriptRun = uscript_openRun(testString, stringLimit, &err);
1966
1967         if (U_FAILURE(err)) {
1968             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
1969         } else {
1970             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
1971         }
1972
1973         /* Now reset the iterator, and make sure
1974          * that it still works.
1975          */
1976         uscript_resetRun(scriptRun);
1977
1978         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
1979
1980         /* Close the iterator */
1981         uscript_closeRun(scriptRun);
1982     }
1983 }
1984
1985 /* test additional, non-core properties */
1986 static void
1987 TestAdditionalProperties() {
1988     /* test data for u_charAge() */
1989     static const struct {
1990         UChar32 c;
1991         UVersionInfo version;
1992     } charAges[]={
1993         {0x41,    { 1, 1, 0, 0 }},
1994         {0xffff,  { 1, 1, 0, 0 }},
1995         {0x20ab,  { 2, 0, 0, 0 }},
1996         {0x2fffe, { 2, 0, 0, 0 }},
1997         {0x20ac,  { 2, 1, 0, 0 }},
1998         {0xfb1d,  { 3, 0, 0, 0 }},
1999         {0x3f4,   { 3, 1, 0, 0 }},
2000         {0x10300, { 3, 1, 0, 0 }},
2001         {0x220,   { 3, 2, 0, 0 }},
2002         {0xff60,  { 3, 2, 0, 0 }}
2003     };
2004
2005     /* test data for u_hasBinaryProperty() */
2006     static int32_t
2007     props[][3]={ /* code point, property, value */
2008         { 0x0627, UCHAR_ALPHABETIC, TRUE },
2009         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2010         { 0x2028, UCHAR_ALPHABETIC, FALSE },
2011
2012         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2013         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2014
2015         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2016         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2017
2018         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2019         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2020
2021         { 0x058a, UCHAR_DASH, TRUE },
2022         { 0x007e, UCHAR_DASH, FALSE },
2023
2024         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2025         { 0x3000, UCHAR_DIACRITIC, FALSE },
2026
2027         { 0x0e46, UCHAR_EXTENDER, TRUE },
2028         { 0x0020, UCHAR_EXTENDER, FALSE },
2029
2030 #if !UCONFIG_NO_NORMALIZATION
2031         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2032         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2033         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2034
2035         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2036         { 0x0308, UCHAR_NFD_INERT, FALSE },
2037
2038         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2039         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2040
2041         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2042         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2043         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2044         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2045         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2046         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2047
2048         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2049         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2050
2051         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2052         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2053         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2054         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2055         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2056         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2057 #endif
2058
2059         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2060         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2061         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2062
2063         { 0x30fb, UCHAR_HYPHEN, TRUE },
2064         { 0xfe58, UCHAR_HYPHEN, FALSE },
2065
2066         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2067         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2068         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2069
2070         { 0x2172, UCHAR_ID_START, TRUE },
2071         { 0x007a, UCHAR_ID_START, TRUE },
2072         { 0x0039, UCHAR_ID_START, FALSE },
2073
2074         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2075         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2076         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2077
2078         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2079         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2080
2081         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2082         { 0x0345, UCHAR_LOWERCASE, TRUE },
2083         { 0x0030, UCHAR_LOWERCASE, FALSE },
2084
2085         { 0x1d7a9, UCHAR_MATH, TRUE },
2086         { 0x2135, UCHAR_MATH, TRUE },
2087         { 0x0062, UCHAR_MATH, FALSE },
2088
2089         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2090         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2091         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2092
2093         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2094         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2095         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2096
2097         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2098         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2099
2100         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2101         { 0x2162, UCHAR_UPPERCASE, TRUE },
2102         { 0x0345, UCHAR_UPPERCASE, FALSE },
2103
2104         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2105         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2106         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2107
2108         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2109         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2110         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2111
2112         { 0x16ee, UCHAR_XID_START, TRUE },
2113         { 0x23456, UCHAR_XID_START, TRUE },
2114         { 0x1d1aa, UCHAR_XID_START, FALSE },
2115
2116         /*
2117          * Version break:
2118          * The following properties are only supported starting with the
2119          * Unicode version indicated in the second field.
2120          */
2121         { -1, 0x320, 0 },
2122
2123         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2124         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2125         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2126
2127         { 0x0341, UCHAR_DEPRECATED, TRUE },
2128         { 0xe0041, UCHAR_DEPRECATED, FALSE },
2129
2130         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2131         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2132         { 0xff9f, UCHAR_GRAPHEME_BASE, TRUE },      /* changed from Unicode 3.2 to 4 */
2133
2134         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2135         { 0xff9f, UCHAR_GRAPHEME_EXTEND, FALSE },   /* changed from Unicode 3.2 to 4 */
2136         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2137
2138         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2139         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2140
2141         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2142         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2143
2144         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2145         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2146
2147         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2148         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2149
2150         { 0x2e9b, UCHAR_RADICAL, TRUE },
2151         { 0x4e00, UCHAR_RADICAL, FALSE },
2152
2153         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2154         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2155
2156         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2157         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2158
2159         { -1, 0x401, 0 },
2160
2161         { 0x002e, UCHAR_S_TERM, TRUE },
2162         { 0x0061, UCHAR_S_TERM, FALSE },
2163
2164         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2165         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2166         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2167         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2168
2169         /* enum/integer type properties */
2170
2171         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2172         /* test default Bidi classes for unassigned code points */
2173         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2174         { 0x05a2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2175         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2176         { 0x07f2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2177         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2178         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2179         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2180         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2181         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2182         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2183
2184         { 0x0606, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2185         { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2186         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2187         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2188         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2189         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2190         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2191         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2192
2193         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2194         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2195         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2196         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2197         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2198         { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2199         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2200         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2201         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2202         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2203         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2204
2205         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2206         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2207
2208         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2209         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2210         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2211         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2212         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2213         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2214         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2215         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2216         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2217
2218         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2219         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2220         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2221         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2222         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2223         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2224         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2225         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2226         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2227         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2228         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2229         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2230         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2231         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2232         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2233         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2234         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2235
2236         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2237         { 0xd7d7, UCHAR_GENERAL_CATEGORY, 0 },
2238
2239         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2240         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2241         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2242         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2243         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2244         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL },
2245
2246         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2247         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2248         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2249         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2250         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2251         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2252         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2253         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2254
2255         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2256         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2257         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2258         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2259         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2260         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2261         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2262         { 0xac03, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2263         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2264         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2265         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2266         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2267         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2268         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2269         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2270         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2271         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2272
2273         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2274
2275         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2276
2277         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2278         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2279         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2280         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2281
2282         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2283         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2284         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2285         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2286
2287         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2288         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2289         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2290         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2291
2292         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2293         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2294         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2295         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2296         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2297         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2298
2299         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2300         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2301         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2302         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2303
2304         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2305         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2306         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2307         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2308         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2309
2310         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2311
2312         /* undefined UProperty values */
2313         { 0x61, 0x4a7, 0 },
2314         { 0x234bc, 0x15ed, 0 }
2315     };
2316
2317     UVersionInfo version;
2318     UChar32 c;
2319     int32_t i, result, uVersion;
2320     UProperty which;
2321
2322     /* what is our Unicode version? */
2323     u_getUnicodeVersion(version);
2324     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2325
2326     u_charAge(0x20, version);
2327     if(version[0]==0) {
2328         /* no additional properties available */
2329         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2330         return;
2331     }
2332
2333     /* test u_charAge() */
2334     for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2335         u_charAge(charAges[i].c, version);
2336         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2337             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2338                 charAges[i].c,
2339                 version[0], version[1], version[2], version[3],
2340                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2341         }
2342     }
2343
2344     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2345         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2346         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2347         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2348         u_getIntPropertyMinValue(0x2345)!=0
2349     ) {
2350         log_err("error: u_getIntPropertyMinValue() wrong\n");
2351     }
2352
2353     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1 ||
2354         u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1 ||
2355         u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1)!=1 ||
2356         u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ||
2357         u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ||
2358         u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1 ||
2359         u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1 ||
2360         u_getIntPropertyMaxValue(0x2345)!=-1 /*JB#2410*/ ||
2361         u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1) ||
2362         u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1) ||
2363         u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1) ||
2364         u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)
2365     ) {
2366         log_err("error: u_getIntPropertyMaxValue() wrong\n");
2367     }
2368
2369     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2370     for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2371         if(props[i][0]<0) {
2372             /* Unicode version break */
2373             if(uVersion<props[i][1]) {
2374                 break; /* do not test properties that are not yet supported */
2375             } else {
2376                 continue; /* skip this row */
2377             }
2378         }
2379
2380         c=(UChar32)props[i][0];
2381         which=(UProperty)props[i][1];
2382
2383         if(which<UCHAR_INT_START) {
2384             result=u_hasBinaryProperty(c, which);
2385             if(result!=props[i][2]) {
2386                 log_err("error: u_hasBinaryProperty(U+%04lx, %d)=%d is wrong (props[%d])\n",
2387                         c, which, result, i);
2388             }
2389         }
2390
2391         result=u_getIntPropertyValue(c, which);
2392         if(result!=props[i][2]) {
2393             log_err("error: u_getIntPropertyValue(U+%04lx, 0x1000+%d)=%d is wrong, should be %d (props[%d])\n",
2394                     c, (int32_t)which-0x1000, result, props[i][2], i);
2395         }
2396
2397         /* test separate functions, too */
2398         switch((UProperty)props[i][1]) {
2399         case UCHAR_ALPHABETIC:
2400             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2401                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2402                         props[i][0], result, i);
2403             }
2404             break;
2405         case UCHAR_LOWERCASE:
2406             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2407                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2408                         props[i][0], result, i);
2409             }
2410             break;
2411         case UCHAR_UPPERCASE:
2412             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2413                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2414                         props[i][0], result, i);
2415             }
2416             break;
2417         case UCHAR_WHITE_SPACE:
2418             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2419                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2420                         props[i][0], result, i);
2421             }
2422             break;
2423         default:
2424             break;
2425         }
2426     }
2427 }
2428
2429 static void
2430 TestNumericProperties(void) {
2431     /* see UnicodeData.txt, DerivedNumericValues.txt */
2432     static const struct {
2433         UChar32 c;
2434         int32_t type;
2435         double numValue;
2436     } values[]={
2437         { 0x0F33, U_NT_NUMERIC, -1./2. },
2438         { 0x0C66, U_NT_DECIMAL, 0 },
2439         { 0x96f6, U_NT_NUMERIC, 0 },
2440         { 0x2159, U_NT_NUMERIC, 1./6. },
2441         { 0x00BD, U_NT_NUMERIC, 1./2. },
2442         { 0x0031, U_NT_DECIMAL, 1. },
2443         { 0x4e00, U_NT_NUMERIC, 1. },
2444         { 0x58f1, U_NT_NUMERIC, 1. },
2445         { 0x10320, U_NT_NUMERIC, 1. },
2446         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2447         { 0x00B2, U_NT_DIGIT, 2. },
2448         { 0x5f10, U_NT_NUMERIC, 2. },
2449         { 0x1813, U_NT_DECIMAL, 3. },
2450         { 0x5f0e, U_NT_NUMERIC, 3. },
2451         { 0x2173, U_NT_NUMERIC, 4. },
2452         { 0x8086, U_NT_NUMERIC, 4. },
2453         { 0x278E, U_NT_DIGIT, 5. },
2454         { 0x1D7F2, U_NT_DECIMAL, 6. },
2455         { 0x247A, U_NT_DIGIT, 7. },
2456         { 0x7396, U_NT_NUMERIC, 9. },
2457         { 0x1372, U_NT_NUMERIC, 10. },
2458         { 0x216B, U_NT_NUMERIC, 12. },
2459         { 0x16EE, U_NT_NUMERIC, 17. },
2460         { 0x249A, U_NT_NUMERIC, 19. },
2461         { 0x303A, U_NT_NUMERIC, 30. },
2462         { 0x5345, U_NT_NUMERIC, 30. },
2463         { 0x32B2, U_NT_NUMERIC, 37. },
2464         { 0x1375, U_NT_NUMERIC, 40. },
2465         { 0x10323, U_NT_NUMERIC, 50. },
2466         { 0x0BF1, U_NT_NUMERIC, 100. },
2467         { 0x964c, U_NT_NUMERIC, 100. },
2468         { 0x217E, U_NT_NUMERIC, 500. },
2469         { 0x2180, U_NT_NUMERIC, 1000. },
2470         { 0x4edf, U_NT_NUMERIC, 1000. },
2471         { 0x2181, U_NT_NUMERIC, 5000. },
2472         { 0x137C, U_NT_NUMERIC, 10000. },
2473         { 0x4e07, U_NT_NUMERIC, 10000. },
2474         { 0x4ebf, U_NT_NUMERIC, 100000000. },
2475         { 0x5146, U_NT_NUMERIC, 1000000000000. },
2476         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2477         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2478         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2479         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2480         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2481         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }
2482     };
2483
2484     double nv;
2485     UChar32 c;
2486     int32_t i, type;
2487
2488     for(i=0; i<LENGTHOF(values); ++i) {
2489         c=values[i].c;
2490         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2491         nv=u_getNumericValue(c);
2492
2493         if(type!=values[i].type) {
2494             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2495         }
2496         if(0.000001 <= fabs(nv - values[i].numValue)) {
2497             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2498         }
2499     }
2500 }
2501
2502 /**
2503  * Test the property names and property value names API.
2504  */
2505 static void
2506 TestPropertyNames(void) {
2507     int32_t p, v, choice=0, rev;
2508     UBool atLeastSomething = FALSE;
2509
2510     for (p=0; ; ++p) {
2511         UBool sawProp = FALSE;
2512         if(p > 10 && !atLeastSomething) {
2513           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2514           return;
2515         }
2516
2517         for (choice=0; ; ++choice) {
2518             const char* name = u_getPropertyName(p, choice);
2519             if (name) {
2520                 if (!sawProp) log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2521                 log_verbose("%d=\"%s\"", choice, name);
2522                 sawProp = TRUE;
2523                 atLeastSomething = TRUE;
2524
2525                 /* test reverse mapping */
2526                 rev = u_getPropertyEnum(name);
2527                 if (rev != p) {
2528                     log_err("Property round-trip failure: %d -> %s -> %d\n",
2529                             p, name, rev);
2530                 }
2531             }
2532             if (!name && choice>0) break;
2533         }
2534         if (sawProp) {
2535             /* looks like a valid property; check the values */
2536             const char* pname = u_getPropertyName(p, U_LONG_PROPERTY_NAME);
2537             int32_t max = 0;
2538             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2539                 max = 255;
2540             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2541                 /* it's far too slow to iterate all the way up to
2542                    the real max, U_GC_P_MASK */
2543                 max = U_GC_NL_MASK;
2544             } else if (p == UCHAR_BLOCK) {
2545                 /* UBlockCodes, unlike other values, start at 1 */
2546                 max = 1;
2547             }
2548             log_verbose("\n");
2549             for (v=-1; ; ++v) {
2550                 UBool sawValue = FALSE;
2551                 for (choice=0; ; ++choice) {
2552                     const char* vname = u_getPropertyValueName(p, v, choice);
2553                     if (vname) {
2554                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2555                         log_verbose("%d=\"%s\"", choice, vname);
2556                         sawValue = TRUE;
2557
2558                         /* test reverse mapping */
2559                         rev = u_getPropertyValueEnum(p, vname);
2560                         if (rev != v) {
2561                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2562                                     pname, v, vname, rev);
2563                         }
2564                     }
2565                     if (!vname && choice>0) break;
2566                 }
2567                 if (sawValue) {
2568                     log_verbose("\n");
2569                 }
2570                 if (!sawValue && v>=max) break;
2571             }
2572         }
2573         if (!sawProp) {
2574             if (p>=UCHAR_STRING_LIMIT) {
2575                 break;
2576             } else if (p>=UCHAR_DOUBLE_LIMIT) {
2577                 p = UCHAR_STRING_START - 1;
2578             } else if (p>=UCHAR_MASK_LIMIT) {
2579                 p = UCHAR_DOUBLE_START - 1;
2580             } else if (p>=UCHAR_INT_LIMIT) {
2581                 p = UCHAR_MASK_START - 1;
2582             } else if (p>=UCHAR_BINARY_LIMIT) {
2583                 p = UCHAR_INT_START - 1;
2584             }
2585         }
2586     }
2587 }
2588
2589 /**
2590  * Test the property values API.  See JB#2410.
2591  */
2592 static void
2593 TestPropertyValues(void) {
2594     int32_t i, p, min, max;
2595     UErrorCode ec;
2596
2597     /* Min should be 0 for everything. */
2598     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2599     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2600         min = u_getIntPropertyMinValue(p);
2601         if (min != 0) {
2602             if (p == UCHAR_BLOCK) {
2603                 /* This is okay...for now.  See JB#2487.
2604                    TODO Update this for JB#2487. */
2605             } else {
2606                 const char* name;
2607                 name = u_getPropertyName(p, U_LONG_PROPERTY_NAME);
2608                 if (name == NULL) name = "<ERROR>";
2609                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2610                         name, min);
2611             }
2612         }
2613     }
2614
2615     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2616         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2617         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2618     }
2619
2620     /* Max should be -1 for invalid properties. */
2621     max = u_getIntPropertyMaxValue(-1);
2622     if (max != -1) {
2623         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2624                 max);
2625     }
2626
2627     /* Script should return 0 for an invalid code point. */
2628     for (i=0; i<2; ++i) {
2629         int32_t script;
2630         const char* desc;
2631         ec = U_ZERO_ERROR;
2632         switch (i) {
2633         case 0:
2634             script = uscript_getScript(-1, &ec);
2635             desc = "uscript_getScript(-1)";
2636             break;
2637         case 1:
2638             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
2639             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
2640             break;
2641         default:
2642             log_err("Internal test error. Too many scripts\n");
2643             return;
2644         }
2645         /* We don't explicitly test ec.  It should be U_FAILURE but it
2646            isn't documented as such. */
2647         if (script != 0) {
2648             log_err("FAIL: %s = %d, exp. 0\n",
2649                     desc, script);
2650         }
2651     }
2652 }
2653
2654 /* add characters from a serialized set to a normal one */
2655 static void
2656 _setAddSerialized(USet *set, const USerializedSet *sset) {
2657     UChar32 start, end;
2658     int32_t i, count;
2659
2660     count=uset_getSerializedRangeCount(sset);
2661     for(i=0; i<count; ++i) {
2662         uset_getSerializedRange(sset, i, &start, &end);
2663         uset_addRange(set, start, end);
2664     }
2665 }
2666
2667 /* various tests for consistency of UCD data and API behavior */
2668 static void
2669 TestConsistency() {
2670 #if !UCONFIG_NO_NORMALIZATION
2671     UChar buffer16[300];
2672 #endif
2673     char buffer[300];
2674     USet *set1, *set2, *set3, *set4;
2675     UErrorCode errorCode;
2676
2677 #if !UCONFIG_NO_NORMALIZATION
2678     USerializedSet sset;
2679 #endif
2680     UChar32 start, end;
2681     int32_t i, length;
2682
2683     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
2684     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
2685     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
2686     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
2687     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
2688
2689     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
2690     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
2691     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
2692     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
2693     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
2694
2695     /*
2696      * It used to be that UCD.html and its precursors said
2697      * "Those dashes used to mark connections between pieces of words,
2698      *  plus the Katakana middle dot."
2699      *
2700      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
2701      * but not from Hyphen.
2702      * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
2703      * Therefore, do not show errors when testing the Hyphen property.
2704      */
2705     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
2706                 "known to the UTC and not considered errors.\n");
2707
2708     errorCode=U_ZERO_ERROR;
2709     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
2710     set2=uset_openPattern(dashPattern, 8, &errorCode);
2711     if(U_SUCCESS(errorCode)) {
2712         /* remove the Katakana middle dot(s) from set1 */
2713         uset_remove(set1, 0x30fb);
2714         uset_remove(set1, 0xff65); /* halfwidth variant */
2715         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
2716     } else {
2717         log_err("error opening [:Hyphen:] or [:Dash:] - %s\n", u_errorName(errorCode));
2718     }
2719
2720     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
2721     set3=uset_openPattern(formatPattern, 6, &errorCode);
2722     set4=uset_openPattern(alphaPattern, 14, &errorCode);
2723     if(U_SUCCESS(errorCode)) {
2724         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
2725         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
2726         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
2727     } else {
2728         log_err("error opening [:Cf:] or [:Alpbabetic:] - %s\n", u_errorName(errorCode));
2729     }
2730
2731     uset_close(set1);
2732     uset_close(set2);
2733     uset_close(set3);
2734     uset_close(set4);
2735
2736     /*
2737      * Check that each lowercase character has "small" in its name
2738      * and not "capital".
2739      * There are some such characters, some of which seem odd.
2740      * Use the verbose flag to see these notices.
2741      */
2742     errorCode=U_ZERO_ERROR;
2743     set1=uset_openPattern(lowerPattern, 13, &errorCode);
2744     if(U_SUCCESS(errorCode)) {
2745         for(i=0;; ++i) {
2746             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
2747             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2748                 break; /* done */
2749             }
2750             if(U_FAILURE(errorCode)) {
2751                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
2752                         i, u_errorName(errorCode));
2753                 break;
2754             }
2755             if(length!=0) {
2756                 break; /* done with code points, got a string or -1 */
2757             }
2758
2759             while(start<=end) {
2760                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
2761                 if(U_FAILURE(errorCode)) {
2762                     log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
2763                     errorCode=U_ZERO_ERROR;
2764                     continue;
2765                 }
2766                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
2767                     strstr(buffer, "SMALL CAPITAL")==NULL
2768                 ) {
2769                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
2770                 }
2771                 ++start;
2772             }
2773         }
2774     } else {
2775         log_err("error opening [:Lowercase:] - %s\n", u_errorName(errorCode));
2776     }
2777     uset_close(set1);
2778
2779 #if !UCONFIG_NO_NORMALIZATION
2780
2781     /*
2782      * Test for an example that unorm_getCanonStartSet() delivers
2783      * all characters that compose from the input one,
2784      * even in multiple steps.
2785      * For example, the set for "I" (0049) should contain both
2786      * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
2787      * In general, the set for the middle such character should be a subset
2788      * of the set for the first.
2789      */
2790     set1=uset_open(1, 0);
2791     set2=uset_open(1, 0);
2792
2793     if (unorm_getCanonStartSet(0x49, &sset)) {
2794         _setAddSerialized(set1, &sset);
2795
2796         /* enumerate all characters that are plausible to be latin letters */
2797         for(start=0xa0; start<0x2000; ++start) {
2798             if(unorm_getDecomposition(start, FALSE, buffer16, LENGTHOF(buffer16))>1 && buffer16[0]==0x49) {
2799                 uset_add(set2, start);
2800             }
2801         }
2802
2803         compareUSets(set1, set2,
2804                      "[canon start set of 0049]", "[all c with canon decomp with 0049]",
2805                      TRUE);
2806     } else {
2807       log_err("error calling unorm_getCanonStartSet()\n");
2808     }
2809
2810     uset_close(set1);
2811     uset_close(set2);
2812
2813 #endif
2814 }
2815
2816 /* API coverage for ucase.c */
2817 static void TestUCase() {
2818     UDataMemory *pData;
2819     UCaseProps *csp;
2820     UErrorCode errorCode;
2821
2822     /* coverage for ucase_openBinary() */
2823     errorCode=U_ZERO_ERROR;
2824     pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
2825     if(U_FAILURE(errorCode)) {
2826         log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
2827                     u_errorName(errorCode));
2828         return;
2829     }
2830
2831     csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
2832     if(U_FAILURE(errorCode)) {
2833         log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
2834                 u_errorName(errorCode));
2835         udata_close(pData);
2836         return;
2837     }
2838
2839     if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
2840         log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
2841     }
2842
2843     ucase_close(csp);
2844     udata_close(pData);
2845 }