icuSources/test/cintltst/cucdtst.c

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 1997-2016, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8 /*******************************************************************************
   9 *
  10 * File CUCDTST.C
  11 *
  12 * Modification History:
  13 *        Name                     Description
  14 *     Madhu Katragadda            Ported for C API, added tests for string functions
  15 ********************************************************************************
  16 */
  17
  18 #include <string.h>
  19 #include <math.h>
  20 #include <stdlib.h>
  21
  22 #include "unicode/utypes.h"
  23 #include "unicode/uchar.h"
  24 #include "unicode/putil.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/uloc.h"
  27 #include "unicode/unorm2.h"
  28
  29 #include "cintltst.h"
  30 #include "putilimp.h"
  31 #include "uparse.h"
  32 #include "ucase.h"
  33 #include "ubidi_props.h"
  34 #include "uprops.h"
  35 #include "uset_imp.h"
  36 #include "usc_impl.h"
  37 #include "udatamem.h"
  38 #include "cucdapi.h"
  39 #include "cmemory.h"
  40
  41 /* prototypes --------------------------------------------------------------- */
  42
  43 static void TestUpperLower(void);
  44 static void TestLetterNumber(void);
  45 static void TestMisc(void);
  46 static void TestPOSIX(void);
  47 static void TestControlPrint(void);
  48 static void TestIdentifier(void);
  49 static void TestUnicodeData(void);
  50 static void TestCodeUnit(void);
  51 static void TestCodePoint(void);
  52 static void TestCharLength(void);
  53 static void TestCharNames(void);
  54 static void TestUCharFromNameUnderflow(void);
  55 static void TestMirroring(void);
  56 static void TestUScriptRunAPI(void);
  57 static void TestAdditionalProperties(void);
  58 static void TestNumericProperties(void);
  59 static void TestPropertyNames(void);
  60 static void TestPropertyValues(void);
  61 static void TestConsistency(void);
  62 static void TestUBiDiProps(void);
  63 static void TestCaseFolding(void);
  64
  65 /* internal methods used */
  66 static int32_t MakeProp(char* str);
  67 static int32_t MakeDir(char* str);
  68
  69 /* helpers ------------------------------------------------------------------ */
  70
  71 static void
  72 parseUCDFile(const char *filename,
  73              char *fields[][2], int32_t fieldCount,
  74              UParseLineFn *lineFn, void *context,
  75              UErrorCode *pErrorCode) {
  76     char path[256];
  77     char backupPath[256];
  78
  79     if(U_FAILURE(*pErrorCode)) {
  80         return;
  81     }
  82
  83     /* Look inside ICU_DATA first */
  84     strcpy(path, u_getDataDirectory());
  85     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
  86     strcat(path, filename);
  87
  88     /* As a fallback, try to guess where the source data was located
  89      *    at the time ICU was built, and look there.
  90      */
  91     strcpy(backupPath, ctest_dataSrcDir());
  92     strcat(backupPath, U_FILE_SEP_STRING);
  93     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
  94     strcat(backupPath, filename);
  95
  96     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
  97     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
  98         *pErrorCode=U_ZERO_ERROR;
  99         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
 100     }
 101     if(U_FAILURE(*pErrorCode)) {
 102         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
 103     }
 104 }
 105
 106 /* test data ---------------------------------------------------------------- */
 107
/*
 * Parallel lookup tables for general-category abbreviations.
 *
 * tagStrings is a packed sequence of two-letter general-category codes
 * ("Mn", "Mc", ...); the k-th pair of characters corresponds to tagValues[k].
 * The per-entry comments in tagValues repeat the matching two-letter code.
 * Keep both tables in the same order when adding entries.
 * NOTE(review): presumably consumed by MakeProp(), which is declared above
 * but defined outside this part of the file - confirm.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
 142
/*
 * Bidirectional-class abbreviations, one NUL-terminated string per row.
 * Every abbreviation is at most three characters, so it fits a 5-byte row.
 * NOTE(review): the row index is presumably the matching UCharDirection
 * value and the table is presumably read by MakeDir(); neither is visible
 * in this part of the file - confirm before reordering.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI",
    "LRI",
    "RLI",
    "PDI"
};
 169
 170 void addUnicodeTest(TestNode** root);
 171
 172 void addUnicodeTest(TestNode** root)
 173 {
 174     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
 175     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
 176     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
 177     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
 178     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
 179     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
 180     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
 181     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
 182     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
 183     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
 184     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
 185     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
 186     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
 187     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
 188     addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
 189     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
 190     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
 191     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
 192     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
 193     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
 194     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
 195     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
 196     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
 197     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
 198     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
 199     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
 200 }
 201
 202 /*==================================================== */
 203 /* test u_toupper() and u_tolower()                    */
 204 /*==================================================== */
 205 static void TestUpperLower()
 206 {
 207     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
 208     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
 209     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
 210     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 211     int32_t i;
 212
 213     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
 214     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 215
 216 /*
 217 Checks LetterLike Symbols which were previously a source of confusion
 218 [Bertrand A. D. 02/04/98]
 219 */
 220     for (i=0x2100;i<0x2138;i++)
 221     {
 222         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
 223         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
 224         {
 225             if (i != (int)u_tolower(i)) /* itself */
 226                 log_err("Failed case conversion with itself: U+%04x\n", i);
 227             if (i != (int)u_toupper(i))
 228                 log_err("Failed case conversion with itself: U+%04x\n", i);
 229         }
 230     }
 231
 232     for(i=0; i < u_strlen(upper); i++){
 233         if(u_tolower(upper[i]) != lower[i]){
 234             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
 235         }
 236     }
 237
 238     log_verbose("testing upper lower\n");
 239     for (i = 0; i < 21; i++) {
 240
 241         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
 242         {
 243             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
 244         }
 245         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
 246          {
 247             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
 248         }
 249         else if (upperTest[i] != u_tolower(lowerTest[i]))
 250         {
 251             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
 252         }
 253         else if (lowerTest[i] != u_toupper(upperTest[i]))
 254          {
 255             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
 256         }
 257         else if (upperTest[i] != u_tolower(upperTest[i]))
 258         {
 259             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
 260         }
 261         else if (lowerTest[i] != u_toupper(lowerTest[i]))
 262         {
 263             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
 264         }
 265     }
 266     log_verbose("done testing upper lower\n");
 267
 268     log_verbose("testing u_istitle\n");
 269     {
 270         static const UChar expected[] = {
 271             0x1F88,
 272             0x1F89,
 273             0x1F8A,
 274             0x1F8B,
 275             0x1F8C,
 276             0x1F8D,
 277             0x1F8E,
 278             0x1F8F,
 279             0x1F88,
 280             0x1F89,
 281             0x1F8A,
 282             0x1F8B,
 283             0x1F8C,
 284             0x1F8D,
 285             0x1F8E,
 286             0x1F8F,
 287             0x1F98,
 288             0x1F99,
 289             0x1F9A,
 290             0x1F9B,
 291             0x1F9C,
 292             0x1F9D,
 293             0x1F9E,
 294             0x1F9F,
 295             0x1F98,
 296             0x1F99,
 297             0x1F9A,
 298             0x1F9B,
 299             0x1F9C,
 300             0x1F9D,
 301             0x1F9E,
 302             0x1F9F,
 303             0x1FA8,
 304             0x1FA9,
 305             0x1FAA,
 306             0x1FAB,
 307             0x1FAC,
 308             0x1FAD,
 309             0x1FAE,
 310             0x1FAF,
 311             0x1FA8,
 312             0x1FA9,
 313             0x1FAA,
 314             0x1FAB,
 315             0x1FAC,
 316             0x1FAD,
 317             0x1FAE,
 318             0x1FAF,
 319             0x1FBC,
 320             0x1FBC,
 321             0x1FCC,
 322             0x1FCC,
 323             0x1FFC,
 324             0x1FFC,
 325         };
 326         int32_t num = UPRV_LENGTHOF(expected);
 327         for(i=0; i<num; i++){
 328             if(!u_istitle(expected[i])){
 329                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
 330             }
 331         }
 332
 333     }
 334 }
 335
 336 /* compare two sets and verify that their difference or intersection is empty */
 337 static UBool
 338 showADiffB(const USet *a, const USet *b,
 339            const char *a_name, const char *b_name,
 340            UBool expect, UBool diffIsError) {
 341     USet *aa;
 342     int32_t i, start, end, length;
 343     UErrorCode errorCode;
 344
 345     /*
 346      * expect:
 347      * TRUE  -> a-b should be empty, that is, b should contain all of a
 348      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
 349      */
 350     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
 351         return TRUE;
 352     }
 353
 354     /* clone a to aa because a is const */
 355     aa=uset_open(1, 0);
 356     if(aa==NULL) {
 357         /* unusual problem - out of memory? */
 358         return FALSE;
 359     }
 360     uset_addAll(aa, a);
 361
 362     /* compute the set in question */
 363     if(expect) {
 364         /* a-b */
 365         uset_removeAll(aa, b);
 366     } else {
 367         /* a&b */
 368         uset_retainAll(aa, b);
 369     }
 370
 371     /* aa is not empty because of the initial tests above; show its contents */
 372     errorCode=U_ZERO_ERROR;
 373     i=0;
 374     for(;;) {
 375         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
 376         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 377             break; /* done */
 378         }
 379         if(U_FAILURE(errorCode)) {
 380             log_err("error comparing %s with %s at difference item %d: %s\n",
 381                 a_name, b_name, i, u_errorName(errorCode));
 382             break;
 383         }
 384         if(length!=0) {
 385             break; /* done with code points, got a string or -1 */
 386         }
 387
 388         if(diffIsError) {
 389             if(expect) {
 390                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 391             } else {
 392                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 393             }
 394         } else {
 395             if(expect) {
 396                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 397             } else {
 398                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 399             }
 400         }
 401
 402         ++i;
 403     }
 404
 405     uset_close(aa);
 406     return FALSE;
 407 }
 408
 409 static UBool
 410 showAMinusB(const USet *a, const USet *b,
 411             const char *a_name, const char *b_name,
 412             UBool diffIsError) {
 413     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
 414 }
 415
 416 static UBool
 417 showAIntersectB(const USet *a, const USet *b,
 418                 const char *a_name, const char *b_name,
 419                 UBool diffIsError) {
 420     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
 421 }
 422
 423 static UBool
 424 compareUSets(const USet *a, const USet *b,
 425              const char *a_name, const char *b_name,
 426              UBool diffIsError) {
 427     /*
 428      * Use an arithmetic & not a logical && so that both branches
 429      * are always taken and all differences are shown.
 430      */
 431     return
 432         showAMinusB(a, b, a_name, b_name, diffIsError) &
 433         showAMinusB(b, a, b_name, a_name, diffIsError);
 434 }
 435
 436 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
 437 static void TestLetterNumber()
 438 {
 439     UChar i = 0x0000;
 440
 441     log_verbose("Testing for isalpha\n");
 442     for (i = 0x0041; i < 0x005B; i++) {
 443         if (!u_isalpha(i))
 444         {
 445             log_err("Failed isLetter test at  %.4X\n", i);
 446         }
 447     }
 448     for (i = 0x0660; i < 0x066A; i++) {
 449         if (u_isalpha(i))
 450         {
 451             log_err("Failed isLetter test with numbers at %.4X\n", i);
 452         }
 453     }
 454
 455     log_verbose("Testing for isdigit\n");
 456     for (i = 0x0660; i < 0x066A; i++) {
 457         if (!u_isdigit(i))
 458         {
 459             log_verbose("Failed isNumber test at %.4X\n", i);
 460         }
 461     }
 462
 463     log_verbose("Testing for isalnum\n");
 464     for (i = 0x0041; i < 0x005B; i++) {
 465         if (!u_isalnum(i))
 466         {
 467             log_err("Failed isAlNum test at  %.4X\n", i);
 468         }
 469     }
 470     for (i = 0x0660; i < 0x066A; i++) {
 471         if (!u_isalnum(i))
 472         {
 473             log_err("Failed isAlNum test at  %.4X\n", i);
 474         }
 475     }
 476
 477     {
 478         /*
 479          * The following checks work only starting from Unicode 4.0.
 480          * Check the version number here.
 481          */
 482         static UVersionInfo u401={ 4, 0, 1, 0 };
 483         UVersionInfo version;
 484         u_getUnicodeVersion(version);
 485         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
 486             return;
 487         }
 488     }
 489
 490     {
 491         /*
 492          * Sanity check:
 493          * Verify that exactly the digit characters have decimal digit values.
 494          * This assumption is used in the implementation of u_digit()
 495          * (which checks nt=de)
 496          * compared with the parallel java.lang.Character.digit()
 497          * (which checks Nd).
 498          *
 499          * This was not true in Unicode 3.2 and earlier.
 500          * Unicode 4.0 fixed discrepancies.
 501          * Unicode 4.0.1 re-introduced problems in this area due to an
 502          * unintentionally incomplete last-minute change.
 503          */
 504         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
 505         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 506
 507         USet *digits, *decimalValues;
 508         UErrorCode errorCode;
 509
 510         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
 511         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 512         errorCode=U_ZERO_ERROR;
 513         digits=uset_openPattern(digitsPattern, 6, &errorCode);
 514         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
 515
 516         if(U_SUCCESS(errorCode)) {
 517             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
 518         }
 519
 520         uset_close(digits);
 521         uset_close(decimalValues);
 522     }
 523 }
 524
 525 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
 526                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
 527                                 UBool expected) {
 528     int32_t i;
 529     for (i = 0; i < sampleCharsLength; ++i) {
 530         UBool result = propFn(sampleChars[i]);
 531         if (result != expected) {
 532             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
 533                     propName, sampleChars[i], result);
 534         }
 535     }
 536 }
 537
 538 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
 539 static void TestMisc()
 540 {
 541     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
 542     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
 543     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
 544     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
 545     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
 546     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
 547 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
 548     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
 549     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
 550     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
 551     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
 552
 553     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
 554
 555     uint32_t mask;
 556
 557     int32_t i;
 558     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
 559     UVersionInfo realVersion;
 560
 561     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
 562
 563     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
 564     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
 565
 566     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 567                         sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
 568     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 569                         sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
 570
 571     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 572                         sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE);
 573     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 574                         sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE);
 575
 576     testSampleCharProps(u_isdefined, "u_isdefined",
 577                         sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);
 578     testSampleCharProps(u_isdefined, "u_isdefined",
 579                         sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);
 580
 581     testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE);
 582     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE);
 583
 584     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE);
 585     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE);
 586
 587     for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
 588         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
 589             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
 590                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
 591         }
 592     }
 593
 594     /* Tests the ICU version #*/
 595     u_getVersion(realVersion);
 596     u_versionToString(realVersion, icuVersion);
 597     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
 598     {
 599         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
 600     }
 601 #if defined(ICU_VERSION)
 602     /* test only happens where we have configure.in with VERSION - sanity check. */
 603     if(strcmp(U_ICU_VERSION, ICU_VERSION))
 604     {
 605         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
 606     }
 607 #endif
 608
 609     /* test U_GC_... */
 610     if(
 611         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
 612         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
 613         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
 614         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
 615         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
 616         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
 617     ) {
 618         log_err("error: U_GET_GC_MASK does not work properly\n");
 619     }
 620
 621     mask=0;
 622     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
 623
 624     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
 625     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
 626     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
 627     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
 628     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
 629
 630     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
 631     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
 632     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
 633
 634     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
 635     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
 636     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
 637
 638     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
 639     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
 640     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
 641
 642     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
 643     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
 644     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
 645     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
 646
 647     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
 648     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
 649     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
 650     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
 651     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
 652
 653     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
 654     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
 655     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
 656     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
 657
 658     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
 659     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
 660
 661     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 662         log_err("error: problems with U_GC_XX_MASK constants\n");
 663     }
 664
 665     mask=0;
 666     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
 667     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
 668     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
 669     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
 670     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
 671     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
 672     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
 673
 674     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 675         log_err("error: problems with U_GC_Y_MASK constants\n");
 676     }
 677     {
 678         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
 679         for(i=0; i<10; i++){
 680             if(digit[i]!=u_forDigit(i,10)){
 681                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
 682             }
 683         }
 684     }
 685
 686     /* test u_digit() */
 687     {
 688         static const struct {
 689             UChar32 c;
 690             int8_t radix, value;
 691         } data[]={
 692             /* base 16 */
 693             { 0x0031, 16, 1 },
 694             { 0x0038, 16, 8 },
 695             { 0x0043, 16, 12 },
 696             { 0x0066, 16, 15 },
 697             { 0x00e4, 16, -1 },
 698             { 0x0662, 16, 2 },
 699             { 0x06f5, 16, 5 },
 700             { 0xff13, 16, 3 },
 701             { 0xff41, 16, 10 },
 702
 703             /* base 8 */
 704             { 0x0031, 8, 1 },
 705             { 0x0038, 8, -1 },
 706             { 0x0043, 8, -1 },
 707             { 0x0066, 8, -1 },
 708             { 0x00e4, 8, -1 },
 709             { 0x0662, 8, 2 },
 710             { 0x06f5, 8, 5 },
 711             { 0xff13, 8, 3 },
 712             { 0xff41, 8, -1 },
 713
 714             /* base 36 */
 715             { 0x5a, 36, 35 },
 716             { 0x7a, 36, 35 },
 717             { 0xff3a, 36, 35 },
 718             { 0xff5a, 36, 35 },
 719
 720             /* wrong radix values */
 721             { 0x0031, 1, -1 },
 722             { 0xff3a, 37, -1 }
 723         };
 724
 725         for(i=0; i<UPRV_LENGTHOF(data); ++i) {
 726             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
 727                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
 728                         data[i].c,
 729                         data[i].radix,
 730                         u_digit(data[i].c, data[i].radix),
 731                         data[i].value);
 732             }
 733         }
 734     }
 735 }
 736
 737 /* test C/POSIX-style functions --------------------------------------------- */
 738
/*
 * Bit flags, one per C/POSIX-style classification function.
 * The flag for posixClasses[k] is (1<<k): TestPOSIX() starts with mask 1
 * and shifts it left once per table entry, so the flag values and the
 * order of posixClasses[] must stay in step.
 */
#define ISAL     1      /* u_isalpha */
#define ISLO     2      /* u_islower */
#define ISUP     4      /* u_isupper */

#define ISDI     8      /* u_isdigit */
#define ISXD  0x10      /* u_isxdigit */

#define ISAN  0x20      /* u_isalnum */

#define ISPU  0x40      /* u_ispunct */
#define ISGR  0x80      /* u_isgraph */
#define ISPR 0x100      /* u_isprint */

#define ISSP 0x200      /* u_isspace */
#define ISBL 0x400      /* u_isblank */
#define ISCN 0x800      /* u_iscntrl */
 756
/* C/POSIX-style functions, in the same order as the bit flags */
/* Signature shared by all classification functions listed in posixClasses[]. */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/*
 * Table of the functions under test.
 * fn is the classification function; name is its name without the "u_"
 * prefix, which TestPOSIX() adds back when printing an error.
 * Entry k is checked against bit (1<<k) of posixData[].posixResults.
 */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
 777
/*
 * Expected classification results.
 * c is the code point; posixResults is the OR of the IS.. flags of every
 * function that is expected to return TRUE for c. A flag that is absent
 * means the corresponding function is expected to return FALSE.
 * The columns are aligned so that each flag always appears in the same place.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
 817
 818 static void
 819 TestPOSIX() {
 820     uint32_t mask;
 821     int32_t cl, i;
 822     UBool expect;
 823
 824     mask=1;
 825     for(cl=0; cl<12; ++cl) {
 826         for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
 827             expect=(UBool)((posixData[i].posixResults&mask)!=0);
 828             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
 829                 log_err("u_%s(U+%04x)=%s is wrong\n",
 830                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
 831             }
 832         }
 833         mask<<=1;
 834     }
 835 }
 836
 837 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
 838 static void TestControlPrint()
 839 {
 840     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
 841     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
 842     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
 843     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
 844     UChar32 c;
 845
 846     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE);
 847     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE);
 848
 849     testSampleCharProps(u_isprint, "u_isprint",
 850                         samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);
 851     testSampleCharProps(u_isprint, "u_isprint",
 852                         sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE);
 853
 854     /* test all ISO 8 controls */
 855     for(c=0; c<=0x9f; ++c) {
 856         if(c==0x20) {
 857             /* skip ASCII graphic characters and continue with DEL */
 858             c=0x7f;
 859         }
 860         if(!u_iscntrl(c)) {
 861             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
 862         }
 863         if(!u_isISOControl(c)) {
 864             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
 865         }
 866         if(u_isprint(c)) {
 867             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
 868         }
 869     }
 870
 871     /* test all Latin-1 graphic characters */
 872     for(c=0x20; c<=0xff; ++c) {
 873         if(c==0x7f) {
 874             c=0xa0;
 875         } else if(c==0xad) {
 876             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
 877             ++c;
 878         }
 879         if(!u_isprint(c)) {
 880             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
 881         }
 882     }
 883 }
 884
 885 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
 886 static void TestIdentifier()
 887 {
 888     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
 889     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
 890     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
 891     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
 892     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
 893     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
 894     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
 895     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
 896     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
 897     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
 898
 899     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 900                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
 901     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 902                         sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE);
 903
 904     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 905                         sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE);
 906     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 907                         sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);
 908
 909     /* IDPart should imply IDStart */
 910     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 911                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
 912
 913     testSampleCharProps(u_isIDStart, "u_isIDStart",
 914                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
 915     testSampleCharProps(u_isIDStart, "u_isIDStart",
 916                         sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE);
 917
 918     testSampleCharProps(u_isIDPart, "u_isIDPart",
 919                         sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);
 920     testSampleCharProps(u_isIDPart, "u_isIDPart",
 921                         sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE);
 922
 923     /* IDPart should imply IDStart */
 924     testSampleCharProps(u_isIDPart, "u_isIDPart",
 925                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
 926
 927     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 928                         sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);
 929     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 930                         sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE);
 931 }
 932
 933 /* for each line of UnicodeData.txt, check some of the properties */
 934 typedef struct UnicodeDataContext {
 935 #if UCONFIG_NO_NORMALIZATION
 936     const void *dummy;
 937 #else
 938     const UNormalizer2 *nfc;
 939     const UNormalizer2 *nfkc;
 940 #endif
 941 } UnicodeDataContext;
 942
 943 /*
 944  * ### TODO
 945  * This test fails incorrectly if the First or Last code point of a repetitive area
 946  * is overridden, which is allowed and is encouraged for the PUAs.
 947  * Currently, this means that both area First/Last and override lines are
 948  * tested against the properties from the API,
 949  * and the area boundary will not match and cause an error.
 950  *
 951  * This function should detect area boundaries and skip them for the test of individual
 952  * code points' properties.
 953  * Then it should check that the areas contain all the same properties except where overridden.
 954  * For this, it would have had to set a flag for which code points were listed explicitly.
 955  */
 956 static void U_CALLCONV
 957 unicodeDataLineFn(void *context,
 958                   char *fields[][2], int32_t fieldCount,
 959                   UErrorCode *pErrorCode)
 960 {
 961     char buffer[100];
 962     const char *d;
 963     char *end;
 964     uint32_t value;
 965     UChar32 c;
 966     int32_t i;
 967     int8_t type;
 968     int32_t dt;
 969     UChar dm[32], s[32];
 970     int32_t dmLength, length;
 971
 972 #if !UCONFIG_NO_NORMALIZATION
 973     const UNormalizer2 *nfc, *nfkc;
 974 #endif
 975
 976     /* get the character code, field 0 */
 977     c=strtoul(fields[0][0], &end, 16);
 978     if(end<=fields[0][0] || end!=fields[0][1]) {
 979         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
 980         return;
 981     }
 982     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
 983         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
 984         return;
 985     }
 986
 987     /* get general category, field 2 */
 988     *fields[2][1]=0;
 989     type = (int8_t)tagValues[MakeProp(fields[2][0])];
 990     if(u_charType(c)!=type) {
 991         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
 992     }
 993     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
 994         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
 995     }
 996
 997     /* get canonical combining class, field 3 */
 998     value=strtoul(fields[3][0], &end, 10);
 999     if(end<=fields[3][0] || end!=fields[3][1]) {
1000         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1001         return;
1002     }
1003     if(value>255) {
1004         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1005         return;
1006     }
1007 #if !UCONFIG_NO_NORMALIZATION
1008     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1009         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1010     }
1011     nfkc=((UnicodeDataContext *)context)->nfkc;
1012     if(value!=unorm2_getCombiningClass(nfkc, c)) {
1013         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1014     }
1015 #endif
1016
1017     /* get BiDi category, field 4 */
1018     *fields[4][1]=0;
1019     i=MakeDir(fields[4][0]);
1020 #if U_ICU_VERSION_MAJOR_NUM!=59
1021     // TODO: Remove this version check, see ticket #13061.
1022     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1023         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1024     }
1025 #endif
1026
1027     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1028     d=NULL;
1029     if(fields[5][0]==fields[5][1]) {
1030         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1031         if(c==0xac00 || c==0xd7a3) {
1032             dt=U_DT_CANONICAL;
1033         } else {
1034             dt=U_DT_NONE;
1035         }
1036     } else {
1037         d=fields[5][0];
1038         *fields[5][1]=0;
1039         dt=UCHAR_INVALID_CODE;
1040         if(*d=='<') {
1041             end=strchr(++d, '>');
1042             if(end!=NULL) {
1043                 *end=0;
1044                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1045                 d=u_skipWhitespace(end+1);
1046             }
1047         } else {
1048             dt=U_DT_CANONICAL;
1049         }
1050     }
1051     if(dt>U_DT_NONE) {
1052         if(c==0xac00) {
1053             dm[0]=0x1100;
1054             dm[1]=0x1161;
1055             dm[2]=0;
1056             dmLength=2;
1057         } else if(c==0xd7a3) {
1058             dm[0]=0xd788;
1059             dm[1]=0x11c2;
1060             dm[2]=0;
1061             dmLength=2;
1062         } else {
1063             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1064         }
1065     } else {
1066         dmLength=-1;
1067     }
1068     if(dt<0 || U_FAILURE(*pErrorCode)) {
1069         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1070         return;
1071     }
1072 #if !UCONFIG_NO_NORMALIZATION
1073     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1074     if(i!=dt) {
1075         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1076     }
1077     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1078     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1079     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1080         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1081                 "or the Decomposition_Mapping is different (%s)\n",
1082                 c, length, dmLength, u_errorName(*pErrorCode));
1083         return;
1084     }
1085     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1086     if(dt!=U_DT_CANONICAL) {
1087         dmLength=-1;
1088     }
1089     nfc=((UnicodeDataContext *)context)->nfc;
1090     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1091     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1092         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1093                 "or the Decomposition_Mapping is different (%s)\n",
1094                 c, length, dmLength, u_errorName(*pErrorCode));
1095         return;
1096     }
1097     /* recompose */
1098     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1099         UChar32 a, b, composite;
1100         i=0;
1101         U16_NEXT(dm, i, dmLength, a);
1102         U16_NEXT(dm, i, dmLength, b);
1103         /* i==dmLength */
1104         composite=unorm2_composePair(nfc, a, b);
1105         if(composite!=c) {
1106             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1107                     (long)c, (long)a, (long)b, (long)composite);
1108         }
1109         /*
1110          * Note: NFKC has fewer round-trip mappings than NFC,
1111          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1112          */
1113     }
1114 #endif
1115
1116     /* get ISO Comment, field 11 */
1117     *fields[11][1]=0;
1118     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1119     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1120         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1121             c, u_errorName(*pErrorCode),
1122             U_FAILURE(*pErrorCode) ? buffer : "[error]",
1123             fields[11][0]);
1124     }
1125
1126     /* get uppercase mapping, field 12 */
1127     if(fields[12][0]!=fields[12][1]) {
1128         value=strtoul(fields[12][0], &end, 16);
1129         if(end!=fields[12][1]) {
1130             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1131             return;
1132         }
1133         if((UChar32)value!=u_toupper(c)) {
1134             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1135         }
1136     } else {
1137         /* no case mapping: the API must map the code point to itself */
1138         if(c!=u_toupper(c)) {
1139             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1140         }
1141     }
1142
1143     /* get lowercase mapping, field 13 */
1144     if(fields[13][0]!=fields[13][1]) {
1145         value=strtoul(fields[13][0], &end, 16);
1146         if(end!=fields[13][1]) {
1147             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1148             return;
1149         }
1150         if((UChar32)value!=u_tolower(c)) {
1151             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1152         }
1153     } else {
1154         /* no case mapping: the API must map the code point to itself */
1155         if(c!=u_tolower(c)) {
1156             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1157         }
1158     }
1159
1160     /* get titlecase mapping, field 14 */
1161     if(fields[14][0]!=fields[14][1]) {
1162         value=strtoul(fields[14][0], &end, 16);
1163         if(end!=fields[14][1]) {
1164             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1165             return;
1166         }
1167         if((UChar32)value!=u_totitle(c)) {
1168             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1169         }
1170     } else {
1171         /* no case mapping: the API must map the code point to itself */
1172         if(c!=u_totitle(c)) {
1173             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1174         }
1175     }
1176 }
1177
1178 static UBool U_CALLCONV
1179 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1180     static const UChar32 test[][2]={
1181         {0x41, U_UPPERCASE_LETTER},
1182         {0x308, U_NON_SPACING_MARK},
1183         {0xfffe, U_GENERAL_OTHER_TYPES},
1184         {0xe0041, U_FORMAT_CHAR},
1185         {0xeffff, U_UNASSIGNED}
1186     };
1187
1188     int32_t i, count;
1189
1190     if(0!=strcmp((const char *)context, "a1")) {
1191         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1192         return FALSE;
1193     }
1194
1195     count=UPRV_LENGTHOF(test);
1196     for(i=0; i<count; ++i) {
1197         if(start<=test[i][0] && test[i][0]<limit) {
1198             if(type!=(UCharCategory)test[i][1]) {
1199                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1200                         start, limit, (long)type, test[i][0], test[i][1]);
1201             }
1202             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1203             return i==(count-1) ? FALSE : TRUE;
1204         }
1205     }
1206
1207     if(start>test[count-1][0]) {
1208         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1209                 start, limit, (long)type);
1210         return FALSE;
1211     }
1212
1213     return TRUE;
1214 }
1215
1216 static UBool U_CALLCONV
1217 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1218     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1219     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1220         { 0x0590, U_LEFT_TO_RIGHT },
1221         { 0x0600, U_RIGHT_TO_LEFT },
1222         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1223         { 0x08A0, U_RIGHT_TO_LEFT },
1224         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1225         { 0x20A0, U_LEFT_TO_RIGHT },
1226         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1227         { 0xFB1D, U_LEFT_TO_RIGHT },
1228         { 0xFB50, U_RIGHT_TO_LEFT },
1229         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1230         { 0xFE70, U_LEFT_TO_RIGHT },
1231         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1232         { 0x10800, U_LEFT_TO_RIGHT },
1233         { 0x11000, U_RIGHT_TO_LEFT },
1234         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1235         { 0x1EE00, U_RIGHT_TO_LEFT },
1236         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1237         { 0x1F000, U_RIGHT_TO_LEFT },
1238         { 0x110000, U_LEFT_TO_RIGHT }
1239     };
1240
1241     UChar32 c;
1242     int32_t i;
1243     UCharDirection shouldBeDir;
1244
1245     /*
1246      * LineBreak.txt specifies:
1247      *   #  - Assigned characters that are not listed explicitly are given the value
1248      *   #    "AL".
1249      *   #  - Unassigned characters are given the value "XX".
1250      *
1251      * PUA characters are listed explicitly with "XX".
1252      * Verify that no assigned character has "XX".
1253      */
1254     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1255         c=start;
1256         while(c<limit) {
1257             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1258                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1259             }
1260             ++c;
1261         }
1262     }
1263
1264     /*
1265      * Verify default Bidi classes.
1266      * See DerivedBidiClass.txt, especially for unassigned code points.
1267      */
1268     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1269         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1270         c=start;
1271         for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
1272             if((int32_t)c<defaultBidi[i][0]) {
1273                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1274                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1275                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1276                     } else {
1277                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1278                     }
1279
1280 #if U_ICU_VERSION_MAJOR_NUM!=59
1281 // TODO: Remove this version check, see ticket #13061.
1282                     if( u_charDirection(c)!=shouldBeDir ||
1283                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1284                     ) {
1285                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1286                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1287                     }
1288 #endif
1289                     ++c;
1290                 }
1291             }
1292         }
1293     }
1294
1295     return TRUE;
1296 }
1297
1298 /* tests for several properties */
1299 static void TestUnicodeData()
1300 {
1301     UVersionInfo expectVersionArray;
1302     UVersionInfo versionArray;
1303     char *fields[15][2];
1304     UErrorCode errorCode;
1305     UChar32 c;
1306     int8_t type;
1307
1308     UnicodeDataContext context;
1309
1310     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1311     u_getUnicodeVersion(versionArray);
1312     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1313     {
1314         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1315         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1316     }
1317
1318 #if defined(ICU_UNICODE_VERSION)
1319     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1320     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1321     {
1322          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1323     }
1324 #endif
1325
1326     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1327         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1328     }
1329
1330     errorCode=U_ZERO_ERROR;
1331 #if !UCONFIG_NO_NORMALIZATION
1332     context.nfc=unorm2_getNFCInstance(&errorCode);
1333     context.nfkc=unorm2_getNFKCInstance(&errorCode);
1334     if(U_FAILURE(errorCode)) {
1335         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1336         return;
1337     }
1338 #endif
1339     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1340     if(U_FAILURE(errorCode)) {
1341         return; /* if we couldn't parse UnicodeData.txt, we should return */
1342     }
1343
1344     /* sanity check on repeated properties */
1345     for(c=0xfffe; c<=0x10ffff;) {
1346         type=u_charType(c);
1347         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1348             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1349         }
1350         if(type!=U_UNASSIGNED) {
1351             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1352         }
1353         if((c&0xffff)==0xfffe) {
1354             ++c;
1355         } else {
1356             c+=0xffff;
1357         }
1358     }
1359
1360     /* test that PUA is not "unassigned" */
1361     for(c=0xe000; c<=0x10fffd;) {
1362         type=u_charType(c);
1363         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1364             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1365         }
1366         if(type==U_UNASSIGNED) {
1367             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1368         } else if(type!=U_PRIVATE_USE_CHAR) {
1369             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1370         }
1371         if(c==0xf8ff) {
1372             c=0xf0000;
1373         } else if(c==0xffffd) {
1374             c=0x100000;
1375         } else {
1376             ++c;
1377         }
1378     }
1379
1380     /* test u_enumCharTypes() */
1381     u_enumCharTypes(enumTypeRange, "a1");
1382
1383     /* check default properties */
1384     u_enumCharTypes(enumDefaultsRange, NULL);
1385 }
1386
1387 static void TestCodeUnit(){
1388     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1389
1390     int32_t i;
1391
1392     for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
1393         UChar c=codeunit[i];
1394         if(i<4){
1395             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1396                 log_err("ERROR: U+%04x is a single", c);
1397             }
1398
1399         }
1400         if(i >= 4 && i< 8){
1401             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1402                 log_err("ERROR: U+%04x is a first surrogate", c);
1403             }
1404         }
1405         if(i >= 8 && i< 12){
1406             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1407                 log_err("ERROR: U+%04x is a second surrogate", c);
1408             }
1409         }
1410     }
1411
1412 }
1413
1414 static void TestCodePoint(){
1415     const UChar32 codePoint[]={
1416         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1417         0xd800,
1418         0xdbff,
1419         0xdc00,
1420         0xdfff,
1421         0xdc04,
1422         0xd821,
1423         /*not a surrogate, valid, isUnicodeChar , not Error*/
1424         0x20ac,
1425         0xd7ff,
1426         0xe000,
1427         0xe123,
1428         0x0061,
1429         0xe065,
1430         0x20402,
1431         0x24506,
1432         0x23456,
1433         0x20402,
1434         0x10402,
1435         0x23456,
1436         /*not a surrogate, not valid, isUnicodeChar, isError */
1437         0x0015,
1438         0x009f,
1439         /*not a surrogate, not valid, not isUnicodeChar, isError */
1440         0xffff,
1441         0xfffe,
1442     };
1443     int32_t i;
1444     for(i=0; i<UPRV_LENGTHOF(codePoint); i++){
1445         UChar32 c=codePoint[i];
1446         if(i<6){
1447             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1448                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1449             }
1450             if(UTF_IS_VALID(c)){
1451                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1452             }
1453             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1454                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1455             }
1456             if(UTF_IS_ERROR(c)){
1457                 log_err("ERROR: isError() failed for U+%04x\n", c);
1458             }
1459         }else if(i >=6 && i<18){
1460             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1461                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1462             }
1463             if(!UTF_IS_VALID(c)){
1464                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1465             }
1466             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1467                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1468             }
1469             if(UTF_IS_ERROR(c)){
1470                 log_err("ERROR: isError() failed for U+%04x\n", c);
1471             }
1472         }else if(i >=18 && i<20){
1473             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1474                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1475             }
1476             if(UTF_IS_VALID(c)){
1477                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1478             }
1479             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1480                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1481             }
1482             if(!UTF_IS_ERROR(c)){
1483                 log_err("ERROR: isError() failed for U+%04x\n", c);
1484             }
1485         }
1486         else if(i >=18 && i<UPRV_LENGTHOF(codePoint)){
1487             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1488                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1489             }
1490             if(UTF_IS_VALID(c)){
1491                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1492             }
1493             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1494                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1495             }
1496             if(!UTF_IS_ERROR(c)){
1497                 log_err("ERROR: isError() failed for U+%04x\n", c);
1498             }
1499         }
1500     }
1501
1502     if(
1503         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1504         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1505         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1506         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1507     ) {
1508         log_err("error with U_IS_BMP()\n");
1509     }
1510
1511     if(
1512         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1513         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1514         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1515         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1516     ) {
1517         log_err("error with U_IS_SUPPLEMENTARY()\n");
1518     }
1519 }
1520
1521 static void TestCharLength()
1522 {
1523     const int32_t codepoint[]={
1524         1, 0x0061,
1525         1, 0xe065,
1526         1, 0x20ac,
1527         2, 0x20402,
1528         2, 0x23456,
1529         2, 0x24506,
1530         2, 0x20402,
1531         2, 0x10402,
1532         1, 0xd7ff,
1533         1, 0xe000
1534     };
1535
1536     int32_t i;
1537     UBool multiple;
1538     for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
1539         UChar32 c=codepoint[i+1];
1540         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1541             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1542         }
1543         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1544         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1545             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1546         }
1547     }
1548 }
1549
1550 /*internal functions ----*/
1551 static int32_t MakeProp(char* str)
1552 {
1553     int32_t result = 0;
1554     char* matchPosition =0;
1555
1556     matchPosition = strstr(tagStrings, str);
1557     if (matchPosition == 0)
1558     {
1559         log_err("unrecognized type letter ");
1560         log_err(str);
1561     }
1562     else
1563         result = (int32_t)((matchPosition - tagStrings) / 2);
1564     return result;
1565 }
1566
1567 static int32_t MakeDir(char* str)
1568 {
1569     int32_t pos = 0;
1570     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1571         if (strcmp(str, dirStrings[pos]) == 0) {
1572             return pos;
1573         }
1574     }
1575     return -1;
1576 }
1577
1578 /* test u_charName() -------------------------------------------------------- */
1579
/*
 * Expected character names, one row per code point.
 * An alias of NULL means that no name alias is expected for the row.
 */
static const struct {
    uint32_t code;          /* code point */
    const char *name;       /* modern Unicode character name */
    const char *oldName;    /* Unicode 1.0 character name */
    const char *extName;    /* extended character name */
    const char *alias;      /* character name alias, or NULL */
} names[]={
    { 0x0061,
      "LATIN SMALL LETTER A",
      "",
      "LATIN SMALL LETTER A",
      NULL },
    { 0x01a2,
      "LATIN CAPITAL LETTER OI",
      "",
      "LATIN CAPITAL LETTER OI",
      "LATIN CAPITAL LETTER GHA" },
    { 0x0284,
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
      "",
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
      NULL },
    { 0x0fd0,
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
      "",
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
      "TIBETAN MARK BKA- SHOG GI MGO RGYAN" },
    { 0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401", NULL },
    { 0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED", NULL },
    { 0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA", NULL },
    { 0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH", NULL },
    { 0xd800, "", "", "<lead surrogate-D800>", NULL },
    { 0xdc00, "", "", "<trail surrogate-DC00>", NULL },
    { 0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS", NULL },
    { 0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN", NULL },
    { 0xffff, "", "", "<noncharacter-FFFF>", NULL },
    { 0x1d0c5,
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
      "",
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
      "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS" },
    { 0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456", NULL }
};
1607
1608 static UBool
1609 enumCharNamesFn(void *context,
1610                 UChar32 code, UCharNameChoice nameChoice,
1611                 const char *name, int32_t length) {
1612     int32_t *pCount=(int32_t *)context;
1613     const char *expected;
1614     int i;
1615
1616     if(length<=0 || length!=(int32_t)strlen(name)) {
1617         /* should not be called with an empty string or invalid length */
1618         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1619         return TRUE;
1620     }
1621
1622     ++*pCount;
1623     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1624         if(code==(UChar32)names[i].code) {
1625             switch (nameChoice) {
1626                 case U_EXTENDED_CHAR_NAME:
1627                     if(0!=strcmp(name, names[i].extName)) {
1628                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1629                     }
1630                     break;
1631                 case U_UNICODE_CHAR_NAME:
1632                     if(0!=strcmp(name, names[i].name)) {
1633                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1634                     }
1635                     break;
1636                 case U_UNICODE_10_CHAR_NAME:
1637                     expected=names[i].oldName;
1638                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
1639                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1640                     }
1641                     break;
1642                 case U_CHAR_NAME_ALIAS:
1643                     expected=names[i].alias;
1644                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1645                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1646                     }
1647                     break;
1648                 case U_CHAR_NAME_CHOICE_COUNT:
1649                     break;
1650             }
1651             break;
1652         }
1653     }
1654     return TRUE;
1655 }
1656
1657 struct enumExtCharNamesContext {
1658     uint32_t length;
1659     int32_t last;
1660 };
1661
1662 static UBool
1663 enumExtCharNamesFn(void *context,
1664                 UChar32 code, UCharNameChoice nameChoice,
1665                 const char *name, int32_t length) {
1666     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1667
1668     if (ecncp->last != (int32_t) code - 1) {
1669         if (ecncp->last < 0) {
1670             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1671         } else {
1672             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1673         }
1674     }
1675     ecncp->last = (int32_t) code;
1676
1677     if (!*name) {
1678         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1679     }
1680
1681     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1682 }
1683
1684 /**
1685  * This can be made more efficient by moving it into putil.c and having
1686  * it directly access the ebcdic translation tables.
1687  * TODO: If we get this method in putil.c, then delete it from here.
1688  */
1689 static UChar
1690 u_charToUChar(char c) {
1691     UChar uc;
1692     u_charsToUChars(&c, &uc, 1);
1693     return uc;
1694 }
1695
1696 static void
1697 TestCharNames() {
1698     static char name[80];
1699     UErrorCode errorCode=U_ZERO_ERROR;
1700     struct enumExtCharNamesContext extContext;
1701     const char *expected;
1702     int32_t length;
1703     UChar32 c;
1704     int32_t i;
1705
1706     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1707     length=uprv_getMaxCharNameLength();
1708     if(length==0) {
1709         /* no names data available */
1710         return;
1711     }
1712     if(length<83) { /* Unicode 3.2 max char name length */
1713         log_err("uprv_getMaxCharNameLength()=%d is too short");
1714     }
1715     /* ### TODO same tests for max ISO comment length as for max name length */
1716
1717     log_verbose("Testing u_charName()\n");
1718     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1719         /* modern Unicode character name */
1720         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1721         if(U_FAILURE(errorCode)) {
1722             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1723             return;
1724         }
1725         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1726             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1727         }
1728
1729         /* find the modern name */
1730         if (*names[i].name) {
1731             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1732             if(U_FAILURE(errorCode)) {
1733                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1734                 return;
1735             }
1736             if(c!=(UChar32)names[i].code) {
1737                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1738             }
1739         }
1740
1741         /* Unicode 1.0 character name */
1742         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1743         if(U_FAILURE(errorCode)) {
1744             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1745             return;
1746         }
1747         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1748             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1749         }
1750
1751         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1752         if(names[i].oldName[0]!=0 /* && length>0 */) {
1753             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1754             if(U_FAILURE(errorCode)) {
1755                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1756                 return;
1757             }
1758             if(c!=(UChar32)names[i].code) {
1759                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1760             }
1761         }
1762
1763         /* Unicode character name alias */
1764         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1765         if(U_FAILURE(errorCode)) {
1766             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1767             return;
1768         }
1769         expected=names[i].alias;
1770         if(expected==NULL) {
1771             expected="";
1772         }
1773         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1774             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1775                     names[i].code, name, length, expected);
1776         }
1777
1778         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1779         if(expected[0]!=0 /* && length>0 */) {
1780             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1781             if(U_FAILURE(errorCode)) {
1782                 log_err("u_charFromName(%s - alias) error %s\n",
1783                         expected, u_errorName(errorCode));
1784                 return;
1785             }
1786             if(c!=(UChar32)names[i].code) {
1787                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1788                         expected, c, names[i].code);
1789             }
1790         }
1791     }
1792
1793     /* test u_enumCharNames() */
1794     length=0;
1795     errorCode=U_ZERO_ERROR;
1796     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1797     if(U_FAILURE(errorCode) || length<94140) {
1798         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1799     }
1800
1801     extContext.length = 0;
1802     extContext.last = -1;
1803     errorCode=U_ZERO_ERROR;
1804     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1805     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1806         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1807     }
1808
1809     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1810     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1811         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1812     }
1813
1814     /* Test getCharNameCharacters */
1815     if(!getTestOption(QUICK_OPTION)) {
1816         enum { BUFSIZE = 256 };
1817         UErrorCode ec = U_ZERO_ERROR;
1818         char buf[BUFSIZE];
1819         int32_t maxLength;
1820         UChar32 cp;
1821         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1822         int32_t l1, l2;
1823         UBool map[256];
1824         UBool ok;
1825
1826         USet* set = uset_open(1, 0); /* empty set */
1827         USet* dumb = uset_open(1, 0); /* empty set */
1828
1829         /*
1830          * uprv_getCharNameCharacters() will likely return more lowercase
1831          * letters than actual character names contain because
1832          * it includes all the characters in lowercased names of
1833          * general categories, for the full possible set of extended names.
1834          */
1835         {
1836             USetAdder sa={
1837                 NULL,
1838                 uset_add,
1839                 uset_addRange,
1840                 uset_addString,
1841                 NULL /* don't need remove() */
1842             };
1843             sa.set=set;
1844             uprv_getCharNameCharacters(&sa);
1845         }
1846
1847         /* build set the dumb (but sure-fire) way */
1848         for (i=0; i<256; ++i) {
1849             map[i] = FALSE;
1850         }
1851
1852         maxLength=0;
1853         for (cp=0; cp<0x110000; ++cp) {
1854             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1855                                      buf, BUFSIZE, &ec);
1856             if (U_FAILURE(ec)) {
1857                 log_err("FAIL: u_charName failed when it shouldn't\n");
1858                 uset_close(set);
1859                 uset_close(dumb);
1860                 return;
1861             }
1862             if(len>maxLength) {
1863                 maxLength=len;
1864             }
1865
1866             for (i=0; i<len; ++i) {
1867                 if (!map[(uint8_t) buf[i]]) {
1868                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1869                     map[(uint8_t) buf[i]] = TRUE;
1870                 }
1871             }
1872
1873             /* test for leading/trailing whitespace */
1874             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1875                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1876             }
1877         }
1878
1879         if(map[(uint8_t)'\t']) {
1880             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1881         }
1882
1883         length=uprv_getMaxCharNameLength();
1884         if(length!=maxLength) {
1885             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1886                     length, maxLength);
1887         }
1888
1889         /* compare the sets.  Where is my uset_equals?!! */
1890         ok=TRUE;
1891         for(i=0; i<256; ++i) {
1892             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1893                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1894                     /* ignore lowercase a-z that are in set but not in dumb */
1895                     ok=TRUE;
1896                 } else {
1897                     ok=FALSE;
1898                     break;
1899                 }
1900             }
1901         }
1902
1903         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1904         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1905         if (U_FAILURE(ec)) {
1906             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1907             uset_close(set);
1908             uset_close(dumb);
1909             return;
1910         }
1911
1912         if (l1 >= BUFSIZE) {
1913             l1 = BUFSIZE-1;
1914             pat[l1] = 0;
1915         }
1916         if (l2 >= BUFSIZE) {
1917             l2 = BUFSIZE-1;
1918             dumbPat[l2] = 0;
1919         }
1920
1921         if (!ok) {
1922             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1923                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1924         } else if(getTestOption(VERBOSITY_OPTION)) {
1925             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1926         }
1927
1928         uset_close(set);
1929         uset_close(dumb);
1930     }
1931
1932     /* ### TODO: test error cases and other interesting things */
1933 }
1934
1935 static void
1936 TestUCharFromNameUnderflow() {
1937     // Ticket #10889: Underflow crash when there is no dash.
1938     UErrorCode errorCode=U_ZERO_ERROR;
1939     UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
1940     if(U_SUCCESS(errorCode)) {
1941         log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1942     }
1943
1944     // Test related edge cases.
1945     errorCode=U_ZERO_ERROR;
1946     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
1947     if(U_SUCCESS(errorCode)) {
1948         log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1949     }
1950
1951     errorCode=U_ZERO_ERROR;
1952     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
1953     if(U_SUCCESS(errorCode)) {
1954         log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1955     }
1956
1957     errorCode=U_ZERO_ERROR;
1958     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
1959     if(U_SUCCESS(errorCode)) {
1960         log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1961     }
1962 }
1963
1964 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
1965
1966 static void
1967 TestMirroring() {
1968     USet *set;
1969     UErrorCode errorCode;
1970
1971     UChar32 start, end, c2, c3;
1972     int32_t i;
1973
1974     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1975
1976     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1977
1978     log_verbose("Testing u_isMirrored()\n");
1979     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1980          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1981         )
1982     ) {
1983         log_err("u_isMirrored() does not work correctly\n");
1984     }
1985
1986     log_verbose("Testing u_charMirror()\n");
1987     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1988          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1989          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1990          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1991          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1992          )
1993     ) {
1994         log_err("u_charMirror() does not work correctly\n");
1995     }
1996
1997     /* verify that Bidi_Mirroring_Glyph roundtrips */
1998     errorCode=U_ZERO_ERROR;
1999     set=uset_openPattern(mirroredPattern, 17, &errorCode);
2000
2001     if (U_FAILURE(errorCode)) {
2002         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
2003     } else {
2004         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
2005             do {
2006                 c2=u_charMirror(start);
2007                 c3=u_charMirror(c2);
2008                 if(c3!=start) {
2009                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
2010                 }
2011                 c3=u_getBidiPairedBracket(start);
2012                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
2013                     if(c3!=start) {
2014                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
2015                                 (long)start);
2016                     }
2017                 } else {
2018                     if(c3!=c2) {
2019                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
2020                                 (long)start, (long)c2);
2021                     }
2022                 }
2023             } while(++start<=end);
2024         }
2025     }
2026
2027     uset_close(set);
2028 }
2029
2030
2031 struct RunTestData
2032 {
2033     const char *runText;
2034     UScriptCode runCode;
2035 };
2036
2037 typedef struct RunTestData RunTestData;
2038
2039 static void
2040 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2041                 const char *prefix)
2042 {
2043     int32_t run, runStart, runLimit;
2044     UScriptCode runCode;
2045
2046     /* iterate over all the runs */
2047     run = 0;
2048     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2049         if (runStart != runStarts[run]) {
2050             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2051                 prefix, run, runStarts[run], runStart);
2052         }
2053
2054         if (runLimit != runStarts[run + 1]) {
2055             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2056                 prefix, run, runStarts[run + 1], runLimit);
2057         }
2058
2059         if (runCode != testData[run].runCode) {
2060             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2061                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2062         }
2063
2064         run += 1;
2065
2066         /* stop when we've seen all the runs we expect to see */
2067         if (run >= nRuns) {
2068             break;
2069         }
2070     }
2071
2072     /* Complain if we didn't see then number of runs we expected */
2073     if (run != nRuns) {
2074         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2075     }
2076 }
2077
2078 static void
2079 TestUScriptRunAPI()
2080 {
2081     static const RunTestData testData1[] = {
2082         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2083         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2084         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2085         {"English (", USCRIPT_LATIN},
2086         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2087         {") ", USCRIPT_LATIN},
2088         {"\\u6F22\\u5B75", USCRIPT_HAN},
2089         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2090         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2091         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2092     };
2093
2094     static const RunTestData testData2[] = {
2095        {"((((((((((abc))))))))))", USCRIPT_LATIN}
2096     };
2097
2098     static const struct {
2099       const RunTestData *testData;
2100       int32_t nRuns;
2101     } testDataEntries[] = {
2102         {testData1, UPRV_LENGTHOF(testData1)},
2103         {testData2, UPRV_LENGTHOF(testData2)}
2104     };
2105
2106     static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
2107     int32_t testEntry;
2108
2109     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2110         UChar testString[1024];
2111         int32_t runStarts[256];
2112         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2113         const RunTestData *testData = testDataEntries[testEntry].testData;
2114
2115         int32_t run, stringLimit;
2116         UScriptRun *scriptRun = NULL;
2117         UErrorCode err;
2118
2119         /*
2120          * Fill in the test string and the runStarts array.
2121          */
2122         stringLimit = 0;
2123         for (run = 0; run < nTestRuns; run += 1) {
2124             runStarts[run] = stringLimit;
2125             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2126             /*stringLimit -= 1;*/
2127         }
2128
2129         /* The limit of the last run */
2130         runStarts[nTestRuns] = stringLimit;
2131
2132         /*
2133          * Make sure that calling uscript_OpenRun with a NULL text pointer
2134          * and a non-zero text length returns the correct error.
2135          */
2136         err = U_ZERO_ERROR;
2137         scriptRun = uscript_openRun(NULL, stringLimit, &err);
2138
2139         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2140             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2141         }
2142
2143         if (scriptRun != NULL) {
2144             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2145             uscript_closeRun(scriptRun);
2146         }
2147
2148         /*
2149          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2150          * and a zero text length returns the correct error.
2151          */
2152         err = U_ZERO_ERROR;
2153         scriptRun = uscript_openRun(testString, 0, &err);
2154
2155         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2156             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2157         }
2158
2159         if (scriptRun != NULL) {
2160             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2161             uscript_closeRun(scriptRun);
2162         }
2163
2164         /*
2165          * Make sure that calling uscript_openRun with a NULL text pointer
2166          * and a zero text length doesn't return an error.
2167          */
2168         err = U_ZERO_ERROR;
2169         scriptRun = uscript_openRun(NULL, 0, &err);
2170
2171         if (U_FAILURE(err)) {
2172             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2173         }
2174
2175         /* Make sure that the empty iterator doesn't find any runs */
2176         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2177             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2178         }
2179
2180         /*
2181          * Make sure that calling uscript_setRunText with a NULL text pointer
2182          * and a non-zero text length returns the correct error.
2183          */
2184         err = U_ZERO_ERROR;
2185         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2186
2187         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2188             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2189         }
2190
2191         /*
2192          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2193          * and a zero text length returns the correct error.
2194          */
2195         err = U_ZERO_ERROR;
2196         uscript_setRunText(scriptRun, testString, 0, &err);
2197
2198         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2199             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2200         }
2201
2202         /*
2203          * Now call uscript_setRunText on the empty iterator
2204          * and make sure that it works.
2205          */
2206         err = U_ZERO_ERROR;
2207         uscript_setRunText(scriptRun, testString, stringLimit, &err);
2208
2209         if (U_FAILURE(err)) {
2210             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2211         } else {
2212             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2213         }
2214
2215         uscript_closeRun(scriptRun);
2216
2217         /*
2218          * Now open an interator over the testString
2219          * using uscript_openRun and make sure that it works
2220          */
2221         scriptRun = uscript_openRun(testString, stringLimit, &err);
2222
2223         if (U_FAILURE(err)) {
2224             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2225         } else {
2226             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2227         }
2228
2229         /* Now reset the iterator, and make sure
2230          * that it still works.
2231          */
2232         uscript_resetRun(scriptRun);
2233
2234         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2235
2236         /* Close the iterator */
2237         uscript_closeRun(scriptRun);
2238     }
2239 }
2240
2241 /* test additional, non-core properties */
2242 static void
2243 TestAdditionalProperties() {
2244     /* test data for u_charAge() */
2245     static const struct {
2246         UChar32 c;
2247         UVersionInfo version;
2248     } charAges[]={
2249         {0x41,    { 1, 1, 0, 0 }},
2250         {0xffff,  { 1, 1, 0, 0 }},
2251         {0x20ab,  { 2, 0, 0, 0 }},
2252         {0x2fffe, { 2, 0, 0, 0 }},
2253         {0x20ac,  { 2, 1, 0, 0 }},
2254         {0xfb1d,  { 3, 0, 0, 0 }},
2255         {0x3f4,   { 3, 1, 0, 0 }},
2256         {0x10300, { 3, 1, 0, 0 }},
2257         {0x220,   { 3, 2, 0, 0 }},
2258         {0xff60,  { 3, 2, 0, 0 }}
2259     };
2260
2261     /* test data for u_hasBinaryProperty() */
2262     static const int32_t
2263     props[][3]={ /* code point, property, value */
2264         { 0x0627, UCHAR_ALPHABETIC, TRUE },
2265         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2266         { 0x2028, UCHAR_ALPHABETIC, FALSE },
2267
2268         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2269         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2270
2271         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2272         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2273
2274         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2275         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2276
2277         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2278         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2279         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2280         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2281         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2282
2283         { 0x058a, UCHAR_DASH, TRUE },
2284         { 0x007e, UCHAR_DASH, FALSE },
2285
2286         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2287         { 0x3000, UCHAR_DIACRITIC, FALSE },
2288
2289         { 0x0e46, UCHAR_EXTENDER, TRUE },
2290         { 0x0020, UCHAR_EXTENDER, FALSE },
2291
2292 #if !UCONFIG_NO_NORMALIZATION
2293         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2294         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2295         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2296
2297         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2298         { 0x0308, UCHAR_NFD_INERT, FALSE },
2299
2300         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2301         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2302
2303         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2304         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2305         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2306         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2307         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2308         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2309
2310         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2311         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2312
2313         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2314         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2315         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2316         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2317         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2318         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2319 #endif
2320
2321         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2322         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2323         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2324
2325         { 0x30fb, UCHAR_HYPHEN, TRUE },
2326         { 0xfe58, UCHAR_HYPHEN, FALSE },
2327
2328         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2329         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2330         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2331
2332         { 0x2172, UCHAR_ID_START, TRUE },
2333         { 0x007a, UCHAR_ID_START, TRUE },
2334         { 0x0039, UCHAR_ID_START, FALSE },
2335
2336         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2337         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2338         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2339
2340         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2341         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2342
2343         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2344         { 0x0345, UCHAR_LOWERCASE, TRUE },
2345         { 0x0030, UCHAR_LOWERCASE, FALSE },
2346
2347         { 0x1d7a9, UCHAR_MATH, TRUE },
2348         { 0x2135, UCHAR_MATH, TRUE },
2349         { 0x0062, UCHAR_MATH, FALSE },
2350
2351         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2352         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2353         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2354
2355         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2356         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2357         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2358
2359         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2360         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2361
2362         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2363         { 0x2162, UCHAR_UPPERCASE, TRUE },
2364         { 0x0345, UCHAR_UPPERCASE, FALSE },
2365
2366         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2367         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2368         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2369
2370         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2371         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2372         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2373
2374         { 0x16ee, UCHAR_XID_START, TRUE },
2375         { 0x23456, UCHAR_XID_START, TRUE },
2376         { 0x1d1aa, UCHAR_XID_START, FALSE },
2377
2378         /*
2379          * Version break:
2380          * The following properties are only supported starting with the
2381          * Unicode version indicated in the second field.
2382          */
2383         { -1, 0x320, 0 },
2384
2385         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2386         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2387         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2388
2389         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2390         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2391         { 0xe0001, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2392         { 0xe0100, UCHAR_DEPRECATED, FALSE },
2393
2394         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2395         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2396         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2397         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2398
2399         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2400         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2401         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2402         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2403
2404         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2405         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2406
2407         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2408         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2409
2410         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2411         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2412
2413         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2414         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2415
2416         { 0x2e9b, UCHAR_RADICAL, TRUE },
2417         { 0x4e00, UCHAR_RADICAL, FALSE },
2418
2419         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2420         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2421
2422         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2423         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2424
2425         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2426
2427         { 0x002e, UCHAR_S_TERM, TRUE },
2428         { 0x0061, UCHAR_S_TERM, FALSE },
2429
2430         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2431         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2432         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2433         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2434
2435         /* enum/integer type properties */
2436
2437         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2438         /* test default Bidi classes for unassigned code points */
2439         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2440         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2441         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2442         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2443         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2444         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2445         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2446         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2447         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2448         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2449         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2450
2451         { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2452         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2453         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2454         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2455         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2456         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2457         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2458
2459         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2460         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2461         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2462         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2463         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2464         { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2465         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2466         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2467         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2468         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2469         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2470
2471         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2472         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2473
2474         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2475         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2476         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2477         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2478         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2479         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2480         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2481         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2482         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2483
2484         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2485         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2486         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2487         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2488         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2489         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2490         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2491         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2492         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2493         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2494         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2495         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2496         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2497         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2498         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2499         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2500         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2501
2502         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2503         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2504         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2505
2506         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2507         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2508         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2509         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2510         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2511
2512         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2513         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2514         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2515         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2516         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2517         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2518         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2519         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2520
2521         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2522         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2523         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2524         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2525         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2526         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2527         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2528         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2529         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2530         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2531         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2532         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2533         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2534         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2535         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2536         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2537
2538         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2539
2540         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2541
2542         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2543         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2544         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2545         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2546         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2547         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2548         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2549
2550         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2551         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2552         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2553         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2554
2555         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2556         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2557         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2558         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2559         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2560         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2561
2562         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2563         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2564         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2565         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2566
2567         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2568         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2569         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2570         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2571         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2572         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2573         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2574
2575         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2576         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2577         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2578         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2579
2580         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2581         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2582         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2583         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2584
2585         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2586         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2587         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2588         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2589         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2590
2591         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2592
2593         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2594
2595         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2596         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2597         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2598
2599         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2600         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2601         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2602         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2603         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2604
2605         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2606         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2607         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2608
2609         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2610         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2611         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2612         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2613
2614         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2615         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2616         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2617         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2618         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2619         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2620
2621         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2622         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2623         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2624         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2625
2626         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2627         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2628         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2629         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2630
2631         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2632         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2633         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2634         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2635
2636         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2637
2638         /* unassigned code points in new default Bidi R blocks */
2639         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2640         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2641
2642         /* test some script codes >127 */
2643         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2644         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2645         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2646
2647         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2648
2649         /* value changed in Unicode 6.0 */
2650         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2651
2652         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2653
2654         /* unassigned code points in new/changed default Bidi AL blocks */
2655         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2656         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2657
2658         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2659
2660         /* unassigned code points in the currency symbols block now default to ET */
2661         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2662         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2663
2664         /* new property in Unicode 6.3 */
2665         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2666         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2667         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2668         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2669         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2670         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2671
2672         { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
2673
2674         /* new character range with Joining_Group values */
2675         { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2676         { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
2677         { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
2678         { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
2679         { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2680
2681         /* undefined UProperty values */
2682         { 0x61, 0x4a7, 0 },
2683         { 0x234bc, 0x15ed, 0 }
2684     };
2685
2686     UVersionInfo version;
2687     UChar32 c;
2688     int32_t i, result, uVersion;
2689     UProperty which;
2690
2691     /* what is our Unicode version? */
2692     u_getUnicodeVersion(version);
2693     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2694
2695     u_charAge(0x20, version);
2696     if(version[0]==0) {
2697         /* no additional properties available */
2698         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2699         return;
2700     }
2701
2702     /* test u_charAge() */
2703     for(i=0; i<UPRV_LENGTHOF(charAges); ++i) {
2704         u_charAge(charAges[i].c, version);
2705         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2706             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2707                 charAges[i].c,
2708                 version[0], version[1], version[2], version[3],
2709                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2710         }
2711     }
2712
2713     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2714         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2715         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2716         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2717         u_getIntPropertyMinValue(0x2345)!=0
2718     ) {
2719         log_err("error: u_getIntPropertyMinValue() wrong\n");
2720     }
2721     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2722         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2723     }
2724     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2725         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2726     }
2727     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2728         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2729     }
2730     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2731         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2732     }
2733     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2734         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2735     }
2736     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2737         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2738     }
2739     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2740         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2741     }
2742     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2743         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2744     }
2745     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2746         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2747     }
2748     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2749         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2750     }
2751     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2752         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2753     }
2754     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2755         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2756     }
2757     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2758         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2759     }
2760     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2761         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2762     }
2763     /*JB#2410*/
2764     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2765         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2766     }
2767     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2768         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2769     }
2770     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2771         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2772     }
2773     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2774         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2775     }
2776     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2777         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2778     }
2779
2780     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2781     for(i=0; i<UPRV_LENGTHOF(props); ++i) {
2782         const char *whichName;
2783
2784         if(props[i][0]<0) {
2785             /* Unicode version break */
2786             if(uVersion<props[i][1]) {
2787                 break; /* do not test properties that are not yet supported */
2788             } else {
2789                 continue; /* skip this row */
2790             }
2791         }
2792
2793         c=(UChar32)props[i][0];
2794         which=(UProperty)props[i][1];
2795         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2796
2797         if(which<UCHAR_INT_START) {
2798             result=u_hasBinaryProperty(c, which);
2799             if(result!=props[i][2]) {
2800                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2801                         c, whichName, result, i);
2802             }
2803         }
2804
2805         result=u_getIntPropertyValue(c, which);
2806         if(result!=props[i][2]) {
2807             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2808                     c, whichName, result, props[i][2], i);
2809         }
2810
2811         /* test separate functions, too */
2812         switch((UProperty)props[i][1]) {
2813         case UCHAR_ALPHABETIC:
2814             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2815                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2816                         props[i][0], result, i);
2817             }
2818             break;
2819         case UCHAR_LOWERCASE:
2820             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2821                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2822                         props[i][0], result, i);
2823             }
2824             break;
2825         case UCHAR_UPPERCASE:
2826             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2827                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2828                         props[i][0], result, i);
2829             }
2830             break;
2831         case UCHAR_WHITE_SPACE:
2832             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2833                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2834                         props[i][0], result, i);
2835             }
2836             break;
2837         default:
2838             break;
2839         }
2840     }
2841 }
2842
2843 static void
2844 TestNumericProperties(void) {
2845     /* see UnicodeData.txt, DerivedNumericValues.txt */
2846     static const struct {
2847         UChar32 c;
2848         int32_t type;
2849         double numValue;
2850     } values[]={
2851         { 0x0F33, U_NT_NUMERIC, -1./2. },
2852         { 0x0C66, U_NT_DECIMAL, 0 },
2853         { 0x96f6, U_NT_NUMERIC, 0 },
2854         { 0xa833, U_NT_NUMERIC, 1./16. },
2855         { 0x2152, U_NT_NUMERIC, 1./10. },
2856         { 0x2151, U_NT_NUMERIC, 1./9. },
2857         { 0x1245f, U_NT_NUMERIC, 1./8. },
2858         { 0x2150, U_NT_NUMERIC, 1./7. },
2859         { 0x2159, U_NT_NUMERIC, 1./6. },
2860         { 0x09f6, U_NT_NUMERIC, 3./16. },
2861         { 0x2155, U_NT_NUMERIC, 1./5. },
2862         { 0x00BD, U_NT_NUMERIC, 1./2. },
2863         { 0x0031, U_NT_DECIMAL, 1. },
2864         { 0x4e00, U_NT_NUMERIC, 1. },
2865         { 0x58f1, U_NT_NUMERIC, 1. },
2866         { 0x10320, U_NT_NUMERIC, 1. },
2867         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2868         { 0x00B2, U_NT_DIGIT, 2. },
2869         { 0x5f10, U_NT_NUMERIC, 2. },
2870         { 0x1813, U_NT_DECIMAL, 3. },
2871         { 0x5f0e, U_NT_NUMERIC, 3. },
2872         { 0x2173, U_NT_NUMERIC, 4. },
2873         { 0x8086, U_NT_NUMERIC, 4. },
2874         { 0x278E, U_NT_DIGIT, 5. },
2875         { 0x1D7F2, U_NT_DECIMAL, 6. },
2876         { 0x247A, U_NT_DIGIT, 7. },
2877         { 0x7396, U_NT_NUMERIC, 9. },
2878         { 0x1372, U_NT_NUMERIC, 10. },
2879         { 0x216B, U_NT_NUMERIC, 12. },
2880         { 0x16EE, U_NT_NUMERIC, 17. },
2881         { 0x249A, U_NT_NUMERIC, 19. },
2882         { 0x303A, U_NT_NUMERIC, 30. },
2883         { 0x5345, U_NT_NUMERIC, 30. },
2884         { 0x32B2, U_NT_NUMERIC, 37. },
2885         { 0x1375, U_NT_NUMERIC, 40. },
2886         { 0x10323, U_NT_NUMERIC, 50. },
2887         { 0x0BF1, U_NT_NUMERIC, 100. },
2888         { 0x964c, U_NT_NUMERIC, 100. },
2889         { 0x217E, U_NT_NUMERIC, 500. },
2890         { 0x2180, U_NT_NUMERIC, 1000. },
2891         { 0x4edf, U_NT_NUMERIC, 1000. },
2892         { 0x2181, U_NT_NUMERIC, 5000. },
2893         { 0x137C, U_NT_NUMERIC, 10000. },
2894         { 0x4e07, U_NT_NUMERIC, 10000. },
2895         { 0x12432, U_NT_NUMERIC, 216000. },
2896         { 0x12433, U_NT_NUMERIC, 432000. },
2897         { 0x4ebf, U_NT_NUMERIC, 100000000. },
2898         { 0x5146, U_NT_NUMERIC, 1000000000000. },
2899         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2900         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2901         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2902         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2903         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2904         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2905         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2906         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2907     };
2908
2909     double nv;
2910     UChar32 c;
2911     int32_t i, type;
2912
2913     for(i=0; i<UPRV_LENGTHOF(values); ++i) {
2914         c=values[i].c;
2915         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2916         nv=u_getNumericValue(c);
2917
2918         if(type!=values[i].type) {
2919             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2920         }
2921         if(0.000001 <= fabs(nv - values[i].numValue)) {
2922             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2923         }
2924     }
2925 }
2926
2927 /**
2928  * Test the property names and property value names API.
2929  */
2930 static void
2931 TestPropertyNames(void) {
2932     int32_t p, v, choice=0, rev;
2933     UBool atLeastSomething = FALSE;
2934
2935     for (p=0; ; ++p) {
2936         UProperty propEnum = (UProperty)p;
2937         UBool sawProp = FALSE;
2938         if(p > 10 && !atLeastSomething) {
2939           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2940           return;
2941         }
2942
2943         for (choice=0; ; ++choice) {
2944             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2945             if (name) {
2946                 if (!sawProp)
2947                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2948                 log_verbose("%d=\"%s\"", choice, name);
2949                 sawProp = TRUE;
2950                 atLeastSomething = TRUE;
2951
2952                 /* test reverse mapping */
2953                 rev = u_getPropertyEnum(name);
2954                 if (rev != p) {
2955                     log_err("Property round-trip failure: %d -> %s -> %d\n",
2956                             p, name, rev);
2957                 }
2958             }
2959             if (!name && choice>0) break;
2960         }
2961         if (sawProp) {
2962             /* looks like a valid property; check the values */
2963             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2964             int32_t max = 0;
2965             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2966                 max = 255;
2967             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2968                 /* it's far too slow to iterate all the way up to
2969                    the real max, U_GC_P_MASK */
2970                 max = U_GC_NL_MASK;
2971             } else if (p == UCHAR_BLOCK) {
2972                 /* UBlockCodes, unlike other values, start at 1 */
2973                 max = 1;
2974             }
2975             log_verbose("\n");
2976             for (v=-1; ; ++v) {
2977                 UBool sawValue = FALSE;
2978                 for (choice=0; ; ++choice) {
2979                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2980                     if (vname) {
2981                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2982                         log_verbose("%d=\"%s\"", choice, vname);
2983                         sawValue = TRUE;
2984
2985                         /* test reverse mapping */
2986                         rev = u_getPropertyValueEnum(propEnum, vname);
2987                         if (rev != v) {
2988                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2989                                     pname, v, vname, rev);
2990                         }
2991                     }
2992                     if (!vname && choice>0) break;
2993                 }
2994                 if (sawValue) {
2995                     log_verbose("\n");
2996                 }
2997                 if (!sawValue && v>=max) break;
2998             }
2999         }
3000         if (!sawProp) {
3001             if (p>=UCHAR_STRING_LIMIT) {
3002                 break;
3003             } else if (p>=UCHAR_DOUBLE_LIMIT) {
3004                 p = UCHAR_STRING_START - 1;
3005             } else if (p>=UCHAR_MASK_LIMIT) {
3006                 p = UCHAR_DOUBLE_START - 1;
3007             } else if (p>=UCHAR_INT_LIMIT) {
3008                 p = UCHAR_MASK_START - 1;
3009             } else if (p>=UCHAR_BINARY_LIMIT) {
3010                 p = UCHAR_INT_START - 1;
3011             }
3012         }
3013     }
3014 }
3015
3016 /**
3017  * Test the property values API.  See JB#2410.
3018  */
3019 static void
3020 TestPropertyValues(void) {
3021     int32_t i, p, min, max;
3022     UErrorCode ec;
3023
3024     /* Min should be 0 for everything. */
3025     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
3026     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
3027         UProperty propEnum = (UProperty)p;
3028         min = u_getIntPropertyMinValue(propEnum);
3029         if (min != 0) {
3030             if (p == UCHAR_BLOCK) {
3031                 /* This is okay...for now.  See JB#2487.
3032                    TODO Update this for JB#2487. */
3033             } else {
3034                 const char* name;
3035                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3036                 if (name == NULL)
3037                     name = "<ERROR>";
3038                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3039                         name, min);
3040             }
3041         }
3042     }
3043
3044     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3045         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3046         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3047     }
3048
3049     /* Max should be -1 for invalid properties. */
3050     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3051     if (max != -1) {
3052         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3053                 max);
3054     }
3055
3056     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3057     for (i=0; i<2; ++i) {
3058         int32_t script;
3059         const char* desc;
3060         ec = U_ZERO_ERROR;
3061         switch (i) {
3062         case 0:
3063             script = uscript_getScript(-1, &ec);
3064             desc = "uscript_getScript(-1)";
3065             break;
3066         case 1:
3067             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3068             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3069             break;
3070         default:
3071             log_err("Internal test error. Too many scripts\n");
3072             return;
3073         }
3074         /* We don't explicitly test ec.  It should be U_FAILURE but it
3075            isn't documented as such. */
3076         if (script != (int32_t)USCRIPT_INVALID_CODE) {
3077             log_err("FAIL: %s = %d, exp. 0\n",
3078                     desc, script);
3079         }
3080     }
3081 }
3082
3083 /* various tests for consistency of UCD data and API behavior */
3084 static void
3085 TestConsistency() {
3086     char buffer[300];
3087     USet *set1, *set2, *set3, *set4;
3088     UErrorCode errorCode;
3089
3090     UChar32 start, end;
3091     int32_t i, length;
3092
3093     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3094     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3095     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3096     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3097     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3098
3099     U_STRING_DECL(mathBlocksPattern,
3100         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3101         214);
3102     U_STRING_DECL(mathPattern, "[:Math:]", 8);
3103     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3104     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3105     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3106
3107     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3108     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3109     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3110     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3111     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3112
3113     U_STRING_INIT(mathBlocksPattern,
3114         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3115         214);
3116     U_STRING_INIT(mathPattern, "[:Math:]", 8);
3117     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3118     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3119     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3120
3121     /*
3122      * It used to be that UCD.html and its precursors said
3123      * "Those dashes used to mark connections between pieces of words,
3124      *  plus the Katakana middle dot."
3125      *
3126      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3127      * but not from Hyphen.
3128      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3129      * Therefore, do not show errors when testing the Hyphen property.
3130      */
3131     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3132                 "known to the UTC and not considered errors.\n");
3133
3134     errorCode=U_ZERO_ERROR;
3135     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3136     set2=uset_openPattern(dashPattern, 8, &errorCode);
3137     if(U_SUCCESS(errorCode)) {
3138         /* remove the Katakana middle dot(s) from set1 */
3139         uset_remove(set1, 0x30fb);
3140         uset_remove(set1, 0xff65); /* halfwidth variant */
3141         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3142     } else {
3143         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3144     }
3145
3146     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3147     set3=uset_openPattern(formatPattern, 6, &errorCode);
3148     set4=uset_openPattern(alphaPattern, 14, &errorCode);
3149     if(U_SUCCESS(errorCode)) {
3150         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3151         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3152         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3153     } else {
3154         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3155     }
3156
3157     uset_close(set1);
3158     uset_close(set2);
3159     uset_close(set3);
3160     uset_close(set4);
3161
3162     /*
3163      * Check that each lowercase character has "small" in its name
3164      * and not "capital".
3165      * There are some such characters, some of which seem odd.
3166      * Use the verbose flag to see these notices.
3167      */
3168     errorCode=U_ZERO_ERROR;
3169     set1=uset_openPattern(lowerPattern, 13, &errorCode);
3170     if(U_SUCCESS(errorCode)) {
3171         for(i=0;; ++i) {
3172             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3173             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3174                 break; /* done */
3175             }
3176             if(U_FAILURE(errorCode)) {
3177                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3178                         i, u_errorName(errorCode));
3179                 break;
3180             }
3181             if(length!=0) {
3182                 break; /* done with code points, got a string or -1 */
3183             }
3184
3185             while(start<=end) {
3186                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3187                 if(U_FAILURE(errorCode)) {
3188                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3189                     errorCode=U_ZERO_ERROR;
3190                 }
3191                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3192                     strstr(buffer, "SMALL CAPITAL")==NULL
3193                 ) {
3194                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3195                 }
3196                 ++start;
3197             }
3198         }
3199     } else {
3200         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3201     }
3202     uset_close(set1);
3203
3204     /* verify that all assigned characters in Math blocks are exactly Math characters */
3205     errorCode=U_ZERO_ERROR;
3206     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3207     set2=uset_openPattern(mathPattern, 8, &errorCode);
3208     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3209     if(U_SUCCESS(errorCode)) {
3210         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3211         uset_complement(set3);      /* assigned characters */
3212         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3213         compareUSets(set1, set2,
3214                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
3215                      TRUE);
3216     } else {
3217         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3218     }
3219     uset_close(set1);
3220     uset_close(set2);
3221     uset_close(set3);
3222
3223     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3224     errorCode=U_ZERO_ERROR;
3225     set1=uset_openPattern(unknownPattern, 14, &errorCode);
3226     set2=uset_openPattern(reservedPattern, 20, &errorCode);
3227     if(U_SUCCESS(errorCode)) {
3228         compareUSets(set1, set2,
3229                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3230                      TRUE);
3231     } else {
3232         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3233     }
3234     uset_close(set1);
3235     uset_close(set2);
3236 }
3237
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
#define HARDCODED_DATA_4497 1
3249
/*
 * API coverage for ubidi_props.c.
 *
 * The whole body is compiled only when HARDCODED_DATA_4497 is 0; with that
 * macro defined as 1 this function is empty.
 * When enabled, it opens the BiDi properties data item, wraps it with
 * ubidi_openBinary(), spot-checks one mirroring pair, and then checks that
 * the dummy properties object reports class 0 for U+0020.
 */
static void TestUBiDiProps() {
#if !HARDCODED_DATA_4497
    UDataMemory *pData;
    UBiDiProps *bdp;
    const UBiDiProps *cbdp;
    UErrorCode errorCode;

    /* coverage for ubidi_openBinary() */
    errorCode=U_ZERO_ERROR;
    pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
                    u_errorName(errorCode));
        return;
    }

    /* -1 is passed as the length argument -- presumably "length unknown"; TODO confirm */
    bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
                u_errorName(errorCode));
        /* release the data item before bailing out */
        udata_close(pData);
        return;
    }

    if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
        log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
    }

    ubidi_closeProps(bdp);
    udata_close(pData);

    /* coverage for ubidi_getDummy() */
    errorCode=U_ZERO_ERROR;
    /* NOTE(review): errorCode is not checked before cbdp is used -- confirm ubidi_getDummy() cannot return NULL */
    cbdp=ubidi_getDummy(&errorCode);
    if(ubidi_getClass(cbdp, 0x20)!=0) {
        log_err("ubidi_getClass(dummy, space)!=0\n");
    }
#endif
}
3290
3291 /* test case folding, compare return values with CaseFolding.txt ------------ */
3292
/*
 * Bit flags recording which case foldings of a character have been tested
 * already; CF_ALL is the union of the three individual flags.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3300
3301 static void
3302 testFold(UChar32 c, int which,
3303          UChar32 simple, UChar32 turkic,
3304          const UChar *full, int32_t fullLength,
3305          const UChar *turkicFull, int32_t turkicFullLength) {
3306     UChar s[2], t[32];
3307     UChar32 c2;
3308     int32_t length, length2;
3309
3310     UErrorCode errorCode=U_ZERO_ERROR;
3311
3312     length=0;
3313     U16_APPEND_UNSAFE(s, length, c);
3314
3315     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3316         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3317     }
3318     if((which&CF_FULL)!=0) {
3319         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
3320         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3321             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3322         }
3323     }
3324     if((which&CF_TURKIC)!=0) {
3325         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3326             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3327         }
3328
3329         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3330         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3331             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3332         }
3333     }
3334 }
3335
3336 /* test that c case-folds to itself */
3337 static void
3338 testFoldToSelf(UChar32 c, int which) {
3339     UChar s[2];
3340     int32_t length;
3341
3342     length=0;
3343     U16_APPEND_UNSAFE(s, length, c);
3344     testFold(c, which, c, c, s, length, s, length);
3345 }
3346
3347 struct CaseFoldingData {
3348     USet *notSeen;
3349     UChar32 prev, prevSimple;
3350     UChar prevFull[32];
3351     int32_t prevFullLength;
3352     int which;
3353 };
3354 typedef struct CaseFoldingData CaseFoldingData;
3355
/*
 * Line callback for CaseFolding.txt (passed to parseUCDFile() by
 * TestCaseFolding(), which also calls it directly once).
 *
 * context     the CaseFoldingData carrying state between lines
 * fields      fields[n][0] and fields[n][1] delimit field n of the line:
 *             field 0 is the code point, field 1 the status letter
 *             (C, S, F or T), field 2 the mapping
 * fieldCount  not used here
 * pErrorCode  set to U_PARSE_ERROR on a syntax error in field 0 or 1
 *
 * For each line, the folding described by the line is tested with testFold().
 * When the code point differs from the one on the previous line, the foldings
 * of the previous code point that were not mentioned in the file are tested
 * first, using the remembered non-Turkic foldings as expectations.
 */
static void U_CALLCONV
caseFoldingLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    CaseFoldingData *pData=(CaseFoldingData *)context;
    char *end;
    UChar full[32];
    UChar32 c, prev, simple;
    int32_t count;
    int which;
    char status;

    /* get code point */
    const char *s=u_skipWhitespace(fields[0][0]);
    if(0==strncmp(s, "0000..10FFFF", 12)) {
        /*
         * Ignore the line
         * # @missing: 0000..10FFFF; C; <code point>
         * because maps-to-self is already our default, and this line breaks this parser.
         */
        return;
    }
    c=(UChar32)strtoul(s, &end, 16);
    end=(char *)u_skipWhitespace(end);
    /* the hex number, plus trailing white space, must fill field 0 exactly */
    if(end<=fields[0][0] || end!=fields[0][1]) {
        log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        return;
    }

    /* get the status of this mapping */
    status=*u_skipWhitespace(fields[1][0]);
    if(status!='C' && status!='S' && status!='F' && status!='T') {
        log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        return;
    }

    /* get the mapping */
    /* presumably u_parseString() also stores the first code point in simple -- verify against uparse.h */
    count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
        return;
    }

    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
    if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
        simple=c;
    }

    if(c!=(prev=pData->prev)) {
        /*
         * Test remaining mappings for the previous code point.
         * If a turkic folding was not mentioned, then it should fold the same
         * as the regular simple case folding.
         */
        UChar prevString[2];
        int32_t length;

        length=0;
        U16_APPEND_UNSAFE(prevString, length, prev);
        /* only the foldings not yet tested for prev are selected */
        testFold(prev, (~pData->which)&CF_ALL,
                 prev, pData->prevSimple,
                 prevString, length,
                 pData->prevFull, pData->prevFullLength);
        /* reset the remembered state: c folds to itself until a line says otherwise */
        pData->prev=pData->prevSimple=c;
        length=0;
        U16_APPEND_UNSAFE(pData->prevFull, length, c);
        pData->prevFullLength=length;
        pData->which=0;
    }

    /*
     * Turn the status into a bit set of case foldings to test.
     * Remember non-Turkic case foldings as defaults for Turkic mode.
     */
    switch(status) {
    case 'C':
        which=CF_SIMPLE|CF_FULL;
        pData->prevSimple=simple;
        u_memcpy(pData->prevFull, full, count);
        pData->prevFullLength=count;
        break;
    case 'S':
        which=CF_SIMPLE;
        pData->prevSimple=simple;
        break;
    case 'F':
        which=CF_FULL;
        u_memcpy(pData->prevFull, full, count);
        pData->prevFullLength=count;
        break;
    case 'T':
        which=CF_TURKIC;
        break;
    default:
        which=0;
        break; /* won't happen because of test above */
    }

    /* the mapping on this line is the expectation for every folding selected by which */
    testFold(c, which, simple, simple, full, count, full, count);

    /* remember which case foldings of c have been tested */
    pData->which|=which;

    /* remove c from the set of ones not mentioned in CaseFolding.txt */
    uset_remove(pData->notSeen, c);
}
3464
3465 static void
3466 TestCaseFolding() {
3467     CaseFoldingData data={ NULL };
3468     char *fields[3][2];
3469     UErrorCode errorCode;
3470
3471     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3472
3473     errorCode=U_ZERO_ERROR;
3474     /* test BMP & plane 1 - nothing interesting above */
3475     data.notSeen=uset_open(0, 0x1ffff);
3476     data.prevFullLength=1; /* length of full case folding of U+0000 */
3477
3478     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3479     if(U_SUCCESS(errorCode)) {
3480         int32_t i, start, end;
3481
3482         /* add a pseudo-last line to finish testing of the actual last one */
3483         fields[0][0]=lastLine;
3484         fields[0][1]=lastLine+6;
3485         fields[1][0]=lastLine+7;
3486         fields[1][1]=lastLine+9;
3487         fields[2][0]=lastLine+10;
3488         fields[2][1]=lastLine+17;
3489         caseFoldingLineFn(&data, fields, 3, &errorCode);
3490
3491         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3492         for(i=0;
3493             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3494                 U_SUCCESS(errorCode);
3495             ++i
3496         ) {
3497             do {
3498                 testFoldToSelf(start, CF_ALL);
3499             } while(++start<=end);
3500         }
3501     }
3502
3503     uset_close(data.notSeen);
3504 }