icuSources/test/cintltst/cucdtst.c

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 1997-2016, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8 /*******************************************************************************
   9 *
  10 * File CUCDTST.C
  11 *
  12 * Modification History:
  13 *        Name                     Description
  14 *     Madhu Katragadda            Ported for C API, added tests for string functions
  15 ********************************************************************************
  16 */
  17
  18 #include <string.h>
  19 #include <math.h>
  20 #include <stdlib.h>
  21
  22 #include "unicode/utypes.h"
  23 #include "unicode/uchar.h"
  24 #include "unicode/putil.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/uloc.h"
  27 #include "unicode/unorm2.h"
  28
  29 #include "cintltst.h"
  30 #include "putilimp.h"
  31 #include "uparse.h"
  32 #include "ucase.h"
  33 #include "ubidi_props.h"
  34 #include "uprops.h"
  35 #include "uset_imp.h"
  36 #include "usc_impl.h"
  37 #include "udatamem.h"
  38 #include "cucdapi.h"
  39 #include "cmemory.h"
  40
  41 /* prototypes --------------------------------------------------------------- */
  42
  43 static void TestUpperLower(void);
  44 static void TestLetterNumber(void);
  45 static void TestMisc(void);
  46 static void TestPOSIX(void);
  47 static void TestControlPrint(void);
  48 static void TestIdentifier(void);
  49 static void TestUnicodeData(void);
  50 static void TestCodeUnit(void);
  51 static void TestCodePoint(void);
  52 static void TestCharLength(void);
  53 static void TestCharNames(void);
  54 static void TestUCharFromNameUnderflow(void);
  55 static void TestMirroring(void);
  56 static void TestUScriptRunAPI(void);
  57 static void TestAdditionalProperties(void);
  58 static void TestNumericProperties(void);
  59 static void TestPropertyNames(void);
  60 static void TestPropertyValues(void);
  61 static void TestConsistency(void);
  62 static void TestUBiDiProps(void);
  63 static void TestCaseFolding(void);
  64
  65 /* internal methods used */
  66 static int32_t MakeProp(char* str);
  67 static int32_t MakeDir(char* str);
  68
  69 /* helpers ------------------------------------------------------------------ */
  70
  71 static void
  72 parseUCDFile(const char *filename,
  73              char *fields[][2], int32_t fieldCount,
  74              UParseLineFn *lineFn, void *context,
  75              UErrorCode *pErrorCode) {
  76     char path[256];
  77     char backupPath[256];
  78
  79     if(U_FAILURE(*pErrorCode)) {
  80         return;
  81     }
  82
  83     /* Look inside ICU_DATA first */
  84     strcpy(path, u_getDataDirectory());
  85     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
  86     strcat(path, filename);
  87
  88     /* As a fallback, try to guess where the source data was located
  89      *    at the time ICU was built, and look there.
  90      */
  91     strcpy(backupPath, ctest_dataSrcDir());
  92     strcat(backupPath, U_FILE_SEP_STRING);
  93     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
  94     strcat(backupPath, filename);
  95
  96     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
  97     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
  98         *pErrorCode=U_ZERO_ERROR;
  99         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
 100     }
 101     if(U_FAILURE(*pErrorCode)) {
 102         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
 103     }
 104 }
 105
 106 /* test data ---------------------------------------------------------------- */
 107
 108 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
 109 static const int32_t tagValues[] =
 110     {
 111     /* Mn */ U_NON_SPACING_MARK,
 112     /* Mc */ U_COMBINING_SPACING_MARK,
 113     /* Me */ U_ENCLOSING_MARK,
 114     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
 115     /* Nl */ U_LETTER_NUMBER,
 116     /* No */ U_OTHER_NUMBER,
 117     /* Zs */ U_SPACE_SEPARATOR,
 118     /* Zl */ U_LINE_SEPARATOR,
 119     /* Zp */ U_PARAGRAPH_SEPARATOR,
 120     /* Cc */ U_CONTROL_CHAR,
 121     /* Cf */ U_FORMAT_CHAR,
 122     /* Cs */ U_SURROGATE,
 123     /* Co */ U_PRIVATE_USE_CHAR,
 124     /* Cn */ U_UNASSIGNED,
 125     /* Lu */ U_UPPERCASE_LETTER,
 126     /* Ll */ U_LOWERCASE_LETTER,
 127     /* Lt */ U_TITLECASE_LETTER,
 128     /* Lm */ U_MODIFIER_LETTER,
 129     /* Lo */ U_OTHER_LETTER,
 130     /* Pc */ U_CONNECTOR_PUNCTUATION,
 131     /* Pd */ U_DASH_PUNCTUATION,
 132     /* Ps */ U_START_PUNCTUATION,
 133     /* Pe */ U_END_PUNCTUATION,
 134     /* Po */ U_OTHER_PUNCTUATION,
 135     /* Sm */ U_MATH_SYMBOL,
 136     /* Sc */ U_CURRENCY_SYMBOL,
 137     /* Sk */ U_MODIFIER_SYMBOL,
 138     /* So */ U_OTHER_SYMBOL,
 139     /* Pi */ U_INITIAL_PUNCTUATION,
 140     /* Pf */ U_FINAL_PUNCTUATION
 141     };
 142
 143 static const char dirStrings[][5] = {
 144     "L",
 145     "R",
 146     "EN",
 147     "ES",
 148     "ET",
 149     "AN",
 150     "CS",
 151     "B",
 152     "S",
 153     "WS",
 154     "ON",
 155     "LRE",
 156     "LRO",
 157     "AL",
 158     "RLE",
 159     "RLO",
 160     "PDF",
 161     "NSM",
 162     "BN",
 163     /* new in Unicode 6.3/ICU 52 */
 164     "FSI",
 165     "LRI",
 166     "RLI",
 167     "PDI"
 168 };
 169
 170 void addUnicodeTest(TestNode** root);
 171
 172 void addUnicodeTest(TestNode** root)
 173 {
 174     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
 175     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
 176     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
 177     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
 178     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
 179     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
 180     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
 181     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
 182     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
 183     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
 184     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
 185     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
 186     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
 187     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
 188     addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
 189     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
 190     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
 191     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
 192     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
 193     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
 194     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
 195     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
 196     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
 197     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
 198     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
 199     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
 200 }
 201
 202 /*==================================================== */
 203 /* test u_toupper() and u_tolower()                    */
 204 /*==================================================== */
 205 static void TestUpperLower()
 206 {
 207     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
 208     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
 209     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
 210     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 211     int32_t i;
 212
 213     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
 214     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
 215
 216 /*
 217 Checks LetterLike Symbols which were previously a source of confusion
 218 [Bertrand A. D. 02/04/98]
 219 */
 220     for (i=0x2100;i<0x2138;i++)
 221     {
 222         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
 223         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
 224         {
 225             if (i != (int)u_tolower(i)) /* itself */
 226                 log_err("Failed case conversion with itself: U+%04x\n", i);
 227             if (i != (int)u_toupper(i))
 228                 log_err("Failed case conversion with itself: U+%04x\n", i);
 229         }
 230     }
 231
 232     for(i=0; i < u_strlen(upper); i++){
 233         if(u_tolower(upper[i]) != lower[i]){
 234             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
 235         }
 236     }
 237
 238     log_verbose("testing upper lower\n");
 239     for (i = 0; i < 21; i++) {
 240
 241         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
 242         {
 243             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
 244         }
 245         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
 246          {
 247             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
 248         }
 249         else if (upperTest[i] != u_tolower(lowerTest[i]))
 250         {
 251             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
 252         }
 253         else if (lowerTest[i] != u_toupper(upperTest[i]))
 254          {
 255             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
 256         }
 257         else if (upperTest[i] != u_tolower(upperTest[i]))
 258         {
 259             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
 260         }
 261         else if (lowerTest[i] != u_toupper(lowerTest[i]))
 262         {
 263             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
 264         }
 265     }
 266     log_verbose("done testing upper lower\n");
 267
 268     log_verbose("testing u_istitle\n");
 269     {
 270         static const UChar expected[] = {
 271             0x1F88,
 272             0x1F89,
 273             0x1F8A,
 274             0x1F8B,
 275             0x1F8C,
 276             0x1F8D,
 277             0x1F8E,
 278             0x1F8F,
 279             0x1F88,
 280             0x1F89,
 281             0x1F8A,
 282             0x1F8B,
 283             0x1F8C,
 284             0x1F8D,
 285             0x1F8E,
 286             0x1F8F,
 287             0x1F98,
 288             0x1F99,
 289             0x1F9A,
 290             0x1F9B,
 291             0x1F9C,
 292             0x1F9D,
 293             0x1F9E,
 294             0x1F9F,
 295             0x1F98,
 296             0x1F99,
 297             0x1F9A,
 298             0x1F9B,
 299             0x1F9C,
 300             0x1F9D,
 301             0x1F9E,
 302             0x1F9F,
 303             0x1FA8,
 304             0x1FA9,
 305             0x1FAA,
 306             0x1FAB,
 307             0x1FAC,
 308             0x1FAD,
 309             0x1FAE,
 310             0x1FAF,
 311             0x1FA8,
 312             0x1FA9,
 313             0x1FAA,
 314             0x1FAB,
 315             0x1FAC,
 316             0x1FAD,
 317             0x1FAE,
 318             0x1FAF,
 319             0x1FBC,
 320             0x1FBC,
 321             0x1FCC,
 322             0x1FCC,
 323             0x1FFC,
 324             0x1FFC,
 325         };
 326         int32_t num = UPRV_LENGTHOF(expected);
 327         for(i=0; i<num; i++){
 328             if(!u_istitle(expected[i])){
 329                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
 330             }
 331         }
 332
 333     }
 334 }
 335
 336 /* compare two sets and verify that their difference or intersection is empty */
 337 static UBool
 338 showADiffB(const USet *a, const USet *b,
 339            const char *a_name, const char *b_name,
 340            UBool expect, UBool diffIsError) {
 341     USet *aa;
 342     int32_t i, start, end, length;
 343     UErrorCode errorCode;
 344
 345     /*
 346      * expect:
 347      * TRUE  -> a-b should be empty, that is, b should contain all of a
 348      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
 349      */
 350     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
 351         return TRUE;
 352     }
 353
 354     /* clone a to aa because a is const */
 355     aa=uset_open(1, 0);
 356     if(aa==NULL) {
 357         /* unusual problem - out of memory? */
 358         return FALSE;
 359     }
 360     uset_addAll(aa, a);
 361
 362     /* compute the set in question */
 363     if(expect) {
 364         /* a-b */
 365         uset_removeAll(aa, b);
 366     } else {
 367         /* a&b */
 368         uset_retainAll(aa, b);
 369     }
 370
 371     /* aa is not empty because of the initial tests above; show its contents */
 372     errorCode=U_ZERO_ERROR;
 373     i=0;
 374     for(;;) {
 375         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
 376         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 377             break; /* done */
 378         }
 379         if(U_FAILURE(errorCode)) {
 380             log_err("error comparing %s with %s at difference item %d: %s\n",
 381                 a_name, b_name, i, u_errorName(errorCode));
 382             break;
 383         }
 384         if(length!=0) {
 385             break; /* done with code points, got a string or -1 */
 386         }
 387
 388         if(diffIsError) {
 389             if(expect) {
 390                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 391             } else {
 392                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 393             }
 394         } else {
 395             if(expect) {
 396                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
 397             } else {
 398                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
 399             }
 400         }
 401
 402         ++i;
 403     }
 404
 405     uset_close(aa);
 406     return FALSE;
 407 }
 408
 409 static UBool
 410 showAMinusB(const USet *a, const USet *b,
 411             const char *a_name, const char *b_name,
 412             UBool diffIsError) {
 413     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
 414 }
 415
 416 static UBool
 417 showAIntersectB(const USet *a, const USet *b,
 418                 const char *a_name, const char *b_name,
 419                 UBool diffIsError) {
 420     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
 421 }
 422
 423 static UBool
 424 compareUSets(const USet *a, const USet *b,
 425              const char *a_name, const char *b_name,
 426              UBool diffIsError) {
 427     /*
 428      * Use an arithmetic & not a logical && so that both branches
 429      * are always taken and all differences are shown.
 430      */
 431     return
 432         showAMinusB(a, b, a_name, b_name, diffIsError) &
 433         showAMinusB(b, a, b_name, a_name, diffIsError);
 434 }
 435
/* test isLetter(u_isalpha()) and isDigit(u_isdigit()) */
 437 static void TestLetterNumber()
 438 {
 439     UChar i = 0x0000;
 440
 441     log_verbose("Testing for isalpha\n");
 442     for (i = 0x0041; i < 0x005B; i++) {
 443         if (!u_isalpha(i))
 444         {
 445             log_err("Failed isLetter test at  %.4X\n", i);
 446         }
 447     }
 448     for (i = 0x0660; i < 0x066A; i++) {
 449         if (u_isalpha(i))
 450         {
 451             log_err("Failed isLetter test with numbers at %.4X\n", i);
 452         }
 453     }
 454
 455     log_verbose("Testing for isdigit\n");
 456     for (i = 0x0660; i < 0x066A; i++) {
 457         if (!u_isdigit(i))
 458         {
 459             log_verbose("Failed isNumber test at %.4X\n", i);
 460         }
 461     }
 462
 463     log_verbose("Testing for isalnum\n");
 464     for (i = 0x0041; i < 0x005B; i++) {
 465         if (!u_isalnum(i))
 466         {
 467             log_err("Failed isAlNum test at  %.4X\n", i);
 468         }
 469     }
 470     for (i = 0x0660; i < 0x066A; i++) {
 471         if (!u_isalnum(i))
 472         {
 473             log_err("Failed isAlNum test at  %.4X\n", i);
 474         }
 475     }
 476
 477     {
 478         /*
 479          * The following checks work only starting from Unicode 4.0.
 480          * Check the version number here.
 481          */
 482         static UVersionInfo u401={ 4, 0, 1, 0 };
 483         UVersionInfo version;
 484         u_getUnicodeVersion(version);
 485         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
 486             return;
 487         }
 488     }
 489
 490     {
 491         /*
 492          * Sanity check:
 493          * Verify that exactly the digit characters have decimal digit values.
 494          * This assumption is used in the implementation of u_digit()
 495          * (which checks nt=de)
 496          * compared with the parallel java.lang.Character.digit()
 497          * (which checks Nd).
 498          *
 499          * This was not true in Unicode 3.2 and earlier.
 500          * Unicode 4.0 fixed discrepancies.
 501          * Unicode 4.0.1 re-introduced problems in this area due to an
 502          * unintentionally incomplete last-minute change.
 503          */
 504         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
 505         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 506
 507         USet *digits, *decimalValues;
 508         UErrorCode errorCode;
 509
 510         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
 511         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
 512         errorCode=U_ZERO_ERROR;
 513         digits=uset_openPattern(digitsPattern, 6, &errorCode);
 514         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
 515
 516         if(U_SUCCESS(errorCode)) {
 517             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
 518         }
 519
 520         uset_close(digits);
 521         uset_close(decimalValues);
 522     }
 523 }
 524
 525 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
 526                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
 527                                 UBool expected) {
 528     int32_t i;
 529     for (i = 0; i < sampleCharsLength; ++i) {
 530         UBool result = propFn(sampleChars[i]);
 531         if (result != expected) {
 532             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
 533                     propName, sampleChars[i], result);
 534         }
 535     }
 536 }
 537
/* Tests for isDefined(u_isdefined()), isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(u_isWhitespace()), u_charDigitValue() */
 539 static void TestMisc()
 540 {
 541     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
 542     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
 543     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
 544     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
 545     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
 546     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
 547 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
 548     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
 549     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
 550     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
 551     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
 552
 553     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
 554
 555     uint32_t mask;
 556
 557     int32_t i;
 558     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
 559     UVersionInfo realVersion;
 560
 561     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
 562
 563     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
 564     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
 565
 566     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 567                         sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
 568     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
 569                         sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
 570
 571     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 572                         sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE);
 573     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
 574                         sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE);
 575
 576     testSampleCharProps(u_isdefined, "u_isdefined",
 577                         sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);
 578     testSampleCharProps(u_isdefined, "u_isdefined",
 579                         sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);
 580
 581     testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE);
 582     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE);
 583
 584     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE);
 585     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE);
 586
 587     for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
 588         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
 589             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
 590                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
 591         }
 592     }
 593
 594     /* Tests the ICU version #*/
 595     u_getVersion(realVersion);
 596     u_versionToString(realVersion, icuVersion);
 597     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
 598     {
 599         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
 600     }
 601 #if defined(ICU_VERSION)
 602     /* test only happens where we have configure.in with VERSION - sanity check. */
 603     if(strcmp(U_ICU_VERSION, ICU_VERSION))
 604     {
 605         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
 606     }
 607 #endif
 608
 609     /* test U_GC_... */
 610     if(
 611         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
 612         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
 613         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
 614         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
 615         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
 616         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
 617     ) {
 618         log_err("error: U_GET_GC_MASK does not work properly\n");
 619     }
 620
 621     mask=0;
 622     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
 623
 624     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
 625     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
 626     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
 627     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
 628     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
 629
 630     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
 631     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
 632     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
 633
 634     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
 635     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
 636     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
 637
 638     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
 639     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
 640     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
 641
 642     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
 643     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
 644     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
 645     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
 646
 647     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
 648     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
 649     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
 650     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
 651     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
 652
 653     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
 654     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
 655     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
 656     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
 657
 658     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
 659     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
 660
 661     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 662         log_err("error: problems with U_GC_XX_MASK constants\n");
 663     }
 664
 665     mask=0;
 666     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
 667     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
 668     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
 669     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
 670     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
 671     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
 672     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
 673
 674     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
 675         log_err("error: problems with U_GC_Y_MASK constants\n");
 676     }
 677     {
 678         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
 679         for(i=0; i<10; i++){
 680             if(digit[i]!=u_forDigit(i,10)){
 681                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
 682             }
 683         }
 684     }
 685
 686     /* test u_digit() */
 687     {
 688         static const struct {
 689             UChar32 c;
 690             int8_t radix, value;
 691         } data[]={
 692             /* base 16 */
 693             { 0x0031, 16, 1 },
 694             { 0x0038, 16, 8 },
 695             { 0x0043, 16, 12 },
 696             { 0x0066, 16, 15 },
 697             { 0x00e4, 16, -1 },
 698             { 0x0662, 16, 2 },
 699             { 0x06f5, 16, 5 },
 700             { 0xff13, 16, 3 },
 701             { 0xff41, 16, 10 },
 702
 703             /* base 8 */
 704             { 0x0031, 8, 1 },
 705             { 0x0038, 8, -1 },
 706             { 0x0043, 8, -1 },
 707             { 0x0066, 8, -1 },
 708             { 0x00e4, 8, -1 },
 709             { 0x0662, 8, 2 },
 710             { 0x06f5, 8, 5 },
 711             { 0xff13, 8, 3 },
 712             { 0xff41, 8, -1 },
 713
 714             /* base 36 */
 715             { 0x5a, 36, 35 },
 716             { 0x7a, 36, 35 },
 717             { 0xff3a, 36, 35 },
 718             { 0xff5a, 36, 35 },
 719
 720             /* wrong radix values */
 721             { 0x0031, 1, -1 },
 722             { 0xff3a, 37, -1 }
 723         };
 724
 725         for(i=0; i<UPRV_LENGTHOF(data); ++i) {
 726             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
 727                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
 728                         data[i].c,
 729                         data[i].radix,
 730                         u_digit(data[i].c, data[i].radix),
 731                         data[i].value);
 732             }
 733         }
 734     }
 735 }
 736
 737 /* test C/POSIX-style functions --------------------------------------------- */
 738
 739 /* bit flags */
 740 #define ISAL     1
 741 #define ISLO     2
 742 #define ISUP     4
 743
 744 #define ISDI     8
 745 #define ISXD  0x10
 746
 747 #define ISAN  0x20
 748
 749 #define ISPU  0x40
 750 #define ISGR  0x80
 751 #define ISPR 0x100
 752
 753 #define ISSP 0x200
 754 #define ISBL 0x400
 755 #define ISCN 0x800
 756
 757 /* C/POSIX-style functions, in the same order as the bit flags */
 758 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
 759
 760 static const struct {
 761     IsPOSIXClass *fn;
 762     const char *name;
 763 } posixClasses[]={
 764     { u_isalpha, "isalpha" },
 765     { u_islower, "islower" },
 766     { u_isupper, "isupper" },
 767     { u_isdigit, "isdigit" },
 768     { u_isxdigit, "isxdigit" },
 769     { u_isalnum, "isalnum" },
 770     { u_ispunct, "ispunct" },
 771     { u_isgraph, "isgraph" },
 772     { u_isprint, "isprint" },
 773     { u_isspace, "isspace" },
 774     { u_isblank, "isblank" },
 775     { u_iscntrl, "iscntrl" }
 776 };
 777
/*
 * Expected results for TestPOSIX(): for each code point c, posixResults has
 * the IS.. bit set for every posixClasses[] function that is expected to
 * return TRUE for c, and clear for every function expected to return FALSE.
 * The columns are aligned so that each flag appears in a fixed position.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
 817
 818 static void
 819 TestPOSIX() {
 820     uint32_t mask;
 821     int32_t cl, i;
 822     UBool expect;
 823
 824     mask=1;
 825     for(cl=0; cl<12; ++cl) {
 826         for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
 827             expect=(UBool)((posixData[i].posixResults&mask)!=0);
 828             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
 829                 log_err("u_%s(U+%04x)=%s is wrong\n",
 830                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
 831             }
 832         }
 833         mask<<=1;
 834     }
 835 }
 836
 837 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
 838 static void TestControlPrint()
 839 {
 840     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
 841     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
 842     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
 843     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
 844     UChar32 c;
 845
 846     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE);
 847     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE);
 848
 849     testSampleCharProps(u_isprint, "u_isprint",
 850                         samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);
 851     testSampleCharProps(u_isprint, "u_isprint",
 852                         sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE);
 853
 854     /* test all ISO 8 controls */
 855     for(c=0; c<=0x9f; ++c) {
 856         if(c==0x20) {
 857             /* skip ASCII graphic characters and continue with DEL */
 858             c=0x7f;
 859         }
 860         if(!u_iscntrl(c)) {
 861             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
 862         }
 863         if(!u_isISOControl(c)) {
 864             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
 865         }
 866         if(u_isprint(c)) {
 867             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
 868         }
 869     }
 870
 871     /* test all Latin-1 graphic characters */
 872     for(c=0x20; c<=0xff; ++c) {
 873         if(c==0x7f) {
 874             c=0xa0;
 875         } else if(c==0xad) {
 876             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
 877             ++c;
 878         }
 879         if(!u_isprint(c)) {
 880             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
 881         }
 882     }
 883 }
 884
 885 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
 886 static void TestIdentifier()
 887 {
 888     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
 889     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
 890     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
 891     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
 892     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
 893     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
 894     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
 895     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
 896     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
 897     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
 898
 899     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 900                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
 901     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
 902                         sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE);
 903
 904     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 905                         sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE);
 906     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 907                         sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);
 908
 909     /* IDPart should imply IDStart */
 910     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
 911                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
 912
 913     testSampleCharProps(u_isIDStart, "u_isIDStart",
 914                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
 915     testSampleCharProps(u_isIDStart, "u_isIDStart",
 916                         sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE);
 917
 918     testSampleCharProps(u_isIDPart, "u_isIDPart",
 919                         sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);
 920     testSampleCharProps(u_isIDPart, "u_isIDPart",
 921                         sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE);
 922
 923     /* IDPart should imply IDStart */
 924     testSampleCharProps(u_isIDPart, "u_isIDPart",
 925                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
 926
 927     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 928                         sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);
 929     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
 930                         sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE);
 931 }
 932
 933 /* for each line of UnicodeData.txt, check some of the properties */
 934 typedef struct UnicodeDataContext {
 935 #if UCONFIG_NO_NORMALIZATION
 936     const void *dummy;
 937 #else
 938     const UNormalizer2 *nfc;
 939     const UNormalizer2 *nfkc;
 940 #endif
 941 } UnicodeDataContext;
 942
 943 /*
 944  * ### TODO
 945  * This test fails incorrectly if the First or Last code point of a repetitive area
 946  * is overridden, which is allowed and is encouraged for the PUAs.
 947  * Currently, this means that both area First/Last and override lines are
 948  * tested against the properties from the API,
 949  * and the area boundary will not match and cause an error.
 950  *
 951  * This function should detect area boundaries and skip them for the test of individual
 952  * code points' properties.
 953  * Then it should check that the areas contain all the same properties except where overridden.
 954  * For this, it would have had to set a flag for which code points were listed explicitly.
 955  */
 956 static void U_CALLCONV
 957 unicodeDataLineFn(void *context,
 958                   char *fields[][2], int32_t fieldCount,
 959                   UErrorCode *pErrorCode)
 960 {
 961     char buffer[100];
 962     const char *d;
 963     char *end;
 964     uint32_t value;
 965     UChar32 c;
 966     int32_t i;
 967     int8_t type;
 968     int32_t dt;
 969     UChar dm[32], s[32];
 970     int32_t dmLength, length;
 971
 972 #if !UCONFIG_NO_NORMALIZATION
 973     const UNormalizer2 *nfc, *nfkc;
 974 #endif
 975
 976     /* get the character code, field 0 */
 977     c=strtoul(fields[0][0], &end, 16);
 978     if(end<=fields[0][0] || end!=fields[0][1]) {
 979         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
 980         return;
 981     }
 982     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
 983         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
 984         return;
 985     }
 986
 987     /* get general category, field 2 */
 988     *fields[2][1]=0;
 989     type = (int8_t)tagValues[MakeProp(fields[2][0])];
 990     if(u_charType(c)!=type) {
 991         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
 992     }
 993     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
 994         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
 995     }
 996
 997     /* get canonical combining class, field 3 */
 998     value=strtoul(fields[3][0], &end, 10);
 999     if(end<=fields[3][0] || end!=fields[3][1]) {
1000         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1001         return;
1002     }
1003     if(value>255) {
1004         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1005         return;
1006     }
1007 #if !UCONFIG_NO_NORMALIZATION
1008     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1009         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1010     }
1011     nfkc=((UnicodeDataContext *)context)->nfkc;
1012     if(value!=unorm2_getCombiningClass(nfkc, c)) {
1013         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1014     }
1015 #endif
1016
1017     /* get BiDi category, field 4 */
1018     *fields[4][1]=0;
1019     i=MakeDir(fields[4][0]);
1020     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1021         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1022     }
1023
1024     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1025     d=NULL;
1026     if(fields[5][0]==fields[5][1]) {
1027         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1028         if(c==0xac00 || c==0xd7a3) {
1029             dt=U_DT_CANONICAL;
1030         } else {
1031             dt=U_DT_NONE;
1032         }
1033     } else {
1034         d=fields[5][0];
1035         *fields[5][1]=0;
1036         dt=UCHAR_INVALID_CODE;
1037         if(*d=='<') {
1038             end=strchr(++d, '>');
1039             if(end!=NULL) {
1040                 *end=0;
1041                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1042                 d=u_skipWhitespace(end+1);
1043             }
1044         } else {
1045             dt=U_DT_CANONICAL;
1046         }
1047     }
1048     if(dt>U_DT_NONE) {
1049         if(c==0xac00) {
1050             dm[0]=0x1100;
1051             dm[1]=0x1161;
1052             dm[2]=0;
1053             dmLength=2;
1054         } else if(c==0xd7a3) {
1055             dm[0]=0xd788;
1056             dm[1]=0x11c2;
1057             dm[2]=0;
1058             dmLength=2;
1059         } else {
1060             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1061         }
1062     } else {
1063         dmLength=-1;
1064     }
1065     if(dt<0 || U_FAILURE(*pErrorCode)) {
1066         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1067         return;
1068     }
1069 #if !UCONFIG_NO_NORMALIZATION
1070     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1071     if(i!=dt) {
1072         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1073     }
1074     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1075     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1076     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1077         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1078                 "or the Decomposition_Mapping is different (%s)\n",
1079                 c, length, dmLength, u_errorName(*pErrorCode));
1080         return;
1081     }
1082     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1083     if(dt!=U_DT_CANONICAL) {
1084         dmLength=-1;
1085     }
1086     nfc=((UnicodeDataContext *)context)->nfc;
1087     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1088     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1089         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1090                 "or the Decomposition_Mapping is different (%s)\n",
1091                 c, length, dmLength, u_errorName(*pErrorCode));
1092         return;
1093     }
1094     /* recompose */
1095     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1096         UChar32 a, b, composite;
1097         i=0;
1098         U16_NEXT(dm, i, dmLength, a);
1099         U16_NEXT(dm, i, dmLength, b);
1100         /* i==dmLength */
1101         composite=unorm2_composePair(nfc, a, b);
1102         if(composite!=c) {
1103             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1104                     (long)c, (long)a, (long)b, (long)composite);
1105         }
1106         /*
1107          * Note: NFKC has fewer round-trip mappings than NFC,
1108          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1109          */
1110     }
1111 #endif
1112
1113     /* get ISO Comment, field 11 */
1114     *fields[11][1]=0;
1115     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1116     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1117         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1118             c, u_errorName(*pErrorCode),
1119             U_FAILURE(*pErrorCode) ? buffer : "[error]",
1120             fields[11][0]);
1121     }
1122
1123     /* get uppercase mapping, field 12 */
1124     if(fields[12][0]!=fields[12][1]) {
1125         value=strtoul(fields[12][0], &end, 16);
1126         if(end!=fields[12][1]) {
1127             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1128             return;
1129         }
1130         if((UChar32)value!=u_toupper(c)) {
1131             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1132         }
1133     } else {
1134         /* no case mapping: the API must map the code point to itself */
1135         if(c!=u_toupper(c)) {
1136             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1137         }
1138     }
1139
1140     /* get lowercase mapping, field 13 */
1141     if(fields[13][0]!=fields[13][1]) {
1142         value=strtoul(fields[13][0], &end, 16);
1143         if(end!=fields[13][1]) {
1144             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1145             return;
1146         }
1147         if((UChar32)value!=u_tolower(c)) {
1148             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1149         }
1150     } else {
1151         /* no case mapping: the API must map the code point to itself */
1152         if(c!=u_tolower(c)) {
1153             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1154         }
1155     }
1156
1157     /* get titlecase mapping, field 14 */
1158     if(fields[14][0]!=fields[14][1]) {
1159         value=strtoul(fields[14][0], &end, 16);
1160         if(end!=fields[14][1]) {
1161             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1162             return;
1163         }
1164         if((UChar32)value!=u_totitle(c)) {
1165             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1166         }
1167     } else {
1168         /* no case mapping: the API must map the code point to itself */
1169         if(c!=u_totitle(c)) {
1170             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1171         }
1172     }
1173 }
1174
1175 static UBool U_CALLCONV
1176 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1177     static const UChar32 test[][2]={
1178         {0x41, U_UPPERCASE_LETTER},
1179         {0x308, U_NON_SPACING_MARK},
1180         {0xfffe, U_GENERAL_OTHER_TYPES},
1181         {0xe0041, U_FORMAT_CHAR},
1182         {0xeffff, U_UNASSIGNED}
1183     };
1184
1185     int32_t i, count;
1186
1187     if(0!=strcmp((const char *)context, "a1")) {
1188         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1189         return FALSE;
1190     }
1191
1192     count=UPRV_LENGTHOF(test);
1193     for(i=0; i<count; ++i) {
1194         if(start<=test[i][0] && test[i][0]<limit) {
1195             if(type!=(UCharCategory)test[i][1]) {
1196                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1197                         start, limit, (long)type, test[i][0], test[i][1]);
1198             }
1199             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1200             return i==(count-1) ? FALSE : TRUE;
1201         }
1202     }
1203
1204     if(start>test[count-1][0]) {
1205         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1206                 start, limit, (long)type);
1207         return FALSE;
1208     }
1209
1210     return TRUE;
1211 }
1212
1213 static UBool U_CALLCONV
1214 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1215     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1216     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1217         { 0x0590, U_LEFT_TO_RIGHT },
1218         { 0x0600, U_RIGHT_TO_LEFT },
1219         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1220         { 0x0860, U_RIGHT_TO_LEFT },
1221         { 0x0870, U_RIGHT_TO_LEFT_ARABIC },  // Unicode 10 changes U+0860..U+086F from R to AL.
1222         { 0x08A0, U_RIGHT_TO_LEFT },
1223         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1224         { 0x20A0, U_LEFT_TO_RIGHT },
1225         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1226         { 0xFB1D, U_LEFT_TO_RIGHT },
1227         { 0xFB50, U_RIGHT_TO_LEFT },
1228         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1229         { 0xFE70, U_LEFT_TO_RIGHT },
1230         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1231         { 0x10800, U_LEFT_TO_RIGHT },
1232         { 0x11000, U_RIGHT_TO_LEFT },
1233         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1234         { 0x1EE00, U_RIGHT_TO_LEFT },
1235         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1236         { 0x1F000, U_RIGHT_TO_LEFT },
1237         { 0x110000, U_LEFT_TO_RIGHT }
1238     };
1239
1240     UChar32 c;
1241     int32_t i;
1242     UCharDirection shouldBeDir;
1243
1244     /*
1245      * LineBreak.txt specifies:
1246      *   #  - Assigned characters that are not listed explicitly are given the value
1247      *   #    "AL".
1248      *   #  - Unassigned characters are given the value "XX".
1249      *
1250      * PUA characters are listed explicitly with "XX".
1251      * Verify that no assigned character has "XX".
1252      */
1253     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1254         c=start;
1255         while(c<limit) {
1256             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1257                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1258             }
1259             ++c;
1260         }
1261     }
1262
1263     /*
1264      * Verify default Bidi classes.
1265      * See DerivedBidiClass.txt, especially for unassigned code points.
1266      */
1267     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1268         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1269         c=start;
1270         for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
1271             if((int32_t)c<defaultBidi[i][0]) {
1272                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1273                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1274                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1275                     } else {
1276                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1277                     }
1278
1279                     if( u_charDirection(c)!=shouldBeDir ||
1280                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1281                     ) {
1282                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1283                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1284                     }
1285                     ++c;
1286                 }
1287             }
1288         }
1289     }
1290
1291     return TRUE;
1292 }
1293
1294 /* tests for several properties */
1295 static void TestUnicodeData()
1296 {
1297     UVersionInfo expectVersionArray;
1298     UVersionInfo versionArray;
1299     char *fields[15][2];
1300     UErrorCode errorCode;
1301     UChar32 c;
1302     int8_t type;
1303
1304     UnicodeDataContext context;
1305
1306     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1307     u_getUnicodeVersion(versionArray);
1308     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1309     {
1310         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1311         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1312     }
1313
1314 #if defined(ICU_UNICODE_VERSION)
1315     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1316     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1317     {
1318          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1319     }
1320 #endif
1321
1322     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1323         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1324     }
1325
1326     errorCode=U_ZERO_ERROR;
1327 #if !UCONFIG_NO_NORMALIZATION
1328     context.nfc=unorm2_getNFCInstance(&errorCode);
1329     context.nfkc=unorm2_getNFKCInstance(&errorCode);
1330     if(U_FAILURE(errorCode)) {
1331         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1332         return;
1333     }
1334 #endif
1335     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1336     if(U_FAILURE(errorCode)) {
1337         return; /* if we couldn't parse UnicodeData.txt, we should return */
1338     }
1339
1340     /* sanity check on repeated properties */
1341     for(c=0xfffe; c<=0x10ffff;) {
1342         type=u_charType(c);
1343         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1344             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1345         }
1346         if(type!=U_UNASSIGNED) {
1347             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1348         }
1349         if((c&0xffff)==0xfffe) {
1350             ++c;
1351         } else {
1352             c+=0xffff;
1353         }
1354     }
1355
1356     /* test that PUA is not "unassigned" */
1357     for(c=0xe000; c<=0x10fffd;) {
1358         type=u_charType(c);
1359         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1360             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1361         }
1362         if(type==U_UNASSIGNED) {
1363             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1364         } else if(type!=U_PRIVATE_USE_CHAR) {
1365             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1366         }
1367         if(c==0xf8ff) {
1368             c=0xf0000;
1369         } else if(c==0xffffd) {
1370             c=0x100000;
1371         } else {
1372             ++c;
1373         }
1374     }
1375
1376     /* test u_enumCharTypes() */
1377     u_enumCharTypes(enumTypeRange, "a1");
1378
1379     /* check default properties */
1380     u_enumCharTypes(enumDefaultsRange, NULL);
1381 }
1382
1383 static void TestCodeUnit(){
1384     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1385
1386     int32_t i;
1387
1388     for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
1389         UChar c=codeunit[i];
1390         if(i<4){
1391             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1392                 log_err("ERROR: U+%04x is a single", c);
1393             }
1394
1395         }
1396         if(i >= 4 && i< 8){
1397             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1398                 log_err("ERROR: U+%04x is a first surrogate", c);
1399             }
1400         }
1401         if(i >= 8 && i< 12){
1402             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1403                 log_err("ERROR: U+%04x is a second surrogate", c);
1404             }
1405         }
1406     }
1407
1408 }
1409
1410 static void TestCodePoint(){
1411     const UChar32 codePoint[]={
1412         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1413         0xd800,
1414         0xdbff,
1415         0xdc00,
1416         0xdfff,
1417         0xdc04,
1418         0xd821,
1419         /*not a surrogate, valid, isUnicodeChar , not Error*/
1420         0x20ac,
1421         0xd7ff,
1422         0xe000,
1423         0xe123,
1424         0x0061,
1425         0xe065,
1426         0x20402,
1427         0x24506,
1428         0x23456,
1429         0x20402,
1430         0x10402,
1431         0x23456,
1432         /*not a surrogate, not valid, isUnicodeChar, isError */
1433         0x0015,
1434         0x009f,
1435         /*not a surrogate, not valid, not isUnicodeChar, isError */
1436         0xffff,
1437         0xfffe,
1438     };
1439     int32_t i;
1440     for(i=0; i<UPRV_LENGTHOF(codePoint); i++){
1441         UChar32 c=codePoint[i];
1442         if(i<6){
1443             if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1444                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1445             }
1446             if(UTF_IS_VALID(c)){
1447                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1448             }
1449             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1450                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1451             }
1452             if(UTF_IS_ERROR(c)){
1453                 log_err("ERROR: isError() failed for U+%04x\n", c);
1454             }
1455         }else if(i >=6 && i<18){
1456             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1457                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1458             }
1459             if(!UTF_IS_VALID(c)){
1460                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1461             }
1462             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1463                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1464             }
1465             if(UTF_IS_ERROR(c)){
1466                 log_err("ERROR: isError() failed for U+%04x\n", c);
1467             }
1468         }else if(i >=18 && i<20){
1469             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1470                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1471             }
1472             if(UTF_IS_VALID(c)){
1473                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1474             }
1475             if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1476                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1477             }
1478             if(!UTF_IS_ERROR(c)){
1479                 log_err("ERROR: isError() failed for U+%04x\n", c);
1480             }
1481         }
1482         else if(i >=18 && i<UPRV_LENGTHOF(codePoint)){
1483             if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1484                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1485             }
1486             if(UTF_IS_VALID(c)){
1487                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1488             }
1489             if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1490                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1491             }
1492             if(!UTF_IS_ERROR(c)){
1493                 log_err("ERROR: isError() failed for U+%04x\n", c);
1494             }
1495         }
1496     }
1497
1498     if(
1499         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1500         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1501         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1502         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1503     ) {
1504         log_err("error with U_IS_BMP()\n");
1505     }
1506
1507     if(
1508         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1509         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1510         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1511         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1512     ) {
1513         log_err("error with U_IS_SUPPLEMENTARY()\n");
1514     }
1515 }
1516
1517 static void TestCharLength()
1518 {
1519     const int32_t codepoint[]={
1520         1, 0x0061,
1521         1, 0xe065,
1522         1, 0x20ac,
1523         2, 0x20402,
1524         2, 0x23456,
1525         2, 0x24506,
1526         2, 0x20402,
1527         2, 0x10402,
1528         1, 0xd7ff,
1529         1, 0xe000
1530     };
1531
1532     int32_t i;
1533     UBool multiple;
1534     for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
1535         UChar32 c=codepoint[i+1];
1536         if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1537             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1538         }
1539         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1540         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1541             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1542         }
1543     }
1544 }
1545
1546 /*internal functions ----*/
1547 static int32_t MakeProp(char* str)
1548 {
1549     int32_t result = 0;
1550     char* matchPosition =0;
1551
1552     matchPosition = strstr(tagStrings, str);
1553     if (matchPosition == 0)
1554     {
1555         log_err("unrecognized type letter ");
1556         log_err(str);
1557     }
1558     else
1559         result = (int32_t)((matchPosition - tagStrings) / 2);
1560     return result;
1561 }
1562
1563 static int32_t MakeDir(char* str)
1564 {
1565     int32_t pos = 0;
1566     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1567         if (strcmp(str, dirStrings[pos]) == 0) {
1568             return pos;
1569         }
1570     }
1571     return -1;
1572 }
1573
1574 /* test u_charName() -------------------------------------------------------- */
1575
/*
 * Expected character names per code point.
 * Fields: modern name, Unicode 1.0 name, extended name, name alias.
 * An empty string means "no name of this kind"; alias is NULL when
 * no alias is expected.
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    { 0x0061,
      "LATIN SMALL LETTER A", "",
      "LATIN SMALL LETTER A",
      NULL },
    { 0x01a2,
      "LATIN CAPITAL LETTER OI", "",
      "LATIN CAPITAL LETTER OI",
      "LATIN CAPITAL LETTER GHA" },
    { 0x0284,
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
      NULL },
    { 0x0fd0,
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
      "TIBETAN MARK BKA- SHOG GI MGO RGYAN" },
    { 0x3401,
      "CJK UNIFIED IDEOGRAPH-3401", "",
      "CJK UNIFIED IDEOGRAPH-3401",
      NULL },
    { 0x7fed,
      "CJK UNIFIED IDEOGRAPH-7FED", "",
      "CJK UNIFIED IDEOGRAPH-7FED",
      NULL },
    { 0xac00,
      "HANGUL SYLLABLE GA", "",
      "HANGUL SYLLABLE GA",
      NULL },
    { 0xd7a3,
      "HANGUL SYLLABLE HIH", "",
      "HANGUL SYLLABLE HIH",
      NULL },
    { 0xd800,
      "", "",
      "<lead surrogate-D800>",
      NULL },
    { 0xdc00,
      "", "",
      "<trail surrogate-DC00>",
      NULL },
    { 0xff08,
      "FULLWIDTH LEFT PARENTHESIS", "",
      "FULLWIDTH LEFT PARENTHESIS",
      NULL },
    { 0xffe5,
      "FULLWIDTH YEN SIGN", "",
      "FULLWIDTH YEN SIGN",
      NULL },
    { 0xffff,
      "", "",
      "<noncharacter-FFFF>",
      NULL },
    { 0x1d0c5,
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
      "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS" },
    { 0x23456,
      "CJK UNIFIED IDEOGRAPH-23456", "",
      "CJK UNIFIED IDEOGRAPH-23456",
      NULL }
};
1603
1604 static UBool
1605 enumCharNamesFn(void *context,
1606                 UChar32 code, UCharNameChoice nameChoice,
1607                 const char *name, int32_t length) {
1608     int32_t *pCount=(int32_t *)context;
1609     const char *expected;
1610     int i;
1611
1612     if(length<=0 || length!=(int32_t)strlen(name)) {
1613         /* should not be called with an empty string or invalid length */
1614         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1615         return TRUE;
1616     }
1617
1618     ++*pCount;
1619     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1620         if(code==(UChar32)names[i].code) {
1621             switch (nameChoice) {
1622                 case U_EXTENDED_CHAR_NAME:
1623                     if(0!=strcmp(name, names[i].extName)) {
1624                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1625                     }
1626                     break;
1627                 case U_UNICODE_CHAR_NAME:
1628                     if(0!=strcmp(name, names[i].name)) {
1629                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1630                     }
1631                     break;
1632                 case U_UNICODE_10_CHAR_NAME:
1633                     expected=names[i].oldName;
1634                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
1635                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1636                     }
1637                     break;
1638                 case U_CHAR_NAME_ALIAS:
1639                     expected=names[i].alias;
1640                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1641                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1642                     }
1643                     break;
1644                 case U_CHAR_NAME_CHOICE_COUNT:
1645                     break;
1646             }
1647             break;
1648         }
1649     }
1650     return TRUE;
1651 }
1652
/* State carried through u_enumCharNames() to enumExtCharNamesFn(). */
struct enumExtCharNamesContext {
    uint32_t length; /* name counter; its address is forwarded to enumCharNamesFn(), which increments it */
    int32_t last;    /* code point of the previous callback; TestCharNames() starts it at -1 */
};
1657
1658 static UBool
1659 enumExtCharNamesFn(void *context,
1660                 UChar32 code, UCharNameChoice nameChoice,
1661                 const char *name, int32_t length) {
1662     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1663
1664     if (ecncp->last != (int32_t) code - 1) {
1665         if (ecncp->last < 0) {
1666             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1667         } else {
1668             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1669         }
1670     }
1671     ecncp->last = (int32_t) code;
1672
1673     if (!*name) {
1674         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1675     }
1676
1677     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1678 }
1679
1680 /**
1681  * This can be made more efficient by moving it into putil.c and having
1682  * it directly access the ebcdic translation tables.
1683  * TODO: If we get this method in putil.c, then delete it from here.
1684  */
1685 static UChar
1686 u_charToUChar(char c) {
1687     UChar uc;
1688     u_charsToUChars(&c, &uc, 1);
1689     return uc;
1690 }
1691
1692 static void
1693 TestCharNames() {
1694     static char name[80];
1695     UErrorCode errorCode=U_ZERO_ERROR;
1696     struct enumExtCharNamesContext extContext;
1697     const char *expected;
1698     int32_t length;
1699     UChar32 c;
1700     int32_t i;
1701
1702     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1703     length=uprv_getMaxCharNameLength();
1704     if(length==0) {
1705         /* no names data available */
1706         return;
1707     }
1708     if(length<83) { /* Unicode 3.2 max char name length */
1709         log_err("uprv_getMaxCharNameLength()=%d is too short");
1710     }
1711     /* ### TODO same tests for max ISO comment length as for max name length */
1712
1713     log_verbose("Testing u_charName()\n");
1714     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1715         /* modern Unicode character name */
1716         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1717         if(U_FAILURE(errorCode)) {
1718             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1719             return;
1720         }
1721         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1722             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1723         }
1724
1725         /* find the modern name */
1726         if (*names[i].name) {
1727             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1728             if(U_FAILURE(errorCode)) {
1729                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1730                 return;
1731             }
1732             if(c!=(UChar32)names[i].code) {
1733                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1734             }
1735         }
1736
1737         /* Unicode 1.0 character name */
1738         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1739         if(U_FAILURE(errorCode)) {
1740             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1741             return;
1742         }
1743         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1744             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1745         }
1746
1747         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1748         if(names[i].oldName[0]!=0 /* && length>0 */) {
1749             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1750             if(U_FAILURE(errorCode)) {
1751                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1752                 return;
1753             }
1754             if(c!=(UChar32)names[i].code) {
1755                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1756             }
1757         }
1758
1759         /* Unicode character name alias */
1760         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1761         if(U_FAILURE(errorCode)) {
1762             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1763             return;
1764         }
1765         expected=names[i].alias;
1766         if(expected==NULL) {
1767             expected="";
1768         }
1769         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1770             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1771                     names[i].code, name, length, expected);
1772         }
1773
1774         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1775         if(expected[0]!=0 /* && length>0 */) {
1776             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1777             if(U_FAILURE(errorCode)) {
1778                 log_err("u_charFromName(%s - alias) error %s\n",
1779                         expected, u_errorName(errorCode));
1780                 return;
1781             }
1782             if(c!=(UChar32)names[i].code) {
1783                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1784                         expected, c, names[i].code);
1785             }
1786         }
1787     }
1788
1789     /* test u_enumCharNames() */
1790     length=0;
1791     errorCode=U_ZERO_ERROR;
1792     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1793     if(U_FAILURE(errorCode) || length<94140) {
1794         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1795     }
1796
1797     extContext.length = 0;
1798     extContext.last = -1;
1799     errorCode=U_ZERO_ERROR;
1800     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1801     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1802         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1803     }
1804
1805     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1806     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1807         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1808     }
1809
1810     /* Test getCharNameCharacters */
1811     if(!getTestOption(QUICK_OPTION)) {
1812         enum { BUFSIZE = 256 };
1813         UErrorCode ec = U_ZERO_ERROR;
1814         char buf[BUFSIZE];
1815         int32_t maxLength;
1816         UChar32 cp;
1817         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1818         int32_t l1, l2;
1819         UBool map[256];
1820         UBool ok;
1821
1822         USet* set = uset_open(1, 0); /* empty set */
1823         USet* dumb = uset_open(1, 0); /* empty set */
1824
1825         /*
1826          * uprv_getCharNameCharacters() will likely return more lowercase
1827          * letters than actual character names contain because
1828          * it includes all the characters in lowercased names of
1829          * general categories, for the full possible set of extended names.
1830          */
1831         {
1832             USetAdder sa={
1833                 NULL,
1834                 uset_add,
1835                 uset_addRange,
1836                 uset_addString,
1837                 NULL /* don't need remove() */
1838             };
1839             sa.set=set;
1840             uprv_getCharNameCharacters(&sa);
1841         }
1842
1843         /* build set the dumb (but sure-fire) way */
1844         for (i=0; i<256; ++i) {
1845             map[i] = FALSE;
1846         }
1847
1848         maxLength=0;
1849         for (cp=0; cp<0x110000; ++cp) {
1850             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1851                                      buf, BUFSIZE, &ec);
1852             if (U_FAILURE(ec)) {
1853                 log_err("FAIL: u_charName failed when it shouldn't\n");
1854                 uset_close(set);
1855                 uset_close(dumb);
1856                 return;
1857             }
1858             if(len>maxLength) {
1859                 maxLength=len;
1860             }
1861
1862             for (i=0; i<len; ++i) {
1863                 if (!map[(uint8_t) buf[i]]) {
1864                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1865                     map[(uint8_t) buf[i]] = TRUE;
1866                 }
1867             }
1868
1869             /* test for leading/trailing whitespace */
1870             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1871                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1872             }
1873         }
1874
1875         if(map[(uint8_t)'\t']) {
1876             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1877         }
1878
1879         length=uprv_getMaxCharNameLength();
1880         if(length!=maxLength) {
1881             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1882                     length, maxLength);
1883         }
1884
1885         /* compare the sets.  Where is my uset_equals?!! */
1886         ok=TRUE;
1887         for(i=0; i<256; ++i) {
1888             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1889                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1890                     /* ignore lowercase a-z that are in set but not in dumb */
1891                     ok=TRUE;
1892                 } else {
1893                     ok=FALSE;
1894                     break;
1895                 }
1896             }
1897         }
1898
1899         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1900         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1901         if (U_FAILURE(ec)) {
1902             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1903             uset_close(set);
1904             uset_close(dumb);
1905             return;
1906         }
1907
1908         if (l1 >= BUFSIZE) {
1909             l1 = BUFSIZE-1;
1910             pat[l1] = 0;
1911         }
1912         if (l2 >= BUFSIZE) {
1913             l2 = BUFSIZE-1;
1914             dumbPat[l2] = 0;
1915         }
1916
1917         if (!ok) {
1918             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1919                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1920         } else if(getTestOption(VERBOSITY_OPTION)) {
1921             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1922         }
1923
1924         uset_close(set);
1925         uset_close(dumb);
1926     }
1927
1928     /* ### TODO: test error cases and other interesting things */
1929 }
1930
1931 static void
1932 TestUCharFromNameUnderflow() {
1933     // Ticket #10889: Underflow crash when there is no dash.
1934     UErrorCode errorCode=U_ZERO_ERROR;
1935     UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
1936     if(U_SUCCESS(errorCode)) {
1937         log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1938     }
1939
1940     // Test related edge cases.
1941     errorCode=U_ZERO_ERROR;
1942     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
1943     if(U_SUCCESS(errorCode)) {
1944         log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1945     }
1946
1947     errorCode=U_ZERO_ERROR;
1948     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
1949     if(U_SUCCESS(errorCode)) {
1950         log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1951     }
1952
1953     errorCode=U_ZERO_ERROR;
1954     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
1955     if(U_SUCCESS(errorCode)) {
1956         log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1957     }
1958 }
1959
1960 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
1961
1962 static void
1963 TestMirroring() {
1964     USet *set;
1965     UErrorCode errorCode;
1966
1967     UChar32 start, end, c2, c3;
1968     int32_t i;
1969
1970     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1971
1972     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1973
1974     log_verbose("Testing u_isMirrored()\n");
1975     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1976          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1977         )
1978     ) {
1979         log_err("u_isMirrored() does not work correctly\n");
1980     }
1981
1982     log_verbose("Testing u_charMirror()\n");
1983     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1984          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1985          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1986          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1987          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1988          )
1989     ) {
1990         log_err("u_charMirror() does not work correctly\n");
1991     }
1992
1993     /* verify that Bidi_Mirroring_Glyph roundtrips */
1994     errorCode=U_ZERO_ERROR;
1995     set=uset_openPattern(mirroredPattern, 17, &errorCode);
1996
1997     if (U_FAILURE(errorCode)) {
1998         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1999     } else {
2000         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
2001             do {
2002                 c2=u_charMirror(start);
2003                 c3=u_charMirror(c2);
2004                 if(c3!=start) {
2005                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
2006                 }
2007                 c3=u_getBidiPairedBracket(start);
2008                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
2009                     if(c3!=start) {
2010                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
2011                                 (long)start);
2012                     }
2013                 } else {
2014                     if(c3!=c2) {
2015                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
2016                                 (long)start, (long)c2);
2017                     }
2018                 }
2019             } while(++start<=end);
2020         }
2021     }
2022
2023     uset_close(set);
2024 }
2025
2026
/* One expected script run used by the uscript run tests. */
struct RunTestData
{
    const char *runText;  /* text of the run; TestUScriptRunAPI() expands it with u_unescape() */
    UScriptCode runCode;  /* script that CheckScriptRuns() expects uscript_nextRun() to report */
};

typedef struct RunTestData RunTestData;
2034
2035 static void
2036 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2037                 const char *prefix)
2038 {
2039     int32_t run, runStart, runLimit;
2040     UScriptCode runCode;
2041
2042     /* iterate over all the runs */
2043     run = 0;
2044     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2045         if (runStart != runStarts[run]) {
2046             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2047                 prefix, run, runStarts[run], runStart);
2048         }
2049
2050         if (runLimit != runStarts[run + 1]) {
2051             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2052                 prefix, run, runStarts[run + 1], runLimit);
2053         }
2054
2055         if (runCode != testData[run].runCode) {
2056             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2057                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2058         }
2059
2060         run += 1;
2061
2062         /* stop when we've seen all the runs we expect to see */
2063         if (run >= nRuns) {
2064             break;
2065         }
2066     }
2067
2068     /* Complain if we didn't see then number of runs we expected */
2069     if (run != nRuns) {
2070         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2071     }
2072 }
2073
2074 static void
2075 TestUScriptRunAPI()
2076 {
2077     static const RunTestData testData1[] = {
2078         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2079         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2080         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2081         {"English (", USCRIPT_LATIN},
2082         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2083         {") ", USCRIPT_LATIN},
2084         {"\\u6F22\\u5B75", USCRIPT_HAN},
2085         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2086         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2087         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2088     };
2089
2090     static const RunTestData testData2[] = {
2091        {"((((((((((abc))))))))))", USCRIPT_LATIN}
2092     };
2093
2094     static const struct {
2095       const RunTestData *testData;
2096       int32_t nRuns;
2097     } testDataEntries[] = {
2098         {testData1, UPRV_LENGTHOF(testData1)},
2099         {testData2, UPRV_LENGTHOF(testData2)}
2100     };
2101
2102     static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
2103     int32_t testEntry;
2104
2105     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2106         UChar testString[1024];
2107         int32_t runStarts[256];
2108         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2109         const RunTestData *testData = testDataEntries[testEntry].testData;
2110
2111         int32_t run, stringLimit;
2112         UScriptRun *scriptRun = NULL;
2113         UErrorCode err;
2114
2115         /*
2116          * Fill in the test string and the runStarts array.
2117          */
2118         stringLimit = 0;
2119         for (run = 0; run < nTestRuns; run += 1) {
2120             runStarts[run] = stringLimit;
2121             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2122             /*stringLimit -= 1;*/
2123         }
2124
2125         /* The limit of the last run */
2126         runStarts[nTestRuns] = stringLimit;
2127
2128         /*
2129          * Make sure that calling uscript_OpenRun with a NULL text pointer
2130          * and a non-zero text length returns the correct error.
2131          */
2132         err = U_ZERO_ERROR;
2133         scriptRun = uscript_openRun(NULL, stringLimit, &err);
2134
2135         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2136             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2137         }
2138
2139         if (scriptRun != NULL) {
2140             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2141             uscript_closeRun(scriptRun);
2142         }
2143
2144         /*
2145          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2146          * and a zero text length returns the correct error.
2147          */
2148         err = U_ZERO_ERROR;
2149         scriptRun = uscript_openRun(testString, 0, &err);
2150
2151         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2152             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2153         }
2154
2155         if (scriptRun != NULL) {
2156             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2157             uscript_closeRun(scriptRun);
2158         }
2159
2160         /*
2161          * Make sure that calling uscript_openRun with a NULL text pointer
2162          * and a zero text length doesn't return an error.
2163          */
2164         err = U_ZERO_ERROR;
2165         scriptRun = uscript_openRun(NULL, 0, &err);
2166
2167         if (U_FAILURE(err)) {
2168             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2169         }
2170
2171         /* Make sure that the empty iterator doesn't find any runs */
2172         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2173             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2174         }
2175
2176         /*
2177          * Make sure that calling uscript_setRunText with a NULL text pointer
2178          * and a non-zero text length returns the correct error.
2179          */
2180         err = U_ZERO_ERROR;
2181         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2182
2183         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2184             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2185         }
2186
2187         /*
2188          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2189          * and a zero text length returns the correct error.
2190          */
2191         err = U_ZERO_ERROR;
2192         uscript_setRunText(scriptRun, testString, 0, &err);
2193
2194         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2195             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2196         }
2197
2198         /*
2199          * Now call uscript_setRunText on the empty iterator
2200          * and make sure that it works.
2201          */
2202         err = U_ZERO_ERROR;
2203         uscript_setRunText(scriptRun, testString, stringLimit, &err);
2204
2205         if (U_FAILURE(err)) {
2206             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2207         } else {
2208             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2209         }
2210
2211         uscript_closeRun(scriptRun);
2212
2213         /*
2214          * Now open an interator over the testString
2215          * using uscript_openRun and make sure that it works
2216          */
2217         scriptRun = uscript_openRun(testString, stringLimit, &err);
2218
2219         if (U_FAILURE(err)) {
2220             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2221         } else {
2222             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2223         }
2224
2225         /* Now reset the iterator, and make sure
2226          * that it still works.
2227          */
2228         uscript_resetRun(scriptRun);
2229
2230         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2231
2232         /* Close the iterator */
2233         uscript_closeRun(scriptRun);
2234     }
2235 }
2236
2237 /* test additional, non-core properties */
2238 static void
2239 TestAdditionalProperties() {
2240     /* test data for u_charAge() */
2241     static const struct {
2242         UChar32 c;
2243         UVersionInfo version;
2244     } charAges[]={
2245         {0x41,    { 1, 1, 0, 0 }},
2246         {0xffff,  { 1, 1, 0, 0 }},
2247         {0x20ab,  { 2, 0, 0, 0 }},
2248         {0x2fffe, { 2, 0, 0, 0 }},
2249         {0x20ac,  { 2, 1, 0, 0 }},
2250         {0xfb1d,  { 3, 0, 0, 0 }},
2251         {0x3f4,   { 3, 1, 0, 0 }},
2252         {0x10300, { 3, 1, 0, 0 }},
2253         {0x220,   { 3, 2, 0, 0 }},
2254         {0xff60,  { 3, 2, 0, 0 }}
2255     };
2256
2257     /* test data for u_hasBinaryProperty() */
2258     static const int32_t
2259     props[][3]={ /* code point, property, value */
2260         { 0x0627, UCHAR_ALPHABETIC, TRUE },
2261         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2262         { 0x2028, UCHAR_ALPHABETIC, FALSE },
2263
2264         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2265         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2266
2267         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2268         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2269
2270         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2271         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2272
2273         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2274         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2275         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2276         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2277         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2278
2279         { 0x058a, UCHAR_DASH, TRUE },
2280         { 0x007e, UCHAR_DASH, FALSE },
2281
2282         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2283         { 0x3000, UCHAR_DIACRITIC, FALSE },
2284
2285         { 0x0e46, UCHAR_EXTENDER, TRUE },
2286         { 0x0020, UCHAR_EXTENDER, FALSE },
2287
2288 #if !UCONFIG_NO_NORMALIZATION
2289         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2290         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2291         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2292
2293         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2294         { 0x0308, UCHAR_NFD_INERT, FALSE },
2295
2296         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2297         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2298
2299         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2300         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2301         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2302         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2303         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2304         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2305
2306         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2307         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2308
2309         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2310         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2311         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2312         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2313         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2314         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2315 #endif
2316
2317         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2318         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2319         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2320
2321         { 0x30fb, UCHAR_HYPHEN, TRUE },
2322         { 0xfe58, UCHAR_HYPHEN, FALSE },
2323
2324         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2325         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2326         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2327
2328         { 0x2172, UCHAR_ID_START, TRUE },
2329         { 0x007a, UCHAR_ID_START, TRUE },
2330         { 0x0039, UCHAR_ID_START, FALSE },
2331
2332         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2333         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2334         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2335
2336         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2337         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2338
2339         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2340         { 0x0345, UCHAR_LOWERCASE, TRUE },
2341         { 0x0030, UCHAR_LOWERCASE, FALSE },
2342
2343         { 0x1d7a9, UCHAR_MATH, TRUE },
2344         { 0x2135, UCHAR_MATH, TRUE },
2345         { 0x0062, UCHAR_MATH, FALSE },
2346
2347         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2348         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2349         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2350
2351         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2352         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2353         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2354
2355         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2356         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2357
2358         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2359         { 0x2162, UCHAR_UPPERCASE, TRUE },
2360         { 0x0345, UCHAR_UPPERCASE, FALSE },
2361
2362         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2363         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2364         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2365
2366         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2367         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2368         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2369
2370         { 0x16ee, UCHAR_XID_START, TRUE },
2371         { 0x23456, UCHAR_XID_START, TRUE },
2372         { 0x1d1aa, UCHAR_XID_START, FALSE },
2373
2374         /*
2375          * Version break:
2376          * The following properties are only supported starting with the
2377          * Unicode version indicated in the second field.
2378          */
2379         { -1, 0x320, 0 },
2380
2381         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2382         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2383         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2384
2385         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2386         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2387         { 0xe0001, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2388         { 0xe0100, UCHAR_DEPRECATED, FALSE },
2389
2390         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2391         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2392         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2393         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2394
2395         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2396         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2397         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2398         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2399
2400         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2401         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2402
2403         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2404         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2405
2406         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2407         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2408
2409         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2410         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2411
2412         { 0x2e9b, UCHAR_RADICAL, TRUE },
2413         { 0x4e00, UCHAR_RADICAL, FALSE },
2414
2415         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2416         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2417
2418         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2419         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2420
2421         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2422
2423         { 0x002e, UCHAR_S_TERM, TRUE },
2424         { 0x0061, UCHAR_S_TERM, FALSE },
2425
2426         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2427         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2428         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2429         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2430
2431         /* enum/integer type properties */
2432
2433         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2434         /* test default Bidi classes for unassigned code points */
2435         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2436         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2437         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2438         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2439         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2440         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2441         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2442         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2443         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2444         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2445         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2446
2447         { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2448         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2449         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2450         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2451         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2452         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2453         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2454
2455         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2456         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2457         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2458         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2459         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2460         { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2461         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2462         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2463         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2464         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2465         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2466
2467         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2468         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2469
2470         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2471         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2472         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2473         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2474         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2475         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2476         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2477         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2478         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2479
2480         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2481         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2482         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2483         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2484         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2485         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2486         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2487         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2488         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2489         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2490         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2491         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2492         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2493         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2494         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2495         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2496         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2497
2498         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2499         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2500         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2501
2502         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2503         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2504         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2505         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2506         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2507
2508         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2509         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2510         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2511         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2512         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2513         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2514         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2515         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2516
2517         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2518         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2519         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2520         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2521         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2522         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2523         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2524         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2525         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2526         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2527         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2528         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2529         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2530         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2531         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2532         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2533
2534         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2535
2536         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2537
2538         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2539         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2540         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2541         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2542         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2543         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2544         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2545
2546         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2547         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2548         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2549         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2550
2551         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2552         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2553         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2554         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2555         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2556         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2557
2558         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2559         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2560         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2561         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2562
2563         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2564         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2565         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2566         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2567         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2568         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2569         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2570
2571         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2572         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2573         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2574         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2575
2576         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2577         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2578         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2579         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2580
2581         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2582         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2583         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2584         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2585         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2586
2587         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2588
2589         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2590
2591         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2592         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2593         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2594
2595         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2596         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2597         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2598         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2599         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2600
2601         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2602         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2603         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2604
2605         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2606         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2607         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2608         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2609
2610         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2611         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2612         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2613         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2614         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2615         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2616
2617         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2618         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2619         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2620         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2621
2622         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2623         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2624         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2625         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2626
2627         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2628         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2629         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2630         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2631
2632         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2633
2634         /* unassigned code points in new default Bidi R blocks */
2635         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2636         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2637
2638         /* test some script codes >127 */
2639         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2640         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2641         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2642
2643         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2644
2645         /* value changed in Unicode 6.0 */
2646         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2647
2648         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2649
2650         /* unassigned code points in new/changed default Bidi AL blocks */
2651         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2652         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2653
2654         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2655
2656         /* unassigned code points in the currency symbols block now default to ET */
2657         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2658         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2659
2660         /* new property in Unicode 6.3 */
2661         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2662         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2663         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2664         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2665         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2666         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2667
2668         { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
2669
2670         /* new character range with Joining_Group values */
2671         { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2672         { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
2673         { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
2674         { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
2675         { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2676
2677         { -1, 0xa00, 0 },  // version break for Unicode 10
2678
2679         { 0x1F1E5, UCHAR_REGIONAL_INDICATOR, FALSE },
2680         { 0x1F1E7, UCHAR_REGIONAL_INDICATOR, TRUE },
2681         { 0x1F1FF, UCHAR_REGIONAL_INDICATOR, TRUE },
2682         { 0x1F200, UCHAR_REGIONAL_INDICATOR, FALSE },
2683
2684         { 0x0600, UCHAR_PREPENDED_CONCATENATION_MARK, TRUE },
2685         { 0x0606, UCHAR_PREPENDED_CONCATENATION_MARK, FALSE },
2686         { 0x110BD, UCHAR_PREPENDED_CONCATENATION_MARK, TRUE },
2687
2688         /* undefined UProperty values */
2689         { 0x61, 0x4a7, 0 },
2690         { 0x234bc, 0x15ed, 0 }
2691     };
2692
2693     UVersionInfo version;
2694     UChar32 c;
2695     int32_t i, result, uVersion;
2696     UProperty which;
2697
2698     /* what is our Unicode version? */
2699     u_getUnicodeVersion(version);
2700     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2701
2702     u_charAge(0x20, version);
2703     if(version[0]==0) {
2704         /* no additional properties available */
2705         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2706         return;
2707     }
2708
2709     /* test u_charAge() */
2710     for(i=0; i<UPRV_LENGTHOF(charAges); ++i) {
2711         u_charAge(charAges[i].c, version);
2712         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2713             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2714                 charAges[i].c,
2715                 version[0], version[1], version[2], version[3],
2716                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2717         }
2718     }
2719
2720     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2721         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2722         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2723         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2724         u_getIntPropertyMinValue(0x2345)!=0
2725     ) {
2726         log_err("error: u_getIntPropertyMinValue() wrong\n");
2727     }
2728     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2729         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2730     }
2731     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2732         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2733     }
2734     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2735         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2736     }
2737     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2738         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2739     }
2740     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2741         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2742     }
2743     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2744         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2745     }
2746     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2747         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2748     }
2749     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2750         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2751     }
2752     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2753         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2754     }
2755     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2756         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2757     }
2758     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2759         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2760     }
2761     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2762         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2763     }
2764     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2765         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2766     }
2767     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2768         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2769     }
2770     /*JB#2410*/
2771     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2772         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2773     }
2774     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2775         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2776     }
2777     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2778         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2779     }
2780     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2781         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2782     }
2783     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2784         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2785     }
2786
2787     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2788     for(i=0; i<UPRV_LENGTHOF(props); ++i) {
2789         const char *whichName;
2790
2791         if(props[i][0]<0) {
2792             /* Unicode version break */
2793             if(uVersion<props[i][1]) {
2794                 break; /* do not test properties that are not yet supported */
2795             } else {
2796                 continue; /* skip this row */
2797             }
2798         }
2799
2800         c=(UChar32)props[i][0];
2801         which=(UProperty)props[i][1];
2802         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2803
2804         if(which<UCHAR_INT_START) {
2805             result=u_hasBinaryProperty(c, which);
2806             if(result!=props[i][2]) {
2807                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2808                         c, whichName, result, i);
2809             }
2810         }
2811
2812         result=u_getIntPropertyValue(c, which);
2813         if(result!=props[i][2]) {
2814             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2815                     c, whichName, result, props[i][2], i);
2816         }
2817
2818         /* test separate functions, too */
2819         switch((UProperty)props[i][1]) {
2820         case UCHAR_ALPHABETIC:
2821             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2822                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2823                         props[i][0], result, i);
2824             }
2825             break;
2826         case UCHAR_LOWERCASE:
2827             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2828                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2829                         props[i][0], result, i);
2830             }
2831             break;
2832         case UCHAR_UPPERCASE:
2833             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2834                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2835                         props[i][0], result, i);
2836             }
2837             break;
2838         case UCHAR_WHITE_SPACE:
2839             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2840                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2841                         props[i][0], result, i);
2842             }
2843             break;
2844         default:
2845             break;
2846         }
2847     }
2848 }
2849
2850 static void
2851 TestNumericProperties(void) {
2852     /* see UnicodeData.txt, DerivedNumericValues.txt */
2853     static const struct {
2854         UChar32 c;
2855         int32_t type;
2856         double numValue;
2857     } values[]={
2858         { 0x0F33, U_NT_NUMERIC, -1./2. },
2859         { 0x0C66, U_NT_DECIMAL, 0 },
2860         { 0x96f6, U_NT_NUMERIC, 0 },
2861         { 0xa833, U_NT_NUMERIC, 1./16. },
2862         { 0x2152, U_NT_NUMERIC, 1./10. },
2863         { 0x2151, U_NT_NUMERIC, 1./9. },
2864         { 0x1245f, U_NT_NUMERIC, 1./8. },
2865         { 0x2150, U_NT_NUMERIC, 1./7. },
2866         { 0x2159, U_NT_NUMERIC, 1./6. },
2867         { 0x09f6, U_NT_NUMERIC, 3./16. },
2868         { 0x2155, U_NT_NUMERIC, 1./5. },
2869         { 0x00BD, U_NT_NUMERIC, 1./2. },
2870         { 0x0031, U_NT_DECIMAL, 1. },
2871         { 0x4e00, U_NT_NUMERIC, 1. },
2872         { 0x58f1, U_NT_NUMERIC, 1. },
2873         { 0x10320, U_NT_NUMERIC, 1. },
2874         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2875         { 0x00B2, U_NT_DIGIT, 2. },
2876         { 0x5f10, U_NT_NUMERIC, 2. },
2877         { 0x1813, U_NT_DECIMAL, 3. },
2878         { 0x5f0e, U_NT_NUMERIC, 3. },
2879         { 0x2173, U_NT_NUMERIC, 4. },
2880         { 0x8086, U_NT_NUMERIC, 4. },
2881         { 0x278E, U_NT_DIGIT, 5. },
2882         { 0x1D7F2, U_NT_DECIMAL, 6. },
2883         { 0x247A, U_NT_DIGIT, 7. },
2884         { 0x7396, U_NT_NUMERIC, 9. },
2885         { 0x1372, U_NT_NUMERIC, 10. },
2886         { 0x216B, U_NT_NUMERIC, 12. },
2887         { 0x16EE, U_NT_NUMERIC, 17. },
2888         { 0x249A, U_NT_NUMERIC, 19. },
2889         { 0x303A, U_NT_NUMERIC, 30. },
2890         { 0x5345, U_NT_NUMERIC, 30. },
2891         { 0x32B2, U_NT_NUMERIC, 37. },
2892         { 0x1375, U_NT_NUMERIC, 40. },
2893         { 0x10323, U_NT_NUMERIC, 50. },
2894         { 0x0BF1, U_NT_NUMERIC, 100. },
2895         { 0x964c, U_NT_NUMERIC, 100. },
2896         { 0x217E, U_NT_NUMERIC, 500. },
2897         { 0x2180, U_NT_NUMERIC, 1000. },
2898         { 0x4edf, U_NT_NUMERIC, 1000. },
2899         { 0x2181, U_NT_NUMERIC, 5000. },
2900         { 0x137C, U_NT_NUMERIC, 10000. },
2901         { 0x4e07, U_NT_NUMERIC, 10000. },
2902         { 0x12432, U_NT_NUMERIC, 216000. },
2903         { 0x12433, U_NT_NUMERIC, 432000. },
2904         { 0x4ebf, U_NT_NUMERIC, 100000000. },
2905         { 0x5146, U_NT_NUMERIC, 1000000000000. },
2906         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2907         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2908         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2909         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2910         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2911         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2912         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2913         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2914     };
2915
2916     double nv;
2917     UChar32 c;
2918     int32_t i, type;
2919
2920     for(i=0; i<UPRV_LENGTHOF(values); ++i) {
2921         c=values[i].c;
2922         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2923         nv=u_getNumericValue(c);
2924
2925         if(type!=values[i].type) {
2926             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2927         }
2928         if(0.000001 <= fabs(nv - values[i].numValue)) {
2929             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2930         }
2931     }
2932 }
2933
2934 /**
2935  * Test the property names and property value names API.
2936  */
2937 static void
2938 TestPropertyNames(void) {
2939     int32_t p, v, choice=0, rev;
2940     UBool atLeastSomething = FALSE;
2941
2942     for (p=0; ; ++p) {
2943         UProperty propEnum = (UProperty)p;
2944         UBool sawProp = FALSE;
2945         if(p > 10 && !atLeastSomething) {
2946           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2947           return;
2948         }
2949
2950         for (choice=0; ; ++choice) {
2951             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2952             if (name) {
2953                 if (!sawProp)
2954                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2955                 log_verbose("%d=\"%s\"", choice, name);
2956                 sawProp = TRUE;
2957                 atLeastSomething = TRUE;
2958
2959                 /* test reverse mapping */
2960                 rev = u_getPropertyEnum(name);
2961                 if (rev != p) {
2962                     log_err("Property round-trip failure: %d -> %s -> %d\n",
2963                             p, name, rev);
2964                 }
2965             }
2966             if (!name && choice>0) break;
2967         }
2968         if (sawProp) {
2969             /* looks like a valid property; check the values */
2970             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2971             int32_t max = 0;
2972             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2973                 max = 255;
2974             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2975                 /* it's far too slow to iterate all the way up to
2976                    the real max, U_GC_P_MASK */
2977                 max = U_GC_NL_MASK;
2978             } else if (p == UCHAR_BLOCK) {
2979                 /* UBlockCodes, unlike other values, start at 1 */
2980                 max = 1;
2981             }
2982             log_verbose("\n");
2983             for (v=-1; ; ++v) {
2984                 UBool sawValue = FALSE;
2985                 for (choice=0; ; ++choice) {
2986                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2987                     if (vname) {
2988                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2989                         log_verbose("%d=\"%s\"", choice, vname);
2990                         sawValue = TRUE;
2991
2992                         /* test reverse mapping */
2993                         rev = u_getPropertyValueEnum(propEnum, vname);
2994                         if (rev != v) {
2995                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2996                                     pname, v, vname, rev);
2997                         }
2998                     }
2999                     if (!vname && choice>0) break;
3000                 }
3001                 if (sawValue) {
3002                     log_verbose("\n");
3003                 }
3004                 if (!sawValue && v>=max) break;
3005             }
3006         }
3007         if (!sawProp) {
3008             if (p>=UCHAR_STRING_LIMIT) {
3009                 break;
3010             } else if (p>=UCHAR_DOUBLE_LIMIT) {
3011                 p = UCHAR_STRING_START - 1;
3012             } else if (p>=UCHAR_MASK_LIMIT) {
3013                 p = UCHAR_DOUBLE_START - 1;
3014             } else if (p>=UCHAR_INT_LIMIT) {
3015                 p = UCHAR_MASK_START - 1;
3016             } else if (p>=UCHAR_BINARY_LIMIT) {
3017                 p = UCHAR_INT_START - 1;
3018             }
3019         }
3020     }
3021 }
3022
3023 /**
3024  * Test the property values API.  See JB#2410.
3025  */
3026 static void
3027 TestPropertyValues(void) {
3028     int32_t i, p, min, max;
3029     UErrorCode ec;
3030
3031     /* Min should be 0 for everything. */
3032     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
3033     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
3034         UProperty propEnum = (UProperty)p;
3035         min = u_getIntPropertyMinValue(propEnum);
3036         if (min != 0) {
3037             if (p == UCHAR_BLOCK) {
3038                 /* This is okay...for now.  See JB#2487.
3039                    TODO Update this for JB#2487. */
3040             } else {
3041                 const char* name;
3042                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3043                 if (name == NULL)
3044                     name = "<ERROR>";
3045                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3046                         name, min);
3047             }
3048         }
3049     }
3050
3051     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3052         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3053         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3054     }
3055
3056     /* Max should be -1 for invalid properties. */
3057     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3058     if (max != -1) {
3059         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3060                 max);
3061     }
3062
3063     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3064     for (i=0; i<2; ++i) {
3065         int32_t script;
3066         const char* desc;
3067         ec = U_ZERO_ERROR;
3068         switch (i) {
3069         case 0:
3070             script = uscript_getScript(-1, &ec);
3071             desc = "uscript_getScript(-1)";
3072             break;
3073         case 1:
3074             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3075             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3076             break;
3077         default:
3078             log_err("Internal test error. Too many scripts\n");
3079             return;
3080         }
3081         /* We don't explicitly test ec.  It should be U_FAILURE but it
3082            isn't documented as such. */
3083         if (script != (int32_t)USCRIPT_INVALID_CODE) {
3084             log_err("FAIL: %s = %d, exp. 0\n",
3085                     desc, script);
3086         }
3087     }
3088 }
3089
3090 /* various tests for consistency of UCD data and API behavior */
3091 static void
3092 TestConsistency() {
3093     char buffer[300];
3094     USet *set1, *set2, *set3, *set4;
3095     UErrorCode errorCode;
3096
3097     UChar32 start, end;
3098     int32_t i, length;
3099
3100     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3101     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3102     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3103     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3104     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3105
3106     U_STRING_DECL(mathBlocksPattern,
3107         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3108         214);
3109     U_STRING_DECL(mathPattern, "[:Math:]", 8);
3110     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3111     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3112     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3113
3114     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3115     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3116     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3117     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3118     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3119
3120     U_STRING_INIT(mathBlocksPattern,
3121         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3122         214);
3123     U_STRING_INIT(mathPattern, "[:Math:]", 8);
3124     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3125     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3126     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3127
3128     /*
3129      * It used to be that UCD.html and its precursors said
3130      * "Those dashes used to mark connections between pieces of words,
3131      *  plus the Katakana middle dot."
3132      *
3133      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3134      * but not from Hyphen.
3135      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3136      * Therefore, do not show errors when testing the Hyphen property.
3137      */
3138     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3139                 "known to the UTC and not considered errors.\n");
3140
3141     errorCode=U_ZERO_ERROR;
3142     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3143     set2=uset_openPattern(dashPattern, 8, &errorCode);
3144     if(U_SUCCESS(errorCode)) {
3145         /* remove the Katakana middle dot(s) from set1 */
3146         uset_remove(set1, 0x30fb);
3147         uset_remove(set1, 0xff65); /* halfwidth variant */
3148         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3149     } else {
3150         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3151     }
3152
3153     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3154     set3=uset_openPattern(formatPattern, 6, &errorCode);
3155     set4=uset_openPattern(alphaPattern, 14, &errorCode);
3156     if(U_SUCCESS(errorCode)) {
3157         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3158         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3159         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3160     } else {
3161         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3162     }
3163
3164     uset_close(set1);
3165     uset_close(set2);
3166     uset_close(set3);
3167     uset_close(set4);
3168
3169     /*
3170      * Check that each lowercase character has "small" in its name
3171      * and not "capital".
3172      * There are some such characters, some of which seem odd.
3173      * Use the verbose flag to see these notices.
3174      */
3175     errorCode=U_ZERO_ERROR;
3176     set1=uset_openPattern(lowerPattern, 13, &errorCode);
3177     if(U_SUCCESS(errorCode)) {
3178         for(i=0;; ++i) {
3179             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3180             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3181                 break; /* done */
3182             }
3183             if(U_FAILURE(errorCode)) {
3184                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3185                         i, u_errorName(errorCode));
3186                 break;
3187             }
3188             if(length!=0) {
3189                 break; /* done with code points, got a string or -1 */
3190             }
3191
3192             while(start<=end) {
3193                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3194                 if(U_FAILURE(errorCode)) {
3195                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3196                     errorCode=U_ZERO_ERROR;
3197                 }
3198                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3199                     strstr(buffer, "SMALL CAPITAL")==NULL
3200                 ) {
3201                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3202                 }
3203                 ++start;
3204             }
3205         }
3206     } else {
3207         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3208     }
3209     uset_close(set1);
3210
3211     /* verify that all assigned characters in Math blocks are exactly Math characters */
3212     errorCode=U_ZERO_ERROR;
3213     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3214     set2=uset_openPattern(mathPattern, 8, &errorCode);
3215     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3216     if(U_SUCCESS(errorCode)) {
3217         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3218         uset_complement(set3);      /* assigned characters */
3219         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3220         compareUSets(set1, set2,
3221                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
3222                      TRUE);
3223     } else {
3224         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3225     }
3226     uset_close(set1);
3227     uset_close(set2);
3228     uset_close(set3);
3229
3230     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3231     errorCode=U_ZERO_ERROR;
3232     set1=uset_openPattern(unknownPattern, 14, &errorCode);
3233     set2=uset_openPattern(reservedPattern, 20, &errorCode);
3234     if(U_SUCCESS(errorCode)) {
3235         compareUSets(set1, set2,
3236                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3237                      TRUE);
3238     } else {
3239         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3240     }
3241     uset_close(set1);
3242     uset_close(set2);
3243 }
3244
/*
 * Since ICU4C 3.4 the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and are therefore no longer
 * part of the data package.
 * Tests that require these files are disabled, so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
#define HARDCODED_DATA_4497 1
3256
3257 /* API coverage for ubidi_props.c */
3258 static void TestUBiDiProps() {
3259 #if !HARDCODED_DATA_4497
3260     UDataMemory *pData;
3261     UBiDiProps *bdp;
3262     const UBiDiProps *cbdp;
3263     UErrorCode errorCode;
3264
3265     /* coverage for ubidi_openBinary() */
3266     errorCode=U_ZERO_ERROR;
3267     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3268     if(U_FAILURE(errorCode)) {
3269         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3270                     u_errorName(errorCode));
3271         return;
3272     }
3273
3274     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3275     if(U_FAILURE(errorCode)) {
3276         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3277                 u_errorName(errorCode));
3278         udata_close(pData);
3279         return;
3280     }
3281
3282     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3283         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3284     }
3285
3286     ubidi_closeProps(bdp);
3287     udata_close(pData);
3288
3289     /* coverage for ubidi_getDummy() */
3290     errorCode=U_ZERO_ERROR;
3291     cbdp=ubidi_getDummy(&errorCode);
3292     if(ubidi_getClass(cbdp, 0x20)!=0) {
3293         log_err("ubidi_getClass(dummy, space)!=0\n");
3294     }
3295 #endif
3296 }
3297
3298 /* test case folding, compare return values with CaseFolding.txt ------------ */
3299
/* bit flags recording which case foldings of a character have been tested already */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3307
3308 static void
3309 testFold(UChar32 c, int which,
3310          UChar32 simple, UChar32 turkic,
3311          const UChar *full, int32_t fullLength,
3312          const UChar *turkicFull, int32_t turkicFullLength) {
3313     UChar s[2], t[32];
3314     UChar32 c2;
3315     int32_t length, length2;
3316
3317     UErrorCode errorCode=U_ZERO_ERROR;
3318
3319     length=0;
3320     U16_APPEND_UNSAFE(s, length, c);
3321
3322     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3323         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3324     }
3325     if((which&CF_FULL)!=0) {
3326         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
3327         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3328             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3329         }
3330     }
3331     if((which&CF_TURKIC)!=0) {
3332         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3333             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3334         }
3335
3336         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3337         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3338             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3339         }
3340     }
3341 }
3342
3343 /* test that c case-folds to itself */
3344 static void
3345 testFoldToSelf(UChar32 c, int which) {
3346     UChar s[2];
3347     int32_t length;
3348
3349     length=0;
3350     U16_APPEND_UNSAFE(s, length, c);
3351     testFold(c, which, c, c, s, length, s, length);
3352 }
3353
3354 struct CaseFoldingData {
3355     USet *notSeen;
3356     UChar32 prev, prevSimple;
3357     UChar prevFull[32];
3358     int32_t prevFullLength;
3359     int which;
3360 };
3361 typedef struct CaseFoldingData CaseFoldingData;
3362
3363 static void U_CALLCONV
3364 caseFoldingLineFn(void *context,
3365                   char *fields[][2], int32_t fieldCount,
3366                   UErrorCode *pErrorCode) {
3367     CaseFoldingData *pData=(CaseFoldingData *)context;
3368     char *end;
3369     UChar full[32];
3370     UChar32 c, prev, simple;
3371     int32_t count;
3372     int which;
3373     char status;
3374
3375     /* get code point */
3376     const char *s=u_skipWhitespace(fields[0][0]);
3377     if(0==strncmp(s, "0000..10FFFF", 12)) {
3378         /*
3379          * Ignore the line
3380          * # @missing: 0000..10FFFF; C; <code point>
3381          * because maps-to-self is already our default, and this line breaks this parser.
3382          */
3383         return;
3384     }
3385     c=(UChar32)strtoul(s, &end, 16);
3386     end=(char *)u_skipWhitespace(end);
3387     if(end<=fields[0][0] || end!=fields[0][1]) {
3388         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3389         *pErrorCode=U_PARSE_ERROR;
3390         return;
3391     }
3392
3393     /* get the status of this mapping */
3394     status=*u_skipWhitespace(fields[1][0]);
3395     if(status!='C' && status!='S' && status!='F' && status!='T') {
3396         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3397         *pErrorCode=U_PARSE_ERROR;
3398         return;
3399     }
3400
3401     /* get the mapping */
3402     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3403     if(U_FAILURE(*pErrorCode)) {
3404         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3405         return;
3406     }
3407
3408     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3409     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3410         simple=c;
3411     }
3412
3413     if(c!=(prev=pData->prev)) {
3414         /*
3415          * Test remaining mappings for the previous code point.
3416          * If a turkic folding was not mentioned, then it should fold the same
3417          * as the regular simple case folding.
3418          */
3419         UChar prevString[2];
3420         int32_t length;
3421
3422         length=0;
3423         U16_APPEND_UNSAFE(prevString, length, prev);
3424         testFold(prev, (~pData->which)&CF_ALL,
3425                  prev, pData->prevSimple,
3426                  prevString, length,
3427                  pData->prevFull, pData->prevFullLength);
3428         pData->prev=pData->prevSimple=c;
3429         length=0;
3430         U16_APPEND_UNSAFE(pData->prevFull, length, c);
3431         pData->prevFullLength=length;
3432         pData->which=0;
3433     }
3434
3435     /*
3436      * Turn the status into a bit set of case foldings to test.
3437      * Remember non-Turkic case foldings as defaults for Turkic mode.
3438      */
3439     switch(status) {
3440     case 'C':
3441         which=CF_SIMPLE|CF_FULL;
3442         pData->prevSimple=simple;
3443         u_memcpy(pData->prevFull, full, count);
3444         pData->prevFullLength=count;
3445         break;
3446     case 'S':
3447         which=CF_SIMPLE;
3448         pData->prevSimple=simple;
3449         break;
3450     case 'F':
3451         which=CF_FULL;
3452         u_memcpy(pData->prevFull, full, count);
3453         pData->prevFullLength=count;
3454         break;
3455     case 'T':
3456         which=CF_TURKIC;
3457         break;
3458     default:
3459         which=0;
3460         break; /* won't happen because of test above */
3461     }
3462
3463     testFold(c, which, simple, simple, full, count, full, count);
3464
3465     /* remember which case foldings of c have been tested */
3466     pData->which|=which;
3467
3468     /* remove c from the set of ones not mentioned in CaseFolding.txt */
3469     uset_remove(pData->notSeen, c);
3470 }
3471
3472 static void
3473 TestCaseFolding() {
3474     CaseFoldingData data={ NULL };
3475     char *fields[3][2];
3476     UErrorCode errorCode;
3477
3478     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3479
3480     errorCode=U_ZERO_ERROR;
3481     /* test BMP & plane 1 - nothing interesting above */
3482     data.notSeen=uset_open(0, 0x1ffff);
3483     data.prevFullLength=1; /* length of full case folding of U+0000 */
3484
3485     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3486     if(U_SUCCESS(errorCode)) {
3487         int32_t i, start, end;
3488
3489         /* add a pseudo-last line to finish testing of the actual last one */
3490         fields[0][0]=lastLine;
3491         fields[0][1]=lastLine+6;
3492         fields[1][0]=lastLine+7;
3493         fields[1][1]=lastLine+9;
3494         fields[2][0]=lastLine+10;
3495         fields[2][1]=lastLine+17;
3496         caseFoldingLineFn(&data, fields, 3, &errorCode);
3497
3498         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3499         for(i=0;
3500             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3501                 U_SUCCESS(errorCode);
3502             ++i
3503         ) {
3504             do {
3505                 testFoldToSelf(start, CF_ALL);
3506             } while(++start<=end);
3507         }
3508     }
3509
3510     uset_close(data.notSeen);
3511 }