X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..f3c0d7a59d99c2a94c6b8822291f0e42be3773c9:/icuSources/test/cintltst/cucdtst.c diff --git a/icuSources/test/cintltst/cucdtst.c b/icuSources/test/cintltst/cucdtst.c index 0aa15aff..15edead5 100644 --- a/icuSources/test/cintltst/cucdtst.c +++ b/icuSources/test/cintltst/cucdtst.c @@ -1,16 +1,18 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2006, International Business Machines Corporation and + * Copyright (c) 1997-2016, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ -/******************************************************************************** +/******************************************************************************* * * File CUCDTST.C * * Modification History: * Name Description * Madhu Katragadda Ported for C API, added tests for string functions -********************************************************************************* +******************************************************************************** */ #include @@ -22,6 +24,7 @@ #include "unicode/putil.h" #include "unicode/ustring.h" #include "unicode/uloc.h" +#include "unicode/unorm2.h" #include "cintltst.h" #include "putilimp.h" @@ -31,11 +34,9 @@ #include "uprops.h" #include "uset_imp.h" #include "usc_impl.h" -#include "unormimp.h" -#include "udatamem.h" /* for testing ucase_openBinary() */ +#include "udatamem.h" #include "cucdapi.h" - -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +#include "cmemory.h" /* prototypes --------------------------------------------------------------- */ @@ -50,15 +51,14 @@ static void TestCodeUnit(void); static void TestCodePoint(void); static void TestCharLength(void); static void TestCharNames(void); +static void TestUCharFromNameUnderflow(void); static void TestMirroring(void); -/* void TestUScriptCodeAPI(void);*/ /* defined in cucdapi.h */ static void TestUScriptRunAPI(void); static void TestAdditionalProperties(void); static void TestNumericProperties(void); static void TestPropertyNames(void); static void TestPropertyValues(void); static void TestConsistency(void); -static void TestUCase(void); static void TestUBiDiProps(void); static void TestCaseFolding(void); @@ -99,13 +99,12 @@ parseUCDFile(const char *filename, u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode); } if(U_FAILURE(*pErrorCode)) { - log_err("error parsing %s: %s\n", filename, u_errorName(*pErrorCode)); + log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode)); } } /* test data ---------------------------------------------------------------- */ -static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD; static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf"; static const int32_t tagValues[] = { @@ -160,17 +159,23 @@ static const char dirStrings[][5] = { "RLO", "PDF", "NSM", - "BN" + "BN", + /* new in Unicode 6.3/ICU 52 */ + "FSI", + "LRI", + "RLI", + "PDI" }; void addUnicodeTest(TestNode** root); void addUnicodeTest(TestNode** root) { - addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData"); addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit"); addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint"); addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength"); + addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues"); + addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData"); addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties"); addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties"); addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower"); @@ -180,13 +185,16 @@ void addUnicodeTest(TestNode** root) addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint"); addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier"); addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames"); + addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow"); addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring"); addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI"); + addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript"); + addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions"); + addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI"); addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI"); addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames"); addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues"); addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency"); - addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase"); addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps"); addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding"); } @@ -315,7 +323,7 @@ Checks LetterLike Symbols which were previously a source of confusion 0x1FFC, 0x1FFC, }; - int32_t num = sizeof(expected)/sizeof(expected[0]); + int32_t num = UPRV_LENGTHOF(expected); for(i=0; infkc; + if(value!=unorm2_getCombiningClass(nfkc, c)) { + log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value); + } #endif /* get BiDi category, field 4 */ *fields[4][1]=0; i=MakeDir(fields[4][0]); +#if U_ICU_VERSION_MAJOR_NUM!=59 + // TODO: Remove this version check, see ticket #13061. if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) { log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]); } +#endif + + /* get Decomposition_Type & Decomposition_Mapping, field 5 */ + d=NULL; + if(fields[5][0]==fields[5][1]) { + /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */ + if(c==0xac00 || c==0xd7a3) { + dt=U_DT_CANONICAL; + } else { + dt=U_DT_NONE; + } + } else { + d=fields[5][0]; + *fields[5][1]=0; + dt=UCHAR_INVALID_CODE; + if(*d=='<') { + end=strchr(++d, '>'); + if(end!=NULL) { + *end=0; + dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d); + d=u_skipWhitespace(end+1); + } + } else { + dt=U_DT_CANONICAL; + } + } + if(dt>U_DT_NONE) { + if(c==0xac00) { + dm[0]=0x1100; + dm[1]=0x1161; + dm[2]=0; + dmLength=2; + } else if(c==0xd7a3) { + dm[0]=0xd788; + dm[1]=0x11c2; + dm[2]=0; + dmLength=2; + } else { + dmLength=u_parseString(d, dm, 32, NULL, pErrorCode); + } + } else { + dmLength=-1; + } + if(dt<0 || U_FAILURE(*pErrorCode)) { + log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c); + return; + } +#if !UCONFIG_NO_NORMALIZATION + i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE); + if(i!=dt) { + log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt); + } + /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */ + length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode); + if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) { + log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d " + "or the Decomposition_Mapping is different (%s)\n", + c, length, dmLength, u_errorName(*pErrorCode)); + return; + } + /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */ + if(dt!=U_DT_CANONICAL) { + dmLength=-1; + } + nfc=((UnicodeDataContext *)context)->nfc; + length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode); + if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) { + log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d " + "or the Decomposition_Mapping is different (%s)\n", + c, length, dmLength, u_errorName(*pErrorCode)); + return; + } + /* recompose */ + if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) { + UChar32 a, b, composite; + i=0; + U16_NEXT(dm, i, dmLength, a); + U16_NEXT(dm, i, dmLength, b); + /* i==dmLength */ + composite=unorm2_composePair(nfc, a, b); + if(composite!=c) { + log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n", + (long)c, (long)a, (long)b, (long)composite); + } + /* + * Note: NFKC has fewer round-trip mappings than NFC, + * so we can't just test unorm2_composePair(nfkc, a, b) here without further data. + */ + } +#endif /* get ISO Comment, field 11 */ *fields[11][1]=0; i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode); if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) { - log_err("error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n", + log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n", c, u_errorName(*pErrorCode), U_FAILURE(*pErrorCode) ? buffer : "[error]", fields[11][0]); @@ -1115,7 +1192,7 @@ enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t return FALSE; } - count=LENGTHOF(test); + count=UPRV_LENGTHOF(test); for(i=0; i=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){ + else if(i >=18 && i" }, {0xdc00, "", "", "" }, - {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" }, + {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" }, {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" }, {0xffff, "", "", "" }, + {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "", + "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", + "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"}, {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" } }; @@ -1516,6 +1610,7 @@ enumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length) { int32_t *pCount=(int32_t *)context; + const char *expected; int i; if(length<=0 || length!=(int32_t)strlen(name)) { @@ -1525,7 +1620,7 @@ enumCharNamesFn(void *context, } ++*pCount; - for(i=0; i0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) { + log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n", + names[i].code, name, length, expected); + } + + /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */ + if(expected[0]!=0 /* && length>0 */) { + c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("u_charFromName(%s - alias) error %s\n", + expected, u_errorName(errorCode)); + return; + } + if(c!=(UChar32)names[i].code) { + log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n", + expected, c, names[i].code); + } + } } /* test u_enumCharNames() */ @@ -1680,7 +1812,7 @@ TestCharNames() { } /* Test getCharNameCharacters */ - if(!QUICK) { + if(!getTestOption(QUICK_OPTION)) { enum { BUFSIZE = 256 }; UErrorCode ec = U_ZERO_ERROR; char buf[BUFSIZE]; @@ -1789,7 +1921,7 @@ TestCharNames() { if (!ok) { log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n", aescstrdup(pat, l1), aescstrdup(dumbPat, l2)); - } else if(VERBOSITY) { + } else if(getTestOption(VERBOSITY_OPTION)) { log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1)); } @@ -1800,6 +1932,35 @@ TestCharNames() { /* ### TODO: test error cases and other interesting things */ } +static void +TestUCharFromNameUnderflow() { + // Ticket #10889: Underflow crash when there is no dash. + UErrorCode errorCode=U_ZERO_ERROR; + UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "", &errorCode); + if(U_SUCCESS(errorCode)) { + log_err("u_charFromName() = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + } + + // Test related edge cases. + errorCode=U_ZERO_ERROR; + c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode); + if(U_SUCCESS(errorCode)) { + log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + c=u_charFromName(U_EXTENDED_CHAR_NAME, "", &errorCode); + if(U_SUCCESS(errorCode)) { + log_err("u_charFromName() = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + c=u_charFromName(U_EXTENDED_CHAR_NAME, "", &errorCode); + if(U_SUCCESS(errorCode)) { + log_err("u_charFromName() = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + } +} + /* test u_isMirrored() and u_charMirror() ----------------------------------- */ static void @@ -1825,7 +1986,9 @@ TestMirroring() { log_verbose("Testing u_charMirror()\n"); if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 && u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */ - u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab + u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab && + /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ + u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d ) ) { log_err("u_charMirror() does not work correctly\n"); @@ -1836,7 +1999,7 @@ TestMirroring() { set=uset_openPattern(mirroredPattern, 17, &errorCode); if (U_FAILURE(errorCode)) { - log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!"); + log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n"); } else { for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) { do { @@ -1845,6 +2008,18 @@ TestMirroring() { if(c3!=start) { log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3); } + c3=u_getBidiPairedBracket(start); + if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) { + if(c3!=start) { + log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n", + (long)start); + } + } else { + if(c3!=c2) { + log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n", + (long)start, (long)c2); + } + } } while(++start<=end); } } @@ -1924,11 +2099,11 @@ TestUScriptRunAPI() const RunTestData *testData; int32_t nRuns; } testDataEntries[] = { - {testData1, LENGTHOF(testData1)}, - {testData2, LENGTHOF(testData2)} + {testData1, UPRV_LENGTHOF(testData1)}, + {testData2, UPRV_LENGTHOF(testData2)} }; - static const int32_t nTestEntries = LENGTHOF(testDataEntries); + static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries); int32_t testEntry; for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) { @@ -2084,7 +2259,7 @@ TestAdditionalProperties() { }; /* test data for u_hasBinaryProperty() */ - static int32_t + static const int32_t props[][3]={ /* code point, property, value */ { 0x0627, UCHAR_ALPHABETIC, TRUE }, { 0x1034a, UCHAR_ALPHABETIC, TRUE }, @@ -2099,6 +2274,12 @@ TestAdditionalProperties() { { 0x003c, UCHAR_BIDI_MIRRORED, TRUE }, { 0x003d, UCHAR_BIDI_MIRRORED, FALSE }, + /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ + { 0x2018, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x201d, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x201f, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x301e, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x058a, UCHAR_DASH, TRUE }, { 0x007e, UCHAR_DASH, FALSE }, @@ -2205,15 +2386,19 @@ TestAdditionalProperties() { { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE }, { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE }, - { 0x0341, UCHAR_DEPRECATED, TRUE }, - { 0xe0041, UCHAR_DEPRECATED, FALSE }, + { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */ + { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */ + { 0xe0001, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */ + { 0xe0100, UCHAR_DEPRECATED, FALSE }, { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE }, { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE }, - { 0xff9f, UCHAR_GRAPHEME_BASE, TRUE }, /* changed from Unicode 3.2 to 4 */ + { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE }, + { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE }, - { 0xff9f, UCHAR_GRAPHEME_EXTEND, FALSE }, /* changed from Unicode 3.2 to 4 */ + { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE }, + { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE }, { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE }, @@ -2256,15 +2441,14 @@ TestAdditionalProperties() { { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */ { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */ - { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, + { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, - { 0x0606, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, - { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, + { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, @@ -2277,7 +2461,7 @@ TestAdditionalProperties() { { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS }, { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG }, { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU }, - { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, + { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA }, { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS }, { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, @@ -2316,14 +2500,14 @@ TestAdditionalProperties() { { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */ - { 0xd7d7, UCHAR_GENERAL_CATEGORY, 0 }, + { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 }, + { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Unicode 5.2 */ { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN }, { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH }, { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH }, { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL }, - { 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL }, { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING }, { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING }, @@ -2355,27 +2539,43 @@ TestAdditionalProperties() { /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */ + { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, + { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ + { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, + { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, + { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ + { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ + { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, + { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, + { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ + { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ + + { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, + { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ + { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ + { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, + { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ + { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ + { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, - { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, - { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, - { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, - { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, - { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, - { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, + { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, + { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ + { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ + { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, @@ -2433,6 +2633,51 @@ TestAdditionalProperties() { { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE }, { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP }, + { -1, 0x520, 0 }, /* version break for Unicode 5.2 */ + + /* unassigned code points in new default Bidi R blocks */ + { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, + { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, + + /* test some script codes >127 */ + { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM }, + { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU }, + { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN }, + + { -1, 0x600, 0 }, /* version break for Unicode 6.0 */ + + /* value changed in Unicode 6.0 */ + { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL }, + + { -1, 0x610, 0 }, /* version break for Unicode 6.1 */ + + /* unassigned code points in new/changed default Bidi AL blocks */ + { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, + { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, + + { -1, 0x630, 0 }, /* version break for Unicode 6.3 */ + + /* unassigned code points in the currency symbols block now default to ET */ + { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR }, + { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR }, + + /* new property in Unicode 6.3 */ + { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE }, + { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN }, + { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE }, + { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE }, + { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN }, + { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE }, + + { -1, 0x700, 0 }, /* version break for Unicode 7.0 */ + + /* new character range with Joining_Group values */ + { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, + { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH }, + { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH }, + { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED }, + { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, + /* undefined UProperty values */ { 0x61, 0x4a7, 0 }, { 0x234bc, 0x15ed, 0 } @@ -2455,7 +2700,7 @@ TestAdditionalProperties() { } /* test u_charAge() */ - for(i=0; i 10 && !atLeastSomething) { log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice); @@ -2678,9 +2941,10 @@ TestPropertyNames(void) { } for (choice=0; ; ++choice) { - const char* name = u_getPropertyName(p, choice); + const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice); if (name) { - if (!sawProp) log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff); + if (!sawProp) + log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff); log_verbose("%d=\"%s\"", choice, name); sawProp = TRUE; atLeastSomething = TRUE; @@ -2696,7 +2960,7 @@ TestPropertyNames(void) { } if (sawProp) { /* looks like a valid property; check the values */ - const char* pname = u_getPropertyName(p, U_LONG_PROPERTY_NAME); + const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME); int32_t max = 0; if (p == UCHAR_CANONICAL_COMBINING_CLASS) { max = 255; @@ -2712,14 +2976,14 @@ TestPropertyNames(void) { for (v=-1; ; ++v) { UBool sawValue = FALSE; for (choice=0; ; ++choice) { - const char* vname = u_getPropertyValueName(p, v, choice); + const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice); if (vname) { if (!sawValue) log_verbose(" %s, value %d:", pname, v); log_verbose("%d=\"%s\"", choice, vname); sawValue = TRUE; /* test reverse mapping */ - rev = u_getPropertyValueEnum(p, vname); + rev = u_getPropertyValueEnum(propEnum, vname); if (rev != v) { log_err("Value round-trip failure (%s): %d -> %s -> %d\n", pname, v, vname, rev); @@ -2760,15 +3024,17 @@ TestPropertyValues(void) { /* Min should be 0 for everything. */ /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */ for (p=UCHAR_INT_START; p1 && buffer16[0]==0x49) { - uset_add(set2, start); - } - } - - compareUSets(set1, set2, - "[canon start set of 0049]", "[all c with canon decomp with 0049]", - TRUE); - } else { - log_err("error calling unorm_getCanonStartSet()\n"); - } - - uset_close(set1); - uset_close(set2); - -#endif - /* verify that all assigned characters in Math blocks are exactly Math characters */ errorCode=U_ZERO_ERROR; set1=uset_openPattern(mathBlocksPattern, -1, &errorCode); @@ -3004,7 +3214,7 @@ TestConsistency() { "[assigned Math block chars]", "[math blocks]&[:Math:]", TRUE); } else { - log_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s\n", u_errorName(errorCode)); + log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode)); } uset_close(set1); uset_close(set2); @@ -3019,7 +3229,7 @@ TestConsistency() { "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]", TRUE); } else { - log_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s\n", u_errorName(errorCode)); + log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode)); } uset_close(set1); uset_close(set2); @@ -3037,59 +3247,14 @@ TestConsistency() { */ #define HARDCODED_DATA_4497 1 -/* API coverage for ucase.c */ -static void TestUCase() { -#if !HARDCODED_DATA_4497 - UDataMemory *pData; - UCaseProps *csp; -#endif - const UCaseProps *ccsp; - UErrorCode errorCode; - -#if !HARDCODED_DATA_4497 - /* coverage for ucase_openBinary() */ - errorCode=U_ZERO_ERROR; - pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode); - if(U_FAILURE(errorCode)) { - log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n", - u_errorName(errorCode)); - return; - } - - csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode); - if(U_FAILURE(errorCode)) { - log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n", - u_errorName(errorCode)); - udata_close(pData); - return; - } - - if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */ - log_err("ucase_openBinary() does not seem to return working UCaseProps\n"); - } - - ucase_close(csp); - udata_close(pData); -#endif - - /* coverage for ucase_getDummy() */ - errorCode=U_ZERO_ERROR; - ccsp=ucase_getDummy(&errorCode); - if(ucase_tolower(ccsp, 0x41)!=0x41) { - log_err("ucase_tolower(dummy, A)!=A\n"); - } -} - /* API coverage for ubidi_props.c */ static void TestUBiDiProps() { #if !HARDCODED_DATA_4497 UDataMemory *pData; UBiDiProps *bdp; -#endif const UBiDiProps *cbdp; UErrorCode errorCode; -#if !HARDCODED_DATA_4497 /* coverage for ubidi_openBinary() */ errorCode=U_ZERO_ERROR; pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode); @@ -3113,7 +3278,6 @@ static void TestUBiDiProps() { ubidi_closeProps(bdp); udata_close(pData); -#endif /* coverage for ubidi_getDummy() */ errorCode=U_ZERO_ERROR; @@ -3121,6 +3285,7 @@ static void TestUBiDiProps() { if(ubidi_getClass(cbdp, 0x20)!=0) { log_err("ubidi_getClass(dummy, space)!=0\n"); } +#endif } /* test case folding, compare return values with CaseFolding.txt ------------ */ @@ -3151,7 +3316,7 @@ testFold(UChar32 c, int which, log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); } if((which&CF_FULL)!=0) { - length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode); + length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode); if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) { log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c); } @@ -3161,7 +3326,7 @@ testFold(UChar32 c, int which, log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); } - length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); + length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) { log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c); } @@ -3201,7 +3366,16 @@ caseFoldingLineFn(void *context, char status; /* get code point */ - c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16); + const char *s=u_skipWhitespace(fields[0][0]); + if(0==strncmp(s, "0000..10FFFF", 12)) { + /* + * Ignore the line + * # @missing: 0000..10FFFF; C; + * because maps-to-self is already our default, and this line breaks this parser. + */ + return; + } + c=(UChar32)strtoul(s, &end, 16); end=(char *)u_skipWhitespace(end); if(end<=fields[0][0] || end!=fields[0][1]) { log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]); @@ -3235,14 +3409,14 @@ caseFoldingLineFn(void *context, * If a turkic folding was not mentioned, then it should fold the same * as the regular simple case folding. */ - UChar s[2]; + UChar prevString[2]; int32_t length; length=0; - U16_APPEND_UNSAFE(s, length, prev); + U16_APPEND_UNSAFE(prevString, length, prev); testFold(prev, (~pData->which)&CF_ALL, prev, pData->prevSimple, - s, length, + prevString, length, pData->prevFull, pData->prevFullLength); pData->prev=pData->prevSimple=c; length=0;