X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..7393aa2fd2c40c89f12c2db881842a563afdb429:/icuSources/test/cintltst/cucdtst.c diff --git a/icuSources/test/cintltst/cucdtst.c b/icuSources/test/cintltst/cucdtst.c index 21077a47..3fead476 100644 --- a/icuSources/test/cintltst/cucdtst.c +++ b/icuSources/test/cintltst/cucdtst.c @@ -1,16 +1,16 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2004, International Business Machines Corporation and + * Copyright (c) 1997-2008, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ -/******************************************************************************** +/******************************************************************************* * * File CUCDTST.C * * Modification History: * Name Description * Madhu Katragadda Ported for C API, added tests for string functions -********************************************************************************* +******************************************************************************** */ #include @@ -27,6 +27,7 @@ #include "putilimp.h" #include "uparse.h" #include "ucase.h" +#include "ubidi_props.h" #include "uprops.h" #include "uset_imp.h" #include "usc_impl.h" @@ -50,7 +51,6 @@ static void TestCodePoint(void); static void TestCharLength(void); static void TestCharNames(void); static void TestMirroring(void); -/* void TestUScriptCodeAPI(void);*/ /* defined in cucdapi.h */ static void TestUScriptRunAPI(void); static void TestAdditionalProperties(void); static void TestNumericProperties(void); @@ -58,11 +58,50 @@ static void TestPropertyNames(void); static void TestPropertyValues(void); static void TestConsistency(void); static void TestUCase(void); +static void TestUBiDiProps(void); +static void TestCaseFolding(void); /* internal methods used */ static int32_t MakeProp(char* str); static int32_t MakeDir(char* str); +/* helpers ------------------------------------------------------------------ */ + +static void +parseUCDFile(const char *filename, + char *fields[][2], int32_t fieldCount, + UParseLineFn *lineFn, void *context, + UErrorCode *pErrorCode) { + char path[256]; + char backupPath[256]; + + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* Look inside ICU_DATA first */ + strcpy(path, u_getDataDirectory()); + strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); + strcat(path, filename); + + /* As a fallback, try to guess where the source data was located + * at the time ICU was built, and look there. + */ + strcpy(backupPath, ctest_dataSrcDir()); + strcat(backupPath, U_FILE_SEP_STRING); + strcat(backupPath, "unidata" U_FILE_SEP_STRING); + strcat(backupPath, filename); + + u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode); + if(*pErrorCode==U_FILE_ACCESS_ERROR) { + *pErrorCode=U_ZERO_ERROR; + u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode); + } + if(U_FAILURE(*pErrorCode)) { + log_err("error parsing %s: %s\n", filename, u_errorName(*pErrorCode)); + } +} + /* test data ---------------------------------------------------------------- */ static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD; @@ -127,10 +166,11 @@ void addUnicodeTest(TestNode** root); void addUnicodeTest(TestNode** root) { - addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData"); addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit"); addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint"); addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength"); + addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues"); + addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData"); addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties"); addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties"); addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower"); @@ -147,6 +187,8 @@ void addUnicodeTest(TestNode** root) addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues"); addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency"); addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase"); + addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps"); + addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding"); } /*==================================================== */ @@ -169,7 +211,8 @@ Checks LetterLike Symbols which were previously a source of confusion */ for (i=0x2100;i<0x2138;i++) { - if(i!=0x2126 && i!=0x212a && i!=0x212b) + /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */ + if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132) { if (i != (int)u_tolower(i)) /* itself */ log_err("Failed case conversion with itself: U+%04x\n", i); @@ -282,57 +325,77 @@ Checks LetterLike Symbols which were previously a source of confusion } } -/* compare two sets, which is not easy with the current (ICU 2.4) C API... */ - +/* compare two sets and verify that their difference or intersection is empty */ static UBool showADiffB(const USet *a, const USet *b, const char *a_name, const char *b_name, UBool expect, UBool diffIsError) { + USet *aa; int32_t i, start, end, length; - UBool equal; UErrorCode errorCode; + /* + * expect: + * TRUE -> a-b should be empty, that is, b should contain all of a + * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa) + */ + if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) { + return TRUE; + } + + /* clone a to aa because a is const */ + aa=uset_open(1, 0); + if(aa==NULL) { + /* unusual problem - out of memory? */ + return FALSE; + } + uset_addAll(aa, a); + + /* compute the set in question */ + if(expect) { + /* a-b */ + uset_removeAll(aa, b); + } else { + /* a&b */ + uset_retainAll(aa, b); + } + + /* aa is not empty because of the initial tests above; show its contents */ errorCode=U_ZERO_ERROR; - equal=TRUE; i=0; for(;;) { - length=uset_getItem(a, i, &start, &end, NULL, 0, &errorCode); + length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode); if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { - return equal; /* done */ + break; /* done */ } if(U_FAILURE(errorCode)) { - log_err("error comparing %s with %s at item %d: %s\n", + log_err("error comparing %s with %s at difference item %d: %s\n", a_name, b_name, i, u_errorName(errorCode)); - return FALSE; + break; } if(length!=0) { - return equal; /* done with code points, got a string or -1 */ + break; /* done with code points, got a string or -1 */ } - if(expect!=uset_containsRange(b, start, end)) { - equal=FALSE; - while(start<=end) { - if(expect!=uset_contains(b, start)) { - if(diffIsError) { - if(expect) { - log_err("error: %s contains U+%04x but %s does not\n", a_name, start, b_name); - } else { - log_err("error: %s and %s both contain U+%04x but should not intersect\n", a_name, b_name, start); - } - } else { - if(expect) { - log_verbose("info: %s contains U+%04x but %s does not\n", a_name, start, b_name); - } else { - log_verbose("info: %s and %s both contain U+%04x but should not intersect\n", a_name, b_name, start); - } - } - } - ++start; + if(diffIsError) { + if(expect) { + log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name); + } else { + log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end); + } + } else { + if(expect) { + log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name); + } else { + log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end); } } ++i; } + + uset_close(aa); + return FALSE; } static UBool @@ -353,8 +416,12 @@ static UBool compareUSets(const USet *a, const USet *b, const char *a_name, const char *b_name, UBool diffIsError) { + /* + * Use an arithmetic & not a logical && so that both branches + * are always taken and all differences are shown. + */ return - showAMinusB(a, b, a_name, b_name, diffIsError) && + showAMinusB(a, b, a_name, b_name, diffIsError) & showAMinusB(b, a, b_name, a_name, diffIsError); } @@ -1157,8 +1224,6 @@ enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCatego /* tests for several properties */ static void TestUnicodeData() { - char newPath[256]; - char backupPath[256]; UVersionInfo expectVersionArray; UVersionInfo versionArray; char *fields[15][2]; @@ -1166,17 +1231,6 @@ static void TestUnicodeData() UChar32 c; int8_t type; - /* Look inside ICU_DATA first */ - strcpy(newPath, u_getDataDirectory()); - strcat(newPath, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); - - /* As a fallback, try to guess where the source data was located - * at the time ICU was built, and look there. - */ - strcpy(backupPath, ctest_dataSrcDir()); - strcat(backupPath, U_FILE_SEP_STRING); - strcat(backupPath, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); - u_versionFromString(expectVersionArray, U_UNICODE_VERSION); u_getUnicodeVersion(versionArray); if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0) @@ -1198,13 +1252,8 @@ static void TestUnicodeData() } errorCode=U_ZERO_ERROR; - u_parseDelimitedFile(newPath, ';', fields, 15, unicodeDataLineFn, NULL, &errorCode); - if(errorCode==U_FILE_ACCESS_ERROR) { - errorCode=U_ZERO_ERROR; - u_parseDelimitedFile(backupPath, ';', fields, 15, unicodeDataLineFn, NULL, &errorCode); - } + parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode); if(U_FAILURE(errorCode)) { - log_err("error parsing UnicodeData.txt: %s\n", u_errorName(errorCode)); return; /* if we couldn't parse UnicodeData.txt, we should return */ } @@ -1656,7 +1705,8 @@ TestCharNames() { NULL, uset_add, uset_addRange, - uset_addString + uset_addString, + NULL /* don't need remove() */ }; sa.set=set; uprv_getCharNameCharacters(&sa); @@ -1754,6 +1804,16 @@ TestCharNames() { static void TestMirroring() { + USet *set; + UErrorCode errorCode; + + UChar32 start, end, c2, c3; + int32_t i; + + U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17); + + U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17); + log_verbose("Testing u_isMirrored()\n"); if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) && !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400) @@ -1764,11 +1824,34 @@ TestMirroring() { log_verbose("Testing u_charMirror()\n"); if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 && - u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab + u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */ + u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab && + /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ + u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d ) ) { log_err("u_charMirror() does not work correctly\n"); } + + /* verify that Bidi_Mirroring_Glyph roundtrips */ + errorCode=U_ZERO_ERROR; + set=uset_openPattern(mirroredPattern, 17, &errorCode); + + if (U_FAILURE(errorCode)) { + log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!"); + } else { + for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) { + do { + c2=u_charMirror(start); + c3=u_charMirror(c2); + if(c3!=start) { + log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3); + } + } while(++start<=end); + } + } + + uset_close(set); } @@ -2003,7 +2086,7 @@ TestAdditionalProperties() { }; /* test data for u_hasBinaryProperty() */ - static int32_t + static const int32_t props[][3]={ /* code point, property, value */ { 0x0627, UCHAR_ALPHABETIC, TRUE }, { 0x1034a, UCHAR_ALPHABETIC, TRUE }, @@ -2018,6 +2101,12 @@ TestAdditionalProperties() { { 0x003c, UCHAR_BIDI_MIRRORED, TRUE }, { 0x003d, UCHAR_BIDI_MIRRORED, FALSE }, + /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ + { 0x2018, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x201d, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x201f, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x301e, UCHAR_BIDI_MIRRORED, FALSE }, + { 0x058a, UCHAR_DASH, TRUE }, { 0x007e, UCHAR_DASH, FALSE }, @@ -2125,14 +2214,17 @@ TestAdditionalProperties() { { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE }, { 0x0341, UCHAR_DEPRECATED, TRUE }, - { 0xe0041, UCHAR_DEPRECATED, FALSE }, + { 0xe0041, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */ + { 0xe0100, UCHAR_DEPRECATED, FALSE }, { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE }, { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE }, - { 0xff9f, UCHAR_GRAPHEME_BASE, TRUE }, /* changed from Unicode 3.2 to 4 */ + { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE }, + { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE }, - { 0xff9f, UCHAR_GRAPHEME_EXTEND, FALSE }, /* changed from Unicode 3.2 to 4 */ + { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE }, + { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE }, { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE }, @@ -2156,7 +2248,7 @@ TestAdditionalProperties() { { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE }, { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE }, - { -1, 0x401, 0 }, + { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */ { 0x002e, UCHAR_S_TERM, TRUE }, { 0x0061, UCHAR_S_TERM, FALSE }, @@ -2171,9 +2263,10 @@ TestAdditionalProperties() { /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */ /* test default Bidi classes for unassigned code points */ { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, - { 0x05a2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, + { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, - { 0x07f2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, + { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */ + { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */ { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, @@ -2181,7 +2274,7 @@ TestAdditionalProperties() { { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, - { 0x0606, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, + { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, @@ -2259,7 +2352,6 @@ TestAdditionalProperties() { { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION }, { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, - { 0xac03, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE }, @@ -2309,6 +2401,49 @@ TestAdditionalProperties() { { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, + { -1, 0x410, 0 }, /* version break for Unicode 4.1 */ + + { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE }, + { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE }, + { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE }, + + { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE }, + { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE }, + { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE }, + { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE }, + { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE }, + + { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION }, + { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC }, + { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS }, + + { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE }, + { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC }, + { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI }, + { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN }, + + { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 }, + { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 }, + { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 }, + { 0x115f, UCHAR_LINE_BREAK, U_LB_JL }, + { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT }, + { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV }, + + { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT }, + { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND }, + { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL }, + { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V }, + + { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER }, + { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER }, + { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC }, + { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM }, + + { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER }, + { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER }, + { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE }, + { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP }, + /* undefined UProperty values */ { 0x61, 0x4a7, 0 }, { 0x234bc, 0x15ed, 0 } @@ -2349,21 +2484,60 @@ TestAdditionalProperties() { ) { log_err("error: u_getIntPropertyMinValue() wrong\n"); } - - if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1 || - u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1 || - u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1)!=1 || - u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 || - u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 || - u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1 || - u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1 || - u_getIntPropertyMaxValue(0x2345)!=-1 /*JB#2410*/ || - u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1) || - u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1) || - u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1) || - u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1) - ) { - log_err("error: u_getIntPropertyMaxValue() wrong\n"); + if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n"); + } + if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n"); + } + if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n"); + } + if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n"); + } + if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n"); + } + if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n"); + } + /*JB#2410*/ + if( u_getIntPropertyMaxValue(0x2345)!=-1) { + log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n"); + } + if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n"); + } + if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n"); + } + if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n"); + } + if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) { + log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n"); } /* test u_hasBinaryProperty() and u_getIntPropertyValue() */ @@ -2508,6 +2682,7 @@ TestPropertyNames(void) { UBool atLeastSomething = FALSE; for (p=0; ; ++p) { + UProperty propEnum = (UProperty)p; UBool sawProp = FALSE; if(p > 10 && !atLeastSomething) { log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice); @@ -2515,9 +2690,10 @@ TestPropertyNames(void) { } for (choice=0; ; ++choice) { - const char* name = u_getPropertyName(p, choice); + const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice); if (name) { - if (!sawProp) log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff); + if (!sawProp) + log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff); log_verbose("%d=\"%s\"", choice, name); sawProp = TRUE; atLeastSomething = TRUE; @@ -2533,7 +2709,7 @@ TestPropertyNames(void) { } if (sawProp) { /* looks like a valid property; check the values */ - const char* pname = u_getPropertyName(p, U_LONG_PROPERTY_NAME); + const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME); int32_t max = 0; if (p == UCHAR_CANONICAL_COMBINING_CLASS) { max = 255; @@ -2549,14 +2725,14 @@ TestPropertyNames(void) { for (v=-1; ; ++v) { UBool sawValue = FALSE; for (choice=0; ; ++choice) { - const char* vname = u_getPropertyValueName(p, v, choice); + const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice); if (vname) { if (!sawValue) log_verbose(" %s, value %d:", pname, v); log_verbose("%d=\"%s\"", choice, vname); sawValue = TRUE; /* test reverse mapping */ - rev = u_getPropertyValueEnum(p, vname); + rev = u_getPropertyValueEnum(propEnum, vname); if (rev != v) { log_err("Value round-trip failure (%s): %d -> %s -> %d\n", pname, v, vname, rev); @@ -2597,15 +2773,17 @@ TestPropertyValues(void) { /* Min should be 0 for everything. */ /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */ for (p=UCHAR_INT_START; ppHeader, -1, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n", + u_errorName(errorCode)); + udata_close(pData); + return; + } + + if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */ + log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n"); + } + + ubidi_closeProps(bdp); + udata_close(pData); + + /* coverage for ubidi_getDummy() */ + errorCode=U_ZERO_ERROR; + cbdp=ubidi_getDummy(&errorCode); + if(ubidi_getClass(cbdp, 0x20)!=0) { + log_err("ubidi_getClass(dummy, space)!=0\n"); + } +#endif +} + +/* test case folding, compare return values with CaseFolding.txt ------------ */ + +/* bit set for which case foldings for a character have been tested already */ +enum { + CF_SIMPLE=1, + CF_FULL=2, + CF_TURKIC=4, + CF_ALL=7 +}; + +static void +testFold(UChar32 c, int which, + UChar32 simple, UChar32 turkic, + const UChar *full, int32_t fullLength, + const UChar *turkicFull, int32_t turkicFullLength) { + UChar s[2], t[32]; + UChar32 c2; + int32_t length, length2; + + UErrorCode errorCode=U_ZERO_ERROR; + + length=0; + U16_APPEND_UNSAFE(s, length, c); + + if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) { + log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); + } + if((which&CF_FULL)!=0) { + length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode); + if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) { + log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c); + } + } + if((which&CF_TURKIC)!=0) { + if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) { + log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); + } + + length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); + if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) { + log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c); + } + } +} + +/* test that c case-folds to itself */ +static void +testFoldToSelf(UChar32 c, int which) { + UChar s[2]; + int32_t length; + + length=0; + U16_APPEND_UNSAFE(s, length, c); + testFold(c, which, c, c, s, length, s, length); +} + +struct CaseFoldingData { + USet *notSeen; + UChar32 prev, prevSimple; + UChar prevFull[32]; + int32_t prevFullLength; + int which; +}; +typedef struct CaseFoldingData CaseFoldingData; + +static void U_CALLCONV +caseFoldingLineFn(void *context, + char *fields[][2], int32_t fieldCount, + UErrorCode *pErrorCode) { + CaseFoldingData *pData=(CaseFoldingData *)context; + char *end; + UChar full[32]; + UChar32 c, prev, simple; + int32_t count; + int which; + char status; + + /* get code point */ + c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16); + end=(char *)u_skipWhitespace(end); + if(end<=fields[0][0] || end!=fields[0][1]) { + log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]); + *pErrorCode=U_PARSE_ERROR; + return; + } + + /* get the status of this mapping */ + status=*u_skipWhitespace(fields[1][0]); + if(status!='C' && status!='S' && status!='F' && status!='T') { + log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]); + *pErrorCode=U_PARSE_ERROR; + return; + } + + /* get the mapping */ + count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]); + return; + } + + /* there is a simple mapping only if there is exactly one code point (count is in UChars) */ + if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) { + simple=c; + } + + if(c!=(prev=pData->prev)) { + /* + * Test remaining mappings for the previous code point. + * If a turkic folding was not mentioned, then it should fold the same + * as the regular simple case folding. + */ + UChar s[2]; + int32_t length; + + length=0; + U16_APPEND_UNSAFE(s, length, prev); + testFold(prev, (~pData->which)&CF_ALL, + prev, pData->prevSimple, + s, length, + pData->prevFull, pData->prevFullLength); + pData->prev=pData->prevSimple=c; + length=0; + U16_APPEND_UNSAFE(pData->prevFull, length, c); + pData->prevFullLength=length; + pData->which=0; + } + + /* + * Turn the status into a bit set of case foldings to test. + * Remember non-Turkic case foldings as defaults for Turkic mode. + */ + switch(status) { + case 'C': + which=CF_SIMPLE|CF_FULL; + pData->prevSimple=simple; + u_memcpy(pData->prevFull, full, count); + pData->prevFullLength=count; + break; + case 'S': + which=CF_SIMPLE; + pData->prevSimple=simple; + break; + case 'F': + which=CF_FULL; + u_memcpy(pData->prevFull, full, count); + pData->prevFullLength=count; + break; + case 'T': + which=CF_TURKIC; + break; + default: + which=0; + break; /* won't happen because of test above */ + } + + testFold(c, which, simple, simple, full, count, full, count); + + /* remember which case foldings of c have been tested */ + pData->which|=which; + + /* remove c from the set of ones not mentioned in CaseFolding.txt */ + uset_remove(pData->notSeen, c); +} + +static void +TestCaseFolding() { + CaseFoldingData data={ NULL }; + char *fields[3][2]; + UErrorCode errorCode; + + static char *lastLine= (char *)"10FFFF; C; 10FFFF;"; + + errorCode=U_ZERO_ERROR; + /* test BMP & plane 1 - nothing interesting above */ + data.notSeen=uset_open(0, 0x1ffff); + data.prevFullLength=1; /* length of full case folding of U+0000 */ + + parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode); + if(U_SUCCESS(errorCode)) { + int32_t i, start, end; + + /* add a pseudo-last line to finish testing of the actual last one */ + fields[0][0]=lastLine; + fields[0][1]=lastLine+6; + fields[1][0]=lastLine+7; + fields[1][1]=lastLine+9; + fields[2][0]=lastLine+10; + fields[2][1]=lastLine+17; + caseFoldingLineFn(&data, fields, 3, &errorCode); + + /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */ + for(i=0; + 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) && + U_SUCCESS(errorCode); + ++i + ) { + do { + testFoldToSelf(start, CF_ALL); + } while(++start<=end); + } + } + + uset_close(data.notSeen); }