X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..1a147d096ae81f4c8262f7bfc56bd19fc2dee932:/icuSources/test/cintltst/ccapitst.c diff --git a/icuSources/test/cintltst/ccapitst.c b/icuSources/test/cintltst/ccapitst.c index 96ef85e9..22785b38 100644 --- a/icuSources/test/cintltst/ccapitst.c +++ b/icuSources/test/cintltst/ccapitst.c @@ -1,16 +1,18 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2003, International Business Machines Corporation and + * Copyright (c) 1997-2016, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ -/******************************************************************************** +/***************************************************************************** * -* File CU_CAPITST.C +* File ccapitst.c * * Modification History: * Name Description * Madhu Katragadda Ported for C API -********************************************************************************* +****************************************************************************** */ #include #include @@ -19,81 +21,65 @@ #include "unicode/uloc.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" -#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/uset.h" #include "unicode/ustring.h" +#include "unicode/utf8.h" +#include "ucnv_bld.h" /* for sizeof(UConverter) */ +#include "cmemory.h" /* for UAlignedMemory */ #include "cintltst.h" #include "ccapitst.h" - -/* for not including "cstring.h" -begin*/ -#ifdef WIN32 -# define stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2) -#elif defined(POSIX) -# define stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2) -#else -# define stricmp(str1, str2) T_CString_stricmp(str1, str2) -#endif - -static int U_EXPORT2 -T_CString_stricmp(const char *str1, const char *str2) { - if(str1==NULL) { - if(str2==NULL) { - return 0; - } else { - return -1; - } - } else if(str2==NULL) { - return 1; - } else { - /* compare non-NULL strings lexically with lowercase */ - int rc; - unsigned char c1, c2; - for(;;) { - c1=(unsigned char)*str1; - c2=(unsigned char)*str2; - if(c1==0) { - if(c2==0) { - return 0; - } else { - return -1; - } - } else if(c2==0) { - return 1; - } else { - /* compare non-zero characters with lowercase */ - rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2); - if(rc!=0) { - return rc; - } - } - ++str1; - ++str2; - } - } -} -/* for not including "cstring.h" -end*/ - -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +#include "cstring.h" #define NUM_CODEPAGE 1 #define MAX_FILE_LEN 1024*20 #define UCS_FILE_NAME_SIZE 512 /*returns an action other than the one provided*/ +#if !UCONFIG_NO_LEGACY_CONVERSION static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); +#endif + +static UConverter * +cnv_open(const char *name, UErrorCode *pErrorCode) { + if(name!=NULL && name[0]=='*') { + return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); + } else { + return ucnv_open(name, pErrorCode); + } +} static void ListNames(void); - void TestFlushCache(void); /* defined in cctest.c */ +static void TestFlushCache(void); static void TestDuplicateAlias(void); static void TestCCSID(void); static void TestJ932(void); static void TestJ1968(void); +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION static void TestLMBCSMaxChar(void); +#endif + +#if !UCONFIG_NO_LEGACY_CONVERSION static void TestConvertSafeCloneCallback(void); +#endif + static void TestEBCDICSwapLFNL(void); static void TestConvertEx(void); +static void TestConvertExFromUTF8(void); +static void TestConvertExFromUTF8_C5F0(void); static void TestConvertAlgorithmic(void); + void TestDefaultConverterError(void); /* defined in cctest.c */ + void TestDefaultConverterSet(void); /* defined in cctest.c */ +static void TestToUCountPending(void); +static void TestFromUCountPending(void); +static void TestDefaultName(void); +static void TestCompareNames(void); +static void TestSubstString(void); +static void InvalidArguments(void); +static void TestGetName(void); +static void TestUTFBOM(void); void addTestConvert(TestNode** root); @@ -104,15 +90,33 @@ void addTestConvert(TestNode** root) addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); - addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); - addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); + addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); +#if !UCONFIG_NO_LEGACY_CONVERSION + addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); +#endif addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); +#endif addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); + addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); + addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); + addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); + addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); +#if !UCONFIG_NO_FILE_IO + addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); + addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); +#endif + addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); + addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); + addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); + addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); + addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); + addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); } static void ListNames(void) { @@ -126,7 +130,7 @@ static void ListNames(void) { log_verbose("Testing ucnv_openAllNames()..."); allNamesEnum = ucnv_openAllNames(&err); if(U_FAILURE(err)) { - log_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); + log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); } else { const char *string = NULL; @@ -138,7 +142,10 @@ static void ListNames(void) { count1++; log_verbose("read \"%s\", length %i\n", string, len); } - err = U_ZERO_ERROR; + if (U_FAILURE(err)) { + log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); + err = U_ZERO_ERROR; + } uenum_reset(allNamesEnum, &err); while ((string = uenum_next(allNamesEnum, &len, &err))) { count2++; @@ -173,7 +180,7 @@ static void ListNames(void) { /* Test ucnv_countAliases() etc. */ count = ucnv_countAliases("utf-8", &err); if(U_FAILURE(err)) { - log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); + log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); } else if(count <= 0) { log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); } else { @@ -226,6 +233,7 @@ static void ListNames(void) { static void TestConvert() { +#if !UCONFIG_NO_LEGACY_CONVERSION char myptr[4]; char save[4]; int32_t testLong1 = 0; @@ -246,7 +254,6 @@ static void TestConvert() UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ UChar* my_ucs_file_buffer_1; int8_t ii = 0; - int32_t j = 0; uint16_t codepage_index = 0; int32_t cp = 0; UErrorCode err = U_ZERO_ERROR; @@ -408,7 +415,7 @@ static void TestConvert() const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; char *target=0; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); err=U_ZERO_ERROR; targetLimit=0; @@ -443,14 +450,14 @@ static void TestConvert() } err=U_ILLEGAL_ARGUMENT_ERROR; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); if(i !=0 ){ log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); } err=U_ZERO_ERROR; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); targetLimit=0; i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ @@ -496,23 +503,6 @@ static void TestConvert() log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); } - /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ - { - static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; - strcpy(defaultName, ucnv_getDefaultName()); - - log_verbose("getDefaultName returned %s\n", defaultName); - - /*change the default name by setting it */ - ucnv_setDefaultName("changed"); - if(strcmp(ucnv_getDefaultName(), "changed")==0) - log_verbose("setDefaultName o.k"); - else - log_err("setDefaultName failed"); - /*set the default name back*/ - ucnv_setDefaultName(defaultName); - } - ucnv_close(someConverters[0]); ucnv_close(someConverters[1]); ucnv_close(someConverters[2]); @@ -522,9 +512,11 @@ static void TestConvert() for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) { int32_t i = 0; - char* index = NULL; err = U_ZERO_ERROR; +#ifdef U_TOPSRCDIR + strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); +#else strcpy(ucs_file_name, loadTestData(&err)); if(U_FAILURE(err)){ @@ -532,19 +524,22 @@ static void TestConvert() return; } - index=strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); + { + char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); - if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ - *(index+1)=0; + if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ + *(index+1)=0; + } } strcat(ucs_file_name,".."U_FILE_SEP_STRING); +#endif strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); ucs_file_in = fopen(ucs_file_name,"rb"); if (!ucs_file_in) { - log_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); + log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); return; } @@ -556,8 +551,8 @@ static void TestConvert() if (!myConverter || U_FAILURE(err)) { log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); - - return; + fclose(ucs_file_in); + break; } /*testing for ucnv_getName() */ @@ -569,7 +564,7 @@ static void TestConvert() { log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); } - if (stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) + if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) log_err("getName failed\n"); else log_verbose("getName ok\n"); @@ -605,6 +600,9 @@ static void TestConvert() log_verbose("\n---Testing ucnv_getSubstChars...\n"); ii=4; ucnv_getSubstChars(myConverter, myptr, &ii, &err); + if (ii <= 0) { + log_err("ucnv_getSubstChars returned a negative number %d\n", ii); + } for(x=0;x%s instead of U_STRING_NOT_TERMINATED_WARNING\n", u_errorName(err)); @@ -959,7 +973,6 @@ static void TestConvert() /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ /*Clean up re-usable vars*/ - j=0; log_verbose("Testing ucnv_fromUnicode().....\n"); tmp_ucs_buf=ucs_file_buffer_use; ucnv_fromUnicode(myConverter, &mytarget_1, @@ -970,6 +983,7 @@ static void TestConvert() TRUE, &err); consumedUni = (UChar*)tmp_consumedUni; + (void)consumedUni; /* Suppress set but not used warning. */ if (U_FAILURE(err)) { @@ -1024,7 +1038,6 @@ static void TestConvert() fclose(ucs_file_in); ucnv_close(myConverter); - free(displayname); if (uchar1 != 0) free(uchar1); if (uchar2 != 0) free(uchar2); if (uchar3 != 0) free(uchar3); @@ -1034,18 +1047,94 @@ static void TestConvert() free((void*)output_cp_buffer); free((void*)ucs_file_buffer); free((void*)my_ucs_file_buffer); +#endif } +#if !UCONFIG_NO_LEGACY_CONVERSION static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) { return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; } - static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) { return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; } +#endif + +static void TestFlushCache(void) { +#if !UCONFIG_NO_LEGACY_CONVERSION + UErrorCode err = U_ZERO_ERROR; + UConverter* someConverters[5]; + int flushCount = 0; + + /* flush the converter cache to get a consistent state before the flushing is tested */ + ucnv_flushCache(); + + /*Testing ucnv_open()*/ + /* Note: These converters have been chosen because they do NOT + encode the Latin characters (U+0041, ...), and therefore are + highly unlikely to be chosen as system default codepages */ + + someConverters[0] = ucnv_open("ibm-1047", &err); + if (U_FAILURE(err)) { + log_data_err("FAILURE! %s\n", myErrorName(err)); + } + + someConverters[1] = ucnv_open("ibm-1047", &err); + if (U_FAILURE(err)) { + log_data_err("FAILURE! %s\n", myErrorName(err)); + } + + someConverters[2] = ucnv_open("ibm-1047", &err); + if (U_FAILURE(err)) { + log_data_err("FAILURE! %s\n", myErrorName(err)); + } + + someConverters[3] = ucnv_open("gb18030", &err); + if (U_FAILURE(err)) { + log_data_err("FAILURE! %s\n", myErrorName(err)); + } + + someConverters[4] = ucnv_open("ibm-954", &err); + if (U_FAILURE(err)) { + log_data_err("FAILURE! %s\n", myErrorName(err)); + } + + + /* Testing ucnv_flushCache() */ + log_verbose("\n---Testing ucnv_flushCache...\n"); + if ((flushCount=ucnv_flushCache())==0) + log_verbose("Flush cache ok\n"); + else + log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); + + /*testing ucnv_close() and ucnv_flushCache() */ + ucnv_close(someConverters[0]); + ucnv_close(someConverters[1]); + + if ((flushCount=ucnv_flushCache())==0) + log_verbose("Flush cache ok\n"); + else + log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); + + ucnv_close(someConverters[2]); + ucnv_close(someConverters[3]); + + if ((flushCount=ucnv_flushCache())==2) + log_verbose("Flush cache ok\n"); /*because first, second and third are same */ + else + log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", + __LINE__, + flushCount); + + ucnv_close(someConverters[4]); + if ( (flushCount=ucnv_flushCache())==1) + log_verbose("Flush cache ok\n"); + else + log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); +#endif +} /** * Test the converter alias API, specifically the fuzzy matching of @@ -1061,15 +1150,13 @@ static void TestAlias() { /* Predetermined aliases that we expect to map back to ISO_2022 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ const char* ISO_2022_NAMES[] = - {"ISO_2022", "iso-2022", "2022", - "cp2022", "iso2022", "iso_2022"}; - int32_t ISO_2022_NAMES_LENGTH = - sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); + {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", + "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; + int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES); const char *UTF8_NAMES[] = { "UTF-8", "utf-8", "utf8", "ibm-1208", "utf_8", "ibm1208", "cp1208" }; - int32_t UTF8_NAMES_LENGTH = - sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); + int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES); struct { const char *name; @@ -1081,7 +1168,7 @@ static void TestAlias() { { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, { "UTF-32", "ucs-4" } }; - int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); + int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES); /* When there are bugs in gencnval or in ucnv_io, converters can appear to have no aliases. */ @@ -1110,7 +1197,7 @@ static void TestAlias() { if (strcmp(ucnv_getName(cnv, &status), name) != 0 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " - "The should be the same\n", + "They should be the same\n", name, ucnv_getName(cnv, &status)); } } @@ -1183,7 +1270,7 @@ static void TestAlias() { continue; } if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { - log_err("FAIL: \"%s\" -> \"%s\", expect ISO_2022\n", + log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", ISO_2022_NAMES[i], mapBack); } } @@ -1209,7 +1296,7 @@ static void TestAlias() { for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); if(!mapBack) { - log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i]); + log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); continue; } if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { @@ -1227,7 +1314,7 @@ static void TestDuplicateAlias(void) { status = U_ZERO_ERROR; alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { - log_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); + log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); } status = U_ZERO_ERROR; alias = ucnv_getStandardName("ibm-943", "IANA", &status); @@ -1271,6 +1358,7 @@ static TSCCContext *TSCC_clone(TSCCContext *ctx) return newCtx; } +#if !UCONFIG_NO_LEGACY_CONVERSION static void TSCC_fromU(const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar* codeUnits, @@ -1293,6 +1381,7 @@ static void TSCC_fromU(const void *context, UErrorCode subErr = U_ZERO_ERROR; TSCCContext *newCtx; TSCCContext *junkCtx; + TSCCContext **pjunkCtx = &junkCtx; /* "recreate" it */ log_verbose("TSCC_fromU: cloning..\n"); @@ -1303,7 +1392,7 @@ static void TSCC_fromU(const void *context, } /* now, SET it */ - ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)&junkCtx); + ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); if(U_FAILURE(subErr)) { @@ -1317,7 +1406,6 @@ static void TSCC_fromU(const void *context, } } - static void TSCC_toU(const void *context, UConverterToUnicodeArgs *toUArgs, const char* codeUnits, @@ -1339,6 +1427,7 @@ static void TSCC_toU(const void *context, UErrorCode subErr = U_ZERO_ERROR; TSCCContext *newCtx; TSCCContext *junkCtx; + TSCCContext **pjunkCtx = &junkCtx; /* "recreate" it */ log_verbose("TSCC_toU: cloning..\n"); @@ -1349,7 +1438,7 @@ static void TSCC_toU(const void *context, } /* now, SET it */ - ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)&junkCtx); + ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); if(U_FAILURE(subErr)) { @@ -1389,6 +1478,7 @@ static void TestConvertSafeCloneCallback() UErrorCode err = U_ZERO_ERROR; TSCCContext from1, to1; TSCCContext *from2, *from3, *to2, *to3; + TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; char hunk[8192]; int32_t hunkSize = 8192; UConverterFromUCallback junkFrom; @@ -1398,7 +1488,7 @@ static void TestConvertSafeCloneCallback() conv1 = ucnv_open("iso-8859-3", &err); if(U_FAILURE(err)) { - log_data_err("Err opening iso-8859-3, %s", u_errorName(err)); + log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); return; } @@ -1426,8 +1516,8 @@ static void TestConvertSafeCloneCallback() log_verbose("Cloned to conv2=%p.\n", conv2); /********** from *********************/ - ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)&from2); - ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)&from3); + ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); + ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); TSCC_print_log(from2, "from2"); TSCC_print_log(from3, "from3(==from1)"); @@ -1459,8 +1549,8 @@ static void TestConvertSafeCloneCallback() } /********** to *********************/ - ucnv_getToUCallBack(conv2, &junkTo, (const void**)&to2); - ucnv_getToUCallBack(conv1, &junkTo, (const void**)&to3); + ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); + ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); TSCC_print_log(to2, "to2"); TSCC_print_log(to3, "to3(==to1)"); @@ -1548,179 +1638,271 @@ static void TestConvertSafeCloneCallback() free(from2); /* from1 is stack based */ } } +#endif + +static UBool +containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { + while(length>0) { + if(*p!=b) { + return TRUE; + } + ++p; + --length; + } + return FALSE; +} static void TestConvertSafeClone() { -#define CLONETEST_CONVERTER_COUNT 12 - - char charBuffer [21]; /* Leave at an odd number for alignment testing */ - uint8_t buffer [CLONETEST_CONVERTER_COUNT] [U_CNV_SAFECLONE_BUFFERSIZE]; - int32_t bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - UConverter * someConverters [CLONETEST_CONVERTER_COUNT]; - UConverter * someClonedConverters [CLONETEST_CONVERTER_COUNT]; - UConverter * cnv; - UErrorCode err = U_ZERO_ERROR; + /* one 'regular' & all the 'private stateful' converters */ + static const char *const names[] = { +#if !UCONFIG_NO_LEGACY_CONVERSION + "ibm-1047", + "ISO_2022,locale=zh,version=1", +#endif + "SCSU", +#if !UCONFIG_NO_LEGACY_CONVERSION + "HZ", + "lmbcs", + "ISCII,version=0", + "ISO_2022,locale=kr,version=1", + "ISO_2022,locale=jp,version=2", +#endif + "BOCU-1", + "UTF-7", +#if !UCONFIG_NO_LEGACY_CONVERSION + "IMAP-mailbox-name", + "ibm-1047-s390" +#else + "IMAP=mailbox-name" +#endif + }; + + /* store the actual sizes of each converter */ + int32_t actualSizes[UPRV_LENGTHOF(names)]; + + static const int32_t bufferSizes[] = { + U_CNV_SAFECLONE_BUFFERSIZE, + (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ + (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ + }; + + char charBuffer[21]; /* Leave at an odd number for alignment testing */ + uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; + int32_t bufferSize, maxBufferSize; + const char *maxName; + UConverter * cnv, *cnv2; + UErrorCode err; char *pCharBuffer; const char *pConstCharBuffer; - const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); - UChar uniBuffer [] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ - UChar uniCharBuffer [20]; - char charSourceBuffer [] = { 0x1b, 0x24, 0x42 }; + const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer); + UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ + UChar uniCharBuffer[20]; + char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; const char *pCharSource = charSourceBuffer; const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); UChar *pUCharTarget = uniCharBuffer; - UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); + UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer); const UChar * pUniBuffer; - const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); - int index; - - /* one 'regular' & all the 'private stateful' converters */ - someConverters[0] = ucnv_open("ibm-1047", &err); - someConverters[1] = ucnv_open("ISO_2022", &err); - someConverters[2] = ucnv_open("SCSU", &err); - someConverters[3] = ucnv_open("HZ", &err); - someConverters[4] = ucnv_open("lmbcs", &err); - someConverters[5] = ucnv_open("ISCII,version=0",&err); - someConverters[6] = ucnv_open("ISO_2022,locale=kr,version=1",&err); - someConverters[7] = ucnv_open("ISO_2022,locale=jp,version=1",&err); - someConverters[8] = ucnv_open("BOCU-1", &err); - someConverters[9] = ucnv_open("UTF-7", &err); - someConverters[10] = ucnv_open("IMAP-mailbox-name", &err); - someConverters[11] = ucnv_open("ibm-1047-s390", &err); - - if(U_FAILURE(err)) { - log_data_err("problems creating converters to clone- check the data.\n"); - return; /* bail - leak */ - } - /* Check the various error & informational states: */ + const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer); + int32_t idx, j; - /* Null status - just returns NULL */ - if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, 0)) - { - log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); - } - /* error status - should return 0 & keep error the same */ - err = U_MEMORY_ALLOCATION_ERROR; - if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) - { - log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); - } err = U_ZERO_ERROR; + cnv = ucnv_open(names[0], &err); + if(U_SUCCESS(err)) { + /* Check the various error & informational states: */ - /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ - if (0 != ucnv_safeClone(someConverters[0], buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) - { - log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); - } - err = U_ZERO_ERROR; + /* Null status - just returns NULL */ + bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) + { + log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); + } + /* error status - should return 0 & keep error the same */ + err = U_MEMORY_ALLOCATION_ERROR; + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) + { + log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); + } + err = U_ZERO_ERROR; - /* buffer size pointer is 0 - fill in pbufferSize with a size */ - bufferSize = 0; - if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) - { - log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); - } - /* Verify our define is large enough */ - if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) - { - log_err("FAIL: Pre-calculated buffer size is too small\n"); - } - /* Verify we can use this run-time calculated size */ - if (0 == (cnv = ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err)) || U_FAILURE(err)) - { - log_err("FAIL: Converter can't be cloned with run-time size\n"); - } - if (cnv) - ucnv_close(cnv); - /* size one byte too small - should allocate & let us know */ - --bufferSize; - if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) - { - log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); - } - if (cnv) - ucnv_close(cnv); - err = U_ZERO_ERROR; - bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; + /* Null buffer size pointer is ok */ + if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err)) + { + log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); + } + ucnv_close(cnv2); + err = U_ZERO_ERROR; - /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ - if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) - { - log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); - } - if (cnv) - ucnv_close(cnv); - err = U_ZERO_ERROR; + /* buffer size pointer is 0 - fill in pbufferSize with a size */ + bufferSize = 0; + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) + { + log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); + } + /* Verify our define is large enough */ + if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) + { + log_err("FAIL: Pre-calculated buffer size is too small\n"); + } + /* Verify we can use this run-time calculated size */ + if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) + { + log_err("FAIL: Converter can't be cloned with run-time size\n"); + } + if (cnv2) { + ucnv_close(cnv2); + } + + /* size one byte too small - should allocate & let us know */ + --bufferSize; + if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + { + log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); + } + if (cnv2) { + ucnv_close(cnv2); + } + + err = U_ZERO_ERROR; + bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; + + /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ + if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + { + log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); + } + if (cnv2) { + ucnv_close(cnv2); + } + + err = U_ZERO_ERROR; - /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ - if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) - { - log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); + /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ + if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) + { + log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); + } + + ucnv_close(cnv); } - err = U_ZERO_ERROR; + maxBufferSize = 0; + maxName = ""; /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ - for (index = 0; index < CLONETEST_CONVERTER_COUNT; index++) - { - bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - someClonedConverters[index] = ucnv_safeClone(someConverters[index], buffer[index], &bufferSize, &err); + for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) { + for (idx = 0; idx < UPRV_LENGTHOF(names); idx++) + { + err = U_ZERO_ERROR; + cnv = ucnv_open(names[idx], &err); + if(U_FAILURE(err)) { + log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); + continue; + } - /* close the original immediately to make sure that the clone works by itself */ - ucnv_close(someConverters[index]); + if(j == 0) { + /* preflight to get maxBufferSize */ + actualSizes[idx] = 0; + ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); + if(actualSizes[idx] > maxBufferSize) { + maxBufferSize = actualSizes[idx]; + maxName = names[idx]; + } + } - pCharBuffer = charBuffer; - pUniBuffer = uniBuffer; + memset(buffer, 0xaa, sizeof(buffer)); - ucnv_fromUnicode(someClonedConverters[index], - &pCharBuffer, - charBufferLimit, - &pUniBuffer, - uniBufferLimit, - NULL, - TRUE, - &err); - if(U_FAILURE(err)){ - log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); - } - ucnv_toUnicode(someClonedConverters[index], - &pUCharTarget, - pUCharTargetLimit, - &pCharSource, - pCharSourceLimit, - NULL, - TRUE, - &err - ); + bufferSize = bufferSizes[j]; + cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); - if(U_FAILURE(err)){ - log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); - } + /* close the original immediately to make sure that the clone works by itself */ + ucnv_close(cnv); - pConstCharBuffer = charBuffer; - if (uniBuffer [0] != ucnv_getNextUChar(someClonedConverters[index], &pConstCharBuffer, pCharBuffer, &err)) - { - log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); + if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && + err == U_SAFECLONE_ALLOCATED_WARNING + ) { + log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); + } + + /* check if the clone function overwrote any bytes that it is not supposed to touch */ + if(bufferSize <= bufferSizes[j]) { + /* used the stack buffer */ + if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || + containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) + ) { + log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", + names[idx], bufferSize, bufferSizes[j]); + } + } else { + /* heap-allocated the clone */ + if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { + log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", + names[idx], bufferSize, bufferSizes[j]); + } + } + + pCharBuffer = charBuffer; + pUniBuffer = uniBuffer; + + ucnv_fromUnicode(cnv2, + &pCharBuffer, + charBufferLimit, + &pUniBuffer, + uniBufferLimit, + NULL, + TRUE, + &err); + if(U_FAILURE(err)){ + log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); + } + ucnv_toUnicode(cnv2, + &pUCharTarget, + pUCharTargetLimit, + &pCharSource, + pCharSourceLimit, + NULL, + TRUE, + &err + ); + + if(U_FAILURE(err)){ + log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); + } + + pConstCharBuffer = charBuffer; + if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) + { + log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); + } + ucnv_close(cnv2); } - ucnv_close(someClonedConverters[index]); + } + + log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", + sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); + if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { + log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", + maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); } } static void TestCCSID() { +#if !UCONFIG_NO_LEGACY_CONVERSION UConverter *cnv; UErrorCode errorCode; - int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 33722 }; + int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; int32_t i, ccsid; - for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { + for(i=0; iiso-8859-1: got preflighting size %d instead of 10\n", size); } +#if !UCONFIG_ONLY_HTML_CONVERSION err = U_ZERO_ERROR; /* do the conversion */ size = ucnv_convert("UTF-32BE", /* out */ @@ -1876,6 +2066,7 @@ static void bug2() /* bug2: size is 5, should be 12 */ log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); } +#endif } /* @@ -1884,8 +2075,9 @@ static void bug2() */ static void bug3() { - static char char_in[CHUNK_SIZE*4]; - static char target[5]; +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + char char_in[CHUNK_SIZE*4]; + char target[5]; UErrorCode err = U_ZERO_ERROR; int32_t size; @@ -1938,6 +2130,7 @@ static void bug3() */ log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); } +#endif } static void @@ -1993,6 +2186,11 @@ convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, FALSE, flush, &errorCode); targetLength=(int32_t)(target-targetBuffer); + if(target>targetLimit) { + log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", + testName, chunkSize, target, targetLimit); + break; /* TODO: major problem! */ + } if(errorCode==U_BUFFER_OVERFLOW_ERROR) { /* continue converting another chunk */ errorCode=U_ZERO_ERROR; @@ -2046,6 +2244,7 @@ convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, } static void TestConvertEx() { +#if !UCONFIG_NO_LEGACY_CONVERSION static const uint8_t utf8[]={ /* 4e00 30a1 ff61 0410 */ @@ -2059,7 +2258,7 @@ static void TestConvertEx() { * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: * SUB, SUB, 0x40, SUB, SUB, 0x40 */ - 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40 + 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 }; char srcBuffer[100], targetBuffer[100]; @@ -2174,50 +2373,435 @@ static void TestConvertEx() { log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); } + /* streaming conversion without a pivot buffer */ + errorCode=U_ZERO_ERROR; + src=srcBuffer; + pivotSource=pivotBuffer; + ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, + NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); + } + ucnv_close(cnv1); ucnv_close(cnv2); +#endif } -static void -TestConvertAlgorithmic() { - static const uint8_t - utf8[]={ - /* 4e00 30a1 ff61 0410 */ - 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 - }, - shiftJIS[]={ - 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 - }, - /*errorTarget[]={*/ - /* - * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: - * SUB, SUB, 0x40, SUB, SUB, 0x40 - */ - /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ - /*},*/ - utf16[]={ - 0xfe, 0xff /* BOM only, no text */ - }, - utf32[]={ - 0xff, 0xfe, 0, 0 /* BOM only, no text */ - }; - - char target[100], utf8NUL[100], shiftJISNUL[100]; +/* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ +static const char *const badUTF8[]={ + /* trail byte */ + "\x80", + + /* truncated multi-byte sequences */ + "\xd0", + "\xe0", + "\xe1", + "\xed", + "\xee", + "\xf0", + "\xf1", + "\xf4", + "\xf8", + "\xfc", + + "\xe0\x80", + "\xe0\xa0", + "\xe1\x80", + "\xed\x80", + "\xed\xa0", + "\xee\x80", + "\xf0\x80", + "\xf0\x90", + "\xf1\x80", + "\xf4\x80", + "\xf4\x90", + "\xf8\x80", + "\xfc\x80", + + "\xf0\x80\x80", + "\xf0\x90\x80", + "\xf1\x80\x80", + "\xf4\x80\x80", + "\xf4\x90\x80", + "\xf8\x80\x80", + "\xfc\x80\x80", + + "\xf8\x80\x80\x80", + "\xfc\x80\x80\x80", + + "\xfc\x80\x80\x80\x80", + + /* complete sequences but non-shortest forms or out of range etc. */ + "\xc0\x80", + "\xe0\x80\x80", + "\xed\xa0\x80", + "\xf0\x80\x80\x80", + "\xf4\x90\x80\x80", + "\xf8\x80\x80\x80\x80", + "\xfc\x80\x80\x80\x80\x80", + "\xfe", + "\xff" +}; + +#define ARG_CHAR_ARR_SIZE 8 + +/* get some character that can be converted and convert it */ +static UBool getTestChar(UConverter *cnv, const char *converterName, + char charUTF8[4], int32_t *pCharUTF8Length, + char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, + char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { + UChar utf16[U16_MAX_LENGTH]; + int32_t utf16Length; + + const UChar *utf16Source; + char *target; - UConverter *cnv; + USet *set; + UChar32 c; UErrorCode errorCode; - int32_t length; - errorCode=U_ZERO_ERROR; - cnv=ucnv_open("Shift-JIS", &errorCode); + set=uset_open(1, 0); + ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); + c=uset_charAt(set, uset_size(set)/2); + uset_close(set); + + utf16Length=0; + U16_APPEND_UNSAFE(utf16, utf16Length, c); + *pCharUTF8Length=0; + U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); + + utf16Source=utf16; + target=char0; + ucnv_fromUnicode(cnv, + &target, char0+ARG_CHAR_ARR_SIZE, + &utf16Source, utf16+utf16Length, + NULL, FALSE, &errorCode); + *pChar0Length=(int32_t)(target-char0); + + utf16Source=utf16; + target=char1; + ucnv_fromUnicode(cnv, + &target, char1+ARG_CHAR_ARR_SIZE, + &utf16Source, utf16+utf16Length, + NULL, FALSE, &errorCode); + *pChar1Length=(int32_t)(target-char1); + if(U_FAILURE(errorCode)) { - log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); - ucnv_close(cnv); - return; + log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); + return FALSE; } + return TRUE; +} - memcpy(utf8NUL, utf8, sizeof(utf8)); +static UBool isOneTruncatedUTF8(const char *s, int32_t length) { + if(length==0) { + return FALSE; + } else if(length==1) { + return U8_IS_LEAD(s[0]); + } else { + int32_t count=U8_COUNT_TRAIL_BYTES(s[0]); + if(length<=count) { + // 2 or more bytes, but fewer than the lead byte indicates. + int32_t oneLength=0; + U8_FWD_1(s, oneLength, length); + // Truncated if we reach the end of the string. + // Not true if the lead byte and first trail byte do not start a valid sequence, + // e.g., E0 80 -> oneLength=1. + return oneLength==length; + } + return FALSE; + } +} + +static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, + char charUTF8[4], int32_t charUTF8Length, + char char0[8], int32_t char0Length, + char char1[8], int32_t char1Length) { + char utf8[16]; + int32_t utf8Length; + + char output[16]; + int32_t outputLength; + + char invalidChars[8]; + int8_t invalidLength; + + const char *source; + char *target; + + UChar pivotBuffer[8]; + UChar *pivotSource, *pivotTarget; + + UErrorCode errorCode; + int32_t i; + + /* test truncated sequences */ + errorCode=U_ZERO_ERROR; + ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); + + memcpy(utf8, charUTF8, charUTF8Length); + + for(i=0; i %s/decimal NCRs) failed\n", converterNames[i]); + } + ucnv_close(cnv); + } + ucnv_close(utf8Cnv); +} + +static void +TestConvertAlgorithmic() { +#if !UCONFIG_NO_LEGACY_CONVERSION + static const uint8_t + utf8[]={ + /* 4e00 30a1 ff61 0410 */ + 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 + }, + shiftJIS[]={ + 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 + }, + /*errorTarget[]={*/ + /* + * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: + * SUB, SUB, 0x40, SUB, SUB, 0x40 + */ + /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ + /*},*/ + utf16[]={ + 0xfe, 0xff /* BOM only, no text */ + }; +#if !UCONFIG_ONLY_HTML_CONVERSION + static const uint8_t utf32[]={ + 0xff, 0xfe, 0, 0 /* BOM only, no text */ + }; +#endif + + char target[100], utf8NUL[100], shiftJISNUL[100]; + + UConverter *cnv; + UErrorCode errorCode; + + int32_t length; + + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("Shift-JIS", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); + ucnv_close(cnv); + return; + } + + memcpy(utf8NUL, utf8, sizeof(utf8)); utf8NUL[sizeof(utf8)]=0; memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); shiftJISNUL[sizeof(shiftJIS)]=0; @@ -2270,6 +2854,7 @@ TestConvertAlgorithmic() { u_errorName(errorCode), length); } +#if !UCONFIG_ONLY_HTML_CONVERSION errorCode=U_ZERO_ERROR; length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || @@ -2278,6 +2863,7 @@ TestConvertAlgorithmic() { log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", u_errorName(errorCode), length); } +#endif /* bad arguments */ errorCode=U_MESSAGE_PARSE_ERROR; @@ -2300,42 +2886,80 @@ TestConvertAlgorithmic() { log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); } ucnv_close(cnv); +#endif } +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION static void TestLMBCSMaxChar(void) { static const struct { int8_t maxSize; const char *name; } converter[] = { - { 2, "LMBCS-1"}, - { 2, "LMBCS-2"}, - { 2, "LMBCS-3"}, - { 2, "LMBCS-4"}, - { 2, "LMBCS-5"}, - { 2, "LMBCS-6"}, - { 2, "LMBCS-8"}, - { 2, "LMBCS-11"}, - { 2, "LMBCS-16"}, - { 2, "LMBCS-17"}, - { 2, "LMBCS-18"}, - { 2, "LMBCS-19"} + /* some non-LMBCS converters - perfect test setup here */ + { 1, "US-ASCII"}, + { 1, "ISO-8859-1"}, + + { 2, "UTF-16"}, + { 2, "UTF-16BE"}, + { 3, "UTF-8"}, + { 3, "CESU-8"}, + { 3, "SCSU"}, + { 4, "UTF-32"}, + { 4, "UTF-7"}, + { 4, "IMAP-mailbox-name"}, + { 4, "BOCU-1"}, + + { 1, "windows-1256"}, + { 2, "Shift-JIS"}, + { 2, "ibm-16684"}, + { 3, "ibm-930"}, + { 3, "ibm-1390"}, + { 4, "*test3"}, + { 16,"*test4"}, + + { 4, "ISCII"}, + { 4, "HZ"}, + + { 3, "ISO-2022"}, + { 8, "ISO-2022-KR"}, + { 6, "ISO-2022-JP"}, + { 8, "ISO-2022-CN"}, + + /* LMBCS */ + { 3, "LMBCS-1"}, + { 3, "LMBCS-2"}, + { 3, "LMBCS-3"}, + { 3, "LMBCS-4"}, + { 3, "LMBCS-5"}, + { 3, "LMBCS-6"}, + { 3, "LMBCS-8"}, + { 3, "LMBCS-11"}, + { 3, "LMBCS-16"}, + { 3, "LMBCS-17"}, + { 3, "LMBCS-18"}, + { 3, "LMBCS-19"} }; int32_t idx; - for (idx = 0; idx < LENGTHOF(converter); idx++) { + for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) { UErrorCode status = U_ZERO_ERROR; - UConverter *cnv = ucnv_open(converter[idx].name, &status); + UConverter *cnv = cnv_open(converter[idx].name, &status); if (U_FAILURE(status)) { continue; } if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { - log_data_err("error: for %s expected %d, got %d\n", + log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); } ucnv_close(cnv); } -} + /* mostly test that the macro compiles */ + if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { + log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); + } +} +#endif static void TestJ1968(void) { UErrorCode err = U_ZERO_ERROR; @@ -2423,6 +3047,7 @@ static void TestJ1968(void) { } +#if !UCONFIG_NO_LEGACY_CONVERSION static void testSwap(const char *name, UBool swap) { /* @@ -2478,12 +3103,12 @@ testSwap(const char *name, UBool swap) { /* convert to EBCDIC */ pcu=text; pc=normal; - ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); + ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); normalLength=(int32_t)(pc-normal); pcu=text; pc=swapped; - ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); + ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); swappedLength=(int32_t)(pc-swapped); if(U_FAILURE(errorCode)) { @@ -2516,12 +3141,12 @@ testSwap(const char *name, UBool swap) { /* convert back to Unicode (may not roundtrip) */ pc=normal; pu=uNormal; - ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); + ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); normalLength=(int32_t)(pu-uNormal); pc=normal; pu=uSwapped; - ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); + ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); swappedLength=(int32_t)(pu-uSwapped); if(U_FAILURE(errorCode)) { @@ -2572,10 +3197,544 @@ TestEBCDICSwapLFNL() { int i; - for(i=0; i x ( \x07 |0) + \U00101234\U00050005 -> y (+ \x07+\x00+\x01\x02\x0e+\x05 |0) + \U00101234\U00050005\U00060006 -> z (++ \x07+\x00+\x01\x02\x0f+\x09 |0) + \U00060007 -> unassigned + */ + static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ + static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ + static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ + char tgt[10]; + char* target = tgt; + char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ + const UChar* source = head; + const UChar* sourceLimit = source + u_strlen(head); + int32_t len = 0; + ucnv_reset(cnv); + ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_fromUCountPending(cnv, &status); + if(U_FAILURE(status)){ + log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); + status = U_ZERO_ERROR; + } + if(len!=4){ + log_err("ucnv_fromUInputHeld did not return correct length for head\n"); + } + source = middle; + sourceLimit = source + u_strlen(middle); + ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_fromUCountPending(cnv, &status); + if(U_FAILURE(status)){ + log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); + status = U_ZERO_ERROR; + } + if(len!=5){ + log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); + } + source = tail; + sourceLimit = source + u_strlen(tail); + ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + if(status != U_BUFFER_OVERFLOW_ERROR){ + log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + status = U_ZERO_ERROR; + len = ucnv_fromUCountPending(cnv, &status); + /* middle[1] is pending, tail has not been consumed */ + if(U_FAILURE(status)){ + log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len!=1){ + log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); + } + } + ucnv_close(cnv); +#endif +} +static void +TestToUCountPending(){ +#if !UCONFIG_NO_LEGACY_CONVERSION + UErrorCode status = U_ZERO_ERROR; + static const struct { + char input[6]; + int32_t len; + int32_t exp; + }toUnicodeTests[] = { + /*m:n conversion*/ + {{0x05, 0x01, 0x02},3,3}, + {{0x01, 0x02},2,2}, + {{0x07, 0x00, 0x01, 0x02},4,4}, + }; + int i; + UConverterToUCallback *oldToUAction= NULL; + UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); + if(U_FAILURE(status)){ + log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); + return; + } + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); + for(i=0; i x ( \x01\x02\x03\x0a |0) + 0x01, 0x02, 0x03, 0x0b -> y ( \x01\x02\x03\x0b |0) + 0x01, 0x02, 0x03, 0x0d -> z ( \x01\x02\x03\x0d |3) + 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") + */ + UChar tgt[10]; + UChar* target = tgt; + UChar* targetLimit = target + 1; /* expect overflow from converting */ + const char* source = head; + const char* sourceLimit = source + strlen(head); + int32_t len = 0; + cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); + if(U_FAILURE(status)){ + log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); + return; + } + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); + ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_toUCountPending(cnv,&status); + if(U_FAILURE(status)){ + log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len != 4){ + log_err("Did not get the expected len for head.\n"); + } + source=mid; + sourceLimit = source+strlen(mid); + ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_toUCountPending(cnv,&status); + if(U_FAILURE(status)){ + log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len != 8){ + log_err("Did not get the expected len for mid.\n"); + } + + source=tail; + sourceLimit = source+strlen(tail); + targetLimit = target; + ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + if(status != U_BUFFER_OVERFLOW_ERROR){ + log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + status = U_ZERO_ERROR; + len = ucnv_toUCountPending(cnv,&status); + /* mid[4] is pending, tail has not been consumed */ + if(U_FAILURE(status)){ + log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len != 4){ + log_err("Did not get the expected len for tail.\n"); + } + ucnv_close(cnv); + } +#endif +} +static void TestOneDefaultNameChange(const char *name, const char *expected) { + UErrorCode status = U_ZERO_ERROR; + UConverter *cnv; + ucnv_setDefaultName(name); + if(strcmp(ucnv_getDefaultName(), expected)==0) + log_verbose("setDefaultName of %s works.\n", name); + else + log_err("setDefaultName of %s failed\n", name); + cnv=ucnv_open(NULL, &status); + if (U_FAILURE(status) || cnv == NULL) { + log_err("opening the default converter of %s failed\n", name); + return; + } + if(strcmp(ucnv_getName(cnv, &status), expected)==0) + log_verbose("ucnv_getName of %s works.\n", name); + else + log_err("ucnv_getName of %s failed\n", name); + ucnv_close(cnv); +} + +static void TestDefaultName(void) { + /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ + static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; + strcpy(defaultName, ucnv_getDefaultName()); + + log_verbose("getDefaultName returned %s\n", defaultName); + + /*change the default name by setting it */ + TestOneDefaultNameChange("UTF-8", "UTF-8"); +#if U_CHARSET_IS_UTF8 + TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); + TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); + TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); +#else +# if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); + TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); +# endif + TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); +#endif + + /*set the default name back*/ + ucnv_setDefaultName(defaultName); +} + +/* Test that ucnv_compareNames() matches names according to spec. ----------- */ + +static int +sign(int n) { + if(n==0) { + return 0; + } else if(n<0) { + return -1; + } else /* n>0 */ { + return 1; + } +} + +static void +compareNames(const char **names) { + const char *relation, *name1, *name2; + int rel, result; + + relation=*names++; + if(*relation=='=') { + rel = 0; + } else if(*relation=='<') { + rel = -1; + } else { + rel = 1; + } + + name1=*names++; + if(name1==NULL) { + return; + } + while((name2=*names++)!=NULL) { + result=ucnv_compareNames(name1, name2); + if(sign(result)!=rel) { + log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); + } + name1=name2; + } +} + +static void +TestCompareNames() { + static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; + static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; + static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; + static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; + + compareNames(equalUTF8); + compareNames(equalIBM); + compareNames(lessMac); + compareNames(lessUTF080); +} + +static void +TestSubstString() { + static const UChar surrogate[1]={ 0xd900 }; + char buffer[16]; + + static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; + static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; + UConverter *cnv; + UErrorCode errorCode; + int32_t length; + int8_t len8; + + /* UTF-16/32: test that the BOM is output before the sub character */ + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("UTF-16", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); + return; + } + length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); + ucnv_close(cnv); + if(U_FAILURE(errorCode) || + length!=4 || + NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) + ) { + log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); + } + + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("UTF-32", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); + return; + } + length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); + ucnv_close(cnv); + if(U_FAILURE(errorCode) || + length!=8 || + NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) + ) { + log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); + } + + /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("ISO-8859-1", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); + return; + } + ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); + } else { + len8 = sizeof(buffer); + ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); + /* Stateless converter, we expect the string converted to charset bytes. */ + if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { + log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); + } + } + ucnv_close(cnv); + +#if !UCONFIG_NO_LEGACY_CONVERSION + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("HZ", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); + return; + } + ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); + } else { + len8 = sizeof(buffer); + ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); + /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ + if(U_FAILURE(errorCode) || len8!=0) { + log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); + } + } + ucnv_close(cnv); +#endif + /* + * Further testing of ucnv_setSubstString() is done via intltest convert. + * We do not test edge cases of illegal arguments and similar because the + * function implementation uses all of its parameters in calls to other + * functions with UErrorCode parameters. + */ +} + +static void +InvalidArguments() { + UConverter *cnv; + UErrorCode errorCode; + char charBuffer[2] = {1, 1}; + char ucharAsCharBuffer[2] = {2, 2}; + char *charsPtr = charBuffer; + UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; + UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); + + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("UTF-8", &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); + return; + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because an incomplete UChar is being passed in */ + ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because ucharsBadPtr is > than ucharsPtr */ + ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because an incomplete UChar is being passed in */ + ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because ucharsBadPtr is > than ucharsPtr */ + ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); + } + + if (charBuffer[0] != 1 || charBuffer[1] != 1 + || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) + { + log_err("Data was incorrectly written to buffers\n"); + } + + ucnv_close(cnv); +} + +static void TestGetName() { + static const char *const names[] = { + "Unicode", "UTF-16", + "UnicodeBigUnmarked", "UTF-16BE", + "UnicodeBig", "UTF-16BE,version=1", + "UnicodeLittleUnmarked", "UTF-16LE", + "UnicodeLittle", "UTF-16LE,version=1", + "x-UTF-16LE-BOM", "UTF-16LE,version=1" + }; + int32_t i; + for(i = 0; i < UPRV_LENGTHOF(names); i += 2) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = ucnv_open(names[i], &errorCode); + if(U_SUCCESS(errorCode)) { + const char *name = ucnv_getName(cnv, &errorCode); + if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { + log_err("ucnv_getName(%s) = %s != %s -- %s\n", + names[i], name, names[i+1], u_errorName(errorCode)); + } + ucnv_close(cnv); + } + } +} + +static void TestUTFBOM() { + static const UChar a16[] = { 0x61 }; + static const char *const names[] = { + "UTF-16", + "UTF-16,version=1", + "UTF-16BE", + "UnicodeBig", + "UTF-16LE", + "UnicodeLittle" + }; + static const uint8_t expected[][5] = { +#if U_IS_BIG_ENDIAN + { 4, 0xfe, 0xff, 0, 0x61 }, + { 4, 0xfe, 0xff, 0, 0x61 }, +#else + { 4, 0xff, 0xfe, 0x61, 0 }, + { 4, 0xff, 0xfe, 0x61, 0 }, +#endif + + { 2, 0, 0x61 }, + { 4, 0xfe, 0xff, 0, 0x61 }, + + { 2, 0x61, 0 }, + { 4, 0xff, 0xfe, 0x61, 0 } + }; + + char bytes[10]; + int32_t i; + + for(i = 0; i < UPRV_LENGTHOF(names); ++i) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = ucnv_open(names[i], &errorCode); + int32_t length = 0; + const uint8_t *exp = expected[i]; + if (U_FAILURE(errorCode)) { + log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); + continue; + } + length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); + + if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { + log_err("unexpected %s BOM writing behavior -- %s\n", + names[i], u_errorName(errorCode)); + } + ucnv_close(cnv); + } +}