X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..1a147d096ae81f4c8262f7bfc56bd19fc2dee932:/icuSources/test/cintltst/ccapitst.c diff --git a/icuSources/test/cintltst/ccapitst.c b/icuSources/test/cintltst/ccapitst.c index bc08e506..22785b38 100644 --- a/icuSources/test/cintltst/ccapitst.c +++ b/icuSources/test/cintltst/ccapitst.c @@ -1,16 +1,18 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2004, International Business Machines Corporation and + * Copyright (c) 1997-2016, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ -/******************************************************************************** +/***************************************************************************** * -* File CU_CAPITST.C +* File ccapitst.c * * Modification History: * Name Description * Madhu Katragadda Ported for C API -********************************************************************************* +****************************************************************************** */ #include #include @@ -20,68 +22,24 @@ #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" #include "unicode/putil.h" +#include "unicode/uset.h" #include "unicode/ustring.h" +#include "unicode/utf8.h" #include "ucnv_bld.h" /* for sizeof(UConverter) */ +#include "cmemory.h" /* for UAlignedMemory */ #include "cintltst.h" #include "ccapitst.h" - -/* for not including "cstring.h" -begin*/ -#ifdef WIN32 -# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2) -#elif defined(POSIX) -# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2) -#else -# define ctest_stricmp(str1, str2) T_CString_stricmp(str1, str2) -#endif - -static int U_EXPORT2 -T_CString_stricmp(const char *str1, const char *str2) { - if(str1==NULL) { - if(str2==NULL) { - return 0; - } else { - return -1; - } - } else if(str2==NULL) { - return 1; - } else { - /* compare non-NULL strings lexically with lowercase */ - int rc; - unsigned char c1, c2; - for(;;) { - c1=(unsigned char)*str1; - c2=(unsigned char)*str2; - if(c1==0) { - if(c2==0) { - return 0; - } else { - return -1; - } - } else if(c2==0) { - return 1; - } else { - /* compare non-zero characters with lowercase */ - rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2); - if(rc!=0) { - return rc; - } - } - ++str1; - ++str2; - } - } -} -/* for not including "cstring.h" -end*/ - -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +#include "cstring.h" #define NUM_CODEPAGE 1 #define MAX_FILE_LEN 1024*20 #define UCS_FILE_NAME_SIZE 512 /*returns an action other than the one provided*/ +#if !UCONFIG_NO_LEGACY_CONVERSION static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); +#endif static UConverter * cnv_open(const char *name, UErrorCode *pErrorCode) { @@ -99,12 +57,29 @@ static void TestDuplicateAlias(void); static void TestCCSID(void); static void TestJ932(void); static void TestJ1968(void); +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION static void TestLMBCSMaxChar(void); +#endif + +#if !UCONFIG_NO_LEGACY_CONVERSION static void TestConvertSafeCloneCallback(void); +#endif + static void TestEBCDICSwapLFNL(void); static void TestConvertEx(void); +static void TestConvertExFromUTF8(void); +static void TestConvertExFromUTF8_C5F0(void); static void TestConvertAlgorithmic(void); void TestDefaultConverterError(void); /* defined in cctest.c */ + void TestDefaultConverterSet(void); /* defined in cctest.c */ +static void TestToUCountPending(void); +static void TestFromUCountPending(void); +static void TestDefaultName(void); +static void TestCompareNames(void); +static void TestSubstString(void); +static void InvalidArguments(void); +static void TestGetName(void); +static void TestUTFBOM(void); void addTestConvert(TestNode** root); @@ -115,16 +90,33 @@ void addTestConvert(TestNode** root) addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); - addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); - addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); + addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); +#if !UCONFIG_NO_LEGACY_CONVERSION + addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); +#endif addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); +#endif addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); + addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); + addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); + addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); +#if !UCONFIG_NO_FILE_IO + addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); + addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); +#endif + addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); + addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); + addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); + addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); + addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); + addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); } static void ListNames(void) { @@ -138,7 +130,7 @@ static void ListNames(void) { log_verbose("Testing ucnv_openAllNames()..."); allNamesEnum = ucnv_openAllNames(&err); if(U_FAILURE(err)) { - log_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); + log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); } else { const char *string = NULL; @@ -150,7 +142,10 @@ static void ListNames(void) { count1++; log_verbose("read \"%s\", length %i\n", string, len); } - err = U_ZERO_ERROR; + if (U_FAILURE(err)) { + log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); + err = U_ZERO_ERROR; + } uenum_reset(allNamesEnum, &err); while ((string = uenum_next(allNamesEnum, &len, &err))) { count2++; @@ -185,7 +180,7 @@ static void ListNames(void) { /* Test ucnv_countAliases() etc. */ count = ucnv_countAliases("utf-8", &err); if(U_FAILURE(err)) { - log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); + log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); } else if(count <= 0) { log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); } else { @@ -238,6 +233,7 @@ static void ListNames(void) { static void TestConvert() { +#if !UCONFIG_NO_LEGACY_CONVERSION char myptr[4]; char save[4]; int32_t testLong1 = 0; @@ -258,7 +254,6 @@ static void TestConvert() UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ UChar* my_ucs_file_buffer_1; int8_t ii = 0; - int32_t j = 0; uint16_t codepage_index = 0; int32_t cp = 0; UErrorCode err = U_ZERO_ERROR; @@ -420,7 +415,7 @@ static void TestConvert() const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; char *target=0; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); err=U_ZERO_ERROR; targetLimit=0; @@ -455,14 +450,14 @@ static void TestConvert() } err=U_ILLEGAL_ARGUMENT_ERROR; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); if(i !=0 ){ log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); } err=U_ZERO_ERROR; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); targetLimit=0; i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ @@ -508,23 +503,6 @@ static void TestConvert() log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); } - /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ - { - static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; - strcpy(defaultName, ucnv_getDefaultName()); - - log_verbose("getDefaultName returned %s\n", defaultName); - - /*change the default name by setting it */ - ucnv_setDefaultName("changed"); - if(strcmp(ucnv_getDefaultName(), "changed")==0) - log_verbose("setDefaultName o.k"); - else - log_err("setDefaultName failed"); - /*set the default name back*/ - ucnv_setDefaultName(defaultName); - } - ucnv_close(someConverters[0]); ucnv_close(someConverters[1]); ucnv_close(someConverters[2]); @@ -561,7 +539,7 @@ static void TestConvert() ucs_file_in = fopen(ucs_file_name,"rb"); if (!ucs_file_in) { - log_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); + log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); return; } @@ -573,8 +551,8 @@ static void TestConvert() if (!myConverter || U_FAILURE(err)) { log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); - - return; + fclose(ucs_file_in); + break; } /*testing for ucnv_getName() */ @@ -586,7 +564,7 @@ static void TestConvert() { log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); } - if (ctest_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) + if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) log_err("getName failed\n"); else log_verbose("getName ok\n"); @@ -710,28 +688,37 @@ static void TestConvert() /*getDisplayName*/ log_verbose("\n---Testing ucnv_getDisplayName()...\n"); locale=CodePagesLocale[codepage_index]; - displayname=(UChar*)malloc(1 * sizeof(UChar)); len=0; - disnamelen = ucnv_getDisplayName(myConverter,locale,displayname, len, &err); - if(err==U_BUFFER_OVERFLOW_ERROR) - { + displayname=NULL; + disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); + if(err==U_BUFFER_OVERFLOW_ERROR) { err=U_ZERO_ERROR; - displayname=(UChar*)realloc(displayname, (disnamelen+1) * sizeof(UChar)); + displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); - if(U_FAILURE(err)) - { - log_err("getDisplayName failed the error is %s\n", myErrorName(err)); + if(U_FAILURE(err)) { + log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); } - else + else { log_verbose(" getDisplayName o.k.\n"); + } + free(displayname); + displayname=NULL; + } + else { + log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); } /*test ucnv_getDiaplayName with error condition*/ - log_verbose("\n---Testing ucnv_getDisplayName()...\n"); err= U_ILLEGAL_ARGUMENT_ERROR; - len=ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); + len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); if( len !=0 ){ log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); } + /*test ucnv_getDiaplayName with error condition*/ + err=U_ZERO_ERROR; + len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); + if( len !=0 || U_SUCCESS(err)){ + log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); + } err=U_ZERO_ERROR; /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ @@ -851,16 +838,21 @@ static void TestConvert() /*Reads the BOM*/ - fread(&BOM, sizeof(UChar), 1, ucs_file_in); + { + // Note: gcc produces a compile warning if the return value from fread() is ignored. + size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in); + (void)numRead; + } if (BOM!=0xFEFF && BOM!=0xFFFE) { log_err("File Missing BOM...Bailing!\n"); - return; + fclose(ucs_file_in); + break; } /*Reads in the file*/ - while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) + while(!feof(ucs_file_in)&&(i+=(int32_t)fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) { myUChar = ucs_file_buffer[i-1]; @@ -903,7 +895,7 @@ static void TestConvert() NULL, targetcapacity2, output_cp_buffer, - strlen(output_cp_buffer), + (int32_t)strlen(output_cp_buffer), &err); /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ @@ -915,7 +907,7 @@ static void TestConvert() uchar2, targetsize+1, output_cp_buffer, - strlen(output_cp_buffer), + (int32_t)strlen(output_cp_buffer), &err); if(U_FAILURE(err)) @@ -955,12 +947,12 @@ static void TestConvert() log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); } /*toUChars with error conditions*/ - targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); + targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); if(targetsize != 0){ log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); } err=U_ZERO_ERROR; - targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); + targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); } @@ -970,7 +962,7 @@ static void TestConvert() log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); } targetcapacity2=0; - targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); + targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); if (err != U_STRING_NOT_TERMINATED_WARNING) { log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", u_errorName(err)); @@ -981,7 +973,6 @@ static void TestConvert() /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ /*Clean up re-usable vars*/ - j=0; log_verbose("Testing ucnv_fromUnicode().....\n"); tmp_ucs_buf=ucs_file_buffer_use; ucnv_fromUnicode(myConverter, &mytarget_1, @@ -992,6 +983,7 @@ static void TestConvert() TRUE, &err); consumedUni = (UChar*)tmp_consumedUni; + (void)consumedUni; /* Suppress set but not used warning. */ if (U_FAILURE(err)) { @@ -1046,7 +1038,6 @@ static void TestConvert() fclose(ucs_file_in); ucnv_close(myConverter); - free(displayname); if (uchar1 != 0) free(uchar1); if (uchar2 != 0) free(uchar2); if (uchar3 != 0) free(uchar3); @@ -1056,20 +1047,23 @@ static void TestConvert() free((void*)output_cp_buffer); free((void*)ucs_file_buffer); free((void*)my_ucs_file_buffer); +#endif } +#if !UCONFIG_NO_LEGACY_CONVERSION static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) { return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; } - static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) { return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; } +#endif static void TestFlushCache(void) { +#if !UCONFIG_NO_LEGACY_CONVERSION UErrorCode err = U_ZERO_ERROR; UConverter* someConverters[5]; int flushCount = 0; @@ -1139,7 +1133,7 @@ static void TestFlushCache(void) { log_verbose("Flush cache ok\n"); else log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); - +#endif } /** @@ -1158,13 +1152,11 @@ static void TestAlias() { const char* ISO_2022_NAMES[] = {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; - int32_t ISO_2022_NAMES_LENGTH = - sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); + int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES); const char *UTF8_NAMES[] = { "UTF-8", "utf-8", "utf8", "ibm-1208", "utf_8", "ibm1208", "cp1208" }; - int32_t UTF8_NAMES_LENGTH = - sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); + int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES); struct { const char *name; @@ -1176,7 +1168,7 @@ static void TestAlias() { { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, { "UTF-32", "ucs-4" } }; - int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); + int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES); /* When there are bugs in gencnval or in ucnv_io, converters can appear to have no aliases. */ @@ -1205,7 +1197,7 @@ static void TestAlias() { if (strcmp(ucnv_getName(cnv, &status), name) != 0 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " - "The should be the same\n", + "They should be the same\n", name, ucnv_getName(cnv, &status)); } } @@ -1304,7 +1296,7 @@ static void TestAlias() { for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); if(!mapBack) { - log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i]); + log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); continue; } if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { @@ -1322,7 +1314,7 @@ static void TestDuplicateAlias(void) { status = U_ZERO_ERROR; alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { - log_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); + log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); } status = U_ZERO_ERROR; alias = ucnv_getStandardName("ibm-943", "IANA", &status); @@ -1366,6 +1358,7 @@ static TSCCContext *TSCC_clone(TSCCContext *ctx) return newCtx; } +#if !UCONFIG_NO_LEGACY_CONVERSION static void TSCC_fromU(const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar* codeUnits, @@ -1388,6 +1381,7 @@ static void TSCC_fromU(const void *context, UErrorCode subErr = U_ZERO_ERROR; TSCCContext *newCtx; TSCCContext *junkCtx; + TSCCContext **pjunkCtx = &junkCtx; /* "recreate" it */ log_verbose("TSCC_fromU: cloning..\n"); @@ -1398,7 +1392,7 @@ static void TSCC_fromU(const void *context, } /* now, SET it */ - ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)&junkCtx); + ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); if(U_FAILURE(subErr)) { @@ -1412,7 +1406,6 @@ static void TSCC_fromU(const void *context, } } - static void TSCC_toU(const void *context, UConverterToUnicodeArgs *toUArgs, const char* codeUnits, @@ -1434,6 +1427,7 @@ static void TSCC_toU(const void *context, UErrorCode subErr = U_ZERO_ERROR; TSCCContext *newCtx; TSCCContext *junkCtx; + TSCCContext **pjunkCtx = &junkCtx; /* "recreate" it */ log_verbose("TSCC_toU: cloning..\n"); @@ -1444,7 +1438,7 @@ static void TSCC_toU(const void *context, } /* now, SET it */ - ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)&junkCtx); + ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); if(U_FAILURE(subErr)) { @@ -1484,6 +1478,7 @@ static void TestConvertSafeCloneCallback() UErrorCode err = U_ZERO_ERROR; TSCCContext from1, to1; TSCCContext *from2, *from3, *to2, *to3; + TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; char hunk[8192]; int32_t hunkSize = 8192; UConverterFromUCallback junkFrom; @@ -1493,7 +1488,7 @@ static void TestConvertSafeCloneCallback() conv1 = ucnv_open("iso-8859-3", &err); if(U_FAILURE(err)) { - log_data_err("Err opening iso-8859-3, %s", u_errorName(err)); + log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); return; } @@ -1521,8 +1516,8 @@ static void TestConvertSafeCloneCallback() log_verbose("Cloned to conv2=%p.\n", conv2); /********** from *********************/ - ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)&from2); - ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)&from3); + ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); + ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); TSCC_print_log(from2, "from2"); TSCC_print_log(from3, "from3(==from1)"); @@ -1554,8 +1549,8 @@ static void TestConvertSafeCloneCallback() } /********** to *********************/ - ucnv_getToUCallBack(conv2, &junkTo, (const void**)&to2); - ucnv_getToUCallBack(conv1, &junkTo, (const void**)&to3); + ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); + ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); TSCC_print_log(to2, "to2"); TSCC_print_log(to3, "to3(==to1)"); @@ -1643,6 +1638,7 @@ static void TestConvertSafeCloneCallback() free(from2); /* from1 is stack based */ } } +#endif static UBool containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { @@ -1660,28 +1656,39 @@ static void TestConvertSafeClone() { /* one 'regular' & all the 'private stateful' converters */ static const char *const names[] = { +#if !UCONFIG_NO_LEGACY_CONVERSION "ibm-1047", "ISO_2022,locale=zh,version=1", +#endif "SCSU", +#if !UCONFIG_NO_LEGACY_CONVERSION "HZ", "lmbcs", "ISCII,version=0", "ISO_2022,locale=kr,version=1", "ISO_2022,locale=jp,version=2", +#endif "BOCU-1", "UTF-7", +#if !UCONFIG_NO_LEGACY_CONVERSION "IMAP-mailbox-name", "ibm-1047-s390" +#else + "IMAP=mailbox-name" +#endif }; + /* store the actual sizes of each converter */ + int32_t actualSizes[UPRV_LENGTHOF(names)]; + static const int32_t bufferSizes[] = { U_CNV_SAFECLONE_BUFFERSIZE, (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ }; - char charBuffer [21]; /* Leave at an odd number for alignment testing */ - uint8_t buffer [3] [U_CNV_SAFECLONE_BUFFERSIZE]; + char charBuffer[21]; /* Leave at an odd number for alignment testing */ + uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; int32_t bufferSize, maxBufferSize; const char *maxName; UConverter * cnv, *cnv2; @@ -1689,17 +1696,17 @@ static void TestConvertSafeClone() char *pCharBuffer; const char *pConstCharBuffer; - const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); - UChar uniBuffer [] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ - UChar uniCharBuffer [20]; - char charSourceBuffer [] = { 0x1b, 0x24, 0x42 }; + const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer); + UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ + UChar uniCharBuffer[20]; + char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; const char *pCharSource = charSourceBuffer; const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); UChar *pUCharTarget = uniCharBuffer; - UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); + UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer); const UChar * pUniBuffer; - const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); - int32_t index, j; + const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer); + int32_t idx, j; err = U_ZERO_ERROR; cnv = ucnv_open(names[0], &err); @@ -1708,28 +1715,29 @@ static void TestConvertSafeClone() /* Null status - just returns NULL */ bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0)) + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) { log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); } /* error status - should return 0 & keep error the same */ err = U_MEMORY_ALLOCATION_ERROR; - if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) { log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); } err = U_ZERO_ERROR; - /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ - if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) + /* Null buffer size pointer is ok */ + if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err)) { log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); } + ucnv_close(cnv2); err = U_ZERO_ERROR; /* buffer size pointer is 0 - fill in pbufferSize with a size */ bufferSize = 0; - if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) { log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); } @@ -1739,7 +1747,7 @@ static void TestConvertSafeClone() log_err("FAIL: Pre-calculated buffer size is too small\n"); } /* Verify we can use this run-time calculated size */ - if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) + if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) { log_err("FAIL: Converter can't be cloned with run-time size\n"); } @@ -1749,7 +1757,7 @@ static void TestConvertSafeClone() /* size one byte too small - should allocate & let us know */ --bufferSize; - if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) { log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); } @@ -1761,7 +1769,7 @@ static void TestConvertSafeClone() bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ - if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) { log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); } @@ -1772,7 +1780,7 @@ static void TestConvertSafeClone() err = U_ZERO_ERROR; /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ - if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) + if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) { log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); } @@ -1785,23 +1793,23 @@ static void TestConvertSafeClone() /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ - for(j = 0; j < LENGTHOF(bufferSizes); ++j) { - for (index = 0; index < LENGTHOF(names); index++) + for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) { + for (idx = 0; idx < UPRV_LENGTHOF(names); idx++) { err = U_ZERO_ERROR; - cnv = ucnv_open(names[index], &err); + cnv = ucnv_open(names[idx], &err); if(U_FAILURE(err)) { - log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err)); + log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); continue; } if(j == 0) { /* preflight to get maxBufferSize */ - bufferSize = 0; - ucnv_safeClone(cnv, NULL, &bufferSize, &err); - if(bufferSize > maxBufferSize) { - maxBufferSize = bufferSize; - maxName = names[index]; + actualSizes[idx] = 0; + ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); + if(actualSizes[idx] > maxBufferSize) { + maxBufferSize = actualSizes[idx]; + maxName = names[idx]; } } @@ -1813,6 +1821,12 @@ static void TestConvertSafeClone() /* close the original immediately to make sure that the clone works by itself */ ucnv_close(cnv); + if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && + err == U_SAFECLONE_ALLOCATED_WARNING + ) { + log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); + } + /* check if the clone function overwrote any bytes that it is not supposed to touch */ if(bufferSize <= bufferSizes[j]) { /* used the stack buffer */ @@ -1820,13 +1834,13 @@ static void TestConvertSafeClone() containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) ) { log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", - names[index], bufferSize, bufferSizes[j]); + names[idx], bufferSize, bufferSizes[j]); } } else { /* heap-allocated the clone */ if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", - names[index], bufferSize, bufferSizes[j]); + names[idx], bufferSize, bufferSizes[j]); } } @@ -1869,15 +1883,20 @@ static void TestConvertSafeClone() log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); + if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { + log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", + maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); + } } static void TestCCSID() { +#if !UCONFIG_NO_LEGACY_CONVERSION UConverter *cnv; UErrorCode errorCode; int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; int32_t i, ccsid; - for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { + for(i=0; iiso-8859-1: got preflighting size %d instead of 10\n", size); } +#if !UCONFIG_ONLY_HTML_CONVERSION err = U_ZERO_ERROR; /* do the conversion */ size = ucnv_convert("UTF-32BE", /* out */ @@ -2040,6 +2066,7 @@ static void bug2() /* bug2: size is 5, should be 12 */ log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); } +#endif } /* @@ -2048,8 +2075,9 @@ static void bug2() */ static void bug3() { - static char char_in[CHUNK_SIZE*4]; - static char target[5]; +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + char char_in[CHUNK_SIZE*4]; + char target[5]; UErrorCode err = U_ZERO_ERROR; int32_t size; @@ -2102,6 +2130,7 @@ static void bug3() */ log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); } +#endif } static void @@ -2157,6 +2186,11 @@ convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, FALSE, flush, &errorCode); targetLength=(int32_t)(target-targetBuffer); + if(target>targetLimit) { + log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", + testName, chunkSize, target, targetLimit); + break; /* TODO: major problem! */ + } if(errorCode==U_BUFFER_OVERFLOW_ERROR) { /* continue converting another chunk */ errorCode=U_ZERO_ERROR; @@ -2210,6 +2244,7 @@ convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, } static void TestConvertEx() { +#if !UCONFIG_NO_LEGACY_CONVERSION static const uint8_t utf8[]={ /* 4e00 30a1 ff61 0410 */ @@ -2338,12 +2373,395 @@ static void TestConvertEx() { log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); } + /* streaming conversion without a pivot buffer */ + errorCode=U_ZERO_ERROR; + src=srcBuffer; + pivotSource=pivotBuffer; + ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, + NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); + } + ucnv_close(cnv1); ucnv_close(cnv2); +#endif +} + +/* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ +static const char *const badUTF8[]={ + /* trail byte */ + "\x80", + + /* truncated multi-byte sequences */ + "\xd0", + "\xe0", + "\xe1", + "\xed", + "\xee", + "\xf0", + "\xf1", + "\xf4", + "\xf8", + "\xfc", + + "\xe0\x80", + "\xe0\xa0", + "\xe1\x80", + "\xed\x80", + "\xed\xa0", + "\xee\x80", + "\xf0\x80", + "\xf0\x90", + "\xf1\x80", + "\xf4\x80", + "\xf4\x90", + "\xf8\x80", + "\xfc\x80", + + "\xf0\x80\x80", + "\xf0\x90\x80", + "\xf1\x80\x80", + "\xf4\x80\x80", + "\xf4\x90\x80", + "\xf8\x80\x80", + "\xfc\x80\x80", + + "\xf8\x80\x80\x80", + "\xfc\x80\x80\x80", + + "\xfc\x80\x80\x80\x80", + + /* complete sequences but non-shortest forms or out of range etc. */ + "\xc0\x80", + "\xe0\x80\x80", + "\xed\xa0\x80", + "\xf0\x80\x80\x80", + "\xf4\x90\x80\x80", + "\xf8\x80\x80\x80\x80", + "\xfc\x80\x80\x80\x80\x80", + "\xfe", + "\xff" +}; + +#define ARG_CHAR_ARR_SIZE 8 + +/* get some character that can be converted and convert it */ +static UBool getTestChar(UConverter *cnv, const char *converterName, + char charUTF8[4], int32_t *pCharUTF8Length, + char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, + char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { + UChar utf16[U16_MAX_LENGTH]; + int32_t utf16Length; + + const UChar *utf16Source; + char *target; + + USet *set; + UChar32 c; + UErrorCode errorCode; + + errorCode=U_ZERO_ERROR; + set=uset_open(1, 0); + ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); + c=uset_charAt(set, uset_size(set)/2); + uset_close(set); + + utf16Length=0; + U16_APPEND_UNSAFE(utf16, utf16Length, c); + *pCharUTF8Length=0; + U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); + + utf16Source=utf16; + target=char0; + ucnv_fromUnicode(cnv, + &target, char0+ARG_CHAR_ARR_SIZE, + &utf16Source, utf16+utf16Length, + NULL, FALSE, &errorCode); + *pChar0Length=(int32_t)(target-char0); + + utf16Source=utf16; + target=char1; + ucnv_fromUnicode(cnv, + &target, char1+ARG_CHAR_ARR_SIZE, + &utf16Source, utf16+utf16Length, + NULL, FALSE, &errorCode); + *pChar1Length=(int32_t)(target-char1); + + if(U_FAILURE(errorCode)) { + log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); + return FALSE; + } + return TRUE; +} + +static UBool isOneTruncatedUTF8(const char *s, int32_t length) { + if(length==0) { + return FALSE; + } else if(length==1) { + return U8_IS_LEAD(s[0]); + } else { + int32_t count=U8_COUNT_TRAIL_BYTES(s[0]); + if(length<=count) { + // 2 or more bytes, but fewer than the lead byte indicates. + int32_t oneLength=0; + U8_FWD_1(s, oneLength, length); + // Truncated if we reach the end of the string. + // Not true if the lead byte and first trail byte do not start a valid sequence, + // e.g., E0 80 -> oneLength=1. + return oneLength==length; + } + return FALSE; + } +} + +static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, + char charUTF8[4], int32_t charUTF8Length, + char char0[8], int32_t char0Length, + char char1[8], int32_t char1Length) { + char utf8[16]; + int32_t utf8Length; + + char output[16]; + int32_t outputLength; + + char invalidChars[8]; + int8_t invalidLength; + + const char *source; + char *target; + + UChar pivotBuffer[8]; + UChar *pivotSource, *pivotTarget; + + UErrorCode errorCode; + int32_t i; + + /* test truncated sequences */ + errorCode=U_ZERO_ERROR; + ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); + + memcpy(utf8, charUTF8, charUTF8Length); + + for(i=0; i %s/decimal NCRs) failed\n", converterNames[i]); + } + ucnv_close(cnv); + } + ucnv_close(utf8Cnv); } static void TestConvertAlgorithmic() { +#if !UCONFIG_NO_LEGACY_CONVERSION static const uint8_t utf8[]={ /* 4e00 30a1 ff61 0410 */ @@ -2361,10 +2779,12 @@ TestConvertAlgorithmic() { /*},*/ utf16[]={ 0xfe, 0xff /* BOM only, no text */ - }, - utf32[]={ + }; +#if !UCONFIG_ONLY_HTML_CONVERSION + static const uint8_t utf32[]={ 0xff, 0xfe, 0, 0 /* BOM only, no text */ }; +#endif char target[100], utf8NUL[100], shiftJISNUL[100]; @@ -2434,6 +2854,7 @@ TestConvertAlgorithmic() { u_errorName(errorCode), length); } +#if !UCONFIG_ONLY_HTML_CONVERSION errorCode=U_ZERO_ERROR; length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || @@ -2442,6 +2863,7 @@ TestConvertAlgorithmic() { log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", u_errorName(errorCode), length); } +#endif /* bad arguments */ errorCode=U_MESSAGE_PARSE_ERROR; @@ -2464,8 +2886,10 @@ TestConvertAlgorithmic() { log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); } ucnv_close(cnv); +#endif } +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION static void TestLMBCSMaxChar(void) { static const struct { int8_t maxSize; @@ -2497,7 +2921,7 @@ static void TestLMBCSMaxChar(void) { { 4, "HZ"}, { 3, "ISO-2022"}, - { 3, "ISO-2022-KR"}, + { 8, "ISO-2022-KR"}, { 6, "ISO-2022-JP"}, { 8, "ISO-2022-CN"}, @@ -2517,7 +2941,7 @@ static void TestLMBCSMaxChar(void) { }; int32_t idx; - for (idx = 0; idx < LENGTHOF(converter); idx++) { + for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) { UErrorCode status = U_ZERO_ERROR; UConverter *cnv = cnv_open(converter[idx].name, &status); if (U_FAILURE(status)) { @@ -2535,7 +2959,7 @@ static void TestLMBCSMaxChar(void) { log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); } } - +#endif static void TestJ1968(void) { UErrorCode err = U_ZERO_ERROR; @@ -2623,6 +3047,7 @@ static void TestJ1968(void) { } +#if !UCONFIG_NO_LEGACY_CONVERSION static void testSwap(const char *name, UBool swap) { /* @@ -2678,12 +3103,12 @@ testSwap(const char *name, UBool swap) { /* convert to EBCDIC */ pcu=text; pc=normal; - ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); + ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); normalLength=(int32_t)(pc-normal); pcu=text; pc=swapped; - ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); + ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); swappedLength=(int32_t)(pc-swapped); if(U_FAILURE(errorCode)) { @@ -2716,12 +3141,12 @@ testSwap(const char *name, UBool swap) { /* convert back to Unicode (may not roundtrip) */ pc=normal; pu=uNormal; - ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); + ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); normalLength=(int32_t)(pu-uNormal); pc=normal; pu=uSwapped; - ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); + ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); swappedLength=(int32_t)(pu-uSwapped); if(U_FAILURE(errorCode)) { @@ -2772,7 +3197,544 @@ TestEBCDICSwapLFNL() { int i; - for(i=0; i x ( \x07 |0) + \U00101234\U00050005 -> y (+ \x07+\x00+\x01\x02\x0e+\x05 |0) + \U00101234\U00050005\U00060006 -> z (++ \x07+\x00+\x01\x02\x0f+\x09 |0) + \U00060007 -> unassigned + */ + static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ + static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ + static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ + char tgt[10]; + char* target = tgt; + char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ + const UChar* source = head; + const UChar* sourceLimit = source + u_strlen(head); + int32_t len = 0; + ucnv_reset(cnv); + ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_fromUCountPending(cnv, &status); + if(U_FAILURE(status)){ + log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); + status = U_ZERO_ERROR; + } + if(len!=4){ + log_err("ucnv_fromUInputHeld did not return correct length for head\n"); + } + source = middle; + sourceLimit = source + u_strlen(middle); + ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_fromUCountPending(cnv, &status); + if(U_FAILURE(status)){ + log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); + status = U_ZERO_ERROR; + } + if(len!=5){ + log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); + } + source = tail; + sourceLimit = source + u_strlen(tail); + ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + if(status != U_BUFFER_OVERFLOW_ERROR){ + log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + status = U_ZERO_ERROR; + len = ucnv_fromUCountPending(cnv, &status); + /* middle[1] is pending, tail has not been consumed */ + if(U_FAILURE(status)){ + log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len!=1){ + log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); + } + } + ucnv_close(cnv); +#endif +} + +static void +TestToUCountPending(){ +#if !UCONFIG_NO_LEGACY_CONVERSION + UErrorCode status = U_ZERO_ERROR; + static const struct { + char input[6]; + int32_t len; + int32_t exp; + }toUnicodeTests[] = { + /*m:n conversion*/ + {{0x05, 0x01, 0x02},3,3}, + {{0x01, 0x02},2,2}, + {{0x07, 0x00, 0x01, 0x02},4,4}, + }; + + int i; + UConverterToUCallback *oldToUAction= NULL; + UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); + if(U_FAILURE(status)){ + log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); + return; + } + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); + for(i=0; i x ( \x01\x02\x03\x0a |0) + 0x01, 0x02, 0x03, 0x0b -> y ( \x01\x02\x03\x0b |0) + 0x01, 0x02, 0x03, 0x0d -> z ( \x01\x02\x03\x0d |3) + 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") + */ + UChar tgt[10]; + UChar* target = tgt; + UChar* targetLimit = target + 1; /* expect overflow from converting */ + const char* source = head; + const char* sourceLimit = source + strlen(head); + int32_t len = 0; + cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); + if(U_FAILURE(status)){ + log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); + return; + } + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); + ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_toUCountPending(cnv,&status); + if(U_FAILURE(status)){ + log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len != 4){ + log_err("Did not get the expected len for head.\n"); + } + source=mid; + sourceLimit = source+strlen(mid); + ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + len = ucnv_toUCountPending(cnv,&status); + if(U_FAILURE(status)){ + log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len != 8){ + log_err("Did not get the expected len for mid.\n"); + } + + source=tail; + sourceLimit = source+strlen(tail); + targetLimit = target; + ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); + if(status != U_BUFFER_OVERFLOW_ERROR){ + log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); + } + status = U_ZERO_ERROR; + len = ucnv_toUCountPending(cnv,&status); + /* mid[4] is pending, tail has not been consumed */ + if(U_FAILURE(status)){ + log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); + } + if(len != 4){ + log_err("Did not get the expected len for tail.\n"); + } + ucnv_close(cnv); + } +#endif +} + +static void TestOneDefaultNameChange(const char *name, const char *expected) { + UErrorCode status = U_ZERO_ERROR; + UConverter *cnv; + ucnv_setDefaultName(name); + if(strcmp(ucnv_getDefaultName(), expected)==0) + log_verbose("setDefaultName of %s works.\n", name); + else + log_err("setDefaultName of %s failed\n", name); + cnv=ucnv_open(NULL, &status); + if (U_FAILURE(status) || cnv == NULL) { + log_err("opening the default converter of %s failed\n", name); + return; + } + if(strcmp(ucnv_getName(cnv, &status), expected)==0) + log_verbose("ucnv_getName of %s works.\n", name); + else + log_err("ucnv_getName of %s failed\n", name); + ucnv_close(cnv); +} + +static void TestDefaultName(void) { + /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ + static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; + strcpy(defaultName, ucnv_getDefaultName()); + + log_verbose("getDefaultName returned %s\n", defaultName); + + /*change the default name by setting it */ + TestOneDefaultNameChange("UTF-8", "UTF-8"); +#if U_CHARSET_IS_UTF8 + TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); + TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); + TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); +#else +# if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); + TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); +# endif + TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); +#endif + + /*set the default name back*/ + ucnv_setDefaultName(defaultName); +} + +/* Test that ucnv_compareNames() matches names according to spec. ----------- */ + +static int +sign(int n) { + if(n==0) { + return 0; + } else if(n<0) { + return -1; + } else /* n>0 */ { + return 1; + } +} + +static void +compareNames(const char **names) { + const char *relation, *name1, *name2; + int rel, result; + + relation=*names++; + if(*relation=='=') { + rel = 0; + } else if(*relation=='<') { + rel = -1; + } else { + rel = 1; + } + + name1=*names++; + if(name1==NULL) { + return; + } + while((name2=*names++)!=NULL) { + result=ucnv_compareNames(name1, name2); + if(sign(result)!=rel) { + log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); + } + name1=name2; + } +} + +static void +TestCompareNames() { + static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; + static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; + static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; + static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; + + compareNames(equalUTF8); + compareNames(equalIBM); + compareNames(lessMac); + compareNames(lessUTF080); +} + +static void +TestSubstString() { + static const UChar surrogate[1]={ 0xd900 }; + char buffer[16]; + + static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; + static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; + UConverter *cnv; + UErrorCode errorCode; + int32_t length; + int8_t len8; + + /* UTF-16/32: test that the BOM is output before the sub character */ + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("UTF-16", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); + return; + } + length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); + ucnv_close(cnv); + if(U_FAILURE(errorCode) || + length!=4 || + NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) + ) { + log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); + } + + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("UTF-32", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); + return; + } + length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); + ucnv_close(cnv); + if(U_FAILURE(errorCode) || + length!=8 || + NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) + ) { + log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); + } + + /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("ISO-8859-1", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); + return; + } + ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); + } else { + len8 = sizeof(buffer); + ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); + /* Stateless converter, we expect the string converted to charset bytes. */ + if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { + log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); + } + } + ucnv_close(cnv); + +#if !UCONFIG_NO_LEGACY_CONVERSION + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("HZ", &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); + return; + } + ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); + } else { + len8 = sizeof(buffer); + ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); + /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ + if(U_FAILURE(errorCode) || len8!=0) { + log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); + } + } + ucnv_close(cnv); +#endif + /* + * Further testing of ucnv_setSubstString() is done via intltest convert. + * We do not test edge cases of illegal arguments and similar because the + * function implementation uses all of its parameters in calls to other + * functions with UErrorCode parameters. + */ +} + +static void +InvalidArguments() { + UConverter *cnv; + UErrorCode errorCode; + char charBuffer[2] = {1, 1}; + char ucharAsCharBuffer[2] = {2, 2}; + char *charsPtr = charBuffer; + UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; + UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); + + errorCode=U_ZERO_ERROR; + cnv=ucnv_open("UTF-8", &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); + return; + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because an incomplete UChar is being passed in */ + ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because ucharsBadPtr is > than ucharsPtr */ + ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because an incomplete UChar is being passed in */ + ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because ucharsBadPtr is > than ucharsPtr */ + ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); + } + + if (charBuffer[0] != 1 || charBuffer[1] != 1 + || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) + { + log_err("Data was incorrectly written to buffers\n"); + } + + ucnv_close(cnv); +} + +static void TestGetName() { + static const char *const names[] = { + "Unicode", "UTF-16", + "UnicodeBigUnmarked", "UTF-16BE", + "UnicodeBig", "UTF-16BE,version=1", + "UnicodeLittleUnmarked", "UTF-16LE", + "UnicodeLittle", "UTF-16LE,version=1", + "x-UTF-16LE-BOM", "UTF-16LE,version=1" + }; + int32_t i; + for(i = 0; i < UPRV_LENGTHOF(names); i += 2) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = ucnv_open(names[i], &errorCode); + if(U_SUCCESS(errorCode)) { + const char *name = ucnv_getName(cnv, &errorCode); + if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { + log_err("ucnv_getName(%s) = %s != %s -- %s\n", + names[i], name, names[i+1], u_errorName(errorCode)); + } + ucnv_close(cnv); + } + } +} + +static void TestUTFBOM() { + static const UChar a16[] = { 0x61 }; + static const char *const names[] = { + "UTF-16", + "UTF-16,version=1", + "UTF-16BE", + "UnicodeBig", + "UTF-16LE", + "UnicodeLittle" + }; + static const uint8_t expected[][5] = { +#if U_IS_BIG_ENDIAN + { 4, 0xfe, 0xff, 0, 0x61 }, + { 4, 0xfe, 0xff, 0, 0x61 }, +#else + { 4, 0xff, 0xfe, 0x61, 0 }, + { 4, 0xff, 0xfe, 0x61, 0 }, +#endif + + { 2, 0, 0x61 }, + { 4, 0xfe, 0xff, 0, 0x61 }, + + { 2, 0x61, 0 }, + { 4, 0xff, 0xfe, 0x61, 0 } + }; + + char bytes[10]; + int32_t i; + + for(i = 0; i < UPRV_LENGTHOF(names); ++i) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = ucnv_open(names[i], &errorCode); + int32_t length = 0; + const uint8_t *exp = expected[i]; + if (U_FAILURE(errorCode)) { + log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); + continue; + } + length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); + + if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { + log_err("unexpected %s BOM writing behavior -- %s\n", + names[i], u_errorName(errorCode)); + } + ucnv_close(cnv); + } +}