X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..1a147d096ae81f4c8262f7bfc56bd19fc2dee932:/icuSources/test/cintltst/ccapitst.c?ds=inline diff --git a/icuSources/test/cintltst/ccapitst.c b/icuSources/test/cintltst/ccapitst.c index daa73bd5..22785b38 100644 --- a/icuSources/test/cintltst/ccapitst.c +++ b/icuSources/test/cintltst/ccapitst.c @@ -1,16 +1,18 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2006, International Business Machines Corporation and + * Copyright (c) 1997-2016, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ -/******************************************************************************* +/***************************************************************************** * -* File CU_CAPITST.C +* File ccapitst.c * * Modification History: * Name Description * Madhu Katragadda Ported for C API -******************************************************************************** +****************************************************************************** */ #include #include @@ -20,69 +22,24 @@ #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" #include "unicode/putil.h" +#include "unicode/uset.h" #include "unicode/ustring.h" +#include "unicode/utf8.h" #include "ucnv_bld.h" /* for sizeof(UConverter) */ #include "cmemory.h" /* for UAlignedMemory */ #include "cintltst.h" #include "ccapitst.h" - -/* for not including "cstring.h" -begin*/ -#ifdef U_WINDOWS -# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2) -#elif defined(POSIX) -# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2) -#else -# define ctest_stricmp(str1, str2) T_CString_stricmp(str1, str2) -#endif - -static int U_EXPORT2 -T_CString_stricmp(const char *str1, const char *str2) { - if(str1==NULL) { - if(str2==NULL) { - return 0; - } else { - return -1; - } - } else if(str2==NULL) { - return 1; - } else { - /* compare non-NULL strings lexically with lowercase */ - int rc; - unsigned char c1, c2; - for(;;) { - c1=(unsigned char)*str1; - c2=(unsigned char)*str2; - if(c1==0) { - if(c2==0) { - return 0; - } else { - return -1; - } - } else if(c2==0) { - return 1; - } else { - /* compare non-zero characters with lowercase */ - rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2); - if(rc!=0) { - return rc; - } - } - ++str1; - ++str2; - } - } -} -/* for not including "cstring.h" -end*/ - -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +#include "cstring.h" #define NUM_CODEPAGE 1 #define MAX_FILE_LEN 1024*20 #define UCS_FILE_NAME_SIZE 512 /*returns an action other than the one provided*/ +#if !UCONFIG_NO_LEGACY_CONVERSION static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); +#endif static UConverter * cnv_open(const char *name, UErrorCode *pErrorCode) { @@ -100,7 +57,9 @@ static void TestDuplicateAlias(void); static void TestCCSID(void); static void TestJ932(void); static void TestJ1968(void); +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION static void TestLMBCSMaxChar(void); +#endif #if !UCONFIG_NO_LEGACY_CONVERSION static void TestConvertSafeCloneCallback(void); @@ -108,13 +67,19 @@ static void TestConvertSafeCloneCallback(void); static void TestEBCDICSwapLFNL(void); static void TestConvertEx(void); +static void TestConvertExFromUTF8(void); +static void TestConvertExFromUTF8_C5F0(void); static void TestConvertAlgorithmic(void); void TestDefaultConverterError(void); /* defined in cctest.c */ + void TestDefaultConverterSet(void); /* defined in cctest.c */ static void TestToUCountPending(void); static void TestFromUCountPending(void); static void TestDefaultName(void); static void TestCompareNames(void); static void TestSubstString(void); +static void InvalidArguments(void); +static void TestGetName(void); +static void TestUTFBOM(void); void addTestConvert(TestNode** root); @@ -126,24 +91,32 @@ void addTestConvert(TestNode** root) addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); - #if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); #endif - addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); +#endif addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); + addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); + addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); + addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); +#if !UCONFIG_NO_FILE_IO addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); +#endif addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); + addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); + addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); + addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); } static void ListNames(void) { @@ -157,7 +130,7 @@ static void ListNames(void) { log_verbose("Testing ucnv_openAllNames()..."); allNamesEnum = ucnv_openAllNames(&err); if(U_FAILURE(err)) { - log_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); + log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); } else { const char *string = NULL; @@ -207,7 +180,7 @@ static void ListNames(void) { /* Test ucnv_countAliases() etc. */ count = ucnv_countAliases("utf-8", &err); if(U_FAILURE(err)) { - log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); + log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); } else if(count <= 0) { log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); } else { @@ -281,7 +254,6 @@ static void TestConvert() UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ UChar* my_ucs_file_buffer_1; int8_t ii = 0; - int32_t j = 0; uint16_t codepage_index = 0; int32_t cp = 0; UErrorCode err = U_ZERO_ERROR; @@ -443,7 +415,7 @@ static void TestConvert() const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; char *target=0; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); err=U_ZERO_ERROR; targetLimit=0; @@ -478,14 +450,14 @@ static void TestConvert() } err=U_ILLEGAL_ARGUMENT_ERROR; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); if(i !=0 ){ log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); } err=U_ZERO_ERROR; - sourceLimit=sizeof(source)/sizeof(source[0]); + sourceLimit=UPRV_LENGTHOF(source); targetLimit=0; i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ @@ -567,7 +539,7 @@ static void TestConvert() ucs_file_in = fopen(ucs_file_name,"rb"); if (!ucs_file_in) { - log_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); + log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); return; } @@ -579,8 +551,8 @@ static void TestConvert() if (!myConverter || U_FAILURE(err)) { log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); - - return; + fclose(ucs_file_in); + break; } /*testing for ucnv_getName() */ @@ -592,7 +564,7 @@ static void TestConvert() { log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); } - if (ctest_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) + if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) log_err("getName failed\n"); else log_verbose("getName ok\n"); @@ -866,16 +838,21 @@ static void TestConvert() /*Reads the BOM*/ - fread(&BOM, sizeof(UChar), 1, ucs_file_in); + { + // Note: gcc produces a compile warning if the return value from fread() is ignored. + size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in); + (void)numRead; + } if (BOM!=0xFEFF && BOM!=0xFFFE) { log_err("File Missing BOM...Bailing!\n"); - return; + fclose(ucs_file_in); + break; } /*Reads in the file*/ - while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) + while(!feof(ucs_file_in)&&(i+=(int32_t)fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) { myUChar = ucs_file_buffer[i-1]; @@ -918,7 +895,7 @@ static void TestConvert() NULL, targetcapacity2, output_cp_buffer, - strlen(output_cp_buffer), + (int32_t)strlen(output_cp_buffer), &err); /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ @@ -930,7 +907,7 @@ static void TestConvert() uchar2, targetsize+1, output_cp_buffer, - strlen(output_cp_buffer), + (int32_t)strlen(output_cp_buffer), &err); if(U_FAILURE(err)) @@ -970,12 +947,12 @@ static void TestConvert() log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); } /*toUChars with error conditions*/ - targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); + targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); if(targetsize != 0){ log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); } err=U_ZERO_ERROR; - targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); + targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); } @@ -985,7 +962,7 @@ static void TestConvert() log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); } targetcapacity2=0; - targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); + targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); if (err != U_STRING_NOT_TERMINATED_WARNING) { log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", u_errorName(err)); @@ -996,7 +973,6 @@ static void TestConvert() /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ /*Clean up re-usable vars*/ - j=0; log_verbose("Testing ucnv_fromUnicode().....\n"); tmp_ucs_buf=ucs_file_buffer_use; ucnv_fromUnicode(myConverter, &mytarget_1, @@ -1007,6 +983,7 @@ static void TestConvert() TRUE, &err); consumedUni = (UChar*)tmp_consumedUni; + (void)consumedUni; /* Suppress set but not used warning. */ if (U_FAILURE(err)) { @@ -1073,16 +1050,17 @@ static void TestConvert() #endif } +#if !UCONFIG_NO_LEGACY_CONVERSION static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) { return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; } - static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) { return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; } +#endif static void TestFlushCache(void) { #if !UCONFIG_NO_LEGACY_CONVERSION @@ -1174,13 +1152,11 @@ static void TestAlias() { const char* ISO_2022_NAMES[] = {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; - int32_t ISO_2022_NAMES_LENGTH = - sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); + int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES); const char *UTF8_NAMES[] = { "UTF-8", "utf-8", "utf8", "ibm-1208", "utf_8", "ibm1208", "cp1208" }; - int32_t UTF8_NAMES_LENGTH = - sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); + int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES); struct { const char *name; @@ -1192,7 +1168,7 @@ static void TestAlias() { { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, { "UTF-32", "ucs-4" } }; - int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); + int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES); /* When there are bugs in gencnval or in ucnv_io, converters can appear to have no aliases. */ @@ -1221,7 +1197,7 @@ static void TestAlias() { if (strcmp(ucnv_getName(cnv, &status), name) != 0 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " - "The should be the same\n", + "They should be the same\n", name, ucnv_getName(cnv, &status)); } } @@ -1338,7 +1314,7 @@ static void TestDuplicateAlias(void) { status = U_ZERO_ERROR; alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { - log_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); + log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); } status = U_ZERO_ERROR; alias = ucnv_getStandardName("ibm-943", "IANA", &status); @@ -1382,6 +1358,7 @@ static TSCCContext *TSCC_clone(TSCCContext *ctx) return newCtx; } +#if !UCONFIG_NO_LEGACY_CONVERSION static void TSCC_fromU(const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar* codeUnits, @@ -1429,7 +1406,6 @@ static void TSCC_fromU(const void *context, } } - static void TSCC_toU(const void *context, UConverterToUnicodeArgs *toUArgs, const char* codeUnits, @@ -1497,7 +1473,6 @@ static void TSCC_print_log(TSCCContext *q, const char *name) } } -#if !UCONFIG_NO_LEGACY_CONVERSION static void TestConvertSafeCloneCallback() { UErrorCode err = U_ZERO_ERROR; @@ -1513,7 +1488,7 @@ static void TestConvertSafeCloneCallback() conv1 = ucnv_open("iso-8859-3", &err); if(U_FAILURE(err)) { - log_data_err("Err opening iso-8859-3, %s", u_errorName(err)); + log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); return; } @@ -1704,7 +1679,7 @@ static void TestConvertSafeClone() }; /* store the actual sizes of each converter */ - int32_t actualSizes[LENGTHOF(names)]; + int32_t actualSizes[UPRV_LENGTHOF(names)]; static const int32_t bufferSizes[] = { U_CNV_SAFECLONE_BUFFERSIZE, @@ -1721,17 +1696,17 @@ static void TestConvertSafeClone() char *pCharBuffer; const char *pConstCharBuffer; - const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); + const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer); UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ UChar uniCharBuffer[20]; char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; const char *pCharSource = charSourceBuffer; const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); UChar *pUCharTarget = uniCharBuffer; - UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); + UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer); const UChar * pUniBuffer; - const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); - int32_t index, j; + const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer); + int32_t idx, j; err = U_ZERO_ERROR; cnv = ucnv_open(names[0], &err); @@ -1740,28 +1715,29 @@ static void TestConvertSafeClone() /* Null status - just returns NULL */ bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0)) + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) { log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); } /* error status - should return 0 & keep error the same */ err = U_MEMORY_ALLOCATION_ERROR; - if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) { log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); } err = U_ZERO_ERROR; - /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ - if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) + /* Null buffer size pointer is ok */ + if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err)) { log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); } + ucnv_close(cnv2); err = U_ZERO_ERROR; /* buffer size pointer is 0 - fill in pbufferSize with a size */ bufferSize = 0; - if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) + if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) { log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); } @@ -1771,7 +1747,7 @@ static void TestConvertSafeClone() log_err("FAIL: Pre-calculated buffer size is too small\n"); } /* Verify we can use this run-time calculated size */ - if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) + if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) { log_err("FAIL: Converter can't be cloned with run-time size\n"); } @@ -1781,7 +1757,7 @@ static void TestConvertSafeClone() /* size one byte too small - should allocate & let us know */ --bufferSize; - if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) { log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); } @@ -1793,7 +1769,7 @@ static void TestConvertSafeClone() bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ - if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) { log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); } @@ -1804,7 +1780,7 @@ static void TestConvertSafeClone() err = U_ZERO_ERROR; /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ - if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) + if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) { log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); } @@ -1817,23 +1793,23 @@ static void TestConvertSafeClone() /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ - for(j = 0; j < LENGTHOF(bufferSizes); ++j) { - for (index = 0; index < LENGTHOF(names); index++) + for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) { + for (idx = 0; idx < UPRV_LENGTHOF(names); idx++) { err = U_ZERO_ERROR; - cnv = ucnv_open(names[index], &err); + cnv = ucnv_open(names[idx], &err); if(U_FAILURE(err)) { - log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err)); + log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); continue; } if(j == 0) { /* preflight to get maxBufferSize */ - actualSizes[index] = 0; - ucnv_safeClone(cnv, NULL, &actualSizes[index], &err); - if(actualSizes[index] > maxBufferSize) { - maxBufferSize = actualSizes[index]; - maxName = names[index]; + actualSizes[idx] = 0; + ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); + if(actualSizes[idx] > maxBufferSize) { + maxBufferSize = actualSizes[idx]; + maxName = names[idx]; } } @@ -1845,10 +1821,10 @@ static void TestConvertSafeClone() /* close the original immediately to make sure that the clone works by itself */ ucnv_close(cnv); - if( actualSizes[index] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && + if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && err == U_SAFECLONE_ALLOCATED_WARNING ) { - log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[index]); + log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); } /* check if the clone function overwrote any bytes that it is not supposed to touch */ @@ -1858,13 +1834,13 @@ static void TestConvertSafeClone() containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) ) { log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", - names[index], bufferSize, bufferSizes[j]); + names[idx], bufferSize, bufferSizes[j]); } } else { /* heap-allocated the clone */ if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", - names[index], bufferSize, bufferSizes[j]); + names[idx], bufferSize, bufferSizes[j]); } } @@ -1920,7 +1896,7 @@ static void TestCCSID() { int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; int32_t i, ccsid; - for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { + for(i=0; iiso-8859-1: got preflighting size %d instead of 10\n", size); } +#if !UCONFIG_ONLY_HTML_CONVERSION err = U_ZERO_ERROR; /* do the conversion */ size = ucnv_convert("UTF-32BE", /* out */ @@ -2086,6 +2066,7 @@ static void bug2() /* bug2: size is 5, should be 12 */ log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); } +#endif } /* @@ -2094,9 +2075,9 @@ static void bug2() */ static void bug3() { -#if !UCONFIG_NO_LEGACY_CONVERSION - static char char_in[CHUNK_SIZE*4]; - static char target[5]; +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + char char_in[CHUNK_SIZE*4]; + char target[5]; UErrorCode err = U_ZERO_ERROR; int32_t size; @@ -2205,6 +2186,11 @@ convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, FALSE, flush, &errorCode); targetLength=(int32_t)(target-targetBuffer); + if(target>targetLimit) { + log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", + testName, chunkSize, target, targetLimit); + break; /* TODO: major problem! */ + } if(errorCode==U_BUFFER_OVERFLOW_ERROR) { /* continue converting another chunk */ errorCode=U_ZERO_ERROR; @@ -2402,6 +2388,377 @@ static void TestConvertEx() { #endif } +/* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ +static const char *const badUTF8[]={ + /* trail byte */ + "\x80", + + /* truncated multi-byte sequences */ + "\xd0", + "\xe0", + "\xe1", + "\xed", + "\xee", + "\xf0", + "\xf1", + "\xf4", + "\xf8", + "\xfc", + + "\xe0\x80", + "\xe0\xa0", + "\xe1\x80", + "\xed\x80", + "\xed\xa0", + "\xee\x80", + "\xf0\x80", + "\xf0\x90", + "\xf1\x80", + "\xf4\x80", + "\xf4\x90", + "\xf8\x80", + "\xfc\x80", + + "\xf0\x80\x80", + "\xf0\x90\x80", + "\xf1\x80\x80", + "\xf4\x80\x80", + "\xf4\x90\x80", + "\xf8\x80\x80", + "\xfc\x80\x80", + + "\xf8\x80\x80\x80", + "\xfc\x80\x80\x80", + + "\xfc\x80\x80\x80\x80", + + /* complete sequences but non-shortest forms or out of range etc. */ + "\xc0\x80", + "\xe0\x80\x80", + "\xed\xa0\x80", + "\xf0\x80\x80\x80", + "\xf4\x90\x80\x80", + "\xf8\x80\x80\x80\x80", + "\xfc\x80\x80\x80\x80\x80", + "\xfe", + "\xff" +}; + +#define ARG_CHAR_ARR_SIZE 8 + +/* get some character that can be converted and convert it */ +static UBool getTestChar(UConverter *cnv, const char *converterName, + char charUTF8[4], int32_t *pCharUTF8Length, + char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, + char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { + UChar utf16[U16_MAX_LENGTH]; + int32_t utf16Length; + + const UChar *utf16Source; + char *target; + + USet *set; + UChar32 c; + UErrorCode errorCode; + + errorCode=U_ZERO_ERROR; + set=uset_open(1, 0); + ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); + c=uset_charAt(set, uset_size(set)/2); + uset_close(set); + + utf16Length=0; + U16_APPEND_UNSAFE(utf16, utf16Length, c); + *pCharUTF8Length=0; + U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); + + utf16Source=utf16; + target=char0; + ucnv_fromUnicode(cnv, + &target, char0+ARG_CHAR_ARR_SIZE, + &utf16Source, utf16+utf16Length, + NULL, FALSE, &errorCode); + *pChar0Length=(int32_t)(target-char0); + + utf16Source=utf16; + target=char1; + ucnv_fromUnicode(cnv, + &target, char1+ARG_CHAR_ARR_SIZE, + &utf16Source, utf16+utf16Length, + NULL, FALSE, &errorCode); + *pChar1Length=(int32_t)(target-char1); + + if(U_FAILURE(errorCode)) { + log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); + return FALSE; + } + return TRUE; +} + +static UBool isOneTruncatedUTF8(const char *s, int32_t length) { + if(length==0) { + return FALSE; + } else if(length==1) { + return U8_IS_LEAD(s[0]); + } else { + int32_t count=U8_COUNT_TRAIL_BYTES(s[0]); + if(length<=count) { + // 2 or more bytes, but fewer than the lead byte indicates. + int32_t oneLength=0; + U8_FWD_1(s, oneLength, length); + // Truncated if we reach the end of the string. + // Not true if the lead byte and first trail byte do not start a valid sequence, + // e.g., E0 80 -> oneLength=1. + return oneLength==length; + } + return FALSE; + } +} + +static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, + char charUTF8[4], int32_t charUTF8Length, + char char0[8], int32_t char0Length, + char char1[8], int32_t char1Length) { + char utf8[16]; + int32_t utf8Length; + + char output[16]; + int32_t outputLength; + + char invalidChars[8]; + int8_t invalidLength; + + const char *source; + char *target; + + UChar pivotBuffer[8]; + UChar *pivotSource, *pivotTarget; + + UErrorCode errorCode; + int32_t i; + + /* test truncated sequences */ + errorCode=U_ZERO_ERROR; + ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); + + memcpy(utf8, charUTF8, charUTF8Length); + + for(i=0; i %s/decimal NCRs) failed\n", converterNames[i]); + } + ucnv_close(cnv); + } + ucnv_close(utf8Cnv); +} + static void TestConvertAlgorithmic() { #if !UCONFIG_NO_LEGACY_CONVERSION @@ -2422,10 +2779,12 @@ TestConvertAlgorithmic() { /*},*/ utf16[]={ 0xfe, 0xff /* BOM only, no text */ - }, - utf32[]={ + }; +#if !UCONFIG_ONLY_HTML_CONVERSION + static const uint8_t utf32[]={ 0xff, 0xfe, 0, 0 /* BOM only, no text */ }; +#endif char target[100], utf8NUL[100], shiftJISNUL[100]; @@ -2495,6 +2854,7 @@ TestConvertAlgorithmic() { u_errorName(errorCode), length); } +#if !UCONFIG_ONLY_HTML_CONVERSION errorCode=U_ZERO_ERROR; length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || @@ -2503,6 +2863,7 @@ TestConvertAlgorithmic() { log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", u_errorName(errorCode), length); } +#endif /* bad arguments */ errorCode=U_MESSAGE_PARSE_ERROR; @@ -2528,6 +2889,7 @@ ucnv_close(cnv); #endif } +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION static void TestLMBCSMaxChar(void) { static const struct { int8_t maxSize; @@ -2559,7 +2921,7 @@ static void TestLMBCSMaxChar(void) { { 4, "HZ"}, { 3, "ISO-2022"}, - { 3, "ISO-2022-KR"}, + { 8, "ISO-2022-KR"}, { 6, "ISO-2022-JP"}, { 8, "ISO-2022-CN"}, @@ -2579,7 +2941,7 @@ static void TestLMBCSMaxChar(void) { }; int32_t idx; - for (idx = 0; idx < LENGTHOF(converter); idx++) { + for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) { UErrorCode status = U_ZERO_ERROR; UConverter *cnv = cnv_open(converter[idx].name, &status); if (U_FAILURE(status)) { @@ -2597,7 +2959,7 @@ static void TestLMBCSMaxChar(void) { log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); } } - +#endif static void TestJ1968(void) { UErrorCode err = U_ZERO_ERROR; @@ -2741,12 +3103,12 @@ testSwap(const char *name, UBool swap) { /* convert to EBCDIC */ pcu=text; pc=normal; - ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); + ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); normalLength=(int32_t)(pc-normal); pcu=text; pc=swapped; - ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); + ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); swappedLength=(int32_t)(pc-swapped); if(U_FAILURE(errorCode)) { @@ -2779,12 +3141,12 @@ testSwap(const char *name, UBool swap) { /* convert back to Unicode (may not roundtrip) */ pc=normal; pu=uNormal; - ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); + ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); normalLength=(int32_t)(pu-uNormal); pc=normal; pu=uSwapped; - ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); + ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); swappedLength=(int32_t)(pu-uSwapped); if(U_FAILURE(errorCode)) { @@ -2835,7 +3197,7 @@ TestEBCDICSwapLFNL() { int i; - for(i=0; i than ucharsPtr */ + ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because an incomplete UChar is being passed in */ + ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + /* This one should fail because ucharsBadPtr is > than ucharsPtr */ + ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); + if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { + log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); + } + + if (charBuffer[0] != 1 || charBuffer[1] != 1 + || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) + { + log_err("Data was incorrectly written to buffers\n"); + } + + ucnv_close(cnv); +} + +static void TestGetName() { + static const char *const names[] = { + "Unicode", "UTF-16", + "UnicodeBigUnmarked", "UTF-16BE", + "UnicodeBig", "UTF-16BE,version=1", + "UnicodeLittleUnmarked", "UTF-16LE", + "UnicodeLittle", "UTF-16LE,version=1", + "x-UTF-16LE-BOM", "UTF-16LE,version=1" + }; + int32_t i; + for(i = 0; i < UPRV_LENGTHOF(names); i += 2) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = ucnv_open(names[i], &errorCode); + if(U_SUCCESS(errorCode)) { + const char *name = ucnv_getName(cnv, &errorCode); + if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { + log_err("ucnv_getName(%s) = %s != %s -- %s\n", + names[i], name, names[i+1], u_errorName(errorCode)); + } + ucnv_close(cnv); + } + } +} + +static void TestUTFBOM() { + static const UChar a16[] = { 0x61 }; + static const char *const names[] = { + "UTF-16", + "UTF-16,version=1", + "UTF-16BE", + "UnicodeBig", + "UTF-16LE", + "UnicodeLittle" + }; + static const uint8_t expected[][5] = { +#if U_IS_BIG_ENDIAN + { 4, 0xfe, 0xff, 0, 0x61 }, + { 4, 0xfe, 0xff, 0, 0x61 }, +#else + { 4, 0xff, 0xfe, 0x61, 0 }, + { 4, 0xff, 0xfe, 0x61, 0 }, +#endif + + { 2, 0, 0x61 }, + { 4, 0xfe, 0xff, 0, 0x61 }, + + { 2, 0x61, 0 }, + { 4, 0xff, 0xfe, 0x61, 0 } + }; + + char bytes[10]; + int32_t i; + + for(i = 0; i < UPRV_LENGTHOF(names); ++i) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = ucnv_open(names[i], &errorCode); + int32_t length = 0; + const uint8_t *exp = expected[i]; + if (U_FAILURE(errorCode)) { + log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); + continue; + } + length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); + + if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { + log_err("unexpected %s BOM writing behavior -- %s\n", + names[i], u_errorName(errorCode)); + } + ucnv_close(cnv); + } +}