/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2006, International Business Machines Corporation and
+ * Copyright (c) 1997-2012, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
-/*******************************************************************************
+/*****************************************************************************
*
* File CU_CAPITST.C
*
* Modification History:
* Name Description
* Madhu Katragadda Ported for C API
-********************************************************************************
+******************************************************************************
*/
#include <stdio.h>
#include <stdlib.h>
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "unicode/putil.h"
+#include "unicode/uset.h"
#include "unicode/ustring.h"
#include "ucnv_bld.h" /* for sizeof(UConverter) */
#include "cmemory.h" /* for UAlignedMemory */
#include "cintltst.h"
#include "ccapitst.h"
-
-/* for not including "cstring.h" -begin*/
-#ifdef U_WINDOWS
-# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2)
-#elif defined(POSIX)
-# define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2)
-#else
-# define ctest_stricmp(str1, str2) T_CString_stricmp(str1, str2)
-#endif
-
-static int U_EXPORT2
-T_CString_stricmp(const char *str1, const char *str2) {
- if(str1==NULL) {
- if(str2==NULL) {
- return 0;
- } else {
- return -1;
- }
- } else if(str2==NULL) {
- return 1;
- } else {
- /* compare non-NULL strings lexically with lowercase */
- int rc;
- unsigned char c1, c2;
- for(;;) {
- c1=(unsigned char)*str1;
- c2=(unsigned char)*str2;
- if(c1==0) {
- if(c2==0) {
- return 0;
- } else {
- return -1;
- }
- } else if(c2==0) {
- return 1;
- } else {
- /* compare non-zero characters with lowercase */
- rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2);
- if(rc!=0) {
- return rc;
- }
- }
- ++str1;
- ++str2;
- }
- }
-}
-/* for not including "cstring.h" -end*/
+#include "cstring.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#define UCS_FILE_NAME_SIZE 512
/*returns an action other than the one provided*/
+#if !UCONFIG_NO_LEGACY_CONVERSION
static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
+#endif
static UConverter *
cnv_open(const char *name, UErrorCode *pErrorCode) {
static void TestCCSID(void);
static void TestJ932(void);
static void TestJ1968(void);
+#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
static void TestLMBCSMaxChar(void);
+#endif
#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestConvertSafeCloneCallback(void);
static void TestEBCDICSwapLFNL(void);
static void TestConvertEx(void);
+static void TestConvertExFromUTF8(void);
+static void TestConvertExFromUTF8_C5F0(void);
static void TestConvertAlgorithmic(void);
void TestDefaultConverterError(void); /* defined in cctest.c */
+ void TestDefaultConverterSet(void); /* defined in cctest.c */
static void TestToUCountPending(void);
static void TestFromUCountPending(void);
static void TestDefaultName(void);
static void TestCompareNames(void);
static void TestSubstString(void);
+static void InvalidArguments(void);
+static void TestGetName(void);
+static void TestUTFBOM(void);
void addTestConvert(TestNode** root);
addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias");
addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias");
addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
-
#if !UCONFIG_NO_LEGACY_CONVERSION
addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback");
#endif
-
addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID");
addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932");
addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968");
+#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar");
+#endif
addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL");
addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx");
+ addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8");
+ addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic");
addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError");
+ addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet");
+#if !UCONFIG_NO_FILE_IO
addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending");
addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
+#endif
addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName");
addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames");
addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString");
+ addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments");
+ addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName");
+ addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM");
}
static void ListNames(void) {
log_verbose("Testing ucnv_openAllNames()...");
allNamesEnum = ucnv_openAllNames(&err);
if(U_FAILURE(err)) {
- log_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
+ log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
}
else {
const char *string = NULL;
/* Test ucnv_countAliases() etc. */
count = ucnv_countAliases("utf-8", &err);
if(U_FAILURE(err)) {
- log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
+ log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
} else if(count <= 0) {
log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
} else {
ucs_file_in = fopen(ucs_file_name,"rb");
if (!ucs_file_in)
{
- log_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
+ log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
return;
}
if (!myConverter || U_FAILURE(err))
{
log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
-
- return;
+ fclose(ucs_file_in);
+ break;
}
/*testing for ucnv_getName() */
{
log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
}
- if (ctest_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
+ if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
log_err("getName failed\n");
else
log_verbose("getName ok\n");
if (BOM!=0xFEFF && BOM!=0xFFFE)
{
log_err("File Missing BOM...Bailing!\n");
- return;
+ fclose(ucs_file_in);
+ break;
}
#endif
}
+#if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Returns a from-Unicode callback that differs from MIA:
 * the SUBSTITUTE callback when MIA is the STOP callback, the STOP callback otherwise.
 */
static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
{
    if (MIA == (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP) {
        return (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    }
    return (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;
}
-
/*
 * Returns a to-Unicode callback that differs from MIA:
 * the SUBSTITUTE callback when MIA is the STOP callback, the STOP callback otherwise.
 */
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
{
    if (MIA == (UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP) {
        return (UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE;
    }
    return (UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;
}
+#endif
static void TestFlushCache(void) {
#if !UCONFIG_NO_LEGACY_CONVERSION
status = U_ZERO_ERROR;
alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
- log_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
+ log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
}
status = U_ZERO_ERROR;
alias = ucnv_getStandardName("ibm-943", "IANA", &status);
return newCtx;
}
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TSCC_fromU(const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
}
}
-
static void TSCC_toU(const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
}
}
-#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestConvertSafeCloneCallback()
{
UErrorCode err = U_ZERO_ERROR;
conv1 = ucnv_open("iso-8859-3", &err);
if(U_FAILURE(err)) {
- log_data_err("Err opening iso-8859-3, %s", u_errorName(err));
+ log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
return;
}
UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer);
const UChar * pUniBuffer;
const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer);
- int32_t index, j;
+ int32_t idx, j;
err = U_ZERO_ERROR;
cnv = ucnv_open(names[0], &err);
/* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
for(j = 0; j < LENGTHOF(bufferSizes); ++j) {
- for (index = 0; index < LENGTHOF(names); index++)
+ for (idx = 0; idx < LENGTHOF(names); idx++)
{
err = U_ZERO_ERROR;
- cnv = ucnv_open(names[index], &err);
+ cnv = ucnv_open(names[idx], &err);
if(U_FAILURE(err)) {
- log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err));
+ log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
continue;
}
if(j == 0) {
/* preflight to get maxBufferSize */
- actualSizes[index] = 0;
- ucnv_safeClone(cnv, NULL, &actualSizes[index], &err);
- if(actualSizes[index] > maxBufferSize) {
- maxBufferSize = actualSizes[index];
- maxName = names[index];
+ actualSizes[idx] = 0;
+ ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
+ if(actualSizes[idx] > maxBufferSize) {
+ maxBufferSize = actualSizes[idx];
+ maxName = names[idx];
}
}
/* close the original immediately to make sure that the clone works by itself */
ucnv_close(cnv);
- if( actualSizes[index] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) &&
+ if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) &&
err == U_SAFECLONE_ALLOCATED_WARNING
) {
- log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[index]);
+ log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
}
/* check if the clone function overwrote any bytes that it is not supposed to touch */
containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
) {
log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
- names[index], bufferSize, bufferSizes[j]);
+ names[idx], bufferSize, bufferSizes[j]);
}
} else {
/* heap-allocated the clone */
if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
- names[index], bufferSize, bufferSizes[j]);
+ names[idx], bufferSize, bufferSizes[j]);
}
}
static void bug1()
{
#if !UCONFIG_NO_LEGACY_CONVERSION
- static char char_in[CHUNK_SIZE+32];
- static char char_out[CHUNK_SIZE*2];
+ char char_in[CHUNK_SIZE+32];
+ char char_out[CHUNK_SIZE*2];
/* GB 18030 equivalent of U+10000 is 90308130 */
static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
static void bug3()
{
#if !UCONFIG_NO_LEGACY_CONVERSION
- static char char_in[CHUNK_SIZE*4];
- static char target[5];
+ char char_in[CHUNK_SIZE*4];
+ char target[5];
UErrorCode err = U_ZERO_ERROR;
int32_t size;
pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
FALSE, flush, &errorCode);
targetLength=(int32_t)(target-targetBuffer);
+ if(target>targetLimit) {
+ log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
+ testName, chunkSize, target, targetLimit);
+ break; /* TODO: major problem! */
+ }
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
/* continue converting another chunk */
errorCode=U_ZERO_ERROR;
#endif
}
+/*
+ * Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8().
+ * Each entry is a NUL-terminated string of raw bytes; the test functions
+ * below take an entry's length with strlen().
+ */
+static const char *const badUTF8[]={
+ /* a lone trail byte with no lead byte before it */
+ "\x80",
+
+ /* truncated multi-byte sequences: a lead byte followed by too few trail bytes */
+ "\xd0",
+ "\xe0",
+ "\xe1",
+ "\xed",
+ "\xee",
+ "\xf0",
+ "\xf1",
+ "\xf4",
+ "\xf8",
+ "\xfc",
+
+ "\xe0\x80",
+ "\xe0\xa0",
+ "\xe1\x80",
+ "\xed\x80",
+ "\xed\xa0",
+ "\xee\x80",
+ "\xf0\x80",
+ "\xf0\x90",
+ "\xf1\x80",
+ "\xf4\x80",
+ "\xf4\x90",
+ "\xf8\x80",
+ "\xfc\x80",
+
+ "\xf0\x80\x80",
+ "\xf0\x90\x80",
+ "\xf1\x80\x80",
+ "\xf4\x80\x80",
+ "\xf4\x90\x80",
+ "\xf8\x80\x80",
+ "\xfc\x80\x80",
+
+ "\xf8\x80\x80\x80",
+ "\xfc\x80\x80\x80",
+
+ "\xfc\x80\x80\x80\x80",
+
+ /* complete sequences but non-shortest forms or out of range etc. */
+ "\xc0\x80", /* non-shortest form of U+0000 */
+ "\xe0\x80\x80", /* non-shortest form of U+0000 */
+ "\xed\xa0\x80", /* bit pattern of the surrogate U+D800 */
+ "\xf0\x80\x80\x80", /* non-shortest form of U+0000 */
+ "\xf4\x90\x80\x80", /* bit pattern of 0x110000, above U+10FFFF */
+ "\xf8\x80\x80\x80\x80", /* 5-byte form */
+ "\xfc\x80\x80\x80\x80\x80", /* 6-byte form */
+ "\xfe", /* 0xFE and 0xFF never occur in well-formed UTF-8 */
+ "\xff"
+};
+
+#define ARG_CHAR_ARR_SIZE 8
+
+/*
+ * Get some character that can be converted and convert it.
+ *
+ * Takes the middle element of cnv's roundtrip set (UCNV_ROUNDTRIP_SET) and writes
+ *   charUTF8 / *pCharUTF8Length: its UTF-8 encoding
+ *   char0 / *pChar0Length:       output of a first ucnv_fromUnicode() call
+ *   char1 / *pChar1Length:       output of a second ucnv_fromUnicode() call made
+ *                                right after the first, without flush or reset
+ *
+ * converterName is only used in the error message.
+ *
+ * Returns TRUE on success. Logs an error and returns FALSE when the set
+ * cannot be retrieved, is empty, or a conversion fails; the outputs must
+ * not be used in that case.
+ */
+static UBool getTestChar(UConverter *cnv, const char *converterName,
+                         char charUTF8[4], int32_t *pCharUTF8Length,
+                         char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
+                         char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
+    UChar utf16[U16_MAX_LENGTH];
+    int32_t utf16Length;
+
+    const UChar *utf16Source;
+    char *target;
+
+    USet *set;
+    UChar32 c;
+    UErrorCode errorCode;
+
+    /* leave defined values behind on every failure path */
+    *pCharUTF8Length=0;
+    *pChar0Length=0;
+    *pChar1Length=0;
+
+    errorCode=U_ZERO_ERROR;
+    set=uset_open(1, 0); /* start>end: starts out as an empty set */
+    ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
+    c=-1;
+    if(U_SUCCESS(errorCode)) {
+        /* uset_charAt() yields a negative value when the set is empty */
+        c=uset_charAt(set, uset_size(set)/2);
+    }
+    uset_close(set);
+    if(U_FAILURE(errorCode) || c<0) {
+        /* do not feed an invalid code point to the _UNSAFE append macros */
+        log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
+        return FALSE;
+    }
+
+    utf16Length=0;
+    U16_APPEND_UNSAFE(utf16, utf16Length, c);
+    *pCharUTF8Length=0;
+    U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
+
+    /* first conversion of the character */
+    utf16Source=utf16;
+    target=char0;
+    ucnv_fromUnicode(cnv,
+                     &target, char0+ARG_CHAR_ARR_SIZE,
+                     &utf16Source, utf16+utf16Length,
+                     NULL, FALSE, &errorCode);
+    *pChar0Length=(int32_t)(target-char0);
+
+    /* second conversion of the same character, continuing from the first call */
+    utf16Source=utf16;
+    target=char1;
+    ucnv_fromUnicode(cnv,
+                     &target, char1+ARG_CHAR_ARR_SIZE,
+                     &utf16Source, utf16+utf16Length,
+                     NULL, FALSE, &errorCode);
+    *pChar1Length=(int32_t)(target-char1);
+
+    if(U_FAILURE(errorCode)) {
+        log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
+        return FALSE;
+    }
+    return TRUE;
+}
+/*
+ * For every truncated entry of badUTF8[], converts the test character
+ * followed by that truncated sequence from UTF-8 (utf8Cnv) to cnv with a
+ * single resetting and flushing ucnv_convertEx() call. Expects
+ * U_TRUNCATED_CHAR_FOUND, and expects ucnv_getInvalidChars(utf8Cnv) to
+ * report exactly the truncated bytes.
+ *
+ * Sets utf8Cnv's to-Unicode callback to UCNV_TO_U_CALLBACK_STOP.
+ * converterName is only used in log messages.
+ * char0, char0Length, char1 and char1Length are not used in this function.
+ */
+static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
+ char charUTF8[4], int32_t charUTF8Length,
+ char char0[8], int32_t char0Length,
+ char char1[8], int32_t char1Length) {
+ char utf8[16];
+ int32_t utf8Length;
+
+ char output[16];
+ int32_t outputLength;
+
+ char invalidChars[8];
+ int8_t invalidLength;
+
+ const char *source;
+ char *target;
+
+ UChar pivotBuffer[8];
+ UChar *pivotSource, *pivotTarget;
+
+ UErrorCode errorCode;
+ int32_t i;
+
+ /* test truncated sequences */
+ errorCode=U_ZERO_ERROR;
+ ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
+
+ memcpy(utf8, charUTF8, charUTF8Length); /* the test character stays at the front of utf8[] for every iteration */
+
+ for(i=0; i<LENGTHOF(badUTF8); ++i) {
+ /* truncated sequence? skip entries that have at least as many bytes as their first byte calls for */
+ int32_t length=strlen(badUTF8[i]);
+ if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) {
+ continue;
+ }
+
+ /* assemble a string with the test character and the truncated sequence */
+ memcpy(utf8+charUTF8Length, badUTF8[i], length);
+ utf8Length=charUTF8Length+length;
+
+ /* convert and check the invalidChars */
+ source=utf8;
+ target=output;
+ pivotSource=pivotTarget=pivotBuffer;
+ errorCode=U_ZERO_ERROR;
+ ucnv_convertEx(cnv, utf8Cnv,
+ &target, output+sizeof(output),
+ &source, utf8+utf8Length,
+ pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
+ TRUE, TRUE, /* reset & flush */
+ &errorCode);
+ outputLength=(int32_t)(target-output); /* computed but not checked below */
+ if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { /* also requires pivotSource to still point at the start of pivotBuffer */
+ log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i);
+ continue;
+ }
+
+ errorCode=U_ZERO_ERROR;
+ invalidLength=(int8_t)sizeof(invalidChars); /* in: capacity of invalidChars; compared against length after the call */
+ ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode);
+ if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) {
+ log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i);
+ }
+ }
+}
+
+/*
+ * Builds one UTF-8 input made of the test character, then every badUTF8[]
+ * entry each followed by the test character again, and one expected output
+ * made of char0 followed by one char1 per badUTF8[] entry. Both are handed
+ * to convertExMultiStreaming() together with U_ZERO_ERROR (presumably the
+ * expected final error code -- confirm against convertExMultiStreaming()).
+ *
+ * Sets utf8Cnv's to-Unicode callback to UCNV_TO_U_CALLBACK_SKIP, so each bad
+ * sequence is expected to produce no output at all.
+ * converterName is only used to build the test name.
+ */
+static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
+                            char charUTF8[4], int32_t charUTF8Length,
+                            char char0[8], int32_t char0Length,
+                            char char1[8], int32_t char1Length) {
+    char utf8[600], expect[600];
+    int32_t utf8Length, expectLength;
+
+    char testName[32];
+
+    UErrorCode errorCode;
+    int32_t i;
+
+    errorCode=U_ZERO_ERROR;
+    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);
+
+    /*
+     * assemble an input string with the test character between each
+     * bad sequence,
+     * and an expected string with repeated test character output
+     */
+    memcpy(utf8, charUTF8, charUTF8Length);
+    utf8Length=charUTF8Length;
+
+    /* the first output comes from the converter's initial state */
+    memcpy(expect, char0, char0Length);
+    expectLength=char0Length;
+
+    for(i=0; i<LENGTHOF(badUTF8); ++i) {
+        int32_t length=(int32_t)strlen(badUTF8[i]);
+        memcpy(utf8+utf8Length, badUTF8[i], length);
+        utf8Length+=length;
+
+        memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
+        utf8Length+=charUTF8Length;
+
+        /* every later occurrence uses the "later state" output */
+        memcpy(expect+expectLength, char1, char1Length);
+        expectLength+=char1Length;
+    }
+
+    /* expect that each bad UTF-8 sequence is detected and skipped */
+    strcpy(testName, "from bad UTF-8 to ");
+    /* bounded append: a long converter name is truncated instead of overflowing testName */
+    strncat(testName, converterName, sizeof(testName)-strlen(testName)-1);
+
+    convertExMultiStreaming(utf8Cnv, cnv,
+                            utf8, utf8Length,
+                            expect, expectLength,
+                            testName,
+                            U_ZERO_ERROR);
+}
+
+/*
+ * Test illegal UTF-8 input.
+ *
+ * Opens a UTF-8 converter and, for each target converter in converterNames[],
+ * obtains a convertible test character with getTestChar() and runs
+ * testFromTruncatedUTF8() and testFromBadUTF8() with it.
+ * A target converter that cannot be opened is reported with log_data_err()
+ * and skipped.
+ */
+static void TestConvertExFromUTF8() {
+    static const char *const converterNames[]={
+#if !UCONFIG_NO_LEGACY_CONVERSION
+        "windows-1252",
+        "shift-jis",
+#endif
+        "us-ascii",
+        "iso-8859-1",
+        "utf-8"
+    };
+
+    UConverter *utf8Cnv, *cnv;
+    UErrorCode errorCode;
+    int32_t i;
+
+    /* fromUnicode versions of some character, from initial state and later */
+    char charUTF8[4], char0[8], char1[8];
+    int32_t charUTF8Length, char0Length, char1Length;
+
+    errorCode=U_ZERO_ERROR;
+    utf8Cnv=ucnv_open("UTF-8", &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
+        return;
+    }
+
+    for(i=0; i<LENGTHOF(converterNames); ++i) {
+        errorCode=U_ZERO_ERROR;
+        cnv=ucnv_open(converterNames[i], &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
+            continue;
+        }
+        if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
+            /* getTestChar() already logged the failure; release the converter before moving on */
+            ucnv_close(cnv);
+            continue;
+        }
+        testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
+        testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
+        ucnv_close(cnv);
+    }
+    ucnv_close(utf8Cnv);
+}
+
+/*
+ * Converts the two-byte ill-formed UTF-8 input C5 F0 to each converter in
+ * converterNames[] with ucnv_convertEx(), using the XML decimal escape
+ * from-Unicode callback on the target converter.
+ *
+ * Expected output: two U+FFFD written directly as UTF-8 for the last entry
+ * ("utf-8"), and two decimal NCRs for every other converter. The target limit
+ * is set to exactly the expected length, so the test expects
+ * U_STRING_NOT_TERMINATED_WARNING and expects the byte just past the output
+ * to still hold the fill value 9.
+ */
+static void TestConvertExFromUTF8_C5F0() {
+    static const char *const converterNames[]={
+#if !UCONFIG_NO_LEGACY_CONVERSION
+        "windows-1251",
+        "shift-jis",
+#endif
+        "us-ascii",
+        "iso-8859-1",
+        "utf-8"
+    };
+
+    UConverter *utf8Cnv, *cnv;
+    UErrorCode errorCode;
+    int32_t i;
+
+    static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
+    /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
+    static const char twoNCRs[16]={
+        0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
+        0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
+    };
+    /* 2x U+FFFD encoded in UTF-8 */
+    static const char twoFFFD[6]={
+        (char)0xef, (char)0xbf, (char)0xbd,
+        (char)0xef, (char)0xbf, (char)0xbd
+    };
+    const char *expected;
+    int32_t expectedLength;
+    char dest[20];  /* longer than longest expectedLength */
+
+    const char *src;
+    char *target;
+
+    UChar pivotBuffer[128];
+    UChar *pivotSource, *pivotTarget;
+
+    errorCode=U_ZERO_ERROR;
+    utf8Cnv=ucnv_open("UTF-8", &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
+        return;
+    }
+
+    for(i=0; i<LENGTHOF(converterNames); ++i) {
+        errorCode=U_ZERO_ERROR;
+        cnv=ucnv_open(converterNames[i], &errorCode);
+        ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
+                              NULL, NULL, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_data_err("unable to open %s converter - %s\n",
+                         converterNames[i], u_errorName(errorCode));
+            /* the open may have succeeded even though setting the callback failed */
+            if(cnv!=NULL) {
+                ucnv_close(cnv);
+            }
+            continue;
+        }
+        src=bad_utf8;
+        target=dest;
+        /* fill value used below to detect writes past the expected output */
+        uprv_memset(dest, 9, sizeof(dest));
+        if(i==LENGTHOF(converterNames)-1) {
+            /* conversion to UTF-8 yields two U+FFFD directly */
+            expected=twoFFFD;
+            expectedLength=6;
+        } else {
+            /* conversion to a non-Unicode charset yields two NCRs */
+            expected=twoNCRs;
+            expectedLength=16;
+        }
+        pivotBuffer[0]=0;
+        pivotBuffer[1]=1;
+        pivotBuffer[2]=2;
+        pivotSource=pivotTarget=pivotBuffer;
+        /* target limit is exactly the expected length: no room for a terminator */
+        ucnv_convertEx(
+            cnv, utf8Cnv,
+            &target, dest+expectedLength,
+            &src, bad_utf8+sizeof(bad_utf8),
+            pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
+            TRUE, TRUE, &errorCode);
+        if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
+            target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
+            dest[expectedLength]!=9
+        ) {
+            log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
+        }
+        ucnv_close(cnv);
+    }
+    ucnv_close(utf8Cnv);
+}
+
static void
TestConvertAlgorithmic() {
#if !UCONFIG_NO_LEGACY_CONVERSION
#endif
}
+#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
static void TestLMBCSMaxChar(void) {
static const struct {
int8_t maxSize;
log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
}
}
-
+#endif
static void TestJ1968(void) {
UErrorCode err = U_ZERO_ERROR;
int i;
UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
if(U_FAILURE(status)){
- log_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
+ log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
return;
}
for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) {
UConverterToUCallback *oldToUAction= NULL;
UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
if(U_FAILURE(status)){
- log_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
+ log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
return;
}
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
for(i=0; i<LENGTHOF(toUnicodeTests); ++i) {
- UChar tgt[10];
+ UChar tgt[20];
UChar* target = tgt;
UChar* targetLimit = target + 20;
const char* source = toUnicodeTests[i].input;
const char* sourceLimit = source + toUnicodeTests[i].len;
int32_t len = 0;
ucnv_reset(cnv);
- ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+ ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
len = ucnv_toUCountPending(cnv,&status);
if(U_FAILURE(status)){
log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
#endif
}
-static void TestOneDefaultNameChange(const char *name) {
+static void TestOneDefaultNameChange(const char *name, const char *expected) {
UErrorCode status = U_ZERO_ERROR;
UConverter *cnv;
ucnv_setDefaultName(name);
- if(strcmp(ucnv_getDefaultName(), name)==0)
+ if(strcmp(ucnv_getDefaultName(), expected)==0)
log_verbose("setDefaultName of %s works.\n", name);
else
log_err("setDefaultName of %s failed\n", name);
log_err("opening the default converter of %s failed\n", name);
return;
}
- if(strcmp(ucnv_getName(cnv, &status), name)==0)
+ if(strcmp(ucnv_getName(cnv, &status), expected)==0)
log_verbose("ucnv_getName of %s works.\n", name);
else
log_err("ucnv_getName of %s failed\n", name);
log_verbose("getDefaultName returned %s\n", defaultName);
/*change the default name by setting it */
- TestOneDefaultNameChange("UTF-8");
-#if !UCONFIG_NO_LEGACY_CONVERSION
- TestOneDefaultNameChange("ISCII,version=1");
- TestOneDefaultNameChange("ISCII,version=2");
+ TestOneDefaultNameChange("UTF-8", "UTF-8");
+#if U_CHARSET_IS_UTF8
+ TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
+ TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
+ TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
+#else
+# if !UCONFIG_NO_LEGACY_CONVERSION
+ TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
+ TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
+# endif
+ TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
#endif
- TestOneDefaultNameChange("ISO-8859-1");
/*set the default name back*/
ucnv_setDefaultName(defaultName);
/* Test that ucnv_compareNames() matches names according to spec. ----------- */
-static U_INLINE int
+static int
sign(int n) {
if(n==0) {
return 0;
errorCode=U_ZERO_ERROR;
cnv=ucnv_open("UTF-16", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
+ log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
return;
}
length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
errorCode=U_ZERO_ERROR;
cnv=ucnv_open("UTF-32", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
+ log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
return;
}
length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
errorCode=U_ZERO_ERROR;
cnv=ucnv_open("ISO-8859-1", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
+ log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
return;
}
ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
errorCode=U_ZERO_ERROR;
cnv=ucnv_open("HZ", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
+ log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
return;
}
ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
* functions with UErrorCode parameters.
*/
}
+/*
+ * Calls ucnv_fromUnicode() and ucnv_toUnicode() on a UTF-8 converter with
+ * deliberately invalid UChar pointer pairs: a limit that is one byte past
+ * the start, and a limit that lies before the start. Each call is expected
+ * to set U_ILLEGAL_ARGUMENT_ERROR, and afterwards both local buffers must
+ * still hold their initial byte values.
+ */
+static void
+InvalidArguments() {
+ UConverter *cnv;
+ UErrorCode errorCode;
+ char charBuffer[2] = {1, 1}; /* sentinel values, verified unchanged at the end */
+ char ucharAsCharBuffer[2] = {2, 2}; /* sentinel values, verified unchanged at the end */
+ char *charsPtr = charBuffer;
+ UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
+ UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); /* one byte past ucharsPtr */
+
+ errorCode=U_ZERO_ERROR;
+ cnv=ucnv_open("UTF-8", &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
+ return;
+ }
+
+ errorCode=U_ZERO_ERROR;
+ /* This one should fail because an incomplete UChar is being passed in (source limit is one byte past the source start) */
+ ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode);
+ if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
+ log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
+ }
+
+ errorCode=U_ZERO_ERROR;
+ /* This one should fail because the source start (ucharsBadPtr) is greater than the source limit (ucharsPtr) */
+ ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode);
+ if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
+ log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
+ }
+
+ errorCode=U_ZERO_ERROR;
+ /* This one should fail because an incomplete UChar is being passed in (target limit is one byte past the target start) */
+ ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
+ if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
+ log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
+ }
+
+ errorCode=U_ZERO_ERROR;
+ /* This one should fail because the target start (ucharsBadPtr) is greater than the target limit (ucharsPtr) */
+ ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
+ if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
+ log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
+ }
+
+ if (charBuffer[0] != 1 || charBuffer[1] != 1
+ || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
+ {
+ log_err("Data was incorrectly written to buffers\n");
+ }
+
+ ucnv_close(cnv);
+}
+
+/*
+ * Opens a converter for each alias in the table and checks that
+ * ucnv_getName() returns the name paired with it.
+ * An alias whose converter cannot be opened is skipped without a message.
+ */
+static void TestGetName() {
+    static const struct {
+        const char *alias;      /* name passed to ucnv_open() */
+        const char *canonical;  /* name expected from ucnv_getName() */
+    } cases[] = {
+        { "Unicode",                "UTF-16" },
+        { "UnicodeBigUnmarked",     "UTF-16BE" },
+        { "UnicodeBig",             "UTF-16BE,version=1" },
+        { "UnicodeLittleUnmarked",  "UTF-16LE" },
+        { "UnicodeLittle",          "UTF-16LE,version=1" },
+        { "x-UTF-16LE-BOM",         "UTF-16LE,version=1" }
+    };
+    int32_t k;
+
+    for(k = 0; k < LENGTHOF(cases); ++k) {
+        const char *alias = cases[k].alias;
+        const char *wanted = cases[k].canonical;
+        const char *actual;
+        UErrorCode status = U_ZERO_ERROR;
+        UConverter *conv = ucnv_open(alias, &status);
+
+        if(U_FAILURE(status)) {
+            /* nothing was opened, so nothing to check or close */
+            continue;
+        }
+        actual = ucnv_getName(conv, &status);
+        if(U_FAILURE(status) || 0 != strcmp(actual, wanted)) {
+            log_err("ucnv_getName(%s) = %s != %s -- %s\n",
+                    alias, actual, wanted, u_errorName(status));
+        }
+        ucnv_close(conv);
+    }
+}
+
+/*
+ * Converts the single UChar U+0061 with several UTF-16 converter names and
+ * compares the produced bytes with a table, checking which names emit a
+ * byte order mark and in which byte order.
+ */
+static void TestUTFBOM() {
+    static const UChar a16[] = { 0x61 };
+    static const char *const names[] = {
+        "UTF-16",
+        "UTF-16,version=1",
+        "UTF-16BE",
+        "UnicodeBig",
+        "UTF-16LE",
+        "UnicodeLittle"
+    };
+    /* one row per names[] entry: byte count first, then the expected bytes */
+    static const uint8_t expected[][5] = {
+#if U_IS_BIG_ENDIAN
+        { 4, 0xfe, 0xff, 0, 0x61 },
+        { 4, 0xfe, 0xff, 0, 0x61 },
+#else
+        { 4, 0xff, 0xfe, 0x61, 0 },
+        { 4, 0xff, 0xfe, 0x61, 0 },
+#endif
+
+        { 2, 0, 0x61 },
+        { 4, 0xfe, 0xff, 0, 0x61 },
+
+        { 2, 0x61, 0 },
+        { 4, 0xff, 0xfe, 0x61, 0 }
+    };
+
+    char bytes[10];
+    int32_t n;
+
+    for(n = 0; n < LENGTHOF(names); ++n) {
+        const char *name = names[n];
+        const uint8_t *row = expected[n];
+        UErrorCode status = U_ZERO_ERROR;
+        UConverter *conv = ucnv_open(name, &status);
+
+        if (U_SUCCESS(status)) {
+            int32_t written = ucnv_fromUChars(conv, bytes, (int32_t)sizeof(bytes), a16, 1, &status);
+
+            /* row[0] is the expected length, row+1 the expected bytes */
+            if(U_FAILURE(status) || written != row[0] || 0 != memcmp(bytes, row+1, written)) {
+                log_err("unexpected %s BOM writing behavior -- %s\n",
+                        name, u_errorName(status));
+            }
+            ucnv_close(conv);
+        } else {
+            log_err_status(status, "Unable to open converter: %s got error code: %s\n", name, u_errorName(status));
+        }
+    }
+}