+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2003, International Business Machines Corporation and
+ * Copyright (c) 1997-2016, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
-/********************************************************************************
+/*****************************************************************************
*
-* File CU_CAPITST.C
+* File ccapitst.c
*
* Modification History:
* Name Description
* Madhu Katragadda Ported for C API
-*********************************************************************************
+******************************************************************************
*/
#include <stdio.h>
#include <stdlib.h>
#include "unicode/uloc.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
-#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uset.h"
#include "unicode/ustring.h"
+#include "unicode/utf8.h"
+#include "ucnv_bld.h" /* for sizeof(UConverter) */
+#include "cmemory.h" /* for UAlignedMemory */
#include "cintltst.h"
#include "ccapitst.h"
-
-/* for not including "cstring.h" -begin*/
-#ifdef WIN32
-# define stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2)
-#elif defined(POSIX)
-# define stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2)
-#else
-# define stricmp(str1, str2) T_CString_stricmp(str1, str2)
-#endif
-
-static int U_EXPORT2
-T_CString_stricmp(const char *str1, const char *str2) {
- if(str1==NULL) {
- if(str2==NULL) {
- return 0;
- } else {
- return -1;
- }
- } else if(str2==NULL) {
- return 1;
- } else {
- /* compare non-NULL strings lexically with lowercase */
- int rc;
- unsigned char c1, c2;
- for(;;) {
- c1=(unsigned char)*str1;
- c2=(unsigned char)*str2;
- if(c1==0) {
- if(c2==0) {
- return 0;
- } else {
- return -1;
- }
- } else if(c2==0) {
- return 1;
- } else {
- /* compare non-zero characters with lowercase */
- rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2);
- if(rc!=0) {
- return rc;
- }
- }
- ++str1;
- ++str2;
- }
- }
-}
-/* for not including "cstring.h" -end*/
-
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+#include "cstring.h"
#define NUM_CODEPAGE 1
#define MAX_FILE_LEN 1024*20
#define UCS_FILE_NAME_SIZE 512
/*returns an action other than the one provided*/
+#if !UCONFIG_NO_LEGACY_CONVERSION
static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
+#endif
+
+/*
+ * Opens a converter by name.
+ * A name that starts with '*' is opened with ucnv_openPackage() from the
+ * package returned by loadTestData(), using the text after the '*'.
+ * Every other name (including NULL) is passed straight to ucnv_open().
+ * Errors are reported through pErrorCode by the called functions.
+ */
+static UConverter *
+cnv_open(const char *name, UErrorCode *pErrorCode) {
+    if(name==NULL || name[0]!='*') {
+        return ucnv_open(name, pErrorCode);
+    }
+    /* skip the '*' marker and look the converter up in the test data package */
+    return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
+}
static void ListNames(void);
- void TestFlushCache(void); /* defined in cctest.c */
+static void TestFlushCache(void);
static void TestDuplicateAlias(void);
static void TestCCSID(void);
static void TestJ932(void);
static void TestJ1968(void);
+#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
static void TestLMBCSMaxChar(void);
+#endif
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestConvertSafeCloneCallback(void);
+#endif
+
static void TestEBCDICSwapLFNL(void);
static void TestConvertEx(void);
+static void TestConvertExFromUTF8(void);
+static void TestConvertExFromUTF8_C5F0(void);
static void TestConvertAlgorithmic(void);
+ void TestDefaultConverterError(void); /* defined in cctest.c */
+ void TestDefaultConverterSet(void); /* defined in cctest.c */
+static void TestToUCountPending(void);
+static void TestFromUCountPending(void);
+static void TestDefaultName(void);
+static void TestCompareNames(void);
+static void TestSubstString(void);
+static void InvalidArguments(void);
+static void TestGetName(void);
+static void TestUTFBOM(void);
void addTestConvert(TestNode** root);
addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache");
addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias");
addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias");
- addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
- addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback");
+ addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback");
+#endif
addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID");
addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932");
addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968");
+#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar");
+#endif
addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL");
addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx");
+ addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8");
+ addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic");
+ addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError");
+ addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet");
+#if !UCONFIG_NO_FILE_IO
+ addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending");
+ addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
+#endif
+ addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName");
+ addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames");
+ addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString");
+ addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments");
+ addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName");
+ addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM");
}
static void ListNames(void) {
log_verbose("Testing ucnv_openAllNames()...");
allNamesEnum = ucnv_openAllNames(&err);
if(U_FAILURE(err)) {
- log_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
+ log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
}
else {
const char *string = NULL;
count1++;
log_verbose("read \"%s\", length %i\n", string, len);
}
- err = U_ZERO_ERROR;
+ if (U_FAILURE(err)) {
+ log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
+ err = U_ZERO_ERROR;
+ }
uenum_reset(allNamesEnum, &err);
while ((string = uenum_next(allNamesEnum, &len, &err))) {
count2++;
/* Test ucnv_countAliases() etc. */
count = ucnv_countAliases("utf-8", &err);
if(U_FAILURE(err)) {
- log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
+ log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
} else if(count <= 0) {
log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
} else {
static void TestConvert()
{
+#if !UCONFIG_NO_LEGACY_CONVERSION
char myptr[4];
char save[4];
int32_t testLong1 = 0;
UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */
UChar* my_ucs_file_buffer_1;
int8_t ii = 0;
- int32_t j = 0;
uint16_t codepage_index = 0;
int32_t cp = 0;
UErrorCode err = U_ZERO_ERROR;
const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
char *target=0;
- sourceLimit=sizeof(source)/sizeof(source[0]);
+ sourceLimit=UPRV_LENGTHOF(source);
err=U_ZERO_ERROR;
targetLimit=0;
}
err=U_ILLEGAL_ARGUMENT_ERROR;
- sourceLimit=sizeof(source)/sizeof(source[0]);
+ sourceLimit=UPRV_LENGTHOF(source);
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
if(i !=0 ){
log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
}
err=U_ZERO_ERROR;
- sourceLimit=sizeof(source)/sizeof(source[0]);
+ sourceLimit=UPRV_LENGTHOF(source);
targetLimit=0;
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
}
- /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/
- {
- static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
- strcpy(defaultName, ucnv_getDefaultName());
-
- log_verbose("getDefaultName returned %s\n", defaultName);
-
- /*change the default name by setting it */
- ucnv_setDefaultName("changed");
- if(strcmp(ucnv_getDefaultName(), "changed")==0)
- log_verbose("setDefaultName o.k");
- else
- log_err("setDefaultName failed");
- /*set the default name back*/
- ucnv_setDefaultName(defaultName);
- }
-
ucnv_close(someConverters[0]);
ucnv_close(someConverters[1]);
ucnv_close(someConverters[2]);
for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index)
{
int32_t i = 0;
- char* index = NULL;
err = U_ZERO_ERROR;
+#ifdef U_TOPSRCDIR
+ strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
+#else
strcpy(ucs_file_name, loadTestData(&err));
if(U_FAILURE(err)){
return;
}
- index=strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
+ {
+ char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
- if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
- *(index+1)=0;
+ if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
+ *(index+1)=0;
+ }
}
strcat(ucs_file_name,".."U_FILE_SEP_STRING);
+#endif
strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
ucs_file_in = fopen(ucs_file_name,"rb");
if (!ucs_file_in)
{
- log_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
+ log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
return;
}
if (!myConverter || U_FAILURE(err))
{
log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
-
- return;
+ fclose(ucs_file_in);
+ break;
}
/*testing for ucnv_getName() */
{
log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
}
- if (stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
+ if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
log_err("getName failed\n");
else
log_verbose("getName ok\n");
log_verbose("\n---Testing ucnv_getSubstChars...\n");
ii=4;
ucnv_getSubstChars(myConverter, myptr, &ii, &err);
+ if (ii <= 0) {
+ log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
+ }
for(x=0;x<ii;x++)
rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
err=U_ZERO_ERROR;
/*------*/
+#ifdef U_ENABLE_GENERIC_ISO_2022
/*resetState ucnv_reset()*/
log_verbose("\n---Testing ucnv_reset()..\n");
ucnv_reset(myConverter);
ucnv_close(cnv);
}
-
+#endif
+
/*getDisplayName*/
log_verbose("\n---Testing ucnv_getDisplayName()...\n");
locale=CodePagesLocale[codepage_index];
- displayname=(UChar*)malloc(1 * sizeof(UChar));
len=0;
- disnamelen = ucnv_getDisplayName(myConverter,locale,displayname, len, &err);
- if(err==U_BUFFER_OVERFLOW_ERROR)
- {
+ displayname=NULL;
+ disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
+ if(err==U_BUFFER_OVERFLOW_ERROR) {
err=U_ZERO_ERROR;
- displayname=(UChar*)realloc(displayname, (disnamelen+1) * sizeof(UChar));
+ displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
- if(U_FAILURE(err))
- {
- log_err("getDisplayName failed the error is %s\n", myErrorName(err));
+ if(U_FAILURE(err)) {
+ log_err("getDisplayName failed. The error is %s\n", myErrorName(err));
}
- else
+ else {
log_verbose(" getDisplayName o.k.\n");
+ }
+ free(displayname);
+ displayname=NULL;
+ }
+ else {
+ log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err));
}
/*test ucnv_getDisplayName with error condition*/
- log_verbose("\n---Testing ucnv_getDisplayName()...\n");
err= U_ILLEGAL_ARGUMENT_ERROR;
- len=ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
+ len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
if( len !=0 ){
log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
}
+ /*test ucnv_getDisplayName with a NULL converter*/
+ err=U_ZERO_ERROR;
+ len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
+ if( len !=0 || U_SUCCESS(err)){
+ log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
+ }
err=U_ZERO_ERROR;
/* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
/*Reads the BOM*/
- fread(&BOM, sizeof(UChar), 1, ucs_file_in);
+ {
+ // Note: gcc produces a compile warning if the return value from fread() is ignored.
+ size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in);
+ (void)numRead;
+ }
if (BOM!=0xFEFF && BOM!=0xFFFE)
{
log_err("File Missing BOM...Bailing!\n");
- return;
+ fclose(ucs_file_in);
+ break;
}
/*Reads in the file*/
- while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
+ while(!feof(ucs_file_in)&&(i+=(int32_t)fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
{
myUChar = ucs_file_buffer[i-1];
NULL,
targetcapacity2,
output_cp_buffer,
- strlen(output_cp_buffer),
+ (int32_t)strlen(output_cp_buffer),
&err);
/*if there is an buffer overflow then trap the values and pass them and make the actual call*/
uchar2,
targetsize+1,
output_cp_buffer,
- strlen(output_cp_buffer),
+ (int32_t)strlen(output_cp_buffer),
&err);
if(U_FAILURE(err))
log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
}
/*toUChars with error conditions*/
- targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err);
+ targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
if(targetsize != 0){
log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
}
err=U_ZERO_ERROR;
- targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err);
+ targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
}
log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
}
targetcapacity2=0;
- targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err);
+ targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
if (err != U_STRING_NOT_TERMINATED_WARNING) {
log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
u_errorName(err));
/*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
/*Clean up re-usable vars*/
- j=0;
log_verbose("Testing ucnv_fromUnicode().....\n");
tmp_ucs_buf=ucs_file_buffer_use;
ucnv_fromUnicode(myConverter, &mytarget_1,
TRUE,
&err);
consumedUni = (UChar*)tmp_consumedUni;
+ (void)consumedUni; /* Suppress set but not used warning. */
if (U_FAILURE(err))
{
fclose(ucs_file_in);
ucnv_close(myConverter);
- free(displayname);
if (uchar1 != 0) free(uchar1);
if (uchar2 != 0) free(uchar2);
if (uchar3 != 0) free(uchar3);
free((void*)output_cp_buffer);
free((void*)ucs_file_buffer);
free((void*)my_ucs_file_buffer);
+#endif
}
+#if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Returns a from-Unicode callback that differs from the one passed in:
 * UCNV_FROM_U_CALLBACK_SUBSTITUTE when MIA is UCNV_FROM_U_CALLBACK_STOP,
 * and UCNV_FROM_U_CALLBACK_STOP for any other value.
 */
static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
{
    if (MIA == (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP) {
        return (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    }
    return (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;
}
-
/*
 * Returns a to-Unicode callback that differs from the one passed in:
 * UCNV_TO_U_CALLBACK_SUBSTITUTE when MIA is UCNV_TO_U_CALLBACK_STOP,
 * and UCNV_TO_U_CALLBACK_STOP for any other value.
 */
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
{
    if (MIA == (UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP) {
        return (UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE;
    }
    return (UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;
}
+#endif
+
+/*
+ * Exercises ucnv_flushCache().
+ * Opens five converters (three of them the same "ibm-1047"), then compares
+ * the value returned by ucnv_flushCache() against the expected count:
+ *   - 0 while all five are open,
+ *   - 0 after closing two of the three "ibm-1047" converters,
+ *   - 2 after also closing the third "ibm-1047" and "gb18030",
+ *   - 1 after closing "ibm-954".
+ * The body is compiled out when UCONFIG_NO_LEGACY_CONVERSION is set.
+ */
+static void TestFlushCache(void) {
+#if !UCONFIG_NO_LEGACY_CONVERSION
+    enum { CONVERTER_COUNT = 5 };
+    /* Note: These converters have been chosen because they do NOT
+       encode the Latin characters (U+0041, ...), and therefore are
+       highly unlikely to be chosen as system default codepages */
+    static const char *const converterNames[CONVERTER_COUNT] = {
+        "ibm-1047", "ibm-1047", "ibm-1047", "gb18030", "ibm-954"
+    };
+    UConverter* opened[CONVERTER_COUNT];
+    UErrorCode status = U_ZERO_ERROR;
+    int flushed = 0;
+    int n;
+
+    /* flush the converter cache to get a consistent state before the flushing is tested */
+    ucnv_flushCache();
+
+    /* Testing ucnv_open(): status is deliberately not reset between opens */
+    for (n = 0; n < CONVERTER_COUNT; ++n) {
+        opened[n] = ucnv_open(converterNames[n], &status);
+        if (U_FAILURE(status)) {
+            log_data_err("FAILURE! %s\n", myErrorName(status));
+        }
+    }
+
+    /* Testing ucnv_flushCache(): everything is still open */
+    log_verbose("\n---Testing ucnv_flushCache...\n");
+    flushed = ucnv_flushCache();
+    if (flushed != 0) {
+        log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushed);
+    } else {
+        log_verbose("Flush cache ok\n");
+    }
+
+    /* testing ucnv_close() and ucnv_flushCache(): one "ibm-1047" remains open */
+    ucnv_close(opened[0]);
+    ucnv_close(opened[1]);
+
+    flushed = ucnv_flushCache();
+    if (flushed != 0) {
+        log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushed);
+    } else {
+        log_verbose("Flush cache ok\n");
+    }
+
+    ucnv_close(opened[2]);
+    ucnv_close(opened[3]);
+
+    /* expect 2 because first, second and third are same */
+    flushed = ucnv_flushCache();
+    if (flushed != 2) {
+        log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
+                     __LINE__,
+                     flushed);
+    } else {
+        log_verbose("Flush cache ok\n");
+    }
+
+    ucnv_close(opened[4]);
+    flushed = ucnv_flushCache();
+    if (flushed != 1) {
+        log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushed);
+    } else {
+        log_verbose("Flush cache ok\n");
+    }
+#endif
+}
/**
* Test the converter alias API, specifically the fuzzy matching of
/* Predetermined aliases that we expect to map back to ISO_2022
* and UTF-8. UPDATE THIS DATA AS NECESSARY. */
const char* ISO_2022_NAMES[] =
- {"ISO_2022", "iso-2022", "2022",
- "cp2022", "iso2022", "iso_2022"};
- int32_t ISO_2022_NAMES_LENGTH =
- sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]);
+ {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
+ "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
+ int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES);
const char *UTF8_NAMES[] =
{ "UTF-8", "utf-8", "utf8", "ibm-1208",
"utf_8", "ibm1208", "cp1208" };
- int32_t UTF8_NAMES_LENGTH =
- sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]);
+ int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES);
struct {
const char *name;
{ "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
{ "UTF-32", "ucs-4" }
};
- int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES);
+ int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES);
/* When there are bugs in gencnval or in ucnv_io, converters can
appear to have no aliases. */
if (strcmp(ucnv_getName(cnv, &status), name) != 0
&& (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
- "The should be the same\n",
+ "They should be the same\n",
name, ucnv_getName(cnv, &status));
}
}
continue;
}
if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
- log_err("FAIL: \"%s\" -> \"%s\", expect ISO_2022\n",
+ log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
ISO_2022_NAMES[i], mapBack);
}
}
for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
if(!mapBack) {
- log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i]);
+ log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name);
continue;
}
if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
status = U_ZERO_ERROR;
alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
- log_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
+ log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
}
status = U_ZERO_ERROR;
alias = ucnv_getStandardName("ibm-943", "IANA", &status);
return newCtx;
}
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TSCC_fromU(const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
UErrorCode subErr = U_ZERO_ERROR;
TSCCContext *newCtx;
TSCCContext *junkCtx;
+ TSCCContext **pjunkCtx = &junkCtx;
/* "recreate" it */
log_verbose("TSCC_fromU: cloning..\n");
}
/* now, SET it */
- ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)&junkCtx);
+ ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx);
ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
if(U_FAILURE(subErr)) {
}
}
-
static void TSCC_toU(const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
UErrorCode subErr = U_ZERO_ERROR;
TSCCContext *newCtx;
TSCCContext *junkCtx;
+ TSCCContext **pjunkCtx = &junkCtx;
/* "recreate" it */
log_verbose("TSCC_toU: cloning..\n");
}
/* now, SET it */
- ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)&junkCtx);
+ ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx);
ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
if(U_FAILURE(subErr)) {
UErrorCode err = U_ZERO_ERROR;
TSCCContext from1, to1;
TSCCContext *from2, *from3, *to2, *to3;
+ TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
char hunk[8192];
int32_t hunkSize = 8192;
UConverterFromUCallback junkFrom;
conv1 = ucnv_open("iso-8859-3", &err);
if(U_FAILURE(err)) {
- log_data_err("Err opening iso-8859-3, %s", u_errorName(err));
+ log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
return;
}
log_verbose("Cloned to conv2=%p.\n", conv2);
/********** from *********************/
- ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)&from2);
- ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)&from3);
+ ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
+ ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
TSCC_print_log(from2, "from2");
TSCC_print_log(from3, "from3(==from1)");
}
/********** to *********************/
- ucnv_getToUCallBack(conv2, &junkTo, (const void**)&to2);
- ucnv_getToUCallBack(conv1, &junkTo, (const void**)&to3);
+ ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
+ ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
TSCC_print_log(to2, "to2");
TSCC_print_log(to3, "to3(==to1)");
free(from2); /* from1 is stack based */
}
}
+#endif
+
+/*
+ * Scans the first `length` bytes starting at p.
+ * Returns TRUE as soon as a byte different from b is found,
+ * FALSE if every byte equals b or if length is not positive.
+ */
+static UBool
+containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
+    int32_t offset;
+    for(offset=0; offset<length; ++offset) {
+        if(p[offset]!=b) {
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
static void TestConvertSafeClone()
{
-#define CLONETEST_CONVERTER_COUNT 12
-
- char charBuffer [21]; /* Leave at an odd number for alignment testing */
- uint8_t buffer [CLONETEST_CONVERTER_COUNT] [U_CNV_SAFECLONE_BUFFERSIZE];
- int32_t bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
- UConverter * someConverters [CLONETEST_CONVERTER_COUNT];
- UConverter * someClonedConverters [CLONETEST_CONVERTER_COUNT];
- UConverter * cnv;
- UErrorCode err = U_ZERO_ERROR;
+ /* one 'regular' & all the 'private stateful' converters */
+ static const char *const names[] = {
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ "ibm-1047",
+ "ISO_2022,locale=zh,version=1",
+#endif
+ "SCSU",
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ "HZ",
+ "lmbcs",
+ "ISCII,version=0",
+ "ISO_2022,locale=kr,version=1",
+ "ISO_2022,locale=jp,version=2",
+#endif
+ "BOCU-1",
+ "UTF-7",
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ "IMAP-mailbox-name",
+ "ibm-1047-s390"
+#else
+ "IMAP=mailbox-name"
+#endif
+ };
+
+ /* store the actual sizes of each converter */
+ int32_t actualSizes[UPRV_LENGTHOF(names)];
+
+ static const int32_t bufferSizes[] = {
+ U_CNV_SAFECLONE_BUFFERSIZE,
+ (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
+ (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
+ };
+
+ char charBuffer[21]; /* Leave at an odd number for alignment testing */
+ uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
+ int32_t bufferSize, maxBufferSize;
+ const char *maxName;
+ UConverter * cnv, *cnv2;
+ UErrorCode err;
char *pCharBuffer;
const char *pConstCharBuffer;
- const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer);
- UChar uniBuffer [] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
- UChar uniCharBuffer [20];
- char charSourceBuffer [] = { 0x1b, 0x24, 0x42 };
+ const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
+ UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
+ UChar uniCharBuffer[20];
+ char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
const char *pCharSource = charSourceBuffer;
const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
UChar *pUCharTarget = uniCharBuffer;
- UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer);
+ UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
const UChar * pUniBuffer;
- const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer);
- int index;
-
- /* one 'regular' & all the 'private stateful' converters */
- someConverters[0] = ucnv_open("ibm-1047", &err);
- someConverters[1] = ucnv_open("ISO_2022", &err);
- someConverters[2] = ucnv_open("SCSU", &err);
- someConverters[3] = ucnv_open("HZ", &err);
- someConverters[4] = ucnv_open("lmbcs", &err);
- someConverters[5] = ucnv_open("ISCII,version=0",&err);
- someConverters[6] = ucnv_open("ISO_2022,locale=kr,version=1",&err);
- someConverters[7] = ucnv_open("ISO_2022,locale=jp,version=1",&err);
- someConverters[8] = ucnv_open("BOCU-1", &err);
- someConverters[9] = ucnv_open("UTF-7", &err);
- someConverters[10] = ucnv_open("IMAP-mailbox-name", &err);
- someConverters[11] = ucnv_open("ibm-1047-s390", &err);
-
- if(U_FAILURE(err)) {
- log_data_err("problems creating converters to clone- check the data.\n");
- return; /* bail - leak */
- }
- /* Check the various error & informational states: */
+ const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
+ int32_t idx, j;
- /* Null status - just returns NULL */
- if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, 0))
- {
- log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
- }
- /* error status - should return 0 & keep error the same */
- err = U_MEMORY_ALLOCATION_ERROR;
- if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
- {
- log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
- }
err = U_ZERO_ERROR;
+ cnv = ucnv_open(names[0], &err);
+ if(U_SUCCESS(err)) {
+ /* Check the various error & informational states: */
- /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
- if (0 != ucnv_safeClone(someConverters[0], buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
- {
- log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
- }
- err = U_ZERO_ERROR;
+ /* Null status - just returns NULL */
+ bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
+ if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL))
+ {
+ log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
+ }
+ /* error status - should return 0 & keep error the same */
+ err = U_MEMORY_ALLOCATION_ERROR;
+ if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
+ {
+ log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
+ }
+ err = U_ZERO_ERROR;
- /* buffer size pointer is 0 - fill in pbufferSize with a size */
- bufferSize = 0;
- if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
- {
- log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
- }
- /* Verify our define is large enough */
- if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
- {
- log_err("FAIL: Pre-calculated buffer size is too small\n");
- }
- /* Verify we can use this run-time calculated size */
- if (0 == (cnv = ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err)) || U_FAILURE(err))
- {
- log_err("FAIL: Converter can't be cloned with run-time size\n");
- }
- if (cnv)
- ucnv_close(cnv);
- /* size one byte too small - should allocate & let us know */
- --bufferSize;
- if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
- {
- log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
- }
- if (cnv)
- ucnv_close(cnv);
- err = U_ZERO_ERROR;
- bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
+ /* Null buffer size pointer is ok */
+ if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err))
+ {
+ log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
+ }
+ ucnv_close(cnv2);
+ err = U_ZERO_ERROR;
- /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
- if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
- {
- log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
- }
- if (cnv)
- ucnv_close(cnv);
- err = U_ZERO_ERROR;
+ /* buffer size pointer is 0 - fill in pbufferSize with a size */
+ bufferSize = 0;
+ if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
+ {
+ log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
+ }
+ /* Verify our define is large enough */
+ if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
+ {
+ log_err("FAIL: Pre-calculated buffer size is too small\n");
+ }
+ /* Verify we can use this run-time calculated size */
+ if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
+ {
+ log_err("FAIL: Converter can't be cloned with run-time size\n");
+ }
+ if (cnv2) {
+ ucnv_close(cnv2);
+ }
+
+ /* size one byte too small - should allocate & let us know */
+ --bufferSize;
+ if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
+ {
+ log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
+ }
+ if (cnv2) {
+ ucnv_close(cnv2);
+ }
+
+ err = U_ZERO_ERROR;
+ bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
+
+ /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
+ if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
+ {
+ log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
+ }
+ if (cnv2) {
+ ucnv_close(cnv2);
+ }
+
+ err = U_ZERO_ERROR;
- /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
- if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
- {
- log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
+ /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
+ if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
+ {
+ log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
+ }
+
+ ucnv_close(cnv);
}
- err = U_ZERO_ERROR;
+ maxBufferSize = 0;
+ maxName = "";
/* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
- for (index = 0; index < CLONETEST_CONVERTER_COUNT; index++)
- {
- bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
- someClonedConverters[index] = ucnv_safeClone(someConverters[index], buffer[index], &bufferSize, &err);
+ for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) {
+ for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
+ {
+ err = U_ZERO_ERROR;
+ cnv = ucnv_open(names[idx], &err);
+ if(U_FAILURE(err)) {
+ log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
+ continue;
+ }
- /* close the original immediately to make sure that the clone works by itself */
- ucnv_close(someConverters[index]);
+ if(j == 0) {
+ /* preflight to get maxBufferSize */
+ actualSizes[idx] = 0;
+ ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
+ if(actualSizes[idx] > maxBufferSize) {
+ maxBufferSize = actualSizes[idx];
+ maxName = names[idx];
+ }
+ }
- pCharBuffer = charBuffer;
- pUniBuffer = uniBuffer;
+ memset(buffer, 0xaa, sizeof(buffer));
- ucnv_fromUnicode(someClonedConverters[index],
- &pCharBuffer,
- charBufferLimit,
- &pUniBuffer,
- uniBufferLimit,
- NULL,
- TRUE,
- &err);
- if(U_FAILURE(err)){
- log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
- }
- ucnv_toUnicode(someClonedConverters[index],
- &pUCharTarget,
- pUCharTargetLimit,
- &pCharSource,
- pCharSourceLimit,
- NULL,
- TRUE,
- &err
- );
+ bufferSize = bufferSizes[j];
+ cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
- if(U_FAILURE(err)){
- log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
- }
+ /* close the original immediately to make sure that the clone works by itself */
+ ucnv_close(cnv);
- pConstCharBuffer = charBuffer;
- if (uniBuffer [0] != ucnv_getNextUChar(someClonedConverters[index], &pConstCharBuffer, pCharBuffer, &err))
- {
- log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
+ if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) &&
+ err == U_SAFECLONE_ALLOCATED_WARNING
+ ) {
+ log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
+ }
+
+ /* check if the clone function overwrote any bytes that it is not supposed to touch */
+ if(bufferSize <= bufferSizes[j]) {
+ /* used the stack buffer */
+ if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
+ containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
+ ) {
+ log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
+ names[idx], bufferSize, bufferSizes[j]);
+ }
+ } else {
+ /* heap-allocated the clone */
+ if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
+ log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
+ names[idx], bufferSize, bufferSizes[j]);
+ }
+ }
+
+ pCharBuffer = charBuffer;
+ pUniBuffer = uniBuffer;
+
+ ucnv_fromUnicode(cnv2,
+ &pCharBuffer,
+ charBufferLimit,
+ &pUniBuffer,
+ uniBufferLimit,
+ NULL,
+ TRUE,
+ &err);
+ if(U_FAILURE(err)){
+ log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
+ }
+ ucnv_toUnicode(cnv2,
+ &pUCharTarget,
+ pUCharTargetLimit,
+ &pCharSource,
+ pCharSourceLimit,
+ NULL,
+ TRUE,
+ &err
+ );
+
+ if(U_FAILURE(err)){
+ log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
+ }
+
+ pConstCharBuffer = charBuffer;
+ if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
+ {
+ log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
+ }
+ ucnv_close(cnv2);
}
- ucnv_close(someClonedConverters[index]);
+ }
+
+ log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
+ sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
+ if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
+ log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
+ maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
}
}
static void TestCCSID() {
+#if !UCONFIG_NO_LEGACY_CONVERSION
UConverter *cnv;
UErrorCode errorCode;
- int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 33722 };
+ int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
int32_t i, ccsid;
- for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) {
+ for(i=0; i<UPRV_LENGTHOF(ccsids); ++i) {
ccsid=ccsids[i];
errorCode=U_ZERO_ERROR;
cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
if(U_FAILURE(errorCode)) {
- log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
+ log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
continue;
}
log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode));
}
- if(UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
+ /* skip gb18030(ccsid 1392) */
+ if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode));
}
ucnv_close(cnv);
}
+#endif
}
/* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
*/
static void bug1()
{
- static char char_in[CHUNK_SIZE+32];
- static char char_out[CHUNK_SIZE*2];
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ char char_in[CHUNK_SIZE+32];
+ char char_out[CHUNK_SIZE*2];
/* GB 18030 equivalent of U+10000 is 90308130 */
static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
}
}
+#endif
}
/* bug2: pre-flighting loop bug: simple overflow causes bug */
{
/* US-ASCII "1234567890" */
static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
+#if !UCONFIG_ONLY_HTML_CONVERSION
static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
0x00, 0x00, 0x00, 0x31,
0x00, 0x00, 0x00, 0x37,
0x00, 0x00, 0x00, 0x38,
0x00, 0x00, (char)0xf0, 0x00};
+#endif
+
static char target[5];
UErrorCode err = U_ZERO_ERROR;
log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
err = U_ZERO_ERROR;
/* do the conversion */
size = ucnv_convert("UTF-32BE", /* out */
/* bug2: size is 5, should be 12 */
log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
}
+#endif
}
/*
*/
static void bug3()
{
- static char char_in[CHUNK_SIZE*4];
- static char target[5];
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+ char char_in[CHUNK_SIZE*4];
+ char target[5];
UErrorCode err = U_ZERO_ERROR;
int32_t size;
*/
log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size);
}
+#endif
}
static void
pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
FALSE, flush, &errorCode);
targetLength=(int32_t)(target-targetBuffer);
+ if(target>targetLimit) {
+ log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
+ testName, chunkSize, target, targetLimit);
+ break; /* TODO: major problem! */
+ }
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
/* continue converting another chunk */
errorCode=U_ZERO_ERROR;
}
static void TestConvertEx() {
+#if !UCONFIG_NO_LEGACY_CONVERSION
static const uint8_t
utf8[]={
/* 4e00 30a1 ff61 0410 */
* expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
* SUB, SUB, 0x40, SUB, SUB, 0x40
*/
- 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40
+ 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
};
char srcBuffer[100], targetBuffer[100];
log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
}
+ /* streaming conversion without a pivot buffer */
+ errorCode=U_ZERO_ERROR;
+ src=srcBuffer;
+ pivotSource=pivotBuffer;
+ ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
+ NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode);
+ if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+ log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
+ }
+
ucnv_close(cnv1);
ucnv_close(cnv2);
+#endif
}
-static void
-TestConvertAlgorithmic() {
- static const uint8_t
- utf8[]={
- /* 4e00 30a1 ff61 0410 */
- 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
- },
- shiftJIS[]={
- 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
- },
- /*errorTarget[]={*/
- /*
- * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
- * SUB, SUB, 0x40, SUB, SUB, 0x40
- */
- /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
- /*},*/
- utf16[]={
- 0xfe, 0xff /* BOM only, no text */
- },
- utf32[]={
- 0xff, 0xfe, 0, 0 /* BOM only, no text */
- };
-
- char target[100], utf8NUL[100], shiftJISNUL[100];
+/*
+ * Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8().
+ *
+ * Table of malformed UTF-8 byte sequences, grouped by the section comments below.
+ * Each entry is a NUL-terminated C string and the users of this table measure
+ * it with strlen(), so an entry cannot contain an embedded 0x00 byte.
+ */
+static const char *const badUTF8[]={
+ /* trail byte */
+ "\x80",
+
+ /* truncated multi-byte sequences */
+ "\xd0",
+ "\xe0",
+ "\xe1",
+ "\xed",
+ "\xee",
+ "\xf0",
+ "\xf1",
+ "\xf4",
+ "\xf8",
+ "\xfc",
+
+ "\xe0\x80",
+ "\xe0\xa0",
+ "\xe1\x80",
+ "\xed\x80",
+ "\xed\xa0",
+ "\xee\x80",
+ "\xf0\x80",
+ "\xf0\x90",
+ "\xf1\x80",
+ "\xf4\x80",
+ "\xf4\x90",
+ "\xf8\x80",
+ "\xfc\x80",
+
+ "\xf0\x80\x80",
+ "\xf0\x90\x80",
+ "\xf1\x80\x80",
+ "\xf4\x80\x80",
+ "\xf4\x90\x80",
+ "\xf8\x80\x80",
+ "\xfc\x80\x80",
+
+ "\xf8\x80\x80\x80",
+ "\xfc\x80\x80\x80",
+
+ "\xfc\x80\x80\x80\x80",
+
+ /* complete sequences but non-shortest forms or out of range etc. */
+ "\xc0\x80",
+ "\xe0\x80\x80",
+ "\xed\xa0\x80",
+ "\xf0\x80\x80\x80",
+ "\xf4\x90\x80\x80",
+ "\xf8\x80\x80\x80\x80",
+ "\xfc\x80\x80\x80\x80\x80",
+ "\xfe",
+ "\xff"
+};
+
+#define ARG_CHAR_ARR_SIZE 8
+
+/*
+ * Get some character that can be converted and convert it.
+ *
+ * Takes the element in the middle of cnv's UCNV_ROUNDTRIP_SET, writes its UTF-8
+ * form to charUTF8 (length in *pCharUTF8Length), and converts its UTF-16 form
+ * with cnv twice in a row: the first result goes to char0/*pChar0Length, the
+ * second to char1/*pChar1Length. Each output buffer is treated as holding
+ * ARG_CHAR_ARR_SIZE bytes.
+ *
+ * Returns TRUE on success; logs an error naming converterName and returns
+ * FALSE if errorCode indicates a failure after both conversions.
+ */
+static UBool getTestChar(UConverter *cnv, const char *converterName,
+ char charUTF8[4], int32_t *pCharUTF8Length,
+ char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
+ char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
+ UChar utf16[U16_MAX_LENGTH];
+ int32_t utf16Length;
+
+ const UChar *utf16Source;
+ char *target;
- UConverter *cnv;
+ USet *set;
+ UChar32 c;
UErrorCode errorCode;
- int32_t length;
-
errorCode=U_ZERO_ERROR;
- cnv=ucnv_open("Shift-JIS", &errorCode);
+ /* NOTE(review): the range 1..0 presumably yields an empty starting set - confirm against uset_open() */
+ set=uset_open(1, 0);
+ ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
+ c=uset_charAt(set, uset_size(set)/2);
+ uset_close(set);
+
+ /* encode the chosen code point as UTF-16 (conversion input) and UTF-8 (for the caller) */
+ utf16Length=0;
+ U16_APPEND_UNSAFE(utf16, utf16Length, c);
+ *pCharUTF8Length=0;
+ U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
+
+ /* first conversion */
+ utf16Source=utf16;
+ target=char0;
+ ucnv_fromUnicode(cnv,
+ &target, char0+ARG_CHAR_ARR_SIZE,
+ &utf16Source, utf16+utf16Length,
+ NULL, FALSE, &errorCode);
+ *pChar0Length=(int32_t)(target-char0);
+
+ /* second conversion of the same input, without resetting cnv in between */
+ utf16Source=utf16;
+ target=char1;
+ ucnv_fromUnicode(cnv,
+ &target, char1+ARG_CHAR_ARR_SIZE,
+ &utf16Source, utf16+utf16Length,
+ NULL, FALSE, &errorCode);
+ *pChar1Length=(int32_t)(target-char1);
+
if(U_FAILURE(errorCode)) {
- log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
- ucnv_close(cnv);
- return;
+ log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
+ return FALSE;
}
+ return TRUE;
+}
- memcpy(utf8NUL, utf8, sizeof(utf8));
+/*
+ * Returns TRUE if s[0..length[ is exactly one truncated UTF-8 sequence:
+ * a lone lead byte, or a lead byte followed by fewer trail bytes than it
+ * announces where U8_FWD_1() still consumes the whole input.
+ */
+static UBool isOneTruncatedUTF8(const char *s, int32_t length) {
+    int32_t trailCount;
+    int32_t consumed;
+
+    if(length==0) {
+        return FALSE;
+    }
+    if(length==1) {
+        return U8_IS_LEAD(s[0]);
+    }
+
+    trailCount=U8_COUNT_TRAIL_BYTES(s[0]);
+    if(length>trailCount) {
+        /* at least as many bytes as a complete sequence needs: not truncated */
+        return FALSE;
+    }
+
+    /*
+     * 2 or more bytes, but fewer than the lead byte indicates.
+     * Truncated only if stepping over one sequence reaches the end of the string.
+     * Not true if the lead byte and first trail byte do not start a valid sequence,
+     * e.g., E0 80 -> consumed=1.
+     */
+    consumed=0;
+    U8_FWD_1(s, consumed, length);
+    return consumed==length;
+}
+
+/*
+ * For every entry of badUTF8[] that isOneTruncatedUTF8() accepts, converts
+ * "test character + truncated sequence" from UTF-8 (utf8Cnv) to cnv with
+ * ucnv_convertEx() (reset and flush set) and the to-Unicode STOP callback.
+ *
+ * Expects U_TRUNCATED_CHAR_FOUND with pivotSource still at the start of the
+ * pivot buffer, and expects ucnv_getInvalidChars(utf8Cnv) to return exactly the
+ * truncated sequence.
+ *
+ * charUTF8/charUTF8Length: UTF-8 form of the test character.
+ * char0/char1 and their lengths are not used here; they are accepted so that
+ * the signature parallels testFromBadUTF8().
+ * converterName is used only in log messages.
+ */
+static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
+                                  char charUTF8[4], int32_t charUTF8Length,
+                                  char char0[ARG_CHAR_ARR_SIZE], int32_t char0Length,
+                                  char char1[ARG_CHAR_ARR_SIZE], int32_t char1Length) {
+    char utf8[16];
+    int32_t utf8Length;
+
+    char output[16];
+
+    char invalidChars[8];
+    int8_t invalidLength;
+
+    const char *source;
+    char *target;
+
+    UChar pivotBuffer[8];
+    UChar *pivotSource, *pivotTarget;
+
+    UErrorCode errorCode;
+    int32_t i;
+
+    /* Suppress unused-parameter warnings. */
+    (void)char0;
+    (void)char0Length;
+    (void)char1;
+    (void)char1Length;
+
+    /* test truncated sequences */
+    errorCode=U_ZERO_ERROR;
+    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        /* without the STOP callback the expectations below are meaningless */
+        log_err("unable to set the STOP callback for %s - %s\n", converterName, u_errorName(errorCode));
+        return;
+    }
+
+    memcpy(utf8, charUTF8, charUTF8Length);
+
+    for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
+        /* truncated sequence? */
+        int32_t length = (int32_t)strlen(badUTF8[i]);
+        if(!isOneTruncatedUTF8(badUTF8[i], length)) {
+            continue;
+        }
+
+        /* assemble a string with the test character and the truncated sequence */
+        memcpy(utf8+charUTF8Length, badUTF8[i], length);
+        utf8Length=charUTF8Length+length;
+
+        /* convert and check the invalidChars */
+        source=utf8;
+        target=output;
+        pivotSource=pivotTarget=pivotBuffer;
+        errorCode=U_ZERO_ERROR;
+        ucnv_convertEx(cnv, utf8Cnv,
+                       &target, output+sizeof(output),
+                       &source, utf8+utf8Length,
+                       pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
+                       TRUE, TRUE, /* reset & flush */
+                       &errorCode);
+        if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) {
+            log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i);
+            continue;
+        }
+
+        errorCode=U_ZERO_ERROR;
+        invalidLength=(int8_t)sizeof(invalidChars);
+        ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode);
+        /* a failed query leaves invalidChars unspecified, so treat it as a mismatch explicitly */
+        if(U_FAILURE(errorCode) || invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) {
+            log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i);
+        }
+    }
+}
+
+/*
+ * Builds one long UTF-8 input consisting of the test character followed, for
+ * every badUTF8[] entry, by that bad sequence and the test character again,
+ * and an expected output of char0 followed by one char1 per badUTF8[] entry.
+ * Then runs convertExMultiStreaming() from utf8Cnv to cnv with the to-Unicode
+ * SKIP callback, expecting every bad sequence to be detected and skipped.
+ *
+ * charUTF8/charUTF8Length: UTF-8 form of the test character.
+ * char0/char0Length: the test character's first conversion result in cnv's charset.
+ * char1/char1Length: the result of converting it again afterwards.
+ * converterName is used to build the test name passed to convertExMultiStreaming().
+ */
+static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
+                            char charUTF8[4], int32_t charUTF8Length,
+                            char char0[ARG_CHAR_ARR_SIZE], int32_t char0Length,
+                            char char1[ARG_CHAR_ARR_SIZE], int32_t char1Length) {
+    static const char testNamePrefix[]="from bad UTF-8 to ";
+
+    char utf8[600], expect[600];
+    int32_t utf8Length, expectLength;
+
+    char testName[64];
+
+    UErrorCode errorCode;
+    int32_t i;
+
+    errorCode=U_ZERO_ERROR;
+    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        /* without the SKIP callback the expected output below would be wrong */
+        log_err("unable to set the SKIP callback for %s - %s\n", converterName, u_errorName(errorCode));
+        return;
+    }
+
+    /*
+     * assemble an input string with the test character between each
+     * bad sequence,
+     * and an expected string with repeated test character output
+     */
+    memcpy(utf8, charUTF8, charUTF8Length);
+    utf8Length=charUTF8Length;
+
+    memcpy(expect, char0, char0Length);
+    expectLength=char0Length;
+
+    for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
+        int32_t length = (int32_t)strlen(badUTF8[i]);
+
+        /* guard the fixed-size buffers in case badUTF8[] grows */
+        if( utf8Length+length+charUTF8Length>(int32_t)sizeof(utf8) ||
+            expectLength+char1Length>(int32_t)sizeof(expect)
+        ) {
+            log_err("testFromBadUTF8(%s): buffers too small for badUTF8[%ld]\n", converterName, (long)i);
+            return;
+        }
+
+        memcpy(utf8+utf8Length, badUTF8[i], length);
+        utf8Length+=length;
+
+        memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
+        utf8Length+=charUTF8Length;
+
+        memcpy(expect+expectLength, char1, char1Length);
+        expectLength+=char1Length;
+    }
+
+    /*
+     * expect that each bad UTF-8 sequence is detected and skipped
+     *
+     * The converter name is appended with an explicit bound so that a long
+     * name is truncated rather than overflowing testName; strncat() always
+     * NUL-terminates, and sizeof(testNamePrefix) already counts that NUL.
+     */
+    strcpy(testName, testNamePrefix);
+    strncat(testName, converterName, sizeof(testName)-sizeof(testNamePrefix));
+
+    convertExMultiStreaming(utf8Cnv, cnv,
+                            utf8, utf8Length,
+                            expect, expectLength,
+                            testName,
+                            U_ZERO_ERROR);
+}
+
+/* Test illegal UTF-8 input. */
+/*
+ * Test illegal UTF-8 input.
+ *
+ * Opens a UTF-8 converter and, for each target converter in converterNames[],
+ * obtains a convertible test character via getTestChar() and runs
+ * testFromTruncatedUTF8() and testFromBadUTF8() with it.
+ * Converters that cannot be opened are reported with log_data_err() and skipped.
+ */
+static void TestConvertExFromUTF8() {
+    static const char *const converterNames[]={
+#if !UCONFIG_NO_LEGACY_CONVERSION
+        "windows-1252",
+        "shift-jis",
+#endif
+        "us-ascii",
+        "iso-8859-1",
+        "utf-8"
+    };
+
+    UConverter *utf8Cnv, *cnv;
+    UErrorCode errorCode;
+    int32_t i;
+
+    /*
+     * fromUnicode versions of some character, from initial state and later;
+     * getTestChar() writes up to ARG_CHAR_ARR_SIZE bytes into char0 and char1
+     */
+    char charUTF8[4], char0[ARG_CHAR_ARR_SIZE], char1[ARG_CHAR_ARR_SIZE];
+    int32_t charUTF8Length, char0Length, char1Length;
+
+    errorCode=U_ZERO_ERROR;
+    utf8Cnv=ucnv_open("UTF-8", &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
+        return;
+    }
+
+    for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
+        errorCode=U_ZERO_ERROR;
+        cnv=ucnv_open(converterNames[i], &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
+            continue;
+        }
+        /* getTestChar() logs its own failure; the sub-tests only run when it succeeds */
+        if(getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
+            testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
+            testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
+        }
+        /* close on every path so that a getTestChar() failure does not leak the converter */
+        ucnv_close(cnv);
+    }
+    ucnv_close(utf8Cnv);
+}
+
+/*
+ * Converts the two-byte ill-formed UTF-8 input C5 F0 to each converter in
+ * converterNames[] with ucnv_convertEx(), using the from-Unicode ESCAPE
+ * callback with UCNV_ESCAPE_XML_DEC.
+ *
+ * The target limit is set to exactly the expected output length, so the
+ * expected result is U_STRING_NOT_TERMINATED_WARNING, all source bytes
+ * consumed, the expected bytes in dest, and the byte after them untouched.
+ */
+static void TestConvertExFromUTF8_C5F0() {
+    static const char *const converterNames[]={
+#if !UCONFIG_NO_LEGACY_CONVERSION
+        "windows-1251",
+        "shift-jis",
+#endif
+        "us-ascii",
+        "iso-8859-1",
+        "utf-8"     /* must stay last: the loop treats the last entry as the UTF-8 target */
+    };
+
+    UConverter *utf8Cnv, *cnv;
+    UErrorCode errorCode;
+    int32_t i;
+
+    static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
+    /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
+    static const char twoNCRs[16]={
+        0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
+        0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
+    };
+    static const char twoFFFD[6]={
+        (char)0xef, (char)0xbf, (char)0xbd,
+        (char)0xef, (char)0xbf, (char)0xbd
+    };
+    const char *expected;
+    int32_t expectedLength;
+    char dest[20];  /* longer than longest expectedLength */
+
+    const char *src;
+    char *target;
+
+    UChar pivotBuffer[128];
+    UChar *pivotSource, *pivotTarget;
+
+    errorCode=U_ZERO_ERROR;
+    utf8Cnv=ucnv_open("UTF-8", &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
+        return;
+    }
+
+    for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
+        errorCode=U_ZERO_ERROR;
+        cnv=ucnv_open(converterNames[i], &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_data_err("unable to open %s converter - %s\n",
+                         converterNames[i], u_errorName(errorCode));
+            continue;
+        }
+        ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
+                              NULL, NULL, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            /* the converter is open at this point, so release it before skipping */
+            log_err("unable to set the escape callback on the %s converter - %s\n",
+                    converterNames[i], u_errorName(errorCode));
+            ucnv_close(cnv);
+            continue;
+        }
+        src=bad_utf8;
+        target=dest;
+        /* fill with a sentinel so that writes past the expected length are detectable */
+        uprv_memset(dest, 9, sizeof(dest));
+        if(i==UPRV_LENGTHOF(converterNames)-1) {
+            /* conversion to UTF-8 yields two U+FFFD directly */
+            expected=twoFFFD;
+            expectedLength=(int32_t)sizeof(twoFFFD);
+        } else {
+            /* conversion to a non-Unicode charset yields two NCRs */
+            expected=twoNCRs;
+            expectedLength=(int32_t)sizeof(twoNCRs);
+        }
+        pivotBuffer[0]=0;
+        pivotBuffer[1]=1;
+        pivotBuffer[2]=2;
+        pivotSource=pivotTarget=pivotBuffer;
+        ucnv_convertEx(
+            cnv, utf8Cnv,
+            &target, dest+expectedLength,
+            &src, bad_utf8+sizeof(bad_utf8),
+            pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
+            TRUE, TRUE, &errorCode);
+        if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
+            target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
+            dest[expectedLength]!=9
+        ) {
+            log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
+        }
+        ucnv_close(cnv);
+    }
+    ucnv_close(utf8Cnv);
+}
+
+static void
+TestConvertAlgorithmic() {
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ static const uint8_t
+ utf8[]={
+ /* 4e00 30a1 ff61 0410 */
+ 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
+ },
+ shiftJIS[]={
+ 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
+ },
+ /*errorTarget[]={*/
+ /*
+ * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
+ * SUB, SUB, 0x40, SUB, SUB, 0x40
+ */
+ /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
+ /*},*/
+ utf16[]={
+ 0xfe, 0xff /* BOM only, no text */
+ };
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ static const uint8_t utf32[]={
+ 0xff, 0xfe, 0, 0 /* BOM only, no text */
+ };
+#endif
+
+ char target[100], utf8NUL[100], shiftJISNUL[100];
+
+ UConverter *cnv;
+ UErrorCode errorCode;
+
+ int32_t length;
+
+ errorCode=U_ZERO_ERROR;
+ cnv=ucnv_open("Shift-JIS", &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
+ ucnv_close(cnv);
+ return;
+ }
+
+ memcpy(utf8NUL, utf8, sizeof(utf8));
utf8NUL[sizeof(utf8)]=0;
memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
shiftJISNUL[sizeof(shiftJIS)]=0;
u_errorName(errorCode), length);
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
errorCode=U_ZERO_ERROR;
length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
u_errorName(errorCode), length);
}
+#endif
/* bad arguments */
errorCode=U_MESSAGE_PARSE_ERROR;
log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
}
ucnv_close(cnv);
+#endif
}
+#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
+/*
+ * Checks ucnv_getMaxCharSize() for every converter in the table below
+ * (LMBCS variants plus a number of other converters) against the expected
+ * maxSize, and checks that UCNV_GET_MAX_BYTES_FOR_STRING(1, 2) is at least 10.
+ * NOTE(review): names starting with '*' are presumably resolved by cnv_open()
+ * as test-data converters - confirm against cnv_open().
+ */
static void TestLMBCSMaxChar(void) {
static const struct {
int8_t maxSize;
const char *name;
} converter[] = {
- { 2, "LMBCS-1"},
- { 2, "LMBCS-2"},
- { 2, "LMBCS-3"},
- { 2, "LMBCS-4"},
- { 2, "LMBCS-5"},
- { 2, "LMBCS-6"},
- { 2, "LMBCS-8"},
- { 2, "LMBCS-11"},
- { 2, "LMBCS-16"},
- { 2, "LMBCS-17"},
- { 2, "LMBCS-18"},
- { 2, "LMBCS-19"}
+ /* some non-LMBCS converters - perfect test setup here */
+ { 1, "US-ASCII"},
+ { 1, "ISO-8859-1"},
+
+ { 2, "UTF-16"},
+ { 2, "UTF-16BE"},
+ { 3, "UTF-8"},
+ { 3, "CESU-8"},
+ { 3, "SCSU"},
+ { 4, "UTF-32"},
+ { 4, "UTF-7"},
+ { 4, "IMAP-mailbox-name"},
+ { 4, "BOCU-1"},
+
+ { 1, "windows-1256"},
+ { 2, "Shift-JIS"},
+ { 2, "ibm-16684"},
+ { 3, "ibm-930"},
+ { 3, "ibm-1390"},
+ { 4, "*test3"},
+ { 16,"*test4"},
+
+ { 4, "ISCII"},
+ { 4, "HZ"},
+
+ { 3, "ISO-2022"},
+ { 8, "ISO-2022-KR"},
+ { 6, "ISO-2022-JP"},
+ { 8, "ISO-2022-CN"},
+
+ /* LMBCS */
+ { 3, "LMBCS-1"},
+ { 3, "LMBCS-2"},
+ { 3, "LMBCS-3"},
+ { 3, "LMBCS-4"},
+ { 3, "LMBCS-5"},
+ { 3, "LMBCS-6"},
+ { 3, "LMBCS-8"},
+ { 3, "LMBCS-11"},
+ { 3, "LMBCS-16"},
+ { 3, "LMBCS-17"},
+ { 3, "LMBCS-18"},
+ { 3, "LMBCS-19"}
};
int32_t idx;
- for (idx = 0; idx < LENGTHOF(converter); idx++) {
+ for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) {
UErrorCode status = U_ZERO_ERROR;
- UConverter *cnv = ucnv_open(converter[idx].name, &status);
+ UConverter *cnv = cnv_open(converter[idx].name, &status);
+ /* a converter that cannot be opened is skipped without logging anything */
if (U_FAILURE(status)) {
continue;
}
if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
- log_data_err("error: for %s expected %d, got %d\n",
+ log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
}
ucnv_close(cnv);
}
-}
+ /* mostly test that the macro compiles */
+ if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
+ log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
+ }
+}
+#endif
static void TestJ1968(void) {
UErrorCode err = U_ZERO_ERROR;
}
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void
testSwap(const char *name, UBool swap) {
/*
/* convert to EBCDIC */
pcu=text;
pc=normal;
- ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
+ ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode);
normalLength=(int32_t)(pc-normal);
pcu=text;
pc=swapped;
- ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
+ ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode);
swappedLength=(int32_t)(pc-swapped);
if(U_FAILURE(errorCode)) {
/* convert back to Unicode (may not roundtrip) */
pc=normal;
pu=uNormal;
- ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode);
normalLength=(int32_t)(pu-uNormal);
pc=normal;
pu=uSwapped;
- ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode);
+ ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode);
swappedLength=(int32_t)(pu-uSwapped);
if(U_FAILURE(errorCode)) {
int i;
- for(i=0; i<LENGTHOF(tests); ++i) {
+ for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
testSwap(tests[i].name, tests[i].swap);
}
}
+#else
+/* Empty stand-in used when UCONFIG_NO_LEGACY_CONVERSION is set, so that the test name still exists. */
+static void
+TestEBCDICSwapLFNL() {
+ /* test nothing... */
+}
+#endif
+
+/*
+ * Tests ucnv_fromUCountPending() with the "test3" test-data converter:
+ * first with a table of short inputs and the expected pending counts after one
+ * non-flushing ucnv_fromUnicode() call, then with a head/middle/tail sequence
+ * fed in three calls into a target that is too small.
+ * Compiled to an empty function when UCONFIG_NO_LEGACY_CONVERSION is set.
+ */
+static void TestFromUCountPending(){
+#if !UCONFIG_NO_LEGACY_CONVERSION
+    UErrorCode status = U_ZERO_ERROR;
+    static const struct {
+        UChar input[6];
+        int32_t len;
+        int32_t exp;
+    }fromUnicodeTests[] = {
+        /*m:n conversion*/
+        {{0xdbc4},1,1},
+        {{ 0xdbc4, 0xde34, 0xd84d},3,1},
+        {{ 0xdbc4, 0xde34, 0xd900},3,3},
+    };
+    int i;
+    UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
+    if(U_FAILURE(status)){
+        log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
+        return;
+    }
+    for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) {
+        char tgt[10];
+        char* target = tgt;
+        char* targetLimit = target + 10;
+        const UChar* source = fromUnicodeTests[i].input;
+        const UChar* sourceLimit = source + fromUnicodeTests[i].len;
+        int32_t len = 0;
+        ucnv_reset(cnv);
+        ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        len = ucnv_fromUCountPending(cnv, &status);
+        if(U_FAILURE(status)){
+            log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
+            status = U_ZERO_ERROR;
+            continue;
+        }
+        if(len != fromUnicodeTests[i].exp){
+            log_err("ucnv_fromUCountPending() returned %d for fromUnicodeTests[%d], expected %d\n",
+                    (int)len, i, (int)fromUnicodeTests[i].exp);
+        }
+    }
+    status = U_ZERO_ERROR;
+    {
+        /*
+         * The converter has to read the tail before it knows that
+         * only head alone matches.
+         * At the end, the output for head will overflow the target,
+         * middle will be pending, and tail will not have been consumed.
+         */
+        /*
+        \U00101234  -> x (<U101234>   \x07 |0)
+        \U00101234\U00050005 -> y (<U101234>+<U50005>          \x07+\x00+\x01\x02\x0e+\x05 |0)
+        \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
+        \U00060007 -> unassigned
+        */
+        static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
+        static const UChar middle[] = {0xD940,0x0000};     /* first half of \U00060006 or \U00060007 */
+        static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
+        char tgt[10];
+        char* target = tgt;
+        char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
+        const UChar* source = head;
+        const UChar* sourceLimit = source + u_strlen(head);
+        int32_t len = 0;
+        ucnv_reset(cnv);
+        ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        len = ucnv_fromUCountPending(cnv, &status);
+        if(U_FAILURE(status)){
+            log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
+            status = U_ZERO_ERROR;
+        }
+        if(len!=4){
+            log_err("ucnv_fromUCountPending did not return correct length for head\n");
+        }
+        source = middle;
+        sourceLimit = source + u_strlen(middle);
+        ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        len = ucnv_fromUCountPending(cnv, &status);
+        if(U_FAILURE(status)){
+            log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
+            status = U_ZERO_ERROR;
+        }
+        if(len!=5){
+            log_err("ucnv_fromUCountPending did not return correct length for middle\n");
+        }
+        source = tail;
+        sourceLimit = source + u_strlen(tail);
+        ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        if(status != U_BUFFER_OVERFLOW_ERROR){
+            log_err("ucnv_fromUnicode did not report the expected U_BUFFER_OVERFLOW_ERROR. Error: %s\n", u_errorName(status));
+        }
+        status = U_ZERO_ERROR;
+        len = ucnv_fromUCountPending(cnv, &status);
+        /* the one code unit of middle is expected to be pending; tail has not been consumed */
+        if(U_FAILURE(status)){
+            log_err("ucnv_fromUCountPending call did not succeed. Error: %s\n", u_errorName(status));
+        }
+        if(len!=1){
+            log_err("ucnv_fromUCountPending did not return correct length for tail\n");
+        }
+    }
+    ucnv_close(cnv);
+#endif
+}
+/*
+ * Tests ucnv_toUCountPending(): first with the "test3" test-data converter and
+ * a table of short byte inputs with the expected pending counts after one
+ * non-flushing ucnv_toUnicode() call, then with the "test4" converter and a
+ * head/mid/tail byte sequence fed in three calls into a target that is too small.
+ * Both converters use the to-Unicode STOP callback.
+ * Compiled to an empty function when UCONFIG_NO_LEGACY_CONVERSION is set.
+ */
+static void
+TestToUCountPending(){
+#if !UCONFIG_NO_LEGACY_CONVERSION
+    UErrorCode status = U_ZERO_ERROR;
+    static const struct {
+        char input[6];
+        int32_t len;
+        int32_t exp;
+    }toUnicodeTests[] = {
+        /*m:n conversion*/
+        {{0x05, 0x01, 0x02},3,3},
+        {{0x01, 0x02},2,2},
+        {{0x07,  0x00, 0x01, 0x02},4,4},
+    };
+    int i;
+    /* stays NULL: the previous callback is not requested back from ucnv_setToUCallBack() */
+    UConverterToUCallback *oldToUAction= NULL;
+    UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
+    if(U_FAILURE(status)){
+        log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
+        return;
+    }
+    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
+    for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) {
+        UChar tgt[20];
+        UChar* target = tgt;
+        UChar* targetLimit = target + 20;
+        const char* source = toUnicodeTests[i].input;
+        const char* sourceLimit = source + toUnicodeTests[i].len;
+        int32_t len = 0;
+        ucnv_reset(cnv);
+        ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        len = ucnv_toUCountPending(cnv,&status);
+        if(U_FAILURE(status)){
+            log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
+            status = U_ZERO_ERROR;
+            continue;
+        }
+        if(len != toUnicodeTests[i].exp){
+            log_err("ucnv_toUCountPending() returned %d for toUnicodeTests[%d], expected %d\n",
+                    (int)len, i, (int)toUnicodeTests[i].exp);
+        }
+    }
+    status = U_ZERO_ERROR;
+    ucnv_close(cnv);
+
+    {
+        /*
+         * The converter has to read the tail before it knows that
+         * only head alone matches.
+         * At the end, the output for head will overflow the target,
+         * mid will be pending, and tail will not have been consumed.
+         */
+        char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
+        char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
+        char tail[] = {  0x01, 0x02, 0x03, 0x0d, 0x00 };
+        /*
+        0x01, 0x02, 0x03, 0x0a  -> x (<U23456>    \x01\x02\x03\x0a |0)
+        0x01, 0x02, 0x03, 0x0b  -> y (<U000b>     \x01\x02\x03\x0b |0)
+        0x01, 0x02, 0x03, 0x0d  -> z (<U34567>    \x01\x02\x03\x0d |3)
+        0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
+        */
+        UChar tgt[10];
+        UChar* target = tgt;
+        UChar* targetLimit = target + 1; /* expect overflow from converting */
+        const char* source = head;
+        const char* sourceLimit = source + strlen(head);
+        int32_t len = 0;
+        cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
+        if(U_FAILURE(status)){
+            /* missing test data, reported the same way as for test3 above */
+            log_data_err("Could not create converter for test4. Error: %s\n", u_errorName(status));
+            return;
+        }
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
+        ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        len = ucnv_toUCountPending(cnv,&status);
+        if(U_FAILURE(status)){
+            log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
+        }
+        if(len != 4){
+            log_err("Did not get the expected len for head.\n");
+        }
+        source=mid;
+        sourceLimit = source+strlen(mid);
+        ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        len = ucnv_toUCountPending(cnv,&status);
+        if(U_FAILURE(status)){
+            log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
+        }
+        if(len != 8){
+            log_err("Did not get the expected len for mid.\n");
+        }
+
+        source=tail;
+        sourceLimit = source+strlen(tail);
+        targetLimit = target;   /* no room at all for further output */
+        ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
+        if(status != U_BUFFER_OVERFLOW_ERROR){
+            log_err("ucnv_toUnicode did not report the expected U_BUFFER_OVERFLOW_ERROR. Error: %s\n", u_errorName(status));
+        }
+        status = U_ZERO_ERROR;
+        len = ucnv_toUCountPending(cnv,&status);
+        /* the 4 bytes of mid are expected to be pending; tail has not been consumed */
+        if(U_FAILURE(status)){
+            log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
+        }
+        if(len != 4){
+            log_err("Did not get the expected len for tail.\n");
+        }
+        ucnv_close(cnv);
+    }
+#endif
+}
+ /*
+  * Sets the default converter name to `name` and verifies that both
+  * ucnv_getDefaultName() and the name of a converter opened with a NULL
+  * name compare equal to `expected`. Any mismatch is reported via log_err.
+  */
+ static void TestOneDefaultNameChange(const char *name, const char *expected) {
+     UErrorCode errorCode = U_ZERO_ERROR;
+     UConverter *defaultCnv;
+     const char *reported;
+
+     ucnv_setDefaultName(name);
+     reported = ucnv_getDefaultName();
+     if(strcmp(reported, expected) != 0) {
+         log_err("setDefaultName of %s failed\n", name);
+     } else {
+         log_verbose("setDefaultName of %s works.\n", name);
+     }
+
+     /* A NULL converter name asks ucnv_open() for the default converter. */
+     defaultCnv = ucnv_open(NULL, &errorCode);
+     if(defaultCnv == NULL || U_FAILURE(errorCode)) {
+         log_err("opening the default converter of %s failed\n", name);
+         return;
+     }
+
+     reported = ucnv_getName(defaultCnv, &errorCode);
+     if(strcmp(reported, expected) != 0) {
+         log_err("ucnv_getName of %s failed\n", name);
+     } else {
+         log_verbose("ucnv_getName of %s works.\n", name);
+     }
+     ucnv_close(defaultCnv);
+ }
+
+ /*
+  * Tests ucnv_getDefaultName() and ucnv_setDefaultName(): remembers the
+  * current default name, switches the default through several names via
+  * TestOneDefaultNameChange(), and finally restores the remembered name.
+  */
+ static void TestDefaultName(void) {
+     static char savedName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
+     const char *initialName = ucnv_getDefaultName();
+
+     /* Copy the name so it can be restored after the default is changed. */
+     strcpy(savedName, initialName);
+     log_verbose("getDefaultName returned %s\n", savedName);
+
+     /* Change the default name by setting it. */
+     TestOneDefaultNameChange("UTF-8", "UTF-8");
+ #if U_CHARSET_IS_UTF8
+     /* In this configuration "UTF-8" is expected whatever name is requested. */
+     TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
+     TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
+     TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
+ #else
+ # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+     TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
+     TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
+ # endif
+     TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
+ #endif
+
+     /* Put the original default name back. */
+     ucnv_setDefaultName(savedName);
+ }
+
+/* Test that ucnv_compareNames() matches names according to spec. ----------- */
+
+ /* Returns -1, 0 or 1 for a negative, zero or positive n respectively. */
+ static int
+ sign(int n) {
+     /* Each comparison yields 0 or 1, so the difference is the signum. */
+     return (n > 0) - (n < 0);
+ }
+
+ /*
+  * Checks ucnv_compareNames() on each adjacent pair of a NULL-terminated list.
+  *
+  * names[0] is a relation string whose first character selects the expected
+  * sign of every comparison: '=' means 0, '<' means -1, anything else means 1.
+  * The remaining entries, up to the terminating NULL, are the names; each one
+  * is compared with its successor and a mismatching sign is reported via
+  * log_err.
+  */
+ static void
+ compareNames(const char **names) {
+     const char *previous, *current;
+     int expectedSign, cmp;
+
+     switch(**names) {
+     case '=':
+         expectedSign = 0;
+         break;
+     case '<':
+         expectedSign = -1;
+         break;
+     default:
+         expectedSign = 1;
+         break;
+     }
+     ++names; /* step over the relation string */
+
+     previous = *names++;
+     if(previous == NULL) {
+         return;
+     }
+     for(; (current = *names) != NULL; ++names, previous = current) {
+         cmp = ucnv_compareNames(previous, current);
+         if(sign(cmp) != expectedSign) {
+             log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", previous, current, cmp, expectedSign);
+         }
+     }
+ }
+
+ /*
+  * Runs compareNames() over several name lists. Each list begins with the
+  * relation ("=" or "<") expected between neighbouring names and ends with
+  * a NULL terminator.
+  */
+ static void
+ TestCompareNames() {
+     static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
+     static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL };
+     static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
+     static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };
+
+     /* The lists in the order in which they are checked. */
+     static const char **const groups[]={ equalUTF8, equalIBM, lessMac, lessUTF080 };
+     int32_t i;
+
+     for(i=0; i<UPRV_LENGTHOF(groups); ++i) {
+         compareNames(groups[i]);
+     }
+ }
+
+ /*
+  * Tests substitution handling:
+  *  - UTF-16 and UTF-32 must write a BOM ahead of the substitution output
+  *    produced for an unpaired surrogate;
+  *  - ucnv_setSubstString() followed by ucnv_getSubstChars() on ISO-8859-1
+  *    and, when legacy conversion is available, on HZ.
+  * Returns early if any of the converters cannot be opened.
+  */
+ static void
+ TestSubstString() {
+     static const UChar loneSurrogate[1]={ 0xd900 };
+     static const UChar substUnits[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
+     static const char substBytes[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
+
+     char out[16];
+     UConverter *cnv;
+     UErrorCode errorCode=U_ZERO_ERROR;
+     int32_t outLength;
+     int8_t substLength;
+
+     /* UTF-16/32: test that the BOM is output before the sub character */
+     cnv=ucnv_open("UTF-16", &errorCode);
+     if(U_FAILURE(errorCode)) {
+         log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
+         return;
+     }
+     outLength=ucnv_fromUChars(cnv, out, (int32_t)sizeof(out), loneSurrogate, 1, &errorCode);
+     ucnv_close(cnv);
+     if(!(U_SUCCESS(errorCode) &&
+          outLength==4 &&
+          ucnv_detectUnicodeSignature(out, outLength, NULL, &errorCode)!=NULL)
+     ) {
+         log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
+     }
+
+     errorCode=U_ZERO_ERROR;
+     cnv=ucnv_open("UTF-32", &errorCode);
+     if(U_FAILURE(errorCode)) {
+         log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
+         return;
+     }
+     outLength=ucnv_fromUChars(cnv, out, (int32_t)sizeof(out), loneSurrogate, 1, &errorCode);
+     ucnv_close(cnv);
+     if(!(U_SUCCESS(errorCode) &&
+          outLength==8 &&
+          ucnv_detectUnicodeSignature(out, outLength, NULL, &errorCode)!=NULL)
+     ) {
+         log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
+     }
+
+     /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
+     errorCode=U_ZERO_ERROR;
+     cnv=ucnv_open("ISO-8859-1", &errorCode);
+     if(U_FAILURE(errorCode)) {
+         log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
+         return;
+     }
+     ucnv_setSubstString(cnv, substUnits, UPRV_LENGTHOF(substUnits), &errorCode);
+     if(U_SUCCESS(errorCode)) {
+         substLength = sizeof(out);
+         ucnv_getSubstChars(cnv, out, &substLength, &errorCode);
+         /* Stateless converter: the string should come back as charset bytes. */
+         if(U_FAILURE(errorCode) || substLength!=sizeof(substBytes) || 0!=uprv_memcmp(out, substBytes, substLength)) {
+             log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
+         }
+     } else {
+         log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
+     }
+     ucnv_close(cnv);
+
+ #if !UCONFIG_NO_LEGACY_CONVERSION
+     errorCode=U_ZERO_ERROR;
+     cnv=ucnv_open("HZ", &errorCode);
+     if(U_FAILURE(errorCode)) {
+         log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
+         return;
+     }
+     ucnv_setSubstString(cnv, substUnits, UPRV_LENGTHOF(substUnits), &errorCode);
+     if(U_SUCCESS(errorCode)) {
+         substLength = sizeof(out);
+         ucnv_getSubstChars(cnv, out, &substLength, &errorCode);
+         /*
+          * Stateful converter: the Unicode string is expected to have been
+          * set, so the char * substitution string should now be empty.
+          */
+         if(U_FAILURE(errorCode) || substLength!=0) {
+             log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
+         }
+     } else {
+         log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
+     }
+     ucnv_close(cnv);
+ #endif
+     /*
+      * Further testing of ucnv_setSubstString() is done via intltest convert.
+      * Edge cases such as illegal arguments are not tested here because the
+      * function implementation uses all of its parameters in calls to other
+      * functions with UErrorCode parameters.
+      */
+ }
+
+ /*
+  * Verifies that ucnv_fromUnicode() and ucnv_toUnicode() reject bad UChar
+  * pointer ranges with U_ILLEGAL_ARGUMENT_ERROR, and that none of the
+  * rejected calls wrote into the buffers that were passed in.
+  */
+ static void
+ InvalidArguments() {
+     char byteStorage[2] = {1, 1};
+     char unitStorage[2] = {2, 2};
+     char *bytePos = byteStorage;
+     UChar *unitPos = (UChar *)unitStorage;
+     /* Points one byte past unitPos. */
+     UChar *oddUnitPos = (UChar *)(unitStorage + 1);
+     UErrorCode errorCode = U_ZERO_ERROR;
+     UConverter *cnv = ucnv_open("UTF-8", &errorCode);
+
+     if(U_FAILURE(errorCode)) {
+         log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
+         return;
+     }
+
+     /* Must fail: the source range holds only an incomplete UChar. */
+     errorCode = U_ZERO_ERROR;
+     ucnv_fromUnicode(cnv, &bytePos, bytePos, (const UChar **)&unitPos, oddUnitPos, NULL, TRUE, &errorCode);
+     if(U_ILLEGAL_ARGUMENT_ERROR != errorCode) {
+         log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
+     }
+
+     /* Must fail: the source pointer lies beyond the source limit. */
+     errorCode = U_ZERO_ERROR;
+     ucnv_fromUnicode(cnv, &bytePos, bytePos, (const UChar **)&oddUnitPos, unitPos, NULL, TRUE, &errorCode);
+     if(U_ILLEGAL_ARGUMENT_ERROR != errorCode) {
+         log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
+     }
+
+     /* Must fail: the target range holds only an incomplete UChar. */
+     errorCode = U_ZERO_ERROR;
+     ucnv_toUnicode(cnv, &unitPos, oddUnitPos, (const char **)&bytePos, bytePos, NULL, TRUE, &errorCode);
+     if(U_ILLEGAL_ARGUMENT_ERROR != errorCode) {
+         log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
+     }
+
+     /* Must fail: the target pointer lies beyond the target limit. */
+     errorCode = U_ZERO_ERROR;
+     ucnv_toUnicode(cnv, &oddUnitPos, unitPos, (const char **)&bytePos, bytePos, NULL, TRUE, &errorCode);
+     if(U_ILLEGAL_ARGUMENT_ERROR != errorCode) {
+         log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
+     }
+
+     /* Both buffers must still hold their initial fill values. */
+     if(!(byteStorage[0] == 1 && byteStorage[1] == 1
+          && unitStorage[0] == 2 && unitStorage[1] == 2))
+     {
+         log_err("Data was incorrectly written to buffers\n");
+     }
+
+     ucnv_close(cnv);
+ }
+
+ /*
+  * Opens a converter by each requested name and checks that ucnv_getName()
+  * reports the expected name for it. Names that cannot be opened are
+  * skipped without logging.
+  */
+ static void TestGetName() {
+     static const struct {
+         const char *requested;  /* name passed to ucnv_open() */
+         const char *expected;   /* name ucnv_getName() should return */
+     } cases[] = {
+         { "Unicode",               "UTF-16" },
+         { "UnicodeBigUnmarked",    "UTF-16BE" },
+         { "UnicodeBig",            "UTF-16BE,version=1" },
+         { "UnicodeLittleUnmarked", "UTF-16LE" },
+         { "UnicodeLittle",         "UTF-16LE,version=1" },
+         { "x-UTF-16LE-BOM",        "UTF-16LE,version=1" }
+     };
+     int32_t i;
+
+     for(i = 0; i < UPRV_LENGTHOF(cases); ++i) {
+         UErrorCode errorCode = U_ZERO_ERROR;
+         UConverter *cnv = ucnv_open(cases[i].requested, &errorCode);
+         const char *actual;
+
+         if(U_FAILURE(errorCode)) {
+             continue;
+         }
+         actual = ucnv_getName(cnv, &errorCode);
+         if(U_FAILURE(errorCode) || 0 != strcmp(actual, cases[i].expected)) {
+             log_err("ucnv_getName(%s) = %s != %s -- %s\n",
+                     cases[i].requested, actual, cases[i].expected, u_errorName(errorCode));
+         }
+         ucnv_close(cnv);
+     }
+ }
+
+ /*
+  * Converts the single character U+0061 with several UTF-16 converter names
+  * and checks the exact output bytes, i.e. whether a BOM is written and in
+  * which byte order. A converter that cannot be opened is reported via
+  * log_err_status and skipped.
+  */
+ static void TestUTFBOM() {
+     static const UChar letterA[] = { 0x61 };
+     static const struct {
+         const char *name;   /* converter name to open */
+         int32_t length;     /* expected number of output bytes */
+         uint8_t bytes[4];   /* expected output; only `length` bytes are compared */
+     } cases[] = {
+         /* The first two entries are expected in platform byte order. */
+ #if U_IS_BIG_ENDIAN
+         { "UTF-16",           4, { 0xfe, 0xff, 0, 0x61 } },
+         { "UTF-16,version=1", 4, { 0xfe, 0xff, 0, 0x61 } },
+ #else
+         { "UTF-16",           4, { 0xff, 0xfe, 0x61, 0 } },
+         { "UTF-16,version=1", 4, { 0xff, 0xfe, 0x61, 0 } },
+ #endif
+
+         { "UTF-16BE",         2, { 0, 0x61 } },
+         { "UnicodeBig",       4, { 0xfe, 0xff, 0, 0x61 } },
+
+         { "UTF-16LE",         2, { 0x61, 0 } },
+         { "UnicodeLittle",    4, { 0xff, 0xfe, 0x61, 0 } }
+     };
+
+     char out[10];
+     int32_t i;
+
+     for(i = 0; i < UPRV_LENGTHOF(cases); ++i) {
+         UErrorCode errorCode = U_ZERO_ERROR;
+         UConverter *cnv = ucnv_open(cases[i].name, &errorCode);
+         int32_t outLength;
+
+         if(U_FAILURE(errorCode)) {
+             log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", cases[i].name, u_errorName(errorCode));
+             continue;
+         }
+         outLength = ucnv_fromUChars(cnv, out, (int32_t)sizeof(out), letterA, 1, &errorCode);
+
+         /* The length is checked first, so memcmp never reads past the expected bytes. */
+         if(!(U_SUCCESS(errorCode) && outLength == cases[i].length && 0 == memcmp(out, cases[i].bytes, outLength))) {
+             log_err("unexpected %s BOM writing behavior -- %s\n",
+                     cases[i].name, u_errorName(errorCode));
+         }
+         ucnv_close(cnv);
+     }
+ }