X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..fd0068a84e9996f225edba706498f6ed413d0673:/icuSources/test/cintltst/nccbtst.c diff --git a/icuSources/test/cintltst/nccbtst.c b/icuSources/test/cintltst/nccbtst.c index 669e88fd..d426b989 100644 --- a/icuSources/test/cintltst/nccbtst.c +++ b/icuSources/test/cintltst/nccbtst.c @@ -1,15 +1,16 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2003, International Business Machines Corporation and + * Copyright (c) 1997-2006,2008, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /* +******************************************************************************** * File NCCBTST.C * * Modification History: * Name Description * Madhu Katragadda 7/21/1999 Testing error callback routines -************************************************************************************** +******************************************************************************** */ #include #include @@ -23,6 +24,7 @@ #include "unicode/utypes.h" #include "unicode/ustring.h" #include "nccbtst.h" +#include "unicode/ucnv_cb.h" #define NEW_MAX_BUFFER 999 #define nct_min(x,y) ((x ibm-943 with skip did not match.\n"); - if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), - expskipIBM_930, sizeof(expskipIBM_930), "ibm-930", - UCNV_FROM_U_CALLBACK_SKIP, toIBM930Offsskip , NULL, 0)) - log_err("u-> ibm-930 with skip did not match.\n"); - - if(!testConvertFromUnicodeWithContext(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), - expskipIBM_930, sizeof(expskipIBM_930), "ibm-930", - UCNV_FROM_U_CALLBACK_SKIP, toIBM930Offsskip , NULL, 0,"i", U_ILLEGAL_CHAR_FOUND)) - log_err("u-> ibm-930 with skip did not match.\n"); } { @@ -203,6 +206,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); } } +#endif { static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; @@ -223,6 +227,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) log_err("u->US-ASCII with skip did not match.\n"); } +#if !UCONFIG_NO_LEGACY_CONVERSION /* SBCS NLTC codepage 367 for US-ASCII */ if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, usasciiFromUBytes, sizeof(usasciiFromUBytes), @@ -232,6 +237,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) ) { log_err("u->ibm-367 with skip did not match.\n"); } +#endif /* ISO-Latin-1 */ if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, @@ -243,6 +249,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) log_err("u->LATIN_1 with skip did not match.\n"); } +#if !UCONFIG_NO_LEGACY_CONVERSION /* windows-1252 */ if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, latin1FromUBytes, sizeof(latin1FromUBytes), @@ -283,14 +290,6 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) }; static const int32_t from_iso_2022_jpOffs [] ={0,2}; - static const UChar iso_2022_jp_inputText1[]={0x3000, 0x00E9, 0x3001, }; - static const uint8_t to_iso_2022_jp1[]={ - 0x1b, 0x24, 0x42, 0x21, 0x21, - 0x21, 0x22, - - }; - static const int32_t from_iso_2022_jpOffs1 [] ={0,0,0,0,0,2,2,}; - /*ISO-2022-JP*/ UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; static const uint8_t to_iso_2022_jp2[]={ @@ -303,22 +302,19 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) /*ISO-2022-cn*/ static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; static const uint8_t to_iso_2022_cn[]={ - 0x0F, 0x41, - 0x0F, 0x42, + 0x41, 0x42 }; static const int32_t from_iso_2022_cnOffs [] ={ - 0,0, - 2,2, + 0, 2 }; /*ISO-2022-CN*/ static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; static const uint8_t to_iso_2022_cn1[]={ - 0x0F, 0x41, - 0x0F, 0x43, + 0x41, 0x43 }; - static const int32_t from_iso_2022_cnOffs1 [] ={0,0,2,2}; + static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; /*ISO-2022-kr*/ static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; @@ -386,6 +382,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) }; +#endif static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; @@ -400,6 +397,8 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) 2, }; + +#if !UCONFIG_NO_LEGACY_CONVERSION /* ISCII */ static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; static const uint8_t to_iscii[]={ @@ -440,10 +439,6 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) log_err("u-> iso-2022-jp with skip did not match.\n"); - if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), - to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", - UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs1, NULL, 0 )) - log_err("u-> iso-2022-jp with skip did not match.\n"); /* with context */ if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", @@ -481,7 +476,8 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), to_hz1, sizeof(to_hz1), "hz", UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) - log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); + log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); +#endif /*SCSU*/ if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), @@ -489,6 +485,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) log_err("u-> SCSU with skip did not match.\n"); +#if !UCONFIG_NO_LEGACY_CONVERSION /*ISCII*/ if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), to_iscii, sizeof(to_iscii), "ISCII,version=0", @@ -499,7 +496,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) to_iscii1, sizeof(to_iscii1), "ISCII,version=0", UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); - +#endif } log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); @@ -650,6 +647,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) /*to Unicode*/ log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); +#if !UCONFIG_NO_LEGACY_CONVERSION { static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; @@ -681,6 +679,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) log_err("ibm-930->u with skip did not match.\n"); } +#endif { static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; @@ -701,6 +700,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) log_err("US-ASCII->u with skip did not match.\n"); } +#if !UCONFIG_NO_LEGACY_CONVERSION /* SBCS NLTC codepage 367 for US-ASCII */ if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, @@ -710,6 +710,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) ) { log_err("ibm-367->u with skip did not match.\n"); } +#endif /* ISO-Latin-1 */ if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), @@ -721,6 +722,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) log_err("LATIN_1->u with skip did not match.\n"); } +#if !UCONFIG_NO_LEGACY_CONVERSION /* windows-1252 */ if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, @@ -730,8 +732,10 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) ) { log_err("windows-1252->u with skip did not match.\n"); } +#endif } +#if !UCONFIG_NO_LEGACY_CONVERSION { static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 @@ -894,6 +898,8 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) log_err("LMBCS->u with skip did not match.\n"); } +#endif + log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); { const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, @@ -1093,7 +1099,10 @@ static void TestStop(int32_t inputsize, int32_t outputsize) gInBufferSize = inputsize; gOutBufferSize = outputsize; + /*From Unicode*/ + +#if !UCONFIG_NO_LEGACY_CONVERSION if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) @@ -1135,7 +1144,7 @@ static void TestStop(int32_t inputsize, int32_t outputsize) /*ISO-2022-cn*/ static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; static const uint8_t to_iso_2022_cn[]={ - 0x0F, 0x41, + 0x41, }; static const int32_t from_iso_2022_cnOffs [] ={ @@ -1225,6 +1234,8 @@ static void TestStop(int32_t inputsize, int32_t outputsize) } +#endif + log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); { static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; @@ -1243,7 +1254,10 @@ static void TestStop(int32_t inputsize, int32_t outputsize) log_err("u-> SCSU with skip did not match.\n"); } + /*to Unicode*/ + +#if !UCONFIG_NO_LEGACY_CONVERSION if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) @@ -1300,6 +1314,8 @@ static void TestStop(int32_t inputsize, int32_t outputsize) UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) log_err("euc-tw->u with stop did not match.\n"); } +#endif + log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); { static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, @@ -1356,6 +1372,8 @@ static void TestSub(int32_t inputsize, int32_t outputsize) gOutBufferSize = outputsize; /*from unicode*/ + +#if !UCONFIG_NO_LEGACY_CONVERSION if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) @@ -1392,24 +1410,6 @@ static void TestSub(int32_t inputsize, int32_t outputsize) 0x61, 0xe6, 0xca, 0x8a, }; - /*ISO-2022-JP*/ - static const UChar iso_2022_jp_inputText[]={ 0x0041, 0x00E9, 0x0042,0x00E9,0x3000 }; - static const uint8_t to_iso_2022_jp[]={ - 0x41, - 0x1a, - 0x42, - 0x1a, - 0x1b, 0x24, 0x42, 0x21, 0x21, - }; - - static const int32_t from_iso_2022_jpOffs [] ={ - 0, - 1, - 2, - 3, - 4,4,4,4,4 - }; - static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), @@ -1426,14 +1426,8 @@ static void TestSub(int32_t inputsize, int32_t outputsize) to_euc_tw, sizeof(to_euc_tw), "euc-tw", UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) log_err("u-> euc-tw with substitute did not match.\n"); - - if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), - to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", - UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_iso_2022_jpOffs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) - log_err("u-> iso-2022-jp with substitute did not match.\n"); - - } +#endif log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); { @@ -1543,6 +1537,8 @@ static void TestSub(int32_t inputsize, int32_t outputsize) } /*to unicode*/ + +#if !UCONFIG_NO_LEGACY_CONVERSION if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) @@ -1607,10 +1603,9 @@ static void TestSub(int32_t inputsize, int32_t outputsize) euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) log_err("euc-jp->u with substitute did not match.\n"); - - - } +#endif + log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); { const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, @@ -1635,6 +1630,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize) log_err("scsu->u with stop did not match.\n");; } +#if !UCONFIG_NO_LEGACY_CONVERSION log_verbose("Testing ibm-930 subchar/subchar1\n"); { static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; @@ -1660,13 +1656,6 @@ static void TestSub(int32_t inputsize, int32_t outputsize) log_verbose("Testing GB 18030 with substitute callbacks\n"); { - static const UChar u1[]={ - 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xdbff, 0xdfff }; - static const uint8_t gb1[]={ - 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x35 }; - static const int32_t offsets1[]={ - 0, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 10, 10, 10, 10 }; - static const UChar u2[]={ 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; static const uint8_t gb2[]={ @@ -1674,18 +1663,13 @@ static void TestSub(int32_t inputsize, int32_t outputsize) static const int32_t offsets2[]={ 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; - if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), gb1, ARRAY_LENGTH(gb1), "gb18030", - UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) - ) { - log_err("u->gb18030 with substitute did not match.\n"); - } - if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) ) { log_err("gb18030->u with substitute did not match.\n"); } } +#endif log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); { @@ -1707,26 +1691,6 @@ static void TestSub(int32_t inputsize, int32_t outputsize) } } - log_verbose("Testing IMAP-mailbox-name toUnicode with substitute callbacks\n"); - { - static const uint8_t bytes[]={ - /* aDEL a&AB~ a&AB\x0c a&AB- a&AB. a&. */ - 0x61, 0x7f, 0x61, 0x26, 0x41, 0x42, 0x7e, 0x61, 0x26, 0x41, 0x42, 0x0c, 0x61, 0x26, 0x41, 0x42, 0x2d, 0x61, 0x26, 0x41, 0x42, 0x2e, 0x61, 0x26, 0x2e - }; - static const UChar unicode[]={ - 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd - }; - static const int32_t offsets[]={ - 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23 - }; - - if(!testConvertToUnicode(bytes, ARRAY_LENGTH(bytes), unicode, ARRAY_LENGTH(unicode), "IMAP-mailbox-name", - UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) - ) { - log_err("IMAP-mailbox-name->u with substitute did not match.\n"); - } - } - log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); { static const uint8_t @@ -1774,7 +1738,7 @@ static void TestSub(int32_t inputsize, int32_t outputsize) static const UChar out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff }, out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe }, - out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xd840, 0xdc01 }, + out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd }, out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 }; static const int32_t @@ -1833,6 +1797,8 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) gOutBufferSize = outputsize; /*from Unicode*/ + +#if !UCONFIG_NO_LEGACY_CONVERSION if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) @@ -1889,22 +1855,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 6, 7, 7, 8, }; /*ISO-2022-JP*/ - static const UChar iso_2022_jp_inputText[]={ 0x0041, 0x00E9, 0x0042,0x00E9,0x3000 }; - static const uint8_t to_iso_2022_jp[]={ - 0x41, - 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, - 0x42, - 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, - 0x1b, 0x24, 0x42, 0x21, 0x21, - }; - - static const int32_t from_iso_2022_jpOffs [] ={ - 0, - 1,1,1,1,1,1, - 2, - 3,3,3,3,3,3, - 4,4,4,4,4 - }; static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; static const uint8_t to_iso_2022_jp1[]={ 0x1b, 0x24, 0x42, 0x21, 0x21, @@ -1945,48 +1895,15 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) /*ISO-2022-cn*/ static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; static const uint8_t to_iso_2022_cn[]={ - 0x0F, 0x41, - 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, + 0x41, + 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, 0x42, }; static const int32_t from_iso_2022_cnOffs [] ={ - 0,0, - 1,1,1,1,1,1,1, + 0, + 1,1,1,1,1,1, 2, }; - static const UChar iso_2022_cn_inputText1[]={ 0x4e00, 0x3712, 0x4e01, }; - static const uint8_t to_iso_2022_cn1[]={ - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, - 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x36, 0x21, - }; - static const int32_t from_iso_2022_cnOffs1 [] ={ - 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, - }; - static const UChar iso_2022_cn_inputText3[]={ 0x3000, 0x3712, 0x3001, }; - static const uint8_t to_iso_2022_cn3[]={ - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, - 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x22, - }; - static const int32_t from_iso_2022_cnOffs3 [] ={ - 0,0,0,0,0,0,0, - 1,1,1,1,1,1,1, - 2,2,2,2,2,2,2 - }; - static const UChar iso_2022_cn_inputText2[]={ 0x0041, 0x3712, 0x4e00, }; - static const uint8_t to_iso_2022_cn2[]={ - 0x0F, 0x41, - 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, - }; - static const int32_t from_iso_2022_cnOffs2 [] ={ - 0,0, - 1,1,1,1,1,1,1, - 2,2,2,2,2,2,2 - }; static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; @@ -1994,7 +1911,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x22, + 0x0e, 0x21, 0x22, 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 0x42, @@ -2003,7 +1920,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 0,0,0,0,0,0,0, 1,1,1,1,1,1,1, 1,1,1,1,1,1, - 3,3,3,3,3,3,3, + 3,3,3, 4,4,4,4,4,4,4, 4,4,4,4,4,4, 6 @@ -2104,30 +2021,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) }; /*ISCII*/ - static const UChar iscii_inputText2[]={ 0x0041, 0x0901,0xD84D, 0xDC56/*unassigned*/,0x0902, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; - static const uint8_t to_iscii2[]={ - 0x41, - 0xef, 0x42, 0xa1, - 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, - 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, - 0xa2, - 0x42, - 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, - 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, - 0x43 - }; - static const int32_t from_isciiOffs2 [] ={ - 0, - 1,1,1, - 2,2,2,2,2,2, - 2,2,2,2,2,2, - 4, - 5, - 6,6,6,6,6,6, - 6,6,6,6,6,6, - 8, - }; - static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; static const uint8_t to_iscii[]={ 0x41, @@ -2165,11 +2058,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) log_err("u-> euc-tw with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), - to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0 )) - log_err("u-> iso_2022_jp with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) @@ -2215,57 +2103,27 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); } - { - /* surrogate pair*/ - static const UChar iso_2022_jp_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; - static const uint8_t to_iso_2022_jp4_v3[]={ - 0x1b, 0x24, 0x42, 0x21, 0x21, - 0x1b, 0x28, 0x42, 0x26, 0x23, 0x78, 0x32, 0x33, 0x34, 0x35, 0x36, 0x3b , - - 0x1b, 0x24, 0x42, 0x21, 0x22, - 0x1b, 0x28, 0x42, 0x26, 0x23, 0x78, 0x32, 0x33, 0x34, 0x35, 0x36, 0x3b , - - 0x42, - 0x26, 0x23, 0x78, 0x39, 0x30, 0x31, 0x43, 0x3b, - }; - - static const int32_t from_iso_2022_jpOffs4_v3 [] ={ - 0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1, - - 3,3,3,3,3, - 4,4,4,4,4,4,4,4,4,4,4,4, - - 6, - 7,7,7,7,7,7,7,7 - }; - if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText4, sizeof(iso_2022_jp_inputText4)/sizeof(iso_2022_jp_inputText4[0]), - to_iso_2022_jp4_v3, sizeof(to_iso_2022_jp4_v3), "iso-2022-jp", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs4_v3, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) - log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_HEX did not match.\n"); - - } { static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; static const uint8_t to_iso_2022_cn5_v2[]={ 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x22, + 0x0e, 0x21, 0x22, 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 0x42, - 0x0f, 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, + 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, }; static const int32_t from_iso_2022_cnOffs5_v2 [] ={ 0,0,0,0,0,0,0, 1,1,1,1,1,1,1, 1,1,1,1,1,1, - 3,3,3,3,3,3,3, + 3,3,3, 4,4,4,4,4,4,4, 4,4,4,4,4,4, 6, - 7,7,7,7,7,7,7 + 7,7,7,7,7,7 }; if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", @@ -2278,18 +2136,18 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) static const uint8_t to_iso_2022_cn6_v2[]={ 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x22, + 0x0e, 0x21, 0x22, 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 0x42, - 0x0f, 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d + 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d }; static const int32_t from_iso_2022_cnOffs6_v2 [] ={ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, }; if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", @@ -2302,17 +2160,17 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) static const uint8_t to_iso_2022_cn7_v2[]={ 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x22, + 0x0e, 0x21, 0x22, 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, - 0x42, 0x0f, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, + 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, }; static const int32_t from_iso_2022_cnOffs7_v2 [] ={ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, - 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, }; if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", @@ -2324,7 +2182,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) static const uint8_t to_iso_2022_cn4_v3[]={ 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x22, + 0x0e, 0x21, 0x22, 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 0x42 }; @@ -2334,7 +2192,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1, - 3,3,3,3,3,3,3, + 3,3,3, 4,4,4,4,4,4,4,4,4,4,4, 6 @@ -2352,18 +2210,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) log_err("u-> iso_2022_cn with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), - to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs1, NULL, 0 )) - log_err("u-> iso_2022_cn with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_cn_inputText2, sizeof(iso_2022_cn_inputText2)/sizeof(iso_2022_cn_inputText2[0]), - to_iso_2022_cn2, sizeof(to_iso_2022_cn2), "iso-2022-cn", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs2, NULL, 0 )) - log_err("u-> iso_2022_cn with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_cn_inputText3, sizeof(iso_2022_cn_inputText3)/sizeof(iso_2022_cn_inputText3[0]), - to_iso_2022_cn3, sizeof(to_iso_2022_cn3), "iso-2022-cn", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs3, NULL, 0 )) - log_err("u-> iso_2022_cn with subst with value did not match.\n"); if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) @@ -2389,17 +2235,13 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) to_iscii, sizeof(to_iscii), "ISCII,version=0", UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) log_err("u-> iscii with subst with value did not match.\n"); - - if(!testConvertFromUnicode(iscii_inputText2, sizeof(iscii_inputText2)/sizeof(iscii_inputText2[0]), - to_iscii2, sizeof(to_iscii2), "ISCII,version=0", - UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs2, NULL, 0 )) - log_err("u-> iscii2 with subst with value did not match.\n"); } - +#endif log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); /*to Unicode*/ { +#if !UCONFIG_NO_LEGACY_CONVERSION static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 0x81, 0xad, /*unassigned*/ 0x89, 0xd3 }; @@ -2510,19 +2352,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 0x42,}; static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; - - - /*LMBCS*/ - static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, - 0x12, 0x92, 0xa0, /*unassigned*/ - 0x12, 0x92, 0xa1, - }; - static const UChar LMBCSToUnicode[]={ 0x4e2e, - 0x25, 0x58, 0x31, 0x32, 0x25, 0x58, 0x39, 0x32, 0x25, 0x58, 0x41, 0x30, - 0xe5c4, }; - static const int32_t fromLMBCS[] = {0, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 6, }; +#endif /*UTF8*/ static const uint8_t sampleTxtUTF8[]={ @@ -2561,7 +2391,8 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 9 }; - + +#if !UCONFIG_NO_LEGACY_CONVERSION if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) @@ -2644,10 +2475,8 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) log_err("ISCII ->u with substitute with value did not match.\n"); - if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), - LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS", - UCNV_TO_U_CALLBACK_ESCAPE, fromLMBCS, NULL, 0)) - log_err("LMBCS->u with substitute with value did not match.\n"); +#endif + if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) @@ -2659,6 +2488,7 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) } } +#if !UCONFIG_NO_LEGACY_CONVERSION static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) { static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; @@ -2667,13 +2497,13 @@ static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) static const uint8_t text943[] = { - 0x82, 0xa9, 0x82, 0x20, /*0xc8,*/ 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; - static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57}; - static const UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57}; + 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; + static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; + static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; static const UChar toUnicode943stop[]= { 0x304b}; - static const int32_t fromIBM943Offssub[] = {0, 2, 4, 5, 7}; - static const int32_t fromIBM943Offsskip[] = { 0, 4, 5, 7}; + static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; + static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; static const int32_t fromIBM943Offsstop[] = { 0}; gInBufferSize = inputsize; @@ -2707,9 +2537,9 @@ static void TestSingleByte(int32_t inputsize, int32_t outputsize) { static const uint8_t sampleText[] = { 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, - 0xff, /*0x82, 0xa9,*/ 0x32, 0x33}; - static const UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063, 0xfffd,/*0x304b,*/ 0x0032, 0x0033}; - static const int32_t fromIBM943Offssub[] = {0, 2, 3, 4, 5, 7, 8}; + 0xff, 0x32, 0x33}; + static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; + static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; /*checking illegal value for ibm-943 with substitute*/ gInBufferSize = inputsize; gOutBufferSize = outputsize; @@ -2746,8 +2576,7 @@ static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); } - - +#endif UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, @@ -2757,28 +2586,28 @@ UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t UErrorCode status = U_ZERO_ERROR; UConverter *conv = 0; - uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */ + char junkout[NEW_MAX_BUFFER]; /* FIX */ int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ const UChar *src; - uint8_t *end; - uint8_t *targ; + char *end; + char *targ; int32_t *offs; int i; int32_t realBufferSize; - uint8_t *realBufferEnd; + char *realBufferEnd; const UChar *realSourceEnd; const UChar *sourceLimit; UBool checkOffsets = TRUE; UBool doFlush; char junk[9999]; char offset_str[9999]; - uint8_t *p; + char *p; UConverterFromUCallback oldAction = NULL; const void* oldContext = NULL; for(i=0;i