X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..1a147d096ae81f4c8262f7bfc56bd19fc2dee932:/icuSources/test/cintltst/ncnvtst.c diff --git a/icuSources/test/cintltst/ncnvtst.c b/icuSources/test/cintltst/ncnvtst.c index 47d88f83..255020a2 100644 --- a/icuSources/test/cintltst/ncnvtst.c +++ b/icuSources/test/cintltst/ncnvtst.c @@ -1,25 +1,31 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2003, International Business Machines Corporation and + * Copyright (c) 1997-2016, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ -/******************************************************************************** +/***************************************************************************** * -* File CCONVTST.C +* File ncnvtst.c * * Modification History: * Name Description * Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage -********************************************************************************* +****************************************************************************** */ #include -#include "cmemory.h" +#include +#include #include "unicode/uloc.h" #include "unicode/ucnv.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/uset.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" #include "cintltst.h" +#include "cmemory.h" #define MAX_LENGTH 999 @@ -32,7 +38,6 @@ static int32_t gOutBufferSize = 0; static char gNuConvTestName[1024]; #define nct_min(x,y) ((x ibm-920 [UCNV_SBCS] not match.\n"); +#endif /*LATIN_1*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) log_err("u-> LATIN_1 not match.\n"); } + +#if !UCONFIG_NO_LEGACY_CONVERSION log_verbose("Testing for DBCS and MBCS\n"); { UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; @@ -149,20 +167,21 @@ static void TestSurrogateBehaviour(){ int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; /*DBCS*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); /*MBCS*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); } + log_verbose("Testing for ISO-2022-jp\n"); { UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -174,13 +193,14 @@ static void TestSurrogateBehaviour(){ int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; /*iso-2022-jp*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) log_err("u-> not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) log_err("u-> not match.\n"); } + log_verbose("Testing for ISO-2022-cn\n"); { static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -190,7 +210,7 @@ static void TestSurrogateBehaviour(){ 0x36, 0x21, 0x0F, 0x31, 0x1A, - 0x0f, 0x32 + 0x32 }; @@ -200,16 +220,17 @@ static void TestSurrogateBehaviour(){ 1, 1, 2, 2, 3, - 5, 5, }; + 5, }; /*iso-2022-CN*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) log_err("u-> not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) log_err("u-> not match.\n"); } + log_verbose("Testing for ISO-2022-kr\n"); { static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -232,13 +253,14 @@ static void TestSurrogateBehaviour(){ }; /*iso-2022-kr*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); } + log_verbose("Testing for HZ\n"); { static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -259,13 +281,15 @@ static void TestSurrogateBehaviour(){ 7,}; /*hz*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) - log_err("u-> not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + log_err("u-> HZ not match.\n"); + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) - log_err("u-> not match.\n"); + log_err("u-> HZ not match.\n"); } +#endif + /*UTF-8*/ log_verbose("Testing for UTF8\n"); { @@ -279,36 +303,33 @@ static void TestSurrogateBehaviour(){ static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; /*UTF-8*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) log_err("u-> UTF8 with offsets and flush true did not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) log_err("u-> UTF8 with offsets and flush true did not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) log_err("u-> UTF8 with offsets and flush true did not match.\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) log_err("u-> UTF8 with offsets and flush true did not match.\n"); if(!convertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR )) - log_err("UTF8 -> did not match.\n"); + sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, TRUE, U_ZERO_ERROR )) + log_err("UTF8 -> u did not match.\n"); if(!convertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR )) - log_err("UTF8 -> did not match.\n"); + sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, FALSE, U_ZERO_ERROR )) + log_err("UTF8 -> u did not match.\n"); if(!convertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) - log_err("UTF8 -> did not match.\n"); + sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) + log_err("UTF8 ->u did not match.\n"); if(!convertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) - log_err("UTF8 -> did not match.\n"); + sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) + log_err("UTF8 -> u did not match.\n"); } - - - } /*test various error behaviours*/ @@ -317,40 +338,44 @@ static void TestErrorBehaviour(){ { static const UChar sampleText[] = { 0x0031, 0xd801}; static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; - static const uint8_t expected[] = { 0x31}; + static const uint8_t expected0[] = { 0x31}; + static const uint8_t expected[] = { 0x31, 0x1a}; static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; +#if !UCONFIG_NO_LEGACY_CONVERSION /*SBCS*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-920", 0, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-920 [UCNV_SBCS] \n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-920", 0, FALSE, U_ZERO_ERROR)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) log_err("u-> ibm-920 [UCNV_SBCS] \n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); - +#endif /*LATIN_1*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "LATIN_1", 0, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) log_err("u-> LATIN_1 is supposed to fail\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) log_err("u-> LATIN_1 is supposed to fail\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) log_err("u-> LATIN_1 did not match\n"); } - +#if !UCONFIG_NO_LEGACY_CONVERSION log_verbose("Testing for DBCS and MBCS\n"); { static const UChar sampleText[] = { 0x00a1, 0xd801}; static const uint8_t expected[] = { 0xa2, 0xae}; - static const int32_t offsets[] = { 0x00, 0x00, 0x01, 0x01}; + static const int32_t offsets[] = { 0x00, 0x00}; + static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; + static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; @@ -360,173 +385,164 @@ static void TestErrorBehaviour(){ static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; - static const UChar sampleText4MBCS[] = { 0x0061, 0x00a6, 0xdc01}; + static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; - - - - /*DBCS*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-1363", 0, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); /*MBCS*/ - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-1363", 0, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), - expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), + expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR)) log_err("u-> euc-jp [UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), - expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), + expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> euc-jp [UCNV_MBCS] \n"); } + /*iso-2022-jp*/ log_verbose("Testing for iso-2022-jp\n"); { static const UChar sampleText[] = { 0x0031, 0xd801}; static const uint8_t expected[] = { 0x31}; - static const int32_t offsets[] = { 0x00}; + static const uint8_t expectedSUB[] = { 0x31, 0x1a}; + static const int32_t offsets[] = { 0x00, 1}; static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; static const uint8_t expected2[] = { 0x31,0x1A,0x32}; static const int32_t offsets2[] = { 0x00,0x01,0x02}; - static const UChar sampleText3MBCS[] = { 0x3000, 0x0050, 0xdc01,0x3001}; - static const uint8_t expected3MBCS[] = { 0x1B, 0x24, 0x42, 0x21, 0x21, 0x1B, 0x28, 0x42, 0x50, 0x1A, 0x1B, 0x24, 0x42, 0x21, 0x22,}; - static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03,}; - static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "iso-2022-jp", offsets, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) + log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR)) - log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), - expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, TRUE, U_ZERO_ERROR)) - log_err("u->iso-2022-jp [UCNV_MBCS] \n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), - expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, FALSE, U_ZERO_ERROR)) - log_err("u-> iso-2022-jp[UCNV_MBCS] \n"); - - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); } + /*iso-2022-cn*/ log_verbose("Testing for iso-2022-cn\n"); { static const UChar sampleText[] = { 0x0031, 0xd801}; - static const uint8_t expected[] = { 0x0f, 0x31}; - static const int32_t offsets[] = { 0x00, 0x00}; + static const uint8_t expected[] = { 0x31}; + static const uint8_t expectedSUB[] = { 0x31, 0x1A}; + static const int32_t offsets[] = { 0x00, 1}; static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; - static const uint8_t expected2[] = { 0x0f, 0x31, 0x1A,0x32}; - static const int32_t offsets2[] = { 0x00, 0x00, 0x01,0x02}; + static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; + static const int32_t offsets2[] = { 0x00, 0x01,0x02}; static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; - static const uint8_t expected3MBCS[] = {0x0f, 0x51, 0x50, 0x1A}; - static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x01, 0x02 }; + static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; + static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; - static const uint8_t expected4MBCS[] = { 0x0f, 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; - static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "iso-2022-cn", offsets, TRUE, U_TRUNCATED_CHAR_FOUND)) + static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; + static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) log_err("u->iso-2022-cn [UCNV_MBCS] \n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); } + /*iso-2022-kr*/ log_verbose("Testing for iso-2022-kr\n"); { static const UChar sampleText[] = { 0x0031, 0xd801}; static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; - static const int32_t offsets[] = { -1, -1, -1, -1, 0x00}; + static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; + static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; @@ -536,43 +552,29 @@ static void TestErrorBehaviour(){ static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; - static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01,0x4e00}; - static const uint8_t expected4MBCS[] = { 0x1b, 0x24, 0x29, 0x43, - 0x61, - 0x0e, 0x6c, 0x69, - 0x0f, 0x1a, - 0x0e, 0x6c, 0x69,}; - static const int32_t offsets4MBCS[] = { -1, -1, -1, -1, 0x00, 0x01 ,0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03 }; - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "iso-2022-kr", offsets, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) log_err("u->iso-2022-kr [UCNV_MBCS] \n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); - - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), - expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, TRUE, U_ZERO_ERROR)) - log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), - expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, FALSE, U_ZERO_ERROR)) - log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); } /*HZ*/ @@ -580,7 +582,8 @@ static void TestErrorBehaviour(){ { static const UChar sampleText[] = { 0x0031, 0xd801}; static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; - static const int32_t offsets[] = { 0x00, 0x00, 0x00}; + static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; + static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; @@ -593,41 +596,41 @@ static void TestErrorBehaviour(){ static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "HZ", offsets, TRUE, U_TRUNCATED_CHAR_FOUND)) + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), + expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) log_err("u-> HZ [UCNV_MBCS] \n"); - if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) log_err("u-> ibm-1363 [UCNV_MBCS] \n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) log_err("u->HZ[UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> HZ [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), + if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> HZ [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) log_err("u->HZ [UCNV_MBCS] \n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), + if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> HZ[UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR)) log_err("u-> HZ [UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), + if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> HZ [UCNV_MBCS] \n"); } - - +#endif } +#if !UCONFIG_NO_LEGACY_CONVERSION /*test different convertToUnicode error behaviours*/ static void TestToUnicodeErrorBehaviour() { @@ -636,21 +639,12 @@ static void TestToUnicodeErrorBehaviour() uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; const UChar expected[] = { 0x00a1 }; - uint8_t sampleText2[] = { 0xa2, 0xae, 0xa2}; - const UChar expected2[] = { 0x00a1 }; - if(!convertToU(sampleText, sizeof(sampleText), - expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR )) + expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING )) log_err("DBCS (ibm-1363)->Unicode did not match.\n"); if(!convertToU(sampleText, sizeof(sampleText), - expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR )) + expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING )) log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); - - if(!convertToU(sampleText2, sizeof(sampleText2), - expected2, sizeof(expected2)/sizeof(expected2[0]), "ibm-1363", 0, TRUE, U_TRUNCATED_CHAR_FOUND )) - log_err("DBCS (ibm-1363)->Unicode with TRUNCATED CHARACTER did not match.\n"); - - } log_verbose("Testing error conditions for SBCS\n"); { @@ -661,64 +655,13 @@ static void TestToUnicodeErrorBehaviour() const UChar expected2[] = { 0x0073 };*/ if(!convertToU(sampleText, sizeof(sampleText), - expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) + expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) log_err("SBCS (ibm-1051)->Unicode did not match.\n"); if(!convertToU(sampleText, sizeof(sampleText), - expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) + expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); } - log_verbose("Testing error conditions for UTF8\n"); - { - const uint8_t sampleText[] = { 0x31, 0xe4, 0xba, 0x8c, 0xe4, 0xb8 }; - UChar expectedUTF8[] = { 0x0031, 0x4e8c}; - int32_t offsets[] = { 0x0000, 0x0001}; - - const uint8_t sampleText2[] = { 0x31, 0xff, 0xe4, 0xba, 0x8c, - 0xe0, 0x80, 0x61}; - UChar expected2UTF8[] = { 0x0031, 0xfffd, 0x4e8c, 0xfffd, 0x0061}; - int32_t offsets2[] = { 0x0000, 0x0001, 0x0002, 0x0005, 0x0007}; - - const uint8_t sampleText3[] = { 0x31, 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, - 0x61}; - UChar expected3UTF8[] = { 0x0031, 0xfffd, 0x0061}; - int32_t offsets3[] = { 0x0000, 0x0001, 0x0006}; - - if(!convertToU(sampleText, sizeof(sampleText), - expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", 0, TRUE, U_TRUNCATED_CHAR_FOUND )) - log_err("utf-8->Unicode did not match.\n"); - if(!convertToU(sampleText, sizeof(sampleText), - expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", 0, FALSE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match.\n"); - if(!convertToU(sampleText, sizeof(sampleText), - expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", offsets, TRUE, U_TRUNCATED_CHAR_FOUND )) - log_err("utf-8->Unicode did not match.\n"); - if(!convertToU(sampleText, sizeof(sampleText), - expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", offsets, FALSE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match.\n"); - - if(!convertToU(sampleText2, sizeof(sampleText2), - expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", 0, TRUE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match.\n"); - if(!convertToU(sampleText2, sizeof(sampleText2), - expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", 0, FALSE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match.\n"); - if(!convertToU(sampleText2, sizeof(sampleText2), - expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", offsets2, TRUE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match.\n"); - if(!convertToU(sampleText2, sizeof(sampleText2), - expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", offsets2, FALSE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match.\n"); - - if(!convertToU(sampleText3, sizeof(sampleText3), - expected3UTF8, sizeof(expected3UTF8)/sizeof(expected3UTF8[0]), "utf-8", offsets3, TRUE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match.\n"); - if(!convertToU(sampleText3, sizeof(sampleText3), - expected3UTF8, sizeof(expected3UTF8)/sizeof(expected3UTF8[0]), "utf-8", offsets3, FALSE, U_ZERO_ERROR )) - log_err("utf-8->Unicode did not match with flush false.\n"); - - } - } static void TestGetNextErrorBehaviour(){ @@ -739,6 +682,7 @@ static void TestGetNextErrorBehaviour(){ } ucnv_close(cnv); } +#endif #define MAX_UTF16_LEN 2 #define MAX_UTF8_LEN 4 @@ -761,8 +705,8 @@ static void TestRegressionUTF8(){ if (currCh == SURROGATE_HIGH_START) { currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ } - UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); - UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh); + U16_APPEND_UNSAFE(standardForm, offset16, currCh); + U8_APPEND_UNSAFE(utf8, offset8, currCh); currCh++; } if(!convertFromU(standardForm, offset16, @@ -774,13 +718,48 @@ static void TestRegressionUTF8(){ log_err("UTF8->Unicode did not match.\n"); } } + free(standardForm); free(utf8); + + { + static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; + static const UChar expected[] = { 0x0301, 0x0300 }; + UConverter *conv8; + UErrorCode err = U_ZERO_ERROR; + UChar pivotBuffer[100]; + const UChar* const pivEnd = pivotBuffer + 100; + const char* srcBeg; + const char* srcEnd; + UChar* pivBeg; + + conv8 = ucnv_open("UTF-8", &err); + + srcBeg = src8; + pivBeg = pivotBuffer; + srcEnd = src8 + 3; + ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on first call.\n"); + } + + srcEnd = src8 + 4; + ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on second call.\n"); + } + + if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { + log_err("Did not get expected results for UTF-8.\n"); + } + ucnv_close(conv8); + } } #define MAX_UTF32_LEN 1 static void TestRegressionUTF32(){ +#if !UCONFIG_ONLY_HTML_CONVERSION UChar32 currCh = 0; int32_t offset32; int32_t offset16; @@ -797,8 +776,8 @@ static void TestRegressionUTF32(){ if (currCh == SURROGATE_HIGH_START) { currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ } - UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); - UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh); + U16_APPEND_UNSAFE(standardForm, offset16, currCh); + utf32[offset32++] = currCh; currCh++; } if(!convertFromU(standardForm, offset16, @@ -812,6 +791,107 @@ static void TestRegressionUTF32(){ } free(standardForm); free(utf32); + + { + /* Check for lone surrogate error handling. */ + static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; + static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; + static const uint8_t expectedUTF32BE[] = { + 0x00, 0x00, 0x00, 0x31, + 0x00, 0x00, 0xff, 0xfd, + 0x00, 0x00, 0x00, 0x32 + }; + static const uint8_t expectedUTF32LE[] = { + 0x31, 0x00, 0x00, 0x00, + 0xfd, 0xff, 0x00, 0x00, + 0x32, 0x00, 0x00, 0x00 + }; + static const int32_t offsetsUTF32[] = { + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x01, 0x01, + 0x02, 0x02, 0x02, 0x02 + }; + + if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate), + expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) + log_err("u->UTF-32BE\n"); + if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate), + expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) + log_err("u->UTF-32BE\n"); + + if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate), + expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) + log_err("u->UTF-32LE\n"); + if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate), + expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) + log_err("u->UTF-32LE\n"); + } + + { + static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; + static const UChar expected[] = { 0x0031, 0x0030 }; + UConverter *convBE; + UErrorCode err = U_ZERO_ERROR; + UChar pivotBuffer[100]; + const UChar* const pivEnd = pivotBuffer + 100; + const char* srcBeg; + const char* srcEnd; + UChar* pivBeg; + + convBE = ucnv_open("UTF-32BE", &err); + + srcBeg = srcBE; + pivBeg = pivotBuffer; + srcEnd = srcBE + 5; + ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on first call.\n"); + } + + srcEnd = srcBE + 8; + ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on second call.\n"); + } + + if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { + log_err("Did not get expected results for UTF-32BE.\n"); + } + ucnv_close(convBE); + } + { + static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; + static const UChar expected[] = { 0x0031, 0x0030 }; + UConverter *convLE; + UErrorCode err = U_ZERO_ERROR; + UChar pivotBuffer[100]; + const UChar* const pivEnd = pivotBuffer + 100; + const char* srcBeg; + const char* srcEnd; + UChar* pivBeg; + + convLE = ucnv_open("UTF-32LE", &err); + + srcBeg = srcLE; + pivBeg = pivotBuffer; + srcEnd = srcLE + 5; + ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on first call.\n"); + } + + srcEnd = srcLE + 8; + ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on second call.\n"); + } + + if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { + log_err("Did not get expected results for UTF-32LE.\n"); + } + ucnv_close(convLE); + } +#endif } /*Walk through the available converters*/ @@ -856,11 +936,12 @@ static void TestWithBufferSize(int32_t insize, int32_t outsize){ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ /*UTF-8*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) log_err("u-> UTF8 did not match.\n"); } +#if !UCONFIG_NO_LEGACY_CONVERSION log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); { UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; @@ -871,25 +952,26 @@ static void TestWithBufferSize(int32_t insize, int32_t outsize){ 0x61 }; int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; - if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), + if(!testConvertFromU(inputTest, UPRV_LENGTHOF(inputTest), toIBM943, sizeof(toIBM943), "ibm-943", (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) log_err("u-> ibm-943 with subst with value did not match.\n"); } +#endif log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); { const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 0xe0, 0x80, 0x61}; - UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; - int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; + UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061}; + int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006}; if(!testConvertToU(sampleText1, sizeof(sampleText1), - expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) + expected1, UPRV_LENGTHOF(expected1),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) log_err("utf8->u with substitute did not match.\n");; } - +#if !UCONFIG_NO_LEGACY_CONVERSION log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); /*to Unicode*/ { @@ -902,12 +984,12 @@ static void TestWithBufferSize(int32_t insize, int32_t outsize){ int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), - IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", + IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943", (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) log_err("ibm-943->u with substitute with value did not match.\n"); } - +#endif } static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, @@ -915,13 +997,13 @@ static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *e { int32_t i=0; - uint8_t *p=0; + char *p=0; const UChar *src; - uint8_t buffer[MAX_LENGTH]; + char buffer[MAX_LENGTH]; int32_t offsetBuffer[MAX_LENGTH]; int32_t *offs=0; - uint8_t *targ; - uint8_t *targetLimit; + char *targ; + char *targetLimit; UChar *sourceLimit=0; UErrorCode status = U_ZERO_ERROR; UConverter *conv = 0; @@ -934,7 +1016,7 @@ static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *e log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); for(i=0; i %d chars out]. \nResult :", sourceLen, targ-junkout); - if(VERBOSITY) + if(getTestOption(VERBOSITY_OPTION)) { char junk[999]; char offset_str[999]; - uint8_t *ptr; + char *ptr; junk[0] = 0; offset_str[0] = 0; @@ -1227,9 +1309,9 @@ static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_ { log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); - printf("\nGot:"); - printSeqErr((const unsigned char*)junkout, targ-junkout); - printf("\nExpected:"); + log_info("\nGot:"); + printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); + log_info("\nExpected:"); printSeqErr((const unsigned char*)expect, expectLen); return FALSE; } @@ -1240,7 +1322,7 @@ static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_ if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ log_err("did not get the expected offsets. %s", gNuConvTestName); log_err("Got : "); - printSeqErr((const unsigned char*)junkout, targ-junkout); + printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); for(p=junkout;p %d chars.\nResult :", sourcelen, targ-junkout); - if(VERBOSITY) + if(getTestOption(VERBOSITY_OPTION)) { char junk[999]; char offset_str[999]; @@ -1415,7 +1497,7 @@ static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar * for(i=0; i<(targ-junkout); i++) log_err("%X,", junkout[i]); log_err(""); - for(i=0; i<(src-source); i++) + for(i=0; i<(src-(const char *)source); i++) log_err("%X,", (unsigned char)source[i]); } } @@ -1429,9 +1511,9 @@ static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar * { log_err("String does not match. %s\n", gNuConvTestName); log_verbose("String does not match. %s\n", gNuConvTestName); - printf("\nGot:"); + log_info("\nGot:"); printUSeq(junkout, expectlen); - printf("\nExpected:"); + log_info("\nExpected:"); printUSeq(expect, expectlen); return FALSE; } @@ -1439,37 +1521,7 @@ static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar * static void TestResetBehaviour(void){ - log_verbose("Testing Reset for SBCS and LATIN_1\n"); - { - static const UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; - static const uint8_t expected[] = {0x31, 0x1a, 0x32}; - static const int32_t offsets[] = { 0,1,3}; - - static const UChar sampleText1[] = {0x0031, 0x0033, 0x0034, 0x0032}; - static const uint8_t expected1[] = {0x31, 0x33,0x34, 0x32}; - static const int32_t offsets1[] = { 0,1,2,3}; - - /*SBCS*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "ibm-920", UCNV_FROM_U_CALLBACK_SUBSTITUTE , offsets, TRUE)) - log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); - if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-920",UCNV_TO_U_CALLBACK_SUBSTITUTE , - offsets1, TRUE)) - log_err("ibm -920 -> did not match.\n"); - - /*LATIN_1*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), - expected, sizeof(expected), "LATIN1", UCNV_FROM_U_CALLBACK_SUBSTITUTE , offsets, TRUE)) - log_err("u-> LATIN_1 not match.\n"); - if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "LATIN1",UCNV_TO_U_CALLBACK_SUBSTITUTE , - offsets1, TRUE)) - log_err("LATIN1 -> did not match.\n"); - - - - } +#if !UCONFIG_NO_LEGACY_CONVERSION log_verbose("Testing Reset for DBCS and MBCS\n"); { static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; @@ -1482,31 +1534,32 @@ static void TestResetBehaviour(void){ static const int32_t offsets1[] = { 0,2,4,6}; /*DBCS*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , + sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , offsets1, TRUE)) log_err("ibm-1363 -> did not match.\n"); /*MBCS*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , + sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , offsets1, TRUE)) log_err("ibm-1363 -> did not match.\n"); } + log_verbose("Testing Reset for ISO-2022-jp\n"); { static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -1524,19 +1577,20 @@ static void TestResetBehaviour(void){ static const int32_t offsets1[] = { 3,5,10,11,12}; /*iso-2022-jp*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) log_err("u-> not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> not match.\n"); if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , + sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , offsets1, TRUE)) log_err("iso-2022-jp -> did not match.\n"); } + log_verbose("Testing Reset for ISO-2022-cn\n"); { static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -1546,7 +1600,7 @@ static void TestResetBehaviour(void){ 0x36, 0x21, 0x0f, 0x31, 0x1A, - 0x0f, 0x32 + 0x32 }; @@ -1555,31 +1609,32 @@ static void TestResetBehaviour(void){ 1, 1, 2, 2, 3, - 5, 5, }; + 5, }; UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; static const uint8_t expected1[] = { 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 0x36, 0x21, - 0x1B, 0x24, 0x29, 0x47, 0x1B, 0x4E, 0x24, 0x22, + 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 0x0f, 0x1A, 0x32 }; - static const int32_t offsets1[] = { 5,7,15,18,19}; + static const int32_t offsets1[] = { 5,7,13,16,17}; /*iso-2022-CN*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) log_err("u-> not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> not match.\n"); if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , + sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , offsets1, TRUE)) log_err("iso-2022-cn -> did not match.\n"); } + log_verbose("Testing Reset for ISO-2022-kr\n"); { UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -1616,17 +1671,18 @@ static void TestResetBehaviour(void){ }; /*iso-2022-kr*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , + sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , offsets1, TRUE)) log_err("iso-2022-kr -> did not match.\n"); } + log_verbose("Testing Reset for HZ\n"); { static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; @@ -1659,17 +1715,19 @@ static void TestResetBehaviour(void){ }; /*hz*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) log_err("u-> not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> not match.\n"); if(!testConvertToU(expected1, sizeof(expected1), - sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , + sampleText1, UPRV_LENGTHOF(sampleText1), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , offsets1, TRUE)) log_err("hz -> did not match.\n"); } +#endif + /*UTF-8*/ log_verbose("Testing for UTF8\n"); { @@ -1683,29 +1741,29 @@ static void TestResetBehaviour(void){ static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; /*UTF-8*/ - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> UTF8 with offsets and flush true did not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) log_err("u-> UTF8 with offsets and flush true did not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) log_err("u-> UTF8 with offsets and flush true did not match.\n"); - if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) log_err("u-> UTF8 with offsets and flush true did not match.\n"); if(!testConvertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) + sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) log_err("UTF8 -> did not match.\n"); if(!testConvertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) + sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) log_err("UTF8 -> did not match.\n"); if(!testConvertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) + sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) log_err("UTF8 -> did not match.\n"); if(!testConvertToU(expected, sizeof(expected), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) + sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) log_err("UTF8 -> did not match.\n"); } @@ -1729,11 +1787,18 @@ doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode)); return; } + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n", + cnvName, u_errorName(errorCode)); + ucnv_close(cnv); + return; + } source=(const char *)bytes; sourceLimit=source+length; target=buffer; - targetLimit=buffer+LENGTHOF(buffer); + targetLimit=buffer+UPRV_LENGTHOF(buffer); /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */ ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode); @@ -1747,16 +1812,10 @@ doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { target=buffer; ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { - log_err("error TestTruncated(%s, 1b): no input, flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", - cnvName, u_errorName(errorCode), (int)(target-buffer)); + log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", + cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer)); } - /* - * ### TODO: flush=TRUE resets; make sure this is well documented; question - - * does it also delete ucnv_getInvalidChars()? - * resetting logically should delete them, but then it is not possible to figure out which bytes are left in the converter. - */ - /* 2. input bytes with flush=TRUE */ ucnv_resetToUnicode(cnv); @@ -1764,7 +1823,7 @@ doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { source=(const char *)bytes; target=buffer; ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); - if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { + if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) { log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n", cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); } @@ -1799,17 +1858,21 @@ TestTruncated() { { "UTF-32", { 0, 0, 0x4e }, 3 }, { "UTF-32", { 0xff }, 1 }, { "UTF-32", { 0, 0, 0xfe, 0xff, 0 }, 5 }, - { "SCSU", { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */ + +#if !UCONFIG_NO_LEGACY_CONVERSION { "BOCU-1", { 0xd5 }, 1 }, { "Shift-JIS", { 0xe0 }, 1 }, { "ibm-939", { 0x0e, 0x41 }, 2 } /* SO 0x41 */ +#else + { "BOCU-1", { 0xd5 }, 1 ,} +#endif }; int32_t i; - for(i=0; i