X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..f3c0d7a59d99c2a94c6b8822291f0e42be3773c9:/icuSources/test/cintltst/nucnvtst.c diff --git a/icuSources/test/cintltst/nucnvtst.c b/icuSources/test/cintltst/nucnvtst.c index b67c69d9..3366b669 100644 --- a/icuSources/test/cintltst/nucnvtst.c +++ b/icuSources/test/cintltst/nucnvtst.c @@ -1,34 +1,41 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2004, International Business Machines Corporation and + * Copyright (c) 1997-2016, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ -/******************************************************************************** +/******************************************************************************* * -* File CCONVTST.C +* File nucnvtst.c * * Modification History: * Name Description * Steven R. Loomis 7/8/1999 Adding input buffer test -********************************************************************************* +******************************************************************************** */ #include #include "cstring.h" #include "unicode/uloc.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" +#include "unicode/ucnv_cb.h" #include "cintltst.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/ucol.h" +#include "unicode/utf16.h" #include "cmemory.h" +#include "nucnvtst.h" static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); #if !UCONFIG_NO_COLLATION static void TestJitterbug981(void); #endif +#if !UCONFIG_NO_LEGACY_CONVERSION static void TestJitterbug1293(void); +#endif static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; static void TestConverterTypesAndStarters(void); static void TestAmbiguous(void); @@ -44,40 +51,70 @@ static void TestUTF32(void); static void TestUTF32BE(void); static void TestUTF32LE(void); static void TestLATIN1(void); + +#if !UCONFIG_NO_LEGACY_CONVERSION static void TestSBCS(void); static void TestDBCS(void); static void TestMBCS(void); +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO +static void TestICCRunout(void); +#endif + #ifdef U_ENABLE_GENERIC_ISO_2022 static void TestISO_2022(void); #endif + static void TestISO_2022_JP(void); static void TestISO_2022_JP_1(void); static void TestISO_2022_JP_2(void); static void TestISO_2022_KR(void); static void TestISO_2022_KR_1(void); static void TestISO_2022_CN(void); +#if 0 + /* + * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 + */ static void TestISO_2022_CN_EXT(void); +#endif static void TestJIS(void); static void TestHZ(void); +#endif + static void TestSCSU(void); + +#if !UCONFIG_NO_LEGACY_CONVERSION static void TestEBCDIC_STATEFUL(void); static void TestGB18030(void); static void TestLMBCS(void); static void TestJitterbug255(void); static void TestEBCDICUS4XML(void); +#if 0 + /* + * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 + */ static void TestJitterbug915(void); +#endif static void TestISCII(void); + +static void TestCoverageMBCS(void); +static void TestJitterbug2346(void); +static void TestJitterbug2411(void); +static void TestJB5275(void); +static void TestJB5275_1(void); +static void TestJitterbug6175(void); + +static void TestIsFixedWidth(void); +#endif + +static void TestInBufSizes(void); + +static void TestRoundTrippingAllUTF(void); static void TestConv(const uint16_t in[], int len, const char* conv, const char* lang, char byteArr[], int byteArrLen); -static void TestRoundTrippingAllUTF(void); -static void TestCoverageMBCS(void); -static void TestJitterbug2346(void); -static void TestJitterbug2411(void); -void addTestNewConvert(TestNode** root); /* open a converter, using test data if it begins with '@' */ static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); @@ -212,8 +249,10 @@ static void TestOutBufSizes(void) void addTestNewConvert(TestNode** root) { +#if !UCONFIG_NO_FILE_IO addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); +#endif addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); @@ -229,15 +268,25 @@ void addTestNewConvert(TestNode** root) addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); + +#if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); +#endif addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); + +#if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); +#if !UCONFIG_NO_FILE_IO addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); + addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); +#endif addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); + #ifdef U_ENABLE_GENERIC_ISO_2022 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); #endif + addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); @@ -245,24 +294,45 @@ void addTestNewConvert(TestNode** root) addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); + /* + * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); + */ addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); +#endif + addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); + +#if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); + addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); + addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); #if !UCONFIG_NO_COLLATION addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); #endif + addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); +#endif + + +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); +#endif + addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); + +#if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); + addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); + addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); +#endif } @@ -294,23 +364,23 @@ static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, { UErrorCode status = U_ZERO_ERROR; UConverter *conv = 0; - uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */ + char junkout[NEW_MAX_BUFFER]; /* FIX */ int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ - uint8_t *p; + char *p; const UChar *src; - uint8_t *end; - uint8_t *targ; + char *end; + char *targ; int32_t *offs; int i; int32_t realBufferSize; - uint8_t *realBufferEnd; + char *realBufferEnd; const UChar *realSourceEnd; const UChar *sourceLimit; UBool checkOffsets = TRUE; UBool doFlush; for(i=0;i %d chars out]. \nResult :", sourceLen, targ-junkout); - if(VERBOSITY) + if(getTestOption(VERBOSITY_OPTION)) { char junk[9999]; char offset_str[9999]; - uint8_t *ptr; + char *ptr; junk[0] = 0; offset_str[0] = 0; @@ -402,9 +472,9 @@ static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, if(expectLen != targ-junkout) { log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); - printf("\nGot:"); - printSeqErr((const unsigned char*)junkout, targ-junkout); - printf("\nExpected:"); + fprintf(stderr, "Got:\n"); + printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); + fprintf(stderr, "Expected:\n"); printSeqErr((const unsigned char*)expect, expectLen); return TC_MISMATCH; } @@ -413,7 +483,7 @@ static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, log_verbose("comparing %d offsets..\n", targ-junkout); if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ log_err("did not get the expected offsets. %s\n", gNuConvTestName); - printSeqErr((const unsigned char*)junkout, targ-junkout); + printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); log_err("\n"); log_err("Got : "); for(p=junkout;p%s\n", gNuConvTestName); printUSeqErr(source, sourceLen); - printf("\nGot:"); + fprintf(stderr, "Got:\n"); printSeqErr((const unsigned char *)junkout, expectLen); - printf("\nExpected:"); + fprintf(stderr, "Expected:\n"); printSeqErr((const unsigned char *)expect, expectLen); return TC_MISMATCH; @@ -453,9 +523,9 @@ static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, UConverter *conv = 0; UChar junkout[NEW_MAX_BUFFER]; /* FIX */ int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ - const uint8_t *src; - const uint8_t *realSourceEnd; - const uint8_t *srcLimit; + const char *src; + const char *realSourceEnd; + const char *srcLimit; UChar *p; UChar *targ; UChar *end; @@ -489,11 +559,11 @@ static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, } log_verbose("Converter opened..\n"); - src = source; + src = (const char *)source; targ = junkout; offs = junokout; - realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); + realBufferSize = UPRV_LENGTHOF(junkout); realBufferEnd = junkout + realBufferSize; realSourceEnd = src + sourcelen; @@ -519,8 +589,8 @@ static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, ucnv_toUnicode (conv, &targ, end, - (const char **)&src, - (const char *)srcLimit, + &src, + srcLimit, checkOffsets ? offs : NULL, (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ &status); @@ -537,7 +607,7 @@ static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", sourcelen, targ-junkout); - if(VERBOSITY) + if(getTestOption(VERBOSITY_OPTION)) { char junk[9999]; char offset_str[9999]; @@ -585,7 +655,7 @@ static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, } log_err("\n"); log_err("input: "); - for(i=0; i<(src-source); i++) { + for(i=0; i<(src-(const char *)source); i++) { log_err("%X,", (unsigned char)source[i]); } log_err("\n"); @@ -614,85 +684,87 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) { /** test chars #1 */ /* 1 2 3 1Han 2Han 3Han . */ - UChar sampleText[] = - { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; + static const UChar sampleText[] = + { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; + static const UChar sampleTextRoundTripUnmappable[] = + { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; - const uint8_t expectedUTF8[] = - { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; - int32_t toUTF8Offs[] = - { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; - int32_t fmUTF8Offs[] = - { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d }; + static const uint8_t expectedUTF8[] = + { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; + static const int32_t toUTF8Offs[] = + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; + static const int32_t fmUTF8Offs[] = + { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; #ifdef U_ENABLE_GENERIC_ISO_2022 /* Same as UTF8, but with ^[%B preceeding */ - const uint8_t expectedISO2022[] = + static const const uint8_t expectedISO2022[] = { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; - int32_t toISO2022Offs[] = + static const int32_t toISO2022Offs[] = { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ - int32_t fmISO2022Offs[] = + static const int32_t fmISO2022Offs[] = { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ #endif /* 1 2 3 0, h1 h2 h3 . EBCDIC_STATEFUL */ - const uint8_t expectedIBM930[] = - { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B }; - int32_t toIBM930Offs[] = - { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, }; - int32_t fmIBM930Offs[] = - { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c}; + static const uint8_t expectedIBM930[] = + { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; + static const int32_t toIBM930Offs[] = + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; + static const int32_t fmIBM930Offs[] = + { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; /* 1 2 3 0 h1 h2 h3 . MBCS*/ - const uint8_t expectedIBM943[] = - { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e }; - int32_t toIBM943Offs [] = - { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 }; - int32_t fmIBM943Offs[] = - { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a}; + static const uint8_t expectedIBM943[] = + { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; + static const int32_t toIBM943Offs [] = + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; + static const int32_t fmIBM943Offs[] = + { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; /* 1 2 3 0 h1 h2 h3 . DBCS*/ - const uint8_t expectedIBM9027[] = - { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe}; - int32_t toIBM9027Offs [] = - { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; + static const uint8_t expectedIBM9027[] = + { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; + static const int32_t toIBM9027Offs [] = + { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; /* 1 2 3 0 . SBCS*/ - const uint8_t expectedIBM920[] = - { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e }; - int32_t toIBM920Offs [] = - { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; + static const uint8_t expectedIBM920[] = + { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; + static const int32_t toIBM920Offs [] = + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; /* 1 2 3 0 . SBCS*/ - const uint8_t expectedISO88593[] = - { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E }; - int32_t toISO88593Offs[] = - { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; + static const uint8_t expectedISO88593[] = + { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; + static const int32_t toISO88593Offs[] = + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; - /* 1 2 3 0 . LATIN_1*/ - const uint8_t expectedLATIN1[] = - { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E }; - int32_t toLATIN1Offs[] = - { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; + /* 1 2 3 0 . LATIN_1*/ + static const uint8_t expectedLATIN1[] = + { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; + static const int32_t toLATIN1Offs[] = + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; /* etc */ - const uint8_t expectedUTF16BE[] = - { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e }; - int32_t toUTF16BEOffs[]= - { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; - int32_t fmUTF16BEOffs[] = - { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e }; - - const uint8_t expectedUTF16LE[] = - { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 }; - int32_t toUTF16LEOffs[]= - { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; - int32_t fmUTF16LEOffs[] = - { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e }; - - const uint8_t expectedUTF32BE[] = + static const uint8_t expectedUTF16BE[] = + { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; + static const int32_t toUTF16BEOffs[]= + { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; + static const int32_t fmUTF16BEOffs[] = + { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; + + static const uint8_t expectedUTF16LE[] = + { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; + static const int32_t toUTF16LEOffs[]= + { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; + static const int32_t fmUTF16LEOffs[] = + { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; + + static const uint8_t expectedUTF32BE[] = { 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, @@ -700,8 +772,9 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x00, 0x00, 0x4e, 0x09, - 0x00, 0x00, 0x00, 0x2e }; - int32_t toUTF32BEOffs[]= + 0x00, 0x00, 0x00, 0x2e, + 0x00, 0x02, 0x00, 0x21 }; + static const int32_t toUTF32BEOffs[]= { 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, @@ -710,11 +783,12 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }; - int32_t fmUTF32BEOffs[] = - { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c }; + static const int32_t fmUTF32BEOffs[] = + { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; - const uint8_t expectedUTF32LE[] = + static const uint8_t expectedUTF32LE[] = { 0x31, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, @@ -722,8 +796,9 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0x00, 0x4e, 0x00, 0x00, 0x8c, 0x4e, 0x00, 0x00, 0x09, 0x4e, 0x00, 0x00, - 0x2e, 0x00, 0x00, 0x00 }; - int32_t toUTF32LEOffs[]= + 0x2e, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x02, 0x00 }; + static const int32_t toUTF32LEOffs[]= { 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, @@ -732,9 +807,10 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }; - int32_t fmUTF32LEOffs[] = - { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c }; + static const int32_t fmUTF32LEOffs[] = + { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; @@ -742,128 +818,137 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) /** Test chars #2 **/ /* Sahha [health], slashed h's */ - const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; - const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; + static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; + static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; /* LMBCS */ - const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; - const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; - int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; - int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; + static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; + static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; + static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; + static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; /*********************************** START OF CODE finally *************/ - gInBufferSize = insize; - gOutBufferSize = outsize; + gInBufferSize = insize; + gOutBufferSize = outsize; - log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); + log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); -#if 1 /*UTF-8*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); log_verbose("Test surrogate behaviour for UTF8\n"); { - const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; - const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, + static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; + static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 0xf0, 0x90, 0x90, 0x81, 0xef, 0xbf, 0xbd }; - int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; - testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), + static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; + testConvertFromU(testinput, UPRV_LENGTHOF(testinput), expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); } -#ifdef U_ENABLE_GENERIC_ISO_2022 + +#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) /*ISO-2022*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); #endif + /*UTF16 LE*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); /*UTF16 BE*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); /*UTF32 LE*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); /*UTF32 BE*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); + /*LATIN_1*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); + +#if !UCONFIG_NO_LEGACY_CONVERSION /*EBCDIC_STATEFUL*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); /*MBCS*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); /*DBCS*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); /*SBCS*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); /*SBCS*/ - testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), + testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); +#endif /****/ -#endif -#if 1 /*UTF-8*/ testConvertToU(expectedUTF8, sizeof(expectedUTF8), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); -#ifdef U_ENABLE_GENERIC_ISO_2022 + sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE); +#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) /*ISO-2022*/ testConvertToU(expectedISO2022, sizeof(expectedISO2022), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); + sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE); #endif + /*UTF16 LE*/ testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); + sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE); /*UTF16 BE*/ testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); + sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE); /*UTF32 LE*/ testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); + sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE); /*UTF32 BE*/ testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); + sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE); + +#if !UCONFIG_NO_LEGACY_CONVERSION /*EBCDIC_STATEFUL*/ - testConvertToU(expectedIBM930, sizeof(expectedIBM930), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE); + testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, + UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE); /*MBCS*/ - testConvertToU(expectedIBM943, sizeof(expectedIBM943), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE); + testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, + UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE); +#endif /* Try it again to make sure it still works */ testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), - sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); + sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE); +#if !UCONFIG_NO_LEGACY_CONVERSION testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), - malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); + malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE); - testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), + testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars), expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); /*LMBCS*/ - testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), + testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), - LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); + LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE); +#endif /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ { @@ -873,12 +958,12 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) Hi Mom -+Jjo--! A+ImIDkQ. +- - +ZeVnLIqe + +ZeVnLIqe- */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 0x2b, 0x2d, - 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 + 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d }; static const UChar unicode[] = { /* @@ -902,7 +987,7 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 13, 13, 14, 15, 15, - 16, 16, 16, 17, 17, 17, 18, 18, 18 + 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 }; /* same but escaping set O (the exclamation mark) */ @@ -911,12 +996,12 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) Hi Mom -+Jjo--+ACE- A+ImIDkQ. +- - +ZeVnLIqe + +ZeVnLIqe- */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 0x2b, 0x2d, - 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 + 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d }; static const int32_t toUnicodeOffsetsR[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, @@ -928,16 +1013,16 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 11, 12, 12, 12, 13, 13, 13, 13, 14, 15, 15, - 16, 16, 16, 17, 17, 17, 18, 18, 18 + 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 }; - testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); + testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); - testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); + testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE); - testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); + testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); - testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); + testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE); } /* @@ -1011,9 +1096,9 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 35, 36, 36, 36, 37, 37, 37, 37, 37 }; - testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); + testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); - testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); + testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE); } /* Test UTF-8 bad data handling*/ @@ -1050,7 +1135,7 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 }; testConvertToU(utf8, sizeof(utf8), - utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); + utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE); } @@ -1066,7 +1151,6 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0x00, 0x00, 0x01, 0x62, 0x00, 0x00, 0x02, 0x62 }; - static const uint16_t utf32Expected[]={ 0x0061, 0xfffd, /* 0x110000 out of range */ @@ -1078,13 +1162,34 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0x0162, 0x0262 }; - static const int32_t utf32Offsets[]={ 0, 4, 8, 8, 12, 16, 20, 24, 28 }; - testConvertToU(utf32, sizeof(utf32), - utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); + static const uint8_t utf32ExpectedBack[]={ + 0x00, 0x00, 0x00, 0x61, + 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ + 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ + 0x00, 0x00, 0x00, 0x62, + 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ + 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ + 0x00, 0x00, 0x01, 0x62, + 0x00, 0x00, 0x02, 0x62 + }; + static const int32_t utf32OffsetsBack[]={ + 0,0,0,0, + 1,1,1,1, + 2,2,2,2, + 4,4,4,4, + 5,5,5,5, + 6,6,6,6, + 7,7,7,7, + 8,8,8,8 + }; + testConvertToU(utf32, sizeof(utf32), + utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE); + testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected), + utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); } /* Test UTF-32LE bad data handling*/ @@ -1111,13 +1216,33 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 0x0162, 0x0262 }; - static const int32_t utf32Offsets[]={ 0, 4, 8, 8, 12, 16, 20, 24, 28 }; + static const uint8_t utf32ExpectedBack[]={ + 0x61, 0x00, 0x00, 0x00, + 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ + 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ + 0x62, 0x00, 0x00, 0x00, + 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ + 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ + 0x62, 0x01, 0x00, 0x00, + 0x62, 0x02, 0x00, 0x00 + }; + static const int32_t utf32OffsetsBack[]={ + 0,0,0,0, + 1,1,1,1, + 2,2,2,2, + 4,4,4,4, + 5,5,5,5, + 6,6,6,6, + 7,7,7,7, + 8,8,8,8 + }; testConvertToU(utf32, sizeof(utf32), - utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); - + utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE ); + testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected), + utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); } } @@ -1153,7 +1278,7 @@ static void TestCoverageMBCS(){ int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; /*from Unicode*/ - testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), + testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); } @@ -1171,12 +1296,12 @@ static void TestCoverageMBCS(){ int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; /*from Unicode*/ - testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), + testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); /*to Unicode*/ testConvertToU(test3input, sizeof(test3input), - expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); + expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE); } @@ -1194,12 +1319,12 @@ static void TestCoverageMBCS(){ static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; /*from Unicode*/ - testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), + testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); /*to Unicode*/ testConvertToU(test4input, sizeof(test4input), - expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); + expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE ); } #if 0 @@ -1237,6 +1362,7 @@ static void TestConverterType(const char *convName, UConverterType convType) { static void TestConverterTypesAndStarters() { +#if !UCONFIG_NO_LEGACY_CONVERSION UConverter* myConverter; UErrorCode err = U_ZERO_ERROR; UBool mystarters[256]; @@ -1297,19 +1423,33 @@ static void TestConverterTypesAndStarters() TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); TestConverterType("ibm-878", UCNV_SBCS); +#endif + TestConverterType("iso-8859-1", UCNV_LATIN_1); + TestConverterType("ibm-1208", UCNV_UTF8); + TestConverterType("utf-8", UCNV_UTF8); TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); -#ifdef U_ENABLE_GENERIC_ISO_2022 + +#if !UCONFIG_NO_LEGACY_CONVERSION + +#if defined(U_ENABLE_GENERIC_ISO_2022) TestConverterType("iso-2022", UCNV_ISO_2022); #endif + TestConverterType("hz", UCNV_HZ); +#endif + TestConverterType("scsu", UCNV_SCSU); + +#if !UCONFIG_NO_LEGACY_CONVERSION TestConverterType("x-iscii-de", UCNV_ISCII); +#endif + TestConverterType("ascii", UCNV_US_ASCII); TestConverterType("utf-7", UCNV_UTF7); TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); @@ -1318,7 +1458,7 @@ static void TestConverterTypesAndStarters() static void TestAmbiguousConverter(UConverter *cnv) { - static const char inBytes[2]={ 0x61, 0x5c }; + static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; UChar outUnicode[20]={ 0, 0, 0, 0 }; const char *s; @@ -1326,34 +1466,36 @@ TestAmbiguousConverter(UConverter *cnv) { UErrorCode errorCode; UBool isAmbiguous; - /* try to convert an 'a' and a US-ASCII backslash */ + /* try to convert an 'a', a square bracket and a US-ASCII backslash */ errorCode=U_ZERO_ERROR; s=inBytes; u=outUnicode; - ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode); + ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); if(U_FAILURE(errorCode)) { /* we do not care about general failures in this test; the input may just not be mappable */ return; } - if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) { - /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ + if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { + /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ + /* There are some encodings that are partially ASCII based, + like the ISO-7 and GSM series of codepages, which we ignore. */ return; } isAmbiguous=ucnv_isAmbiguous(cnv); /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ - if((outUnicode[1]!=0x5c)!=isAmbiguous) { + if((outUnicode[2]!=0x5c)!=isAmbiguous) { log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", - ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous); + ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); return; } - if(outUnicode[1]!=0x5c) { + if(outUnicode[2]!=0x5c) { /* needs fixup, fix it */ ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); - if(outUnicode[1]!=0x5c) { + if(outUnicode[2]!=0x5c) { /* the fix failed */ log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); return; @@ -1365,7 +1507,7 @@ static void TestAmbiguous() { UErrorCode status = U_ZERO_ERROR; UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; - const char target[] = { + static const char target[] = { /* "\\usr\\local\\share\\data\\icutest.txt" */ 0x5c, 0x75, 0x73, 0x72, 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, @@ -1375,7 +1517,7 @@ static void TestAmbiguous() 0 }; UChar asciiResult[200], sjisResult[200]; - int32_t asciiLength = 0, sjisLength = 0, i; + int32_t /*asciiLength = 0,*/ sjisLength = 0, i; const char *name; /* enumerate all converters */ @@ -1391,6 +1533,7 @@ static void TestAmbiguous() } } +#if !UCONFIG_NO_LEGACY_CONVERSION sjis_cnv = ucnv_open("ibm-943", &status); if (U_FAILURE(status)) { @@ -1405,7 +1548,7 @@ static void TestAmbiguous() return; } /* convert target from SJIS to Unicode */ - sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status); + sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the SJIS string.\n"); @@ -1414,11 +1557,10 @@ static void TestAmbiguous() return; } /* convert target from Latin-1 to Unicode */ - asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status); + /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the Latin-1 string.\n"); - free(sjisResult); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; @@ -1426,8 +1568,6 @@ static void TestAmbiguous() if (!ucnv_isAmbiguous(sjis_cnv)) { log_err("SJIS converter should contain ambiguous character mappings.\n"); - free(sjisResult); - free(asciiResult); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; @@ -1443,6 +1583,7 @@ static void TestAmbiguous() } ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); +#endif } static void @@ -1524,7 +1665,7 @@ TestSignatureDetection(){ int32_t signatureLength = -1; const char* source = NULL; const char* enc = NULL; - for( ; i0xFFFF){ - dst[dstIndex++] = UTF16_LEAD(c); + dst[dstIndex++] = U16_LEAD(c); if(dstIndex&@*/ }; - const uint16_t expectedISO2022JIS[] = { + static const uint16_t expectedISO2022JIS[] = { 0x0041, 0x0042, 0xFF81, 0xFF82, 0x3000 }; - int32_t toISO2022JISOffs[]={ + static const int32_t toISO2022JISOffs[]={ 3,4, 8,9, 16 }; - const uint8_t sampleTextJIS7[] = { + static const uint8_t sampleTextJIS7[] = { 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 0x1b,0x24,0x42,0x21,0x21, @@ -4016,7 +4220,7 @@ TestJIS(){ 0x21,0x22, 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore &@*/ }; - const uint16_t expectedISO2022JIS7[] = { + static const uint16_t expectedISO2022JIS7[] = { 0x0041, 0x0042, 0xFF81, 0xFF82, 0x3000, @@ -4024,14 +4228,14 @@ TestJIS(){ 0x3001, 0x3000 }; - int32_t toISO2022JIS7Offs[]={ + static const int32_t toISO2022JIS7Offs[]={ 3,4, 8,9, 13,16, 17, 19,27 }; - const uint8_t sampleTextJIS8[] = { + static const uint8_t sampleTextJIS8[] = { 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 0xa1,0xc8,0xd9,/*Katakana Set*/ 0x1b,0x28,0x42, @@ -4039,29 +4243,33 @@ TestJIS(){ 0xb1,0xc3, /*Katakana Set*/ 0x1b,0x24,0x42,0x21,0x21 }; - const uint16_t expectedISO2022JIS8[] = { + static const uint16_t expectedISO2022JIS8[] = { 0x0041, 0x0042, 0xff61, 0xff88, 0xff99, 0x0041, 0x0042, 0xff71, 0xff83, 0x3000 }; - int32_t toISO2022JIS8Offs[]={ + static const int32_t toISO2022JIS8Offs[]={ 3, 4, 5, 6, 7, 11, 12, 13, 14, 18, }; testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, - sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); + UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE); testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, - sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); + UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE); testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, - sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); + UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE); } } + +#if 0 + ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 + static void TestJitterbug915(){ /* tests for roundtripping of the below sequence \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / @@ -4072,7 +4280,7 @@ static void TestJitterbug915(){ \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / */ - static char cSource[]={ + static const char cSource[]={ 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, @@ -4110,7 +4318,7 @@ static void TestJitterbug915(){ char* ctarget=cTarget; char* ctargetLimit=cTarget+sizeof(cTarget); const char* csource=cSource; - char* tempSrc = cSource; + const char* tempSrc = cSource; UErrorCode err=U_ZERO_ERROR; UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); @@ -4205,8 +4413,8 @@ TestISO_2022_CN_EXT() { uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); - uSource = (const UChar*)&in[0]; - uSourceLimit=(const UChar*)&in[sizeof(in)/2]; + uSource = (const UChar*)in; + uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); cTarget = cBuf; cTargetLimit = cBuf +uBufSize*5; uTarget = uBuf; @@ -4225,7 +4433,7 @@ TestISO_2022_CN_EXT() { log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); return; } - uSource = (const UChar*)&in[0]; + uSource = (const UChar*)in; while(uSource UCNV_IRREGULAR) { + return; + } + if (reason != UCNV_IRREGULAR) { + log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); + } + /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ + *err = U_ZERO_ERROR; + ucnv_cbToUWriteSub(toArgs,0,err); +} + +enum { kEmptySegmentToUCharsMax = 64 }; +static void TestJitterbug6175(void) { + static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; + static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; + static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; + static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; + static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; + static const EmptySegmentTest emptySegmentTests[] = { + /* converterName inputText inputTextLength */ + { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, + { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, + { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, + { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, + { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, + /* terminator: */ + { NULL, NULL, 0, } + }; + const EmptySegmentTest * testPtr; + for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { + UErrorCode err = U_ZERO_ERROR; + UConverter * cnv = ucnv_open(testPtr->converterName, &err); + if (U_FAILURE(err)) { + log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); + return; + } + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); + if (U_FAILURE(err)) { + log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); + ucnv_close(cnv); + return; + } + { + UChar toUChars[kEmptySegmentToUCharsMax]; + UChar * toUCharsPtr = toUChars; + const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; + const char * inCharsPtr = testPtr->inputText; + const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; + ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); + } + ucnv_close(cnv); + } +} + static void TestEBCDIC_STATEFUL() { /* test input */ @@ -4632,19 +4905,19 @@ TestLMBCS() { { UErrorCode errorCode=U_ZERO_ERROR; - const uint8_t * pSource = pszLMBCS; - const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS); + const char * pSource = (const char *)pszLMBCS; + const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); UChar Out [sizeof(pszUnicode) + 1]; UChar * pOut = Out; - UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); + UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode); int32_t off [sizeof(offsets)]; /* last 'offset' in expected results is just the final size. (Makes other tests easier). Compensate here: */ - off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); + off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS); @@ -4659,8 +4932,8 @@ TestLMBCS() { ucnv_toUnicode (cnv, &pOut, OutLimit, - (const char **)&pSource, - (const char *)sourceLimit, + &pSource, + sourceLimit, off, TRUE, &errorCode); @@ -4722,13 +4995,13 @@ TestLMBCS() { const UChar * pUniOut = uniString; UChar * pUniIn = uniString; uint8_t lmbcsString [4]; - const uint8_t * pLMBCSOut = lmbcsString; - uint8_t * pLMBCSIn = lmbcsString; + const char * pLMBCSOut = (const char *)lmbcsString; + char * pLMBCSIn = (char *)lmbcsString; /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ ucnv_fromUnicode (cnv16he, - (char **)&pLMBCSIn, (const char *)(pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), - &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), + &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)), + &pUniOut, pUniOut + UPRV_LENGTHOF(uniString), NULL, 1, &errorCode); if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) @@ -4736,11 +5009,11 @@ TestLMBCS() { log_err("LMBCS-16,locale=he gives unexpected translation\n"); } - pLMBCSIn=lmbcsString; + pLMBCSIn= (char *)lmbcsString; pUniOut = uniString; ucnv_fromUnicode (cnv01us, - (char **)&pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), - &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), + &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)), + &pUniOut, pUniOut + UPRV_LENGTHOF(uniString), NULL, 1, &errorCode); if (lmbcsString[0] != 0x9F) @@ -4750,45 +5023,45 @@ TestLMBCS() { /* single byte char from mbcs char set */ lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ - pLMBCSOut = lmbcsString; + pLMBCSOut = (const char *)lmbcsString; pUniIn = uniString; ucnv_toUnicode (cnv16jp, &pUniIn, pUniIn + 1, - (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 1), + &pLMBCSOut, (pLMBCSOut + 1), NULL, 1, &errorCode); - if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) + if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) { log_err("Unexpected results from LMBCS-16 single byte char\n"); } /* convert to group 1: should be 3 bytes */ - pLMBCSIn = lmbcsString; + pLMBCSIn = (char *)lmbcsString; pUniOut = uniString; ucnv_fromUnicode (cnv01us, - (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 3), + &pLMBCSIn, (const char *)(pLMBCSIn + 3), &pUniOut, pUniOut + 1, NULL, 1, &errorCode); - if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+3 || pUniOut != uniString+1 + if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) { log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); } - pLMBCSOut = lmbcsString; + pLMBCSOut = (const char *)lmbcsString; pUniIn = uniString; ucnv_toUnicode (cnv01us, &pUniIn, pUniIn + 1, - (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 3), + &pLMBCSOut, (const char *)(pLMBCSOut + 3), NULL, 1, &errorCode); - if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) + if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) { log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); } - pLMBCSIn = lmbcsString; + pLMBCSIn = (char *)lmbcsString; pUniOut = uniString; ucnv_fromUnicode (cnv16jp, - (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 1), + &pLMBCSIn, (const char *)(pLMBCSIn + 1), &pUniOut, pUniOut + 1, NULL, 1, &errorCode); - if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) + if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) { log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); } @@ -4801,13 +5074,13 @@ TestLMBCS() { UErrorCode errorCode=U_ZERO_ERROR; - const uint8_t * pSource = pszLMBCS; - const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS); + const char * pSource = (const char *)pszLMBCS; + const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); int codepointCount = 0; UChar Out [sizeof(pszUnicode) + 1]; UChar * pOut = Out; - UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); + UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode); cnv = ucnv_open(NAME_LMBCS_1, &errorCode); @@ -4822,15 +5095,15 @@ TestLMBCS() { ucnv_toUnicode (cnv, &pOut, OutLimit, - (const char **)&pSource, - (const char *)(pSource+1), /* claim that this is a 1- byte buffer */ + &pSource, + (pSource+1), /* claim that this is a 1- byte buffer */ NULL, FALSE, /* FALSE means there might be more chars in the next buffer */ &errorCode); if (U_SUCCESS (errorCode)) { - if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1]) + if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) { /* we are on to the next code point: check value */ @@ -4850,8 +5123,8 @@ TestLMBCS() { } { /* limits & surrogate error testing */ - uint8_t LIn [sizeof(pszLMBCS)]; - const uint8_t * pLIn = LIn; + char LIn [sizeof(pszLMBCS)]; + const char * pLIn = LIn; char LOut [sizeof(pszLMBCS)]; char * pLOut = LOut; @@ -4868,11 +5141,14 @@ TestLMBCS() { errorCode=U_ZERO_ERROR; /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ - ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode); + pUIn++; + ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); } + pUIn--; + errorCode=U_ZERO_ERROR; ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) @@ -4913,7 +5189,7 @@ TestLMBCS() { /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ pUIn = pszUnicode; - ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); + ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode); if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) { log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); @@ -4921,9 +5197,9 @@ TestLMBCS() { errorCode = U_ZERO_ERROR; - pLIn = pszLMBCS; - ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); - if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4]) + pLIn = (const char *)pszLMBCS; + ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); + if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) { log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); } @@ -4931,98 +5207,98 @@ TestLMBCS() { /* unpaired or chopped LMBCS surrogates */ /* OK high surrogate, Low surrogate is chopped */ - LIn [0] = 0x14; - LIn [1] = 0xD8; - LIn [2] = 0x01; - LIn [3] = 0x14; - LIn [4] = 0xDC; + LIn [0] = (char)0x14; + LIn [1] = (char)0xD8; + LIn [2] = (char)0x01; + LIn [3] = (char)0x14; + LIn [4] = (char)0xDC; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); - ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); + ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) { log_err("Unexpected results on chopped low surrogate\n"); } /* chopped at surrogate boundary */ - LIn [0] = 0x14; - LIn [1] = 0xD8; - LIn [2] = 0x01; + LIn [0] = (char)0x14; + LIn [1] = (char)0xD8; + LIn [2] = (char)0x01; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; - ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); + ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) { log_err("Unexpected results on chopped at surrogate boundary \n"); } /* unpaired surrogate plus valid Unichar */ - LIn [0] = 0x14; - LIn [1] = 0xD8; - LIn [2] = 0x01; - LIn [3] = 0x14; - LIn [4] = 0xC9; - LIn [5] = 0xD0; + LIn [0] = (char)0x14; + LIn [1] = (char)0xD8; + LIn [2] = (char)0x01; + LIn [3] = (char)0x14; + LIn [4] = (char)0xC9; + LIn [5] = (char)0xD0; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; - ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); + ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) { log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); } /* unpaired surrogate plus chopped Unichar */ - LIn [0] = 0x14; - LIn [1] = 0xD8; - LIn [2] = 0x01; - LIn [3] = 0x14; - LIn [4] = 0xC9; + LIn [0] = (char)0x14; + LIn [1] = (char)0xD8; + LIn [2] = (char)0x01; + LIn [3] = (char)0x14; + LIn [4] = (char)0xC9; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; - ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); + ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) { log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); } /* unpaired surrogate plus valid non-Unichar */ - LIn [0] = 0x14; - LIn [1] = 0xD8; - LIn [2] = 0x01; - LIn [3] = 0x0F; - LIn [4] = 0x3B; + LIn [0] = (char)0x14; + LIn [1] = (char)0xD8; + LIn [2] = (char)0x01; + LIn [3] = (char)0x0F; + LIn [4] = (char)0x3B; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; - ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); + ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) { log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); } /* unpaired surrogate plus chopped non-Unichar */ - LIn [0] = 0x14; - LIn [1] = 0xD8; - LIn [2] = 0x01; - LIn [3] = 0x0F; + LIn [0] = (char)0x14; + LIn [1] = (char)0xD8; + LIn [2] = (char)0x01; + LIn [3] = (char)0x0F; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; - ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); + ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) { @@ -5036,11 +5312,11 @@ TestLMBCS() { static void TestJitterbug255() { - const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; - const uint8_t *testBuffer = testBytes; - const uint8_t *testEnd = testBytes + sizeof(testBytes); + static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; + const char *testBuffer = (const char *)testBytes; + const char *testEnd = (const char *)testBytes + sizeof(testBytes); UErrorCode status = U_ZERO_ERROR; - UChar32 result; + /*UChar32 result;*/ UConverter *cnv = 0; cnv = ucnv_open("shift-jis", &status); @@ -5050,7 +5326,7 @@ static void TestJitterbug255() } while (testBuffer != testEnd) { - result = ucnv_getNextUChar (cnv, (const char **)&testBuffer, (const char *)testEnd , &status); + /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); if (U_FAILURE(status)) { log_err("Failed to convert the next UChar for SJIS.\n"); @@ -5096,6 +5372,7 @@ static void TestEBCDICUS4XML() } ucnv_close(cnv); } +#endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ #if !UCONFIG_NO_COLLATION @@ -5109,16 +5386,23 @@ static void TestJitterbug981(){ int numNeeded=0; utf8cnv = ucnv_open ("utf8", &status); if(U_FAILURE(status)){ - log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status)); + log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); return; } myCollator = ucol_open("zh", &status); if(U_FAILURE(status)){ - log_err("Could not open collator for zh locale. Error: %s", u_errorName(status)); + log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); + ucnv_close(utf8cnv); return; } rules = ucol_getRules(myCollator, &rules_length); + if(rules_length == 0) { + log_data_err("missing zh tailoring rule string\n"); + ucol_close(myCollator); + ucnv_close(utf8cnv); + return; + } buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); buff = malloc(buff_size); @@ -5128,13 +5412,14 @@ static void TestJitterbug981(){ status = U_ZERO_ERROR; if(target_cap >= buff_size) { log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); - return; + break; } bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, rules, rules_length, &status); target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; if(numNeeded!=0 && numNeeded!= bytes_needed){ log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); + break; } numNeeded = bytes_needed; } while (status == U_BUFFER_OVERFLOW_ERROR); @@ -5145,8 +5430,9 @@ static void TestJitterbug981(){ #endif +#if !UCONFIG_NO_LEGACY_CONVERSION static void TestJitterbug1293(){ - UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; + static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; char target[256]; UErrorCode status = U_ZERO_ERROR; UConverter* conv=NULL; @@ -5172,5 +5458,146 @@ static void TestJitterbug1293(){ } ucnv_close(conv); } - #endif + +static void TestJB5275_1(){ + + static const char* data = "\x3B\xB3\x0A" /* Easy characters */ + "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ + /* Switch script: */ + "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ + "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ + "\xEF\x40\x3B\xB3\x0A"; + static const UChar expected[] ={ + 0x003b, 0x0a15, 0x000a, /* Easy characters */ + 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ + 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ + 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ + 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ + }; + + UErrorCode status = U_ZERO_ERROR; + UConverter* conv = ucnv_open("iscii-gur", &status); + UChar dest[100] = {'\0'}; + UChar* target = dest; + UChar* targetLimit = dest+100; + const char* source = data; + const char* sourceLimit = data+strlen(data); + const UChar* exp = expected; + + if (U_FAILURE(status)) { + log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); + return; + } + + log_verbose("Testing switching back to default script when new line is encountered.\n"); + ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); + if(U_FAILURE(status)){ + log_err("conversion failed: %s \n", u_errorName(status)); + } + targetLimit = target; + target = dest; + printUSeq(target, targetLimit-target); + while(target