X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..4388f060552cc537e71e957d32f35e9d75a61233:/icuSources/test/cintltst/custrtrn.c diff --git a/icuSources/test/cintltst/custrtrn.c b/icuSources/test/cintltst/custrtrn.c index 66ada233..34302375 100644 --- a/icuSources/test/cintltst/custrtrn.c +++ b/icuSources/test/cintltst/custrtrn.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2001-2004, International Business Machines Corporation and + * Copyright (c) 2001-2011, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** @@ -17,31 +17,52 @@ #include #include +#include #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/ures.h" #include "ustr_imp.h" #include "cintltst.h" +#include "cmemory.h" +#include "cstring.h" #include "cwchar.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) void addUCharTransformTest(TestNode** root); -static void Test_UChar_UTF32_API(void); +static void Test_strToUTF32(void); +static void Test_strToUTF32_surrogates(void); +static void Test_strFromUTF32(void); +static void Test_strFromUTF32_surrogates(void); static void Test_UChar_UTF8_API(void); +static void Test_FromUTF8(void); +static void Test_FromUTF8Lenient(void); static void Test_UChar_WCHART_API(void); static void Test_widestrs(void); static void Test_WCHART_LongString(void); +static void Test_strToJavaModifiedUTF8(void); +static void Test_strFromJavaModifiedUTF8(void); +static void TestNullEmptySource(void); void addUCharTransformTest(TestNode** root) { - addTest(root, &Test_UChar_UTF32_API, "custrtrn/Test_UChar_UTF32_API"); + addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32"); + addTest(root, &Test_strToUTF32_surrogates, 
"custrtrn/Test_strToUTF32_surrogates"); + addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32"); + addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates"); addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API"); + addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8"); + addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient"); addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API"); addTest(root, &Test_widestrs, "custrtrn/Test_widestrs"); +#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString"); +#endif + addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8"); + addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8"); + addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource"); } static const UChar32 src32[]={ @@ -120,138 +141,365 @@ static const UChar src16[] = { }; -static void Test_UChar_UTF32_API(void){ - +static void Test_strToUTF32(void){ UErrorCode err = U_ZERO_ERROR; - UChar uTemp[1]; - UChar32 u32Temp[1]; - UChar* uTarget=uTemp; - const UChar32* u32Src = src32; - int32_t u32SrcLen = sizeof(src32)/4; - int32_t uTargetLength = 0; - int32_t uDestLen=0; - const UChar* uSrc = src16; - int32_t uSrcLen = sizeof(src16)/2; - UChar32* u32Target = u32Temp; - uint32_t u32TargetLength =0; - int32_t u32DestLen =0; - UBool failed = FALSE; + UChar32 u32Target[400]; + int32_t u32DestLen; int i= 0; - { - /* preflight */ - u_strToUTF32(u32Target,u32TargetLength, &u32DestLen, uSrc, uSrcLen,&err); - if(err == U_BUFFER_OVERFLOW_ERROR){ - err = U_ZERO_ERROR; - u32Target = (UChar32*) malloc (sizeof(uint32_t) * (u32DestLen+1)); - u32TargetLength = u32DestLen+1; - - u_strToUTF32(u32Target,u32TargetLength, &u32DestLen, uSrc, uSrcLen,&err); + + /* first with length */ + u32DestLen = -2; + u_strToUTF32(u32Target, 0, &u32DestLen, src16, LENGTHOF(src16),&err); + 
if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)) { + log_err("u_strToUTF32(preflight with length): " + "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", + (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); + return; + } + err = U_ZERO_ERROR; + u32DestLen = -2; + u_strToUTF32(u32Target, LENGTHOF(src32)+1, &u32DestLen, src16, LENGTHOF(src16),&err); + if(err != U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)) { + log_err("u_strToUTF32(with length): " + "length %ld != %ld and %s != U_ZERO_ERROR\n", + (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); + return; + } + /*for(i=0; i< u32DestLen; i++){ + printf("0x%08X, ",uTarget[i]); + if(i%10==0){ + printf("\n"); } - else { - log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); + }*/ + for(i=0; i< LENGTHOF(src32); i++){ + if(u32Target[i] != src32[i]){ + log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i); } - failed = FALSE; - /*for(i=0; i< u32DestLen; i++){ - printf("0x%08X, ",uTarget[i]); - if(i%10==0){ - printf("\n"); - } - }*/ - for(i=0; i< u32SrcLen; i++){ - if(u32Target[i] != src32[i]){ - log_verbose("u_strToUTF32() failed expected: \\U%08X got: \\U%08X at index: %i \n", src32[i], u32Target[i],i); - failed =TRUE; - } - } - if(failed){ - log_err("u_strToUTF32() failed \n"); + } + if(u32Target[i] != 0){ + log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i); + } + + /* now NUL-terminated */ + u32DestLen = -2; + u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err); + if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)-1) { + log_err("u_strToUTF32(preflight with NUL-termination): " + "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", + (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); + return; + } + err = U_ZERO_ERROR; + u32DestLen = -2; + u_strToUTF32(u32Target, LENGTHOF(src32), &u32DestLen, src16, -1,&err); + if(err != 
U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)-1) { + log_err("u_strToUTF32(with NUL-termination): " + "length %ld != %ld and %s != U_ZERO_ERROR\n", + (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); + return; + } + + for(i=0; i< LENGTHOF(src32); i++){ + if(u32Target[i] != src32[i]){ + log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]); } + } +} - /* preflight */ - u_strFromUTF32(uTarget,uTargetLength,&uDestLen,u32Src,u32SrcLen,&err); - if(err == U_BUFFER_OVERFLOW_ERROR){ - err = U_ZERO_ERROR; - uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); - uTargetLength = uDestLen+1; - u_strFromUTF32(uTarget,uTargetLength,&uDestLen,u32Src,u32SrcLen,&err); +/* test unpaired surrogates */ +static void Test_strToUTF32_surrogates() { + UErrorCode err = U_ZERO_ERROR; + UChar32 u32Target[400]; + int32_t len16, u32DestLen; + int32_t numSubstitutions; + int i; + + static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; + static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 }; + static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 }; + static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 }; + len16 = LENGTHOF(surr16); + for(i = 0; i < 4; ++i) { + err = U_ZERO_ERROR; + u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + return; + } + err = U_ZERO_ERROR; + u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + return; } - /*for(i=0; i< uDestLen; i++){ - printf("0x%04X, ",uTarget[i]); - if(i%10==0){ - printf("\n"); - } - }*/ - - for(i=0; i< uDestLen; i++){ - 
if(uTarget[i] != src16[i]){ - log_verbose("u_strFromUTF32() failed expected: \\U%08X got: \\U%08X at index: %i \n", src16[i] ,uTarget[i],i); - failed =TRUE; - } + + err = U_ZERO_ERROR; + u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + return; } - if(failed){ - log_err("u_strToUTF32() failed \n"); + + err = U_ZERO_ERROR; + u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + return; } + } - free(u32Target); - free(uTarget); + err = U_ZERO_ERROR; + u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { + log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; } - { - u32SrcLen = -1; - uTargetLength = 0; - uSrcLen =-1; - u32TargetLength=0; - failed = FALSE; - /* preflight */ - u_strToUTF32(NULL,u32TargetLength, &u32DestLen, uSrc, uSrcLen,&err); - if(err == U_BUFFER_OVERFLOW_ERROR){ - err = U_ZERO_ERROR; - u32Target = (UChar32*) malloc (sizeof(uint32_t) * (u32DestLen+1)); - u32TargetLength = u32DestLen+1; - - u_strToUTF32(u32Target,u32TargetLength, &u32DestLen, uSrc, uSrcLen,&err); + err = U_ZERO_ERROR; + u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err); + if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { + log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { + log_err("u_strToUTF32(preflight surr16+4/NUL) 
sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err); + if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { + log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); + return; + } + + /* with substitution character */ + numSubstitutions = -1; + err = U_ZERO_ERROR; + u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { + log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); + if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) { + log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { + log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); + if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) { + log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); + return; + 
} +} + +static void Test_strFromUTF32(void){ + UErrorCode err = U_ZERO_ERROR; + UChar uTarget[400]; + int32_t uDestLen; + int i= 0; + + /* first with length */ + uDestLen = -2; + u_strFromUTF32(uTarget,0,&uDestLen,src32,LENGTHOF(src32),&err); + if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)) { + log_err("u_strFromUTF32(preflight with length): " + "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", + (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); + return; + } + err = U_ZERO_ERROR; + uDestLen = -2; + u_strFromUTF32(uTarget, LENGTHOF(src16)+1,&uDestLen,src32,LENGTHOF(src32),&err); + if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)) { + log_err("u_strFromUTF32(with length): " + "length %ld != %ld and %s != U_ZERO_ERROR\n", + (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); + return; + } + /*for(i=0; i< uDestLen; i++){ + printf("0x%04X, ",uTarget[i]); + if(i%10==0){ + printf("\n"); } - else { - log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); + }*/ + + for(i=0; i< uDestLen; i++){ + if(uTarget[i] != src16[i]){ + log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i); } - failed = FALSE; + } + if(uTarget[i] != 0){ + log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i); + } - for(i=0; i< u32SrcLen; i++){ - if(u32Target[i] != src32[i]){ - log_verbose("u_strToUTF32() failed expected: \\U%08X got: \\U%08X \n", src32[i], u32Target[i]); - failed =TRUE; - } - } - if(failed){ - log_err("u_strToUTF32() failed \n"); + /* now NUL-terminated */ + uDestLen = -2; + u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err); + if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)-1) { + log_err("u_strFromUTF32(preflight with NUL-termination): " + "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", + (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); + return; + } + err = U_ZERO_ERROR; + uDestLen = -2; + 
u_strFromUTF32(uTarget, LENGTHOF(src16),&uDestLen,src32,-1,&err); + if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)-1) { + log_err("u_strFromUTF32(with NUL-termination): " + "length %ld != %ld and %s != U_ZERO_ERROR\n", + (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); + return; + } + + for(i=0; i< uDestLen; i++){ + if(uTarget[i] != src16[i]){ + log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]); } + } +} - /* preflight */ - u_strFromUTF32(NULL,uTargetLength,&uDestLen,u32Src,u32SrcLen,&err); - if(err == U_BUFFER_OVERFLOW_ERROR){ - err = U_ZERO_ERROR; - uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); - uTargetLength = uDestLen+1; - u_strFromUTF32(uTarget,uTargetLength,&uDestLen,u32Src,u32SrcLen,&err); +/* test surrogate code points */ +static void Test_strFromUTF32_surrogates() { + UErrorCode err = U_ZERO_ERROR; + UChar uTarget[400]; + int32_t len32, uDestLen; + int32_t numSubstitutions; + int i; + + static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 }; + static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; + static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; + static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45, + 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; + len32 = LENGTHOF(surr32); + for(i = 0; i < 6; ++i) { + err = U_ZERO_ERROR; + u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + return; + } + err = U_ZERO_ERROR; + u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + 
return; } - - for(i=0; i< uDestLen; i++){ - if(uTarget[i] != src16[i]){ - log_verbose("u_strFromUTF32() failed expected: \\U%08X got: \\U%08X \n", src16[i] ,uTarget[i]); - failed =TRUE; - } + + err = U_ZERO_ERROR; + u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + return; } - if(failed){ - log_err("u_strToUTF32() failed \n"); + + err = U_ZERO_ERROR; + u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err); + if(err != U_INVALID_CHAR_FOUND) { + log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", + (long)i, u_errorName(err)); + return; } + } - free(u32Target); - free(uTarget); + err = U_ZERO_ERROR; + u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { + log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err); + if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { + log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { + log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err); + if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { + log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); 
+ return; + } + + /* with substitution character */ + numSubstitutions = -1; + err = U_ZERO_ERROR; + u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) { + log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); + if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) { + log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); + return; } -} + err = U_ZERO_ERROR; + u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); + if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) { + log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", + u_errorName(err)); + return; + } + + err = U_ZERO_ERROR; + u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); + if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) { + log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", + u_errorName(err)); + return; + } +} static void Test_UChar_UTF8_API(void){ @@ -270,16 +518,21 @@ static void Test_UChar_UTF8_API(void){ int32_t u8DestLen =0; UBool failed = FALSE; int i= 0; + int32_t numSubstitutions; + { /* preflight */ + u8Temp[0] = 0x12; u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); - if(err == U_BUFFER_OVERFLOW_ERROR){ + if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){ err = U_ZERO_ERROR; u8Target = (char*) 
malloc (sizeof(uint8_t) * (u8DestLen+1)); u8TargetLength = u8DestLen; - + + u8Target[u8TargetLength] = (char)0xfe; + u8DestLen = -1; u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); - if(U_FAILURE(err)){ + if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){ log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err)); return; } @@ -297,7 +550,7 @@ static void Test_UChar_UTF8_API(void){ }*/ /*for(i=0; i< u8DestLen; i++){ if(u8Target[i] != src8[i]){ - log_verbose("u_strToUTF8() failed expected: \\U%08X got: \\U%08X \n", src8[i], u8Target[i]); + log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); failed =TRUE; } } @@ -308,16 +561,19 @@ static void Test_UChar_UTF8_API(void){ u8SrcLen = u8DestLen; /* preflight */ + uTemp[0] = 0x1234; u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); - if(err == U_BUFFER_OVERFLOW_ERROR){ + if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){ err = U_ZERO_ERROR; uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); uTargetLength = uDestLen; + uTarget[uTargetLength] = 0xfff0; + uDestLen = -1; u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); } else { - log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); + log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n"); } /*for(i=0; i< uDestLen; i++){ printf("0x%04X, ",uTarget[i]); @@ -325,7 +581,10 @@ static void Test_UChar_UTF8_API(void){ printf("\n"); } }*/ - + + if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) { + failed = TRUE; + } for(i=0; i< uSrcLen; i++){ if(uTarget[i] != src16[i]){ log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i); @@ -333,7 +592,7 @@ static void Test_UChar_UTF8_API(void){ } } if(failed){ - log_err("u_strToUTF8() failed \n"); + log_err("error: u_strFromUTF8(after preflighting) failed\n"); } 
free(u8Target); @@ -367,7 +626,7 @@ static void Test_UChar_UTF8_API(void){ }*/ /*for(i=0; i< u8DestLen; i++){ if(u8Target[i] != src8[i]){ - log_verbose("u_strToUTF8() failed expected: \\U%08X got: \\U%08X \n", src8[i], u8Target[i]); + log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); failed =TRUE; } } @@ -414,10 +673,14 @@ static void Test_UChar_UTF8_API(void){ { static const UChar withLead16[]={ 0x1800, 0xd89a, 0x0061 }, - withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 }; + withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 }, + withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */ + withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */ static const uint8_t withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 }, - withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61 }; + withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 }, + withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */ + withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */ UChar out16[10]; char out8[10]; @@ -429,8 +692,441 @@ static void Test_UChar_UTF8_API(void){ ) { log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n"); } + + /* test error handling with substitution characters */ + + /* from UTF-8 with length */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out16[0]=0x55aa; + uDestLen=0; + u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, + (const char *)withTrail8, uprv_strlen((const char *)withTrail8), + 0x50005, &numSubstitutions, + &err); + if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) || + 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) || + numSubstitutions!=1) { + log_err("error: u_strFromUTF8WithSub(length) failed\n"); + } + + /* from UTF-8 with NUL termination */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out16[0]=0x55aa; + uDestLen=0; + u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, + (const 
char *)withTrail8, -1, + 0xfffd, &numSubstitutions, + &err); + if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) || + 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) || + numSubstitutions!=1) { + log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n"); + } + + /* preflight from UTF-8 with NUL termination */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out16[0]=0x55aa; + uDestLen=0; + u_strFromUTF8WithSub(out16, 1, &uDestLen, + (const char *)withTrail8, -1, + 0x50005, &numSubstitutions, + &err); + if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=1) { + log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n"); + } + + /* to UTF-8 with length */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out8[0]=(char)0xf5; + u8DestLen=0; + u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, + withTrail16, u_strlen(withTrail16), + 0xfffd, &numSubstitutions, + &err); + if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || + 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) || + numSubstitutions!=1) { + log_err("error: u_strToUTF8WithSub(length) failed\n"); + } + + /* to UTF-8 with NUL termination */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out8[0]=(char)0xf5; + u8DestLen=0; + u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, + withTrail16, -1, + 0x1a, &numSubstitutions, + &err); + if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) || + 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) || + numSubstitutions!=1) { + log_err("error: u_strToUTF8WithSub(NUL termination) failed\n"); + } + + /* preflight to UTF-8 with NUL termination */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out8[0]=(char)0xf5; + u8DestLen=0; + u_strToUTF8WithSub(out8, 1, &u8DestLen, + withTrail16, -1, + 0xfffd, &numSubstitutions, + &err); + if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || + 
numSubstitutions!=1) { + log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n"); + } + + /* test that numSubstitutions==0 if there are no substitutions */ + + /* from UTF-8 with length (just first 3 bytes which are valid) */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out16[0]=0x55aa; + uDestLen=0; + u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, + (const char *)withTrail8, 3, + 0x50005, &numSubstitutions, + &err); + if(U_FAILURE(err) || uDestLen!=1 || + 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || + numSubstitutions!=0) { + log_err("error: u_strFromUTF8WithSub(no subs) failed\n"); + } + + /* to UTF-8 with length (just first UChar which is valid) */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out8[0]=(char)0xf5; + u8DestLen=0; + u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, + withTrail16, 1, + 0xfffd, &numSubstitutions, + &err); + if(U_FAILURE(err) || u8DestLen!=3 || + 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || + numSubstitutions!=0) { + log_err("error: u_strToUTF8WithSub(no subs) failed\n"); + } + + /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */ + + /* from UTF-8 with length (just first 3 bytes which are valid) */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out16[0]=0x55aa; + uDestLen=0; + u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, + (const char *)withTrail8, 3, + U_SENTINEL, &numSubstitutions, + &err); + if(U_FAILURE(err) || uDestLen!=1 || + 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || + numSubstitutions!=0) { + log_err("error: u_strFromUTF8WithSub(no subchar) failed\n"); + } + + /* to UTF-8 with length (just first UChar which is valid) */ + err=U_ZERO_ERROR; + numSubstitutions=-1; + out8[0]=(char)0xf5; + u8DestLen=0; + u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, + withTrail16, 1, + U_SENTINEL, &numSubstitutions, + &err); + if(U_FAILURE(err) || u8DestLen!=3 || + 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || + 
numSubstitutions!=0) { + log_err("error: u_strToUTF8WithSub(no subchar) failed\n"); + } + } +} + +/* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */ +static UBool +equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) { + UChar c1, c2; + + while(length>0) { + c1=*s++; + c2=*t++; + if(c1!=c2 && c2!=0xfffd) { + return FALSE; + } + --length; } + return TRUE; } + +/* test u_strFromUTF8Lenient() */ +static void +Test_FromUTF8(void) { + /* + * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)" + */ + static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 }; + UChar dest[64]; + UChar *destPointer; + int32_t destLength; + UErrorCode errorCode; + + /* 3 bytes input, one UChar output (U+095C) */ + errorCode=U_ZERO_ERROR; + destLength=-99; + destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode); + if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { + log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n", + (long)destLength, u_errorName(errorCode)); + } + + /* 4 bytes input, two UChars output (U+095C U+0000) */ + errorCode=U_ZERO_ERROR; + destLength=-99; + destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode); + if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) { + log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n", + (long)destLength, u_errorName(errorCode)); + } + + /* NUL-terminated 3 bytes input, one UChar output (U+095C) */ + errorCode=U_ZERO_ERROR; + destLength=-99; + destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode); + if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { + log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n", + (long)destLength, u_errorName(errorCode)); + } + + /* 3 bytes input, one UChar output 
(U+095C), transform not just preflight */
+    errorCode=U_ZERO_ERROR;
+    dest[0]=dest[1]=99;
+    destLength=-99;
+    destPointer=u_strFromUTF8(dest, LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
+    if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
+        log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
+                (long)destLength, u_errorName(errorCode));
+    }
+}
+
+/*
+ * test u_strFromUTF8Lenient()
+ *
+ * Two phases:
+ *  1. Argument checking: NULL source, NULL destination with non-zero
+ *     capacity, an error code that already holds a failure, and a NULL
+ *     error-code pointer. In each case the sentinel in dest[0] is checked
+ *     where a destination buffer was passed.
+ *  2. Table-driven conversion: every NUL-terminated string in bytes[] is
+ *     converted eight ways (preflight, too-small capacity, ample capacity
+ *     and tight capacity; once with srcLength=-1 and once with an explicit
+ *     srcLength) and the result is compared against the matching string in
+ *     uchars[] with equalAnyFFFD() (helper defined outside this view).
+ *
+ * Failures are reported through log_err(); nothing is returned.
+ */
+static void
+Test_FromUTF8Lenient(void) {
+    /*
+     * Multiple input strings, each NUL-terminated.
+     * Terminate with a string starting with 0xff.
+     * The order of the strings must match the order of the expected
+     * strings in uchars[] below, because the loop walks both in lockstep.
+     */
+    static const uint8_t bytes[]={
+        /* well-formed UTF-8 */
+        0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80,
+        0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0,
+
+        /* various malformed sequences */
+        0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0,
+
+        /* truncated input */
+        0xc3, 0,
+        0xe0, 0,
+        0xe0, 0xa0, 0,
+        0xf0, 0,
+        0xf0, 0x90, 0,
+        0xf0, 0x90, 0x80, 0,
+
+        /* non-ASCII characters in the last few bytes */
+        0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0,
+        0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0,
+
+        /* empty string */
+        0,
+
+        /* finish */
+        0xff, 0
+    };
+
+    /*
+     * Multiple output strings, each NUL-terminated. 0xfffd matches anything.
+     * One expected string per input string in bytes[], in the same order.
+     */
+    static const UChar uchars[]={
+        0x61, 0xdf, 0x800, 0xd840, 0xdc00,
+        0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0,
+
+        0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0,
+
+        0xfffd, 0,
+        0xfffd, 0,
+        0xfffd, 0,
+        0xfffd, 0,
+        0xfffd, 0,
+        0xfffd, 0,
+
+        0x61, 0xdf, 0x800, 0,
+        0x61, 0x800, 0xdf, 0,
+
+        0,
+
+        0
+    };
+
+    UChar dest[64];
+    const char *pb;                 /* current input string in bytes[] */
+    const UChar *pu, *pDest;        /* pu: current expected string in uchars[]; pDest: function result */
+    int32_t srcLength, destLength0, destLength; /* destLength0: length of the expected string */
+    int number;
+    UErrorCode errorCode;
+
+    /*
+     * verify checking for some illegal arguments
+     * dest[0]=0x1234 is a sentinel: it must be unchanged after a call that
+     * is expected to reject its arguments.
+     */
+    dest[0]=0x1234;
+    destLength=-1;
+    errorCode=U_ZERO_ERROR;
+    pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
+    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
+        log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
+    }
+
+    dest[0]=0x1234;
+    destLength=-1;
+    errorCode=U_ZERO_ERROR;
+    pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
+    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
+    }
+
+    dest[0]=0x1234;
+    destLength=-1;
+    errorCode=U_MEMORY_ALLOCATION_ERROR; /* pre-existing failure: must be left as is */
+    pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
+    if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
+        log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
+    }
+
+    dest[0]=0x1234;
+    destLength=-1;
+    errorCode=U_MEMORY_ALLOCATION_ERROR; /* not passed to the call below, which gets pErrorCode=NULL */
+    pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, NULL);
+    if(dest[0]!=0x1234) {
+        log_err("u_strFromUTF8Lenient(pErrorCode=NULL) failed\n");
+    }
+
+    /*
+     * test normal behavior
+     * Each iteration handles one input string; the increment step skips
+     * past the current input and expected strings and their NULs.
+     */
+    number=0; /* string number for log_err() */
+
+    for(pb=(const char *)bytes, pu=uchars;
+        *pb!=(char)0xff;
+        pb+=srcLength+1, pu+=destLength0+1, ++number
+    ) {
+        srcLength=uprv_strlen(pb);
+        destLength0=u_strlen(pu);
+
+        /*
+         * preflighting with NUL-termination
+         * An empty expected string yields the not-terminated warning
+         * instead of a buffer overflow.
+         */
+        dest[0]=0x1234;
+        destLength=-1;
+        errorCode=U_ZERO_ERROR;
+        pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
+        if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
+            pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
+        ) {
+            log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
+        }
+
+        /*
+         * preflighting/some capacity with NUL-termination
+         * Capacity is one short; the sentinel just past the capacity must
+         * not be overwritten.
+         */
+        if(srcLength>0) {
+            dest[destLength0-1]=0x1234;
+            destLength=-1;
+            errorCode=U_ZERO_ERROR;
+            pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
+            if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
+                dest[destLength0-1]!=0x1234 || destLength!=destLength0
+            ) {
+                log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
+            }
+        }
+
+        /* conversion with NUL-termination, much capacity */
+        dest[0]=dest[destLength0]=0x1234;
+        destLength=-1;
+        errorCode=U_ZERO_ERROR;
+        pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, -1, &errorCode);
+        if (errorCode!=U_ZERO_ERROR ||
+            pDest!=dest || dest[destLength0]!=0 ||
+            destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
+        ) {
+            log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
+        }
+
+        /*
+         * conversion with NUL-termination, exact capacity
+         * The sentinel at dest[destLength0] must remain, and the
+         * not-terminated warning is expected.
+         */
+        dest[0]=dest[destLength0]=0x1234;
+        destLength=-1;
+        errorCode=U_ZERO_ERROR;
+        pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
+        if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
+            pDest!=dest || dest[destLength0]!=0x1234 ||
+            destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
+        ) {
+            log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
+        }
+
+        /*
+         * preflighting with length
+         * Unlike the NUL-terminated preflight above, the expected reported
+         * length here is srcLength rather than destLength0.
+         */
+        dest[0]=0x1234;
+        destLength=-1;
+        errorCode=U_ZERO_ERROR;
+        pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
+        if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
+            pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
+        ) {
+            log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
+        }
+
+        /*
+         * preflighting/some capacity with length
+         * Capacity is srcLength-1; again srcLength is the expected
+         * reported length.
+         */
+        if(srcLength>0) {
+            dest[srcLength-1]=0x1234;
+            destLength=-1;
+            errorCode=U_ZERO_ERROR;
+            pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
+            if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
+                dest[srcLength-1]!=0x1234 || destLength!=srcLength
+            ) {
+                log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
+            }
+        }
+
+        /* conversion with length, much capacity */
+        dest[0]=dest[destLength0]=0x1234;
+        destLength=-1;
+        errorCode=U_ZERO_ERROR;
+        pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
+        if (errorCode!=U_ZERO_ERROR ||
+            pDest!=dest || dest[destLength0]!=0 ||
+            destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
+        ) {
+            log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
+        }
+
+        /*
+         * conversion with length, srcLength capacity
+         * If the expected output is exactly as long as the input, the
+         * capacity is exactly filled and the not-terminated warning is
+         * expected; otherwise the output must be NUL-terminated.
+         */
+        dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
+        destLength=-1;
+        errorCode=U_ZERO_ERROR;
+        pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
+        if(srcLength==destLength0) {
+            if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
+                pDest!=dest || dest[destLength0]!=0x1234 ||
+                destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
+            ) {
+                log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
+            }
+        } else {
+            if (errorCode!=U_ZERO_ERROR ||
+                pDest!=dest || dest[destLength0]!=0 ||
+                destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
+            ) {
+                log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
+            }
+        }
+    }
+}
+
 static const uint16_t src16j[] = {
     0x0043, 0x0044,
0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, @@ -462,6 +1158,7 @@ static const uint16_t src16WithNulls[] = { }; static void Test_UChar_WCHART_API(void){ +#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) UErrorCode err = U_ZERO_ERROR; const UChar* uSrc = src16j; int32_t uSrcLen = sizeof(src16j)/2; @@ -473,6 +1170,27 @@ static void Test_UChar_WCHART_API(void){ int32_t uDestLen = 0; int i =0; { + /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */ + if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { + log_err("u_strFromWCS() should return NULL with a bad argument\n"); + } + if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { + log_err("u_strToWCS() should return NULL with a bad argument\n"); + } + + /* NULL source & destination. */ + err = U_ZERO_ERROR; + u_strFromWCS(NULL,0,NULL,NULL,0,&err); + if (err != U_STRING_NOT_TERMINATED_WARNING) { + log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err)); + } + err = U_ZERO_ERROR; + u_strToWCS(NULL,0,NULL,NULL,0,&err); + if (err != U_STRING_NOT_TERMINATED_WARNING) { + log_err("u_strToWCS(NULL, NULL) failed. 
Error: %s \n", u_errorName(err)); + } + err = U_ZERO_ERROR; + /* pre-flight*/ u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); @@ -542,12 +1260,13 @@ static void Test_UChar_WCHART_API(void){ u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); } - - for(i=0; i< uSrcLen; i++){ + if(!U_FAILURE(err)) { + for(i=0; i< uSrcLen; i++){ if(uDest[i] != src16WithNulls[i]){ log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i); failed =TRUE; } + } } if(U_FAILURE(err)){ @@ -591,11 +1310,13 @@ static void Test_UChar_WCHART_API(void){ } - for(i=0; i< uSrcLen; i++){ + if(!U_FAILURE(err)) { + for(i=0; i< uSrcLen; i++){ if(uDest[i] != src16j[i]){ log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); failed =TRUE; } + } } if(U_FAILURE(err)){ @@ -648,10 +1369,14 @@ static void Test_UChar_WCHART_API(void){ u_errorName(err), wDestLen, buffer[3]); } } +#else + log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); +#endif } static void Test_widestrs() { +#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) wchar_t ws[100]; UChar rts[100]; int32_t wcap = sizeof(ws) / sizeof(*ws); @@ -686,10 +1411,14 @@ static void Test_widestrs() if(wl != rtl){ log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl); } +#else + log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); +#endif } static void Test_WCHART_LongString(){ +#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) UErrorCode status = U_ZERO_ERROR; const char* testdatapath=loadTestData(&status); UResourceBundle *theBundle = 
ures_open(testdatapath, "testtypes", &status); @@ -703,8 +1432,10 @@ Test_WCHART_LongString(){ UChar* uDest = NULL; UBool failed = FALSE; + log_verbose("Loaded string of %d UChars\n", uSrcLen); + if(U_FAILURE(status)){ - log_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status)); + log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status)); return; } @@ -716,22 +1447,45 @@ Test_WCHART_LongString(){ wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); wDestLen = reqLen+1; u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status); + log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t)); } + + { + int j; + for(j=0;j>=0&&jfffd) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, + (const char *)invalid, LENGTHOF(invalid), + 0x50000, &numSubstitutions, &errorCode); + if( U_FAILURE(errorCode) || p!=dest || + length!=LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) || + dest[length]!=0 || + numSubstitutions!=LENGTHOF(invalidExpectedFFFD) /* not ...50000 */ + ) { + log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, + (const char *)invalid, LENGTHOF(invalid), + U_SENTINEL, &numSubstitutions, &errorCode); + if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) { + log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, + (const char *)src, 
LENGTHOF(src), + U_SENTINEL, &numSubstitutions, &errorCode); + if( errorCode!=U_INVALID_CHAR_FOUND || + length>=LENGTHOF(expected) || dest[LENGTHOF(expected)-1]!=0xffff || + numSubstitutions!=0 + ) { + log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode)); + } + + /* illegal arguments */ + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length, + (const char *)src, LENGTHOF(src), + 0xfffd, &numSubstitutions, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { + log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length, + (const char *)src, LENGTHOF(src), + 0xfffd, &numSubstitutions, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { + log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, + NULL, LENGTHOF(src), + 0xfffd, &numSubstitutions, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { + log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, + NULL, -1, 0xfffd, &numSubstitutions, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { + log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + 
p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, + (const char *)src, LENGTHOF(src), + 0x110000, &numSubstitutions, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { + log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode)); + } + memset(dest, 0xff, sizeof(dest)); + errorCode=U_ZERO_ERROR; + length=numSubstitutions=-5; + p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, + (const char *)src, LENGTHOF(src), + 0xdfff, &numSubstitutions, &errorCode); + if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { + log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode)); + } +} + +/* test that string transformation functions permit NULL source pointer when source length==0 */ +static void TestNullEmptySource() { + char dest8[4]={ 3, 3, 3, 3 }; + UChar dest16[4]={ 3, 3, 3, 3 }; + UChar32 dest32[4]={ 3, 3, 3, 3 }; +#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) + wchar_t destW[4]={ 3, 3, 3, 3 }; +#endif + + int32_t length; + UErrorCode errorCode; + + /* u_strFromXyz() */ + + dest16[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strFromUTF8(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { + log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n"); + } + + dest16[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strFromUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { + log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n"); + } + + dest16[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strFromUTF8Lenient(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { + 
log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n"); + } + + dest16[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strFromUTF32(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { + log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n"); + } + + dest16[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strFromUTF32WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { + log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n"); + } + + dest16[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strFromJavaModifiedUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { + log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n"); + } + + /* u_strToXyz() */ + + dest8[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strToUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { + log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); + } + + dest8[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strToUTF8WithSub(dest8, LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { + log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); + } + + dest32[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strToUTF32(dest32, LENGTHOF(dest32), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { + log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n"); + } + + dest32[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strToUTF32WithSub(dest32, LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode); + 
if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { + log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n"); + } + + dest8[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strToJavaModifiedUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { + log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n"); + } + +#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) + + dest16[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strFromWCS(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { + log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n"); + } + + destW[0]=3; + length=3; + errorCode=U_ZERO_ERROR; + u_strToWCS(destW, LENGTHOF(destW), &length, NULL, 0, &errorCode); + if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) { + log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n"); + } + +#endif +}