X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/8899b155a1e4fa5f4b90e1f3bebe28088ea46bc9..61c213fe224da24ce33c9187263e75a17ca83a04:/tests/mbconv/mbconvtest.cpp diff --git a/tests/mbconv/mbconvtest.cpp b/tests/mbconv/mbconvtest.cpp index 8bb3fd878e..450e0ce183 100644 --- a/tests/mbconv/mbconvtest.cpp +++ b/tests/mbconv/mbconvtest.cpp @@ -1,10 +1,10 @@ /////////////////////////////////////////////////////////////////////////////// // Name: tests/mbconv/main.cpp // Purpose: wxMBConv unit test -// Author: Vadim Zeitlin +// Author: Vadim Zeitlin, Mike Wetherell // Created: 14.02.04 // RCS-ID: $Id$ -// Copyright: (c) 2003 TT-Solutions +// Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell /////////////////////////////////////////////////////////////////////////////// // ---------------------------------------------------------------------------- @@ -24,6 +24,36 @@ #include "wx/strconv.h" #include "wx/string.h" +#if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H + #define HAVE_WCHAR_H +#endif + +// ---------------------------------------------------------------------------- +// Some wide character constants. "\uXXXX" escapes aren't supported by old +// compilers such as VC++ 5 and g++ 2.95. +// ---------------------------------------------------------------------------- + +wchar_t u41[] = { 0x41, 0 }; +wchar_t u7f[] = { 0x7f, 0 }; + +wchar_t u80[] = { 0x80, 0 }; +wchar_t u391[] = { 0x391, 0 }; +wchar_t u7ff[] = { 0x7ff, 0 }; + +wchar_t u800[] = { 0x800, 0 }; +wchar_t u2620[] = { 0x2620, 0 }; +wchar_t ufffd[] = { 0xfffd, 0 }; + +#if SIZEOF_WCHAR_T == 4 +wchar_t u10000[] = { 0x10000, 0 }; +wchar_t u1000a5[] = { 0x1000a5, 0 }; +wchar_t u10fffd[] = { 0x10fffd, 0 }; +#else +wchar_t u10000[] = { 0xd800, 0xdc00, 0 }; +wchar_t u1000a5[] = { 0xdbc0, 0xdca5, 0 }; +wchar_t u10fffd[] = { 0xdbff, 0xdffd, 0 }; +#endif + // ---------------------------------------------------------------------------- // test class // ---------------------------------------------------------------------------- @@ -36,10 +66,64 @@ public: private: CPPUNIT_TEST_SUITE( MBConvTestCase ); CPPUNIT_TEST( WC2CP1250 ); +#ifdef HAVE_WCHAR_H + CPPUNIT_TEST( UTF8_41 ); + CPPUNIT_TEST( UTF8_7f ); + CPPUNIT_TEST( UTF8_80 ); + CPPUNIT_TEST( UTF8_c2_7f ); + CPPUNIT_TEST( UTF8_c2_80 ); + CPPUNIT_TEST( UTF8_ce_91 ); + CPPUNIT_TEST( UTF8_df_bf ); + CPPUNIT_TEST( UTF8_df_c0 ); + CPPUNIT_TEST( UTF8_e0_a0_7f ); + CPPUNIT_TEST( UTF8_e0_a0_80 ); + CPPUNIT_TEST( UTF8_e2_98_a0 ); + CPPUNIT_TEST( UTF8_ef_bf_bd ); + CPPUNIT_TEST( UTF8_ef_bf_c0 ); + CPPUNIT_TEST( UTF8_f0_90_80_7f ); + CPPUNIT_TEST( UTF8_f0_90_80_80 ); + CPPUNIT_TEST( UTF8_f4_8f_bf_bd ); + CPPUNIT_TEST( UTF8PUA_f4_80_82_a5 ); + CPPUNIT_TEST( UTF8Octal_backslash245 ); +#endif // HAVE_WCHAR_H CPPUNIT_TEST_SUITE_END(); void WC2CP1250(); +#ifdef HAVE_WCHAR_H + // UTF-8 tests. Test the first, last and one in the middle for sequences + // of each length + void UTF8_41() { UTF8("\x41", u41); } + void UTF8_7f() { UTF8("\x7f", u7f); } + void UTF8_80() { UTF8("\x80", NULL); } + + void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL); } + void UTF8_c2_80() { UTF8("\xc2\x80", u80); } + void UTF8_ce_91() { UTF8("\xce\x91", u391); } + void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff); } + void UTF8_df_c0() { UTF8("\xdf\xc0", NULL); } + + void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL); } + void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800); } + void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620); } + void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd); } + void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL); } + + void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL); } + void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000); } + void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd); } + + // test 'escaping the escape characters' for the two escaping schemes + void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); } + void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); } + + // implementation for the utf-8 tests (see comments below) + void UTF8(const char *charSequence, const wchar_t *wideSequence); + void UTF8PUA(const char *charSequence, const wchar_t *wideSequence); + void UTF8Octal(const char *charSequence, const wchar_t *wideSequence); + void UTF8(const char *charSequence, const wchar_t *wideSequence, int option); +#endif // HAVE_WCHAR_H + DECLARE_NO_COPY_CLASS(MBConvTestCase) }; @@ -75,3 +159,120 @@ void MBConvTestCase::WC2CP1250() } } } + +// ---------------------------------------------------------------------------- +// UTF-8 tests +// ---------------------------------------------------------------------------- + +#ifdef HAVE_WCHAR_H + +// Check that 'charSequence' translates to 'wideSequence' and back. +// Invalid sequences can be tested by giving NULL for 'wideSequence'. Even +// invalid sequences should roundtrip when an option is given and this is +// checked. +// +void MBConvTestCase::UTF8(const char *charSequence, + const wchar_t *wideSequence) +{ + UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); + UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); + UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); +} + +// Use this alternative when 'charSequence' contains a PUA character. Such +// sequences should still roundtrip ok, and this is checked. +// +void MBConvTestCase::UTF8PUA(const char *charSequence, + const wchar_t *wideSequence) +{ + UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); + UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); + UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); +} + +// Use this alternative when 'charSequence' contains an octal escape sequence. +// Such sequences should still roundtrip ok, and this is checked. +// +void MBConvTestCase::UTF8Octal(const char *charSequence, + const wchar_t *wideSequence) +{ + UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); + UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); + UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); +} + +// include the option in the error messages so it's possible to see which +// test failed +#define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr) + +// The test implementation +// +void MBConvTestCase::UTF8(const char *charSequence, + const wchar_t *wideSequence, + int option) +{ + const size_t BUFSIZE = 128; + wxASSERT(strlen(charSequence) * 3 + 10 < BUFSIZE); + char bytes[BUFSIZE]; + + // include the option in the error messages so it's possible to see + // which test failed + sprintf(bytes, " (with option == %d)", option); + std::string errmsg(bytes); + + // put the charSequence at the start, middle and end of a string + strcpy(bytes, charSequence); + strcat(bytes, "ABC"); + strcat(bytes, charSequence); + strcat(bytes, "XYZ"); + strcat(bytes, charSequence); + + // translate it into wide characters + wxMBConvUTF8 utf8(option); + wchar_t widechars[BUFSIZE]; + size_t lenResult = utf8.MB2WC(NULL, bytes, 0); + size_t result = utf8.MB2WC(widechars, bytes, BUFSIZE); + UTF8ASSERT(result == lenResult); + + // check we got the expected result + if (wideSequence) { + UTF8ASSERT(result != (size_t)-1); + wxASSERT(result < BUFSIZE); + + wchar_t expected[BUFSIZE]; + wcscpy(expected, wideSequence); + wcscat(expected, L"ABC"); + wcscat(expected, wideSequence); + wcscat(expected, L"XYZ"); + wcscat(expected, wideSequence); + + UTF8ASSERT(wcscmp(widechars, expected) == 0); + UTF8ASSERT(wcslen(widechars) == result); + } + else { + // If 'wideSequence' is NULL, then the result is expected to be + // invalid. Normally that is as far as we can go, but if there is an + // option then the conversion should succeed anyway, and it should be + // possible to translate back to the original + if (!option) { + UTF8ASSERT(result == (size_t)-1); + return; + } + else { + UTF8ASSERT(result != (size_t)-1); + } + } + + // translate it back and check we get the original + char bytesAgain[BUFSIZE]; + size_t lenResultAgain = utf8.WC2MB(NULL, widechars, 0); + size_t resultAgain = utf8.WC2MB(bytesAgain, widechars, BUFSIZE); + UTF8ASSERT(resultAgain == lenResultAgain); + UTF8ASSERT(resultAgain != (size_t)-1); + wxASSERT(resultAgain < BUFSIZE); + + UTF8ASSERT(strcmp(bytes, bytesAgain) == 0); + UTF8ASSERT(strlen(bytesAgain) == resultAgain); +} + +#endif // HAVE_WCHAR_H