1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: tests/mbconv/main.cpp
3 // Purpose: wxMBConv unit test
4 // Author: Vadim Zeitlin, Mike Wetherell
7 // Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell
8 ///////////////////////////////////////////////////////////////////////////////
10 // ----------------------------------------------------------------------------
12 // ----------------------------------------------------------------------------
24 #include "wx/strconv.h"
25 #include "wx/string.h"
27 #if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H
31 // ----------------------------------------------------------------------------
32 // Some wide character constants. "\uXXXX" escapes aren't supported by old
33 // compilers such as VC++ 5 and g++ 2.95.
34 // ----------------------------------------------------------------------------
// Zero-terminated wide strings used as the expected results of the UTF-8
// tests. Each holds the single code point named by its identifier in hex
// (u41 is 0x41, u391 is 0x391, and so on).
36 wchar_t u41
[] = { 0x41, 0 };
37 wchar_t u7f
[] = { 0x7f, 0 };
39 wchar_t u80
[] = { 0x80, 0 };
40 wchar_t u391
[] = { 0x391, 0 };
41 wchar_t u7ff
[] = { 0x7ff, 0 };
43 wchar_t u800
[] = { 0x800, 0 };
44 wchar_t u2620
[] = { 0x2620, 0 };
45 wchar_t ufffd
[] = { 0xfffd, 0 };
// Code points above 0xffff: when wchar_t is 4 bytes wide each one is stored
// directly in a single array element.
47 #if SIZEOF_WCHAR_T == 4
48 wchar_t u10000
[] = { 0x10000, 0 };
49 wchar_t u1000a5
[] = { 0x1000a5, 0 };
50 wchar_t u10fffd
[] = { 0x10fffd, 0 };
// The same three code points written as two-element surrogate pairs.
// NOTE(review): the #else / #endif separating the two sets of definitions is
// not visible here -- confirm they are still present in the real file.
52 wchar_t u10000
[] = { 0xd800, 0xdc00, 0 };
53 wchar_t u1000a5
[] = { 0xdbc0, 0xdca5, 0 };
54 wchar_t u10fffd
[] = { 0xdbff, 0xdffd, 0 };
57 // ----------------------------------------------------------------------------
59 // ----------------------------------------------------------------------------
// CppUnit test case for the wxMBConv classes. It registers one wide-char to
// cp1250 test (WC2CP1250) and a family of UTF-8 tests, each of which feeds a
// fixed byte sequence to one of the UTF8()/UTF8PUA()/UTF8Octal() helpers.
61 class MBConvTestCase
: public CppUnit::TestCase
// Test registration: every CPPUNIT_TEST entry names a member function
// defined further down in this class.
67 CPPUNIT_TEST_SUITE( MBConvTestCase
);
68 CPPUNIT_TEST( WC2CP1250
);
70 CPPUNIT_TEST( UTF8_41
);
71 CPPUNIT_TEST( UTF8_7f
);
72 CPPUNIT_TEST( UTF8_80
);
73 CPPUNIT_TEST( UTF8_c2_7f
);
74 CPPUNIT_TEST( UTF8_c2_80
);
75 CPPUNIT_TEST( UTF8_ce_91
);
76 CPPUNIT_TEST( UTF8_df_bf
);
77 CPPUNIT_TEST( UTF8_df_c0
);
78 CPPUNIT_TEST( UTF8_e0_a0_7f
);
79 CPPUNIT_TEST( UTF8_e0_a0_80
);
80 CPPUNIT_TEST( UTF8_e2_98_a0
);
81 CPPUNIT_TEST( UTF8_ef_bf_bd
);
82 CPPUNIT_TEST( UTF8_ef_bf_c0
);
83 CPPUNIT_TEST( UTF8_f0_90_80_7f
);
84 CPPUNIT_TEST( UTF8_f0_90_80_80
);
85 CPPUNIT_TEST( UTF8_f4_8f_bf_bd
);
86 CPPUNIT_TEST( UTF8PUA_f4_80_82_a5
);
87 CPPUNIT_TEST( UTF8Octal_backslash245
);
// NOTE(review): the conditional that this #endif closes is not visible here;
// presumably the UTF-8 tests are only registered when HAVE_WCHAR_H is defined.
88 #endif // HAVE_WCHAR_H
89 CPPUNIT_TEST_SUITE_END();
94 // UTF-8 tests. Test the first, last and one in the middle for sequences
// of each encoded length (one to four bytes below). The function name spells
// out the bytes being tested. A NULL second argument marks a byte sequence
// that is expected to be rejected as invalid UTF-8.
96 void UTF8_41() { UTF8("\x41", u41
); }
97 void UTF8_7f() { UTF8("\x7f", u7f
); }
98 void UTF8_80() { UTF8("\x80", NULL
); }
100 void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL
); }
101 void UTF8_c2_80() { UTF8("\xc2\x80", u80
); }
102 void UTF8_ce_91() { UTF8("\xce\x91", u391
); }
103 void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff
); }
104 void UTF8_df_c0() { UTF8("\xdf\xc0", NULL
); }
106 void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL
); }
107 void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800
); }
108 void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620
); }
109 void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd
); }
110 void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL
); }
112 void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL
); }
113 void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000
); }
114 void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd
); }
116 // test 'escaping the escape characters' for the two escaping schemes
// (the PUA scheme is given a byte sequence that decodes to u1000a5, the octal
// scheme is given text that already looks like an octal escape)
117 void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5
); }
118 void UTF8Octal_backslash245() { UTF8Octal("\\245", L
"\\245"); }
120 // implementation for the utf-8 tests (see comments below)
// The two-argument helpers run the three-argument UTF8() once per
// wxMBConvUTF8 option; the three-argument overload does the actual checking.
121 void UTF8(const char *charSequence
, const wchar_t *wideSequence
);
122 void UTF8PUA(const char *charSequence
, const wchar_t *wideSequence
);
123 void UTF8Octal(const char *charSequence
, const wchar_t *wideSequence
);
124 void UTF8(const char *charSequence
, const wchar_t *wideSequence
, int option
);
125 #endif // HAVE_WCHAR_H
// NOTE(review): presumably disables copying of the test case, judging by the
// macro name -- its definition is not part of this file.
127 DECLARE_NO_COPY_CLASS(MBConvTestCase
)
130 // register in the unnamed registry so that these tests are run by default
131 CPPUNIT_TEST_SUITE_REGISTRATION( MBConvTestCase
);
133 // also include in its own registry so that these tests can be run alone
134 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase
, "MBConvTestCase" );
// Converts each wide string in a small table with a wxCSConv built for
// wxFONTENCODING_CP1250 and checks the outcome against the table's second
// column: either the expected narrow string, or NULL when the conversion is
// expected to fail.
136 void MBConvTestCase::WC2CP1250()
// Table of { wide input, expected cp1250 output } pairs.
// NOTE(review): the struct's member declarations (used below as d.wc and
// d.cp1250) and the name of the array (used below as data) are not visible
// here -- confirm against the full file.
138 static const struct Data
144 { L
"hello", "hello" }, // test that it works in simplest case
145 { L
"\xBD of \xBD is \xBC", NULL
}, // this should fail as cp1250 doesn't have 1/2
148 wxCSConv
cs1250(wxFONTENCODING_CP1250
);
149 for ( size_t n
= 0; n
< WXSIZEOF(data
); n
++ )
151 const Data
& d
= data
[n
];
// Two alternative checks follow. The first compares the converted text with
// the expected string, the second requires the conversion result to be NULL.
// NOTE(review): the condition choosing between them is not visible here;
// presumably it tests whether d.cp1250 is non-NULL -- confirm.
154 CPPUNIT_ASSERT( strcmp(cs1250
.cWC2MB(d
.wc
), d
.cp1250
) == 0 );
158 CPPUNIT_ASSERT( (const char*)cs1250
.cWC2MB(d
.wc
) == NULL
);
163 // ----------------------------------------------------------------------------
165 // ----------------------------------------------------------------------------
169 // Check that 'charSequence' translates to 'wideSequence' and back.
170 // Invalid sequences can be tested by giving NULL for 'wideSequence'. Even
171 // invalid sequences should roundtrip when an option is given and this is
// checked.
//
// The check itself lives in the three-argument UTF8() overload; this wrapper
// simply runs it once for each of the three wxMBConvUTF8 option values, with
// the same expected wide string every time.
174 void MBConvTestCase::UTF8(const char *charSequence
,
175 const wchar_t *wideSequence
)
177 UTF8(charSequence
, wideSequence
, wxMBConvUTF8::MAP_INVALID_UTF8_NOT
);
178 UTF8(charSequence
, wideSequence
, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA
);
179 UTF8(charSequence
, wideSequence
, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
182 // Use this alternative when 'charSequence' contains a PUA character. Such
183 // sequences should still roundtrip ok, and this is checked.
//
// Differs from the plain UTF8() wrapper only in the MAP_INVALID_UTF8_TO_PUA
// run, which passes NULL instead of 'wideSequence'. With NULL the
// three-argument UTF8() does not compare the wide result against an expected
// string; it checks the conversion result and the round trip only.
185 void MBConvTestCase::UTF8PUA(const char *charSequence
,
186 const wchar_t *wideSequence
)
188 UTF8(charSequence
, wideSequence
, wxMBConvUTF8::MAP_INVALID_UTF8_NOT
);
189 UTF8(charSequence
, NULL
, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA
);
190 UTF8(charSequence
, wideSequence
, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
193 // Use this alternative when 'charSequence' contains an octal escape sequence.
194 // Such sequences should still roundtrip ok, and this is checked.
//
// Differs from the plain UTF8() wrapper only in the MAP_INVALID_UTF8_TO_OCTAL
// run, which passes NULL instead of 'wideSequence'. With NULL the
// three-argument UTF8() does not compare the wide result against an expected
// string; it checks the conversion result and the round trip only.
196 void MBConvTestCase::UTF8Octal(const char *charSequence
,
197 const wchar_t *wideSequence
)
199 UTF8(charSequence
, wideSequence
, wxMBConvUTF8::MAP_INVALID_UTF8_NOT
);
200 UTF8(charSequence
, wideSequence
, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA
);
201 UTF8(charSequence
, NULL
, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL
);
204 // include the option in the error messages so it's possible to see which
// option was in effect when an assertion fails.
//
// The message is the stringized expression followed by 'errmsg', so the macro
// can only be used where a variable named 'errmsg' is in scope (the
// three-argument UTF8() below declares it as a std::string).
206 #define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr)
208 // The test implementation
//
// Builds a narrow test string containing 'charSequence' three times (start,
// middle and end, separated by "ABC" and "XYZ"), converts it to wide
// characters with a wxMBConvUTF8 constructed from 'option', compares the
// result with the same pattern built from 'wideSequence', then converts the
// wide string back and checks that the original bytes are recovered.
//
// NOTE(review): the third parameter (used below as 'option'), the declaration
// of the 'bytes' buffer and the if/else structure around the checks are not
// visible here -- confirm against the full file.
210 void MBConvTestCase::UTF8(const char *charSequence
,
211 const wchar_t *wideSequence
,
214 const size_t BUFSIZE
= 128;
// The test string is three copies of charSequence plus six literal
// characters and a terminator, so this bound leaves a small margin.
215 wxASSERT(strlen(charSequence
) * 3 + 10 < BUFSIZE
);
218 // include the option in the error messages so it's possible to see
// which option was in effect; 'bytes' is used as scratch space for the
// message here and is overwritten with the test string just below.
220 sprintf(bytes
, " (with option == %d)", option
);
221 std::string
errmsg(bytes
);
223 // put the charSequence at the start, middle and end of a string
224 strcpy(bytes
, charSequence
);
225 strcat(bytes
, "ABC");
226 strcat(bytes
, charSequence
);
227 strcat(bytes
, "XYZ");
228 strcat(bytes
, charSequence
);
230 // translate it into wide characters
// MB2WC is called twice: once with a NULL buffer and once with the real
// buffer. Both calls must return the same value.
231 wxMBConvUTF8
utf8(option
);
232 wchar_t widechars
[BUFSIZE
];
233 size_t lenResult
= utf8
.MB2WC(NULL
, bytes
, 0);
234 size_t result
= utf8
.MB2WC(widechars
, bytes
, BUFSIZE
);
235 UTF8ASSERT(result
== lenResult
);
237 // check we got the expected result
// NOTE(review): presumably this part runs only when 'wideSequence' is
// non-NULL, since it is passed to wcscpy/wcscat below -- confirm.
239 UTF8ASSERT(result
!= (size_t)-1);
240 wxASSERT(result
< BUFSIZE
);
// Build the expected wide string with the same start/middle/end layout as
// the narrow input.
242 wchar_t expected
[BUFSIZE
];
243 wcscpy(expected
, wideSequence
);
244 wcscat(expected
, L
"ABC");
245 wcscat(expected
, wideSequence
);
246 wcscat(expected
, L
"XYZ");
247 wcscat(expected
, wideSequence
);
249 UTF8ASSERT(wcscmp(widechars
, expected
) == 0);
250 UTF8ASSERT(wcslen(widechars
) == result
);
253 // If 'wideSequence' is NULL, then the result is expected to be
254 // invalid. Normally that is as far as we can go, but if there is an
255 // option then the conversion should succeed anyway, and it should be
256 // possible to translate back to the original
// NOTE(review): the two assertions below are mutually exclusive; presumably
// the first applies when no option is set (and the test stops there) and the
// second when an option is set -- the selecting condition is not visible.
258 UTF8ASSERT(result
== (size_t)-1);
262 UTF8ASSERT(result
!= (size_t)-1);
266 // translate it back and check we get the original
// Same pattern as above: a NULL-buffer call, then the real conversion, and
// the two results must agree.
267 char bytesAgain
[BUFSIZE
];
268 size_t lenResultAgain
= utf8
.WC2MB(NULL
, widechars
, 0);
269 size_t resultAgain
= utf8
.WC2MB(bytesAgain
, widechars
, BUFSIZE
);
270 UTF8ASSERT(resultAgain
== lenResultAgain
);
271 UTF8ASSERT(resultAgain
!= (size_t)-1);
272 wxASSERT(resultAgain
< BUFSIZE
);
// The round trip must reproduce the input bytes exactly, and the returned
// count must equal the length of the produced string.
274 UTF8ASSERT(strcmp(bytes
, bytesAgain
) == 0);
275 UTF8ASSERT(strlen(bytesAgain
) == resultAgain
);
278 #endif // HAVE_WCHAR_H