]> git.saurik.com Git - wxWidgets.git/blame - tests/strings/unicode.cpp
wxMessageBox off the main thread lost result code.
[wxWidgets.git] / tests / strings / unicode.cpp
CommitLineData
387f829e
VS
1///////////////////////////////////////////////////////////////////////////////
2// Name: tests/strings/unicode.cpp
3// Purpose: Unicode unit test
4// Author: Vadim Zeitlin, Wlodzimierz ABX Skiba
5// Created: 2004-04-28
387f829e
VS
6// Copyright: (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba
7///////////////////////////////////////////////////////////////////////////////
8
9// ----------------------------------------------------------------------------
10// headers
11// ----------------------------------------------------------------------------
12
8899b155 13#include "testprec.h"
387f829e
VS
14
15#ifdef __BORLANDC__
16 #pragma hdrstop
17#endif
18
19#ifndef WX_PRECOMP
31c06391 20 #include "wx/wx.h"
387f829e
VS
21#endif // WX_PRECOMP
22
210bfffb
FM
23#include "wx/encconv.h"
24
25// ----------------------------------------------------------------------------
42e8b52f 26// helper class holding the matching MB and WC strings
210bfffb
FM
27// ----------------------------------------------------------------------------
28
42e8b52f
VZ
29struct StringConversionData
30{
210bfffb
FM
31 // either str or wcs (but not both) may be NULL, this means that the conversion
32 // to it should fail
527587d3
VZ
33 StringConversionData(const char *str_, const wchar_t *wcs_, int flags_ = 0)
34 : str(str_), wcs(wcs_), flags(flags_)
35 {
36 }
37
38 const char * const str;
39 const wchar_t * const wcs;
42e8b52f
VZ
40
41 enum
42 {
43 TEST_BOTH = 0, // test both str -> wcs and wcs -> str
44 ONLY_MB2WC = 1 // only test str -> wcs conversion
45 };
46
527587d3 47 const int flags;
42e8b52f
VZ
48
49 // test that the conversion between str and wcs (subject to flags) succeeds
50 //
51 // the first argument is the index in the test array and is used solely for
52 // diagnostics
53 void Test(size_t n, wxMBConv& conv) const
54 {
55 if ( str )
56 {
57 wxWCharBuffer wbuf = conv.cMB2WC(str);
58
59 if ( wcs )
60 {
61 CPPUNIT_ASSERT_MESSAGE
62 (
63 Message(n, "MB2WC failed"),
64 wbuf.data()
65 );
66
67 CPPUNIT_ASSERT_MESSAGE
68 (
69 Message(n, "MB2WC", wbuf, wcs),
70 wxStrcmp(wbuf, wcs) == 0
71 );
72 }
73 else // conversion is supposed to fail
74 {
75 CPPUNIT_ASSERT_MESSAGE
76 (
77 Message(n, "MB2WC succeeded"),
78 !wbuf.data()
79 );
80 }
81 }
82
83 if ( wcs && !(flags & ONLY_MB2WC) )
84 {
85 wxCharBuffer buf = conv.cWC2MB(wcs);
86
87 if ( str )
88 {
89 CPPUNIT_ASSERT_MESSAGE
90 (
91 Message(n, "WC2MB failed"),
92 buf.data()
93 );
94
95 CPPUNIT_ASSERT_MESSAGE
96 (
97 Message(n, "WC2MB", buf, str),
98 strcmp(buf, str) == 0
99 );
100 }
101 else
102 {
103 CPPUNIT_ASSERT_MESSAGE
104 (
105 Message(n, "WC2MB succeeded"),
106 !buf.data()
107 );
108 }
109 }
110 }
111
112private:
113 static std::string
114 Message(size_t n, const wxString& msg)
115 {
116 return std::string(wxString::Format("#%lu: %s", (unsigned long)n, msg));
117 }
118
119 template <typename T>
120 static std::string
121 Message(size_t n,
122 const char *func,
123 const wxCharTypeBuffer<T>& actual,
124 const T *expected)
125 {
126 return Message(n,
127 wxString::Format("%s returned \"%s\", expected \"%s\"",
128 func, actual.data(), expected));
129 }
130};
131
387f829e
VS
132// ----------------------------------------------------------------------------
133// test class
134// ----------------------------------------------------------------------------
135
136class UnicodeTestCase : public CppUnit::TestCase
137{
138public:
139 UnicodeTestCase();
140
141private:
142 CPPUNIT_TEST_SUITE( UnicodeTestCase );
143 CPPUNIT_TEST( ToFromAscii );
a65ca3e6 144 CPPUNIT_TEST( ConstructorsWithConversion );
bbb0ff36 145 CPPUNIT_TEST( ConversionFixed );
5975f198 146 CPPUNIT_TEST( ConversionWithNULs );
a65ca3e6
VZ
147 CPPUNIT_TEST( ConversionUTF7 );
148 CPPUNIT_TEST( ConversionUTF8 );
5975f198 149 CPPUNIT_TEST( ConversionUTF16 );
a7823b26 150 CPPUNIT_TEST( ConversionUTF32 );
0f0298b1 151 CPPUNIT_TEST( IsConvOk );
b0c4d5d7
VS
152#if wxUSE_UNICODE
153 CPPUNIT_TEST( Iteration );
154#endif
387f829e
VS
155 CPPUNIT_TEST_SUITE_END();
156
157 void ToFromAscii();
a65ca3e6 158 void ConstructorsWithConversion();
bbb0ff36 159 void ConversionFixed();
5975f198 160 void ConversionWithNULs();
a65ca3e6
VZ
161 void ConversionUTF7();
162 void ConversionUTF8();
5975f198 163 void ConversionUTF16();
a7823b26 164 void ConversionUTF32();
0f0298b1 165 void IsConvOk();
b0c4d5d7
VS
166#if wxUSE_UNICODE
167 void Iteration();
168#endif
a65ca3e6 169
387f829e
VS
170 DECLARE_NO_COPY_CLASS(UnicodeTestCase)
171};
172
173// register in the unnamed registry so that these tests are run by default
174CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase );
175
e3778b4d 176// also include in its own registry so that these tests can be run alone
81e9dec6 177CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase, "UnicodeTestCase" );
387f829e
VS
178
179UnicodeTestCase::UnicodeTestCase()
180{
181}
182
183void UnicodeTestCase::ToFromAscii()
184{
185
186#define TEST_TO_FROM_ASCII(txt) \
187 { \
188 static const char *msg = txt; \
189 wxString s = wxString::FromAscii(msg); \
190 CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 ); \
191 }
192
193 TEST_TO_FROM_ASCII( "Hello, world!" );
194 TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" );
195}
196
a65ca3e6
VZ
197void UnicodeTestCase::ConstructorsWithConversion()
198{
4bc9acbe 199 // the string "Déjà" in UTF-8 and wchar_t:
a65ca3e6 200 const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
4bc9acbe 201 const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
a65ca3e6
VZ
202 const char *utf8 = (char *)utf8Buf;
203 const char *utf8sub = (char *)utf8subBuf;
204
205 wxString s1(utf8, wxConvUTF8);
a65ca3e6
VZ
206
207#if wxUSE_UNICODE
ae431935 208 const wchar_t wchar[] = {0x44,0xE9,0x6A,0xE0,0};
1de532f5 209 CPPUNIT_ASSERT_EQUAL( wchar, s1 );
ae431935
VZ
210
211 wxString s2(wchar);
1de532f5 212 CPPUNIT_ASSERT_EQUAL( wchar, s2 );
727e8d84 213 CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(utf8), s2 );
a65ca3e6 214#else
1de532f5 215 CPPUNIT_ASSERT_EQUAL( utf8, s1 );
a65ca3e6
VZ
216#endif
217
218 wxString sub(utf8sub, wxConvUTF8); // "Dej" substring
219 wxString s3(utf8, wxConvUTF8, 4);
fa0584f1 220 CPPUNIT_ASSERT_EQUAL( sub, s3 );
a65ca3e6
VZ
221
222#if wxUSE_UNICODE
ae431935
VZ
223 wxString s4(wchar, wxConvUTF8, 3);
224 CPPUNIT_ASSERT_EQUAL( sub, s4 );
225
fa0584f1 226 // conversion should stop with failure at pos 35
4bc9acbe 227 wxString s("\t[pl]open.format.Sformatuj dyskietk\xea=gfloppy %f", wxConvUTF8);
fa0584f1 228 CPPUNIT_ASSERT( s.empty() );
ae431935 229#endif // wxUSE_UNICODE
d7330233
VS
230
231
232 // test using Unicode strings together with char* strings (this must work
233 // in ANSI mode as well, of course):
234 wxString s5("ascii");
1de532f5 235 CPPUNIT_ASSERT_EQUAL( "ascii", s5 );
d7330233
VS
236
237 s5 += " value";
238
239 CPPUNIT_ASSERT( strcmp(s5.mb_str(), "ascii value") == 0 );
1de532f5 240 CPPUNIT_ASSERT_EQUAL( "ascii value", s5 );
d7330233 241 CPPUNIT_ASSERT( s5 != "SomethingElse" );
a65ca3e6
VZ
242}
243
bbb0ff36 244void UnicodeTestCase::ConversionFixed()
85d3e5a9
VZ
245{
246 size_t len;
247
248#if wxUSE_UNICODE
93a800a9 249 wxConvLibc.cWC2MB(L"", 0, &len);
85d3e5a9 250#else // !wxUSE_UNICODE
93a800a9 251 wxConvLibc.cMB2WC("", 0, &len);
85d3e5a9
VZ
252#endif // wxUSE_UNICODE/!wxUSE_UNICODE
253
93a800a9 254 CPPUNIT_ASSERT_EQUAL( 0, len );
bbb0ff36
VZ
255
256#if wxUSE_UNICODE
257 // check that when we convert a fixed number of characters we obtain the
258 // expected return value
259 CPPUNIT_ASSERT_EQUAL( 0, wxConvLibc.ToWChar(NULL, 0, "", 0) );
260 CPPUNIT_ASSERT_EQUAL( 1, wxConvLibc.ToWChar(NULL, 0, "x", 1) );
261 CPPUNIT_ASSERT_EQUAL( 2, wxConvLibc.ToWChar(NULL, 0, "x", 2) );
262 CPPUNIT_ASSERT_EQUAL( 2, wxConvLibc.ToWChar(NULL, 0, "xy", 2) );
263#endif // wxUSE_UNICODE
85d3e5a9
VZ
264}
265
5975f198 266void UnicodeTestCase::ConversionWithNULs()
a65ca3e6
VZ
267{
268#if wxUSE_UNICODE
85d3e5a9 269 static const size_t lenNulString = 10;
7ce0c58f 270
85d3e5a9
VZ
271 wxString szTheString(L"The\0String", wxConvLibc, lenNulString);
272 wxCharBuffer theBuffer = szTheString.mb_str();
a65ca3e6 273
85d3e5a9
VZ
274 CPPUNIT_ASSERT( memcmp(theBuffer.data(), "The\0String",
275 lenNulString + 1) == 0 );
a65ca3e6 276
85d3e5a9
VZ
277 wxString szTheString2("The\0String", wxConvLocal, lenNulString);
278 CPPUNIT_ASSERT_EQUAL( lenNulString, szTheString2.length() );
279 CPPUNIT_ASSERT( wxTmemcmp(szTheString2.c_str(), L"The\0String",
280 lenNulString + 1) == 0 );
2877b828 281#else // !wxUSE_UNICODE
ae431935 282 wxString szTheString("TheString");
85d3e5a9
VZ
283 szTheString.insert(3, 1, '\0');
284 wxWCharBuffer theBuffer = szTheString.wc_str(wxConvLibc);
a65ca3e6 285
85d3e5a9 286 CPPUNIT_ASSERT( memcmp(theBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
a65ca3e6 287
ae431935 288 wxString szLocalTheString("TheString");
85d3e5a9
VZ
289 szLocalTheString.insert(3, 1, '\0');
290 wxWCharBuffer theLocalBuffer = szLocalTheString.wc_str(wxConvLocal);
a65ca3e6 291
85d3e5a9 292 CPPUNIT_ASSERT( memcmp(theLocalBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
2877b828 293#endif // wxUSE_UNICODE/!wxUSE_UNICODE
a65ca3e6
VZ
294}
295
a65ca3e6
VZ
296void UnicodeTestCase::ConversionUTF7()
297{
298 static const StringConversionData utf7data[] =
299 {
ae431935 300 // normal fragments
527587d3
VZ
301 StringConversionData("+AKM-", L"\xa3"),
302 StringConversionData("+AOk-t+AOk-", L"\xe9t\xe9"),
ae431935 303
42e8b52f 304 // this one is an alternative valid encoding of the same string
527587d3
VZ
305 StringConversionData("+AOk-t+AOk", L"\xe9t\xe9",
306 StringConversionData::ONLY_MB2WC),
42e8b52f 307
ae431935 308 // some special cases
527587d3
VZ
309 StringConversionData("+-", L"+"),
310 StringConversionData("+--", L"+-"),
8da7a00a 311
8da7a00a 312 // the following are invalid UTF-7 sequences
527587d3
VZ
313 StringConversionData("\xa3", NULL),
314 StringConversionData("+", NULL),
315 StringConversionData("+~", NULL),
316 StringConversionData("a+", NULL),
a65ca3e6
VZ
317 };
318
a65ca3e6
VZ
319 for ( size_t n = 0; n < WXSIZEOF(utf7data); n++ )
320 {
321 const StringConversionData& d = utf7data[n];
b901ac2c
VZ
322
323 // converting to/from UTF-7 using iconv() currently doesn't work
324 // because of several problems:
325 // - GetMBNulLen() doesn't return correct result (iconv converts L'\0'
326 // to an incomplete and anyhow nonsensical "+AA" string)
327 // - iconv refuses to convert "+-" (although it converts "+-\n" just
328 // fine, go figure)
329 //
330 // I have no idea how to fix this so just disable the test for now
331#if 0
42e8b52f 332 d.Test(n, wxCSConv("utf-7"));
b901ac2c 333#endif
42e8b52f 334 d.Test(n, wxConvUTF7);
a65ca3e6
VZ
335 }
336}
337
338void UnicodeTestCase::ConversionUTF8()
339{
340 static const StringConversionData utf8data[] =
341 {
8da7a00a 342#ifdef wxHAVE_U_ESCAPE
527587d3 343 StringConversionData("\xc2\xa3", L"\u00a3"),
a65ca3e6 344#endif
527587d3 345 StringConversionData("\xc2", NULL),
a65ca3e6
VZ
346 };
347
9a83f860 348 wxCSConv conv(wxT("utf-8"));
a65ca3e6
VZ
349 for ( size_t n = 0; n < WXSIZEOF(utf8data); n++ )
350 {
351 const StringConversionData& d = utf8data[n];
42e8b52f
VZ
352 d.Test(n, conv);
353 d.Test(n, wxConvUTF8);
a65ca3e6
VZ
354 }
355}
356
5975f198
VZ
357void UnicodeTestCase::ConversionUTF16()
358{
359 static const StringConversionData utf16data[] =
360 {
361#ifdef wxHAVE_U_ESCAPE
527587d3
VZ
362 StringConversionData(
363 "\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
364 L"\u041f\u0440\u0438\u0432\u0435\u0442"),
365 StringConversionData(
366 "\x01\0\0b\x01\0\0a\x01\0\0r\0\0",
367 L"\u0100b\u0100a\u0100r"),
5975f198 368#endif
527587d3 369 StringConversionData("\0f\0o\0o\0\0", L"foo"),
5975f198
VZ
370 };
371
372 wxCSConv conv(wxFONTENCODING_UTF16BE);
373 for ( size_t n = 0; n < WXSIZEOF(utf16data); n++ )
374 {
375 const StringConversionData& d = utf16data[n];
42e8b52f 376 d.Test(n, conv);
5975f198 377 }
2877b828
VZ
378
379 // special case: this string has consecutive NULs inside it which don't
380 // terminate the string, this exposed a bug in our conversion code which
381 // got confused in this case
382 size_t len;
93a800a9
VZ
383 conv.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len);
384 CPPUNIT_ASSERT_EQUAL( 3, len );
5975f198
VZ
385}
386
a7823b26
VZ
387void UnicodeTestCase::ConversionUTF32()
388{
389 static const StringConversionData utf32data[] =
390 {
391#ifdef wxHAVE_U_ESCAPE
527587d3 392 StringConversionData(
72b2fc5c 393 "\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
527587d3 394 L"\u041f\u0440\u0438\u0432\u0435\u0442"),
a7823b26 395#endif
527587d3 396 StringConversionData("\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L"foo"),
a7823b26
VZ
397 };
398
399 wxCSConv conv(wxFONTENCODING_UTF32BE);
400 for ( size_t n = 0; n < WXSIZEOF(utf32data); n++ )
401 {
402 const StringConversionData& d = utf32data[n];
42e8b52f 403 d.Test(n, conv);
a7823b26
VZ
404 }
405
406 size_t len;
93a800a9
VZ
407 conv.cMB2WC("\0\0\x01\0\0\0\0B\0\0\0C" /* A macron BC */, 12, &len);
408 CPPUNIT_ASSERT_EQUAL( 3, len );
a7823b26
VZ
409}
410
0f0298b1
VZ
411void UnicodeTestCase::IsConvOk()
412{
413 CPPUNIT_ASSERT( wxCSConv(wxFONTENCODING_SYSTEM).IsOk() );
e3276230
VZ
414 CPPUNIT_ASSERT( wxCSConv("US-ASCII").IsOk() );
415 CPPUNIT_ASSERT( wxCSConv("UTF-8").IsOk() );
416 CPPUNIT_ASSERT( !wxCSConv("NoSuchConversion").IsOk() );
0f0298b1
VZ
417
418#ifdef __WINDOWS__
e3276230 419 CPPUNIT_ASSERT( wxCSConv("WINDOWS-437").IsOk() );
0f0298b1
VZ
420#endif
421}
422
b0c4d5d7
VS
423#if wxUSE_UNICODE
424void UnicodeTestCase::Iteration()
425{
426 // "czech" in Czech ("cestina"):
427 static const char *textUTF8 = "\304\215e\305\241tina";
428 static const wchar_t textUTF16[] = {0x10D, 0x65, 0x161, 0x74, 0x69, 0x6E, 0x61, 0};
429
430 wxString text(wxString::FromUTF8(textUTF8));
431 CPPUNIT_ASSERT( wxStrcmp(text.wc_str(), textUTF16) == 0 );
432
433 // verify the string was decoded correctly:
434 {
435 size_t idx = 0;
436 for ( wxString::const_iterator i = text.begin(); i != text.end(); ++i, ++idx )
437 {
438 CPPUNIT_ASSERT( *i == textUTF16[idx] );
439 }
440 }
441
442 // overwrite the string with something that is shorter in UTF-8:
443 {
444 for ( wxString::iterator i = text.begin(); i != text.end(); ++i )
445 *i = 'x';
446 }
447
448 // restore the original text now:
449 {
450 wxString::iterator end1 = text.end();
451 wxString::const_iterator end2 = text.end();
452
453 size_t idx = 0;
454 for ( wxString::iterator i = text.begin(); i != text.end(); ++i, ++idx )
455 {
456 *i = textUTF16[idx];
457
458 CPPUNIT_ASSERT( end1 == text.end() );
459 CPPUNIT_ASSERT( end2 == text.end() );
460 }
461
462 CPPUNIT_ASSERT( end1 == text.end() );
463 CPPUNIT_ASSERT( end2 == text.end() );
464 }
465
466 // and verify it again:
467 {
468 size_t idx = 0;
469 for ( wxString::const_iterator i = text.begin(); i != text.end(); ++i, ++idx )
470 {
471 CPPUNIT_ASSERT( *i == textUTF16[idx] );
472 }
473 }
474}
475#endif // wxUSE_UNICODE
210bfffb 476