1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: tests/strings/unicode.cpp
3 // Purpose: Unicode unit test
4 // Author: Vadim Zeitlin, Wlodzimierz ABX Skiba
7 // Copyright: (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba
8 ///////////////////////////////////////////////////////////////////////////////
10 // ----------------------------------------------------------------------------
12 // ----------------------------------------------------------------------------
23 // ----------------------------------------------------------------------------
25 // ----------------------------------------------------------------------------
27 class UnicodeTestCase
: public CppUnit::TestCase
33 CPPUNIT_TEST_SUITE( UnicodeTestCase
);
34 CPPUNIT_TEST( ToFromAscii
);
36 CPPUNIT_TEST( ConstructorsWithConversion
);
37 CPPUNIT_TEST( Conversion
);
38 CPPUNIT_TEST( ConversionUTF7
);
39 CPPUNIT_TEST( ConversionUTF8
);
40 #endif // wxUSE_WCHAR_T
41 CPPUNIT_TEST_SUITE_END();
45 void ConstructorsWithConversion();
47 void ConversionUTF7();
48 void ConversionUTF8();
50 // test if converting s using the given encoding gives ws and vice versa
52 // if either of the first 2 arguments is NULL, the conversion is supposed
54 void DoTestConversion(const char *s
, const wchar_t *w
, wxCSConv
& conv
);
55 #endif // wxUSE_WCHAR_T
58 DECLARE_NO_COPY_CLASS(UnicodeTestCase
)
61 // register in the unnamed registry so that these tests are run by default
62 CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase
);
64 // also include it in its own registry so that these tests can be run alone
65 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase
, "UnicodeTestCase" );
67 UnicodeTestCase::UnicodeTestCase()
71 void UnicodeTestCase::ToFromAscii()
74 #define TEST_TO_FROM_ASCII(txt) \
76 static const char *msg = txt; \
77 wxString s = wxString::FromAscii(msg); \
78 CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 ); \
81 TEST_TO_FROM_ASCII( "Hello, world!" );
82 TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" );
86 void UnicodeTestCase::ConstructorsWithConversion()
88 // the string "Déjà" in UTF-8 and wchar_t:
89 const unsigned char utf8Buf
[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
90 const wchar_t wchar
[] = {0x44,0xE9,0x6A,0xE0,0};
91 const unsigned char utf8subBuf
[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
92 const char *utf8
= (char *)utf8Buf
;
93 const char *utf8sub
= (char *)utf8subBuf
;
95 wxString
s1(utf8
, wxConvUTF8
);
96 wxString
s2(wchar
, wxConvUTF8
);
99 CPPUNIT_ASSERT( s1
== wchar
);
100 CPPUNIT_ASSERT( s2
== wchar
);
102 CPPUNIT_ASSERT( s1
== utf8
);
103 CPPUNIT_ASSERT( s2
== utf8
);
106 wxString
sub(utf8sub
, wxConvUTF8
); // "Dej" substring
107 wxString
s3(utf8
, wxConvUTF8
, 4);
108 wxString
s4(wchar
, wxConvUTF8
, 3);
110 CPPUNIT_ASSERT( s3
== sub
);
111 CPPUNIT_ASSERT( s4
== sub
);
114 CPPUNIT_ASSERT ( wxString("\t[pl]open.format.Sformatuj dyskietkê=gfloppy %f",
115 wxConvUTF8
) == wxT("") ); //should stop at pos 35
119 void UnicodeTestCase::Conversion()
122 wxString
szTheString(L
"The\0String", wxConvLibc
, 10);
123 wxCharBuffer theBuffer
= szTheString
.mb_str();
125 CPPUNIT_ASSERT( memcmp(theBuffer
.data(), "The\0String", 11) == 0 );
127 wxString
szTheString2("The\0String", wxConvLocal
, 10);
128 CPPUNIT_ASSERT( szTheString2
.length() == 11 );
129 CPPUNIT_ASSERT( wxTmemcmp(szTheString2
.c_str(), L
"The\0String", 11) == 0 );
131 wxString
szTheString(wxT("TheString"));
132 szTheString
.insert(3, 1, '\0');
133 wxWCharBuffer theBuffer
= szTheString
.wc_str(wxConvLibc
);
135 CPPUNIT_ASSERT( memcmp(theBuffer
.data(), L
"The\0String", 11 * sizeof(wchar_t)) == 0 );
137 wxString
szLocalTheString(wxT("TheString"));
138 szLocalTheString
.insert(3, 1, '\0');
139 wxWCharBuffer theLocalBuffer
= szLocalTheString
.wc_str(wxConvLocal
);
141 CPPUNIT_ASSERT( memcmp(theLocalBuffer
.data(), L
"The\0String", 11 * sizeof(wchar_t)) == 0 );
146 // in case wcscmp is missing
148 static int wx_wcscmp(const wchar_t *s1
, const wchar_t *s2
)
150 while (*s1
== *s2
&& *s1
!= 0)
160 UnicodeTestCase::DoTestConversion(const char *s
,
167 wxCharBuffer
buf(wxString(ws
).mb_str(conv
));
169 CPPUNIT_ASSERT( strcmp(buf
, s
) == 0 );
171 #else // wxUSE_UNICODE
174 wxWCharBuffer
wbuf(wxString(s
).wc_str(conv
));
177 CPPUNIT_ASSERT( wx_wcscmp(wbuf
, ws
) == 0 );
179 CPPUNIT_ASSERT( !*wbuf
);
181 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
184 struct StringConversionData
190 void UnicodeTestCase::ConversionUTF7()
192 static const StringConversionData utf7data
[] =
196 //\u isn't recognized on MSVC 6
197 #if !defined(_MSC_VER)
198 #if !defined(__GNUC__) || (__GNUC__ >= 3)
199 { "+AKM-", L
"\u00a3" },
202 // Windows accepts invalid UTF-7 strings and so does our UTF-7
203 // conversion code -- this is wrong IMO but the way it is for now
205 // notice that converting "+" still behaves as expected because the
206 // result is just an empty string, i.e. the same as if there were an
207 // error, but converting "a+" results in "a" while it really should
213 wxCSConv
conv(_T("utf-7"));
214 for ( size_t n
= 0; n
< WXSIZEOF(utf7data
); n
++ )
216 const StringConversionData
& d
= utf7data
[n
];
217 DoTestConversion(d
.str
, d
.wcs
, conv
);
221 void UnicodeTestCase::ConversionUTF8()
223 static const StringConversionData utf8data
[] =
225 //\u isn't recognized on MSVC 6
226 #if !defined(_MSC_VER)
227 #if !defined(__GNUC__) || (__GNUC__ >= 3)
228 { "\xc2\xa3", L
"\u00a3" },
234 wxCSConv
conv(_T("utf-8"));
235 for ( size_t n
= 0; n
< WXSIZEOF(utf8data
); n
++ )
237 const StringConversionData
& d
= utf8data
[n
];
238 DoTestConversion(d
.str
, d
.wcs
, conv
);
242 #endif // wxUSE_WCHAR_T