1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        tests/strings/unicode.cpp 
   3 // Purpose:     Unicode unit test 
   4 // Author:      Vadim Zeitlin, Wlodzimierz ABX Skiba 
   7 // Copyright:   (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba 
   8 /////////////////////////////////////////////////////////////////////////////// 
  10 // ---------------------------------------------------------------------------- 
  12 // ---------------------------------------------------------------------------- 
  23 // ---------------------------------------------------------------------------- 
  25 // ---------------------------------------------------------------------------- 
  27 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
  29 // in case wcscmp is missing 
  30 static int wx_wcscmp(const wchar_t *s1
, const wchar_t *s2
) 
  32     while (*s1 
== *s2 
&& *s1 
!= 0) 
  40 #endif // wxUSE_WCHAR_T && !wxUSE_UNICODE 
  42 // ---------------------------------------------------------------------------- 
  44 // ---------------------------------------------------------------------------- 
  46 class UnicodeTestCase 
: public CppUnit::TestCase
 
  52     CPPUNIT_TEST_SUITE( UnicodeTestCase 
); 
  53         CPPUNIT_TEST( ToFromAscii 
); 
  55         CPPUNIT_TEST( ConstructorsWithConversion 
); 
  56         CPPUNIT_TEST( ConversionEmpty 
); 
  57         CPPUNIT_TEST( ConversionWithNULs 
); 
  58         CPPUNIT_TEST( ConversionUTF7 
); 
  59         CPPUNIT_TEST( ConversionUTF8 
); 
  60         CPPUNIT_TEST( ConversionUTF16 
); 
  61         CPPUNIT_TEST( ConversionUTF32 
); 
  62         CPPUNIT_TEST( IsConvOk 
); 
  63 #endif // wxUSE_WCHAR_T 
  64     CPPUNIT_TEST_SUITE_END(); 
  68     void ConstructorsWithConversion(); 
  69     void ConversionEmpty(); 
  70     void ConversionWithNULs(); 
  71     void ConversionUTF7(); 
  72     void ConversionUTF8(); 
  73     void ConversionUTF16(); 
  74     void ConversionUTF32(); 
  77     // test if converting s using the given encoding gives ws and vice versa 
  79     // if either of the first 2 arguments is NULL, the conversion is supposed 
  81     void DoTestConversion(const char *s
, const wchar_t *w
, wxMBConv
& conv
); 
  82 #endif // wxUSE_WCHAR_T 
  85     DECLARE_NO_COPY_CLASS(UnicodeTestCase
) 
  88 // register in the unnamed registry so that these tests are run by default 
  89 CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase 
); 
  91 // also include in its own registry so that these tests can be run alone 
  92 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase
, "Unicode" ); 
  94 UnicodeTestCase::UnicodeTestCase() 
  98 void UnicodeTestCase::ToFromAscii() 
 101 #define TEST_TO_FROM_ASCII(txt)                              \ 
 103         static const char *msg = txt;                        \ 
 104         wxString s = wxString::FromAscii(msg);               \ 
 105         CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 );  \ 
 108     TEST_TO_FROM_ASCII( "Hello, world!" ); 
 109     TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" ); 
 113 void UnicodeTestCase::ConstructorsWithConversion() 
 115     // the string "Déjà" in UTF-8 and wchar_t: 
 116     const unsigned char utf8Buf
[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0}; 
 117     const wchar_t wchar
[] = {0x44,0xE9,0x6A,0xE0,0}; 
 118     const unsigned char utf8subBuf
[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj" 
 119     const char *utf8 
= (char *)utf8Buf
; 
 120     const char *utf8sub 
= (char *)utf8subBuf
; 
 122     wxString 
s1(utf8
, wxConvUTF8
); 
 123     wxString 
s2(wchar
, wxConvUTF8
); 
 126     CPPUNIT_ASSERT( s1 
== wchar 
); 
 127     CPPUNIT_ASSERT( s2 
== wchar 
); 
 129     CPPUNIT_ASSERT( s1 
== utf8 
); 
 130     CPPUNIT_ASSERT( s2 
== utf8 
); 
 133     wxString 
sub(utf8sub
, wxConvUTF8
); // "Déj" substring 
 134     wxString 
s3(utf8
, wxConvUTF8
, 4); 
 135     wxString 
s4(wchar
, wxConvUTF8
, 3); 
 137     CPPUNIT_ASSERT( s3 
== sub 
); 
 138     CPPUNIT_ASSERT( s4 
== sub 
); 
 141     CPPUNIT_ASSERT ( wxString("\t[pl]open.format.Sformatuj dyskietkê=gfloppy %f",  
 142                                wxConvUTF8
) == wxT("") ); //should stop at pos 35  
 146 void UnicodeTestCase::ConversionEmpty() 
 151     wxCharBuffer buf 
= wxConvLibc
.cWC2MB(L
"", 0, &len
); 
 152 #else // !wxUSE_UNICODE 
 153     wxWCharBuffer wbuf 
= wxConvLibc
.cMB2WC("", 0, &len
); 
 154 #endif // wxUSE_UNICODE/!wxUSE_UNICODE 
 156     CPPUNIT_ASSERT(len 
== 0); 
 159 void UnicodeTestCase::ConversionWithNULs() 
 162     static const size_t lenNulString 
= 10; 
 164     wxString 
szTheString(L
"The\0String", wxConvLibc
, lenNulString
); 
 165     wxCharBuffer theBuffer 
= szTheString
.mb_str(); 
 167     CPPUNIT_ASSERT( memcmp(theBuffer
.data(), "The\0String", 
 168                     lenNulString 
+ 1) == 0 ); 
 170     wxString 
szTheString2("The\0String", wxConvLocal
, lenNulString
); 
 171     CPPUNIT_ASSERT_EQUAL( lenNulString
, szTheString2
.length() ); 
 172     CPPUNIT_ASSERT( wxTmemcmp(szTheString2
.c_str(), L
"The\0String", 
 173                     lenNulString 
+ 1) == 0 ); 
 174 #else // !wxUSE_UNICODE 
 175     wxString 
szTheString(wxT("TheString")); 
 176     szTheString
.insert(3, 1, '\0'); 
 177     wxWCharBuffer theBuffer 
= szTheString
.wc_str(wxConvLibc
); 
 179     CPPUNIT_ASSERT( memcmp(theBuffer
.data(), L
"The\0String", 11 * sizeof(wchar_t)) == 0 ); 
 181     wxString 
szLocalTheString(wxT("TheString")); 
 182     szLocalTheString
.insert(3, 1, '\0'); 
 183     wxWCharBuffer theLocalBuffer 
= szLocalTheString
.wc_str(wxConvLocal
); 
 185     CPPUNIT_ASSERT( memcmp(theLocalBuffer
.data(), L
"The\0String", 11 * sizeof(wchar_t)) == 0 ); 
 186 #endif // wxUSE_UNICODE/!wxUSE_UNICODE 
 190 UnicodeTestCase::DoTestConversion(const char *s
, 
 197         wxCharBuffer buf 
= conv
.cWC2MB(ws
, (size_t)-1, NULL
); 
 199         CPPUNIT_ASSERT( strcmp(buf
, s
) == 0 ); 
 201 #else // wxUSE_UNICODE 
 204         wxWCharBuffer wbuf 
= conv
.cMB2WC(s
, (size_t)-1, NULL
); 
 208             CPPUNIT_ASSERT( wbuf
.data() ); 
 209             CPPUNIT_ASSERT( wx_wcscmp(wbuf
, ws
) == 0 ); 
 211         else // conversion is supposed to fail 
 213             CPPUNIT_ASSERT_EQUAL( (wchar_t *)NULL
, wbuf
.data() ); 
 216 #endif // wxUSE_UNICODE/!wxUSE_UNICODE 
 219 struct StringConversionData
 
 225 void UnicodeTestCase::ConversionUTF7() 
 227     static const StringConversionData utf7data
[] = 
 232 #ifdef wxHAVE_U_ESCAPE 
 233         { "+AKM-", L
"\u00a3" }, 
 234 #endif // wxHAVE_U_ESCAPE 
 236         // the following are invalid UTF-7 sequences 
 242     wxCSConv 
conv(_T("utf-7")); 
 243     for ( size_t n 
= 0; n 
< WXSIZEOF(utf7data
); n
++ ) 
 245         const StringConversionData
& d 
= utf7data
[n
]; 
 247         // converting to/from UTF-7 using iconv() currently doesn't work 
 248         // because of several problems: 
 249         //  - GetMBNulLen() doesn't return correct result (iconv converts L'\0' 
 250         //    to an incomplete and anyhow nonsensical "+AA" string) 
 251         //  - iconv refuses to convert "+-" (although it converts "+-\n" just 
 254         // I have no idea how to fix this so just disable the test for now 
 256         DoTestConversion(d
.str
, d
.wcs
, conv
); 
 258         DoTestConversion(d
.str
, d
.wcs
, wxConvUTF7
); 
 262 void UnicodeTestCase::ConversionUTF8() 
 264     static const StringConversionData utf8data
[] = 
 266 #ifdef wxHAVE_U_ESCAPE 
 267         { "\xc2\xa3", L
"\u00a3" }, 
 272     wxCSConv 
conv(_T("utf-8")); 
 273     for ( size_t n 
= 0; n 
< WXSIZEOF(utf8data
); n
++ ) 
 275         const StringConversionData
& d 
= utf8data
[n
]; 
 276         DoTestConversion(d
.str
, d
.wcs
, conv
); 
 277         DoTestConversion(d
.str
, d
.wcs
, wxConvUTF8
); 
 281 void UnicodeTestCase::ConversionUTF16() 
 283     static const StringConversionData utf16data
[] = 
 285 #ifdef wxHAVE_U_ESCAPE 
 286         { "\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0", 
 287           L
"\u041f\u0440\u0438\u0432\u0435\u0442" }, 
 288         { "\x01\0\0b\x01\0\0a\x01\0\0r\0\0", L
"\u0100b\u0100a\u0100r" }, 
 290         { "\0f\0o\0o\0\0", L
"foo" }, 
 293     wxCSConv 
conv(wxFONTENCODING_UTF16BE
); 
 294     for ( size_t n 
= 0; n 
< WXSIZEOF(utf16data
); n
++ ) 
 296         const StringConversionData
& d 
= utf16data
[n
]; 
 297         DoTestConversion(d
.str
, d
.wcs
, conv
); 
 300     // special case: this string has consecutive NULs inside it which don't 
 301     // terminate the string, this exposed a bug in our conversion code which 
 302     // got confused in this case 
 304     wxWCharBuffer 
wbuf(conv
.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len
)); 
 305     CPPUNIT_ASSERT_EQUAL( (size_t)3, len 
); 
 308 void UnicodeTestCase::ConversionUTF32() 
 310     static const StringConversionData utf32data
[] = 
 312 #ifdef wxHAVE_U_ESCAPE 
 314             "\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0", 
 315           L
"\u041f\u0440\u0438\u0432\u0435\u0442" }, 
 317         { "\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L
"foo" }, 
 320     wxCSConv 
conv(wxFONTENCODING_UTF32BE
); 
 321     for ( size_t n 
= 0; n 
< WXSIZEOF(utf32data
); n
++ ) 
 323         const StringConversionData
& d 
= utf32data
[n
]; 
 324         DoTestConversion(d
.str
, d
.wcs
, conv
); 
 328     wxWCharBuffer 
wbuf(conv
.cMB2WC("\0\0\x01\0\0\0\0B\0\0\0C" /* A macron BC */, 
 330     CPPUNIT_ASSERT_EQUAL( (size_t)3, len 
); 
 333 void UnicodeTestCase::IsConvOk() 
 335     CPPUNIT_ASSERT( wxCSConv(wxFONTENCODING_SYSTEM
).IsOk() ); 
 336     CPPUNIT_ASSERT( wxCSConv(_T("UTF-8")).IsOk() ); 
 337     CPPUNIT_ASSERT( !wxCSConv(_T("NoSuchConversion")).IsOk() ); 
 340     CPPUNIT_ASSERT( wxCSConv(_T("WINDOWS-437")).IsOk() ); 
 344 #endif // wxUSE_WCHAR_T