1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        tests/strings/unicode.cpp 
   3 // Purpose:     Unicode unit test 
   4 // Author:      Vadim Zeitlin, Wlodzimierz ABX Skiba 
   7 // Copyright:   (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba 
   8 /////////////////////////////////////////////////////////////////////////////// 
  10 // ---------------------------------------------------------------------------- 
  12 // ---------------------------------------------------------------------------- 
  23 // ---------------------------------------------------------------------------- 
  25 // ---------------------------------------------------------------------------- 
  // Fallback wide-string comparison helper; it is only compiled when
  // wxUSE_WCHAR_T is set and wxUSE_UNICODE is not.
  27 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
  29 // in case wcscmp is missing 
  // Takes two wchar_t strings and returns an int.
  // NOTE(review): presumably follows the wcscmp() convention (0 means equal);
  // DoTestConversion() compares the result against 0 -- confirm.
  30 static int wx_wcscmp(const wchar_t *s1
, const wchar_t *s2
) 
  // loop while the current characters of s1 and s2 are equal and the
  // terminating 0 of s1 has not been reached
  32     while (*s1 
== *s2 
&& *s1 
!= 0) 
  40 #endif // wxUSE_WCHAR_T && !wxUSE_UNICODE 
  42 // ---------------------------------------------------------------------------- 
  44 // ---------------------------------------------------------------------------- 
  // CppUnit test case grouping the Unicode-related checks: the ASCII round
  // trip, wxString constructors taking a conversion object, strings with
  // embedded NULs and the UTF-7/8/16/32 conversions.
  // NOTE(review): the "#endif // wxUSE_WCHAR_T" lines below suggest that the
  // conversion tests and DoTestConversion() are only built when wxUSE_WCHAR_T
  // is set -- confirm against the matching #if.
  46 class UnicodeTestCase 
: public CppUnit::TestCase
 
  // tests making up the suite, one CPPUNIT_TEST() entry per test method
  52     CPPUNIT_TEST_SUITE( UnicodeTestCase 
); 
  53         CPPUNIT_TEST( ToFromAscii 
); 
  55         CPPUNIT_TEST( ConstructorsWithConversion 
); 
  56         CPPUNIT_TEST( ConversionWithNULs 
); 
  57         CPPUNIT_TEST( ConversionUTF7 
); 
  58         CPPUNIT_TEST( ConversionUTF8 
); 
  59         CPPUNIT_TEST( ConversionUTF16 
); 
  60         CPPUNIT_TEST( ConversionUTF32 
); 
  61 #endif // wxUSE_WCHAR_T 
  62     CPPUNIT_TEST_SUITE_END(); 
  // the test methods registered in the suite above
  66     void ConstructorsWithConversion(); 
  67     void ConversionWithNULs(); 
  68     void ConversionUTF7(); 
  69     void ConversionUTF8(); 
  70     void ConversionUTF16(); 
  71     void ConversionUTF32(); 
  73     // test if converting s using the given encoding gives ws and vice versa 
  75     // if either of the first 2 arguments is NULL, the conversion is supposed 
         // to fail
  77     void DoTestConversion(const char *s
, const wchar_t *w
, wxMBConv
& conv
); 
  78 #endif // wxUSE_WCHAR_T 
  // NOTE(review): presumably makes the test case non-copyable, as the macro
  // name says -- confirm
  81     DECLARE_NO_COPY_CLASS(UnicodeTestCase
) 
  84 // register in the unnamed registry so that these tests are run by default 
  85 CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase 
); 
  87 // also include in its own registry (named "Unicode") so that these tests can be run alone 
  88 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase
, "Unicode" ); 
  // Default constructor of the test case; takes no arguments.
  90 UnicodeTestCase::UnicodeTestCase() 
  // Round-trip test for the ASCII helpers: the TEST_TO_FROM_ASCII(txt) macro
  // builds a wxString from txt with wxString::FromAscii() and asserts, using
  // strcmp(), that ToAscii() gives back exactly the original text.
  // It is applied to a plain string and to one containing an escaped quote,
  // a tab, a backslash and a newline.
  94 void UnicodeTestCase::ToFromAscii() 
  97 #define TEST_TO_FROM_ASCII(txt)                              \ 
  99         static const char *msg = txt;                        \ 
 100         wxString s = wxString::FromAscii(msg);               \ 
 101         CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 );  \ 
 104     TEST_TO_FROM_ASCII( "Hello, world!" ); 
 105     TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" ); 
 // Tests the wxString constructors taking a conversion object (wxConvUTF8):
 // construction from the narrow UTF-8 bytes and from the wchar_t data must
 // compare equal to the reference strings, the length-limited constructors
 // must produce the expected prefix, and input that is not valid UTF-8 must
 // give an empty string.
 109 void UnicodeTestCase::ConstructorsWithConversion() 
 111     // the string "Déjà" in UTF-8 and wchar_t: 
 112     const unsigned char utf8Buf
[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0}; 
 113     const wchar_t wchar
[] = {0x44,0xE9,0x6A,0xE0,0}; 
 114     const unsigned char utf8subBuf
[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj" 
     // the wxString constructors below take const char *, so view the
     // unsigned char buffers through char pointers
 115     const char *utf8 
= (char *)utf8Buf
; 
 116     const char *utf8sub 
= (char *)utf8subBuf
; 
     // s1 is built from the UTF-8 bytes, s2 from the wide characters
 118     wxString 
s1(utf8
, wxConvUTF8
); 
 119     wxString 
s2(wchar
, wxConvUTF8
); 
 122     CPPUNIT_ASSERT( s1 
== wchar 
); 
 123     CPPUNIT_ASSERT( s2 
== wchar 
); 
 125     CPPUNIT_ASSERT( s1 
== utf8 
); 
 126     CPPUNIT_ASSERT( s2 
== utf8 
); 
     // length-limited construction: the first 4 bytes of the UTF-8 buffer
     // (0x44,0xC3,0xA9,0x6A) and the first 3 wide characters (0x44,0xE9,0x6A)
     // both spell "Déj", so s3 and s4 must equal sub
 129     wxString 
sub(utf8sub
, wxConvUTF8
); // "Déj" substring 
 130     wxString 
s3(utf8
, wxConvUTF8
, 4); 
 131     wxString 
s4(wchar
, wxConvUTF8
, 3); 
 133     CPPUNIT_ASSERT( s3 
== sub 
); 
 134     CPPUNIT_ASSERT( s4 
== sub 
); 
     // constructing from this text with wxConvUTF8 is expected to yield an
     // empty string
     // NOTE(review): presumably because the accented character in the literal
     // is not stored as valid UTF-8 in the source file -- confirm the file
     // encoding
 137     CPPUNIT_ASSERT ( wxString("\t[pl]open.format.Sformatuj dyskietkê=gfloppy %f",  
 138                                wxConvUTF8
) == wxT("") ); //should stop at pos 35  
 // Checks that a string with an embedded NUL ("The\0String") is converted in
 // full, i.e. that the conversion does not stop at the first NUL, using both
 // wxConvLibc and wxConvLocal. The code before the #else converts via the
 // wxString constructors, mb_str() and c_str(); the !wxUSE_UNICODE branch
 // converts via wc_str().
 142 void UnicodeTestCase::ConversionWithNULs() 
         // "The" + NUL + "String" = 3 + 1 + 6 = 10 characters; the comparisons
         // below use lenNulString + 1 to include the terminating NUL as well
 145         static const size_t lenNulString 
= 10; 
         // wide literal -> wxString (explicit length) -> multibyte buffer,
         // compared byte by byte with the narrow literal
 147         wxString 
szTheString(L
"The\0String", wxConvLibc
, lenNulString
); 
 148         wxCharBuffer theBuffer 
= szTheString
.mb_str(); 
 150         CPPUNIT_ASSERT( memcmp(theBuffer
.data(), "The\0String", 
 151                         lenNulString 
+ 1) == 0 ); 
         // narrow literal -> wxString (explicit length): the full length must
         // be kept and the contents must match the wide literal
 153         wxString 
szTheString2("The\0String", wxConvLocal
, lenNulString
); 
 154         CPPUNIT_ASSERT_EQUAL( lenNulString
, szTheString2
.length() ); 
 155         CPPUNIT_ASSERT( wxTmemcmp(szTheString2
.c_str(), L
"The\0String", 
 156                         lenNulString 
+ 1) == 0 ); 
 157 #else // !wxUSE_UNICODE 
         // build "The\0String" by inserting one NUL at index 3 of "TheString",
         // convert it with wc_str() and compare 11 wide characters (10 + the
         // terminating NUL) with the wide literal
 158         wxString 
szTheString(wxT("TheString")); 
 159         szTheString
.insert(3, 1, '\0'); 
 160         wxWCharBuffer theBuffer 
= szTheString
.wc_str(wxConvLibc
); 
 162         CPPUNIT_ASSERT( memcmp(theBuffer
.data(), L
"The\0String", 11 * sizeof(wchar_t)) == 0 ); 
         // same check using wxConvLocal
 164         wxString 
szLocalTheString(wxT("TheString")); 
 165         szLocalTheString
.insert(3, 1, '\0'); 
 166         wxWCharBuffer theLocalBuffer 
= szLocalTheString
.wc_str(wxConvLocal
); 
 168         CPPUNIT_ASSERT( memcmp(theLocalBuffer
.data(), L
"The\0String", 11 * sizeof(wchar_t)) == 0 ); 
 169 #endif // wxUSE_UNICODE/!wxUSE_UNICODE 
 // Helper shared by the ConversionUTFxx() tests: checks the conversion between
 // the multibyte string s and the wide string ws using the converter conv.
 //
 // First branch: ws is converted with conv.cWC2MB() and the result must be
 // equal to s (strcmp).
 // Second branch: s is converted with conv.cMB2WC(); the result must be
 // non-NULL and equal to ws (wx_wcscmp) or, when the conversion is supposed
 // to fail, the returned buffer must hold a NULL pointer.
 173 UnicodeTestCase::DoTestConversion(const char *s
, 
 180         wxCharBuffer buf 
= conv
.cWC2MB(ws
, (size_t)-1, NULL
); 
 182         CPPUNIT_ASSERT( strcmp(buf
, s
) == 0 ); 
 // this branch relies on wx_wcscmp(), which is only defined when
 // wxUSE_UNICODE is not set
 184 #else // !wxUSE_UNICODE 
 187         wxWCharBuffer wbuf 
= conv
.cMB2WC(s
, (size_t)-1, NULL
); 
 191             CPPUNIT_ASSERT( wbuf
.data() ); 
 192             CPPUNIT_ASSERT( wx_wcscmp(wbuf
, ws
) == 0 ); 
 194         else // conversion is supposed to fail 
 196             CPPUNIT_ASSERT_EQUAL( (wchar_t *)NULL
, wbuf
.data() ); 
 199 #endif // wxUSE_UNICODE/!wxUSE_UNICODE 
 // One entry of the conversion test tables: the tests below read its str and
 // wcs members and pass them to DoTestConversion() as the multibyte string
 // and the wide string respectively.
 202 struct StringConversionData
 
 // Table-driven UTF-7 test: every entry of utf7data is checked with
 // DoTestConversion() twice, once with a wxCSConv created for "utf-7" and
 // once with the wxConvUTF7 object.
 208 void UnicodeTestCase::ConversionUTF7() 
 210     static const StringConversionData utf7data
[] = 
     // U+00A3 is written with a \u escape, hence only if wxHAVE_U_ESCAPE
 215 #ifdef wxHAVE_U_ESCAPE 
 216         { "+AKM-", L
"\u00a3" }, 
 217 #endif // wxHAVE_U_ESCAPE 
 219         // the following are invalid UTF-7 sequences 
     // run each table entry through both converters
 225     wxCSConv 
conv(_T("utf-7")); 
 226     for ( size_t n 
= 0; n 
< WXSIZEOF(utf7data
); n
++ ) 
 228         const StringConversionData
& d 
= utf7data
[n
]; 
 229         DoTestConversion(d
.str
, d
.wcs
, conv
); 
 230         DoTestConversion(d
.str
, d
.wcs
, wxConvUTF7
); 
 // Table-driven UTF-8 test: every entry of utf8data is checked with
 // DoTestConversion() twice, once with a wxCSConv created for "utf-8" and
 // once with the wxConvUTF8 object.
 234 void UnicodeTestCase::ConversionUTF8() 
 236     static const StringConversionData utf8data
[] = 
     // U+00A3 is written with a \u escape, hence only if wxHAVE_U_ESCAPE;
     // its UTF-8 form is the two bytes 0xC2 0xA3
 238 #ifdef wxHAVE_U_ESCAPE 
 239         { "\xc2\xa3", L
"\u00a3" }, 
     // run each table entry through both converters
 244     wxCSConv 
conv(_T("utf-8")); 
 245     for ( size_t n 
= 0; n 
< WXSIZEOF(utf8data
); n
++ ) 
 247         const StringConversionData
& d 
= utf8data
[n
]; 
 248         DoTestConversion(d
.str
, d
.wcs
, conv
); 
 249         DoTestConversion(d
.str
, d
.wcs
, wxConvUTF8
); 
 // Table-driven UTF-16 test using a wxCSConv created for
 // wxFONTENCODING_UTF16BE, followed by a regression check for input
 // containing consecutive NUL bytes.
 253 void UnicodeTestCase::ConversionUTF16() 
     // the narrow strings hold the code units high byte first (e.g. "\x04\x1f"
     // for U+041F) and end with a 2-byte NUL terminator
 255     static const StringConversionData utf16data
[] = 
 257 #ifdef wxHAVE_U_ESCAPE 
 258         { "\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0", 
 259           L
"\u041f\u0440\u0438\u0432\u0435\u0442" }, 
 260         { "\x01\0\0b\x01\0\0a\x01\0\0r\0\0", L
"\u0100b\u0100a\u0100r" }, 
 262         { "\0f\0o\0o\0\0", L
"foo" }, 
     // check every table entry with the big endian UTF-16 converter
 265     wxCSConv 
conv(wxFONTENCODING_UTF16BE
); 
 266     for ( size_t n 
= 0; n 
< WXSIZEOF(utf16data
); n
++ ) 
 268         const StringConversionData
& d 
= utf16data
[n
]; 
 269         DoTestConversion(d
.str
, d
.wcs
, conv
); 
 272     // special case: this string has consecutive NULs inside it which don't 
 273     // terminate the string, this exposed a bug in our conversion code which 
 274     // got confused in this case 
     // the 6 input bytes are three 2-byte code units, so the reported length
     // must be 3
 276     wxWCharBuffer 
wbuf(conv
.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len
)); 
 277     CPPUNIT_ASSERT_EQUAL( (size_t)3, len 
); 
 // Table-driven UTF-32 test using a wxCSConv created for
 // wxFONTENCODING_UTF32BE, followed by a check that input with NUL bytes
 // inside it ("A macron BC") converts to 3 characters.
 280 void UnicodeTestCase::ConversionUTF32() 
     // the narrow strings hold 4 bytes per character, high byte first (e.g.
     // "\0\0\x04\x1f" for U+041F), and end with a 4-byte NUL terminator
 282     static const StringConversionData utf32data
[] = 
 284 #ifdef wxHAVE_U_ESCAPE 
 286             "\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0", 
 287           L
"\u041f\u0440\u0438\u0432\u0435\u0442" }, 
 289         { "\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L
"foo" }, 
     // check every table entry with the big endian UTF-32 converter
 292     wxCSConv 
conv(wxFONTENCODING_UTF32BE
); 
 293     for ( size_t n 
= 0; n 
< WXSIZEOF(utf32data
); n
++ ) 
 295         const StringConversionData
& d 
= utf32data
[n
]; 
 296         DoTestConversion(d
.str
, d
.wcs
, conv
); 
     // input with NUL bytes inside it: three 4-byte characters, so the
     // reported length must be 3
 300     wxWCharBuffer 
wbuf(conv
.cMB2WC("\0\0\x01\0\0\0\0B\0\0\0C" /* A macron BC */, 
 302     CPPUNIT_ASSERT_EQUAL( (size_t)3, len 
); 
 305 #endif // wxUSE_WCHAR_T