tests/strings/unicode.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        tests/strings/unicode.cpp
   3 // Purpose:     Unicode unit test
   4 // Author:      Vadim Zeitlin, Wlodzimierz ABX Skiba
   5 // Created:     2004-04-28
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba
   8 ///////////////////////////////////////////////////////////////////////////////
   9
  10 // ----------------------------------------------------------------------------
  11 // headers
  12 // ----------------------------------------------------------------------------
  13
  14 #include "testprec.h"
  15
  16 #ifdef __BORLANDC__
  17     #pragma hdrstop
  18 #endif
  19
  20 #ifndef WX_PRECOMP
  21 #endif // WX_PRECOMP
  22
  23 // ----------------------------------------------------------------------------
  24 // local functions
  25 // ----------------------------------------------------------------------------
  26
  27 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
  28
  29 // in case wcscmp is missing
  30 static int wx_wcscmp(const wchar_t *s1, const wchar_t *s2)
  31 {
  32     while (*s1 == *s2 && *s1 != 0)
  33     {
  34         s1++;
  35         s2++;
  36     }
  37     return *s1 - *s2;
  38 }
  39
  40 #endif // wxUSE_WCHAR_T && !wxUSE_UNICODE
  41
  42 // ----------------------------------------------------------------------------
  43 // test class
  44 // ----------------------------------------------------------------------------
  45
  46 class UnicodeTestCase : public CppUnit::TestCase
  47 {
  48 public:
  49     UnicodeTestCase();
  50
  51 private:
  52     CPPUNIT_TEST_SUITE( UnicodeTestCase );
  53         CPPUNIT_TEST( ToFromAscii );
  54 #if wxUSE_WCHAR_T
  55         CPPUNIT_TEST( ConstructorsWithConversion );
  56         CPPUNIT_TEST( ConversionWithNULs );
  57         CPPUNIT_TEST( ConversionUTF7 );
  58         CPPUNIT_TEST( ConversionUTF8 );
  59         CPPUNIT_TEST( ConversionUTF16 );
  60         CPPUNIT_TEST( ConversionUTF32 );
  61 #endif // wxUSE_WCHAR_T
  62     CPPUNIT_TEST_SUITE_END();
  63
  64     void ToFromAscii();
  65 #if wxUSE_WCHAR_T
  66     void ConstructorsWithConversion();
  67     void ConversionWithNULs();
  68     void ConversionUTF7();
  69     void ConversionUTF8();
  70     void ConversionUTF16();
  71     void ConversionUTF32();
  72
  73     // test if converting s using the given encoding gives ws and vice versa
  74     //
  75     // if either of the first 2 arguments is NULL, the conversion is supposed
  76     // to fail
  77     void DoTestConversion(const char *s, const wchar_t *w, wxMBConv& conv);
  78 #endif // wxUSE_WCHAR_T
  79
  80
  81     DECLARE_NO_COPY_CLASS(UnicodeTestCase)
  82 };
  83
// register in the unnamed registry so that these tests are run by default
CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase );

// also include in its own registry so that these tests can be run alone
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase, "Unicode" );
  89
  90 UnicodeTestCase::UnicodeTestCase()
  91 {
  92 }
  93
  94 void UnicodeTestCase::ToFromAscii()
  95 {
  96
  97 #define TEST_TO_FROM_ASCII(txt)                              \
  98     {                                                        \
  99         static const char *msg = txt;                        \
 100         wxString s = wxString::FromAscii(msg);               \
 101         CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 );  \
 102     }
 103
 104     TEST_TO_FROM_ASCII( "Hello, world!" );
 105     TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" );
 106 }
 107
 108 #if wxUSE_WCHAR_T
 109 void UnicodeTestCase::ConstructorsWithConversion()
 110 {
 111     // the string "Déjà" in UTF-8 and wchar_t:
 112     const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
 113     const wchar_t wchar[] = {0x44,0xE9,0x6A,0xE0,0};
 114     const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
 115     const char *utf8 = (char *)utf8Buf;
 116     const char *utf8sub = (char *)utf8subBuf;
 117
 118     wxString s1(utf8, wxConvUTF8);
 119     wxString s2(wchar, wxConvUTF8);
 120
 121 #if wxUSE_UNICODE
 122     CPPUNIT_ASSERT( s1 == wchar );
 123     CPPUNIT_ASSERT( s2 == wchar );
 124 #else
 125     CPPUNIT_ASSERT( s1 == utf8 );
 126     CPPUNIT_ASSERT( s2 == utf8 );
 127 #endif
 128
 129     wxString sub(utf8sub, wxConvUTF8); // "Dej" substring
 130     wxString s3(utf8, wxConvUTF8, 4);
 131     wxString s4(wchar, wxConvUTF8, 3);
 132
 133     CPPUNIT_ASSERT( s3 == sub );
 134     CPPUNIT_ASSERT( s4 == sub );
 135
 136 #if wxUSE_UNICODE
 137     CPPUNIT_ASSERT ( wxString("\t[pl]open.format.Sformatuj dyskietkê=gfloppy %f",
 138                                wxConvUTF8) == wxT("") ); //should stop at pos 35
 139 #endif
 140 }
 141
 142 void UnicodeTestCase::ConversionWithNULs()
 143 {
 144 #if wxUSE_UNICODE
 145         static const size_t lenNulString = 10;
 146
 147         wxString szTheString(L"The\0String", wxConvLibc, lenNulString);
 148         wxCharBuffer theBuffer = szTheString.mb_str();
 149
 150         CPPUNIT_ASSERT( memcmp(theBuffer.data(), "The\0String",
 151                         lenNulString + 1) == 0 );
 152
 153         wxString szTheString2("The\0String", wxConvLocal, lenNulString);
 154         CPPUNIT_ASSERT_EQUAL( lenNulString, szTheString2.length() );
 155         CPPUNIT_ASSERT( wxTmemcmp(szTheString2.c_str(), L"The\0String",
 156                         lenNulString + 1) == 0 );
 157 #else // !wxUSE_UNICODE
 158         wxString szTheString(wxT("TheString"));
 159         szTheString.insert(3, 1, '\0');
 160         wxWCharBuffer theBuffer = szTheString.wc_str(wxConvLibc);
 161
 162         CPPUNIT_ASSERT( memcmp(theBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
 163
 164         wxString szLocalTheString(wxT("TheString"));
 165         szLocalTheString.insert(3, 1, '\0');
 166         wxWCharBuffer theLocalBuffer = szLocalTheString.wc_str(wxConvLocal);
 167
 168         CPPUNIT_ASSERT( memcmp(theLocalBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
 169 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
 170 }
 171
 172 void
 173 UnicodeTestCase::DoTestConversion(const char *s,
 174                                   const wchar_t *ws,
 175                                   wxMBConv& conv)
 176 {
 177 #if wxUSE_UNICODE
 178     if ( ws )
 179     {
 180         wxCharBuffer buf = conv.cWC2MB(ws, (size_t)-1, NULL);
 181
 182         CPPUNIT_ASSERT( strcmp(buf, s) == 0 );
 183     }
 184 #else // wxUSE_UNICODE
 185     if ( s )
 186     {
 187         wxWCharBuffer wbuf = conv.cMB2WC(s, (size_t)-1, NULL);
 188
 189         if ( ws )
 190         {
 191             CPPUNIT_ASSERT( wbuf.data() );
 192             CPPUNIT_ASSERT( wx_wcscmp(wbuf, ws) == 0 );
 193         }
 194         else // conversion is supposed to fail
 195         {
 196             CPPUNIT_ASSERT_EQUAL( (wchar_t *)NULL, wbuf.data() );
 197         }
 198     }
 199 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
 200 }
 201
// a single test vector: the same string in its multibyte and wide forms, as
// passed to DoTestConversion()
struct StringConversionData
{
    // the string in the multibyte encoding being tested
    const char *str;

    // the corresponding wide string or NULL if str is an invalid sequence
    const wchar_t *wcs;
};
 207
 208 void UnicodeTestCase::ConversionUTF7()
 209 {
 210     static const StringConversionData utf7data[] =
 211     {
 212         { "+-", L"+" },
 213         { "+--", L"+-" },
 214
 215 #ifdef wxHAVE_U_ESCAPE
 216         { "+AKM-", L"\u00a3" },
 217 #endif // wxHAVE_U_ESCAPE
 218
 219         // the following are invalid UTF-7 sequences
 220         { "+", NULL },
 221         { "+~", NULL },
 222         { "a+", NULL },
 223     };
 224
 225     wxCSConv conv(_T("utf-7"));
 226     for ( size_t n = 0; n < WXSIZEOF(utf7data); n++ )
 227     {
 228         const StringConversionData& d = utf7data[n];
 229         DoTestConversion(d.str, d.wcs, conv);
 230         DoTestConversion(d.str, d.wcs, wxConvUTF7);
 231     }
 232 }
 233
 234 void UnicodeTestCase::ConversionUTF8()
 235 {
 236     static const StringConversionData utf8data[] =
 237     {
 238 #ifdef wxHAVE_U_ESCAPE
 239         { "\xc2\xa3", L"\u00a3" },
 240 #endif
 241         { "\xc2", NULL },
 242     };
 243
 244     wxCSConv conv(_T("utf-8"));
 245     for ( size_t n = 0; n < WXSIZEOF(utf8data); n++ )
 246     {
 247         const StringConversionData& d = utf8data[n];
 248         DoTestConversion(d.str, d.wcs, conv);
 249         DoTestConversion(d.str, d.wcs, wxConvUTF8);
 250     }
 251 }
 252
 253 void UnicodeTestCase::ConversionUTF16()
 254 {
 255     static const StringConversionData utf16data[] =
 256     {
 257 #ifdef wxHAVE_U_ESCAPE
 258         { "\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
 259           L"\u041f\u0440\u0438\u0432\u0435\u0442" },
 260         { "\x01\0\0b\x01\0\0a\x01\0\0r\0\0", L"\u0100b\u0100a\u0100r" },
 261 #endif
 262         { "\0f\0o\0o\0\0", L"foo" },
 263     };
 264
 265     wxCSConv conv(wxFONTENCODING_UTF16BE);
 266     for ( size_t n = 0; n < WXSIZEOF(utf16data); n++ )
 267     {
 268         const StringConversionData& d = utf16data[n];
 269         DoTestConversion(d.str, d.wcs, conv);
 270     }
 271
 272     // special case: this string has consecutive NULs inside it which don't
 273     // terminate the string, this exposed a bug in our conversion code which
 274     // got confused in this case
 275     size_t len;
 276     wxWCharBuffer wbuf(conv.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len));
 277     CPPUNIT_ASSERT_EQUAL( (size_t)3, len );
 278 }
 279
 280 void UnicodeTestCase::ConversionUTF32()
 281 {
 282     static const StringConversionData utf32data[] =
 283     {
 284 #ifdef wxHAVE_U_ESCAPE
 285         {
 286             "\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
 287           L"\u041f\u0440\u0438\u0432\u0435\u0442" },
 288 #endif
 289         { "\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L"foo" },
 290     };
 291
 292     wxCSConv conv(wxFONTENCODING_UTF32BE);
 293     for ( size_t n = 0; n < WXSIZEOF(utf32data); n++ )
 294     {
 295         const StringConversionData& d = utf32data[n];
 296         DoTestConversion(d.str, d.wcs, conv);
 297     }
 298
 299     size_t len;
 300     wxWCharBuffer wbuf(conv.cMB2WC("\0\0\x01\0\0\0\0B\0\0\0C" /* A macron BC */,
 301                                    12, &len));
 302     CPPUNIT_ASSERT_EQUAL( (size_t)3, len );
 303 }
 304
 305 #endif // wxUSE_WCHAR_T
 306