tests/strings/unicode.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        tests/strings/unicode.cpp
   3 // Purpose:     Unicode unit test
   4 // Author:      Vadim Zeitlin, Wlodzimierz ABX Skiba
   5 // Created:     2004-04-28
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba
   8 ///////////////////////////////////////////////////////////////////////////////
   9
  10 // ----------------------------------------------------------------------------
  11 // headers
  12 // ----------------------------------------------------------------------------
  13
  14 #include "testprec.h"
  15
  16 #ifdef __BORLANDC__
  17     #pragma hdrstop
  18 #endif
  19
  20 #ifndef WX_PRECOMP
  21 #endif // WX_PRECOMP
  22
  23 // ----------------------------------------------------------------------------
  24 // local functions
  25 // ----------------------------------------------------------------------------
  26
  27 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
  28
  29 // in case wcscmp is missing
  30 static int wx_wcscmp(const wchar_t *s1, const wchar_t *s2)
  31 {
  32     while (*s1 == *s2 && *s1 != 0)
  33     {
  34         s1++;
  35         s2++;
  36     }
  37     return *s1 - *s2;
  38 }
  39
  40 #endif // wxUSE_WCHAR_T && !wxUSE_UNICODE
  41
  42 // ----------------------------------------------------------------------------
  43 // test class
  44 // ----------------------------------------------------------------------------
  45
  46 class UnicodeTestCase : public CppUnit::TestCase
  47 {
  48 public:
  49     UnicodeTestCase();
  50
  51 private:
  52     CPPUNIT_TEST_SUITE( UnicodeTestCase );
  53         CPPUNIT_TEST( ToFromAscii );
  54 #if wxUSE_WCHAR_T
  55         CPPUNIT_TEST( ConstructorsWithConversion );
  56         CPPUNIT_TEST( Conversion );
  57         CPPUNIT_TEST( ConversionUTF7 );
  58         CPPUNIT_TEST( ConversionUTF8 );
  59 #endif // wxUSE_WCHAR_T
  60     CPPUNIT_TEST_SUITE_END();
  61
  62     void ToFromAscii();
  63 #if wxUSE_WCHAR_T
  64     void ConstructorsWithConversion();
  65     void Conversion();
  66     void ConversionUTF7();
  67     void ConversionUTF8();
  68
  69     // test if converting s using the given encoding gives ws and vice versa
  70     //
  71     // if either of the first 2 arguments is NULL, the conversion is supposed
  72     // to fail
  73     void DoTestConversion(const char *s, const wchar_t *w, wxCSConv& conv);
  74 #endif // wxUSE_WCHAR_T
  75
  76
  77     DECLARE_NO_COPY_CLASS(UnicodeTestCase)
  78 };
  79
  80 // register in the unnamed registry so that these tests are run by default
  81 CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase );
  82
  83 // also include in it's own registry so that these tests can be run alone
  84 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase, "UnicodeTestCase" );
  85
  86 UnicodeTestCase::UnicodeTestCase()
  87 {
  88 }
  89
  90 void UnicodeTestCase::ToFromAscii()
  91 {
  92
  93 #define TEST_TO_FROM_ASCII(txt)                              \
  94     {                                                        \
  95         static const char *msg = txt;                        \
  96         wxString s = wxString::FromAscii(msg);               \
  97         CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 );  \
  98     }
  99
 100     TEST_TO_FROM_ASCII( "Hello, world!" );
 101     TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" );
 102 }
 103
 104 #if wxUSE_WCHAR_T
 105 void UnicodeTestCase::ConstructorsWithConversion()
 106 {
 107     // the string "Déjà" in UTF-8 and wchar_t:
 108     const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
 109     const wchar_t wchar[] = {0x44,0xE9,0x6A,0xE0,0};
 110     const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
 111     const char *utf8 = (char *)utf8Buf;
 112     const char *utf8sub = (char *)utf8subBuf;
 113
 114     wxString s1(utf8, wxConvUTF8);
 115     wxString s2(wchar, wxConvUTF8);
 116
 117 #if wxUSE_UNICODE
 118     CPPUNIT_ASSERT( s1 == wchar );
 119     CPPUNIT_ASSERT( s2 == wchar );
 120 #else
 121     CPPUNIT_ASSERT( s1 == utf8 );
 122     CPPUNIT_ASSERT( s2 == utf8 );
 123 #endif
 124
 125     wxString sub(utf8sub, wxConvUTF8); // "Dej" substring
 126     wxString s3(utf8, wxConvUTF8, 4);
 127     wxString s4(wchar, wxConvUTF8, 3);
 128
 129     CPPUNIT_ASSERT( s3 == sub );
 130     CPPUNIT_ASSERT( s4 == sub );
 131
 132 #if wxUSE_UNICODE
 133     CPPUNIT_ASSERT ( wxString("\t[pl]open.format.Sformatuj dyskietkê=gfloppy %f",
 134                                wxConvUTF8) == wxT("") ); //should stop at pos 35
 135 #endif
 136 }
 137
 138 void UnicodeTestCase::Conversion()
 139 {
 140 #if wxUSE_UNICODE
 141         static const size_t lenNulString = 10;
 142
 143         wxString szTheString(L"The\0String", wxConvLibc, lenNulString);
 144         wxCharBuffer theBuffer = szTheString.mb_str();
 145
 146         CPPUNIT_ASSERT( memcmp(theBuffer.data(), "The\0String",
 147                         lenNulString + 1) == 0 );
 148
 149         wxString szTheString2("The\0String", wxConvLocal, lenNulString);
 150         CPPUNIT_ASSERT_EQUAL( lenNulString, szTheString2.length() );
 151         CPPUNIT_ASSERT( wxTmemcmp(szTheString2.c_str(), L"The\0String",
 152                         lenNulString + 1) == 0 );
 153 #else
 154         wxString szTheString(wxT("TheString"));
 155         szTheString.insert(3, 1, '\0');
 156         wxWCharBuffer theBuffer = szTheString.wc_str(wxConvLibc);
 157
 158         CPPUNIT_ASSERT( memcmp(theBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
 159
 160         wxString szLocalTheString(wxT("TheString"));
 161         szLocalTheString.insert(3, 1, '\0');
 162         wxWCharBuffer theLocalBuffer = szLocalTheString.wc_str(wxConvLocal);
 163
 164         CPPUNIT_ASSERT( memcmp(theLocalBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
 165 #endif
 166 }
 167
 168 void
 169 UnicodeTestCase::DoTestConversion(const char *s,
 170                                   const wchar_t *ws,
 171                                   wxCSConv& conv)
 172 {
 173 #if wxUSE_UNICODE
 174     if ( ws )
 175     {
 176         wxCharBuffer buf(wxString(ws).mb_str(conv));
 177
 178         CPPUNIT_ASSERT( strcmp(buf, s) == 0 );
 179     }
 180 #else // wxUSE_UNICODE
 181     if ( s )
 182     {
 183         wxWCharBuffer wbuf(wxString(s).wc_str(conv));
 184
 185         if ( ws )
 186             CPPUNIT_ASSERT( wx_wcscmp(wbuf, ws) == 0 );
 187         else
 188             CPPUNIT_ASSERT( !*wbuf );
 189     }
 190 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
 191 }
 192
 193 struct StringConversionData
 194 {
 195     const char *str;
 196     const wchar_t *wcs;
 197 };
 198
 199 void UnicodeTestCase::ConversionUTF7()
 200 {
 201     static const StringConversionData utf7data[] =
 202     {
 203         { "+-", L"+" },
 204         { "+--", L"+-" },
 205
 206 #ifdef wxHAVE_U_ESCAPE
 207         { "+AKM-", L"\u00a3" },
 208 #endif // wxHAVE_U_ESCAPE
 209
 210         // the following are invalid UTF-7 sequences
 211         { "+", NULL },
 212         { "a+", NULL },
 213     };
 214
 215     wxCSConv conv(_T("utf-7"));
 216     for ( size_t n = 0; n < WXSIZEOF(utf7data); n++ )
 217     {
 218         const StringConversionData& d = utf7data[n];
 219         DoTestConversion(d.str, d.wcs, conv);
 220     }
 221 }
 222
 223 void UnicodeTestCase::ConversionUTF8()
 224 {
 225     static const StringConversionData utf8data[] =
 226     {
 227         //\u isn't recognized on MSVC 6
 228 #ifdef wxHAVE_U_ESCAPE
 229         { "\xc2\xa3", L"\u00a3" },
 230 #endif
 231         { "\xc2", NULL },
 232     };
 233
 234     wxCSConv conv(_T("utf-8"));
 235     for ( size_t n = 0; n < WXSIZEOF(utf8data); n++ )
 236     {
 237         const StringConversionData& d = utf8data[n];
 238         DoTestConversion(d.str, d.wcs, conv);
 239     }
 240 }
 241
 242 #endif // wxUSE_WCHAR_T
 243