tests/strings/unicode.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        tests/strings/unicode.cpp
   3 // Purpose:     Unicode unit test
   4 // Author:      Vadim Zeitlin, Wlodzimierz ABX Skiba
   5 // Created:     2004-04-28
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba
   8 ///////////////////////////////////////////////////////////////////////////////
   9
  10 // ----------------------------------------------------------------------------
  11 // headers
  12 // ----------------------------------------------------------------------------
  13
  14 #include "testprec.h"
  15
  16 #ifdef __BORLANDC__
  17     #pragma hdrstop
  18 #endif
  19
  20 #ifndef WX_PRECOMP
  21 #endif // WX_PRECOMP
  22
  23 // ----------------------------------------------------------------------------
  24 // local functions
  25 // ----------------------------------------------------------------------------
  26
  27 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
  28
  29 // in case wcscmp is missing
  30 static int wx_wcscmp(const wchar_t *s1, const wchar_t *s2)
  31 {
  32     while (*s1 == *s2 && *s1 != 0)
  33     {
  34         s1++;
  35         s2++;
  36     }
  37     return *s1 - *s2;
  38 }
  39
  40 #endif // wxUSE_WCHAR_T && !wxUSE_UNICODE
  41
  42 // ----------------------------------------------------------------------------
  43 // test class
  44 // ----------------------------------------------------------------------------
  45
  46 class UnicodeTestCase : public CppUnit::TestCase
  47 {
  48 public:
  49     UnicodeTestCase();
  50
  51 private:
  52     CPPUNIT_TEST_SUITE( UnicodeTestCase );
  53         CPPUNIT_TEST( ToFromAscii );
  54 #if wxUSE_WCHAR_T
  55         CPPUNIT_TEST( ConstructorsWithConversion );
  56         CPPUNIT_TEST( Conversion );
  57         CPPUNIT_TEST( ConversionUTF7 );
  58         CPPUNIT_TEST( ConversionUTF8 );
  59 #endif // wxUSE_WCHAR_T
  60     CPPUNIT_TEST_SUITE_END();
  61
  62     void ToFromAscii();
  63 #if wxUSE_WCHAR_T
  64     void ConstructorsWithConversion();
  65     void Conversion();
  66     void ConversionUTF7();
  67     void ConversionUTF8();
  68
  69     // test if converting s using the given encoding gives ws and vice versa
  70     //
  71     // if either of the first 2 arguments is NULL, the conversion is supposed
  72     // to fail
  73     void DoTestConversion(const char *s, const wchar_t *w, wxCSConv& conv);
  74 #endif // wxUSE_WCHAR_T
  75
  76
  77     DECLARE_NO_COPY_CLASS(UnicodeTestCase)
  78 };
  79
  80 // register in the unnamed registry so that these tests are run by default
  81 CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase );
  82
  83 // also include in its own registry so that these tests can be run alone
  84 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase, "UnicodeTestCase" );
  85
  86 UnicodeTestCase::UnicodeTestCase()
  87 {
  88 }
  89
  90 void UnicodeTestCase::ToFromAscii()
  91 {
  92
  93 #define TEST_TO_FROM_ASCII(txt)                              \
  94     {                                                        \
  95         static const char *msg = txt;                        \
  96         wxString s = wxString::FromAscii(msg);               \
  97         CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 );  \
  98     }
  99
 100     TEST_TO_FROM_ASCII( "Hello, world!" );
 101     TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" );
 102 }
 103
 104 #if wxUSE_WCHAR_T
 105 void UnicodeTestCase::ConstructorsWithConversion()
 106 {
 107     // the string "Déjà" in UTF-8 and wchar_t:
 108     const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
 109     const wchar_t wchar[] = {0x44,0xE9,0x6A,0xE0,0};
 110     const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
 111     const char *utf8 = (char *)utf8Buf;
 112     const char *utf8sub = (char *)utf8subBuf;
 113
 114     wxString s1(utf8, wxConvUTF8);
 115     wxString s2(wchar, wxConvUTF8);
 116
 117 #if wxUSE_UNICODE
 118     CPPUNIT_ASSERT( s1 == wchar );
 119     CPPUNIT_ASSERT( s2 == wchar );
 120 #else
 121     CPPUNIT_ASSERT( s1 == utf8 );
 122     CPPUNIT_ASSERT( s2 == utf8 );
 123 #endif
 124
 125     wxString sub(utf8sub, wxConvUTF8); // "Dej" substring
 126     wxString s3(utf8, wxConvUTF8, 4);
 127     wxString s4(wchar, wxConvUTF8, 3);
 128
 129     CPPUNIT_ASSERT( s3 == sub );
 130     CPPUNIT_ASSERT( s4 == sub );
 131
 132 #if wxUSE_UNICODE
 133     CPPUNIT_ASSERT ( wxString("\t[pl]open.format.Sformatuj dyskietkê=gfloppy %f",
 134                                wxConvUTF8) == wxT("") ); //should stop at pos 35
 135 #endif
 136 }
 137
 138 void UnicodeTestCase::Conversion()
 139 {
 140 #if wxUSE_UNICODE
 141         wxString szTheString(L"The\0String", wxConvLibc, 10);
 142         wxCharBuffer theBuffer = szTheString.mb_str();
 143
 144         CPPUNIT_ASSERT( memcmp(theBuffer.data(), "The\0String", 11) == 0 );
 145
 146         wxString szTheString2("The\0String", wxConvLocal, 10);
 147         CPPUNIT_ASSERT( szTheString2.length() == 11 );
 148         CPPUNIT_ASSERT( wxTmemcmp(szTheString2.c_str(), L"The\0String", 11) == 0 );
 149 #else
 150         wxString szTheString(wxT("TheString"));
 151         szTheString.insert(3, 1, '\0');
 152         wxWCharBuffer theBuffer = szTheString.wc_str(wxConvLibc);
 153
 154         CPPUNIT_ASSERT( memcmp(theBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
 155
 156         wxString szLocalTheString(wxT("TheString"));
 157         szLocalTheString.insert(3, 1, '\0');
 158         wxWCharBuffer theLocalBuffer = szLocalTheString.wc_str(wxConvLocal);
 159
 160         CPPUNIT_ASSERT( memcmp(theLocalBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
 161 #endif
 162 }
 163
 164 void
 165 UnicodeTestCase::DoTestConversion(const char *s,
 166                                   const wchar_t *ws,
 167                                   wxCSConv& conv)
 168 {
 169 #if wxUSE_UNICODE
 170     if ( ws )
 171     {
 172         wxCharBuffer buf(wxString(ws).mb_str(conv));
 173
 174         CPPUNIT_ASSERT( strcmp(buf, s) == 0 );
 175     }
 176 #else // wxUSE_UNICODE
 177     if ( s )
 178     {
 179         wxWCharBuffer wbuf(wxString(s).wc_str(conv));
 180
 181         if ( ws )
 182             CPPUNIT_ASSERT( wx_wcscmp(wbuf, ws) == 0 );
 183         else
 184             CPPUNIT_ASSERT( !*wbuf );
 185     }
 186 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
 187 }
 188
 189 struct StringConversionData
 190 {
 191     const char *str;
 192     const wchar_t *wcs;
 193 };
 194
 195 void UnicodeTestCase::ConversionUTF7()
 196 {
 197     static const StringConversionData utf7data[] =
 198     {
 199         { "+-", L"+" },
 200         { "+--", L"+-" },
 201
 202 #ifdef wxHAVE_U_ESCAPE
 203         { "+AKM-", L"\u00a3" },
 204 #endif // wxHAVE_U_ESCAPE
 205
 206         // the following are invalid UTF-7 sequences
 207         { "+", NULL },
 208         { "a+", NULL },
 209     };
 210
 211     wxCSConv conv(_T("utf-7"));
 212     for ( size_t n = 0; n < WXSIZEOF(utf7data); n++ )
 213     {
 214         const StringConversionData& d = utf7data[n];
 215         DoTestConversion(d.str, d.wcs, conv);
 216     }
 217 }
 218
 219 void UnicodeTestCase::ConversionUTF8()
 220 {
 221     static const StringConversionData utf8data[] =
 222     {
 223         //\u isn't recognized on MSVC 6
 224 #ifdef wxHAVE_U_ESCAPE
 225         { "\xc2\xa3", L"\u00a3" },
 226 #endif
 227         { "\xc2", NULL },
 228     };
 229
 230     wxCSConv conv(_T("utf-8"));
 231     for ( size_t n = 0; n < WXSIZEOF(utf8data); n++ )
 232     {
 233         const StringConversionData& d = utf8data[n];
 234         DoTestConversion(d.str, d.wcs, conv);
 235     }
 236 }
 237
 238 #endif // wxUSE_WCHAR_T
 239