| 1 | /////////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: tests/mbconv/main.cpp |
| 3 | // Purpose: wxMBConv unit test |
| 4 | // Author: Vadim Zeitlin, Mike Wetherell, Vince Harron |
| 5 | // Created: 14.02.04 |
| 6 | // RCS-ID: $Id$ |
| 7 | // Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell, Vince Harron |
| 8 | /////////////////////////////////////////////////////////////////////////////// |
| 9 | |
| 10 | // ---------------------------------------------------------------------------- |
| 11 | // headers |
| 12 | // ---------------------------------------------------------------------------- |
| 13 | |
| 14 | #include "testprec.h" |
| 15 | |
| 16 | #ifdef __BORLANDC__ |
| 17 | #pragma hdrstop |
| 18 | #endif |
| 19 | |
| 20 | #ifndef WX_PRECOMP |
| 21 | #include "wx/wx.h" |
| 22 | #endif // WX_PRECOMP |
| 23 | |
| 24 | #include "wx/strconv.h" |
| 25 | #include "wx/string.h" |
| 26 | #include "wx/txtstrm.h" |
| 27 | #include "wx/mstream.h" |
| 28 | |
| 29 | #if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H |
| 30 | #define HAVE_WCHAR_H |
| 31 | #endif |
| 32 | |
| 33 | // ---------------------------------------------------------------------------- |
| 34 | // Some wide character constants. "\uXXXX" escapes aren't supported by old |
| 35 | // compilers such as VC++ 5 and g++ 2.95. |
| 36 | // ---------------------------------------------------------------------------- |
| 37 | |
// Expected wide strings for the one-byte UTF-8 inputs used by the UTF8_* tests.
wchar_t u41[] = { 0x41, 0 };
wchar_t u7f[] = { 0x7f, 0 };

// Expected wide strings for the two-byte UTF-8 inputs.
wchar_t u80[] = { 0x80, 0 };
wchar_t u391[] = { 0x391, 0 };
wchar_t u7ff[] = { 0x7ff, 0 };

// Expected wide strings for the three-byte UTF-8 inputs.
wchar_t u800[] = { 0x800, 0 };
wchar_t u2620[] = { 0x2620, 0 };
wchar_t ufffd[] = { 0xfffd, 0 };

// Expected wide strings for the four-byte UTF-8 inputs (code points above
// U+FFFF).
#if SIZEOF_WCHAR_T != 4
// wchar_t is not 4 bytes wide here: each code point is written as a UTF-16
// surrogate pair
wchar_t u10000[] = { 0xd800, 0xdc00, 0 };
wchar_t u1000a5[] = { 0xdbc0, 0xdca5, 0 };
wchar_t u10fffd[] = { 0xdbff, 0xdffd, 0 };
#else
// 4-byte wchar_t: each code point fits in a single element
wchar_t u10000[] = { 0x10000, 0 };
wchar_t u1000a5[] = { 0x1000a5, 0 };
wchar_t u10fffd[] = { 0x10fffd, 0 };
#endif
| 58 | |
| 59 | // ---------------------------------------------------------------------------- |
| 60 | // test class |
| 61 | // ---------------------------------------------------------------------------- |
| 62 | |
// CppUnit fixture for the wxMBConv converters. It declares one test per
// encoding (UTF-7/8/16/32, CP932, CP1252, libc, iconv, fontmap), the shared
// encode/decode helpers those tests call, and, when HAVE_WCHAR_H is defined,
// a set of small UTF-8 boundary tests defined inline.
class MBConvTestCase : public CppUnit::TestCase
{
public:
    MBConvTestCase() { }

private:
    // Tests are listed in the order in which they are added to the suite.
    CPPUNIT_TEST_SUITE( MBConvTestCase );
        CPPUNIT_TEST( UTF32LETests );
        CPPUNIT_TEST( UTF32BETests );
        CPPUNIT_TEST( WC2CP1250 );
        CPPUNIT_TEST( UTF7Tests );
        CPPUNIT_TEST( UTF8Tests );
        CPPUNIT_TEST( UTF16LETests );
        CPPUNIT_TEST( UTF16BETests );
        CPPUNIT_TEST( CP932Tests );
        CPPUNIT_TEST( CP1252Tests ); // depends on UTF8 Decoder functioning correctly
        CPPUNIT_TEST( LibcTests );
        CPPUNIT_TEST( IconvTests );
        CPPUNIT_TEST( FontmapTests );
#ifdef HAVE_WCHAR_H
        CPPUNIT_TEST( UTF8_41 );
        CPPUNIT_TEST( UTF8_7f );
        CPPUNIT_TEST( UTF8_80 );
        CPPUNIT_TEST( UTF8_c2_7f );
        CPPUNIT_TEST( UTF8_c2_80 );
        CPPUNIT_TEST( UTF8_ce_91 );
        CPPUNIT_TEST( UTF8_df_bf );
        CPPUNIT_TEST( UTF8_df_c0 );
        CPPUNIT_TEST( UTF8_e0_a0_7f );
        CPPUNIT_TEST( UTF8_e0_a0_80 );
        CPPUNIT_TEST( UTF8_e2_98_a0 );
        CPPUNIT_TEST( UTF8_ef_bf_bd );
        CPPUNIT_TEST( UTF8_ef_bf_c0 );
        CPPUNIT_TEST( UTF8_f0_90_80_7f );
        CPPUNIT_TEST( UTF8_f0_90_80_80 );
        CPPUNIT_TEST( UTF8_f4_8f_bf_bd );
        CPPUNIT_TEST( UTF8PUA_f4_80_82_a5 );
        CPPUNIT_TEST( UTF8Octal_backslash245 );
#endif // HAVE_WCHAR_H
    CPPUNIT_TEST_SUITE_END();

    // one test per converter / encoding
    void WC2CP1250();
    void UTF7Tests();
    void UTF8Tests();
    void UTF16LETests();
    void UTF16BETests();
    void UTF32LETests();
    void UTF32BETests();
    void CP932Tests();
    void CP1252Tests();
    void LibcTests();
    void FontmapTests();
    void IconvTests();

    // verifies that the specified multibyte sequence decodes to the specified wchar_t sequence
    void TestDecoder(
        const wchar_t* wideBuffer,  // the expected result: the same characters as multiBuffer, as wchar_t
        size_t         wideChars,   // the number of wide characters at wideBuffer
        const char*    multiBuffer, // the multibyte input that "converter" has to decode
        size_t         multiBytes,  // the length of multiBuffer in bytes
        wxMBConv*      converter,   // the wxMBConv object under test
        int            sizeofNull   // number of bytes occupied by terminating null in this encoding
        );

    // verifies that the specified wchar_t sequence encodes to the specified multibyte sequence
    void TestEncoder(
        const wchar_t* wideBuffer,  // the wide character input that "converter" has to encode
        size_t         wideChars,   // the number of wide characters at wideBuffer
        const char*    multiBuffer, // the expected result: the same characters in the converter's encoding
        size_t         multiBytes,  // the length of multiBuffer in bytes
        wxMBConv*      converter,   // the wxMBConv object under test
        int            sizeofNull   // number of bytes occupied by terminating null in this encoding
        );

#if wxUSE_UNICODE && wxUSE_STREAMS
    // use wxTextInputStream to exercise wxMBConv interface
    // (this reveals some bugs in certain wxMBConv subclasses)
    void TestStreamDecoder(
        const wchar_t* wideBuffer,  // the expected result: the same characters as multiBuffer, as wchar_t
        size_t         wideChars,   // the number of wide characters at wideBuffer
        const char*    multiBuffer, // the multibyte input that "converter" has to decode
        size_t         multiBytes,  // the length of multiBuffer in bytes
        wxMBConv*      converter    // the wxMBConv object under test
        );

    // use wxTextOutputStream to exercise wxMBConv interface
    // (this reveals some bugs in certain wxMBConv subclasses)
    void TestStreamEncoder(
        const wchar_t* wideBuffer,  // the wide character input that "converter" has to encode
        size_t         wideChars,   // the number of wide characters at wideBuffer
        const char*    multiBuffer, // the expected result: the same characters in the converter's encoding
        size_t         multiBytes,  // the length of multiBuffer in bytes
        wxMBConv*      converter    // the wxMBConv object under test
        );
#endif

    // tests the encoding and decoding capability of a wxMBConv object
    //
    // decodes the utf-8 bytes into wide characters
    // encodes the wide characters to compare against input multiBuffer
    // decodes the multiBuffer to compare against wide characters
    void TestCoder(
        const char*    multiBuffer, // a multibyte encoded character sequence that can be decoded by "converter"
        size_t         multiBytes,  // the byte length of the multibyte character sequence that can be decoded by "converter"
        const char*    utf8Buffer,  // the same character sequence as multiBuffer, encoded as UTF-8
        size_t         utf8Bytes,   // the byte length of the UTF-8 encoded character sequence
        wxMBConv*      converter,   // the wxMBConv object that can decode multiBuffer into a wide character sequence
        int            sizeofNull   // the number of bytes occupied by a terminating null in the converter's encoding
        );

#ifdef HAVE_WCHAR_H
    // UTF-8 tests. Test the first, last and one in the middle for sequences
    // of each length
    //
    // NOTE(review): a NULL wide sequence presumably marks an input that is
    // expected to be rejected; the UTF8() implementation is outside this view,
    // confirm there.

    // one-byte sequences
    void UTF8_41() { UTF8("\x41", u41); }
    void UTF8_7f() { UTF8("\x7f", u7f); }
    void UTF8_80() { UTF8("\x80", NULL); }

    // two-byte sequences
    void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL); }
    void UTF8_c2_80() { UTF8("\xc2\x80", u80); }
    void UTF8_ce_91() { UTF8("\xce\x91", u391); }
    void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff); }
    void UTF8_df_c0() { UTF8("\xdf\xc0", NULL); }

    // three-byte sequences
    void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL); }
    void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800); }
    void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620); }
    void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd); }
    void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL); }

    // four-byte sequences
    void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL); }
    void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000); }
    void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd); }

    // test 'escaping the escape characters' for the two escaping schemes
    void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); }
    void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); }

    // implementation for the utf-8 tests (see comments below)
    void UTF8(const char *charSequence, const wchar_t *wideSequence);
    void UTF8PUA(const char *charSequence, const wchar_t *wideSequence);
    void UTF8Octal(const char *charSequence, const wchar_t *wideSequence);
    void UTF8(const char *charSequence, const wchar_t *wideSequence, int option);
#endif // HAVE_WCHAR_H

    DECLARE_NO_COPY_CLASS(MBConvTestCase)
};
| 210 | |
// register in the unnamed registry so that these tests are run by default
CPPUNIT_TEST_SUITE_REGISTRATION( MBConvTestCase );

// also include in its own registries, under both names below, so that these
// tests can be run alone
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase, "MBConvTestCase" );
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase, "MBConv" );
| 217 | |
| 218 | void MBConvTestCase::WC2CP1250() |
| 219 | { |
| 220 | static const struct Data |
| 221 | { |
| 222 | const wchar_t *wc; |
| 223 | const char *cp1250; |
| 224 | } data[] = |
| 225 | { |
| 226 | { L"hello", "hello" }, // test that it works in simplest case |
| 227 | { L"\xBD of \xBD is \xBC", NULL }, // this should fail as cp1250 doesn't have 1/2 |
| 228 | }; |
| 229 | |
| 230 | wxCSConv cs1250(wxFONTENCODING_CP1250); |
| 231 | for ( size_t n = 0; n < WXSIZEOF(data); n++ ) |
| 232 | { |
| 233 | const Data& d = data[n]; |
| 234 | if (d.cp1250) |
| 235 | { |
| 236 | CPPUNIT_ASSERT( strcmp(cs1250.cWC2MB(d.wc), d.cp1250) == 0 ); |
| 237 | } |
| 238 | else |
| 239 | { |
| 240 | CPPUNIT_ASSERT( (const char*)cs1250.cWC2MB(d.wc) == NULL ); |
| 241 | } |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | // Print an unsigned character array as a C unsigned character array. |
| 246 | // NB: Please don't remove this function even though it's not used anywhere, |
| 247 | // it's very useful when debugging a failed test. |
| 248 | wxString CByteArrayFormat( const void* data, size_t len, const wxChar* name ) |
| 249 | { |
| 250 | const unsigned char* bytes = (unsigned char*)data; |
| 251 | wxString result; |
| 252 | |
| 253 | result.Printf( _T("const static unsigned char %s[%i] = \n{"), name, (int)len ); |
| 254 | |
| 255 | for ( size_t i = 0; i < len; i++ ) |
| 256 | { |
| 257 | if ( i != 0 ) |
| 258 | { |
| 259 | result.append( _T(",") ); |
| 260 | } |
| 261 | if ((i%16)==0) |
| 262 | { |
| 263 | result.append( _T("\n ") ); |
| 264 | } |
| 265 | wxString byte = wxString::Format( _T("0x%02x"), bytes[i] ); |
| 266 | result.append(byte); |
| 267 | } |
| 268 | result.append( _T("\n};\n") ); |
| 269 | return result; |
| 270 | } |
| 271 | |
| 272 | // The following bytes represent the same string, containing Japanese and English |
| 273 | // characters, encoded in several different formats. |
| 274 | |
// The welcome string as UTF-7, as produced by iconv: the ASCII part is left
// as is and the Japanese part is one base64 run introduced by '+'.
static const unsigned char welcome_utf7_iconv[84] =
{
    0x57,0x65,0x6c,0x63,0x6f,0x6d,0x65,0x20,0x74,0x6f,0x20,0x6f,0x75,0x72,0x20,0x63,
    0x79,0x62,0x65,0x72,0x20,0x73,0x70,0x61,0x63,0x65,0x20,0x66,0x6f,0x72,0x63,0x65,
    0x2e,0x20,0x20,0x2b,0x4d,0x46,0x6b,0x77,0x55,0x49,0x74,0x6d,0x57,0x39,0x38,0x77,
    0x61,0x35,0x62,0x37,0x69,0x6e,0x45,0x77,0x6b,0x6a,0x42,0x5a,0x4d,0x49,0x73,0x77,
    0x65,0x7a,0x42,0x47,0x4d,0x45,0x77,0x77,0x52,0x44,0x42,0x45,0x4d,0x47,0x63,0x77,
    0x57,0x54,0x41,0x43
};
// The welcome string as UTF-7, as produced by the wx encoder (iconv can
// decode this successfully). Unlike the iconv output, the spaces are
// escaped too ("+ACA-").
static const unsigned char welcome_utf7_wx[109] =
{
    0x57,0x65,0x6c,0x63,0x6f,0x6d,0x65,0x2b,0x41,0x43,0x41,0x2d,0x74,0x6f,0x2b,0x41,
    0x43,0x41,0x2d,0x6f,0x75,0x72,0x2b,0x41,0x43,0x41,0x2d,0x63,0x79,0x62,0x65,0x72,
    0x2b,0x41,0x43,0x41,0x2d,0x73,0x70,0x61,0x63,0x65,0x2b,0x41,0x43,0x41,0x2d,0x66,
    0x6f,0x72,0x63,0x65,0x2e,0x2b,0x41,0x43,0x41,0x41,0x49,0x44,0x42,0x5a,0x4d,0x46,
    0x43,0x4c,0x5a,0x6c,0x76,0x66,0x4d,0x47,0x75,0x57,0x2b,0x34,0x70,0x78,0x4d,0x4a,
    0x49,0x77,0x57,0x54,0x43,0x4c,0x4d,0x48,0x73,0x77,0x52,0x6a,0x42,0x4d,0x4d,0x45,
    0x51,0x77,0x52,0x44,0x42,0x6e,0x4d,0x46,0x6b,0x77,0x41,0x67,0x2d
};
// The welcome string as UTF-8, as produced by iconv: 35 ASCII bytes followed
// by the multi-byte sequences of the Japanese part.
static const unsigned char welcome_utf8[89] =
{
    0x57,0x65,0x6c,0x63,0x6f,0x6d,0x65,0x20,0x74,0x6f,0x20,0x6f,0x75,0x72,0x20,0x63,
    0x79,0x62,0x65,0x72,0x20,0x73,0x70,0x61,0x63,0x65,0x20,0x66,0x6f,0x72,0x63,0x65,
    0x2e,0x20,0x20,0xe3,0x81,0x99,0xe3,0x81,0x90,0xe8,0xad,0xa6,0xe5,0xaf,0x9f,0xe3,
    0x81,0xab,0xe9,0x9b,0xbb,0xe8,0xa9,0xb1,0xe3,0x82,0x92,0xe3,0x81,0x99,0xe3,0x82,
    0x8b,0xe3,0x81,0xbb,0xe3,0x81,0x86,0xe3,0x81,0x8c,0xe3,0x81,0x84,0xe3,0x81,0x84,
    0xe3,0x81,0xa7,0xe3,0x81,0x99,0xe3,0x80,0x82
};
// The welcome string as little-endian UTF-16, as produced by iconv
// (53 two-byte code units, low byte first, no terminator).
static const unsigned char welcome_utf16le[106] =
{
    0x57,0x00,0x65,0x00,0x6c,0x00,0x63,0x00,0x6f,0x00,0x6d,0x00,0x65,0x00,0x20,0x00,
    0x74,0x00,0x6f,0x00,0x20,0x00,0x6f,0x00,0x75,0x00,0x72,0x00,0x20,0x00,0x63,0x00,
    0x79,0x00,0x62,0x00,0x65,0x00,0x72,0x00,0x20,0x00,0x73,0x00,0x70,0x00,0x61,0x00,
    0x63,0x00,0x65,0x00,0x20,0x00,0x66,0x00,0x6f,0x00,0x72,0x00,0x63,0x00,0x65,0x00,
    0x2e,0x00,0x20,0x00,0x20,0x00,0x59,0x30,0x50,0x30,0x66,0x8b,0xdf,0x5b,0x6b,0x30,
    0xfb,0x96,0x71,0x8a,0x92,0x30,0x59,0x30,0x8b,0x30,0x7b,0x30,0x46,0x30,0x4c,0x30,
    0x44,0x30,0x44,0x30,0x67,0x30,0x59,0x30,0x02,0x30
};
// The welcome string as big-endian UTF-16, as produced by iconv
// (53 two-byte code units, high byte first, no terminator).
static const unsigned char welcome_utf16be[106] =
{
    0x00,0x57,0x00,0x65,0x00,0x6c,0x00,0x63,0x00,0x6f,0x00,0x6d,0x00,0x65,0x00,0x20,
    0x00,0x74,0x00,0x6f,0x00,0x20,0x00,0x6f,0x00,0x75,0x00,0x72,0x00,0x20,0x00,0x63,
    0x00,0x79,0x00,0x62,0x00,0x65,0x00,0x72,0x00,0x20,0x00,0x73,0x00,0x70,0x00,0x61,
    0x00,0x63,0x00,0x65,0x00,0x20,0x00,0x66,0x00,0x6f,0x00,0x72,0x00,0x63,0x00,0x65,
    0x00,0x2e,0x00,0x20,0x00,0x20,0x30,0x59,0x30,0x50,0x8b,0x66,0x5b,0xdf,0x30,0x6b,
    0x96,0xfb,0x8a,0x71,0x30,0x92,0x30,0x59,0x30,0x8b,0x30,0x7b,0x30,0x46,0x30,0x4c,
    0x30,0x44,0x30,0x44,0x30,0x67,0x30,0x59,0x30,0x02
};
// The welcome string as little-endian UTF-32, as produced by iconv
// (53 four-byte code units, lowest byte first, no terminator).
static const unsigned char welcome_utf32le[212] =
{
    0x57,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x63,0x00,0x00,0x00,
    0x6f,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x20,0x00,0x00,0x00,
    0x74,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,
    0x75,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x63,0x00,0x00,0x00,
    0x79,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x72,0x00,0x00,0x00,
    0x20,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x61,0x00,0x00,0x00,
    0x63,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x66,0x00,0x00,0x00,
    0x6f,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x65,0x00,0x00,0x00,
    0x2e,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x59,0x30,0x00,0x00,
    0x50,0x30,0x00,0x00,0x66,0x8b,0x00,0x00,0xdf,0x5b,0x00,0x00,0x6b,0x30,0x00,0x00,
    0xfb,0x96,0x00,0x00,0x71,0x8a,0x00,0x00,0x92,0x30,0x00,0x00,0x59,0x30,0x00,0x00,
    0x8b,0x30,0x00,0x00,0x7b,0x30,0x00,0x00,0x46,0x30,0x00,0x00,0x4c,0x30,0x00,0x00,
    0x44,0x30,0x00,0x00,0x44,0x30,0x00,0x00,0x67,0x30,0x00,0x00,0x59,0x30,0x00,0x00,
    0x02,0x30,0x00,0x00
};
// The welcome string as big-endian UTF-32, as produced by iconv
// (53 four-byte code units, highest byte first, no terminator).
static const unsigned char welcome_utf32be[212] =
{
    0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x63,
    0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x20,
    0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x6f,
    0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x63,
    0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x72,
    0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x61,
    0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x66,
    0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x65,
    0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x30,0x59,
    0x00,0x00,0x30,0x50,0x00,0x00,0x8b,0x66,0x00,0x00,0x5b,0xdf,0x00,0x00,0x30,0x6b,
    0x00,0x00,0x96,0xfb,0x00,0x00,0x8a,0x71,0x00,0x00,0x30,0x92,0x00,0x00,0x30,0x59,
    0x00,0x00,0x30,0x8b,0x00,0x00,0x30,0x7b,0x00,0x00,0x30,0x46,0x00,0x00,0x30,0x4c,
    0x00,0x00,0x30,0x44,0x00,0x00,0x30,0x44,0x00,0x00,0x30,0x67,0x00,0x00,0x30,0x59,
    0x00,0x00,0x30,0x02
};
// The welcome string as CP932, as produced by iconv: 35 single-byte ASCII
// characters followed by the double-byte Japanese part.
static const unsigned char welcome_cp932[71] =
{
    0x57,0x65,0x6c,0x63,0x6f,0x6d,0x65,0x20,0x74,0x6f,0x20,0x6f,0x75,0x72,0x20,0x63,
    0x79,0x62,0x65,0x72,0x20,0x73,0x70,0x61,0x63,0x65,0x20,0x66,0x6f,0x72,0x63,0x65,
    0x2e,0x20,0x20,0x82,0xb7,0x82,0xae,0x8c,0x78,0x8e,0x40,0x82,0xc9,0x93,0x64,0x98,
    0x62,0x82,0xf0,0x82,0xb7,0x82,0xe9,0x82,0xd9,0x82,0xa4,0x82,0xaa,0x82,0xa2,0x82,
    0xa2,0x82,0xc5,0x82,0xb7,0x81,0x42
};
| 373 | |
// welcome_wchar_t names whichever welcome_* byte array has the same layout
// as a native wchar_t string: UTF-16 for 2-byte wchar_t, UTF-32 for 4-byte
// wchar_t, in the byte order of the build. If no combination matches, the
// macro stays undefined and the tests using it fail to compile.
#if SIZEOF_WCHAR_T == 2
    #if wxBYTE_ORDER == wxBIG_ENDIAN
        #define welcome_wchar_t welcome_utf16be
    #elif wxBYTE_ORDER == wxLITTLE_ENDIAN
        #define welcome_wchar_t welcome_utf16le
    #endif
#elif SIZEOF_WCHAR_T == 4
    #if wxBYTE_ORDER == wxBIG_ENDIAN
        #define welcome_wchar_t welcome_utf32be
    #elif wxBYTE_ORDER == wxLITTLE_ENDIAN
        #define welcome_wchar_t welcome_utf32le
    #endif
#endif
| 387 | |
| 388 | void MBConvTestCase::UTF7Tests() |
| 389 | { |
| 390 | TestDecoder |
| 391 | ( |
| 392 | (const wchar_t*)welcome_wchar_t, |
| 393 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 394 | (const char*)welcome_utf7_iconv, |
| 395 | sizeof(welcome_utf7_iconv), |
| 396 | &wxConvUTF7, |
| 397 | 1 |
| 398 | ); |
| 399 | TestDecoder |
| 400 | ( |
| 401 | (const wchar_t*)welcome_wchar_t, |
| 402 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 403 | (const char*)welcome_utf7_wx, |
| 404 | sizeof(welcome_utf7_wx), |
| 405 | &wxConvUTF7, |
| 406 | 1 |
| 407 | ); |
| 408 | #if 0 |
| 409 | // wxWidget's UTF-7 encoder generates different byte sequences than iconv's. |
| 410 | // but both seem to be equally legal. |
| 411 | // This test won't work and that's okay. |
| 412 | TestEncoder |
| 413 | ( |
| 414 | (const wchar_t*)welcome_wchar_t, |
| 415 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 416 | (const char*)welcome_utf7_iconv, |
| 417 | sizeof(welcome_utf7_iconv), |
| 418 | &wxConvUTF7, |
| 419 | 1 |
| 420 | ); |
| 421 | #endif |
| 422 | TestEncoder |
| 423 | ( |
| 424 | (const wchar_t*)welcome_wchar_t, |
| 425 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 426 | (const char*)welcome_utf7_wx, |
| 427 | sizeof(welcome_utf7_wx), |
| 428 | &wxConvUTF7, |
| 429 | 1 |
| 430 | ); |
| 431 | } |
| 432 | |
| 433 | void MBConvTestCase::UTF8Tests() |
| 434 | { |
| 435 | TestDecoder |
| 436 | ( |
| 437 | (const wchar_t*)welcome_wchar_t, |
| 438 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 439 | (const char*)welcome_utf8, |
| 440 | sizeof(welcome_utf8), |
| 441 | &wxConvUTF8, |
| 442 | 1 |
| 443 | ); |
| 444 | TestEncoder |
| 445 | ( |
| 446 | (const wchar_t*)welcome_wchar_t, |
| 447 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 448 | (const char*)welcome_utf8, |
| 449 | sizeof(welcome_utf8), |
| 450 | &wxConvUTF8, |
| 451 | 1 |
| 452 | ); |
| 453 | } |
| 454 | |
| 455 | void MBConvTestCase::UTF16LETests() |
| 456 | { |
| 457 | wxMBConvUTF16LE convUTF16LE; |
| 458 | TestDecoder |
| 459 | ( |
| 460 | (const wchar_t*)welcome_wchar_t, |
| 461 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 462 | (const char*)welcome_utf16le, |
| 463 | sizeof(welcome_utf16le), |
| 464 | &convUTF16LE, |
| 465 | 2 |
| 466 | ); |
| 467 | TestEncoder |
| 468 | ( |
| 469 | (const wchar_t*)welcome_wchar_t, |
| 470 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 471 | (const char*)welcome_utf16le, |
| 472 | sizeof(welcome_utf16le), |
| 473 | &convUTF16LE, |
| 474 | 2 |
| 475 | ); |
| 476 | } |
| 477 | |
| 478 | void MBConvTestCase::UTF16BETests() |
| 479 | { |
| 480 | wxMBConvUTF16BE convUTF16BE; |
| 481 | TestDecoder |
| 482 | ( |
| 483 | (const wchar_t*)welcome_wchar_t, |
| 484 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 485 | (const char*)welcome_utf16be, |
| 486 | sizeof(welcome_utf16be), |
| 487 | &convUTF16BE, |
| 488 | 2 |
| 489 | ); |
| 490 | TestEncoder |
| 491 | ( |
| 492 | (const wchar_t*)welcome_wchar_t, |
| 493 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 494 | (const char*)welcome_utf16be, |
| 495 | sizeof(welcome_utf16be), |
| 496 | &convUTF16BE, |
| 497 | 2 |
| 498 | ); |
| 499 | } |
| 500 | |
| 501 | void MBConvTestCase::UTF32LETests() |
| 502 | { |
| 503 | wxMBConvUTF32LE convUTF32LE; |
| 504 | TestDecoder |
| 505 | ( |
| 506 | (const wchar_t*)welcome_wchar_t, |
| 507 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 508 | (const char*)welcome_utf32le, |
| 509 | sizeof(welcome_utf32le), |
| 510 | &convUTF32LE, |
| 511 | 4 |
| 512 | ); |
| 513 | TestEncoder |
| 514 | ( |
| 515 | (const wchar_t*)welcome_wchar_t, |
| 516 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 517 | (const char*)welcome_utf32le, |
| 518 | sizeof(welcome_utf32le), |
| 519 | &convUTF32LE, |
| 520 | 4 |
| 521 | ); |
| 522 | } |
| 523 | |
| 524 | void MBConvTestCase::UTF32BETests() |
| 525 | { |
| 526 | wxMBConvUTF32BE convUTF32BE; |
| 527 | TestDecoder |
| 528 | ( |
| 529 | (const wchar_t*)welcome_wchar_t, |
| 530 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 531 | (const char*)welcome_utf32be, |
| 532 | sizeof(welcome_utf32be), |
| 533 | &convUTF32BE, |
| 534 | 4 |
| 535 | ); |
| 536 | TestEncoder |
| 537 | ( |
| 538 | (const wchar_t*)welcome_wchar_t, |
| 539 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 540 | (const char*)welcome_utf32be, |
| 541 | sizeof(welcome_utf32be), |
| 542 | &convUTF32BE, |
| 543 | 4 |
| 544 | ); |
| 545 | } |
| 546 | |
| 547 | void MBConvTestCase::CP932Tests() |
| 548 | { |
| 549 | wxCSConv convCP932( wxFONTENCODING_CP932 ); |
| 550 | TestDecoder |
| 551 | ( |
| 552 | (const wchar_t*)welcome_wchar_t, |
| 553 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 554 | (const char*)welcome_cp932, |
| 555 | sizeof(welcome_cp932), |
| 556 | &convCP932, |
| 557 | 1 |
| 558 | ); |
| 559 | TestEncoder |
| 560 | ( |
| 561 | (const wchar_t*)welcome_wchar_t, |
| 562 | sizeof(welcome_wchar_t)/sizeof(wchar_t), |
| 563 | (const char*)welcome_cp932, |
| 564 | sizeof(welcome_cp932), |
| 565 | &convCP932, |
| 566 | 1 |
| 567 | ); |
| 568 | } |
| 569 | |
// a character sequence encoded as iso8859-1 (iconv): every byte value from
// 0x05 to 0xff, in ascending order
static const unsigned char iso8859_1[251] =
{
    0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,
    0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,
    0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,
    0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,
    0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,
    0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,
    0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
    0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x82,0x83,0x84,
    0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94,
    0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa3,0xa4,
    0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,0xb3,0xb4,
    0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
    0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,
    0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,0xe0,0xe1,0xe2,0xe3,0xe4,
    0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,0xf0,0xf1,0xf2,0xf3,0xf4,
    0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};
// the iso8859_1 character sequence encoded as UTF-8 (iconv): bytes up to 0x7f
// are unchanged, each of the remaining 128 characters takes two bytes
// (0xc2 or 0xc3 lead byte)
static const unsigned char iso8859_1_utf8[379] =
{
    0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,
    0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,
    0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,
    0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,
    0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,
    0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,
    0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
    0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0xc2,0x80,0xc2,0x81,0xc2,
    0x82,0xc2,0x83,0xc2,0x84,0xc2,0x85,0xc2,0x86,0xc2,0x87,0xc2,0x88,0xc2,0x89,0xc2,
    0x8a,0xc2,0x8b,0xc2,0x8c,0xc2,0x8d,0xc2,0x8e,0xc2,0x8f,0xc2,0x90,0xc2,0x91,0xc2,
    0x92,0xc2,0x93,0xc2,0x94,0xc2,0x95,0xc2,0x96,0xc2,0x97,0xc2,0x98,0xc2,0x99,0xc2,
    0x9a,0xc2,0x9b,0xc2,0x9c,0xc2,0x9d,0xc2,0x9e,0xc2,0x9f,0xc2,0xa0,0xc2,0xa1,0xc2,
    0xa2,0xc2,0xa3,0xc2,0xa4,0xc2,0xa5,0xc2,0xa6,0xc2,0xa7,0xc2,0xa8,0xc2,0xa9,0xc2,
    0xaa,0xc2,0xab,0xc2,0xac,0xc2,0xad,0xc2,0xae,0xc2,0xaf,0xc2,0xb0,0xc2,0xb1,0xc2,
    0xb2,0xc2,0xb3,0xc2,0xb4,0xc2,0xb5,0xc2,0xb6,0xc2,0xb7,0xc2,0xb8,0xc2,0xb9,0xc2,
    0xba,0xc2,0xbb,0xc2,0xbc,0xc2,0xbd,0xc2,0xbe,0xc2,0xbf,0xc3,0x80,0xc3,0x81,0xc3,
    0x82,0xc3,0x83,0xc3,0x84,0xc3,0x85,0xc3,0x86,0xc3,0x87,0xc3,0x88,0xc3,0x89,0xc3,
    0x8a,0xc3,0x8b,0xc3,0x8c,0xc3,0x8d,0xc3,0x8e,0xc3,0x8f,0xc3,0x90,0xc3,0x91,0xc3,
    0x92,0xc3,0x93,0xc3,0x94,0xc3,0x95,0xc3,0x96,0xc3,0x97,0xc3,0x98,0xc3,0x99,0xc3,
    0x9a,0xc3,0x9b,0xc3,0x9c,0xc3,0x9d,0xc3,0x9e,0xc3,0x9f,0xc3,0xa0,0xc3,0xa1,0xc3,
    0xa2,0xc3,0xa3,0xc3,0xa4,0xc3,0xa5,0xc3,0xa6,0xc3,0xa7,0xc3,0xa8,0xc3,0xa9,0xc3,
    0xaa,0xc3,0xab,0xc3,0xac,0xc3,0xad,0xc3,0xae,0xc3,0xaf,0xc3,0xb0,0xc3,0xb1,0xc3,
    0xb2,0xc3,0xb3,0xc3,0xb4,0xc3,0xb5,0xc3,0xb6,0xc3,0xb7,0xc3,0xb8,0xc3,0xb9,0xc3,
    0xba,0xc3,0xbb,0xc3,0xbc,0xc3,0xbd,0xc3,0xbe,0xc3,0xbf
};
| 618 | |
// a character sequence encoded as CP1252 (iconv): bytes 0x05-0x7f and
// 0xa0-0xff in ascending order, followed by 27 bytes from the 0x80-0x9f
// range, listed in the order of their encodings in CP1252_utf8
static const unsigned char CP1252[246] =
{
    0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,
    0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,
    0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,
    0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,
    0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,
    0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,
    0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
    0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0xa0,0xa1,0xa2,0xa3,0xa4,
    0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,0xb3,0xb4,
    0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
    0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,
    0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,0xe0,0xe1,0xe2,0xe3,0xe4,
    0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,0xf0,0xf1,0xf2,0xf3,0xf4,
    0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff,0x8c,0x9c,0x8a,0x9a,0x9f,
    0x8e,0x9e,0x83,0x88,0x98,0x96,0x97,0x91,0x92,0x82,0x93,0x94,0x84,0x86,0x87,0x95,
    0x85,0x89,0x8b,0x9b,0x80,0x99
};
// the CP1252 character sequence encoded as UTF-8 (iconv): single bytes up to
// 0x7f, two-byte sequences for the 0xa0-0xff part, then two- and three-byte
// sequences for the trailing 27 characters
static const unsigned char CP1252_utf8[386] =
{
    0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,
    0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,
    0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,
    0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,
    0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,
    0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,
    0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
    0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0xc2,0xa0,0xc2,0xa1,0xc2,
    0xa2,0xc2,0xa3,0xc2,0xa4,0xc2,0xa5,0xc2,0xa6,0xc2,0xa7,0xc2,0xa8,0xc2,0xa9,0xc2,
    0xaa,0xc2,0xab,0xc2,0xac,0xc2,0xad,0xc2,0xae,0xc2,0xaf,0xc2,0xb0,0xc2,0xb1,0xc2,
    0xb2,0xc2,0xb3,0xc2,0xb4,0xc2,0xb5,0xc2,0xb6,0xc2,0xb7,0xc2,0xb8,0xc2,0xb9,0xc2,
    0xba,0xc2,0xbb,0xc2,0xbc,0xc2,0xbd,0xc2,0xbe,0xc2,0xbf,0xc3,0x80,0xc3,0x81,0xc3,
    0x82,0xc3,0x83,0xc3,0x84,0xc3,0x85,0xc3,0x86,0xc3,0x87,0xc3,0x88,0xc3,0x89,0xc3,
    0x8a,0xc3,0x8b,0xc3,0x8c,0xc3,0x8d,0xc3,0x8e,0xc3,0x8f,0xc3,0x90,0xc3,0x91,0xc3,
    0x92,0xc3,0x93,0xc3,0x94,0xc3,0x95,0xc3,0x96,0xc3,0x97,0xc3,0x98,0xc3,0x99,0xc3,
    0x9a,0xc3,0x9b,0xc3,0x9c,0xc3,0x9d,0xc3,0x9e,0xc3,0x9f,0xc3,0xa0,0xc3,0xa1,0xc3,
    0xa2,0xc3,0xa3,0xc3,0xa4,0xc3,0xa5,0xc3,0xa6,0xc3,0xa7,0xc3,0xa8,0xc3,0xa9,0xc3,
    0xaa,0xc3,0xab,0xc3,0xac,0xc3,0xad,0xc3,0xae,0xc3,0xaf,0xc3,0xb0,0xc3,0xb1,0xc3,
    0xb2,0xc3,0xb3,0xc3,0xb4,0xc3,0xb5,0xc3,0xb6,0xc3,0xb7,0xc3,0xb8,0xc3,0xb9,0xc3,
    0xba,0xc3,0xbb,0xc3,0xbc,0xc3,0xbd,0xc3,0xbe,0xc3,0xbf,0xc5,0x92,0xc5,0x93,0xc5,
    0xa0,0xc5,0xa1,0xc5,0xb8,0xc5,0xbd,0xc5,0xbe,0xc6,0x92,0xcb,0x86,0xcb,0x9c,0xe2,
    0x80,0x93,0xe2,0x80,0x94,0xe2,0x80,0x98,0xe2,0x80,0x99,0xe2,0x80,0x9a,0xe2,0x80,
    0x9c,0xe2,0x80,0x9d,0xe2,0x80,0x9e,0xe2,0x80,0xa0,0xe2,0x80,0xa1,0xe2,0x80,0xa2,
    0xe2,0x80,0xa6,0xe2,0x80,0xb0,0xe2,0x80,0xb9,0xe2,0x80,0xba,0xe2,0x82,0xac,0xe2,
    0x84,0xa2
};
| 668 | |
// a character sequence encoded as iso8859-5 (iconv)
//
// Every byte value from 0x05 to 0xff occurs exactly once (251 bytes, no
// terminating null).  The bytes are not in strictly ascending order: 0xfd
// and 0xad directly follow 0xa0, and 0xf0 comes last, so that each position
// lines up with the corresponding character of iso8859_5_utf8, which is in
// ascending Unicode code point order.
static const unsigned char iso8859_5[251] =
{
    0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,
    0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,
    0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,
    0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,
    0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,
    0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,
    0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
    0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x82,0x83,0x84,
    0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94,
    0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xfd,0xad,0xa1,0xa2,
    0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xae,0xaf,0xb0,0xb1,0xb2,0xb3,
    0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,
    0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,0xd0,0xd1,0xd2,0xd3,
    0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,0xe0,0xe1,0xe2,0xe3,
    0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,0xf1,0xf2,0xf3,0xf4,
    0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfe,0xff,0xf0
};
// the above character sequence encoded as UTF-8 (iconv)
//
// Layout of the data, in ascending Unicode code point order:
//  - 0x05..0x7f as single bytes
//  - U+0080..U+00A0, U+00A7 and U+00AD as two byte sequences (lead byte 0xc2)
//  - the characters with lead bytes 0xd0 and 0xd1 as two byte sequences
//  - one final three byte sequence (0xe2,0x84,0x96)
// All 380 elements are initialized explicitly, so the array carries no
// terminating null.
static const unsigned char iso8859_5_utf8[380] =
{
    0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,
    0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,
    0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,
    0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,
    0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,
    0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,
    0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
    0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0xc2,0x80,0xc2,0x81,0xc2,
    0x82,0xc2,0x83,0xc2,0x84,0xc2,0x85,0xc2,0x86,0xc2,0x87,0xc2,0x88,0xc2,0x89,0xc2,
    0x8a,0xc2,0x8b,0xc2,0x8c,0xc2,0x8d,0xc2,0x8e,0xc2,0x8f,0xc2,0x90,0xc2,0x91,0xc2,
    0x92,0xc2,0x93,0xc2,0x94,0xc2,0x95,0xc2,0x96,0xc2,0x97,0xc2,0x98,0xc2,0x99,0xc2,
    0x9a,0xc2,0x9b,0xc2,0x9c,0xc2,0x9d,0xc2,0x9e,0xc2,0x9f,0xc2,0xa0,0xc2,0xa7,0xc2,
    0xad,0xd0,0x81,0xd0,0x82,0xd0,0x83,0xd0,0x84,0xd0,0x85,0xd0,0x86,0xd0,0x87,0xd0,
    0x88,0xd0,0x89,0xd0,0x8a,0xd0,0x8b,0xd0,0x8c,0xd0,0x8e,0xd0,0x8f,0xd0,0x90,0xd0,
    0x91,0xd0,0x92,0xd0,0x93,0xd0,0x94,0xd0,0x95,0xd0,0x96,0xd0,0x97,0xd0,0x98,0xd0,
    0x99,0xd0,0x9a,0xd0,0x9b,0xd0,0x9c,0xd0,0x9d,0xd0,0x9e,0xd0,0x9f,0xd0,0xa0,0xd0,
    0xa1,0xd0,0xa2,0xd0,0xa3,0xd0,0xa4,0xd0,0xa5,0xd0,0xa6,0xd0,0xa7,0xd0,0xa8,0xd0,
    0xa9,0xd0,0xaa,0xd0,0xab,0xd0,0xac,0xd0,0xad,0xd0,0xae,0xd0,0xaf,0xd0,0xb0,0xd0,
    0xb1,0xd0,0xb2,0xd0,0xb3,0xd0,0xb4,0xd0,0xb5,0xd0,0xb6,0xd0,0xb7,0xd0,0xb8,0xd0,
    0xb9,0xd0,0xba,0xd0,0xbb,0xd0,0xbc,0xd0,0xbd,0xd0,0xbe,0xd0,0xbf,0xd1,0x80,0xd1,
    0x81,0xd1,0x82,0xd1,0x83,0xd1,0x84,0xd1,0x85,0xd1,0x86,0xd1,0x87,0xd1,0x88,0xd1,
    0x89,0xd1,0x8a,0xd1,0x8b,0xd1,0x8c,0xd1,0x8d,0xd1,0x8e,0xd1,0x8f,0xd1,0x91,0xd1,
    0x92,0xd1,0x93,0xd1,0x94,0xd1,0x95,0xd1,0x96,0xd1,0x97,0xd1,0x98,0xd1,0x99,0xd1,
    0x9a,0xd1,0x9b,0xd1,0x9c,0xd1,0x9e,0xd1,0x9f,0xe2,0x84,0x96
};
| 717 | |
| 718 | // DecodeUTF8 |
| 719 | // decodes the specified *unterminated* UTF-8 byte array |
| 720 | wxWCharBuffer DecodeUTF8( |
| 721 | const void* data, // an unterminated UTF-8 encoded byte array |
| 722 | size_t size // the byte length of data |
| 723 | ) |
| 724 | { |
| 725 | // the decoder requires a null terminated buffer. |
| 726 | // the input data is not null terminated. |
| 727 | // copy to null terminated buffer |
| 728 | |
| 729 | wxCharBuffer nullTerminated( size+1 ); |
| 730 | memcpy( nullTerminated.data(), data, size ); |
| 731 | nullTerminated.data()[size] = 0; |
| 732 | return wxConvUTF8.cMB2WC(nullTerminated.data()); |
| 733 | } |
| 734 | |
| 735 | // tests the encoding and decoding capability of an wxMBConv object |
| 736 | // |
| 737 | // decodes the utf-8 bytes into wide characters |
| 738 | // encodes the wide characters to compare against input multiBuffer |
| 739 | // decodes the multiBuffer to compare against wide characters |
| 740 | // decodes the multiBuffer into wide characters |
| 741 | void MBConvTestCase::TestCoder( |
| 742 | const char* multiBuffer, // a multibyte encoded character sequence that can be decoded by "converter" |
| 743 | size_t multiBytes, // the byte length of the multibyte character sequence that can be decoded by "converter" |
| 744 | const char* utf8Buffer, // the same character sequence as multiBuffer, encoded as UTF-8 |
| 745 | size_t utf8Bytes, // the byte length of the UTF-8 encoded character sequence |
| 746 | wxMBConv* converter, // the wxMBConv object thta can decode multiBuffer into a wide character sequence |
| 747 | int sizeofNull // the number of bytes occupied by a terminating null in the converter's encoding |
| 748 | ) |
| 749 | { |
| 750 | // wide character size and endian-ess varies from platform to platform |
| 751 | // compiler support for wide character literals varies from compiler to compiler |
| 752 | // so we should store the wide character version as UTF-8 and depend on |
| 753 | // the UTF-8 converter's ability to decode it to platform specific wide characters |
| 754 | // this test is invalid if the UTF-8 converter can't decode |
| 755 | wxWCharBuffer wideBuffer((size_t)0); |
| 756 | wideBuffer = DecodeUTF8( utf8Buffer, utf8Bytes ); |
| 757 | size_t wideChars = wxWcslen( wideBuffer.data() ); |
| 758 | |
| 759 | TestDecoder |
| 760 | ( |
| 761 | wideBuffer.data(), |
| 762 | wideChars, |
| 763 | (const char*)multiBuffer, |
| 764 | multiBytes, |
| 765 | converter, |
| 766 | sizeofNull |
| 767 | ); |
| 768 | TestEncoder |
| 769 | ( |
| 770 | wideBuffer.data(), |
| 771 | wideChars, |
| 772 | (const char*)multiBuffer, |
| 773 | multiBytes, |
| 774 | converter, |
| 775 | sizeofNull |
| 776 | ); |
| 777 | } |
| 778 | |
| 779 | |
| 780 | WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name ); |
| 781 | |
| 782 | void MBConvTestCase::FontmapTests() |
| 783 | { |
| 784 | #ifdef wxUSE_FONTMAP |
| 785 | wxMBConv* converter = new_wxMBConv_wxwin( _T("CP1252") ); |
| 786 | if ( !converter ) |
| 787 | { |
| 788 | return; |
| 789 | } |
| 790 | TestCoder( |
| 791 | (const char*)CP1252, |
| 792 | sizeof(CP1252), |
| 793 | (const char*)CP1252_utf8, |
| 794 | sizeof(CP1252_utf8), |
| 795 | converter, |
| 796 | 1 |
| 797 | ); |
| 798 | delete converter; |
| 799 | #endif |
| 800 | } |
| 801 | |
| 802 | |
| 803 | WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name ); |
| 804 | |
| 805 | void MBConvTestCase::IconvTests() |
| 806 | { |
| 807 | #ifdef HAVE_ICONV |
| 808 | wxMBConv* converter = new_wxMBConv_iconv( _T("CP932") ); |
| 809 | if ( !converter ) |
| 810 | { |
| 811 | return; |
| 812 | } |
| 813 | TestCoder( |
| 814 | (const char*)welcome_cp932, |
| 815 | sizeof(welcome_cp932), |
| 816 | (const char*)welcome_utf8, |
| 817 | sizeof(welcome_utf8), |
| 818 | converter, |
| 819 | 1 |
| 820 | ); |
| 821 | delete converter; |
| 822 | #endif |
| 823 | } |
| 824 | |
| 825 | void MBConvTestCase::CP1252Tests() |
| 826 | { |
| 827 | wxCSConv convCP1252( wxFONTENCODING_CP1252 ); |
| 828 | TestCoder( |
| 829 | (const char*)CP1252, |
| 830 | sizeof(CP1252), |
| 831 | (const char*)CP1252_utf8, |
| 832 | sizeof(CP1252_utf8), |
| 833 | &convCP1252, |
| 834 | 1 |
| 835 | ); |
| 836 | } |
| 837 | |
| 838 | void MBConvTestCase::LibcTests() |
| 839 | { |
| 840 | // There isn't a locale that all systems support (except "C"), so leave |
| 841 | // this one disabled for non-Windows systems for the moment, until |
| 842 | // a solution can be found. |
| 843 | #ifdef __WXMSW__ |
| 844 | |
| 845 | #ifdef __WXMSW__ |
| 846 | setlocale( LC_ALL, "English_United States.1252" ); |
| 847 | const unsigned char* systemMB = CP1252; |
| 848 | size_t systemMB_size = sizeof(CP1252); |
| 849 | const unsigned char* systemMB_utf8 = CP1252_utf8; |
| 850 | size_t systemMB_utf8_size = sizeof(CP1252_utf8); |
| 851 | #else |
| 852 | setlocale( LC_ALL, "en_US.iso8859-1" ); |
| 853 | const unsigned char* systemMB = iso8859_1; |
| 854 | size_t systemMB_size = sizeof(iso8859_1); |
| 855 | const unsigned char* systemMB_utf8 = iso8859_1_utf8; |
| 856 | size_t systemMB_utf8_size = sizeof(iso8859_1_utf8); |
| 857 | #endif |
| 858 | wxMBConvLibc convLibc; |
| 859 | TestCoder( |
| 860 | (const char*)systemMB, |
| 861 | systemMB_size, |
| 862 | (const char*)systemMB_utf8, |
| 863 | systemMB_utf8_size, |
| 864 | &convLibc, |
| 865 | 1 |
| 866 | ); |
| 867 | |
| 868 | #endif // __WXMSW__ |
| 869 | } |
| 870 | |
| 871 | // verifies that the specified mb sequences decode to the specified wc sequence |
| 872 | void MBConvTestCase::TestDecoder( |
| 873 | const wchar_t* wideBuffer, // the same character sequence as multiBuffer, encoded as wchar_t |
| 874 | size_t wideChars, // the number of wide characters at wideBuffer |
| 875 | const char* multiBuffer, // a multibyte encoded character sequence that can be decoded by "converter" |
| 876 | size_t multiBytes, // the byte length of the multibyte character sequence that can be decoded by "converter" |
| 877 | wxMBConv* converter, // the wxMBConv object that can decode multiBuffer into a wide character sequence |
| 878 | int sizeofNull // number of bytes occupied by terminating null in this encoding |
| 879 | ) |
| 880 | { |
| 881 | const unsigned UNINITIALIZED = 0xcd; |
| 882 | |
| 883 | // copy the input bytes into a null terminated buffer |
| 884 | wxCharBuffer inputCopy( multiBytes+sizeofNull ); |
| 885 | memcpy( inputCopy.data(), multiBuffer, multiBytes ); |
| 886 | memset( &inputCopy.data()[multiBytes], 0, sizeofNull ); |
| 887 | |
| 888 | // calculate the output size |
| 889 | size_t outputWritten = converter->MB2WC |
| 890 | ( |
| 891 | 0, |
| 892 | (const char*)inputCopy.data(), |
| 893 | 0 |
| 894 | ); |
| 895 | // make sure the correct output length was calculated |
| 896 | CPPUNIT_ASSERT( outputWritten == wideChars ); |
| 897 | |
| 898 | // convert the string |
| 899 | size_t guardChars = 8; // to make sure we're not overrunning the output buffer |
| 900 | size_t nullCharacters = 1; |
| 901 | size_t outputBufferChars = outputWritten + nullCharacters + guardChars; |
| 902 | wxWCharBuffer outputBuffer(outputBufferChars); |
| 903 | memset( outputBuffer.data(), UNINITIALIZED, outputBufferChars*sizeof(wchar_t) ); |
| 904 | |
| 905 | outputWritten = converter->MB2WC |
| 906 | ( |
| 907 | outputBuffer.data(), |
| 908 | (const char*)inputCopy.data(), |
| 909 | outputBufferChars |
| 910 | ); |
| 911 | // make sure the correct number of characters were outputs |
| 912 | CPPUNIT_ASSERT( outputWritten == wideChars ); |
| 913 | |
| 914 | // make sure the characters generated are correct |
| 915 | CPPUNIT_ASSERT( 0 == memcmp( outputBuffer, wideBuffer, wideChars*sizeof(wchar_t) ) ); |
| 916 | |
| 917 | // the output buffer should be null terminated |
| 918 | CPPUNIT_ASSERT( outputBuffer[outputWritten] == 0 ); |
| 919 | |
| 920 | // make sure the rest of the output buffer is untouched |
| 921 | for ( size_t i = (wideChars+1)*sizeof(wchar_t); i < (outputBufferChars*sizeof(wchar_t)); i++ ) |
| 922 | { |
| 923 | CPPUNIT_ASSERT( ((unsigned char*)outputBuffer.data())[i] == UNINITIALIZED ); |
| 924 | } |
| 925 | |
| 926 | #if wxUSE_UNICODE && wxUSE_STREAMS |
| 927 | TestStreamDecoder( wideBuffer, wideChars, multiBuffer, multiBytes, converter ); |
| 928 | #endif |
| 929 | } |
| 930 | |
| 931 | // verifies that the specified wc sequences encodes to the specified mb sequence |
| 932 | void MBConvTestCase::TestEncoder( |
| 933 | const wchar_t* wideBuffer, // the same character sequence as multiBuffer, encoded as wchar_t |
| 934 | size_t wideChars, // the number of wide characters at wideBuffer |
| 935 | const char* multiBuffer, // a multibyte encoded character sequence that can be decoded by "converter" |
| 936 | size_t multiBytes, // the byte length of the multibyte character sequence that can be decoded by "converter" |
| 937 | wxMBConv* converter, // the wxMBConv object that can decode multiBuffer into a wide character sequence |
| 938 | int sizeofNull // number of bytes occupied by terminating null in this encoding |
| 939 | ) |
| 940 | { |
| 941 | const unsigned UNINITIALIZED = 0xcd; |
| 942 | |
| 943 | // copy the input bytes into a null terminated buffer |
| 944 | wxWCharBuffer inputCopy( wideChars + 1 ); |
| 945 | memcpy( inputCopy.data(), wideBuffer, (wideChars*sizeof(wchar_t)) ); |
| 946 | inputCopy.data()[wideChars] = 0; |
| 947 | |
| 948 | // calculate the output size |
| 949 | size_t outputWritten = converter->WC2MB |
| 950 | ( |
| 951 | 0, |
| 952 | (const wchar_t*)inputCopy.data(), |
| 953 | 0 |
| 954 | ); |
| 955 | // make sure the correct output length was calculated |
| 956 | CPPUNIT_ASSERT( outputWritten == multiBytes ); |
| 957 | |
| 958 | // convert the string |
| 959 | size_t guardBytes = 8; // to make sure we're not overrunning the output buffer |
| 960 | size_t outputBufferSize = outputWritten + sizeofNull + guardBytes; |
| 961 | wxCharBuffer outputBuffer(outputBufferSize); |
| 962 | memset( outputBuffer.data(), UNINITIALIZED, outputBufferSize ); |
| 963 | |
| 964 | outputWritten = converter->WC2MB |
| 965 | ( |
| 966 | outputBuffer.data(), |
| 967 | (const wchar_t*)inputCopy.data(), |
| 968 | outputBufferSize |
| 969 | ); |
| 970 | |
| 971 | // make sure the correct number of characters were output |
| 972 | CPPUNIT_ASSERT( outputWritten == multiBytes ); |
| 973 | |
| 974 | // make sure the characters generated are correct |
| 975 | CPPUNIT_ASSERT( 0 == memcmp( outputBuffer, multiBuffer, multiBytes ) ); |
| 976 | |
| 977 | size_t i; |
| 978 | |
| 979 | // the output buffer should be null terminated |
| 980 | for ( i = multiBytes; i < multiBytes + sizeofNull; i++ ) |
| 981 | { |
| 982 | CPPUNIT_ASSERT( ((unsigned char*)outputBuffer.data())[i] == 0 ); |
| 983 | } |
| 984 | |
| 985 | // make sure the rest of the output buffer is untouched |
| 986 | for ( i = multiBytes + sizeofNull; i < outputBufferSize; i++ ) |
| 987 | { |
| 988 | CPPUNIT_ASSERT( ((unsigned char*)outputBuffer.data())[i] == UNINITIALIZED ); |
| 989 | } |
| 990 | |
| 991 | #if wxUSE_UNICODE && wxUSE_STREAMS |
| 992 | TestStreamEncoder( wideBuffer, wideChars, multiBuffer, multiBytes, converter ); |
| 993 | #endif |
| 994 | } |
| 995 | |
#if wxUSE_UNICODE && wxUSE_STREAMS
// Reads multiBuffer through a wxTextInputStream that uses "converter" and
// compares every character read with wideBuffer.
// (driving wxMBConv this way reveals some bugs in certain wxMBConv subclasses)
void MBConvTestCase::TestStreamDecoder(
    const wchar_t* wideBuffer,  // the same character sequence as multiBuffer, encoded as wchar_t
    size_t wideChars,           // the number of wide characters at wideBuffer
    const char* multiBuffer,    // a multibyte encoded character sequence that can be decoded by "converter"
    size_t multiBytes,          // the byte length of the multibyte character sequence
    wxMBConv* converter         // the wxMBConv object that can decode multiBuffer into a wide character sequence
    )
{
    // The subject of this test is neither wxMemoryInputStream nor
    // wxTextInputStream but the way wxTextInputStream uses wxMBConv
    // (which has exposed some problems with wxMBConv).
    wxMemoryInputStream memoryInputStream( multiBuffer, multiBytes );
    wxTextInputStream textInputStream( memoryInputStream, wxT(""), *converter );

    size_t i = 0;
    while ( i < wideChars )
    {
        wxChar wc = textInputStream.GetChar();
        CPPUNIT_ASSERT( wc == wideBuffer[i] );
        i++;
    }

    // once all characters are consumed, reading yields 0 and the underlying
    // stream is at its end
    CPPUNIT_ASSERT( 0 == textInputStream.GetChar() );
    CPPUNIT_ASSERT( memoryInputStream.Eof() );
}
#endif
| 1021 | |
#if wxUSE_UNICODE && wxUSE_STREAMS
// Writes wideBuffer through a wxTextOutputStream that uses "converter" and
// compares the bytes that end up in the stream with multiBuffer.
// (driving wxMBConv this way reveals some bugs in certain wxMBConv subclasses)
void MBConvTestCase::TestStreamEncoder(
    const wchar_t* wideBuffer,  // the same character sequence as multiBuffer, encoded as wchar_t
    size_t wideChars,           // the number of wide characters at wideBuffer
    const char* multiBuffer,    // a multibyte encoded character sequence that can be decoded by "converter"
    size_t multiBytes,          // the byte length of the multibyte character sequence
    wxMBConv* converter         // the wxMBConv object that can decode multiBuffer into a wide character sequence
    )
{
    // The subject of this test is neither wxMemoryOutputStream nor
    // wxTextOutputStream but the way wxTextOutputStream uses wxMBConv
    // (which has exposed some problems with wxMBConv).
    wxMemoryOutputStream memoryOutputStream;

    // wxEOL_UNIX will pass \n \r unchanged
    wxTextOutputStream textOutputStream( memoryOutputStream, wxEOL_UNIX, *converter );

    const wchar_t* const wideEnd = wideBuffer + wideChars;
    for ( const wchar_t* next = wideBuffer; next != wideEnd; ++next )
    {
        textOutputStream.PutChar( *next );
    }

    // exactly multiBytes bytes must have been written ...
    const wxFileOffset bytesWritten = memoryOutputStream.TellO();
    CPPUNIT_ASSERT_EQUAL( (wxFileOffset)multiBytes, bytesWritten );

    // ... and they must match the expected multibyte sequence
    wxCharBuffer copy( bytesWritten );
    memoryOutputStream.CopyTo( copy.data(), bytesWritten );
    CPPUNIT_ASSERT_EQUAL( 0, memcmp( copy.data(), multiBuffer, multiBytes ) );
}
#endif
| 1049 | |
| 1050 | |
| 1051 | // ---------------------------------------------------------------------------- |
| 1052 | // UTF-8 tests |
| 1053 | // ---------------------------------------------------------------------------- |
| 1054 | |
| 1055 | #ifdef HAVE_WCHAR_H |
| 1056 | |
| 1057 | // Check that 'charSequence' translates to 'wideSequence' and back. |
| 1058 | // Invalid sequences can be tested by giving NULL for 'wideSequence'. Even |
| 1059 | // invalid sequences should roundtrip when an option is given and this is |
| 1060 | // checked. |
| 1061 | // |
| 1062 | void MBConvTestCase::UTF8(const char *charSequence, |
| 1063 | const wchar_t *wideSequence) |
| 1064 | { |
| 1065 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); |
| 1066 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); |
| 1067 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); |
| 1068 | } |
| 1069 | |
| 1070 | // Use this alternative when 'charSequence' contains a PUA character. Such |
| 1071 | // sequences should still roundtrip ok, and this is checked. |
| 1072 | // |
| 1073 | void MBConvTestCase::UTF8PUA(const char *charSequence, |
| 1074 | const wchar_t *wideSequence) |
| 1075 | { |
| 1076 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); |
| 1077 | UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); |
| 1078 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); |
| 1079 | } |
| 1080 | |
| 1081 | // Use this alternative when 'charSequence' contains an octal escape sequence. |
| 1082 | // Such sequences should still roundtrip ok, and this is checked. |
| 1083 | // |
| 1084 | void MBConvTestCase::UTF8Octal(const char *charSequence, |
| 1085 | const wchar_t *wideSequence) |
| 1086 | { |
| 1087 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); |
| 1088 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); |
| 1089 | UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); |
| 1090 | } |
| 1091 | |
// in case wcscpy is missing
//
// Copies 'src', including its terminating null, to 'dest' and returns 'dest'.
static wchar_t *wx_wcscpy(wchar_t *dest, const wchar_t *src)
{
    size_t pos = 0;
    for (;;)
    {
        const wchar_t ch = src[pos];
        dest[pos] = ch;
        if (ch == 0)
            break;      // the terminator has been copied as well
        pos++;
    }
    return dest;
}
| 1101 | |
// in case wcscat is missing
//
// Appends 'src', including its terminating null, to the null terminated
// string at 'dest' and returns 'dest'.
static wchar_t *wx_wcscat(wchar_t *dest, const wchar_t *src)
{
    // locate the terminator of the existing text
    size_t end = 0;
    while (dest[end] != 0)
        end++;

    // copy from there on, terminator included
    for (size_t pos = 0; ; pos++)
    {
        const wchar_t ch = src[pos];
        dest[end + pos] = ch;
        if (ch == 0)
            break;
    }
    return dest;
}
| 1113 | |
// in case wcscmp is missing
//
// Returns the difference between the first pair of characters that differ
// (or between the two terminators, i.e. 0, if the strings are equal).
static int wx_wcscmp(const wchar_t *s1, const wchar_t *s2)
{
    // skip the common prefix, stopping at the end of the first string
    size_t pos = 0;
    while (s1[pos] != 0 && s1[pos] == s2[pos])
        pos++;

    return s1[pos] - s2[pos];
}
| 1125 | |
// in case wcslen is missing
//
// Returns the number of characters in front of the terminating null.
static size_t wx_wcslen(const wchar_t *s)
{
    size_t count = 0;
    while (s[count] != 0)
        count++;
    return count;
}
| 1135 | |
| 1136 | // include the option in the error messages so it's possible to see which |
| 1137 | // test failed |
| 1138 | #define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr) |
| 1139 | |
| 1140 | // The test implementation |
| 1141 | // |
| 1142 | void MBConvTestCase::UTF8(const char *charSequence, |
| 1143 | const wchar_t *wideSequence, |
| 1144 | int option) |
| 1145 | { |
| 1146 | const size_t BUFSIZE = 128; |
| 1147 | wxASSERT(strlen(charSequence) * 3 + 10 < BUFSIZE); |
| 1148 | char bytes[BUFSIZE]; |
| 1149 | |
| 1150 | // include the option in the error messages so it's possible to see |
| 1151 | // which test failed |
| 1152 | sprintf(bytes, " (with option == %d)", option); |
| 1153 | std::string errmsg(bytes); |
| 1154 | |
| 1155 | // put the charSequence at the start, middle and end of a string |
| 1156 | strcpy(bytes, charSequence); |
| 1157 | strcat(bytes, "ABC"); |
| 1158 | strcat(bytes, charSequence); |
| 1159 | strcat(bytes, "XYZ"); |
| 1160 | strcat(bytes, charSequence); |
| 1161 | |
| 1162 | // translate it into wide characters |
| 1163 | wxMBConvUTF8 utf8(option); |
| 1164 | wchar_t widechars[BUFSIZE]; |
| 1165 | size_t lenResult = utf8.MB2WC(NULL, bytes, 0); |
| 1166 | size_t result = utf8.MB2WC(widechars, bytes, BUFSIZE); |
| 1167 | UTF8ASSERT(result == lenResult); |
| 1168 | |
| 1169 | // check we got the expected result |
| 1170 | if (wideSequence) { |
| 1171 | UTF8ASSERT(result != (size_t)-1); |
| 1172 | wxASSERT(result < BUFSIZE); |
| 1173 | |
| 1174 | wchar_t expected[BUFSIZE]; |
| 1175 | wx_wcscpy(expected, wideSequence); |
| 1176 | wx_wcscat(expected, L"ABC"); |
| 1177 | wx_wcscat(expected, wideSequence); |
| 1178 | wx_wcscat(expected, L"XYZ"); |
| 1179 | wx_wcscat(expected, wideSequence); |
| 1180 | |
| 1181 | UTF8ASSERT(wx_wcscmp(widechars, expected) == 0); |
| 1182 | UTF8ASSERT(wx_wcslen(widechars) == result); |
| 1183 | } |
| 1184 | else { |
| 1185 | // If 'wideSequence' is NULL, then the result is expected to be |
| 1186 | // invalid. Normally that is as far as we can go, but if there is an |
| 1187 | // option then the conversion should succeed anyway, and it should be |
| 1188 | // possible to translate back to the original |
| 1189 | if (!option) { |
| 1190 | UTF8ASSERT(result == (size_t)-1); |
| 1191 | return; |
| 1192 | } |
| 1193 | else { |
| 1194 | UTF8ASSERT(result != (size_t)-1); |
| 1195 | } |
| 1196 | } |
| 1197 | |
| 1198 | // translate it back and check we get the original |
| 1199 | char bytesAgain[BUFSIZE]; |
| 1200 | size_t lenResultAgain = utf8.WC2MB(NULL, widechars, 0); |
| 1201 | size_t resultAgain = utf8.WC2MB(bytesAgain, widechars, BUFSIZE); |
| 1202 | UTF8ASSERT(resultAgain == lenResultAgain); |
| 1203 | UTF8ASSERT(resultAgain != (size_t)-1); |
| 1204 | wxASSERT(resultAgain < BUFSIZE); |
| 1205 | |
| 1206 | UTF8ASSERT(strcmp(bytes, bytesAgain) == 0); |
| 1207 | UTF8ASSERT(strlen(bytesAgain) == resultAgain); |
| 1208 | } |
| 1209 | |
| 1210 | #endif // HAVE_WCHAR_H |