]> git.saurik.com Git - wxWidgets.git/blame - tests/mbconv/mbconvtest.cpp
changed to DOS format; added tif_extension.c
[wxWidgets.git] / tests / mbconv / mbconvtest.cpp
CommitLineData
2cc07181
VZ
1///////////////////////////////////////////////////////////////////////////////
2// Name: tests/mbconv/mbconvtest.cpp
3// Purpose: wxMBConv unit test
726c8204 4// Author: Vadim Zeitlin, Mike Wetherell
2cc07181
VZ
5// Created: 14.02.04
6// RCS-ID: $Id$
726c8204 7// Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell
2cc07181
VZ
8///////////////////////////////////////////////////////////////////////////////
9
10// ----------------------------------------------------------------------------
11// headers
12// ----------------------------------------------------------------------------
13
8899b155 14#include "testprec.h"
20f46e8d
VS
15
16#ifdef __BORLANDC__
17 #pragma hdrstop
18#endif
19
20#ifndef WX_PRECOMP
21 #include "wx/wx.h"
22#endif // WX_PRECOMP
23
2cc07181
VZ
24#include "wx/strconv.h"
25#include "wx/string.h"
26
726c8204
MW
27#if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H
28 #define HAVE_WCHAR_H
29#endif
30
// ----------------------------------------------------------------------------
// Wide character strings used as expected values by the UTF-8 tests. They are
// written as numeric arrays because "\uXXXX" escapes aren't supported by old
// compilers such as VC++ 5 and g++ 2.95.
// ----------------------------------------------------------------------------

// code points whose UTF-8 form is a single byte
wchar_t u41[] = { 0x0041, 0x0000 };
wchar_t u7f[] = { 0x007F, 0x0000 };

// code points whose UTF-8 form is two bytes
wchar_t u80[] = { 0x0080, 0x0000 };
wchar_t u391[] = { 0x0391, 0x0000 };
wchar_t u7ff[] = { 0x07FF, 0x0000 };

// code points whose UTF-8 form is three bytes
wchar_t u800[] = { 0x0800, 0x0000 };
wchar_t u2620[] = { 0x2620, 0x0000 };
wchar_t ufffd[] = { 0xFFFD, 0x0000 };

// code points above U+FFFF (four UTF-8 bytes)
#if SIZEOF_WCHAR_T == 4
// wchar_t holds the code point directly
wchar_t u10000[] = { 0x010000, 0x0000 };
wchar_t u1000a5[] = { 0x1000A5, 0x0000 };
wchar_t u10fffd[] = { 0x10FFFD, 0x0000 };
#else
// otherwise each code point is spelled as a UTF-16 surrogate pair
wchar_t u10000[] = { 0xD800, 0xDC00, 0x0000 };
wchar_t u1000a5[] = { 0xDBC0, 0xDCA5, 0x0000 };
wchar_t u10fffd[] = { 0xDBFF, 0xDFFD, 0x0000 };
#endif
56
2cc07181
VZ
57// ----------------------------------------------------------------------------
58// test class
59// ----------------------------------------------------------------------------
60
61class MBConvTestCase : public CppUnit::TestCase
62{
63public:
64 MBConvTestCase() { }
65
66private:
67 CPPUNIT_TEST_SUITE( MBConvTestCase );
68 CPPUNIT_TEST( WC2CP1250 );
726c8204
MW
69#ifdef HAVE_WCHAR_H
70 CPPUNIT_TEST( UTF8_41 );
71 CPPUNIT_TEST( UTF8_7f );
72 CPPUNIT_TEST( UTF8_80 );
73 CPPUNIT_TEST( UTF8_c2_7f );
74 CPPUNIT_TEST( UTF8_c2_80 );
75 CPPUNIT_TEST( UTF8_ce_91 );
76 CPPUNIT_TEST( UTF8_df_bf );
77 CPPUNIT_TEST( UTF8_df_c0 );
78 CPPUNIT_TEST( UTF8_e0_a0_7f );
79 CPPUNIT_TEST( UTF8_e0_a0_80 );
80 CPPUNIT_TEST( UTF8_e2_98_a0 );
81 CPPUNIT_TEST( UTF8_ef_bf_bd );
82 CPPUNIT_TEST( UTF8_ef_bf_c0 );
83 CPPUNIT_TEST( UTF8_f0_90_80_7f );
84 CPPUNIT_TEST( UTF8_f0_90_80_80 );
85 CPPUNIT_TEST( UTF8_f4_8f_bf_bd );
86 CPPUNIT_TEST( UTF8PUA_f4_80_82_a5 );
87 CPPUNIT_TEST( UTF8Octal_backslash245 );
88#endif // HAVE_WCHAR_H
2cc07181
VZ
89 CPPUNIT_TEST_SUITE_END();
90
91 void WC2CP1250();
92
726c8204
MW
93#ifdef HAVE_WCHAR_H
94 // UTF-8 tests. Test the first, last and one in the middle for sequences
95 // of each length
96 void UTF8_41() { UTF8("\x41", u41); }
97 void UTF8_7f() { UTF8("\x7f", u7f); }
98 void UTF8_80() { UTF8("\x80", NULL); }
99
100 void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL); }
101 void UTF8_c2_80() { UTF8("\xc2\x80", u80); }
102 void UTF8_ce_91() { UTF8("\xce\x91", u391); }
103 void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff); }
104 void UTF8_df_c0() { UTF8("\xdf\xc0", NULL); }
105
106 void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL); }
107 void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800); }
108 void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620); }
109 void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd); }
110 void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL); }
111
112 void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL); }
113 void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000); }
114 void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd); }
115
116 // test 'escaping the escape characters' for the two escaping schemes
117 void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); }
118 void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); }
119
120 // implementation for the utf-8 tests (see comments below)
121 void UTF8(const char *charSequence, const wchar_t *wideSequence);
122 void UTF8PUA(const char *charSequence, const wchar_t *wideSequence);
123 void UTF8Octal(const char *charSequence, const wchar_t *wideSequence);
124 void UTF8(const char *charSequence, const wchar_t *wideSequence, int option);
125#endif // HAVE_WCHAR_H
126
20f46e8d 127 DECLARE_NO_COPY_CLASS(MBConvTestCase)
2cc07181
VZ
128};
129
98eae466
VS
130// register in the unnamed registry so that these tests are run by default
131CPPUNIT_TEST_SUITE_REGISTRATION( MBConvTestCase );
132
133// also include in its own registry so that these tests can be run alone
2cc07181
VZ
134CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase, "MBConvTestCase" );
135
136void MBConvTestCase::WC2CP1250()
137{
138 static const struct Data
139 {
140 const wchar_t *wc;
141 const char *cp1250;
142 } data[] =
143 {
144 { L"hello", "hello" }, // test that it works in simplest case
a430a60f 145 { L"\xBD of \xBD is \xBC", NULL }, // this should fail as cp1250 doesn't have 1/2
2cc07181
VZ
146 };
147
148 wxCSConv cs1250(wxFONTENCODING_CP1250);
149 for ( size_t n = 0; n < WXSIZEOF(data); n++ )
150 {
151 const Data& d = data[n];
98eae466
VS
152 if (d.cp1250)
153 {
154 CPPUNIT_ASSERT( strcmp(cs1250.cWC2MB(d.wc), d.cp1250) == 0 );
155 }
156 else
157 {
0f216992 158 CPPUNIT_ASSERT( (const char*)cs1250.cWC2MB(d.wc) == NULL );
98eae466 159 }
2cc07181
VZ
160 }
161}
726c8204
MW
162
163// ----------------------------------------------------------------------------
164// UTF-8 tests
165// ----------------------------------------------------------------------------
166
167#ifdef HAVE_WCHAR_H
168
169// Check that 'charSequence' translates to 'wideSequence' and back.
98cfeab3 170// Invalid sequences can be tested by giving NULL for 'wideSequence'. Even
726c8204
MW
171// invalid sequences should roundtrip when an option is given and this is
172// checked.
173//
174void MBConvTestCase::UTF8(const char *charSequence,
175 const wchar_t *wideSequence)
176{
177 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
178 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
179 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
180}
181
182// Use this alternative when 'charSequence' contains a PUA character. Such
183// sequences should still roundtrip ok, and this is checked.
184//
185void MBConvTestCase::UTF8PUA(const char *charSequence,
186 const wchar_t *wideSequence)
187{
188 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
189 UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
190 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
191}
192
193// Use this alternative when 'charSequence' contains an octal escape sequence.
194// Such sequences should still roundtrip ok, and this is checked.
195//
196void MBConvTestCase::UTF8Octal(const char *charSequence,
197 const wchar_t *wideSequence)
198{
199 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
200 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
201 UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
202}
203
204// include the option in the error messages so it's possible to see which
205// test failed
206#define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr)
207
208// The test implementation
209//
210void MBConvTestCase::UTF8(const char *charSequence,
211 const wchar_t *wideSequence,
212 int option)
213{
214 const size_t BUFSIZE = 128;
215 wxASSERT(strlen(charSequence) * 3 + 10 < BUFSIZE);
216 char bytes[BUFSIZE];
217
218 // include the option in the error messages so it's possible to see
219 // which test failed
220 sprintf(bytes, " (with option == %d)", option);
221 std::string errmsg(bytes);
222
223 // put the charSequence at the start, middle and end of a string
224 strcpy(bytes, charSequence);
225 strcat(bytes, "ABC");
226 strcat(bytes, charSequence);
227 strcat(bytes, "XYZ");
228 strcat(bytes, charSequence);
229
230 // translate it into wide characters
231 wxMBConvUTF8 utf8(option);
232 wchar_t widechars[BUFSIZE];
98cfeab3 233 size_t lenResult = utf8.MB2WC(NULL, bytes, 0);
726c8204 234 size_t result = utf8.MB2WC(widechars, bytes, BUFSIZE);
98cfeab3 235 UTF8ASSERT(result == lenResult);
726c8204
MW
236
237 // check we got the expected result
238 if (wideSequence) {
239 UTF8ASSERT(result != (size_t)-1);
240 wxASSERT(result < BUFSIZE);
241
242 wchar_t expected[BUFSIZE];
243 wcscpy(expected, wideSequence);
244 wcscat(expected, L"ABC");
245 wcscat(expected, wideSequence);
246 wcscat(expected, L"XYZ");
247 wcscat(expected, wideSequence);
248
249 UTF8ASSERT(wcscmp(widechars, expected) == 0);
98cfeab3 250 UTF8ASSERT(wcslen(widechars) == result);
726c8204
MW
251 }
252 else {
253 // If 'wideSequence' is NULL, then the result is expected to be
254 // invalid. Normally that is as far as we can go, but if there is an
255 // option then the conversion should succeed anyway, and it should be
256 // possible to translate back to the original
257 if (!option) {
258 UTF8ASSERT(result == (size_t)-1);
259 return;
260 }
261 else {
262 UTF8ASSERT(result != (size_t)-1);
263 }
264 }
265
266 // translate it back and check we get the original
267 char bytesAgain[BUFSIZE];
98cfeab3 268 size_t lenResultAgain = utf8.WC2MB(NULL, widechars, 0);
726c8204 269 size_t resultAgain = utf8.WC2MB(bytesAgain, widechars, BUFSIZE);
98cfeab3 270 UTF8ASSERT(resultAgain == lenResultAgain);
726c8204
MW
271 UTF8ASSERT(resultAgain != (size_t)-1);
272 wxASSERT(resultAgain < BUFSIZE);
273
274 UTF8ASSERT(strcmp(bytes, bytesAgain) == 0);
98cfeab3 275 UTF8ASSERT(strlen(bytesAgain) == resultAgain);
726c8204
MW
276}
277
278#endif // HAVE_WCHAR_H