Replacements for some wcsxxx funcs for systems without them like osx 10.2.
[wxWidgets.git] / tests / mbconv / mbconvtest.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: tests/mbconv/main.cpp
3 // Purpose: wxMBConv unit test
4 // Author: Vadim Zeitlin, Mike Wetherell
5 // Created: 14.02.04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell
8 ///////////////////////////////////////////////////////////////////////////////
9
10 // ----------------------------------------------------------------------------
11 // headers
12 // ----------------------------------------------------------------------------
13
14 #include "testprec.h"
15
16 #ifdef __BORLANDC__
17 #pragma hdrstop
18 #endif
19
20 #ifndef WX_PRECOMP
21 #include "wx/wx.h"
22 #endif // WX_PRECOMP
23
24 #include "wx/strconv.h"
25 #include "wx/string.h"
26
27 #if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H
28 #define HAVE_WCHAR_H
29 #endif
30
31 // ----------------------------------------------------------------------------
32 // Some wide character constants. "\uXXXX" escapes aren't supported by old
33 // compilers such as VC++ 5 and g++ 2.95.
34 // ----------------------------------------------------------------------------
35
// Each uXXXX array is a NUL-terminated wide string holding the single code
// point U+XXXX. They are read-only test data, so they are 'static const':
// this gives the (very generic) names internal linkage, so they cannot clash
// with symbols of other tests linked into the same binary, and prevents
// accidental modification.

// code points used by the 1-byte UTF-8 tests
static const wchar_t u41[] = { 0x41, 0 };
static const wchar_t u7f[] = { 0x7f, 0 };

// code points used by the 2-byte UTF-8 tests
static const wchar_t u80[] = { 0x80, 0 };
static const wchar_t u391[] = { 0x391, 0 };
static const wchar_t u7ff[] = { 0x7ff, 0 };

// code points used by the 3-byte UTF-8 tests
static const wchar_t u800[] = { 0x800, 0 };
static const wchar_t u2620[] = { 0x2620, 0 };
static const wchar_t ufffd[] = { 0xfffd, 0 };

// code points used by the 4-byte UTF-8 tests
#if SIZEOF_WCHAR_T == 4
// wchar_t is wide enough to store the code point directly
static const wchar_t u10000[] = { 0x10000, 0 };
static const wchar_t u1000a5[] = { 0x1000a5, 0 };
static const wchar_t u10fffd[] = { 0x10fffd, 0 };
#else
// otherwise code points above U+FFFF are written as UTF-16 surrogate pairs
static const wchar_t u10000[] = { 0xd800, 0xdc00, 0 };
static const wchar_t u1000a5[] = { 0xdbc0, 0xdca5, 0 };
static const wchar_t u10fffd[] = { 0xdbff, 0xdffd, 0 };
#endif
55 #endif
56
57 // ----------------------------------------------------------------------------
58 // test class
59 // ----------------------------------------------------------------------------
60
61 class MBConvTestCase : public CppUnit::TestCase
62 {
63 public:
64 MBConvTestCase() { }
65
66 private:
67 CPPUNIT_TEST_SUITE( MBConvTestCase );
68 CPPUNIT_TEST( WC2CP1250 );
69 #ifdef HAVE_WCHAR_H
70 CPPUNIT_TEST( UTF8_41 );
71 CPPUNIT_TEST( UTF8_7f );
72 CPPUNIT_TEST( UTF8_80 );
73 CPPUNIT_TEST( UTF8_c2_7f );
74 CPPUNIT_TEST( UTF8_c2_80 );
75 CPPUNIT_TEST( UTF8_ce_91 );
76 CPPUNIT_TEST( UTF8_df_bf );
77 CPPUNIT_TEST( UTF8_df_c0 );
78 CPPUNIT_TEST( UTF8_e0_a0_7f );
79 CPPUNIT_TEST( UTF8_e0_a0_80 );
80 CPPUNIT_TEST( UTF8_e2_98_a0 );
81 CPPUNIT_TEST( UTF8_ef_bf_bd );
82 CPPUNIT_TEST( UTF8_ef_bf_c0 );
83 CPPUNIT_TEST( UTF8_f0_90_80_7f );
84 CPPUNIT_TEST( UTF8_f0_90_80_80 );
85 CPPUNIT_TEST( UTF8_f4_8f_bf_bd );
86 CPPUNIT_TEST( UTF8PUA_f4_80_82_a5 );
87 CPPUNIT_TEST( UTF8Octal_backslash245 );
88 #endif // HAVE_WCHAR_H
89 CPPUNIT_TEST_SUITE_END();
90
91 void WC2CP1250();
92
93 #ifdef HAVE_WCHAR_H
94 // UTF-8 tests. Test the first, last and one in the middle for sequences
95 // of each length
96 void UTF8_41() { UTF8("\x41", u41); }
97 void UTF8_7f() { UTF8("\x7f", u7f); }
98 void UTF8_80() { UTF8("\x80", NULL); }
99
100 void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL); }
101 void UTF8_c2_80() { UTF8("\xc2\x80", u80); }
102 void UTF8_ce_91() { UTF8("\xce\x91", u391); }
103 void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff); }
104 void UTF8_df_c0() { UTF8("\xdf\xc0", NULL); }
105
106 void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL); }
107 void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800); }
108 void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620); }
109 void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd); }
110 void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL); }
111
112 void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL); }
113 void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000); }
114 void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd); }
115
116 // test 'escaping the escape characters' for the two escaping schemes
117 void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); }
118 void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); }
119
120 // implementation for the utf-8 tests (see comments below)
121 void UTF8(const char *charSequence, const wchar_t *wideSequence);
122 void UTF8PUA(const char *charSequence, const wchar_t *wideSequence);
123 void UTF8Octal(const char *charSequence, const wchar_t *wideSequence);
124 void UTF8(const char *charSequence, const wchar_t *wideSequence, int option);
125 #endif // HAVE_WCHAR_H
126
127 DECLARE_NO_COPY_CLASS(MBConvTestCase)
128 };
129
130 // register in the unnamed registry so that these tests are run by default
131 CPPUNIT_TEST_SUITE_REGISTRATION( MBConvTestCase );
132
133 // also include in it's own registry so that these tests can be run alone
134 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase, "MBConvTestCase" );
135
136 void MBConvTestCase::WC2CP1250()
137 {
138 static const struct Data
139 {
140 const wchar_t *wc;
141 const char *cp1250;
142 } data[] =
143 {
144 { L"hello", "hello" }, // test that it works in simplest case
145 { L"\xBD of \xBD is \xBC", NULL }, // this should fail as cp1250 doesn't have 1/2
146 };
147
148 wxCSConv cs1250(wxFONTENCODING_CP1250);
149 for ( size_t n = 0; n < WXSIZEOF(data); n++ )
150 {
151 const Data& d = data[n];
152 if (d.cp1250)
153 {
154 CPPUNIT_ASSERT( strcmp(cs1250.cWC2MB(d.wc), d.cp1250) == 0 );
155 }
156 else
157 {
158 CPPUNIT_ASSERT( (const char*)cs1250.cWC2MB(d.wc) == NULL );
159 }
160 }
161 }
162
163 // ----------------------------------------------------------------------------
164 // UTF-8 tests
165 // ----------------------------------------------------------------------------
166
167 #ifdef HAVE_WCHAR_H
168
169 // Check that 'charSequence' translates to 'wideSequence' and back.
170 // Invalid sequences can be tested by giving NULL for 'wideSequence'. Even
171 // invalid sequences should roundtrip when an option is given and this is
172 // checked.
173 //
174 void MBConvTestCase::UTF8(const char *charSequence,
175 const wchar_t *wideSequence)
176 {
177 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
178 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
179 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
180 }
181
182 // Use this alternative when 'charSequence' contains a PUA character. Such
183 // sequences should still roundtrip ok, and this is checked.
184 //
185 void MBConvTestCase::UTF8PUA(const char *charSequence,
186 const wchar_t *wideSequence)
187 {
188 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
189 UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
190 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
191 }
192
193 // Use this alternative when 'charSequence' contains an octal escape sequence.
194 // Such sequences should still roundtrip ok, and this is checked.
195 //
196 void MBConvTestCase::UTF8Octal(const char *charSequence,
197 const wchar_t *wideSequence)
198 {
199 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
200 UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
201 UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
202 }
203
// in case wcscpy is missing
//
// Copies the NUL-terminated wide string 'src', terminator included, to 'dest'
// and returns 'dest'. No bounds checking is done: 'dest' must be big enough.
//
static wchar_t *wx_wcscpy(wchar_t *dest, const wchar_t *src)
{
    wchar_t *d = dest;
    while ((*d++ = *src++) != 0)
        ;
    return dest;
}

// in case wcscat is missing
//
// Appends the NUL-terminated wide string 'src' to the one already in 'dest'
// and returns 'dest'. No bounds checking is done: 'dest' must have room for
// both strings and the terminator.
//
static wchar_t *wx_wcscat(wchar_t *dest, const wchar_t *src)
{
    // find the terminator of the existing string...
    wchar_t *end = dest;
    while (*end != 0)
        end++;

    // ...and copy 'src' over it, reusing the copy loop above
    wx_wcscpy(end, src);
    return dest;
}
225
226 // include the option in the error messages so it's possible to see which
227 // test failed
228 #define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr)
229
230 // The test implementation
231 //
232 void MBConvTestCase::UTF8(const char *charSequence,
233 const wchar_t *wideSequence,
234 int option)
235 {
236 const size_t BUFSIZE = 128;
237 wxASSERT(strlen(charSequence) * 3 + 10 < BUFSIZE);
238 char bytes[BUFSIZE];
239
240 // include the option in the error messages so it's possible to see
241 // which test failed
242 sprintf(bytes, " (with option == %d)", option);
243 std::string errmsg(bytes);
244
245 // put the charSequence at the start, middle and end of a string
246 strcpy(bytes, charSequence);
247 strcat(bytes, "ABC");
248 strcat(bytes, charSequence);
249 strcat(bytes, "XYZ");
250 strcat(bytes, charSequence);
251
252 // translate it into wide characters
253 wxMBConvUTF8 utf8(option);
254 wchar_t widechars[BUFSIZE];
255 size_t lenResult = utf8.MB2WC(NULL, bytes, 0);
256 size_t result = utf8.MB2WC(widechars, bytes, BUFSIZE);
257 UTF8ASSERT(result == lenResult);
258
259 // check we got the expected result
260 if (wideSequence) {
261 UTF8ASSERT(result != (size_t)-1);
262 wxASSERT(result < BUFSIZE);
263
264 wchar_t expected[BUFSIZE];
265 wx_wcscpy(expected, wideSequence);
266 wx_wcscat(expected, L"ABC");
267 wx_wcscat(expected, wideSequence);
268 wx_wcscat(expected, L"XYZ");
269 wx_wcscat(expected, wideSequence);
270
271 UTF8ASSERT(wcscmp(widechars, expected) == 0);
272 UTF8ASSERT(wcslen(widechars) == result);
273 }
274 else {
275 // If 'wideSequence' is NULL, then the result is expected to be
276 // invalid. Normally that is as far as we can go, but if there is an
277 // option then the conversion should succeed anyway, and it should be
278 // possible to translate back to the original
279 if (!option) {
280 UTF8ASSERT(result == (size_t)-1);
281 return;
282 }
283 else {
284 UTF8ASSERT(result != (size_t)-1);
285 }
286 }
287
288 // translate it back and check we get the original
289 char bytesAgain[BUFSIZE];
290 size_t lenResultAgain = utf8.WC2MB(NULL, widechars, 0);
291 size_t resultAgain = utf8.WC2MB(bytesAgain, widechars, BUFSIZE);
292 UTF8ASSERT(resultAgain == lenResultAgain);
293 UTF8ASSERT(resultAgain != (size_t)-1);
294 wxASSERT(resultAgain < BUFSIZE);
295
296 UTF8ASSERT(strcmp(bytes, bytesAgain) == 0);
297 UTF8ASSERT(strlen(bytesAgain) == resultAgain);
298 }
299
300 #endif // HAVE_WCHAR_H