]>
Commit | Line | Data |
---|---|---|
1 | /////////////////////////////////////////////////////////////////////////////// | |
2 | // Name: tests/mbconv/main.cpp | |
3 | // Purpose: wxMBConv unit test | |
4 | // Author: Vadim Zeitlin, Mike Wetherell | |
5 | // Created: 14.02.04 | |
6 | // RCS-ID: $Id$ | |
7 | // Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell | |
8 | /////////////////////////////////////////////////////////////////////////////// | |
9 | ||
10 | // ---------------------------------------------------------------------------- | |
11 | // headers | |
12 | // ---------------------------------------------------------------------------- | |
13 | ||
14 | #include "testprec.h" | |
15 | ||
16 | #ifdef __BORLANDC__ | |
17 | #pragma hdrstop | |
18 | #endif | |
19 | ||
20 | #ifndef WX_PRECOMP | |
21 | #include "wx/wx.h" | |
22 | #endif // WX_PRECOMP | |
23 | ||
24 | #include "wx/strconv.h" | |
25 | #include "wx/string.h" | |
26 | ||
27 | #if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H | |
28 | #define HAVE_WCHAR_H | |
29 | #endif | |
30 | ||
// ----------------------------------------------------------------------------
// Wide character test strings.  They are written as explicit, NUL-terminated
// arrays of code units because "\uXXXX" escapes aren't supported by old
// compilers such as VC++ 5 and g++ 2.95.  Each array is named after the code
// point it contains.
// ----------------------------------------------------------------------------

// code points paired with one byte UTF-8 sequences in the tests
wchar_t u41[]     = { 0x0041, 0 };
wchar_t u7f[]     = { 0x007f, 0 };

// code points paired with two byte UTF-8 sequences
wchar_t u80[]     = { 0x0080, 0 };
wchar_t u391[]    = { 0x0391, 0 };
wchar_t u7ff[]    = { 0x07ff, 0 };

// code points paired with three byte UTF-8 sequences
wchar_t u800[]    = { 0x0800, 0 };
wchar_t u2620[]   = { 0x2620, 0 };
wchar_t ufffd[]   = { 0xfffd, 0 };

// code points paired with four byte UTF-8 sequences: these are above 0xffff,
// so they are stored directly only when wchar_t is 4 bytes wide and as a pair
// of surrogates otherwise
#if SIZEOF_WCHAR_T == 4
wchar_t u10000[]  = { 0x010000, 0 };
wchar_t u1000a5[] = { 0x1000a5, 0 };
wchar_t u10fffd[] = { 0x10fffd, 0 };
#else
wchar_t u10000[]  = { 0xd800, 0xdc00, 0 };
wchar_t u1000a5[] = { 0xdbc0, 0xdca5, 0 };
wchar_t u10fffd[] = { 0xdbff, 0xdffd, 0 };
#endif
55 | #endif | |
56 | ||
57 | // ---------------------------------------------------------------------------- | |
58 | // test class | |
59 | // ---------------------------------------------------------------------------- | |
60 | ||
61 | class MBConvTestCase : public CppUnit::TestCase | |
62 | { | |
63 | public: | |
64 | MBConvTestCase() { } | |
65 | ||
66 | private: | |
67 | CPPUNIT_TEST_SUITE( MBConvTestCase ); | |
68 | CPPUNIT_TEST( WC2CP1250 ); | |
69 | #ifdef HAVE_WCHAR_H | |
70 | CPPUNIT_TEST( UTF8_41 ); | |
71 | CPPUNIT_TEST( UTF8_7f ); | |
72 | CPPUNIT_TEST( UTF8_80 ); | |
73 | CPPUNIT_TEST( UTF8_c2_7f ); | |
74 | CPPUNIT_TEST( UTF8_c2_80 ); | |
75 | CPPUNIT_TEST( UTF8_ce_91 ); | |
76 | CPPUNIT_TEST( UTF8_df_bf ); | |
77 | CPPUNIT_TEST( UTF8_df_c0 ); | |
78 | CPPUNIT_TEST( UTF8_e0_a0_7f ); | |
79 | CPPUNIT_TEST( UTF8_e0_a0_80 ); | |
80 | CPPUNIT_TEST( UTF8_e2_98_a0 ); | |
81 | CPPUNIT_TEST( UTF8_ef_bf_bd ); | |
82 | CPPUNIT_TEST( UTF8_ef_bf_c0 ); | |
83 | CPPUNIT_TEST( UTF8_f0_90_80_7f ); | |
84 | CPPUNIT_TEST( UTF8_f0_90_80_80 ); | |
85 | CPPUNIT_TEST( UTF8_f4_8f_bf_bd ); | |
86 | CPPUNIT_TEST( UTF8PUA_f4_80_82_a5 ); | |
87 | CPPUNIT_TEST( UTF8Octal_backslash245 ); | |
88 | #endif // HAVE_WCHAR_H | |
89 | CPPUNIT_TEST_SUITE_END(); | |
90 | ||
91 | void WC2CP1250(); | |
92 | ||
93 | #ifdef HAVE_WCHAR_H | |
94 | // UTF-8 tests. Test the first, last and one in the middle for sequences | |
95 | // of each length | |
96 | void UTF8_41() { UTF8("\x41", u41); } | |
97 | void UTF8_7f() { UTF8("\x7f", u7f); } | |
98 | void UTF8_80() { UTF8("\x80", NULL); } | |
99 | ||
100 | void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL); } | |
101 | void UTF8_c2_80() { UTF8("\xc2\x80", u80); } | |
102 | void UTF8_ce_91() { UTF8("\xce\x91", u391); } | |
103 | void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff); } | |
104 | void UTF8_df_c0() { UTF8("\xdf\xc0", NULL); } | |
105 | ||
106 | void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL); } | |
107 | void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800); } | |
108 | void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620); } | |
109 | void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd); } | |
110 | void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL); } | |
111 | ||
112 | void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL); } | |
113 | void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000); } | |
114 | void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd); } | |
115 | ||
116 | // test 'escaping the escape characters' for the two escaping schemes | |
117 | void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); } | |
118 | void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); } | |
119 | ||
120 | // implementation for the utf-8 tests (see comments below) | |
121 | void UTF8(const char *charSequence, const wchar_t *wideSequence); | |
122 | void UTF8PUA(const char *charSequence, const wchar_t *wideSequence); | |
123 | void UTF8Octal(const char *charSequence, const wchar_t *wideSequence); | |
124 | void UTF8(const char *charSequence, const wchar_t *wideSequence, int option); | |
125 | #endif // HAVE_WCHAR_H | |
126 | ||
127 | DECLARE_NO_COPY_CLASS(MBConvTestCase) | |
128 | }; | |
129 | ||
130 | // register in the unnamed registry so that these tests are run by default | |
131 | CPPUNIT_TEST_SUITE_REGISTRATION( MBConvTestCase ); | |
132 | ||
133 | // also include in it's own registry so that these tests can be run alone | |
134 | CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase, "MBConvTestCase" ); | |
135 | ||
136 | void MBConvTestCase::WC2CP1250() | |
137 | { | |
138 | static const struct Data | |
139 | { | |
140 | const wchar_t *wc; | |
141 | const char *cp1250; | |
142 | } data[] = | |
143 | { | |
144 | { L"hello", "hello" }, // test that it works in simplest case | |
145 | { L"\xBD of \xBD is \xBC", NULL }, // this should fail as cp1250 doesn't have 1/2 | |
146 | }; | |
147 | ||
148 | wxCSConv cs1250(wxFONTENCODING_CP1250); | |
149 | for ( size_t n = 0; n < WXSIZEOF(data); n++ ) | |
150 | { | |
151 | const Data& d = data[n]; | |
152 | if (d.cp1250) | |
153 | { | |
154 | CPPUNIT_ASSERT( strcmp(cs1250.cWC2MB(d.wc), d.cp1250) == 0 ); | |
155 | } | |
156 | else | |
157 | { | |
158 | CPPUNIT_ASSERT( (const char*)cs1250.cWC2MB(d.wc) == NULL ); | |
159 | } | |
160 | } | |
161 | } | |
162 | ||
163 | // ---------------------------------------------------------------------------- | |
164 | // UTF-8 tests | |
165 | // ---------------------------------------------------------------------------- | |
166 | ||
167 | #ifdef HAVE_WCHAR_H | |
168 | ||
169 | // Check that 'charSequence' translates to 'wideSequence' and back. | |
170 | // Invalid sequences can be tested by giving NULL for 'wideSequence'. Even | |
171 | // invalid sequences should roundtrip when an option is given and this is | |
172 | // checked. | |
173 | // | |
174 | void MBConvTestCase::UTF8(const char *charSequence, | |
175 | const wchar_t *wideSequence) | |
176 | { | |
177 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); | |
178 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); | |
179 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); | |
180 | } | |
181 | ||
182 | // Use this alternative when 'charSequence' contains a PUA character. Such | |
183 | // sequences should still roundtrip ok, and this is checked. | |
184 | // | |
185 | void MBConvTestCase::UTF8PUA(const char *charSequence, | |
186 | const wchar_t *wideSequence) | |
187 | { | |
188 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); | |
189 | UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); | |
190 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); | |
191 | } | |
192 | ||
193 | // Use this alternative when 'charSequence' contains an octal escape sequence. | |
194 | // Such sequences should still roundtrip ok, and this is checked. | |
195 | // | |
196 | void MBConvTestCase::UTF8Octal(const char *charSequence, | |
197 | const wchar_t *wideSequence) | |
198 | { | |
199 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT); | |
200 | UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); | |
201 | UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); | |
202 | } | |
203 | ||
204 | // include the option in the error messages so it's possible to see which | |
205 | // test failed | |
206 | #define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr) | |
207 | ||
208 | // The test implementation | |
209 | // | |
210 | void MBConvTestCase::UTF8(const char *charSequence, | |
211 | const wchar_t *wideSequence, | |
212 | int option) | |
213 | { | |
214 | const size_t BUFSIZE = 128; | |
215 | wxASSERT(strlen(charSequence) * 3 + 10 < BUFSIZE); | |
216 | char bytes[BUFSIZE]; | |
217 | ||
218 | // include the option in the error messages so it's possible to see | |
219 | // which test failed | |
220 | sprintf(bytes, " (with option == %d)", option); | |
221 | std::string errmsg(bytes); | |
222 | ||
223 | // put the charSequence at the start, middle and end of a string | |
224 | strcpy(bytes, charSequence); | |
225 | strcat(bytes, "ABC"); | |
226 | strcat(bytes, charSequence); | |
227 | strcat(bytes, "XYZ"); | |
228 | strcat(bytes, charSequence); | |
229 | ||
230 | // translate it into wide characters | |
231 | wxMBConvUTF8 utf8(option); | |
232 | wchar_t widechars[BUFSIZE]; | |
233 | size_t lenResult = utf8.MB2WC(NULL, bytes, 0); | |
234 | size_t result = utf8.MB2WC(widechars, bytes, BUFSIZE); | |
235 | UTF8ASSERT(result == lenResult); | |
236 | ||
237 | // check we got the expected result | |
238 | if (wideSequence) { | |
239 | UTF8ASSERT(result != (size_t)-1); | |
240 | wxASSERT(result < BUFSIZE); | |
241 | ||
242 | wchar_t expected[BUFSIZE]; | |
243 | wcscpy(expected, wideSequence); | |
244 | wcscat(expected, L"ABC"); | |
245 | wcscat(expected, wideSequence); | |
246 | wcscat(expected, L"XYZ"); | |
247 | wcscat(expected, wideSequence); | |
248 | ||
249 | UTF8ASSERT(wcscmp(widechars, expected) == 0); | |
250 | UTF8ASSERT(wcslen(widechars) == result); | |
251 | } | |
252 | else { | |
253 | // If 'wideSequence' is NULL, then the result is expected to be | |
254 | // invalid. Normally that is as far as we can go, but if there is an | |
255 | // option then the conversion should succeed anyway, and it should be | |
256 | // possible to translate back to the original | |
257 | if (!option) { | |
258 | UTF8ASSERT(result == (size_t)-1); | |
259 | return; | |
260 | } | |
261 | else { | |
262 | UTF8ASSERT(result != (size_t)-1); | |
263 | } | |
264 | } | |
265 | ||
266 | // translate it back and check we get the original | |
267 | char bytesAgain[BUFSIZE]; | |
268 | size_t lenResultAgain = utf8.WC2MB(NULL, widechars, 0); | |
269 | size_t resultAgain = utf8.WC2MB(bytesAgain, widechars, BUFSIZE); | |
270 | UTF8ASSERT(resultAgain == lenResultAgain); | |
271 | UTF8ASSERT(resultAgain != (size_t)-1); | |
272 | wxASSERT(resultAgain < BUFSIZE); | |
273 | ||
274 | UTF8ASSERT(strcmp(bytes, bytesAgain) == 0); | |
275 | UTF8ASSERT(strlen(bytesAgain) == resultAgain); | |
276 | } | |
277 | ||
278 | #endif // HAVE_WCHAR_H |