git.saurik.com Git - wxWidgets.git/blame_incremental

0 / 278 ( 0%)

Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: tests/mbconv/main.cpp
	3	// Purpose: wxMBConv unit test
	4	// Author: Vadim Zeitlin, Mike Wetherell
	5	// Created: 14.02.04
	6	// RCS-ID: $Id$
	7	// Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell
	8	///////////////////////////////////////////////////////////////////////////////
	9
	10	// ----------------------------------------------------------------------------
	11	// headers
	12	// ----------------------------------------------------------------------------
	13
	14	#include "testprec.h"
	15
	16	#ifdef __BORLANDC__
	17	#pragma hdrstop
	18	#endif
	19
	20	#ifndef WX_PRECOMP
	21	#include "wx/wx.h"
	22	#endif // WX_PRECOMP
	23
	24	#include "wx/strconv.h"
	25	#include "wx/string.h"
	26
	27	#if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H
	28	#define HAVE_WCHAR_H
	29	#endif
	30
	31	// ----------------------------------------------------------------------------
	32	// Some wide character constants. "\uXXXX" escapes aren't supported by old
	33	// compilers such as VC++ 5 and g++ 2.95.
	34	// ----------------------------------------------------------------------------
	35
	// Code points that the tests below pair with a single UTF-8 byte.
	wchar_t u41[]   = { 0x0041, 0 };
	wchar_t u7f[]   = { 0x007F, 0 };

	// Code points that the tests below pair with two-byte UTF-8 sequences.
	wchar_t u80[]   = { 0x0080, 0 };
	wchar_t u391[]  = { 0x0391, 0 };
	wchar_t u7ff[]  = { 0x07FF, 0 };

	// Code points that the tests below pair with three-byte UTF-8 sequences.
	wchar_t u800[]  = { 0x0800, 0 };
	wchar_t u2620[] = { 0x2620, 0 };
	wchar_t ufffd[] = { 0xFFFD, 0 };

	// Code points above U+FFFF, paired with four-byte UTF-8 sequences. When
	// wchar_t is 4 bytes wide each one fits in a single element; otherwise it
	// is spelled as a surrogate pair.
	#if SIZEOF_WCHAR_T == 4
	wchar_t u10000[]  = { 0x010000, 0 };
	wchar_t u1000a5[] = { 0x1000A5, 0 };
	wchar_t u10fffd[] = { 0x10FFFD, 0 };
	#else
	wchar_t u10000[]  = { 0xD800, 0xDC00, 0 };
	wchar_t u1000a5[] = { 0xDBC0, 0xDCA5, 0 };
	wchar_t u10fffd[] = { 0xDBFF, 0xDFFD, 0 };
	#endif
	56
	57	// ----------------------------------------------------------------------------
	58	// test class
	59	// ----------------------------------------------------------------------------
	60
	61	class MBConvTestCase : public CppUnit::TestCase
	62	{
	63	public:
	64	MBConvTestCase() { }
	65
	66	private:
	67	CPPUNIT_TEST_SUITE( MBConvTestCase );
	68	CPPUNIT_TEST( WC2CP1250 );
	69	#ifdef HAVE_WCHAR_H
	70	CPPUNIT_TEST( UTF8_41 );
	71	CPPUNIT_TEST( UTF8_7f );
	72	CPPUNIT_TEST( UTF8_80 );
	73	CPPUNIT_TEST( UTF8_c2_7f );
	74	CPPUNIT_TEST( UTF8_c2_80 );
	75	CPPUNIT_TEST( UTF8_ce_91 );
	76	CPPUNIT_TEST( UTF8_df_bf );
	77	CPPUNIT_TEST( UTF8_df_c0 );
	78	CPPUNIT_TEST( UTF8_e0_a0_7f );
	79	CPPUNIT_TEST( UTF8_e0_a0_80 );
	80	CPPUNIT_TEST( UTF8_e2_98_a0 );
	81	CPPUNIT_TEST( UTF8_ef_bf_bd );
	82	CPPUNIT_TEST( UTF8_ef_bf_c0 );
	83	CPPUNIT_TEST( UTF8_f0_90_80_7f );
	84	CPPUNIT_TEST( UTF8_f0_90_80_80 );
	85	CPPUNIT_TEST( UTF8_f4_8f_bf_bd );
	86	CPPUNIT_TEST( UTF8PUA_f4_80_82_a5 );
	87	CPPUNIT_TEST( UTF8Octal_backslash245 );
	88	#endif // HAVE_WCHAR_H
	89	CPPUNIT_TEST_SUITE_END();
	90
	91	void WC2CP1250();
	92
	93	#ifdef HAVE_WCHAR_H
	94	// UTF-8 tests. Test the first, last and one in the middle for sequences
	95	// of each length
	96	void UTF8_41() { UTF8("\x41", u41); }
	97	void UTF8_7f() { UTF8("\x7f", u7f); }
	98	void UTF8_80() { UTF8("\x80", NULL); }
	99
	100	void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL); }
	101	void UTF8_c2_80() { UTF8("\xc2\x80", u80); }
	102	void UTF8_ce_91() { UTF8("\xce\x91", u391); }
	103	void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff); }
	104	void UTF8_df_c0() { UTF8("\xdf\xc0", NULL); }
	105
	106	void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL); }
	107	void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800); }
	108	void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620); }
	109	void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd); }
	110	void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL); }
	111
	112	void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL); }
	113	void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000); }
	114	void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd); }
	115
	116	// test 'escaping the escape characters' for the two escaping schemes
	117	void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); }
	118	void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); }
	119
	120	// implementation for the utf-8 tests (see comments below)
	121	void UTF8(const char charSequence, const wchar_t wideSequence);
	122	void UTF8PUA(const char charSequence, const wchar_t wideSequence);
	123	void UTF8Octal(const char charSequence, const wchar_t wideSequence);
	124	void UTF8(const char charSequence, const wchar_t wideSequence, int option);
	125	#endif // HAVE_WCHAR_H
	126
	127	DECLARE_NO_COPY_CLASS(MBConvTestCase)
	128	};
	129
	130	// register in the unnamed registry so that these tests are run by default
	131	CPPUNIT_TEST_SUITE_REGISTRATION( MBConvTestCase );
	132
	133	// also include in its own registry so that these tests can be run alone
	134	CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase, "MBConvTestCase" );
	135
	136	void MBConvTestCase::WC2CP1250()
	137	{
	138	static const struct Data
	139	{
	140	const wchar_t *wc;
	141	const char *cp1250;
	142	} data[] =
	143	{
	144	{ L"hello", "hello" }, // test that it works in simplest case
	145	{ L"\xBD of \xBD is \xBC", NULL }, // this should fail as cp1250 doesn't have 1/2
	146	};
	147
	148	wxCSConv cs1250(wxFONTENCODING_CP1250);
	149	for ( size_t n = 0; n < WXSIZEOF(data); n++ )
	150	{
	151	const Data& d = data[n];
	152	if (d.cp1250)
	153	{
	154	CPPUNIT_ASSERT( strcmp(cs1250.cWC2MB(d.wc), d.cp1250) == 0 );
	155	}
	156	else
	157	{
	158	CPPUNIT_ASSERT( (const char*)cs1250.cWC2MB(d.wc) == NULL );
	159	}
	160	}
	161	}
	162
	163	// ----------------------------------------------------------------------------
	164	// UTF-8 tests
	165	// ----------------------------------------------------------------------------
	166
	167	#ifdef HAVE_WCHAR_H
	168
	169	// Check that 'charSequence' translates to 'wideSequence' and back.
	170	// Invalid sequences can be tested by giving NULL for 'wideSequence'. Even
	171	// invalid sequences should roundtrip when an option is given and this is
	172	// checked.
	173	//
	174	void MBConvTestCase::UTF8(const char *charSequence,
	175	const wchar_t *wideSequence)
	176	{
	177	UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
	178	UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
	179	UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
	180	}
	181
	182	// Use this alternative when 'charSequence' contains a PUA character. Such
	183	// sequences should still roundtrip ok, and this is checked.
	184	//
	185	void MBConvTestCase::UTF8PUA(const char *charSequence,
	186	const wchar_t *wideSequence)
	187	{
	188	UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
	189	UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
	190	UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
	191	}
	192
	193	// Use this alternative when 'charSequence' contains an octal escape sequence.
	194	// Such sequences should still roundtrip ok, and this is checked.
	195	//
	196	void MBConvTestCase::UTF8Octal(const char *charSequence,
	197	const wchar_t *wideSequence)
	198	{
	199	UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
	200	UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
	201	UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
	202	}
	203
	204	// include the option in the error messages so it's possible to see which
	205	// test failed
	206	#define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr)
	207
	208	// The test implementation
	209	//
	210	void MBConvTestCase::UTF8(const char *charSequence,
	211	const wchar_t *wideSequence,
	212	int option)
	213	{
	214	const size_t BUFSIZE = 128;
	215	wxASSERT(strlen(charSequence) * 3 + 10 < BUFSIZE);
	216	char bytes[BUFSIZE];
	217
	218	// include the option in the error messages so it's possible to see
	219	// which test failed
	220	sprintf(bytes, " (with option == %d)", option);
	221	std::string errmsg(bytes);
	222
	223	// put the charSequence at the start, middle and end of a string
	224	strcpy(bytes, charSequence);
	225	strcat(bytes, "ABC");
	226	strcat(bytes, charSequence);
	227	strcat(bytes, "XYZ");
	228	strcat(bytes, charSequence);
	229
	230	// translate it into wide characters
	231	wxMBConvUTF8 utf8(option);
	232	wchar_t widechars[BUFSIZE];
	233	size_t lenResult = utf8.MB2WC(NULL, bytes, 0);
	234	size_t result = utf8.MB2WC(widechars, bytes, BUFSIZE);
	235	UTF8ASSERT(result == lenResult);
	236
	237	// check we got the expected result
	238	if (wideSequence) {
	239	UTF8ASSERT(result != (size_t)-1);
	240	wxASSERT(result < BUFSIZE);
	241
	242	wchar_t expected[BUFSIZE];
	243	wcscpy(expected, wideSequence);
	244	wcscat(expected, L"ABC");
	245	wcscat(expected, wideSequence);
	246	wcscat(expected, L"XYZ");
	247	wcscat(expected, wideSequence);
	248
	249	UTF8ASSERT(wcscmp(widechars, expected) == 0);
	250	UTF8ASSERT(wcslen(widechars) == result);
	251	}
	252	else {
	253	// If 'wideSequence' is NULL, then the result is expected to be
	254	// invalid. Normally that is as far as we can go, but if there is an
	255	// option then the conversion should succeed anyway, and it should be
	256	// possible to translate back to the original
	257	if (!option) {
	258	UTF8ASSERT(result == (size_t)-1);
	259	return;
	260	}
	261	else {
	262	UTF8ASSERT(result != (size_t)-1);
	263	}
	264	}
	265
	266	// translate it back and check we get the original
	267	char bytesAgain[BUFSIZE];
	268	size_t lenResultAgain = utf8.WC2MB(NULL, widechars, 0);
	269	size_t resultAgain = utf8.WC2MB(bytesAgain, widechars, BUFSIZE);
	270	UTF8ASSERT(resultAgain == lenResultAgain);
	271	UTF8ASSERT(resultAgain != (size_t)-1);
	272	wxASSERT(resultAgain < BUFSIZE);
	273
	274	UTF8ASSERT(strcmp(bytes, bytesAgain) == 0);
	275	UTF8ASSERT(strlen(bytesAgain) == resultAgain);
	276	}
	277
	278	#endif // HAVE_WCHAR_H