git.saurik.com Git - wxWidgets.git/blame_incremental

... / ...

Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: tests/strings/unicode.cpp
	3	// Purpose: Unicode unit test
	4	// Author: Vadim Zeitlin, Wlodzimierz ABX Skiba
	5	// Created: 2004-04-28
	6	// Copyright: (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba
	7	///////////////////////////////////////////////////////////////////////////////
	8
	9	// ----------------------------------------------------------------------------
	10	// headers
	11	// ----------------------------------------------------------------------------
	12
	13	#include "testprec.h"
	14
	15	#ifdef __BORLANDC__
	16	#pragma hdrstop
	17	#endif
	18
	19	#ifndef WX_PRECOMP
	20	#include "wx/wx.h"
	21	#endif // WX_PRECOMP
	22
	23	#include "wx/encconv.h"
	24
	25	// ----------------------------------------------------------------------------
	26	// helper class holding the matching MB and WC strings
	27	// ----------------------------------------------------------------------------
	28
	29	struct StringConversionData
	30	{
	31	// either str or wcs (but not both) may be NULL, this means that the conversion
	32	// to it should fail
	33	StringConversionData(const char str_, const wchar_t wcs_, int flags_ = 0)
	34	: str(str_), wcs(wcs_), flags(flags_)
	35	{
	36	}
	37
	38	const char * const str;
	39	const wchar_t * const wcs;
	40
	41	enum
	42	{
	43	TEST_BOTH = 0, // test both str -> wcs and wcs -> str
	44	ONLY_MB2WC = 1 // only test str -> wcs conversion
	45	};
	46
	47	const int flags;
	48
	49	// test that the conversion between str and wcs (subject to flags) succeeds
	50	//
	51	// the first argument is the index in the test array and is used solely for
	52	// diagnostics
	53	void Test(size_t n, wxMBConv& conv) const
	54	{
	55	if ( str )
	56	{
	57	wxWCharBuffer wbuf = conv.cMB2WC(str);
	58
	59	if ( wcs )
	60	{
	61	CPPUNIT_ASSERT_MESSAGE
	62	(
	63	Message(n, "MB2WC failed"),
	64	wbuf.data()
	65	);
	66
	67	CPPUNIT_ASSERT_MESSAGE
	68	(
	69	Message(n, "MB2WC", wbuf, wcs),
	70	wxStrcmp(wbuf, wcs) == 0
	71	);
	72	}
	73	else // conversion is supposed to fail
	74	{
	75	CPPUNIT_ASSERT_MESSAGE
	76	(
	77	Message(n, "MB2WC succeeded"),
	78	!wbuf.data()
	79	);
	80	}
	81	}
	82
	83	if ( wcs && !(flags & ONLY_MB2WC) )
	84	{
	85	wxCharBuffer buf = conv.cWC2MB(wcs);
	86
	87	if ( str )
	88	{
	89	CPPUNIT_ASSERT_MESSAGE
	90	(
	91	Message(n, "WC2MB failed"),
	92	buf.data()
	93	);
	94
	95	CPPUNIT_ASSERT_MESSAGE
	96	(
	97	Message(n, "WC2MB", buf, str),
	98	strcmp(buf, str) == 0
	99	);
	100	}
	101	else
	102	{
	103	CPPUNIT_ASSERT_MESSAGE
	104	(
	105	Message(n, "WC2MB succeeded"),
	106	!buf.data()
	107	);
	108	}
	109	}
	110	}
	111
	112	private:
	113	static std::string
	114	Message(size_t n, const wxString& msg)
	115	{
	116	return std::string(wxString::Format("#%lu: %s", (unsigned long)n, msg));
	117	}
	118
	119	template <typename T>
	120	static std::string
	121	Message(size_t n,
	122	const char *func,
	123	const wxCharTypeBuffer<T>& actual,
	124	const T *expected)
	125	{
	126	return Message(n,
	127	wxString::Format("%s returned \"%s\", expected \"%s\"",
	128	func, actual.data(), expected));
	129	}
	130	};
	131
	132	// ----------------------------------------------------------------------------
	133	// test class
	134	// ----------------------------------------------------------------------------
	135
	136	class UnicodeTestCase : public CppUnit::TestCase
	137	{
	138	public:
	139	UnicodeTestCase();
	140
	141	private:
	142	CPPUNIT_TEST_SUITE( UnicodeTestCase );
	143	CPPUNIT_TEST( ToFromAscii );
	144	CPPUNIT_TEST( ConstructorsWithConversion );
	145	CPPUNIT_TEST( ConversionFixed );
	146	CPPUNIT_TEST( ConversionWithNULs );
	147	CPPUNIT_TEST( ConversionUTF7 );
	148	CPPUNIT_TEST( ConversionUTF8 );
	149	CPPUNIT_TEST( ConversionUTF16 );
	150	CPPUNIT_TEST( ConversionUTF32 );
	151	CPPUNIT_TEST( IsConvOk );
	152	#if wxUSE_UNICODE
	153	CPPUNIT_TEST( Iteration );
	154	#endif
	155	CPPUNIT_TEST_SUITE_END();
	156
	157	void ToFromAscii();
	158	void ConstructorsWithConversion();
	159	void ConversionFixed();
	160	void ConversionWithNULs();
	161	void ConversionUTF7();
	162	void ConversionUTF8();
	163	void ConversionUTF16();
	164	void ConversionUTF32();
	165	void IsConvOk();
	166	#if wxUSE_UNICODE
	167	void Iteration();
	168	#endif
	169
	170	DECLARE_NO_COPY_CLASS(UnicodeTestCase)
	171	};
	172
	173	// register in the unnamed registry so that these tests are run by default
	174	CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase );
	175
	176	// also include in its own registry so that these tests can be run alone
	177	CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase, "UnicodeTestCase" );
	178
	179	UnicodeTestCase::UnicodeTestCase()
	180	{
	181	}
	182
	183	void UnicodeTestCase::ToFromAscii()
	184	{
	185
	186	#define TEST_TO_FROM_ASCII(txt) \
	187	{ \
	188	static const char *msg = txt; \
	189	wxString s = wxString::FromAscii(msg); \
	190	CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 ); \
	191	}
	192
	193	TEST_TO_FROM_ASCII( "Hello, world!" );
	194	TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" );
	195	}
	196
	197	void UnicodeTestCase::ConstructorsWithConversion()
	198	{
	199	// the string "Déjà" in UTF-8 and wchar_t:
	200	const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
	201	const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
	202	const char utf8 = (char )utf8Buf;
	203	const char utf8sub = (char )utf8subBuf;
	204
	205	wxString s1(utf8, wxConvUTF8);
	206
	207	#if wxUSE_UNICODE
	208	const wchar_t wchar[] = {0x44,0xE9,0x6A,0xE0,0};
	209	CPPUNIT_ASSERT_EQUAL( wchar, s1 );
	210
	211	wxString s2(wchar);
	212	CPPUNIT_ASSERT_EQUAL( wchar, s2 );
	213	CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(utf8), s2 );
	214	#else
	215	CPPUNIT_ASSERT_EQUAL( utf8, s1 );
	216	#endif
	217
	218	wxString sub(utf8sub, wxConvUTF8); // "Dej" substring
	219	wxString s3(utf8, wxConvUTF8, 4);
	220	CPPUNIT_ASSERT_EQUAL( sub, s3 );
	221
	222	#if wxUSE_UNICODE
	223	wxString s4(wchar, wxConvUTF8, 3);
	224	CPPUNIT_ASSERT_EQUAL( sub, s4 );
	225
	226	// conversion should stop with failure at pos 35
	227	wxString s("\t[pl]open.format.Sformatuj dyskietk\xea=gfloppy %f", wxConvUTF8);
	228	CPPUNIT_ASSERT( s.empty() );
	229	#endif // wxUSE_UNICODE
	230
	231
	232	// test using Unicode strings together with char* strings (this must work
	233	// in ANSI mode as well, of course):
	234	wxString s5("ascii");
	235	CPPUNIT_ASSERT_EQUAL( "ascii", s5 );
	236
	237	s5 += " value";
	238
	239	CPPUNIT_ASSERT( strcmp(s5.mb_str(), "ascii value") == 0 );
	240	CPPUNIT_ASSERT_EQUAL( "ascii value", s5 );
	241	CPPUNIT_ASSERT( s5 != "SomethingElse" );
	242	}
	243
	244	void UnicodeTestCase::ConversionFixed()
	245	{
	246	size_t len;
	247
	248	#if wxUSE_UNICODE
	249	wxConvLibc.cWC2MB(L"", 0, &len);
	250	#else // !wxUSE_UNICODE
	251	wxConvLibc.cMB2WC("", 0, &len);
	252	#endif // wxUSE_UNICODE/!wxUSE_UNICODE
	253
	254	CPPUNIT_ASSERT_EQUAL( 0, len );
	255
	256	#if wxUSE_UNICODE
	257	// check that when we convert a fixed number of characters we obtain the
	258	// expected return value
	259	CPPUNIT_ASSERT_EQUAL( 0, wxConvLibc.ToWChar(NULL, 0, "", 0) );
	260	CPPUNIT_ASSERT_EQUAL( 1, wxConvLibc.ToWChar(NULL, 0, "x", 1) );
	261	CPPUNIT_ASSERT_EQUAL( 2, wxConvLibc.ToWChar(NULL, 0, "x", 2) );
	262	CPPUNIT_ASSERT_EQUAL( 2, wxConvLibc.ToWChar(NULL, 0, "xy", 2) );
	263	#endif // wxUSE_UNICODE
	264	}
	265
	266	void UnicodeTestCase::ConversionWithNULs()
	267	{
	268	#if wxUSE_UNICODE
	269	static const size_t lenNulString = 10;
	270
	271	wxString szTheString(L"The\0String", wxConvLibc, lenNulString);
	272	wxCharBuffer theBuffer = szTheString.mb_str();
	273
	274	CPPUNIT_ASSERT( memcmp(theBuffer.data(), "The\0String",
	275	lenNulString + 1) == 0 );
	276
	277	wxString szTheString2("The\0String", wxConvLocal, lenNulString);
	278	CPPUNIT_ASSERT_EQUAL( lenNulString, szTheString2.length() );
	279	CPPUNIT_ASSERT( wxTmemcmp(szTheString2.c_str(), L"The\0String",
	280	lenNulString + 1) == 0 );
	281	#else // !wxUSE_UNICODE
	282	wxString szTheString("TheString");
	283	szTheString.insert(3, 1, '\0');
	284	wxWCharBuffer theBuffer = szTheString.wc_str(wxConvLibc);
	285
	286	CPPUNIT_ASSERT( memcmp(theBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
	287
	288	wxString szLocalTheString("TheString");
	289	szLocalTheString.insert(3, 1, '\0');
	290	wxWCharBuffer theLocalBuffer = szLocalTheString.wc_str(wxConvLocal);
	291
	292	CPPUNIT_ASSERT( memcmp(theLocalBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
	293	#endif // wxUSE_UNICODE/!wxUSE_UNICODE
	294	}
	295
	296	void UnicodeTestCase::ConversionUTF7()
	297	{
	298	static const StringConversionData utf7data[] =
	299	{
	300	// normal fragments
	301	StringConversionData("+AKM-", L"\xa3"),
	302	StringConversionData("+AOk-t+AOk-", L"\xe9t\xe9"),
	303
	304	// this one is an alternative valid encoding of the same string
	305	StringConversionData("+AOk-t+AOk", L"\xe9t\xe9",
	306	StringConversionData::ONLY_MB2WC),
	307
	308	// some special cases
	309	StringConversionData("+-", L"+"),
	310	StringConversionData("+--", L"+-"),
	311
	312	// the following are invalid UTF-7 sequences
	313	StringConversionData("\xa3", NULL),
	314	StringConversionData("+", NULL),
	315	StringConversionData("+~", NULL),
	316	StringConversionData("a+", NULL),
	317	};
	318
	319	for ( size_t n = 0; n < WXSIZEOF(utf7data); n++ )
	320	{
	321	const StringConversionData& d = utf7data[n];
	322
	323	// converting to/from UTF-7 using iconv() currently doesn't work
	324	// because of several problems:
	325	// - GetMBNulLen() doesn't return correct result (iconv converts L'\0'
	326	// to an incomplete and anyhow nonsensical "+AA" string)
	327	// - iconv refuses to convert "+-" (although it converts "+-\n" just
	328	// fine, go figure)
	329	//
	330	// I have no idea how to fix this so just disable the test for now
	331	#if 0
	332	d.Test(n, wxCSConv("utf-7"));
	333	#endif
	334	d.Test(n, wxConvUTF7);
	335	}
	336	}
	337
	338	void UnicodeTestCase::ConversionUTF8()
	339	{
	340	static const StringConversionData utf8data[] =
	341	{
	342	#ifdef wxHAVE_U_ESCAPE
	343	StringConversionData("\xc2\xa3", L"\u00a3"),
	344	#endif
	345	StringConversionData("\xc2", NULL),
	346	};
	347
	348	wxCSConv conv(wxT("utf-8"));
	349	for ( size_t n = 0; n < WXSIZEOF(utf8data); n++ )
	350	{
	351	const StringConversionData& d = utf8data[n];
	352	d.Test(n, conv);
	353	d.Test(n, wxConvUTF8);
	354	}
	355	}
	356
	357	void UnicodeTestCase::ConversionUTF16()
	358	{
	359	static const StringConversionData utf16data[] =
	360	{
	361	#ifdef wxHAVE_U_ESCAPE
	362	StringConversionData(
	363	"\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
	364	L"\u041f\u0440\u0438\u0432\u0435\u0442"),
	365	StringConversionData(
	366	"\x01\0\0b\x01\0\0a\x01\0\0r\0\0",
	367	L"\u0100b\u0100a\u0100r"),
	368	#endif
	369	StringConversionData("\0f\0o\0o\0\0", L"foo"),
	370	};
	371
	372	wxCSConv conv(wxFONTENCODING_UTF16BE);
	373	for ( size_t n = 0; n < WXSIZEOF(utf16data); n++ )
	374	{
	375	const StringConversionData& d = utf16data[n];
	376	d.Test(n, conv);
	377	}
	378
	379	// special case: this string has consecutive NULs inside it which don't
	380	// terminate the string, this exposed a bug in our conversion code which
	381	// got confused in this case
	382	size_t len;
	383	conv.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len);
	384	CPPUNIT_ASSERT_EQUAL( 3, len );
	385	}
	386
	387	void UnicodeTestCase::ConversionUTF32()
	388	{
	389	static const StringConversionData utf32data[] =
	390	{
	391	#ifdef wxHAVE_U_ESCAPE
	392	StringConversionData(
	393	"\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
	394	L"\u041f\u0440\u0438\u0432\u0435\u0442"),
	395	#endif
	396	StringConversionData("\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L"foo"),
	397	};
	398
	399	wxCSConv conv(wxFONTENCODING_UTF32BE);
	400	for ( size_t n = 0; n < WXSIZEOF(utf32data); n++ )
	401	{
	402	const StringConversionData& d = utf32data[n];
	403	d.Test(n, conv);
	404	}
	405
	406	size_t len;
	407	conv.cMB2WC("\0\0\x01\0\0\0\0B\0\0\0C" /* A macron BC */, 12, &len);
	408	CPPUNIT_ASSERT_EQUAL( 3, len );
	409	}
	410
	411	void UnicodeTestCase::IsConvOk()
	412	{
	413	CPPUNIT_ASSERT( wxCSConv(wxFONTENCODING_SYSTEM).IsOk() );
	414	CPPUNIT_ASSERT( wxCSConv("US-ASCII").IsOk() );
	415	CPPUNIT_ASSERT( wxCSConv("UTF-8").IsOk() );
	416	CPPUNIT_ASSERT( !wxCSConv("NoSuchConversion").IsOk() );
	417
	418	#ifdef __WINDOWS__
	419	CPPUNIT_ASSERT( wxCSConv("WINDOWS-437").IsOk() );
	420	#endif
	421	}
	422
	#if wxUSE_UNICODE
	// Check reading and writing the characters of a string containing non-ASCII
	// characters using wxString iterators, including that the end iterators
	// obtained before modifying the string still compare equal to end() later.
	void UnicodeTestCase::Iteration()
	{
	    // "czech" in Czech ("cestina"):
	    static const char *textUTF8 = "\304\215e\305\241tina";
	    static const wchar_t textUTF16[] = {0x10D, 0x65, 0x161, 0x74, 0x69, 0x6E, 0x61, 0};

	    wxString text(wxString::FromUTF8(textUTF8));
	    CPPUNIT_ASSERT( wxStrcmp(text.wc_str(), textUTF16) == 0 );

	    // verify the string was decoded correctly:
	    {
	        size_t pos = 0;
	        wxString::const_iterator it = text.begin();
	        while ( it != text.end() )
	        {
	            CPPUNIT_ASSERT( *it == textUTF16[pos] );

	            ++it;
	            ++pos;
	        }
	    }

	    // overwrite the string with something that is shorter in UTF-8:
	    {
	        wxString::iterator it = text.begin();
	        while ( it != text.end() )
	        {
	            *it = 'x';
	            ++it;
	        }
	    }

	    // restore the original text now:
	    {
	        // remember the end iterators before starting to modify the string
	        wxString::iterator endBefore = text.end();
	        wxString::const_iterator constEndBefore = text.end();

	        size_t pos = 0;
	        wxString::iterator it = text.begin();
	        while ( it != text.end() )
	        {
	            *it = textUTF16[pos];

	            // they must remain equal to end() after each assignment
	            CPPUNIT_ASSERT( endBefore == text.end() );
	            CPPUNIT_ASSERT( constEndBefore == text.end() );

	            ++it;
	            ++pos;
	        }

	        CPPUNIT_ASSERT( endBefore == text.end() );
	        CPPUNIT_ASSERT( constEndBefore == text.end() );
	    }

	    // and verify it again:
	    {
	        size_t pos = 0;
	        wxString::const_iterator it = text.begin();
	        while ( it != text.end() )
	        {
	            CPPUNIT_ASSERT( *it == textUTF16[pos] );

	            ++it;
	            ++pos;
	        }
	    }
	}
	#endif // wxUSE_UNICODE
	476