git.saurik.com Git - wxWidgets.git/blame_incremental

... / ...

Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: tests/strings/unicode.cpp
	3	// Purpose: Unicode unit test
	4	// Author: Vadim Zeitlin, Wlodzimierz ABX Skiba
	5	// Created: 2004-04-28
	6	// RCS-ID: $Id$
	7	// Copyright: (c) 2004 Vadim Zeitlin, Wlodzimierz Skiba
	8	///////////////////////////////////////////////////////////////////////////////
	9
	10	// ----------------------------------------------------------------------------
	11	// headers
	12	// ----------------------------------------------------------------------------
	13
	14	#include "testprec.h"
	15
	16	#ifdef __BORLANDC__
	17	#pragma hdrstop
	18	#endif
	19
	20	#ifndef WX_PRECOMP
	21	#include "wx/wx.h"
	22	#endif // WX_PRECOMP
	23
	24	#include "wx/encconv.h"
	25
	26	// ----------------------------------------------------------------------------
	27	// helper class holding the matching MB and WC strings
	28	// ----------------------------------------------------------------------------
	29
	30	struct StringConversionData
	31	{
	32	// either str or wcs (but not both) may be NULL, this means that the conversion
	33	// to it should fail
	34	StringConversionData(const char str_, const wchar_t wcs_, int flags_ = 0)
	35	: str(str_), wcs(wcs_), flags(flags_)
	36	{
	37	}
	38
	39	const char * const str;
	40	const wchar_t * const wcs;
	41
	42	enum
	43	{
	44	TEST_BOTH = 0, // test both str -> wcs and wcs -> str
	45	ONLY_MB2WC = 1 // only test str -> wcs conversion
	46	};
	47
	48	const int flags;
	49
	50	// test that the conversion between str and wcs (subject to flags) succeeds
	51	//
	52	// the first argument is the index in the test array and is used solely for
	53	// diagnostics
	54	void Test(size_t n, wxMBConv& conv) const
	55	{
	56	if ( str )
	57	{
	58	wxWCharBuffer wbuf = conv.cMB2WC(str);
	59
	60	if ( wcs )
	61	{
	62	CPPUNIT_ASSERT_MESSAGE
	63	(
	64	Message(n, "MB2WC failed"),
	65	wbuf.data()
	66	);
	67
	68	CPPUNIT_ASSERT_MESSAGE
	69	(
	70	Message(n, "MB2WC", wbuf, wcs),
	71	wxStrcmp(wbuf, wcs) == 0
	72	);
	73	}
	74	else // conversion is supposed to fail
	75	{
	76	CPPUNIT_ASSERT_MESSAGE
	77	(
	78	Message(n, "MB2WC succeeded"),
	79	!wbuf.data()
	80	);
	81	}
	82	}
	83
	84	if ( wcs && !(flags & ONLY_MB2WC) )
	85	{
	86	wxCharBuffer buf = conv.cWC2MB(wcs);
	87
	88	if ( str )
	89	{
	90	CPPUNIT_ASSERT_MESSAGE
	91	(
	92	Message(n, "WC2MB failed"),
	93	buf.data()
	94	);
	95
	96	CPPUNIT_ASSERT_MESSAGE
	97	(
	98	Message(n, "WC2MB", buf, str),
	99	strcmp(buf, str) == 0
	100	);
	101	}
	102	else
	103	{
	104	CPPUNIT_ASSERT_MESSAGE
	105	(
	106	Message(n, "WC2MB succeeded"),
	107	!buf.data()
	108	);
	109	}
	110	}
	111	}
	112
	113	private:
	114	static std::string
	115	Message(size_t n, const wxString& msg)
	116	{
	117	return std::string(wxString::Format("#%lu: %s", (unsigned long)n, msg));
	118	}
	119
	120	template <typename T>
	121	static std::string
	122	Message(size_t n,
	123	const char *func,
	124	const wxCharTypeBuffer<T>& actual,
	125	const T *expected)
	126	{
	127	return Message(n,
	128	wxString::Format("%s returned \"%s\", expected \"%s\"",
	129	func, actual.data(), expected));
	130	}
	131	};
	132
	133	// ----------------------------------------------------------------------------
	134	// test class
	135	// ----------------------------------------------------------------------------
	136
	137	class UnicodeTestCase : public CppUnit::TestCase
	138	{
	139	public:
	140	UnicodeTestCase();
	141
	142	private:
	143	CPPUNIT_TEST_SUITE( UnicodeTestCase );
	144	CPPUNIT_TEST( ToFromAscii );
	145	CPPUNIT_TEST( ConstructorsWithConversion );
	146	CPPUNIT_TEST( ConversionFixed );
	147	CPPUNIT_TEST( ConversionWithNULs );
	148	CPPUNIT_TEST( ConversionUTF7 );
	149	CPPUNIT_TEST( ConversionUTF8 );
	150	CPPUNIT_TEST( ConversionUTF16 );
	151	CPPUNIT_TEST( ConversionUTF32 );
	152	CPPUNIT_TEST( IsConvOk );
	153	#if wxUSE_UNICODE
	154	CPPUNIT_TEST( Iteration );
	155	#endif
	156	CPPUNIT_TEST_SUITE_END();
	157
	158	void ToFromAscii();
	159	void ConstructorsWithConversion();
	160	void ConversionFixed();
	161	void ConversionWithNULs();
	162	void ConversionUTF7();
	163	void ConversionUTF8();
	164	void ConversionUTF16();
	165	void ConversionUTF32();
	166	void IsConvOk();
	167	#if wxUSE_UNICODE
	168	void Iteration();
	169	#endif
	170
	171	DECLARE_NO_COPY_CLASS(UnicodeTestCase)
	172	};
	173
	174	// register in the unnamed registry so that these tests are run by default
	175	CPPUNIT_TEST_SUITE_REGISTRATION( UnicodeTestCase );
	176
	177	// also include in its own registry so that these tests can be run alone
	178	CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( UnicodeTestCase, "UnicodeTestCase" );
	179
	180	UnicodeTestCase::UnicodeTestCase()
	181	{
	182	}
	183
	184	void UnicodeTestCase::ToFromAscii()
	185	{
	186
	187	#define TEST_TO_FROM_ASCII(txt) \
	188	{ \
	189	static const char *msg = txt; \
	190	wxString s = wxString::FromAscii(msg); \
	191	CPPUNIT_ASSERT( strcmp( s.ToAscii() , msg ) == 0 ); \
	192	}
	193
	194	TEST_TO_FROM_ASCII( "Hello, world!" );
	195	TEST_TO_FROM_ASCII( "additional \" special \t test \\ component \n :-)" );
	196	}
	197
	198	void UnicodeTestCase::ConstructorsWithConversion()
	199	{
	200	// the string "Déjà" in UTF-8 and wchar_t:
	201	const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
	202	const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
	203	const char utf8 = (char )utf8Buf;
	204	const char utf8sub = (char )utf8subBuf;
	205
	206	wxString s1(utf8, wxConvUTF8);
	207
	208	#if wxUSE_UNICODE
	209	const wchar_t wchar[] = {0x44,0xE9,0x6A,0xE0,0};
	210	CPPUNIT_ASSERT_EQUAL( wchar, s1 );
	211
	212	wxString s2(wchar);
	213	CPPUNIT_ASSERT_EQUAL( wchar, s2 );
	214	CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(utf8), s2 );
	215	#else
	216	CPPUNIT_ASSERT_EQUAL( utf8, s1 );
	217	#endif
	218
	219	wxString sub(utf8sub, wxConvUTF8); // "Dej" substring
	220	wxString s3(utf8, wxConvUTF8, 4);
	221	CPPUNIT_ASSERT_EQUAL( sub, s3 );
	222
	223	#if wxUSE_UNICODE
	224	wxString s4(wchar, wxConvUTF8, 3);
	225	CPPUNIT_ASSERT_EQUAL( sub, s4 );
	226
	227	// conversion should stop with failure at pos 35
	228	wxString s("\t[pl]open.format.Sformatuj dyskietk\xea=gfloppy %f", wxConvUTF8);
	229	CPPUNIT_ASSERT( s.empty() );
	230	#endif // wxUSE_UNICODE
	231
	232
	233	// test using Unicode strings together with char* strings (this must work
	234	// in ANSI mode as well, of course):
	235	wxString s5("ascii");
	236	CPPUNIT_ASSERT_EQUAL( "ascii", s5 );
	237
	238	s5 += " value";
	239
	240	CPPUNIT_ASSERT( strcmp(s5.mb_str(), "ascii value") == 0 );
	241	CPPUNIT_ASSERT_EQUAL( "ascii value", s5 );
	242	CPPUNIT_ASSERT( s5 != "SomethingElse" );
	243	}
	244
	245	void UnicodeTestCase::ConversionFixed()
	246	{
	247	size_t len;
	248
	249	#if wxUSE_UNICODE
	250	wxConvLibc.cWC2MB(L"", 0, &len);
	251	#else // !wxUSE_UNICODE
	252	wxConvLibc.cMB2WC("", 0, &len);
	253	#endif // wxUSE_UNICODE/!wxUSE_UNICODE
	254
	255	CPPUNIT_ASSERT_EQUAL( 0, len );
	256
	257	#if wxUSE_UNICODE
	258	// check that when we convert a fixed number of characters we obtain the
	259	// expected return value
	260	CPPUNIT_ASSERT_EQUAL( 0, wxConvLibc.ToWChar(NULL, 0, "", 0) );
	261	CPPUNIT_ASSERT_EQUAL( 1, wxConvLibc.ToWChar(NULL, 0, "x", 1) );
	262	CPPUNIT_ASSERT_EQUAL( 2, wxConvLibc.ToWChar(NULL, 0, "x", 2) );
	263	CPPUNIT_ASSERT_EQUAL( 2, wxConvLibc.ToWChar(NULL, 0, "xy", 2) );
	264	#endif // wxUSE_UNICODE
	265	}
	266
	267	void UnicodeTestCase::ConversionWithNULs()
	268	{
	269	#if wxUSE_UNICODE
	270	static const size_t lenNulString = 10;
	271
	272	wxString szTheString(L"The\0String", wxConvLibc, lenNulString);
	273	wxCharBuffer theBuffer = szTheString.mb_str();
	274
	275	CPPUNIT_ASSERT( memcmp(theBuffer.data(), "The\0String",
	276	lenNulString + 1) == 0 );
	277
	278	wxString szTheString2("The\0String", wxConvLocal, lenNulString);
	279	CPPUNIT_ASSERT_EQUAL( lenNulString, szTheString2.length() );
	280	CPPUNIT_ASSERT( wxTmemcmp(szTheString2.c_str(), L"The\0String",
	281	lenNulString + 1) == 0 );
	282	#else // !wxUSE_UNICODE
	283	wxString szTheString("TheString");
	284	szTheString.insert(3, 1, '\0');
	285	wxWCharBuffer theBuffer = szTheString.wc_str(wxConvLibc);
	286
	287	CPPUNIT_ASSERT( memcmp(theBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
	288
	289	wxString szLocalTheString("TheString");
	290	szLocalTheString.insert(3, 1, '\0');
	291	wxWCharBuffer theLocalBuffer = szLocalTheString.wc_str(wxConvLocal);
	292
	293	CPPUNIT_ASSERT( memcmp(theLocalBuffer.data(), L"The\0String", 11 * sizeof(wchar_t)) == 0 );
	294	#endif // wxUSE_UNICODE/!wxUSE_UNICODE
	295	}
	296
	297	void UnicodeTestCase::ConversionUTF7()
	298	{
	299	static const StringConversionData utf7data[] =
	300	{
	301	// normal fragments
	302	StringConversionData("+AKM-", L"\xa3"),
	303	StringConversionData("+AOk-t+AOk-", L"\xe9t\xe9"),
	304
	305	// this one is an alternative valid encoding of the same string
	306	StringConversionData("+AOk-t+AOk", L"\xe9t\xe9",
	307	StringConversionData::ONLY_MB2WC),
	308
	309	// some special cases
	310	StringConversionData("+-", L"+"),
	311	StringConversionData("+--", L"+-"),
	312
	313	// the following are invalid UTF-7 sequences
	314	StringConversionData("\xa3", NULL),
	315	StringConversionData("+", NULL),
	316	StringConversionData("+~", NULL),
	317	StringConversionData("a+", NULL),
	318	};
	319
	320	for ( size_t n = 0; n < WXSIZEOF(utf7data); n++ )
	321	{
	322	const StringConversionData& d = utf7data[n];
	323
	324	// converting to/from UTF-7 using iconv() currently doesn't work
	325	// because of several problems:
	326	// - GetMBNulLen() doesn't return correct result (iconv converts L'\0'
	327	// to an incomplete and anyhow nonsensical "+AA" string)
	328	// - iconv refuses to convert "+-" (although it converts "+-\n" just
	329	// fine, go figure)
	330	//
	331	// I have no idea how to fix this so just disable the test for now
	332	#if 0
	333	d.Test(n, wxCSConv("utf-7"));
	334	#endif
	335	d.Test(n, wxConvUTF7);
	336	}
	337	}
	338
	339	void UnicodeTestCase::ConversionUTF8()
	340	{
	341	static const StringConversionData utf8data[] =
	342	{
	343	#ifdef wxHAVE_U_ESCAPE
	344	StringConversionData("\xc2\xa3", L"\u00a3"),
	345	#endif
	346	StringConversionData("\xc2", NULL),
	347	};
	348
	349	wxCSConv conv(wxT("utf-8"));
	350	for ( size_t n = 0; n < WXSIZEOF(utf8data); n++ )
	351	{
	352	const StringConversionData& d = utf8data[n];
	353	d.Test(n, conv);
	354	d.Test(n, wxConvUTF8);
	355	}
	356	}
	357
	358	void UnicodeTestCase::ConversionUTF16()
	359	{
	360	static const StringConversionData utf16data[] =
	361	{
	362	#ifdef wxHAVE_U_ESCAPE
	363	StringConversionData(
	364	"\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
	365	L"\u041f\u0440\u0438\u0432\u0435\u0442"),
	366	StringConversionData(
	367	"\x01\0\0b\x01\0\0a\x01\0\0r\0\0",
	368	L"\u0100b\u0100a\u0100r"),
	369	#endif
	370	StringConversionData("\0f\0o\0o\0\0", L"foo"),
	371	};
	372
	373	wxCSConv conv(wxFONTENCODING_UTF16BE);
	374	for ( size_t n = 0; n < WXSIZEOF(utf16data); n++ )
	375	{
	376	const StringConversionData& d = utf16data[n];
	377	d.Test(n, conv);
	378	}
	379
	380	// special case: this string has consecutive NULs inside it which don't
	381	// terminate the string, this exposed a bug in our conversion code which
	382	// got confused in this case
	383	size_t len;
	384	conv.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len);
	385	CPPUNIT_ASSERT_EQUAL( 3, len );
	386	}
	387
	388	void UnicodeTestCase::ConversionUTF32()
	389	{
	390	static const StringConversionData utf32data[] =
	391	{
	392	#ifdef wxHAVE_U_ESCAPE
	393	StringConversionData(
	394	"\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
	395	L"\u041f\u0440\u0438\u0432\u0435\u0442"),
	396	#endif
	397	StringConversionData("\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L"foo"),
	398	};
	399
	400	wxCSConv conv(wxFONTENCODING_UTF32BE);
	401	for ( size_t n = 0; n < WXSIZEOF(utf32data); n++ )
	402	{
	403	const StringConversionData& d = utf32data[n];
	404	d.Test(n, conv);
	405	}
	406
	407	size_t len;
	408	conv.cMB2WC("\0\0\x01\0\0\0\0B\0\0\0C" /* A macron BC */, 12, &len);
	409	CPPUNIT_ASSERT_EQUAL( 3, len );
	410	}
	411
	412	void UnicodeTestCase::IsConvOk()
	413	{
	414	CPPUNIT_ASSERT( wxCSConv(wxFONTENCODING_SYSTEM).IsOk() );
	415	CPPUNIT_ASSERT( wxCSConv("US-ASCII").IsOk() );
	416	CPPUNIT_ASSERT( wxCSConv("UTF-8").IsOk() );
	417	CPPUNIT_ASSERT( !wxCSConv("NoSuchConversion").IsOk() );
	418
	419	#ifdef __WINDOWS__
	420	CPPUNIT_ASSERT( wxCSConv("WINDOWS-437").IsOk() );
	421	#endif
	422	}
	423
	424	#if wxUSE_UNICODE
	425	void UnicodeTestCase::Iteration()
	426	{
	427	// "czech" in Czech ("cestina"):
	428	static const char *textUTF8 = "\304\215e\305\241tina";
	429	static const wchar_t textUTF16[] = {0x10D, 0x65, 0x161, 0x74, 0x69, 0x6E, 0x61, 0};
	430
	431	wxString text(wxString::FromUTF8(textUTF8));
	432	CPPUNIT_ASSERT( wxStrcmp(text.wc_str(), textUTF16) == 0 );
	433
	434	// verify the string was decoded correctly:
	435	{
	436	size_t idx = 0;
	437	for ( wxString::const_iterator i = text.begin(); i != text.end(); ++i, ++idx )
	438	{
	439	CPPUNIT_ASSERT( *i == textUTF16[idx] );
	440	}
	441	}
	442
	443	// overwrite the string with something that is shorter in UTF-8:
	444	{
	445	for ( wxString::iterator i = text.begin(); i != text.end(); ++i )
	446	*i = 'x';
	447	}
	448
	449	// restore the original text now:
	450	{
	451	wxString::iterator end1 = text.end();
	452	wxString::const_iterator end2 = text.end();
	453
	454	size_t idx = 0;
	455	for ( wxString::iterator i = text.begin(); i != text.end(); ++i, ++idx )
	456	{
	457	*i = textUTF16[idx];
	458
	459	CPPUNIT_ASSERT( end1 == text.end() );
	460	CPPUNIT_ASSERT( end2 == text.end() );
	461	}
	462
	463	CPPUNIT_ASSERT( end1 == text.end() );
	464	CPPUNIT_ASSERT( end2 == text.end() );
	465	}
	466
	467	// and verify it again:
	468	{
	469	size_t idx = 0;
	470	for ( wxString::const_iterator i = text.begin(); i != text.end(); ++i, ++idx )
	471	{
	472	CPPUNIT_ASSERT( *i == textUTF16[idx] );
	473	}
	474	}
	475	}
	476	#endif // wxUSE_UNICODE
	477