git.saurik.com Git - wxWidgets.git/blame

Commit	Line	Data
6001e347	1	/////////////////////////////////////////////////////////////////////////////
38d4b1e4	2	// Name: src/common/strconv.cpp
6001e347	3	// Purpose: Unicode conversion classes
15f2ee32 RN	4	// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
15f2ee32 RN	5	// Ryan Norton, Fredrik Roubert (UTF7)
6001e347 RR	6	// Modified by:
6001e347 RR	7	// Created: 29/01/98
e95354ec VZ	8	// Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
e95354ec VZ	9	// (c) 2000-2003 Vadim Zeitlin
15f2ee32	10	// (c) 2004 Ryan Norton, Fredrik Roubert
65571936	11	// Licence: wxWindows licence
6001e347 RR	12	/////////////////////////////////////////////////////////////////////////////
6001e347 RR	13
6001e347 RR	14	// For compilers that support precompilation, includes "wx.h".
	15	#include "wx/wxprec.h"
	16
480f42ec VS	17	#ifdef __BORLANDC__
	18	#pragma hdrstop
	19	#endif //__BORLANDC__
	20
373658eb VZ	21	#ifndef WX_PRECOMP
	22	#include "wx/intl.h"
	23	#include "wx/log.h"
de6185e2	24	#include "wx/utils.h"
df69528b	25	#include "wx/hashmap.h"
ef199164	26	#endif
373658eb	27
bde4baac VZ	28	#include "wx/strconv.h"
bde4baac VZ	29
1c193821	30	#ifndef __WXWINCE__
1cd52418	31	#include <errno.h>
1c193821 JS	32	#endif
1c193821 JS	33
6001e347 RR	34	#include <ctype.h>
	35	#include <string.h>
	36	#include <stdlib.h>
	37
e95354ec	38	#if defined(__WIN32__) && !defined(__WXMICROWIN__)
a6c2e2c7 VZ	39	#include "wx/msw/private.h"
a6c2e2c7 VZ	40	#include "wx/msw/missing.h"
e95354ec	41	#define wxHAVE_WIN32_MB2WC
ef199164	42	#endif
e95354ec	43
b040e242	44	#ifdef HAVE_ICONV
373658eb	45	#include <iconv.h>
b1d547eb	46	#include "wx/thread.h"
1cd52418	47	#endif
1cd52418	48
373658eb VZ	49	#include "wx/encconv.h"
	50	#include "wx/fontmap.h"
	51
5c4ed98d	52	#ifdef __DARWIN__
c933e267	53	#include "wx/osx/core/private/strconv_cf.h"
5c4ed98d DE	54	#endif //def __DARWIN__
5c4ed98d DE	55
ef199164	56
9a83f860	57	#define TRACE_STRCONV wxT("strconv")
ce6f8d6f	58
467e0479 VZ	59	// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
467e0479 VZ	60	// be 4 bytes
4948c2b6	61	#if SIZEOF_WCHAR_T == 2
ac11db3a MW	62	#define WC_UTF16
	63	#endif
	64
ef199164	65
373658eb VZ	66	// ============================================================================
	67	// implementation
	68	// ============================================================================
	69
// Helper of wxMBConv::ToWChar(): checks the n bytes starting at p.
//
// Returns true if at least one of these bytes is not NUL and false if all of
// them are NUL (which includes the case of n == 0).
static bool NotAllNULs(const char *p, size_t n)
{
    // skip over the leading NUL bytes, decreasing the count of bytes left
    while ( n && *p++ == '\0' )
        n--;

    // if we stopped before exhausting the range, we found a non-NUL byte
    return n != 0;
}
	78
373658eb	79	// ----------------------------------------------------------------------------
467e0479	80	// UTF-16 en/decoding to/from UCS-4 with surrogates handling
373658eb	81	// ----------------------------------------------------------------------------
6001e347	82
c91830cb	83	static size_t encode_utf16(wxUint32 input, wxUint16 *output)
1cd52418	84	{
ef199164	85	if (input <= 0xffff)
4def3b35	86	{
999836aa VZ	87	if (output)
999836aa VZ	88	*output = (wxUint16) input;
ef199164	89
4def3b35	90	return 1;
dccce9ea	91	}
ef199164	92	else if (input >= 0x110000)
4def3b35	93	{
467e0479	94	return wxCONV_FAILED;
dccce9ea VZ	95	}
dccce9ea VZ	96	else
4def3b35	97	{
dccce9ea	98	if (output)
4def3b35	99	{
ef199164 DS	100	*output++ = (wxUint16) ((input >> 10) + 0xd7c0);
ef199164 DS	101	*output = (wxUint16) ((input & 0x3ff) + 0xdc00);
4def3b35	102	}
ef199164	103
4def3b35	104	return 2;
1cd52418	105	}
1cd52418 OK	106	}
1cd52418 OK	107
c91830cb	108	static size_t decode_utf16(const wxUint16* input, wxUint32& output)
1cd52418	109	{
ef199164	110	if ((input < 0xd800) \|\| (input > 0xdfff))
4def3b35 VS	111	{
	112	output = *input;
	113	return 1;
dccce9ea	114	}
ef199164	115	else if ((input[1] < 0xdc00) \|\| (input[1] > 0xdfff))
4def3b35 VS	116	{
4def3b35 VS	117	output = *input;
467e0479	118	return wxCONV_FAILED;
dccce9ea VZ	119	}
dccce9ea VZ	120	else
4def3b35 VS	121	{
	122	output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
	123	return 2;
	124	}
1cd52418 OK	125	}
1cd52418 OK	126
467e0479	127	#ifdef WC_UTF16
35d11700 VZ	128	typedef wchar_t wxDecodeSurrogate_t;
	129	#else // !WC_UTF16
	130	typedef wxUint16 wxDecodeSurrogate_t;
	131	#endif // WC_UTF16/!WC_UTF16
467e0479 VZ	132
	133	// returns the next UTF-32 character from the wchar_t buffer and advances the
	134	// pointer to the character after this one
	135	//
	136	// if an invalid character is found, *pSrc is set to NULL, the caller must
	137	// check for this
35d11700	138	static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
467e0479 VZ	139	{
467e0479 VZ	140	wxUint32 out;
8d3dd069	141	const size_t
5c33522f	142	n = decode_utf16(reinterpret_cast<const wxUint16 >(pSrc), out);
467e0479 VZ	143	if ( n == wxCONV_FAILED )
	144	*pSrc = NULL;
	145	else
	146	*pSrc += n;
	147
	148	return out;
	149	}
	150
f6bcfd97	151	// ----------------------------------------------------------------------------
6001e347	152	// wxMBConv
f6bcfd97	153	// ----------------------------------------------------------------------------
2c53a80a	154
483b0434 VZ	155	size_t
	156	wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
	157	const char *src, size_t srcLen) const
6001e347	158	{
483b0434	159	// although new conversion classes are supposed to implement this function
36f93678	160	// directly, the existing ones only implement the old MB2WC() and so, to
483b0434 VZ	161	// avoid to have to rewrite all conversion classes at once, we provide a
	162	// default (but not efficient) implementation of this one in terms of the
	163	// old function by copying the input to ensure that it's NUL-terminated and
	164	// then using MB2WC() to convert it
36f93678 VZ	165	//
	166	// moreover, some conversion classes simply can't implement ToWChar()
	167	// directly, the primary example is wxConvLibc: mbstowcs() only handles
	168	// NUL-terminated strings
6001e347	169
483b0434 VZ	170	// the number of chars [which would be] written to dst [if it were not NULL]
483b0434 VZ	171	size_t dstWritten = 0;
eec47cc6	172
c1464d9d	173	// the number of NULs terminating this string
a78c43f1	174	size_t nulLen = 0; // not really needed, but just to avoid warnings
eec47cc6	175
c1464d9d VZ	176	// if we were not given the input size we just have to assume that the
	177	// string is properly terminated as we have no way of knowing how long it
	178	// is anyhow, but if we do have the size check whether there are enough
	179	// NULs at the end
483b0434 VZ	180	wxCharBuffer bufTmp;
483b0434 VZ	181	const char *srcEnd;
467e0479	182	if ( srcLen != wxNO_LEN )
eec47cc6	183	{
c1464d9d	184	// we need to know how to find the end of this string
7ef3ab50	185	nulLen = GetMBNulLen();
483b0434 VZ	186	if ( nulLen == wxCONV_FAILED )
483b0434 VZ	187	return wxCONV_FAILED;
e4e3bbb4	188
c1464d9d	189	// if there are enough NULs we can avoid the copy
483b0434	190	if ( srcLen < nulLen \|\| NotAllNULs(src + srcLen - nulLen, nulLen) )
eec47cc6 VZ	191	{
eec47cc6 VZ	192	// make a copy in order to properly NUL-terminate the string
483b0434	193	bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
c1464d9d	194	char * const p = bufTmp.data();
483b0434 VZ	195	memcpy(p, src, srcLen);
483b0434 VZ	196	for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
c1464d9d	197	*s = '\0';
483b0434 VZ	198
483b0434 VZ	199	src = bufTmp;
eec47cc6	200	}
e4e3bbb4	201
483b0434 VZ	202	srcEnd = src + srcLen;
	203	}
	204	else // quit after the first loop iteration
	205	{
	206	srcEnd = NULL;
	207	}
e4e3bbb4	208
36f93678 VZ	209	// the idea of this code is straightforward: it converts a NUL-terminated
	210	// chunk of the string during each iteration and updates the output buffer
	211	// with the result
	212	//
	213	// all the complication come from the fact that this function, for
	214	// historical reasons, must behave in 2 subtly different ways when it's
	215	// called with a fixed number of characters and when it's called for the
bbb0ff36	216	// entire NUL-terminated string: in the former case (srcEnd != NULL) we
36f93678 VZ	217	// must count all characters we convert, NUL or not; but in the latter we
	218	// do not count the trailing NUL -- but still count all the NULs inside the
	219	// string
	220	//
	221	// so for the (simple) former case we just always count the trailing NUL,
	222	// but for the latter we need to wait until we see if there is going to be
	223	// another loop iteration and only count it then
483b0434	224	for ( ;; )
eec47cc6	225	{
c1464d9d	226	// try to convert the current chunk
483b0434	227	size_t lenChunk = MB2WC(NULL, src, 0);
483b0434 VZ	228	if ( lenChunk == wxCONV_FAILED )
483b0434 VZ	229	return wxCONV_FAILED;
e4e3bbb4	230
483b0434	231	dstWritten += lenChunk;
f6a02087 VZ	232	if ( !srcEnd )
f6a02087 VZ	233	dstWritten++;
f5fb6871	234
f6a02087	235	if ( !lenChunk )
467e0479 VZ	236	{
	237	// nothing left in the input string, conversion succeeded
	238	break;
	239	}
	240
483b0434 VZ	241	if ( dst )
	242	{
	243	if ( dstWritten > dstLen )
	244	return wxCONV_FAILED;
	245
f6a02087 VZ	246	// +1 is for trailing NUL
f6a02087 VZ	247	if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
483b0434 VZ	248	return wxCONV_FAILED;
	249
	250	dst += lenChunk;
f6a02087 VZ	251	if ( !srcEnd )
f6a02087 VZ	252	dst++;
483b0434	253	}
c1464d9d	254
483b0434	255	if ( !srcEnd )
c1464d9d	256	{
467e0479	257	// we convert just one chunk in this case as this is the entire
bbb0ff36	258	// string anyhow (and we don't count the trailing NUL in this case)
c1464d9d VZ	259	break;
c1464d9d VZ	260	}
eec47cc6	261
bbb0ff36 VZ	262	// advance the input pointer past the end of this chunk: notice that we
	263	// will always stop before srcEnd because we know that the chunk is
	264	// always properly NUL-terminated
483b0434	265	while ( NotAllNULs(src, nulLen) )
c1464d9d VZ	266	{
	267	// notice that we must skip over multiple bytes here as we suppose
	268	// that if NUL takes 2 or 4 bytes, then all the other characters do
	269	// too and so if advanced by a single byte we might erroneously
	270	// detect sequences of NUL bytes in the middle of the input
483b0434	271	src += nulLen;
c1464d9d	272	}
e4e3bbb4	273
bbb0ff36 VZ	274	// if the buffer ends before this NUL, we shouldn't count it in our
	275	// output so skip the code below
	276	if ( src == srcEnd )
	277	break;
	278
	279	// do count this terminator as it's inside the buffer we convert
	280	dstWritten++;
	281	if ( dst )
	282	dst++;
	283
	284	src += nulLen; // skip the terminator itself
c1464d9d	285
483b0434	286	if ( src >= srcEnd )
c1464d9d VZ	287	break;
	288	}
	289
483b0434	290	return dstWritten;
e4e3bbb4 RN	291	}
e4e3bbb4 RN	292
483b0434 VZ	293	size_t
	294	wxMBConv::FromWChar(char *dst, size_t dstLen,
	295	const wchar_t *src, size_t srcLen) const
e4e3bbb4	296	{
483b0434 VZ	297	// the number of chars [which would be] written to dst [if it were not NULL]
483b0434 VZ	298	size_t dstWritten = 0;
e4e3bbb4	299
f6a02087 VZ	300	// if we don't know its length we have no choice but to assume that it is
	301	// NUL-terminated (notice that it can still be NUL-terminated even if
	302	// explicit length is given but it doesn't change our return value)
	303	const bool isNulTerminated = srcLen == wxNO_LEN;
	304
eec47cc6 VZ	305	// make a copy of the input string unless it is already properly
eec47cc6 VZ	306	// NUL-terminated
eec47cc6	307	wxWCharBuffer bufTmp;
f6a02087	308	if ( isNulTerminated )
e4e3bbb4	309	{
483b0434	310	srcLen = wxWcslen(src) + 1;
eec47cc6	311	}
483b0434	312	else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
eec47cc6 VZ	313	{
eec47cc6 VZ	314	// make a copy in order to properly NUL-terminate the string
483b0434	315	bufTmp = wxWCharBuffer(srcLen);
ef199164	316	memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
483b0434 VZ	317	src = bufTmp;
	318	}
	319
	320	const size_t lenNul = GetMBNulLen();
	321	for ( const wchar_t * const srcEnd = src + srcLen;
	322	src < srcEnd;
27307233	323	src++ /* skip L'\0' too */ )
483b0434 VZ	324	{
	325	// try to convert the current chunk
	326	size_t lenChunk = WC2MB(NULL, src, 0);
483b0434 VZ	327	if ( lenChunk == wxCONV_FAILED )
	328	return wxCONV_FAILED;
	329
483b0434	330	dstWritten += lenChunk;
27307233 VZ	331
	332	const wchar_t * const
	333	chunkEnd = isNulTerminated ? srcEnd - 1 : src + wxWcslen(src);
	334
	335	// our return value accounts for the trailing NUL(s), unlike that of
	336	// WC2MB(), however don't do it for the last NUL we artificially added
	337	// ourselves above
	338	if ( chunkEnd < srcEnd )
f6a02087	339	dstWritten += lenNul;
483b0434 VZ	340
	341	if ( dst )
	342	{
	343	if ( dstWritten > dstLen )
	344	return wxCONV_FAILED;
	345
27307233 VZ	346	// if we know that there is enough space in the destination buffer
	347	// (because we accounted for lenNul in dstWritten above), we can
	348	// convert directly in place -- but otherwise we need another
	349	// temporary buffer to ensure that we don't overwrite the output
	350	wxCharBuffer dstBuf;
	351	char *dstTmp;
	352	if ( chunkEnd == srcEnd )
	353	{
	354	dstBuf = wxCharBuffer(lenChunk + lenNul - 1);
	355	dstTmp = dstBuf.data();
	356	}
	357	else
	358	{
	359	dstTmp = dst;
	360	}
	361
	362	if ( WC2MB(dstTmp, src, lenChunk + lenNul) == wxCONV_FAILED )
483b0434 VZ	363	return wxCONV_FAILED;
483b0434 VZ	364
27307233 VZ	365	if ( dstTmp != dst )
	366	{
	367	// copy everything up to but excluding the terminating NUL(s)
	368	// into the real output buffer
	369	memcpy(dst, dstTmp, lenChunk);
	370
	371	// micro-optimization: if dstTmp != dst it means that chunkEnd
	372	// == srcEnd and so we're done, no need to update anything below
	373	break;
	374	}
	375
483b0434	376	dst += lenChunk;
27307233	377	if ( chunkEnd < srcEnd )
f6a02087	378	dst += lenNul;
483b0434	379	}
27307233 VZ	380
27307233 VZ	381	src = chunkEnd;
eec47cc6	382	}
e4e3bbb4	383
483b0434 VZ	384	return dstWritten;
	385	}
	386
ef199164	387	size_t wxMBConv::MB2WC(wchar_t outBuff, const char inBuff, size_t outLen) const
509da451	388	{
51725fc0	389	size_t rc = ToWChar(outBuff, outLen, inBuff);
467e0479	390	if ( rc != wxCONV_FAILED )
509da451 VZ	391	{
	392	// ToWChar() returns the buffer length, i.e. including the trailing
	393	// NUL, while this method doesn't take it into account
	394	rc--;
	395	}
	396
	397	return rc;
	398	}
	399
ef199164	400	size_t wxMBConv::WC2MB(char outBuff, const wchar_t inBuff, size_t outLen) const
509da451	401	{
51725fc0	402	size_t rc = FromWChar(outBuff, outLen, inBuff);
467e0479	403	if ( rc != wxCONV_FAILED )
509da451	404	{
51725fc0	405	rc -= GetMBNulLen();
509da451 VZ	406	}
	407
	408	return rc;
	409	}
	410
483b0434 VZ	411	wxMBConv::~wxMBConv()
	412	{
	413	// nothing to do here (necessary for Darwin linking probably)
	414	}
e4e3bbb4	415
483b0434 VZ	416	const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
	417	{
	418	if ( psz )
eec47cc6	419	{
483b0434	420	// calculate the length of the buffer needed first
a2db25a1	421	const size_t nLen = ToWChar(NULL, 0, psz);
467e0479	422	if ( nLen != wxCONV_FAILED )
f5fb6871	423	{
483b0434	424	// now do the actual conversion
a2db25a1	425	wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */);
eec47cc6	426
483b0434	427	// +1 for the trailing NULL
a2db25a1	428	if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED )
483b0434	429	return buf;
f5fb6871	430	}
483b0434	431	}
e4e3bbb4	432
483b0434 VZ	433	return wxWCharBuffer();
483b0434 VZ	434	}
3698ae71	435
483b0434 VZ	436	const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
	437	{
	438	if ( pwz )
	439	{
a2db25a1	440	const size_t nLen = FromWChar(NULL, 0, pwz);
467e0479	441	if ( nLen != wxCONV_FAILED )
483b0434	442	{
a2db25a1 VZ	443	wxCharBuffer buf(nLen - 1);
a2db25a1 VZ	444	if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED )
483b0434 VZ	445	return buf;
	446	}
	447	}
	448
	449	return wxCharBuffer();
	450	}
e4e3bbb4	451
483b0434	452	const wxWCharBuffer
ef199164	453	wxMBConv::cMB2WC(const char inBuff, size_t inLen, size_t outLen) const
483b0434	454	{
ef199164	455	const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
467e0479	456	if ( dstLen != wxCONV_FAILED )
483b0434	457	{
0dd13d21 VZ	458	// notice that we allocate space for dstLen+1 wide characters here
	459	// because we want the buffer to always be NUL-terminated, even if the
	460	// input isn't (as otherwise the caller has no way to know its length)
	461	wxWCharBuffer wbuf(dstLen);
ef199164	462	if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434 VZ	463	{
483b0434 VZ	464	if ( outLen )
467e0479 VZ	465	{
467e0479 VZ	466	*outLen = dstLen;
f6a02087 VZ	467
	468	// we also need to handle NUL-terminated input strings
	469	// specially: for them the output is the length of the string
	470	// excluding the trailing NUL, however if we're asked to
	471	// convert a specific number of characters we return the length
	472	// of the resulting output even if it's NUL-terminated
	473	if ( inLen == wxNO_LEN )
467e0479 VZ	474	(*outLen)--;
	475	}
	476
483b0434 VZ	477	return wbuf;
	478	}
	479	}
	480
	481	if ( outLen )
	482	*outLen = 0;
	483
	484	return wxWCharBuffer();
	485	}
	486
	487	const wxCharBuffer
ef199164	488	wxMBConv::cWC2MB(const wchar_t inBuff, size_t inLen, size_t outLen) const
483b0434	489	{
13d92ad6	490	size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
467e0479	491	if ( dstLen != wxCONV_FAILED )
483b0434	492	{
0dd13d21 VZ	493	const size_t nulLen = GetMBNulLen();
	494
	495	// as above, ensure that the buffer is always NUL-terminated, even if
	496	// the input is not
	497	wxCharBuffer buf(dstLen + nulLen - 1);
	498	memset(buf.data() + dstLen, 0, nulLen);
ef199164	499	if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
483b0434 VZ	500	{
483b0434 VZ	501	if ( outLen )
467e0479 VZ	502	{
	503	*outLen = dstLen;
	504
f6a02087	505	if ( inLen == wxNO_LEN )
467e0479	506	{
f6a02087 VZ	507	// in this case both input and output are NUL-terminated
f6a02087 VZ	508	// and we're not supposed to count NUL
13d92ad6	509	*outLen -= nulLen;
467e0479 VZ	510	}
467e0479 VZ	511	}
d32a507d	512
483b0434 VZ	513	return buf;
483b0434 VZ	514	}
e4e3bbb4 RN	515	}
e4e3bbb4 RN	516
eec47cc6 VZ	517	if ( outLen )
	518	*outLen = 0;
	519
	520	return wxCharBuffer();
e4e3bbb4 RN	521	}
e4e3bbb4 RN	522
40ac5040 VZ	523	const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const
	524	{
	525	const size_t srcLen = buf.length();
	526	if ( srcLen )
	527	{
	528	const size_t dstLen = ToWChar(NULL, 0, buf, srcLen);
	529	if ( dstLen != wxCONV_FAILED )
	530	{
	531	wxWCharBuffer wbuf(dstLen);
	532	wbuf.data()[dstLen] = L'\0';
	533	if ( ToWChar(wbuf.data(), dstLen, buf, srcLen) != wxCONV_FAILED )
	534	return wbuf;
	535	}
	536	}
	537
cfcfada9	538	return wxScopedWCharBuffer::CreateNonOwned(L"", 0);
40ac5040 VZ	539	}
	540
	541	const wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const
	542	{
	543	const size_t srcLen = wbuf.length();
	544	if ( srcLen )
	545	{
	546	const size_t dstLen = FromWChar(NULL, 0, wbuf, srcLen);
	547	if ( dstLen != wxCONV_FAILED )
	548	{
	549	wxCharBuffer buf(dstLen);
	550	buf.data()[dstLen] = '\0';
	551	if ( FromWChar(buf.data(), dstLen, wbuf, srcLen) != wxCONV_FAILED )
	552	return buf;
	553	}
	554	}
	555
cfcfada9	556	return wxScopedCharBuffer::CreateNonOwned("", 0);
40ac5040 VZ	557	}
40ac5040 VZ	558
6001e347	559	// ----------------------------------------------------------------------------
bde4baac	560	// wxMBConvLibc
6001e347 RR	561	// ----------------------------------------------------------------------------
6001e347 RR	562
bde4baac VZ	563	size_t wxMBConvLibc::MB2WC(wchar_t buf, const char psz, size_t n) const
	564	{
	565	return wxMB2WC(buf, psz, n);
	566	}
	567
	568	size_t wxMBConvLibc::WC2MB(char buf, const wchar_t psz, size_t n) const
	569	{
	570	return wxWC2MB(buf, psz, n);
	571	}
e1bfe89e RR	572
e1bfe89e RR	573	// ----------------------------------------------------------------------------
532d575b	574	// wxConvBrokenFileNames
e1bfe89e RR	575	// ----------------------------------------------------------------------------
e1bfe89e RR	576
eec47cc6 VZ	577	#ifdef __UNIX__
eec47cc6 VZ	578
86501081	579	wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
ea8ce907	580	{
9a83f860 VZ	581	if ( wxStricmp(charset, wxT("UTF-8")) == 0 \|\|
9a83f860 VZ	582	wxStricmp(charset, wxT("UTF8")) == 0 )
5deedd6e	583	m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
845905d5 MW	584	else
845905d5 MW	585	m_conv = new wxCSConv(charset);
ea8ce907 RR	586	}
ea8ce907 RR	587
eec47cc6	588	#endif // __UNIX__
c12b7f79	589
bde4baac	590	// ----------------------------------------------------------------------------
3698ae71	591	// UTF-7
bde4baac	592	// ----------------------------------------------------------------------------
6001e347	593
15f2ee32	594	// Implementation (C) 2004 Fredrik Roubert
9d653e81 VZ	595	//
9d653e81 VZ	596	// Changes to work in streaming mode (C) 2008 Vadim Zeitlin
6001e347	597
//
// BASE64 decoding table
//
// Indexed by the byte value: gives the 6 bit value (0x00..0x3f) of the
// characters of the base64 alphabet "A-Za-z0-9+/" and 0xff for all the other
// bytes.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
15f2ee32 RN	636
9d653e81 VZ	637	size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
9d653e81 VZ	638	const char *src, size_t srcLen) const
15f2ee32	639	{
9d653e81	640	DecoderState stateOrig,
852dcba5	641	*statePtr;
9d653e81 VZ	642	if ( srcLen == wxNO_LEN )
	643	{
	644	// convert the entire string, up to and including the trailing NUL
	645	srcLen = strlen(src) + 1;
	646
	647	// when working on the entire strings we don't update nor use the shift
	648	// state from the previous call
	649	statePtr = &stateOrig;
	650	}
	651	else // when working with partial strings we do use the shift state
	652	{
5c33522f	653	statePtr = const_cast<DecoderState *>(&m_stateDecoder);
9d653e81 VZ	654
	655	// also save the old state to be able to rollback to it on error
	656	stateOrig = m_stateDecoder;
	657	}
	658
	659	// but to simplify the code below we use this variable in both cases
	660	DecoderState& state = *statePtr;
	661
	662
	663	// number of characters [which would have been] written to dst [if it were
	664	// not NULL]
15f2ee32 RN	665	size_t len = 0;
15f2ee32 RN	666
9d653e81 VZ	667	const char * const srcEnd = src + srcLen;
	668
	669	while ( (src < srcEnd) && (!dst \|\| (len < dstLen)) )
15f2ee32	670	{
9d653e81 VZ	671	const unsigned char cc = *src++;
	672
	673	if ( state.IsShifted() )
15f2ee32	674	{
9d653e81 VZ	675	const unsigned char dc = utf7unb64[cc];
9d653e81 VZ	676	if ( dc == 0xff )
15f2ee32	677	{
ccaa848d VZ	678	// end of encoded part, check that nothing was left: there can
	679	// be up to 4 bits of 0 padding but nothing else (we also need
	680	// to check isLSB as we count bits modulo 8 while a valid UTF-7
	681	// encoded sequence must contain an integral number of UTF-16
	682	// characters)
	683	if ( state.isLSB \|\| state.bit > 4 \|\|
	684	(state.accum & ((1 << state.bit) - 1)) )
	685	{
	686	if ( !len )
	687	state = stateOrig;
	688
852dcba5	689	return wxCONV_FAILED;
ccaa848d	690	}
852dcba5	691
9d653e81 VZ	692	state.ToDirect();
	693
	694	// re-parse this character normally below unless it's '-' which
	695	// is consumed by the decoder
	696	if ( cc == '-' )
	697	continue;
	698	}
	699	else // valid encoded character
	700	{
	701	// mini base64 decoder: each character is 6 bits
	702	state.bit += 6;
	703	state.accum <<= 6;
	704	state.accum += dc;
	705
	706	if ( state.bit >= 8 )
15f2ee32	707	{
9d653e81 VZ	708	// got the full byte, consume it
	709	state.bit -= 8;
	710	unsigned char b = (state.accum >> state.bit) & 0x00ff;
	711
	712	if ( state.isLSB )
15f2ee32	713	{
9d653e81 VZ	714	// we've got the full word, output it
	715	if ( dst )
	716	*dst++ = (state.msb << 8) \| b;
	717	len++;
	718	state.isLSB = false;
15f2ee32	719	}
9d653e81	720	else // MSB
04a37834	721	{
9d653e81 VZ	722	// just store it while we wait for LSB
	723	state.msb = b;
	724	state.isLSB = true;
04a37834	725	}
15f2ee32 RN	726	}
15f2ee32 RN	727	}
9d653e81	728	}
04a37834	729
9d653e81 VZ	730	if ( state.IsDirect() )
	731	{
	732	// start of an encoded segment?
	733	if ( cc == '+' )
04a37834	734	{
9d653e81 VZ	735	if ( *src == '-' )
	736	{
	737	// just the encoded plus sign, don't switch to shifted mode
	738	if ( dst )
	739	*dst++ = '+';
	740	len++;
	741	src++;
	742	}
ccaa848d VZ	743	else if ( utf7unb64[(unsigned)*src] == 0xff )
	744	{
	745	// empty encoded chunks are not allowed
	746	if ( !len )
	747	state = stateOrig;
	748
	749	return wxCONV_FAILED;
	750	}
	751	else // base-64 encoded chunk follows
9d653e81 VZ	752	{
	753	state.ToShifted();
	754	}
	755	}
	756	else // not '+'
	757	{
	758	// only printable 7 bit ASCII characters (with the exception of
	759	// NUL, TAB, CR and LF) can be used directly
	760	if ( cc >= 0x7f \|\| (cc < ' ' &&
	761	!(cc == '\0' \|\| cc == '\t' \|\| cc == '\r' \|\| cc == '\n')) )
	762	return wxCONV_FAILED;
	763
	764	if ( dst )
	765	*dst++ = cc;
	766	len++;
	767	}
15f2ee32 RN	768	}
15f2ee32 RN	769	}
04a37834	770
9d653e81 VZ	771	if ( !len )
	772	{
	773	// as we didn't read any characters we should be called with the same
	774	// data (followed by some more new data) again later so don't save our
	775	// state
	776	state = stateOrig;
	777
	778	return wxCONV_FAILED;
	779	}
04a37834	780
15f2ee32	781	return len;
6001e347 RR	782	}
6001e347 RR	783
//
// BASE64 encoding table
//
// Maps a 6 bit value (0..63) to the corresponding character of the base64
// alphabet.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
	798
	799	//
	800	// UTF-7 encoding table
	801	//
	802	// 0 - Set D (directly encoded characters)
	803	// 1 - Set O (optional direct characters)
	804	// 2 - whitespace characters (optional)
	805	// 3 - special characters
	806	//
	807	static const unsigned char utf7encode[128] =
6001e347	808	{
9d653e81	809	0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
15f2ee32 RN	810	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	811	2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
	812	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	813	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	814	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
	815	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	816	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
	817	};
	818
9d653e81 VZ	819	static inline bool wxIsUTF7Direct(wchar_t wc)
	820	{
	821	return wc < 0x80 && utf7encode[wc] < 1;
	822	}
	823
	824	size_t wxMBConvUTF7::FromWChar(char *dst, size_t dstLen,
	825	const wchar_t *src, size_t srcLen) const
15f2ee32	826	{
9d653e81 VZ	827	EncoderState stateOrig,
	828	*statePtr;
	829	if ( srcLen == wxNO_LEN )
	830	{
	831	// we don't apply the stored state when operating on entire strings at
	832	// once
	833	statePtr = &stateOrig;
	834
	835	srcLen = wxWcslen(src) + 1;
	836	}
	837	else // do use the mode we left the output in previously
	838	{
	839	stateOrig = m_stateEncoder;
5c33522f	840	statePtr = const_cast<EncoderState *>(&m_stateEncoder);
9d653e81 VZ	841	}
	842
	843	EncoderState& state = *statePtr;
	844
	845
15f2ee32 RN	846	size_t len = 0;
15f2ee32 RN	847
9d653e81 VZ	848	const wchar_t * const srcEnd = src + srcLen;
9d653e81 VZ	849	while ( src < srcEnd && (!dst \|\| len < dstLen) )
15f2ee32	850	{
9d653e81 VZ	851	wchar_t cc = *src++;
9d653e81 VZ	852	if ( wxIsUTF7Direct(cc) )
15f2ee32	853	{
9d653e81 VZ	854	if ( state.IsShifted() )
	855	{
	856	// pad with zeros the last encoded block if necessary
	857	if ( state.bit )
	858	{
	859	if ( dst )
	860	*dst++ = utf7enb64[((state.accum % 16) << (6 - state.bit)) % 64];
	861	len++;
	862	}
ef199164	863
9d653e81 VZ	864	state.ToDirect();
	865
	866	if ( dst )
	867	*dst++ = '-';
	868	len++;
	869	}
	870
	871	if ( dst )
	872	*dst++ = (char)cc;
15f2ee32 RN	873	len++;
15f2ee32 RN	874	}
9d653e81 VZ	875	else if ( cc == '+' && state.IsDirect() )
	876	{
	877	if ( dst )
	878	{
	879	*dst++ = '+';
	880	*dst++ = '-';
	881	}
	882
	883	len += 2;
	884	}
15f2ee32	885	#ifndef WC_UTF16
79c78d42	886	else if (((wxUint32)cc) > 0xffff)
b2c13097	887	{
15f2ee32	888	// no surrogate pair generation (yet?)
467e0479	889	return wxCONV_FAILED;
15f2ee32 RN	890	}
	891	#endif
	892	else
	893	{
9d653e81 VZ	894	if ( state.IsDirect() )
	895	{
	896	state.ToShifted();
ef199164	897
9d653e81 VZ	898	if ( dst )
	899	*dst++ = '+';
	900	len++;
	901	}
	902
	903	// BASE64 encode string
	904	for ( ;; )
15f2ee32	905	{
9d653e81	906	for ( unsigned lsb = 0; lsb < 2; lsb++ )
15f2ee32	907	{
9d653e81 VZ	908	state.accum <<= 8;
	909	state.accum += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
	910
	911	for (state.bit += 8; state.bit >= 6; )
15f2ee32	912	{
9d653e81 VZ	913	state.bit -= 6;
	914	if ( dst )
	915	*dst++ = utf7enb64[(state.accum >> state.bit) % 64];
	916	len++;
15f2ee32	917	}
15f2ee32	918	}
ef199164	919
9d653e81 VZ	920	if ( src == srcEnd \|\| wxIsUTF7Direct(cc = *src) )
9d653e81 VZ	921	break;
ef199164	922
9d653e81	923	src++;
15f2ee32	924	}
15f2ee32 RN	925	}
15f2ee32 RN	926	}
ef199164	927
9d653e81 VZ	928	// we need to restore the original encoder state if we were called just to
	929	// calculate the amount of space needed as we will presumably be called
	930	// again to really convert the data now
	931	if ( !dst )
	932	state = stateOrig;
ef199164	933
15f2ee32	934	return len;
6001e347 RR	935	}
6001e347 RR	936
f6bcfd97	937	// ----------------------------------------------------------------------------
6001e347	938	// UTF-8
f6bcfd97	939	// ----------------------------------------------------------------------------
6001e347	940
1774c3c5	941	static const wxUint32 utf8_max[]=
4def3b35	942	{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
6001e347	943
3698ae71 VZ	944	// boundaries of the private use area we use to (temporarily) remap invalid
3698ae71 VZ	945	// characters invalid in a UTF-8 encoded string
ea8ce907 RR	946	const wxUint32 wxUnicodePUA = 0x100000;
	947	const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
	948
// this table gives the length of the UTF-8 encoding from its first character,
// 0 is used for the bytes which can't start a valid UTF-8 sequence:
const unsigned char tableUtf8Lengths[256] = {
    // single-byte sequences (ASCII):
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F

    // these are invalid:
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
    0, 0,                                            // C0,C1

    // two-byte sequences:
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF

    // three-byte sequences:
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF

    // four-byte sequences:
    4, 4, 4, 4, 4,                                   // F0..F4

    // these are invalid again (5- or 6-byte
    // sequences and sequences for code points
    // above U+10FFFF, as restricted by RFC 3629):
                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF
};
	983
	984	size_t
	985	wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
	986	const char *src, size_t srcLen) const
	987	{
	988	wchar_t *out = dstLen ? dst : NULL;
	989	size_t written = 0;
	990
	991	if ( srcLen == wxNO_LEN )
	992	srcLen = strlen(src) + 1;
	993
	994	for ( const char *p = src; ; p++ )
	995	{
0dcbb107	996	if ( (srcLen == wxNO_LEN ? !*p : !srcLen) )
0286d08d VZ	997	{
	998	// all done successfully, just add the trailing NULL if we are not
	999	// using explicit length
	1000	if ( srcLen == wxNO_LEN )
	1001	{
	1002	if ( out )
	1003	{
	1004	if ( !dstLen )
	1005	break;
	1006
	1007	*out = L'\0';
	1008	}
	1009
	1010	written++;
	1011	}
	1012
	1013	return written;
	1014	}
	1015
0286d08d VZ	1016	if ( out && !dstLen-- )
	1017	break;
	1018
5367a38a VS	1019	wxUint32 code;
5367a38a VS	1020	unsigned char c = *p;
0286d08d	1021
5367a38a VS	1022	if ( c < 0x80 )
	1023	{
	1024	if ( srcLen == 0 ) // the test works for wxNO_LEN too
	1025	break;
0286d08d	1026
5367a38a VS	1027	if ( srcLen != wxNO_LEN )
5367a38a VS	1028	srcLen--;
0286d08d	1029
5367a38a VS	1030	code = c;
	1031	}
	1032	else
0286d08d	1033	{
5367a38a VS	1034	unsigned len = tableUtf8Lengths[c];
	1035	if ( !len )
	1036	break;
	1037
	1038	if ( srcLen < len ) // the test works for wxNO_LEN too
	1039	break;
	1040
	1041	if ( srcLen != wxNO_LEN )
	1042	srcLen -= len;
	1043
	1044	// Char. number range \| UTF-8 octet sequence
	1045	// (hexadecimal) \| (binary)
	1046	// ----------------------+----------------------------------------
	1047	// 0000 0000 - 0000 007F \| 0xxxxxxx
	1048	// 0000 0080 - 0000 07FF \| 110xxxxx 10xxxxxx
	1049	// 0000 0800 - 0000 FFFF \| 1110xxxx 10xxxxxx 10xxxxxx
	1050	// 0001 0000 - 0010 FFFF \| 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	1051	//
	1052	// Code point value is stored in bits marked with 'x',
	1053	// lowest-order bit of the value on the right side in the diagram
	1054	// above. (from RFC 3629)
	1055
	1056	// mask to extract lead byte's value ('x' bits above), by sequence
	1057	// length:
	1058	static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
	1059
	1060	// mask and value of lead byte's most significant bits, by length:
	1061	static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
	1062	static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
	1063
	1064	len--; // it's more convenient to work with 0-based length here
	1065
	1066	// extract the lead byte's value bits:
	1067	if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
	1068	break;
	1069
	1070	code = c & leadValueMask[len];
	1071
	1072	// all remaining bytes, if any, are handled in the same way
	1073	// regardless of sequence's length:
	1074	for ( ; len; --len )
	1075	{
	1076	c = *++p;
	1077	if ( (c & 0xC0) != 0x80 )
	1078	return wxCONV_FAILED;
0286d08d	1079
5367a38a VS	1080	code <<= 6;
	1081	code \|= c & 0x3F;
	1082	}
0286d08d VZ	1083	}
	1084
	1085	#ifdef WC_UTF16
	1086	// cast is ok because wchar_t == wxUint16 if WC_UTF16
	1087	if ( encode_utf16(code, (wxUint16 *)out) == 2 )
	1088	{
	1089	if ( out )
	1090	out++;
	1091	written++;
	1092	}
	1093	#else // !WC_UTF16
	1094	if ( out )
	1095	*out = code;
	1096	#endif // WC_UTF16/!WC_UTF16
	1097
	1098	if ( out )
	1099	out++;
	1100
	1101	written++;
	1102	}
	1103
	1104	return wxCONV_FAILED;
	1105	}
	1106
	1107	size_t
	1108	wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
	1109	const wchar_t *src, size_t srcLen) const
	1110	{
	1111	char *out = dstLen ? dst : NULL;
	1112	size_t written = 0;
	1113
	1114	for ( const wchar_t *wp = src; ; wp++ )
	1115	{
0dcbb107	1116	if ( (srcLen == wxNO_LEN ? !*wp : !srcLen) )
0286d08d VZ	1117	{
	1118	// all done successfully, just add the trailing NULL if we are not
	1119	// using explicit length
	1120	if ( srcLen == wxNO_LEN )
	1121	{
	1122	if ( out )
	1123	{
	1124	if ( !dstLen )
	1125	break;
	1126
	1127	*out = '\0';
	1128	}
	1129
	1130	written++;
	1131	}
	1132
	1133	return written;
	1134	}
	1135
a964d3ed VZ	1136	if ( srcLen != wxNO_LEN )
a964d3ed VZ	1137	srcLen--;
0286d08d VZ	1138
	1139	wxUint32 code;
	1140	#ifdef WC_UTF16
	1141	// cast is ok for WC_UTF16
	1142	if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
	1143	{
	1144	// skip the next char too as we decoded a surrogate
	1145	wp++;
041e6050 VZ	1146	if ( srcLen != wxNO_LEN )
041e6050 VZ	1147	srcLen--;
0286d08d VZ	1148	}
	1149	#else // wchar_t is UTF-32
	1150	code = *wp & 0x7fffffff;
	1151	#endif
	1152
	1153	unsigned len;
	1154	if ( code <= 0x7F )
	1155	{
	1156	len = 1;
	1157	if ( out )
	1158	{
	1159	if ( dstLen < len )
	1160	break;
	1161
	1162	out[0] = (char)code;
	1163	}
	1164	}
	1165	else if ( code <= 0x07FF )
	1166	{
	1167	len = 2;
	1168	if ( out )
	1169	{
	1170	if ( dstLen < len )
	1171	break;
	1172
	1173	// NB: this line takes 6 least significant bits, encodes them as
	1174	// 10xxxxxx and discards them so that the next byte can be encoded:
	1175	out[1] = 0x80 \| (code & 0x3F); code >>= 6;
	1176	out[0] = 0xC0 \| code;
	1177	}
	1178	}
	1179	else if ( code < 0xFFFF )
	1180	{
	1181	len = 3;
	1182	if ( out )
	1183	{
	1184	if ( dstLen < len )
	1185	break;
	1186
	1187	out[2] = 0x80 \| (code & 0x3F); code >>= 6;
	1188	out[1] = 0x80 \| (code & 0x3F); code >>= 6;
	1189	out[0] = 0xE0 \| code;
	1190	}
	1191	}
	1192	else if ( code <= 0x10FFFF )
	1193	{
	1194	len = 4;
	1195	if ( out )
	1196	{
	1197	if ( dstLen < len )
	1198	break;
	1199
	1200	out[3] = 0x80 \| (code & 0x3F); code >>= 6;
	1201	out[2] = 0x80 \| (code & 0x3F); code >>= 6;
	1202	out[1] = 0x80 \| (code & 0x3F); code >>= 6;
	1203	out[0] = 0xF0 \| code;
	1204	}
	1205	}
	1206	else
	1207	{
9a83f860	1208	wxFAIL_MSG( wxT("trying to encode undefined Unicode character") );
0286d08d VZ	1209	break;
	1210	}
	1211
	1212	if ( out )
	1213	{
	1214	out += len;
	1215	dstLen -= len;
	1216	}
	1217
	1218	written += len;
	1219	}
	1220
	1221	// we only get here if an error occurs during decoding
	1222	return wxCONV_FAILED;
	1223	}
	1224
d16d0917 VZ	1225	size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
d16d0917 VZ	1226	const char *psz, size_t srcLen) const
6001e347	1227	{
0286d08d	1228	if ( m_options == MAP_INVALID_UTF8_NOT )
d16d0917	1229	return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
0286d08d	1230
4def3b35 VS	1231	size_t len = 0;
4def3b35 VS	1232
f4cb7c58 VZ	1233	// The length can be either given explicitly or computed implicitly for the
	1234	// NUL-terminated strings.
	1235	const bool isNulTerminated = srcLen == wxNO_LEN;
	1236	while ((isNulTerminated ? *psz : srcLen--) && ((!buf) \|\| (len < n)))
4def3b35	1237	{
ea8ce907 RR	1238	const char *opsz = psz;
ea8ce907 RR	1239	bool invalid = false;
4def3b35 VS	1240	unsigned char cc = *psz++, fc = cc;
4def3b35 VS	1241	unsigned cnt;
dccce9ea	1242	for (cnt = 0; fc & 0x80; cnt++)
4def3b35	1243	fc <<= 1;
ef199164	1244
dccce9ea	1245	if (!cnt)
4def3b35 VS	1246	{
4def3b35 VS	1247	// plain ASCII char
dccce9ea	1248	if (buf)
4def3b35 VS	1249	*buf++ = cc;
4def3b35 VS	1250	len++;
561488ef MW	1251
	1252	// escape the escape character for octal escapes
	1253	if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
	1254	&& cc == '\\' && (!buf \|\| len < n))
	1255	{
	1256	if (buf)
	1257	*buf++ = cc;
	1258	len++;
	1259	}
dccce9ea VZ	1260	}
dccce9ea VZ	1261	else
4def3b35 VS	1262	{
4def3b35 VS	1263	cnt--;
dccce9ea	1264	if (!cnt)
4def3b35 VS	1265	{
4def3b35 VS	1266	// invalid UTF-8 sequence
ea8ce907	1267	invalid = true;
dccce9ea VZ	1268	}
dccce9ea VZ	1269	else
4def3b35 VS	1270	{
	1271	unsigned ocnt = cnt - 1;
	1272	wxUint32 res = cc & (0x3f >> cnt);
dccce9ea	1273	while (cnt--)
4def3b35	1274	{
ea8ce907	1275	cc = *psz;
dccce9ea	1276	if ((cc & 0xC0) != 0x80)
4def3b35 VS	1277	{
4def3b35 VS	1278	// invalid UTF-8 sequence
ea8ce907 RR	1279	invalid = true;
ea8ce907 RR	1280	break;
4def3b35	1281	}
ef199164	1282
ea8ce907	1283	psz++;
4def3b35 VS	1284	res = (res << 6) \| (cc & 0x3f);
4def3b35 VS	1285	}
ef199164	1286
ea8ce907	1287	if (invalid \|\| res <= utf8_max[ocnt])
4def3b35 VS	1288	{
4def3b35 VS	1289	// illegal UTF-8 encoding
ea8ce907	1290	invalid = true;
4def3b35	1291	}
ea8ce907 RR	1292	else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
	1293	res >= wxUnicodePUA && res < wxUnicodePUAEnd)
	1294	{
	1295	// if one of our PUA characters turns up externally
	1296	// it must also be treated as an illegal sequence
	1297	// (a bit like you have to escape an escape character)
	1298	invalid = true;
	1299	}
	1300	else
	1301	{
1cd52418	1302	#ifdef WC_UTF16
0286d08d	1303	// cast is ok because wchar_t == wxUint16 if WC_UTF16
ea8ce907	1304	size_t pa = encode_utf16(res, (wxUint16 *)buf);
467e0479	1305	if (pa == wxCONV_FAILED)
ea8ce907 RR	1306	{
	1307	invalid = true;
	1308	}
	1309	else
	1310	{
	1311	if (buf)
	1312	buf += pa;
	1313	len += pa;
	1314	}
373658eb	1315	#else // !WC_UTF16
ea8ce907	1316	if (buf)
38d4b1e4	1317	*buf++ = (wchar_t)res;
ea8ce907	1318	len++;
373658eb	1319	#endif // WC_UTF16/!WC_UTF16
ea8ce907 RR	1320	}
ea8ce907 RR	1321	}
ef199164	1322
ea8ce907 RR	1323	if (invalid)
	1324	{
	1325	if (m_options & MAP_INVALID_UTF8_TO_PUA)
	1326	{
	1327	while (opsz < psz && (!buf \|\| len < n))
	1328	{
	1329	#ifdef WC_UTF16
	1330	// cast is ok because wchar_t == wxUuint16 if WC_UTF16
	1331	size_t pa = encode_utf16((unsigned char)opsz + wxUnicodePUA, (wxUint16 )buf);
467e0479	1332	wxASSERT(pa != wxCONV_FAILED);
ea8ce907 RR	1333	if (buf)
	1334	buf += pa;
	1335	opsz++;
	1336	len += pa;
	1337	#else
	1338	if (buf)
38d4b1e4	1339	buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)opsz);
ea8ce907 RR	1340	opsz++;
	1341	len++;
	1342	#endif
	1343	}
	1344	}
3698ae71	1345	else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
ea8ce907 RR	1346	{
	1347	while (opsz < psz && (!buf \|\| len < n))
	1348	{
3698ae71 VZ	1349	if ( buf && len + 3 < n )
3698ae71 VZ	1350	{
17a1ebd1	1351	unsigned char on = *opsz;
3698ae71	1352	*buf++ = L'\\';
17a1ebd1 VZ	1353	*buf++ = (wchar_t)( L'0' + on / 0100 );
	1354	*buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
	1355	*buf++ = (wchar_t)( L'0' + on % 010 );
3698ae71	1356	}
ef199164	1357
ea8ce907 RR	1358	opsz++;
	1359	len += 4;
	1360	}
	1361	}
3698ae71	1362	else // MAP_INVALID_UTF8_NOT
ea8ce907	1363	{
467e0479	1364	return wxCONV_FAILED;
ea8ce907	1365	}
4def3b35 VS	1366	}
4def3b35 VS	1367	}
6001e347	1368	}
ef199164	1369
f4cb7c58 VZ	1370	if ( isNulTerminated )
	1371	{
	1372	// Add the trailing NUL in this case if we have a large enough buffer.
	1373	if ( buf && (len < n) )
	1374	*buf = 0;
ef199164	1375
f4cb7c58 VZ	1376	// And count it in any case.
	1377	len++;
	1378	}
	1379
	1380	return len;
6001e347 RR	1381	}
6001e347 RR	1382
// Check whether the given wide character is an octal digit, i.e. is in the
// L'0'..L'7' range.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
	1387
d16d0917 VZ	1388	size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
d16d0917 VZ	1389	const wchar_t *psz, size_t srcLen) const
6001e347	1390	{
0286d08d	1391	if ( m_options == MAP_INVALID_UTF8_NOT )
d16d0917	1392	return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
0286d08d	1393
4def3b35	1394	size_t len = 0;
6001e347	1395
2ba61518 VZ	1396	// The length can be either given explicitly or computed implicitly for the
	1397	// NUL-terminated strings.
	1398	const bool isNulTerminated = srcLen == wxNO_LEN;
	1399	while ((isNulTerminated ? *psz : srcLen--) && ((!buf) \|\| (len < n)))
4def3b35 VS	1400	{
4def3b35 VS	1401	wxUint32 cc;
ef199164	1402
1cd52418	1403	#ifdef WC_UTF16
b5153fd8 VZ	1404	// cast is ok for WC_UTF16
b5153fd8 VZ	1405	size_t pa = decode_utf16((const wxUint16 *)psz, cc);
467e0479	1406	psz += (pa == wxCONV_FAILED) ? 1 : pa;
1cd52418	1407	#else
ef199164	1408	cc = (*psz++) & 0x7fffffff;
4def3b35	1409	#endif
3698ae71 VZ	1410
	1411	if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
	1412	&& cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
4def3b35	1413	{
dccce9ea	1414	if (buf)
ea8ce907	1415	*buf++ = (char)(cc - wxUnicodePUA);
4def3b35	1416	len++;
3698ae71	1417	}
561488ef MW	1418	else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
	1419	&& cc == L'\\' && psz[0] == L'\\' )
	1420	{
	1421	if (buf)
	1422	*buf++ = (char)cc;
	1423	psz++;
	1424	len++;
	1425	}
3698ae71 VZ	1426	else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
	1427	cc == L'\\' &&
	1428	isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
4def3b35	1429	{
dccce9ea	1430	if (buf)
3698ae71	1431	{
ef199164 DS	1432	buf++ = (char) ((psz[0] - L'0') 0100 +
ef199164 DS	1433	(psz[1] - L'0') * 010 +
b2c13097	1434	(psz[2] - L'0'));
3698ae71 VZ	1435	}
	1436
	1437	psz += 3;
ea8ce907 RR	1438	len++;
	1439	}
	1440	else
	1441	{
	1442	unsigned cnt;
ef199164 DS	1443	for (cnt = 0; cc > utf8_max[cnt]; cnt++)
	1444	{
	1445	}
	1446
ea8ce907	1447	if (!cnt)
4def3b35	1448	{
ea8ce907 RR	1449	// plain ASCII char
	1450	if (buf)
	1451	*buf++ = (char) cc;
	1452	len++;
	1453	}
ea8ce907 RR	1454	else
	1455	{
	1456	len += cnt + 1;
	1457	if (buf)
	1458	{
	1459	buf++ = (char) ((-128 >> cnt) \| ((cc >> (cnt 6)) & (0x3f >> cnt)));
	1460	while (cnt--)
	1461	buf++ = (char) (0x80 \| ((cc >> (cnt 6)) & 0x3f));
	1462	}
4def3b35 VS	1463	}
4def3b35 VS	1464	}
6001e347	1465	}
4def3b35	1466
2ba61518 VZ	1467	if ( isNulTerminated )
	1468	{
	1469	// Add the trailing NUL in this case if we have a large enough buffer.
	1470	if ( buf && (len < n) )
	1471	*buf = 0;
	1472
	1473	// And count it in any case.
	1474	len++;
	1475	}
adb45366	1476
2ba61518	1477	return len;
6001e347 RR	1478	}
6001e347 RR	1479
467e0479	1480	// ============================================================================
c91830cb	1481	// UTF-16
467e0479	1482	// ============================================================================
c91830cb VZ	1483
// The "straight" converter is the one using the same byte order as this
// machine and the "swap" one uses the opposite order, so which of the BE and
// LE classes they stand for depends on the endianness of the platform.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else // little endian
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
c91830cb VZ	1491
467e0479 VZ	1492	/* static */
	1493	size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
	1494	{
	1495	if ( srcLen == wxNO_LEN )
	1496	{
	1497	// count the number of bytes in input, including the trailing NULs
5c33522f	1498	const wxUint16 inBuff = reinterpret_cast<const wxUint16 >(src);
ef199164	1499	for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479	1500	;
c91830cb	1501
467e0479 VZ	1502	srcLen *= BYTES_PER_CHAR;
	1503	}
	1504	else // we already have the length
	1505	{
	1506	// we can only convert an entire number of UTF-16 characters
	1507	if ( srcLen % BYTES_PER_CHAR )
	1508	return wxCONV_FAILED;
	1509	}
	1510
	1511	return srcLen;
	1512	}
	1513
	1514	// case when in-memory representation is UTF-16 too
c91830cb VZ	1515	#ifdef WC_UTF16
c91830cb VZ	1516
467e0479 VZ	1517	// ----------------------------------------------------------------------------
	1518	// conversions without endianness change
	1519	// ----------------------------------------------------------------------------
	1520
	1521	size_t
	1522	wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
	1523	const char *src, size_t srcLen) const
c91830cb	1524	{
467e0479 VZ	1525	// set up the scene for using memcpy() (which is presumably more efficient
	1526	// than copying the bytes one by one)
	1527	srcLen = GetLength(src, srcLen);
	1528	if ( srcLen == wxNO_LEN )
	1529	return wxCONV_FAILED;
c91830cb	1530
ef199164	1531	const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479	1532	if ( dst )
c91830cb	1533	{
467e0479 VZ	1534	if ( dstLen < inLen )
467e0479 VZ	1535	return wxCONV_FAILED;
c91830cb	1536
467e0479	1537	memcpy(dst, src, srcLen);
c91830cb	1538	}
d32a507d	1539
467e0479	1540	return inLen;
c91830cb VZ	1541	}
c91830cb VZ	1542
467e0479 VZ	1543	size_t
	1544	wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
	1545	const wchar_t *src, size_t srcLen) const
c91830cb	1546	{
467e0479 VZ	1547	if ( srcLen == wxNO_LEN )
467e0479 VZ	1548	srcLen = wxWcslen(src) + 1;
c91830cb	1549
467e0479 VZ	1550	srcLen *= BYTES_PER_CHAR;
	1551
	1552	if ( dst )
c91830cb	1553	{
467e0479 VZ	1554	if ( dstLen < srcLen )
467e0479 VZ	1555	return wxCONV_FAILED;
d32a507d	1556
467e0479	1557	memcpy(dst, src, srcLen);
c91830cb	1558	}
d32a507d	1559
467e0479	1560	return srcLen;
c91830cb VZ	1561	}
c91830cb VZ	1562
467e0479 VZ	1563	// ----------------------------------------------------------------------------
	1564	// endian-reversing conversions
	1565	// ----------------------------------------------------------------------------
c91830cb	1566
467e0479 VZ	1567	size_t
	1568	wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
	1569	const char *src, size_t srcLen) const
c91830cb	1570	{
467e0479 VZ	1571	srcLen = GetLength(src, srcLen);
	1572	if ( srcLen == wxNO_LEN )
	1573	return wxCONV_FAILED;
c91830cb	1574
467e0479 VZ	1575	srcLen /= BYTES_PER_CHAR;
	1576
	1577	if ( dst )
c91830cb	1578	{
467e0479 VZ	1579	if ( dstLen < srcLen )
	1580	return wxCONV_FAILED;
	1581
5c33522f	1582	const wxUint16 inBuff = reinterpret_cast<const wxUint16 >(src);
ef199164	1583	for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb	1584	{
ef199164	1585	dst++ = wxUINT16_SWAP_ALWAYS(inBuff);
c91830cb	1586	}
c91830cb	1587	}
bfab25d4	1588
467e0479	1589	return srcLen;
c91830cb VZ	1590	}
c91830cb VZ	1591
467e0479 VZ	1592	size_t
	1593	wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
	1594	const wchar_t *src, size_t srcLen) const
c91830cb	1595	{
467e0479 VZ	1596	if ( srcLen == wxNO_LEN )
467e0479 VZ	1597	srcLen = wxWcslen(src) + 1;
c91830cb	1598
467e0479 VZ	1599	srcLen *= BYTES_PER_CHAR;
	1600
	1601	if ( dst )
c91830cb	1602	{
467e0479 VZ	1603	if ( dstLen < srcLen )
	1604	return wxCONV_FAILED;
	1605
5c33522f	1606	wxUint16 outBuff = reinterpret_cast<wxUint16 >(dst);
467e0479	1607	for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb	1608	{
ef199164	1609	outBuff++ = wxUINT16_SWAP_ALWAYS(src);
c91830cb	1610	}
c91830cb	1611	}
eec47cc6	1612
467e0479	1613	return srcLen;
c91830cb VZ	1614	}
c91830cb VZ	1615
467e0479	1616	#else // !WC_UTF16: wchar_t is UTF-32
c91830cb	1617
467e0479 VZ	1618	// ----------------------------------------------------------------------------
	1619	// conversions without endianness change
	1620	// ----------------------------------------------------------------------------
c91830cb	1621
35d11700 VZ	1622	size_t
	1623	wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
	1624	const char *src, size_t srcLen) const
c91830cb	1625	{
35d11700 VZ	1626	srcLen = GetLength(src, srcLen);
	1627	if ( srcLen == wxNO_LEN )
	1628	return wxCONV_FAILED;
c91830cb	1629
ef199164	1630	const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700	1631	if ( !dst )
c91830cb	1632	{
35d11700 VZ	1633	// optimization: return maximal space which could be needed for this
	1634	// string even if the real size could be smaller if the buffer contains
	1635	// any surrogates
	1636	return inLen;
c91830cb	1637	}
c91830cb	1638
35d11700	1639	size_t outLen = 0;
5c33522f	1640	const wxUint16 inBuff = reinterpret_cast<const wxUint16 >(src);
ef199164	1641	for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
35d11700	1642	{
ef199164 DS	1643	const wxUint32 ch = wxDecodeSurrogate(&inBuff);
ef199164 DS	1644	if ( !inBuff )
35d11700 VZ	1645	return wxCONV_FAILED;
	1646
	1647	if ( ++outLen > dstLen )
	1648	return wxCONV_FAILED;
c91830cb	1649
35d11700 VZ	1650	*dst++ = ch;
	1651	}
	1652
	1653
	1654	return outLen;
	1655	}
c91830cb	1656
35d11700 VZ	1657	size_t
	1658	wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
	1659	const wchar_t *src, size_t srcLen) const
c91830cb	1660	{
35d11700 VZ	1661	if ( srcLen == wxNO_LEN )
35d11700 VZ	1662	srcLen = wxWcslen(src) + 1;
c91830cb	1663
35d11700	1664	size_t outLen = 0;
5c33522f	1665	wxUint16 outBuff = reinterpret_cast<wxUint16 >(dst);
35d11700	1666	for ( size_t n = 0; n < srcLen; n++ )
c91830cb	1667	{
d883acaa	1668	wxUint16 cc[2] = { 0 };
35d11700 VZ	1669	const size_t numChars = encode_utf16(*src++, cc);
	1670	if ( numChars == wxCONV_FAILED )
	1671	return wxCONV_FAILED;
c91830cb	1672
ef199164 DS	1673	outLen += numChars * BYTES_PER_CHAR;
ef199164 DS	1674	if ( outBuff )
c91830cb	1675	{
35d11700 VZ	1676	if ( outLen > dstLen )
	1677	return wxCONV_FAILED;
	1678
ef199164	1679	*outBuff++ = cc[0];
35d11700	1680	if ( numChars == 2 )
69b80d28	1681	{
35d11700	1682	// second character of a surrogate
ef199164	1683	*outBuff++ = cc[1];
69b80d28	1684	}
c91830cb	1685	}
c91830cb	1686	}
c91830cb	1687
35d11700	1688	return outLen;
c91830cb VZ	1689	}
c91830cb VZ	1690
467e0479 VZ	1691	// ----------------------------------------------------------------------------
	1692	// endian-reversing conversions
	1693	// ----------------------------------------------------------------------------
c91830cb	1694
35d11700 VZ	1695	size_t
	1696	wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
	1697	const char *src, size_t srcLen) const
c91830cb	1698	{
35d11700 VZ	1699	srcLen = GetLength(src, srcLen);
	1700	if ( srcLen == wxNO_LEN )
	1701	return wxCONV_FAILED;
	1702
ef199164	1703	const size_t inLen = srcLen / BYTES_PER_CHAR;
35d11700 VZ	1704	if ( !dst )
	1705	{
	1706	// optimization: return maximal space which could be needed for this
	1707	// string even if the real size could be smaller if the buffer contains
	1708	// any surrogates
	1709	return inLen;
	1710	}
c91830cb	1711
35d11700	1712	size_t outLen = 0;
5c33522f	1713	const wxUint16 inBuff = reinterpret_cast<const wxUint16 >(src);
ef199164	1714	for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
c91830cb	1715	{
35d11700 VZ	1716	wxUint32 ch;
35d11700 VZ	1717	wxUint16 tmp[2];
ef199164 DS	1718
	1719	tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
	1720	inBuff++;
	1721	tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
c91830cb	1722
35d11700 VZ	1723	const size_t numChars = decode_utf16(tmp, ch);
	1724	if ( numChars == wxCONV_FAILED )
	1725	return wxCONV_FAILED;
c91830cb	1726
35d11700	1727	if ( numChars == 2 )
ef199164	1728	inBuff++;
35d11700 VZ	1729
	1730	if ( ++outLen > dstLen )
	1731	return wxCONV_FAILED;
c91830cb	1732
35d11700	1733	*dst++ = ch;
c91830cb	1734	}
c91830cb	1735
c91830cb	1736
35d11700 VZ	1737	return outLen;
35d11700 VZ	1738	}
c91830cb	1739
35d11700 VZ	1740	size_t
	1741	wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
	1742	const wchar_t *src, size_t srcLen) const
c91830cb	1743	{
35d11700 VZ	1744	if ( srcLen == wxNO_LEN )
35d11700 VZ	1745	srcLen = wxWcslen(src) + 1;
c91830cb	1746
35d11700	1747	size_t outLen = 0;
5c33522f	1748	wxUint16 outBuff = reinterpret_cast<wxUint16 >(dst);
35d11700	1749	for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
c91830cb	1750	{
d883acaa	1751	wxUint16 cc[2] = { 0 };
35d11700 VZ	1752	const size_t numChars = encode_utf16(*src, cc);
	1753	if ( numChars == wxCONV_FAILED )
	1754	return wxCONV_FAILED;
c91830cb	1755
ef199164 DS	1756	outLen += numChars * BYTES_PER_CHAR;
ef199164 DS	1757	if ( outBuff )
c91830cb	1758	{
35d11700 VZ	1759	if ( outLen > dstLen )
	1760	return wxCONV_FAILED;
	1761
ef199164	1762	*outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
35d11700	1763	if ( numChars == 2 )
c91830cb	1764	{
35d11700	1765	// second character of a surrogate
ef199164	1766	*outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
c91830cb VZ	1767	}
c91830cb VZ	1768	}
c91830cb	1769	}
c91830cb	1770
35d11700	1771	return outLen;
c91830cb VZ	1772	}
c91830cb VZ	1773
467e0479	1774	#endif // WC_UTF16/!WC_UTF16
c91830cb VZ	1775
c91830cb VZ	1776
35d11700	1777	// ============================================================================
c91830cb	1778	// UTF-32
35d11700	1779	// ============================================================================
c91830cb VZ	1780
c91830cb VZ	1781	#ifdef WORDS_BIGENDIAN
467e0479 VZ	1782	#define wxMBConvUTF32straight wxMBConvUTF32BE
467e0479 VZ	1783	#define wxMBConvUTF32swap wxMBConvUTF32LE
c91830cb	1784	#else
467e0479 VZ	1785	#define wxMBConvUTF32swap wxMBConvUTF32BE
467e0479 VZ	1786	#define wxMBConvUTF32straight wxMBConvUTF32LE
c91830cb VZ	1787	#endif
	1788
	1789
	1790	WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
	1791	WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
	1792
467e0479 VZ	1793	/* static */
	1794	size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
	1795	{
	1796	if ( srcLen == wxNO_LEN )
	1797	{
	1798	// count the number of bytes in input, including the trailing NULs
5c33522f	1799	const wxUint32 inBuff = reinterpret_cast<const wxUint32 >(src);
ef199164	1800	for ( srcLen = 1; *inBuff++; srcLen++ )
467e0479	1801	;
c91830cb	1802
467e0479 VZ	1803	srcLen *= BYTES_PER_CHAR;
	1804	}
	1805	else // we already have the length
	1806	{
	1807	// we can only convert an entire number of UTF-32 characters
	1808	if ( srcLen % BYTES_PER_CHAR )
	1809	return wxCONV_FAILED;
	1810	}
	1811
	1812	return srcLen;
	1813	}
	1814
	1815	// case when in-memory representation is UTF-16
c91830cb VZ	1816	#ifdef WC_UTF16
c91830cb VZ	1817
467e0479 VZ	1818	// ----------------------------------------------------------------------------
	1819	// conversions without endianness change
	1820	// ----------------------------------------------------------------------------
	1821
	1822	size_t
	1823	wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
	1824	const char *src, size_t srcLen) const
c91830cb	1825	{
467e0479 VZ	1826	srcLen = GetLength(src, srcLen);
	1827	if ( srcLen == wxNO_LEN )
	1828	return wxCONV_FAILED;
c91830cb	1829
5c33522f	1830	const wxUint32 inBuff = reinterpret_cast<const wxUint32 >(src);
ef199164	1831	const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479 VZ	1832	size_t outLen = 0;
467e0479 VZ	1833	for ( size_t n = 0; n < inLen; n++ )
c91830cb	1834	{
d883acaa	1835	wxUint16 cc[2] = { 0 };
ef199164	1836	const size_t numChars = encode_utf16(*inBuff++, cc);
467e0479 VZ	1837	if ( numChars == wxCONV_FAILED )
467e0479 VZ	1838	return wxCONV_FAILED;
c91830cb	1839
467e0479 VZ	1840	outLen += numChars;
467e0479 VZ	1841	if ( dst )
c91830cb	1842	{
467e0479 VZ	1843	if ( outLen > dstLen )
467e0479 VZ	1844	return wxCONV_FAILED;
d32a507d	1845
467e0479 VZ	1846	*dst++ = cc[0];
	1847	if ( numChars == 2 )
	1848	{
	1849	// second character of a surrogate
	1850	*dst++ = cc[1];
	1851	}
	1852	}
c91830cb	1853	}
d32a507d	1854
467e0479	1855	return outLen;
c91830cb VZ	1856	}
c91830cb VZ	1857
467e0479 VZ	1858	size_t
	1859	wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
	1860	const wchar_t *src, size_t srcLen) const
c91830cb	1861	{
467e0479 VZ	1862	if ( srcLen == wxNO_LEN )
467e0479 VZ	1863	srcLen = wxWcslen(src) + 1;
c91830cb	1864
467e0479	1865	if ( !dst )
c91830cb	1866	{
467e0479 VZ	1867	// optimization: return maximal space which could be needed for this
	1868	// string instead of the exact amount which could be less if there are
	1869	// any surrogates in the input
	1870	//
	1871	// we consider that surrogates are rare enough to make it worthwhile to
	1872	// avoid running the loop below at the cost of slightly extra memory
	1873	// consumption
ef199164	1874	return srcLen * BYTES_PER_CHAR;
467e0479	1875	}
c91830cb	1876
5c33522f	1877	wxUint32 outBuff = reinterpret_cast<wxUint32 >(dst);
467e0479 VZ	1878	size_t outLen = 0;
	1879	for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
	1880	{
	1881	const wxUint32 ch = wxDecodeSurrogate(&src);
	1882	if ( !src )
	1883	return wxCONV_FAILED;
c91830cb	1884
467e0479	1885	outLen += BYTES_PER_CHAR;
d32a507d	1886
467e0479 VZ	1887	if ( outLen > dstLen )
467e0479 VZ	1888	return wxCONV_FAILED;
b5153fd8	1889
ef199164	1890	*outBuff++ = ch;
467e0479	1891	}
c91830cb	1892
467e0479	1893	return outLen;
c91830cb VZ	1894	}
c91830cb VZ	1895
467e0479 VZ	1896	// ----------------------------------------------------------------------------
	1897	// endian-reversing conversions
	1898	// ----------------------------------------------------------------------------
c91830cb	1899
467e0479 VZ	1900	size_t
	1901	wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
	1902	const char *src, size_t srcLen) const
c91830cb	1903	{
467e0479 VZ	1904	srcLen = GetLength(src, srcLen);
	1905	if ( srcLen == wxNO_LEN )
	1906	return wxCONV_FAILED;
c91830cb	1907
5c33522f	1908	const wxUint32 inBuff = reinterpret_cast<const wxUint32 >(src);
ef199164	1909	const size_t inLen = srcLen / BYTES_PER_CHAR;
467e0479	1910	size_t outLen = 0;
ef199164	1911	for ( size_t n = 0; n < inLen; n++, inBuff++ )
c91830cb	1912	{
d883acaa	1913	wxUint16 cc[2] = { 0 };
ef199164	1914	const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
467e0479 VZ	1915	if ( numChars == wxCONV_FAILED )
467e0479 VZ	1916	return wxCONV_FAILED;
c91830cb	1917
467e0479 VZ	1918	outLen += numChars;
467e0479 VZ	1919	if ( dst )
c91830cb	1920	{
467e0479 VZ	1921	if ( outLen > dstLen )
467e0479 VZ	1922	return wxCONV_FAILED;
d32a507d	1923
467e0479 VZ	1924	*dst++ = cc[0];
	1925	if ( numChars == 2 )
	1926	{
	1927	// second character of a surrogate
	1928	*dst++ = cc[1];
	1929	}
	1930	}
c91830cb	1931	}
b5153fd8	1932
467e0479	1933	return outLen;
c91830cb VZ	1934	}
c91830cb VZ	1935
467e0479 VZ	1936	size_t
	1937	wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
	1938	const wchar_t *src, size_t srcLen) const
c91830cb	1939	{
467e0479 VZ	1940	if ( srcLen == wxNO_LEN )
467e0479 VZ	1941	srcLen = wxWcslen(src) + 1;
c91830cb	1942
467e0479	1943	if ( !dst )
c91830cb	1944	{
467e0479 VZ	1945	// optimization: return maximal space which could be needed for this
	1946	// string instead of the exact amount which could be less if there are
	1947	// any surrogates in the input
	1948	//
	1949	// we consider that surrogates are rare enough to make it worthwhile to
	1950	// avoid running the loop below at the cost of slightly extra memory
	1951	// consumption
	1952	return srcLen*BYTES_PER_CHAR;
	1953	}
c91830cb	1954
5c33522f	1955	wxUint32 outBuff = reinterpret_cast<wxUint32 >(dst);
467e0479 VZ	1956	size_t outLen = 0;
	1957	for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
	1958	{
	1959	const wxUint32 ch = wxDecodeSurrogate(&src);
	1960	if ( !src )
	1961	return wxCONV_FAILED;
c91830cb	1962
467e0479	1963	outLen += BYTES_PER_CHAR;
d32a507d	1964
467e0479 VZ	1965	if ( outLen > dstLen )
467e0479 VZ	1966	return wxCONV_FAILED;
b5153fd8	1967
ef199164	1968	*outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
467e0479	1969	}
c91830cb	1970
467e0479	1971	return outLen;
c91830cb VZ	1972	}
c91830cb VZ	1973
467e0479	1974	#else // !WC_UTF16: wchar_t is UTF-32
c91830cb	1975
35d11700 VZ	1976	// ----------------------------------------------------------------------------
	1977	// conversions without endianness change
	1978	// ----------------------------------------------------------------------------
	1979
	1980	size_t
	1981	wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
	1982	const char *src, size_t srcLen) const
c91830cb	1983	{
35d11700 VZ	1984	// use memcpy() as it should be much faster than hand-written loop
	1985	srcLen = GetLength(src, srcLen);
	1986	if ( srcLen == wxNO_LEN )
	1987	return wxCONV_FAILED;
c91830cb	1988
35d11700 VZ	1989	const size_t inLen = srcLen/BYTES_PER_CHAR;
35d11700 VZ	1990	if ( dst )
c91830cb	1991	{
35d11700 VZ	1992	if ( dstLen < inLen )
35d11700 VZ	1993	return wxCONV_FAILED;
b5153fd8	1994
35d11700 VZ	1995	memcpy(dst, src, srcLen);
35d11700 VZ	1996	}
c91830cb	1997
35d11700	1998	return inLen;
c91830cb VZ	1999	}
c91830cb VZ	2000
35d11700 VZ	2001	size_t
	2002	wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
	2003	const wchar_t *src, size_t srcLen) const
c91830cb	2004	{
35d11700 VZ	2005	if ( srcLen == wxNO_LEN )
	2006	srcLen = wxWcslen(src) + 1;
	2007
	2008	srcLen *= BYTES_PER_CHAR;
c91830cb	2009
35d11700	2010	if ( dst )
c91830cb	2011	{
35d11700 VZ	2012	if ( dstLen < srcLen )
35d11700 VZ	2013	return wxCONV_FAILED;
c91830cb	2014
35d11700	2015	memcpy(dst, src, srcLen);
c91830cb VZ	2016	}
c91830cb VZ	2017
35d11700	2018	return srcLen;
c91830cb VZ	2019	}
c91830cb VZ	2020
35d11700 VZ	2021	// ----------------------------------------------------------------------------
	2022	// endian-reversing conversions
	2023	// ----------------------------------------------------------------------------
c91830cb	2024
35d11700 VZ	2025	size_t
	2026	wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
	2027	const char *src, size_t srcLen) const
c91830cb	2028	{
35d11700 VZ	2029	srcLen = GetLength(src, srcLen);
	2030	if ( srcLen == wxNO_LEN )
	2031	return wxCONV_FAILED;
	2032
	2033	srcLen /= BYTES_PER_CHAR;
c91830cb	2034
35d11700	2035	if ( dst )
c91830cb	2036	{
35d11700 VZ	2037	if ( dstLen < srcLen )
	2038	return wxCONV_FAILED;
	2039
5c33522f	2040	const wxUint32 inBuff = reinterpret_cast<const wxUint32 >(src);
ef199164	2041	for ( size_t n = 0; n < srcLen; n++, inBuff++ )
c91830cb	2042	{
ef199164	2043	dst++ = wxUINT32_SWAP_ALWAYS(inBuff);
c91830cb	2044	}
c91830cb	2045	}
b5153fd8	2046
35d11700	2047	return srcLen;
c91830cb VZ	2048	}
c91830cb VZ	2049
35d11700 VZ	2050	size_t
	2051	wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
	2052	const wchar_t *src, size_t srcLen) const
c91830cb	2053	{
35d11700 VZ	2054	if ( srcLen == wxNO_LEN )
	2055	srcLen = wxWcslen(src) + 1;
	2056
	2057	srcLen *= BYTES_PER_CHAR;
c91830cb	2058
35d11700	2059	if ( dst )
c91830cb	2060	{
35d11700 VZ	2061	if ( dstLen < srcLen )
	2062	return wxCONV_FAILED;
	2063
5c33522f	2064	wxUint32 outBuff = reinterpret_cast<wxUint32 >(dst);
35d11700	2065	for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
c91830cb	2066	{
ef199164	2067	outBuff++ = wxUINT32_SWAP_ALWAYS(src);
c91830cb	2068	}
c91830cb	2069	}
b5153fd8	2070
35d11700	2071	return srcLen;
c91830cb VZ	2072	}
c91830cb VZ	2073
467e0479	2074	#endif // WC_UTF16/!WC_UTF16
c91830cb VZ	2075
c91830cb VZ	2076
36acb880 VZ	2077	// ============================================================================
	2078	// The classes doing conversion using the iconv_xxx() functions
	2079	// ============================================================================
3caec1bb	2080
b040e242	2081	#ifdef HAVE_ICONV
3a0d76bc	2082
b1d547eb VS	2083	// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
	2084	// E2BIG if output buffer is _exactly_ as big as needed. Such case is
	2085	// (unless there's yet another bug in glibc) the only case when iconv()
	2086	// returns with (size_t)-1 (which means error) and says there are 0 bytes
	2087	// left in the input buffer -- when _real_ error occurs,
	2088	// bytes-left-in-input buffer is non-zero. Hence, this alternative test for
	2089	// iconv() failure.
3caec1bb VS	2090	// [This bug does not appear in glibc 2.2.]
	2091	#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
	2092	#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
	2093	(errno != E2BIG \|\| bufLeft != 0))
	2094	#else
	2095	#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
	2096	#endif
	2097
ab217dba	2098	#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
36acb880	2099
74a7eb0b VZ	2100	#define ICONV_T_INVALID ((iconv_t)-1)
	2101
	2102	#if SIZEOF_WCHAR_T == 4
	2103	#define WC_BSWAP wxUINT32_SWAP_ALWAYS
	2104	#define WC_ENC wxFONTENCODING_UTF32
	2105	#elif SIZEOF_WCHAR_T == 2
	2106	#define WC_BSWAP wxUINT16_SWAP_ALWAYS
	2107	#define WC_ENC wxFONTENCODING_UTF16
	2108	#else // sizeof(wchar_t) != 2 nor 4
	2109	// does this ever happen?
	2110	#error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
	2111	#endif
	2112
36acb880	2113	// ----------------------------------------------------------------------------
e95354ec	2114	// wxMBConv_iconv: encapsulates an iconv character set
36acb880 VZ	2115	// ----------------------------------------------------------------------------
36acb880 VZ	2116
e95354ec	2117	class wxMBConv_iconv : public wxMBConv
1cd52418 OK	2118	{
1cd52418 OK	2119	public:
86501081	2120	wxMBConv_iconv(const char *name);
e95354ec	2121	virtual ~wxMBConv_iconv();
36acb880	2122
8f4b0f43 VZ	2123	// implement base class virtual methods
	2124	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	2125	const char *src, size_t srcLen = wxNO_LEN) const;
	2126	virtual size_t FromWChar(char *dst, size_t dstLen,
	2127	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
7ef3ab50 VZ	2128	virtual size_t GetMBNulLen() const;
7ef3ab50 VZ	2129
ba98e032 VS	2130	#if wxUSE_UNICODE_UTF8
	2131	virtual bool IsUTF8() const;
	2132	#endif
	2133
d36c9347 VZ	2134	virtual wxMBConv *Clone() const
d36c9347 VZ	2135	{
b64f93b6	2136	wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
d36c9347 VZ	2137	p->m_minMBCharWidth = m_minMBCharWidth;
	2138	return p;
	2139	}
	2140
e95354ec	2141	bool IsOk() const
74a7eb0b	2142	{ return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
36acb880 VZ	2143
36acb880 VZ	2144	protected:
ef199164 DS	2145	// the iconv handlers used to translate from multibyte
ef199164 DS	2146	// to wide char and in the other direction
36acb880 VZ	2147	iconv_t m2w,
36acb880 VZ	2148	w2m;
ef199164	2149
b1d547eb VS	2150	#if wxUSE_THREADS
	2151	// guards access to m2w and w2m objects
	2152	wxMutex m_iconvMutex;
	2153	#endif
36acb880 VZ	2154
36acb880 VZ	2155	private:
e95354ec	2156	// the name (for iconv_open()) of a wide char charset -- if none is
36acb880	2157	// available on this machine, it will remain NULL
74a7eb0b	2158	static wxString ms_wcCharsetName;
36acb880 VZ	2159
	2160	// true if the wide char encoding we use (i.e. ms_wcCharsetName) has
	2161	// different endian-ness than the native one
405d8f46	2162	static bool ms_wcNeedsSwap;
eec47cc6	2163
d36c9347 VZ	2164
d36c9347 VZ	2165	// name of the encoding handled by this conversion
b64f93b6	2166	const char *m_name;
d36c9347	2167
7ef3ab50	2168	// cached result of GetMBNulLen(); set to 0 meaning "unknown"
c1464d9d VZ	2169	// initially
c1464d9d VZ	2170	size_t m_minMBCharWidth;
36acb880 VZ	2171	};
36acb880 VZ	2172
8f115891	2173	// make the constructor available for unit testing
86501081	2174	WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
8f115891 MW	2175	{
	2176	wxMBConv_iconv* result = new wxMBConv_iconv( name );
	2177	if ( !result->IsOk() )
	2178	{
	2179	delete result;
	2180	return 0;
	2181	}
ef199164	2182
8f115891 MW	2183	return result;
	2184	}
	2185
422e411e	2186	wxString wxMBConv_iconv::ms_wcCharsetName;
e95354ec	2187	bool wxMBConv_iconv::ms_wcNeedsSwap = false;
36acb880	2188
86501081	2189	wxMBConv_iconv::wxMBConv_iconv(const char *name)
b64f93b6	2190	: m_name(wxStrdup(name))
36acb880	2191	{
c1464d9d	2192	m_minMBCharWidth = 0;
eec47cc6	2193
36acb880	2194	// check for charset that represents wchar_t:
74a7eb0b	2195	if ( ms_wcCharsetName.empty() )
f1339c56	2196	{
9a83f860	2197	wxLogTrace(TRACE_STRCONV, wxT("Looking for wide char codeset:"));
c2b83fdd	2198
74a7eb0b	2199	#if wxUSE_FONTMAP
a243da29	2200	const wxChar const names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
74a7eb0b	2201	#else // !wxUSE_FONTMAP
a243da29	2202	static const wxChar *const names_static[] =
36acb880	2203	{
74a7eb0b	2204	#if SIZEOF_WCHAR_T == 4
9a83f860	2205	wxT("UCS-4"),
da2f1172	2206	#elif SIZEOF_WCHAR_T == 2
9a83f860	2207	wxT("UCS-2"),
74a7eb0b VZ	2208	#endif
	2209	NULL
	2210	};
a243da29	2211	const wxChar const names = names_static;
74a7eb0b	2212	#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
36acb880	2213
d1f024a8	2214	for ( ; *names && ms_wcCharsetName.empty(); ++names )
74a7eb0b	2215	{
17a1ebd1	2216	const wxString nameCS(*names);
74a7eb0b VZ	2217
74a7eb0b VZ	2218	// first try charset with explicit bytesex info (e.g. "UCS-4LE"):
17a1ebd1	2219	wxString nameXE(nameCS);
ef199164 DS	2220
ef199164 DS	2221	#ifdef WORDS_BIGENDIAN
9a83f860	2222	nameXE += wxT("BE");
ef199164	2223	#else // little endian
9a83f860	2224	nameXE += wxT("LE");
ef199164	2225	#endif
74a7eb0b	2226
9a83f860	2227	wxLogTrace(TRACE_STRCONV, wxT(" trying charset \"%s\""),
c2b83fdd VZ	2228	nameXE.c_str());
c2b83fdd VZ	2229
86501081	2230	m2w = iconv_open(nameXE.ToAscii(), name);
74a7eb0b	2231	if ( m2w == ICONV_T_INVALID )
3a0d76bc	2232	{
74a7eb0b	2233	// try charset w/o bytesex info (e.g. "UCS4")
9a83f860	2234	wxLogTrace(TRACE_STRCONV, wxT(" trying charset \"%s\""),
c2b83fdd	2235	nameCS.c_str());
86501081	2236	m2w = iconv_open(nameCS.ToAscii(), name);
3a0d76bc	2237
74a7eb0b VZ	2238	// and check for bytesex ourselves:
74a7eb0b VZ	2239	if ( m2w != ICONV_T_INVALID )
3a0d76bc	2240	{
74a7eb0b	2241	char buf[2], *bufPtr;
e8769ed1	2242	wchar_t wbuf[2];
74a7eb0b VZ	2243	size_t insz, outsz;
	2244	size_t res;
	2245
	2246	buf[0] = 'A';
	2247	buf[1] = 0;
	2248	wbuf[0] = 0;
	2249	insz = 2;
	2250	outsz = SIZEOF_WCHAR_T * 2;
e8769ed1	2251	char* wbufPtr = (char*)wbuf;
74a7eb0b VZ	2252	bufPtr = buf;
74a7eb0b VZ	2253
ef199164 DS	2254	res = iconv(
ef199164 DS	2255	m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
e8769ed1	2256	&wbufPtr, &outsz);
74a7eb0b VZ	2257
	2258	if (ICONV_FAILED(res, insz))
	2259	{
	2260	wxLogLastError(wxT("iconv"));
422e411e	2261	wxLogError(_("Conversion to charset '%s' doesn't work."),
17a1ebd1	2262	nameCS.c_str());
74a7eb0b VZ	2263	}
	2264	else // ok, can convert to this encoding, remember it
	2265	{
17a1ebd1	2266	ms_wcCharsetName = nameCS;
74a7eb0b VZ	2267	ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
74a7eb0b VZ	2268	}
3a0d76bc VS	2269	}
3a0d76bc VS	2270	}
74a7eb0b	2271	else // use charset not requiring byte swapping
36acb880	2272	{
74a7eb0b	2273	ms_wcCharsetName = nameXE;
36acb880	2274	}
3a0d76bc	2275	}
74a7eb0b	2276
0944fceb	2277	wxLogTrace(TRACE_STRCONV,
74a7eb0b	2278	wxT("iconv wchar_t charset is \"%s\"%s"),
999020e1 VZ	2279	ms_wcCharsetName.empty() ? wxString("<none>")
999020e1 VZ	2280	: ms_wcCharsetName,
9a83f860 VZ	2281	ms_wcNeedsSwap ? wxT(" (needs swap)")
9a83f860 VZ	2282	: wxT(""));
3a0d76bc	2283	}
36acb880	2284	else // we already have ms_wcCharsetName
3caec1bb	2285	{
86501081	2286	m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
f1339c56	2287	}
dccce9ea	2288
74a7eb0b	2289	if ( ms_wcCharsetName.empty() )
f1339c56	2290	{
74a7eb0b	2291	w2m = ICONV_T_INVALID;
36acb880	2292	}
405d8f46 VZ	2293	else
405d8f46 VZ	2294	{
86501081	2295	w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
74a7eb0b VZ	2296	if ( w2m == ICONV_T_INVALID )
	2297	{
	2298	wxLogTrace(TRACE_STRCONV,
	2299	wxT("\"%s\" -> \"%s\" works but not the converse!?"),
86501081	2300	ms_wcCharsetName.c_str(), name);
74a7eb0b	2301	}
405d8f46	2302	}
36acb880	2303	}
3caec1bb	2304
e95354ec	2305	wxMBConv_iconv::~wxMBConv_iconv()
36acb880	2306	{
b64f93b6 VZ	2307	free(const_cast<char *>(m_name));
b64f93b6 VZ	2308
74a7eb0b	2309	if ( m2w != ICONV_T_INVALID )
36acb880	2310	iconv_close(m2w);
74a7eb0b	2311	if ( w2m != ICONV_T_INVALID )
36acb880 VZ	2312	iconv_close(w2m);
36acb880 VZ	2313	}
3a0d76bc	2314
8f4b0f43 VZ	2315	size_t
	2316	wxMBConv_iconv::ToWChar(wchar_t *dst, size_t dstLen,
	2317	const char *src, size_t srcLen) const
36acb880	2318	{
8f4b0f43	2319	if ( srcLen == wxNO_LEN )
69373110	2320	{
8f4b0f43 VZ	2321	// find the string length: notice that must be done differently for
	2322	// NUL-terminated strings and UTF-16/32 which are terminated with 2/4
	2323	// consecutive NULs
	2324	const size_t nulLen = GetMBNulLen();
	2325	switch ( nulLen )
	2326	{
	2327	default:
	2328	return wxCONV_FAILED;
69373110	2329
8f4b0f43 VZ	2330	case 1:
	2331	srcLen = strlen(src); // arguably more optimized than our version
	2332	break;
69373110	2333
8f4b0f43 VZ	2334	case 2:
	2335	case 4:
	2336	// for UTF-16/32 not only we need to have 2/4 consecutive NULs
	2337	// but they also have to start at character boundary and not
	2338	// span two adjacent characters
	2339	const char *p;
	2340	for ( p = src; NotAllNULs(p, nulLen); p += nulLen )
	2341	;
	2342	srcLen = p - src;
	2343	break;
	2344	}
d50c0831 VZ	2345
	2346	// when we're determining the length of the string ourselves we count
	2347	// the terminating NUL(s) as part of it and always NUL-terminate the
	2348	// output
	2349	srcLen += nulLen;
69373110 VZ	2350	}
69373110 VZ	2351
8f4b0f43 VZ	2352	// we express length in the number of (wide) characters but iconv always
	2353	// counts buffer sizes it in bytes
	2354	dstLen *= SIZEOF_WCHAR_T;
	2355
b1d547eb	2356	#if wxUSE_THREADS
6a17b868 SN	2357	// NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
6a17b868 SN	2358	// Unfortunately there are a couple of global wxCSConv objects such as
b1d547eb VS	2359	// wxConvLocal that are used all over wx code, so we have to make sure
	2360	// the handle is used by at most one thread at the time. Otherwise
	2361	// only a few wx classes would be safe to use from non-main threads
	2362	// as MB<->WC conversion would fail "randomly".
	2363	wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
69373110 VZ	2364	#endif // wxUSE_THREADS
69373110 VZ	2365
36acb880	2366	size_t res, cres;
8f4b0f43	2367	const char *pszPtr = src;
36acb880	2368
8f4b0f43	2369	if ( dst )
36acb880	2370	{
8f4b0f43	2371	char* bufPtr = (char*)dst;
e8769ed1	2372
36acb880	2373	// have destination buffer, convert there
1752fda6	2374	size_t dstLenOrig = dstLen;
36acb880	2375	cres = iconv(m2w,
8f4b0f43 VZ	2376	ICONV_CHAR_CAST(&pszPtr), &srcLen,
8f4b0f43 VZ	2377	&bufPtr, &dstLen);
1752fda6 VZ	2378
	2379	// convert the number of bytes converted as returned by iconv to the
	2380	// number of (wide) characters converted that we need
	2381	res = (dstLenOrig - dstLen) / SIZEOF_WCHAR_T;
dccce9ea	2382
36acb880	2383	if (ms_wcNeedsSwap)
3a0d76bc	2384	{
36acb880	2385	// convert to native endianness
17a1ebd1	2386	for ( unsigned i = 0; i < res; i++ )
467a2982	2387	dst[i] = WC_BSWAP(dst[i]);
3a0d76bc	2388	}
36acb880	2389	}
8f4b0f43	2390	else // no destination buffer
36acb880	2391	{
8f4b0f43	2392	// convert using temp buffer to calculate the size of the buffer needed
878c265b	2393	wchar_t tbuf[256];
36acb880	2394	res = 0;
ef199164 DS	2395
	2396	do
	2397	{
e8769ed1	2398	char* bufPtr = (char*)tbuf;
8f4b0f43	2399	dstLen = 8 * SIZEOF_WCHAR_T;
36acb880 VZ	2400
36acb880 VZ	2401	cres = iconv(m2w,
8f4b0f43 VZ	2402	ICONV_CHAR_CAST(&pszPtr), &srcLen,
8f4b0f43 VZ	2403	&bufPtr, &dstLen );
36acb880	2404
8f4b0f43	2405	res += 8 - (dstLen / SIZEOF_WCHAR_T);
ef199164 DS	2406	}
ef199164 DS	2407	while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56	2408	}
dccce9ea	2409
8f4b0f43	2410	if (ICONV_FAILED(cres, srcLen))
f1339c56	2411	{
36acb880	2412	//VS: it is ok if iconv fails, hence trace only
ce6f8d6f	2413	wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479	2414	return wxCONV_FAILED;
36acb880 VZ	2415	}
	2416
	2417	return res;
	2418	}
	2419
8f4b0f43 VZ	2420	size_t wxMBConv_iconv::FromWChar(char *dst, size_t dstLen,
8f4b0f43 VZ	2421	const wchar_t *src, size_t srcLen) const
36acb880	2422	{
b1d547eb VS	2423	#if wxUSE_THREADS
	2424	// NB: explained in MB2WC
	2425	wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
	2426	#endif
3698ae71	2427
8f4b0f43	2428	if ( srcLen == wxNO_LEN )
2588ee86	2429	srcLen = wxWcslen(src) + 1;
8f4b0f43 VZ	2430
	2431	size_t inbuflen = srcLen * SIZEOF_WCHAR_T;
	2432	size_t outbuflen = dstLen;
36acb880	2433	size_t res, cres;
3a0d76bc	2434
36acb880	2435	wchar_t *tmpbuf = 0;
3caec1bb	2436
36acb880 VZ	2437	if (ms_wcNeedsSwap)
	2438	{
	2439	// need to copy to temp buffer to switch endianness
51725fc0	2440	// (doing WC_BSWAP twice on the original buffer won't work, as it
36acb880	2441	// could be in read-only memory, or be accessed in some other thread)
51725fc0	2442	tmpbuf = (wchar_t *)malloc(inbuflen);
8f4b0f43 VZ	2443	for ( size_t i = 0; i < srcLen; i++ )
8f4b0f43 VZ	2444	tmpbuf[i] = WC_BSWAP(src[i]);
ef199164	2445
8f4b0f43	2446	src = tmpbuf;
36acb880	2447	}
3a0d76bc	2448
8f4b0f43 VZ	2449	char* inbuf = (char*)src;
8f4b0f43 VZ	2450	if ( dst )
36acb880 VZ	2451	{
36acb880 VZ	2452	// have destination buffer, convert there
8f4b0f43	2453	cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
3a0d76bc	2454
8f4b0f43	2455	res = dstLen - outbuflen;
36acb880	2456	}
8f4b0f43	2457	else // no destination buffer
36acb880	2458	{
8f4b0f43	2459	// convert using temp buffer to calculate the size of the buffer needed
878c265b	2460	char tbuf[256];
36acb880	2461	res = 0;
ef199164 DS	2462	do
ef199164 DS	2463	{
8f4b0f43	2464	dst = tbuf;
51725fc0	2465	outbuflen = WXSIZEOF(tbuf);
36acb880	2466
8f4b0f43	2467	cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
dccce9ea	2468
51725fc0	2469	res += WXSIZEOF(tbuf) - outbuflen;
ef199164 DS	2470	}
ef199164 DS	2471	while ((cres == (size_t)-1) && (errno == E2BIG));
f1339c56	2472	}
dccce9ea	2473
36acb880 VZ	2474	if (ms_wcNeedsSwap)
	2475	{
	2476	free(tmpbuf);
	2477	}
dccce9ea	2478
e8769ed1	2479	if (ICONV_FAILED(cres, inbuflen))
36acb880	2480	{
ce6f8d6f	2481	wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
467e0479	2482	return wxCONV_FAILED;
36acb880 VZ	2483	}
	2484
	2485	return res;
	2486	}
	2487
7ef3ab50	2488	size_t wxMBConv_iconv::GetMBNulLen() const
eec47cc6	2489	{
c1464d9d	2490	if ( m_minMBCharWidth == 0 )
eec47cc6 VZ	2491	{
	2492	wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
	2493
	2494	#if wxUSE_THREADS
	2495	// NB: explained in MB2WC
	2496	wxMutexLocker lock(self->m_iconvMutex);
	2497	#endif
	2498
999020e1	2499	const wchar_t *wnul = L"";
c1464d9d	2500	char buf[8]; // should be enough for NUL in any encoding
356410fc	2501	size_t inLen = sizeof(wchar_t),
c1464d9d	2502	outLen = WXSIZEOF(buf);
ef199164 DS	2503	char inBuff = (char )wnul;
	2504	char *outBuff = buf;
	2505	if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
356410fc	2506	{
c1464d9d	2507	self->m_minMBCharWidth = (size_t)-1;
356410fc VZ	2508	}
	2509	else // ok
	2510	{
ef199164	2511	self->m_minMBCharWidth = outBuff - buf;
356410fc	2512	}
eec47cc6 VZ	2513	}
eec47cc6 VZ	2514
c1464d9d	2515	return m_minMBCharWidth;
eec47cc6 VZ	2516	}
eec47cc6 VZ	2517
ba98e032 VS	2518	#if wxUSE_UNICODE_UTF8
	2519	bool wxMBConv_iconv::IsUTF8() const
	2520	{
86501081 VS	2521	return wxStricmp(m_name, "UTF-8") == 0 \|\|
86501081 VS	2522	wxStricmp(m_name, "UTF8") == 0;
ba98e032 VS	2523	}
	2524	#endif
	2525
b040e242	2526	#endif // HAVE_ICONV
36acb880	2527
e95354ec	2528
36acb880 VZ	2529	// ============================================================================
	2530	// Win32 conversion classes
	2531	// ============================================================================
1cd52418	2532
e95354ec	2533	#ifdef wxHAVE_WIN32_MB2WC
373658eb	2534
8b04d4c4	2535	// from utils.cpp
d775fa82	2536	#if wxUSE_FONTMAP
86501081	2537	extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
8b04d4c4	2538	extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
7608a683	2539	#endif
373658eb	2540
e95354ec	2541	class wxMBConv_win32 : public wxMBConv
1cd52418 OK	2542	{
1cd52418 OK	2543	public:
bde4baac VZ	2544	wxMBConv_win32()
	2545	{
	2546	m_CodePage = CP_ACP;
c1464d9d	2547	m_minMBCharWidth = 0;
bde4baac VZ	2548	}
bde4baac VZ	2549
d36c9347	2550	wxMBConv_win32(const wxMBConv_win32& conv)
1e1c5d62	2551	: wxMBConv()
d36c9347 VZ	2552	{
	2553	m_CodePage = conv.m_CodePage;
	2554	m_minMBCharWidth = conv.m_minMBCharWidth;
	2555	}
	2556
7608a683	2557	#if wxUSE_FONTMAP
86501081	2558	wxMBConv_win32(const char* name)
bde4baac VZ	2559	{
bde4baac VZ	2560	m_CodePage = wxCharsetToCodepage(name);
c1464d9d	2561	m_minMBCharWidth = 0;
bde4baac	2562	}
dccce9ea	2563
e95354ec	2564	wxMBConv_win32(wxFontEncoding encoding)
bde4baac VZ	2565	{
bde4baac VZ	2566	m_CodePage = wxEncodingToCodepage(encoding);
c1464d9d	2567	m_minMBCharWidth = 0;
bde4baac	2568	}
eec47cc6	2569	#endif // wxUSE_FONTMAP
8b04d4c4	2570
d36c9347	2571	virtual size_t MB2WC(wchar_t buf, const char psz, size_t n) const
f1339c56	2572	{
02272c9c VZ	2573	// note that we have to use MB_ERR_INVALID_CHARS flag as it without it
	2574	// the behaviour is not compatible with the Unix version (using iconv)
	2575	// and break the library itself, e.g. wxTextInputStream::NextChar()
	2576	// wouldn't work if reading an incomplete MB char didn't result in an
	2577	// error
667e5b3e	2578	//
89028980	2579	// Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
830f8f11 VZ	2580	// Win XP or newer and it is not supported for UTF-[78] so we always
830f8f11 VZ	2581	// use our own conversions in this case. See
89028980 VS	2582	// http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
89028980 VS	2583	// http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
830f8f11	2584	if ( m_CodePage == CP_UTF8 )
89028980	2585	{
5487ff0f	2586	return wxMBConvUTF8().MB2WC(buf, psz, n);
89028980	2587	}
830f8f11 VZ	2588
	2589	if ( m_CodePage == CP_UTF7 )
	2590	{
5487ff0f	2591	return wxMBConvUTF7().MB2WC(buf, psz, n);
830f8f11 VZ	2592	}
	2593
	2594	int flags = 0;
	2595	if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
	2596	IsAtLeastWin2kSP4() )
89028980	2597	{
830f8f11	2598	flags = MB_ERR_INVALID_CHARS;
89028980	2599	}
667e5b3e	2600
2b5f62a0 VZ	2601	const size_t len = ::MultiByteToWideChar
	2602	(
	2603	m_CodePage, // code page
667e5b3e	2604	flags, // flags: fall on error
2b5f62a0 VZ	2605	psz, // input string
2b5f62a0 VZ	2606	-1, // its length (NUL-terminated)
b4da152e	2607	buf, // output string
2b5f62a0 VZ	2608	buf ? n : 0 // size of output buffer
2b5f62a0 VZ	2609	);
89028980 VS	2610	if ( !len )
	2611	{
	2612	// function totally failed
467e0479	2613	return wxCONV_FAILED;
89028980 VS	2614	}
	2615
	2616	// if we were really converting and didn't use MB_ERR_INVALID_CHARS,
	2617	// check if we succeeded, by doing a double trip:
	2618	if ( !flags && buf )
	2619	{
53c174fc VZ	2620	const size_t mbLen = strlen(psz);
53c174fc VZ	2621	wxCharBuffer mbBuf(mbLen);
89028980 VS	2622	if ( ::WideCharToMultiByte
	2623	(
	2624	m_CodePage,
	2625	0,
	2626	buf,
	2627	-1,
	2628	mbBuf.data(),
53c174fc	2629	mbLen + 1, // size in bytes, not length
89028980 VS	2630	NULL,
	2631	NULL
	2632	) == 0 \|\|
	2633	strcmp(mbBuf, psz) != 0 )
	2634	{
	2635	// we didn't obtain the same thing we started from, hence
	2636	// the conversion was lossy and we consider that it failed
467e0479	2637	return wxCONV_FAILED;
89028980 VS	2638	}
89028980 VS	2639	}
2b5f62a0	2640
03a991bc VZ	2641	// note that it returns count of written chars for buf != NULL and size
	2642	// of the needed buffer for buf == NULL so in either case the length of
	2643	// the string (which never includes the terminating NUL) is one less
89028980	2644	return len - 1;
f1339c56	2645	}
dccce9ea	2646
d36c9347	2647	virtual size_t WC2MB(char buf, const wchar_t pwz, size_t n) const
f1339c56	2648	{
13dd924a VZ	2649	/*
	2650	we have a problem here: by default, WideCharToMultiByte() may
	2651	replace characters unrepresentable in the target code page with bad
	2652	quality approximations such as turning "1/2" symbol (U+00BD) into
	2653	"1" for the code pages which don't have it and we, obviously, want
	2654	to avoid this at any price
d775fa82	2655
13dd924a VZ	2656	the trouble is that this function does it _silently_, i.e. it won't
	2657	even tell us whether it did or not... Win98/2000 and higher provide
	2658	WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
	2659	we have to resort to a round trip, i.e. check that converting back
	2660	results in the same string -- this is, of course, expensive but
	2661	otherwise we simply can't be sure to not garble the data.
	2662	*/
	2663
	2664	// determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
	2665	// it doesn't work with CJK encodings (which we test for rather roughly
	2666	// here...) nor with UTF-7/8 nor, of course, with Windows versions not
	2667	// supporting it
907173e5 WS	2668	BOOL usedDef wxDUMMY_INITIALIZE(false);
907173e5 WS	2669	BOOL *pUsedDef;
13dd924a VZ	2670	int flags;
	2671	if ( CanUseNoBestFit() && m_CodePage < 50000 )
	2672	{
	2673	// it's our lucky day
	2674	flags = WC_NO_BEST_FIT_CHARS;
	2675	pUsedDef = &usedDef;
	2676	}
	2677	else // old system or unsupported encoding
	2678	{
	2679	flags = 0;
	2680	pUsedDef = NULL;
	2681	}
	2682
2b5f62a0 VZ	2683	const size_t len = ::WideCharToMultiByte
	2684	(
	2685	m_CodePage, // code page
13dd924a VZ	2686	flags, // either none or no best fit
13dd924a VZ	2687	pwz, // input string
2b5f62a0 VZ	2688	-1, // it is (wide) NUL-terminated
	2689	buf, // output buffer
	2690	buf ? n : 0, // and its size
	2691	NULL, // default "replacement" char
13dd924a	2692	pUsedDef // [out] was it used?
2b5f62a0 VZ	2693	);
2b5f62a0 VZ	2694
13dd924a VZ	2695	if ( !len )
	2696	{
	2697	// function totally failed
467e0479	2698	return wxCONV_FAILED;
13dd924a VZ	2699	}
13dd924a VZ	2700
765bdb4a VZ	2701	// we did something, check if we really succeeded
765bdb4a VZ	2702	if ( flags )
13dd924a	2703	{
765bdb4a VZ	2704	// check if the conversion failed, i.e. if any replacements
	2705	// were done
	2706	if ( usedDef )
	2707	return wxCONV_FAILED;
	2708	}
	2709	else // we must resort to double tripping...
	2710	{
	2711	// first we need to ensure that we really have the MB data: this is
	2712	// not the case if we're called with NULL buffer, in which case we
	2713	// need to do the conversion yet again
	2714	wxCharBuffer bufDef;
	2715	if ( !buf )
13dd924a	2716	{
765bdb4a VZ	2717	bufDef = wxCharBuffer(len);
	2718	buf = bufDef.data();
	2719	if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
	2720	buf, len, NULL, NULL) )
467e0479	2721	return wxCONV_FAILED;
13dd924a	2722	}
765bdb4a	2723
564da6ff VZ	2724	if ( !n )
564da6ff VZ	2725	n = wcslen(pwz);
765bdb4a	2726	wxWCharBuffer wcBuf(n);
564da6ff	2727	if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED \|\|
765bdb4a	2728	wcscmp(wcBuf, pwz) != 0 )
13dd924a	2729	{
765bdb4a VZ	2730	// we didn't obtain the same thing we started from, hence
	2731	// the conversion was lossy and we consider that it failed
	2732	return wxCONV_FAILED;
13dd924a VZ	2733	}
	2734	}
	2735
03a991bc	2736	// see the comment above for the reason of "len - 1"
13dd924a	2737	return len - 1;
f1339c56	2738	}
dccce9ea	2739
7ef3ab50 VZ	2740	virtual size_t GetMBNulLen() const
	2741	{
	2742	if ( m_minMBCharWidth == 0 )
	2743	{
	2744	int len = ::WideCharToMultiByte
	2745	(
	2746	m_CodePage, // code page
	2747	0, // no flags
	2748	L"", // input string
	2749	1, // translate just the NUL
	2750	NULL, // output buffer
	2751	0, // and its size
	2752	NULL, // no replacement char
	2753	NULL // [out] don't care if it was used
	2754	);
	2755
	2756	wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
	2757	switch ( len )
	2758	{
	2759	default:
9a83f860	2760	wxLogDebug(wxT("Unexpected NUL length %d"), len);
ef199164 DS	2761	self->m_minMBCharWidth = (size_t)-1;
ef199164 DS	2762	break;
7ef3ab50 VZ	2763
	2764	case 0:
	2765	self->m_minMBCharWidth = (size_t)-1;
	2766	break;
	2767
	2768	case 1:
	2769	case 2:
	2770	case 4:
	2771	self->m_minMBCharWidth = len;
	2772	break;
	2773	}
	2774	}
	2775
	2776	return m_minMBCharWidth;
	2777	}
	2778
d36c9347 VZ	2779	virtual wxMBConv Clone() const { return new wxMBConv_win32(this); }
d36c9347 VZ	2780
13dd924a VZ	2781	bool IsOk() const { return m_CodePage != -1; }
	2782
	2783	private:
	2784	static bool CanUseNoBestFit()
	2785	{
	2786	static int s_isWin98Or2k = -1;
	2787
	2788	if ( s_isWin98Or2k == -1 )
	2789	{
	2790	int verMaj, verMin;
	2791	switch ( wxGetOsVersion(&verMaj, &verMin) )
	2792	{
406d283a	2793	case wxOS_WINDOWS_9X:
13dd924a VZ	2794	s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
	2795	break;
	2796
406d283a	2797	case wxOS_WINDOWS_NT:
13dd924a VZ	2798	s_isWin98Or2k = verMaj >= 5;
	2799	break;
	2800
	2801	default:
ef199164	2802	// unknown: be conservative by default
13dd924a	2803	s_isWin98Or2k = 0;
ef199164	2804	break;
13dd924a VZ	2805	}
13dd924a VZ	2806
9a83f860	2807	wxASSERT_MSG( s_isWin98Or2k != -1, wxT("should be set above") );
13dd924a VZ	2808	}
	2809
	2810	return s_isWin98Or2k == 1;
	2811	}
f1339c56	2812
89028980 VS	2813	static bool IsAtLeastWin2kSP4()
89028980 VS	2814	{
8942f83a WS	2815	#ifdef __WXWINCE__
	2816	return false;
	2817	#else
89028980 VS	2818	static int s_isAtLeastWin2kSP4 = -1;
	2819
	2820	if ( s_isAtLeastWin2kSP4 == -1 )
	2821	{
	2822	OSVERSIONINFOEX ver;
	2823
	2824	memset(&ver, 0, sizeof(ver));
	2825	ver.dwOSVersionInfoSize = sizeof(ver);
	2826	GetVersionEx((OSVERSIONINFO*)&ver);
	2827
	2828	s_isAtLeastWin2kSP4 =
	2829	((ver.dwMajorVersion > 5) \|\| // Vista+
	2830	(ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) \|\| // XP/2003
	2831	(ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
	2832	ver.wServicePackMajor >= 4)) // 2000 SP4+
	2833	? 1 : 0;
	2834	}
	2835
	2836	return s_isAtLeastWin2kSP4 == 1;
8942f83a	2837	#endif
89028980 VS	2838	}
89028980 VS	2839
eec47cc6	2840
c1464d9d	2841	// the code page we're working with
b1d66b54	2842	long m_CodePage;
c1464d9d	2843
7ef3ab50	2844	// cached result of GetMBNulLen(), set to 0 initially meaning
c1464d9d VZ	2845	// "unknown"
c1464d9d VZ	2846	size_t m_minMBCharWidth;
1cd52418	2847	};
e95354ec VZ	2848
	2849	#endif // wxHAVE_WIN32_MB2WC
	2850
f7e98dee	2851
36acb880 VZ	2852	// ============================================================================
	2853	// wxEncodingConverter based conversion classes
	2854	// ============================================================================
	2855
1e6feb95	2856	#if wxUSE_FONTMAP
1cd52418	2857
e95354ec	2858	class wxMBConv_wxwin : public wxMBConv
1cd52418	2859	{
8b04d4c4 VZ	2860	private:
	2861	void Init()
	2862	{
6ac84a78 DE	2863	// Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
	2864	// The wxMBConv_cf class does a better job.
	2865	m_ok = (m_enc < wxFONTENCODING_MACMIN \|\| m_enc > wxFONTENCODING_MACMAX) &&
	2866	m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
8b04d4c4 VZ	2867	w2m.Init(wxFONTENCODING_UNICODE, m_enc);
	2868	}
	2869
6001e347	2870	public:
f1339c56 RR	2871	// temporarily just use wxEncodingConverter stuff,
f1339c56 RR	2872	// so that it works while a better implementation is built
86501081	2873	wxMBConv_wxwin(const char* name)
f1339c56 RR	2874	{
f1339c56 RR	2875	if (name)
267e11c5	2876	m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
8b04d4c4 VZ	2877	else
8b04d4c4 VZ	2878	m_enc = wxFONTENCODING_SYSTEM;
cafbf6fb	2879
8b04d4c4 VZ	2880	Init();
	2881	}
	2882
e95354ec	2883	wxMBConv_wxwin(wxFontEncoding enc)
8b04d4c4 VZ	2884	{
	2885	m_enc = enc;
	2886
	2887	Init();
f1339c56	2888	}
dccce9ea	2889
bde4baac	2890	size_t MB2WC(wchar_t buf, const char psz, size_t WXUNUSED(n)) const
f1339c56 RR	2891	{
f1339c56 RR	2892	size_t inbuf = strlen(psz);
dccce9ea	2893	if (buf)
c643a977	2894	{
ef199164	2895	if (!m2w.Convert(psz, buf))
467e0479	2896	return wxCONV_FAILED;
c643a977	2897	}
f1339c56 RR	2898	return inbuf;
f1339c56 RR	2899	}
dccce9ea	2900
bde4baac	2901	size_t WC2MB(char buf, const wchar_t psz, size_t WXUNUSED(n)) const
f1339c56	2902	{
f8d791e0	2903	const size_t inbuf = wxWcslen(psz);
f1339c56	2904	if (buf)
c643a977	2905	{
ef199164	2906	if (!w2m.Convert(psz, buf))
467e0479	2907	return wxCONV_FAILED;
c643a977	2908	}
dccce9ea	2909
f1339c56 RR	2910	return inbuf;
f1339c56 RR	2911	}
dccce9ea	2912
7ef3ab50	2913	virtual size_t GetMBNulLen() const
eec47cc6 VZ	2914	{
	2915	switch ( m_enc )
	2916	{
	2917	case wxFONTENCODING_UTF16BE:
	2918	case wxFONTENCODING_UTF16LE:
c1464d9d	2919	return 2;
eec47cc6 VZ	2920
	2921	case wxFONTENCODING_UTF32BE:
	2922	case wxFONTENCODING_UTF32LE:
c1464d9d	2923	return 4;
eec47cc6 VZ	2924
eec47cc6 VZ	2925	default:
c1464d9d	2926	return 1;
eec47cc6 VZ	2927	}
	2928	}
	2929
d36c9347 VZ	2930	virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
d36c9347 VZ	2931
7ef3ab50 VZ	2932	bool IsOk() const { return m_ok; }
	2933
	2934	public:
	2935	wxFontEncoding m_enc;
	2936	wxEncodingConverter m2w, w2m;
	2937
	2938	private:
cafbf6fb VZ	2939	// were we initialized successfully?
cafbf6fb VZ	2940	bool m_ok;
fc7a2a60	2941
c0c133e1	2942	wxDECLARE_NO_COPY_CLASS(wxMBConv_wxwin);
f6bcfd97	2943	};
6001e347	2944
8f115891	2945	// make the constructors available for unit testing
86501081	2946	WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
8f115891 MW	2947	{
	2948	wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
	2949	if ( !result->IsOk() )
	2950	{
	2951	delete result;
	2952	return 0;
	2953	}
ef199164	2954
8f115891 MW	2955	return result;
	2956	}
	2957
1e6feb95 VZ	2958	#endif // wxUSE_FONTMAP
1e6feb95 VZ	2959
36acb880 VZ	2960	// ============================================================================
	2961	// wxCSConv implementation
	2962	// ============================================================================
	2963
8b04d4c4	2964	void wxCSConv::Init()
6001e347	2965	{
e95354ec VZ	2966	m_name = NULL;
e95354ec VZ	2967	m_convReal = NULL;
6c4d607e VZ	2968	}
	2969
	2970	void wxCSConv::SetEncoding(wxFontEncoding encoding)
	2971	{
	2972	switch ( encoding )
	2973	{
	2974	case wxFONTENCODING_MAX:
	2975	case wxFONTENCODING_SYSTEM:
	2976	if ( m_name )
	2977	{
	2978	// It's ok to not have encoding value if we have a name for it.
	2979	m_encoding = wxFONTENCODING_SYSTEM;
	2980	}
	2981	else // No name neither.
	2982	{
	2983	// Fall back to the system default encoding in this case (not
	2984	// sure how much sense does this make but this is how the old
	2985	// code used to behave).
	2986	#if wxUSE_INTL
	2987	m_encoding = wxLocale::GetSystemEncoding();
	2988	if ( m_encoding == wxFONTENCODING_SYSTEM )
	2989	#endif // wxUSE_INTL
	2990	m_encoding = wxFONTENCODING_ISO8859_1;
	2991	}
	2992	break;
	2993
	2994	case wxFONTENCODING_DEFAULT:
	2995	// wxFONTENCODING_DEFAULT is same as US-ASCII in this context
	2996	m_encoding = wxFONTENCODING_ISO8859_1;
	2997	break;
	2998
	2999	default:
	3000	// Just use the provided encoding.
	3001	m_encoding = encoding;
	3002	}
e95354ec VZ	3003	}
e95354ec VZ	3004
86501081	3005	wxCSConv::wxCSConv(const wxString& charset)
8b04d4c4 VZ	3006	{
8b04d4c4 VZ	3007	Init();
82713003	3008
86501081	3009	if ( !charset.empty() )
e95354ec	3010	{
86501081	3011	SetName(charset.ToAscii());
e95354ec	3012	}
bda3d86a	3013
e4277538	3014	#if wxUSE_FONTMAP
6c4d607e	3015	SetEncoding(wxFontMapperBase::GetEncodingFromName(charset));
e4277538	3016	#else
6c4d607e	3017	SetEncoding(wxFONTENCODING_SYSTEM);
e4277538	3018	#endif
6c4d607e VZ	3019
6c4d607e VZ	3020	m_convReal = DoCreate();
6001e347 RR	3021	}
6001e347 RR	3022
8b04d4c4 VZ	3023	wxCSConv::wxCSConv(wxFontEncoding encoding)
8b04d4c4 VZ	3024	{
bda3d86a	3025	if ( encoding == wxFONTENCODING_MAX \|\| encoding == wxFONTENCODING_DEFAULT )
e95354ec	3026	{
9a83f860	3027	wxFAIL_MSG( wxT("invalid encoding value in wxCSConv ctor") );
e95354ec VZ	3028
	3029	encoding = wxFONTENCODING_SYSTEM;
	3030	}
	3031
8b04d4c4 VZ	3032	Init();
8b04d4c4 VZ	3033
6c4d607e VZ	3034	SetEncoding(encoding);
	3035
	3036	m_convReal = DoCreate();
8b04d4c4 VZ	3037	}
8b04d4c4 VZ	3038
6001e347 RR	3039	wxCSConv::~wxCSConv()
6001e347 RR	3040	{
65e50848 JS	3041	Clear();
	3042	}
	3043
54380f29	3044	wxCSConv::wxCSConv(const wxCSConv& conv)
8b04d4c4	3045	: wxMBConv()
54380f29	3046	{
8b04d4c4 VZ	3047	Init();
8b04d4c4 VZ	3048
54380f29	3049	SetName(conv.m_name);
6c4d607e VZ	3050	SetEncoding(conv.m_encoding);
	3051
	3052	m_convReal = DoCreate();
54380f29 GD	3053	}
	3054
	3055	wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
	3056	{
	3057	Clear();
8b04d4c4	3058
54380f29	3059	SetName(conv.m_name);
6c4d607e VZ	3060	SetEncoding(conv.m_encoding);
	3061
	3062	m_convReal = DoCreate();
8b04d4c4	3063
54380f29 GD	3064	return *this;
	3065	}
	3066
65e50848 JS	3067	void wxCSConv::Clear()
65e50848 JS	3068	{
8b04d4c4	3069	free(m_name);
65e50848	3070	m_name = NULL;
6c4d607e VZ	3071
6c4d607e VZ	3072	wxDELETE(m_convReal);
6001e347 RR	3073	}
6001e347 RR	3074
86501081	3075	void wxCSConv::SetName(const char *charset)
6001e347	3076	{
6c4d607e	3077	if ( charset )
d6f2a891	3078	m_name = wxStrdup(charset);
6001e347 RR	3079	}
6001e347 RR	3080
8b3eb85d	3081	#if wxUSE_FONTMAP
8b3eb85d VZ	3082
8b3eb85d VZ	3083	WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
3f5c62f9	3084	wxEncodingNameCache );
8b3eb85d VZ	3085
	3086	static wxEncodingNameCache gs_nameCache;
	3087	#endif
	3088
e95354ec VZ	3089	wxMBConv *wxCSConv::DoCreate() const
e95354ec VZ	3090	{
ce6f8d6f VZ	3091	#if wxUSE_FONTMAP
	3092	wxLogTrace(TRACE_STRCONV,
	3093	wxT("creating conversion for %s"),
	3094	(m_name ? m_name
86501081	3095	: (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
ce6f8d6f VZ	3096	#endif // wxUSE_FONTMAP
ce6f8d6f VZ	3097
c547282d VZ	3098	// check for the special case of ASCII or ISO8859-1 charset: as we have
	3099	// special knowledge of it anyhow, we don't need to create a special
	3100	// conversion object
6c4d607e	3101	if ( m_encoding == wxFONTENCODING_ISO8859_1 )
f1339c56	3102	{
e95354ec VZ	3103	// don't convert at all
	3104	return NULL;
	3105	}
dccce9ea	3106
e95354ec VZ	3107	// we trust OS to do conversion better than we can so try external
	3108	// conversion methods first
	3109	//
	3110	// the full order is:
	3111	// 1. OS conversion (iconv() under Unix or Win32 API)
	3112	// 2. hard coded conversions for UTF
	3113	// 3. wxEncodingConverter as fall back
	3114
	3115	// step (1)
	3116	#ifdef HAVE_ICONV
c547282d	3117	#if !wxUSE_FONTMAP
e95354ec	3118	if ( m_name )
c547282d	3119	#endif // !wxUSE_FONTMAP
e95354ec	3120	{
3ef10cfc	3121	#if wxUSE_FONTMAP
8b3eb85d	3122	wxFontEncoding encoding(m_encoding);
3ef10cfc	3123	#endif
8b3eb85d	3124
86501081	3125	if ( m_name )
8b3eb85d	3126	{
86501081	3127	wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
8b3eb85d VZ	3128	if ( conv->IsOk() )
	3129	return conv;
	3130
	3131	delete conv;
c547282d VZ	3132
c547282d VZ	3133	#if wxUSE_FONTMAP
8b3eb85d	3134	encoding =
86501081	3135	wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d	3136	#endif // wxUSE_FONTMAP
8b3eb85d VZ	3137	}
	3138	#if wxUSE_FONTMAP
	3139	{
	3140	const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
	3141	if ( it != gs_nameCache.end() )
	3142	{
	3143	if ( it->second.empty() )
	3144	return NULL;
c547282d	3145
86501081	3146	wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
8b3eb85d VZ	3147	if ( conv->IsOk() )
8b3eb85d VZ	3148	return conv;
e95354ec	3149
8b3eb85d VZ	3150	delete conv;
	3151	}
	3152
a243da29	3153	const wxChar* const* names = wxFontMapperBase::GetAllEncodingNames(encoding);
86501081 VS	3154	// CS : in case this does not return valid names (eg for MacRoman)
	3155	// encoding got a 'failure' entry in the cache all the same,
	3156	// although it just has to be created using a different method, so
	3157	// only store failed iconv creation attempts (or perhaps we
	3158	// shoulnd't do this at all ?)
3c67ec06	3159	if ( names[0] != NULL )
8b3eb85d	3160	{
3c67ec06	3161	for ( ; *names; ++names )
8b3eb85d	3162	{
86501081 VS	3163	// FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
	3164	// will need changes that will obsolete this
	3165	wxString name(*names);
	3166	wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3c67ec06 SC	3167	if ( conv->IsOk() )
	3168	{
	3169	gs_nameCache[encoding] = *names;
	3170	return conv;
	3171	}
	3172
	3173	delete conv;
8b3eb85d VZ	3174	}
8b3eb85d VZ	3175
9a83f860	3176	gs_nameCache[encoding] = wxT(""); // cache the failure
8b3eb85d	3177	}
8b3eb85d VZ	3178	}
8b3eb85d VZ	3179	#endif // wxUSE_FONTMAP
e95354ec VZ	3180	}
	3181	#endif // HAVE_ICONV
	3182
	3183	#ifdef wxHAVE_WIN32_MB2WC
	3184	{
7608a683	3185	#if wxUSE_FONTMAP
e95354ec VZ	3186	wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
	3187	: new wxMBConv_win32(m_encoding);
	3188	if ( conv->IsOk() )
	3189	return conv;
	3190
	3191	delete conv;
7608a683 WS	3192	#else
	3193	return NULL;
	3194	#endif
e95354ec VZ	3195	}
e95354ec VZ	3196	#endif // wxHAVE_WIN32_MB2WC
ef199164	3197
5c4ed98d	3198	#ifdef __DARWIN__
f7e98dee	3199	{
6ff49cbc DE	3200	// leave UTF16 and UTF32 to the built-ins of wx
	3201	if ( m_name \|\| ( m_encoding < wxFONTENCODING_UTF16BE \|\|
	3202	( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
f7e98dee	3203	{
a6900d10	3204	#if wxUSE_FONTMAP
5c4ed98d DE	3205	wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
5c4ed98d DE	3206	: new wxMBConv_cf(m_encoding);
a6900d10	3207	#else
5c4ed98d	3208	wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
a6900d10	3209	#endif
ef199164	3210
f7e98dee	3211	if ( conv->IsOk() )
d775fa82 WS	3212	return conv;
	3213
	3214	delete conv;
	3215	}
335d31e0	3216	}
5c4ed98d DE	3217	#endif // __DARWIN__
5c4ed98d DE	3218
e95354ec VZ	3219	// step (2)
	3220	wxFontEncoding enc = m_encoding;
	3221	#if wxUSE_FONTMAP
c547282d VZ	3222	if ( enc == wxFONTENCODING_SYSTEM && m_name )
	3223	{
	3224	// use "false" to suppress interactive dialogs -- we can be called from
	3225	// anywhere and popping up a dialog from here is the last thing we want to
	3226	// do
267e11c5	3227	enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
c547282d	3228	}
e95354ec VZ	3229	#endif // wxUSE_FONTMAP
	3230
	3231	switch ( enc )
	3232	{
	3233	case wxFONTENCODING_UTF7:
	3234	return new wxMBConvUTF7;
	3235
	3236	case wxFONTENCODING_UTF8:
	3237	return new wxMBConvUTF8;
	3238
e95354ec VZ	3239	case wxFONTENCODING_UTF16BE:
	3240	return new wxMBConvUTF16BE;
	3241
	3242	case wxFONTENCODING_UTF16LE:
	3243	return new wxMBConvUTF16LE;
	3244
e95354ec VZ	3245	case wxFONTENCODING_UTF32BE:
	3246	return new wxMBConvUTF32BE;
	3247
	3248	case wxFONTENCODING_UTF32LE:
	3249	return new wxMBConvUTF32LE;
	3250
	3251	default:
	3252	// nothing to do but put here to suppress gcc warnings
ef199164	3253	break;
e95354ec VZ	3254	}
	3255
	3256	// step (3)
	3257	#if wxUSE_FONTMAP
	3258	{
	3259	wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
	3260	: new wxMBConv_wxwin(m_encoding);
	3261	if ( conv->IsOk() )
	3262	return conv;
	3263
	3264	delete conv;
	3265	}
ef199164	3266
3df31b2d VZ	3267	wxLogTrace(TRACE_STRCONV,
3df31b2d VZ	3268	wxT("encoding \"%s\" is not supported by this system"),
ef6cef09	3269	(m_name ? wxString(m_name)
3df31b2d VZ	3270	: wxFontMapperBase::GetEncodingName(m_encoding)));
3df31b2d VZ	3271	#endif // wxUSE_FONTMAP
e95354ec VZ	3272
	3273	return NULL;
	3274	}
	3275
0f0298b1 VZ	3276	bool wxCSConv::IsOk() const
0f0298b1 VZ	3277	{
0f0298b1 VZ	3278	// special case: no convReal created for wxFONTENCODING_ISO8859_1
	3279	if ( m_encoding == wxFONTENCODING_ISO8859_1 )
	3280	return true; // always ok as we do it ourselves
	3281
	3282	// m_convReal->IsOk() is called at its own creation, so we know it must
	3283	// be ok if m_convReal is non-NULL
	3284	return m_convReal != NULL;
	3285	}
	3286
1c714a5d VZ	3287	size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
	3288	const char *src, size_t srcLen) const
	3289	{
2c74c558 VS	3290	if (m_convReal)
	3291	return m_convReal->ToWChar(dst, dstLen, src, srcLen);
	3292
	3293	// latin-1 (direct)
05392dc8 VZ	3294	if ( srcLen == wxNO_LEN )
05392dc8 VZ	3295	srcLen = strlen(src) + 1; // take trailing NUL too
1c714a5d	3296
05392dc8 VZ	3297	if ( dst )
	3298	{
	3299	if ( dstLen < srcLen )
	3300	return wxCONV_FAILED;
1c714a5d	3301
05392dc8 VZ	3302	for ( size_t n = 0; n < srcLen; n++ )
	3303	dst[n] = (unsigned char)(src[n]);
	3304	}
2c74c558	3305
05392dc8	3306	return srcLen;
1c714a5d VZ	3307	}
1c714a5d VZ	3308
05392dc8 VZ	3309	size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
05392dc8 VZ	3310	const wchar_t *src, size_t srcLen) const
6001e347	3311	{
e95354ec	3312	if (m_convReal)
05392dc8	3313	return m_convReal->FromWChar(dst, dstLen, src, srcLen);
f1339c56 RR	3314
f1339c56 RR	3315	// latin-1 (direct)
05392dc8 VZ	3316	if ( srcLen == wxNO_LEN )
05392dc8 VZ	3317	srcLen = wxWcslen(src) + 1;
dccce9ea	3318
05392dc8	3319	if ( dst )
f1339c56	3320	{
05392dc8 VZ	3321	if ( dstLen < srcLen )
05392dc8 VZ	3322	return wxCONV_FAILED;
1cd52418	3323
05392dc8	3324	for ( size_t n = 0; n < srcLen; n++ )
24642831	3325	{
05392dc8	3326	if ( src[n] > 0xFF )
467e0479	3327	return wxCONV_FAILED;
ef199164	3328
05392dc8	3329	dst[n] = (char)src[n];
24642831	3330	}
05392dc8	3331
24642831	3332	}
05392dc8	3333	else // still need to check the input validity
24642831	3334	{
05392dc8	3335	for ( size_t n = 0; n < srcLen; n++ )
24642831	3336	{
05392dc8	3337	if ( src[n] > 0xFF )
467e0479	3338	return wxCONV_FAILED;
24642831	3339	}
f1339c56	3340	}
dccce9ea	3341
05392dc8	3342	return srcLen;
6001e347 RR	3343	}
6001e347 RR	3344
7ef3ab50	3345	size_t wxCSConv::GetMBNulLen() const
eec47cc6	3346	{
eec47cc6	3347	if ( m_convReal )
7ef3ab50	3348	return m_convReal->GetMBNulLen();
eec47cc6	3349
ba98e032	3350	// otherwise, we are ISO-8859-1
c1464d9d	3351	return 1;
eec47cc6 VZ	3352	}
eec47cc6 VZ	3353
ba98e032 VS	3354	#if wxUSE_UNICODE_UTF8
	3355	bool wxCSConv::IsUTF8() const
	3356	{
ba98e032	3357	if ( m_convReal )
ba98e032	3358	return m_convReal->IsUTF8();
ba98e032 VS	3359
	3360	// otherwise, we are ISO-8859-1
	3361	return false;
	3362	}
	3363	#endif
	3364
69c928ef VZ	3365
	3366	#if wxUSE_UNICODE
	3367
	3368	wxWCharBuffer wxSafeConvertMB2WX(const char *s)
	3369	{
	3370	if ( !s )
	3371	return wxWCharBuffer();
	3372
	3373	wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
	3374	if ( !wbuf )
5487ff0f	3375	wbuf = wxMBConvUTF8().cMB2WX(s);
69c928ef VZ	3376	if ( !wbuf )
	3377	wbuf = wxConvISO8859_1.cMB2WX(s);
	3378
	3379	return wbuf;
	3380	}
	3381
	3382	wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
	3383	{
	3384	if ( !ws )
	3385	return wxCharBuffer();
	3386
	3387	wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
	3388	if ( !buf )
	3389	buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
	3390
	3391	return buf;
	3392	}
	3393
	3394	#endif // wxUSE_UNICODE
f5a1953b	3395
1e50d914 VS	3396	// ----------------------------------------------------------------------------
	3397	// globals
	3398	// ----------------------------------------------------------------------------
	3399
// NB: The reason why we create converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object). In other
//     words, possibly _before_ the converter global object would be
//     initialized.
	3406
	3407	#undef wxConvLibc
	3408	#undef wxConvUTF8
	3409	#undef wxConvUTF7
	3410	#undef wxConvLocal
	3411	#undef wxConvISO8859_1
	3412
	3413	#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args) \
	3414	WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL; \
092ee46f	3415	WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr() \
1e50d914 VS	3416	{ \
	3417	static impl_klass name##Obj ctor_args; \
	3418	return &name##Obj; \
	3419	} \
	3420	/* this ensures that all global converter objects are created */ \
	3421	/* by the time static initialization is done, i.e. before any */ \
	3422	/* thread is launched: */ \
	3423	static klass* gs_##name##instance = wxGet_##name##Ptr()
	3424
	3425	#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
	3426	WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
	3427
5c69ef61 VZ	3428	#ifdef __INTELC__
	3429	// disable warning "variable 'xxx' was declared but never referenced"
	3430	#pragma warning(disable: 177)
	3431	#endif // Intel C++
	3432
1e50d914 VS	3433	#ifdef __WINDOWS__
1e50d914 VS	3434	WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
c45fad9a SC	3435	#elif 0 // defined(__WXOSX__)
c45fad9a SC	3436	WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_cf, wxConvLibc, (wxFONTENCODING_UTF8));
1e50d914 VS	3437	#else
	3438	WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
	3439	#endif
	3440
e1079eda VZ	3441	// NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
	3442	// passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
	3443	// provokes an error message about "not enough macro parameters"; and we
	3444	// can't use "()" here as the name##Obj declaration would be parsed as a
	3445	// function declaration then, so use a semicolon and live with an extra
	3446	// empty statement (and hope that no compilers warns about this)
	3447	WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
	3448	WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
1e50d914 VS	3449
	3450	WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
	3451	WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
	3452
	3453	WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
	3454	WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
	3455
6ac84a78	3456	#ifdef __DARWIN__
8244507f VZ	3457	// It is important to use this conversion object under Darwin as it ensures
	3458	// that Unicode strings are (re)composed correctly even though xnu kernel uses
	3459	// decomposed form internally (at least for the file names).
6ac84a78	3460	static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
1e50d914	3461	#endif
6ac84a78	3462
1e50d914	3463	WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
6ac84a78	3464	#ifdef __DARWIN__
1e50d914	3465	&wxConvMacUTF8DObj;
6ac84a78	3466	#else // !__DARWIN__
1e50d914	3467	wxGet_wxConvLibcPtr();
6ac84a78	3468	#endif // __DARWIN__/!__DARWIN__