git.saurik.com Git - wxWidgets.git/blame

Commit	Line	Data
6001e347	1	///////////////////////////////////////////////////////////////////////////////
80fdcdb9	2	// Name: wx/strconv.h
6001e347	3	// Purpose: conversion routines for char sets and Unicode
467e0479	4	// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin
6001e347 RR	5	// Modified by:
6001e347 RR	6	// Created: 29/01/98
467e0479 VZ	7	// Copyright: (c) 1998 Ove Kaaven, Robert Roebling
467e0479 VZ	8	// (c) 1998-2006 Vadim Zeitlin
65571936	9	// Licence: wxWindows licence
6001e347 RR	10	///////////////////////////////////////////////////////////////////////////////
6001e347 RR	11
d36c9347 VZ	12	#ifndef _WX_STRCONV_H_
d36c9347 VZ	13	#define _WX_STRCONV_H_
6001e347	14
6001e347	15	#include "wx/defs.h"
e3f6cbd9	16	#include "wx/chartype.h"
6001e347 RR	17	#include "wx/buffer.h"
6001e347 RR	18
7db39dd6 CE	19	#ifdef __DIGITALMARS__
	20	#include "typeinfo.h"
	21	#endif
	22
9dea36ef DW	23	#if defined(__VISAGECPP__) && __IBMCPP__ >= 400
	24	# undef __BSEXCPT__
	25	#endif
dccce9ea	26
6001e347 RR	27	#include <stdlib.h>
6001e347 RR	28
b5dbe15d	29	class WXDLLIMPEXP_FWD_BASE wxString;
86501081	30
483b0434 VZ	31	// the error value returned by wxMBConv methods
	32	#define wxCONV_FAILED ((size_t)-1)
	33
e90c1d2a	34	// ----------------------------------------------------------------------------
bde4baac	35	// wxMBConv (abstract base class for conversions)
e90c1d2a	36	// ----------------------------------------------------------------------------
6001e347	37
509da451 VZ	38	// When deriving a new class from wxMBConv you must reimplement ToWChar() and
	39	// FromWChar() methods which are not pure virtual only for historical reasons,
	40	// don't let the fact that the existing classes implement MB2WC/WC2MB() instead
	41	// confuse you.
	42	//
d36c9347 VZ	43	// You also have to implement Clone() to allow copying the conversions
	44	// polymorphically.
	45	//
509da451	46	// And you might need to override GetMBNulLen() as well.
bddd7a8d	47	class WXDLLIMPEXP_BASE wxMBConv
6001e347 RR	48	{
6001e347 RR	49	public:
483b0434 VZ	50	// The functions doing actual conversion from/to narrow to/from wide
483b0434 VZ	51	// character strings.
bde4baac	52	//
483b0434 VZ	53	// On success, the return value is the length (i.e. the number of
	54	// characters, not bytes) of the converted string including any trailing
	55	// L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if
	56	// there is not enough space for everything, including the trailing NUL
467e0479	57	// character(s), in the output buffer, wxCONV_FAILED is returned.
483b0434	58	//
96132605 VZ	59	// In the special case when dst is NULL (the value of dstLen is ignored
	60	// then) the return value is the length of the needed buffer but nothing
	61	// happens otherwise. If srcLen is wxNO_LEN, the entire string, up to and
467e0479 VZ	62	// including the trailing NUL(s), is converted, otherwise exactly srcLen
467e0479 VZ	63	// bytes are.
483b0434 VZ	64	//
	65	// Typical usage:
	66	//
	67	// size_t dstLen = conv.ToWChar(NULL, 0, src);
96132605	68	// if ( dstLen == wxCONV_FAILED )
483b0434 VZ	69	// ... handle error ...
	70	// wchar_t *wbuf = new wchar_t[dstLen];
	71	// conv.ToWChar(wbuf, dstLen, src);
96132605 VZ	72	// ... work with wbuf ...
96132605 VZ	73	// delete [] wbuf;
483b0434 VZ	74	//
483b0434 VZ	75	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
467e0479	76	const char *src, size_t srcLen = wxNO_LEN) const;
483b0434 VZ	77
483b0434 VZ	78	virtual size_t FromWChar(char *dst, size_t dstLen,
467e0479	79	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
483b0434	80
e90c1d2a	81
483b0434 VZ	82	// Convenience functions for translating NUL-terminated strings: returns
	83	// the buffer containing the converted string or NULL pointer if the
	84	// conversion failed.
eec47cc6 VZ	85	const wxWCharBuffer cMB2WC(const char *in) const;
eec47cc6 VZ	86	const wxCharBuffer cWC2MB(const wchar_t *in) const;
6001e347	87
483b0434 VZ	88	// Convenience functions for converting strings which may contain embedded
483b0434 VZ	89	// NULs and don't have to be NUL-terminated.
f5fb6871	90	//
f6a02087 VZ	91	// inLen is the length of the buffer including trailing NUL if any or
f6a02087 VZ	92	// wxNO_LEN if the input is NUL-terminated.
eec47cc6 VZ	93	//
	94	// outLen receives, if not NULL, the length of the converted string or 0 if
	95	// the conversion failed (returning 0 and not -1 in this case makes it
	96	// difficult to distinguish between failed conversion and empty input but
f6a02087 VZ	97	// this is done for backwards compatibility). Notice that the rules for
	98	// whether outLen accounts or not for the last NUL are the same as for
	99	// To/FromWChar() above: if inLen is specified, outLen is exactly the
	100	// number of characters converted, whether the last one of them was NUL or
	101	// not. But if inLen == wxNO_LEN then outLen doesn't account for the last
	102	// NUL even though it is present.
eec47cc6 VZ	103	const wxWCharBuffer
	104	cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
	105	const wxCharBuffer
	106	cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const;
f5fb6871	107
40ac5040 VZ	108	// And yet more convenience functions for converting the entire buffers:
	109	// these are the simplest and least error-prone as you never need to bother
	110	// with lengths/sizes directly.
	111	const wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const;
	112	const wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const;
	113
bde4baac	114	// convenience functions for converting MB or WC to/from wxWin default
6001e347	115	#if wxUSE_UNICODE
e90c1d2a VZ	116	const wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); }
	117	const wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); }
	118	const wchar_t* cWC2WX(const wchar_t *psz) const { return psz; }
f6bcfd97	119	const wchar_t* cWX2WC(const wchar_t *psz) const { return psz; }
e90c1d2a VZ	120	#else // ANSI
	121	const char* cMB2WX(const char *psz) const { return psz; }
	122	const char* cWX2MB(const char *psz) const { return psz; }
	123	const wxCharBuffer cWC2WX(const wchar_t *psz) const { return cWC2MB(psz); }
	124	const wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); }
	125	#endif // Unicode/ANSI
2b5f62a0	126
c1464d9d VZ	127	// this function is used in the implementation of cMB2WC() to distinguish
c1464d9d VZ	128	// between the following cases:
eec47cc6	129	//
c1464d9d VZ	130	// a) var width encoding with strings terminated by a single NUL
	131	// (usual multibyte encodings): return 1 in this case
	132	// b) fixed width encoding with 2 bytes/char and so terminated by
	133	// 2 NULs (UTF-16/UCS-2 and variants): return 2 in this case
	134	// c) fixed width encoding with 4 bytes/char and so terminated by
	135	// 4 NULs (UTF-32/UCS-4 and variants): return 4 in this case
	136	//
	137	// anything else is not supported currently and -1 should be returned
7ef3ab50 VZ	138	virtual size_t GetMBNulLen() const { return 1; }
7ef3ab50 VZ	139
483b0434 VZ	140	// return the maximal value currently returned by GetMBNulLen() for any
	141	// encoding
	142	static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; }
	143
111d9948 VS	144	#if wxUSE_UNICODE_UTF8
	145	// return true if the converter's charset is UTF-8, i.e. char* strings
	146	// decoded using this object can be directly copied to wxString's internal
	147	// storage without converting to WC and then back to UTF-8 MB string
	148	virtual bool IsUTF8() const { return false; }
	149	#endif
483b0434 VZ	150
	151	// The old conversion functions. The existing classes currently mostly
	152	// implement these ones but we're in transition to using To/FromWChar()
	153	// instead and any new classes should implement just the new functions.
	154	// For now, however, we provide default implementation of To/FromWChar() in
	155	// this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all
	156	// the conversions at once.
	157	//
	158	// On success, the return value is the length (i.e. the number of
	159	// characters, not bytes) not counting the trailing NUL(s) of the converted
	160	// string. On failure, (size_t)-1 is returned. In the special case when
	161	// outputBuf is NULL the return value is the same one but nothing is
	162	// written to the buffer.
	163	//
	164	// Note that outLen is the length of the output buffer, not the length of
	165	// the input (which is always supposed to be terminated by one or more
	166	// NULs, as appropriate for the encoding)!
509da451 VZ	167	virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const;
509da451 VZ	168	virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const;
483b0434 VZ	169
483b0434 VZ	170
d36c9347 VZ	171	// make a heap-allocated copy of this object
	172	virtual wxMBConv *Clone() const = 0;
	173
7ef3ab50 VZ	174	// virtual dtor for any base class
7ef3ab50 VZ	175	virtual ~wxMBConv();
6001e347 RR	176	};
6001e347 RR	177
bde4baac VZ	178	// ----------------------------------------------------------------------------
	179	// wxMBConvLibc uses standard mbstowcs() and wcstombs() functions for
	180	// conversion (hence it depends on the current locale)
	181	// ----------------------------------------------------------------------------
	182
	183	class WXDLLIMPEXP_BASE wxMBConvLibc : public wxMBConv
	184	{
	185	public:
75736a9c DS	186	virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
75736a9c DS	187	virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
d36c9347 VZ	188
d36c9347 VZ	189	virtual wxMBConv *Clone() const { return new wxMBConvLibc; }
111d9948 VS	190
	191	#if wxUSE_UNICODE_UTF8
	192	virtual bool IsUTF8() const { return wxLocaleIsUtf8; }
	193	#endif
bde4baac VZ	194	};
bde4baac VZ	195
5576edf8 RR	196	#ifdef __UNIX__
	197
	198	// ----------------------------------------------------------------------------
	199	// wxConvBrokenFileNames is made for Unix in Unicode mode when
	200	// files are accidentally written in an encoding which is not
	201	// the system encoding. Typically, the system encoding will be
	202	// UTF8 but there might be files stored in ISO8859-1 on disk.
	203	// ----------------------------------------------------------------------------
	204
	205	class WXDLLIMPEXP_BASE wxConvBrokenFileNames : public wxMBConv
	206	{
	207	public:
86501081	208	wxConvBrokenFileNames(const wxString& charset);
d36c9347	209	wxConvBrokenFileNames(const wxConvBrokenFileNames& conv)
2e2cf78d VZ	210	: wxMBConv(),
2e2cf78d VZ	211	m_conv(conv.m_conv ? conv.m_conv->Clone() : NULL)
d36c9347 VZ	212	{
d36c9347 VZ	213	}
5576edf8 RR	214	virtual ~wxConvBrokenFileNames() { delete m_conv; }
5576edf8 RR	215
eec47cc6 VZ	216	virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const
	217	{
	218	return m_conv->MB2WC(out, in, outLen);
	219	}
	220
	221	virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const
	222	{
	223	return m_conv->WC2MB(out, in, outLen);
	224	}
5576edf8	225
7ef3ab50	226	virtual size_t GetMBNulLen() const
eec47cc6	227	{
22886fb3	228	// forward to the wrapped conversion object (no cast is needed here)
7ef3ab50	229	return m_conv->GetMBNulLen();
eec47cc6 VZ	230	}
eec47cc6 VZ	231
ba98e032 VS	232	#if wxUSE_UNICODE_UTF8
	233	virtual bool IsUTF8() const { return m_conv->IsUTF8(); }
	234	#endif
	235
d36c9347 VZ	236	virtual wxMBConv *Clone() const { return new wxConvBrokenFileNames(*this); }
d36c9347 VZ	237
7ef3ab50	238	private:
5576edf8 RR	239	// the conversion object we forward to
5576edf8 RR	240	wxMBConv *m_conv;
d36c9347	241
c0c133e1	242	wxDECLARE_NO_ASSIGN_CLASS(wxConvBrokenFileNames);
5576edf8 RR	243	};
5576edf8 RR	244
eec47cc6	245	#endif // __UNIX__
5576edf8	246
e90c1d2a	247	// ----------------------------------------------------------------------------
6001e347	248	// wxMBConvUTF7 (for conversion using UTF7 encoding)
e90c1d2a	249	// ----------------------------------------------------------------------------
6001e347	250
bddd7a8d	251	class WXDLLIMPEXP_BASE wxMBConvUTF7 : public wxMBConv
6001e347 RR	252	{
6001e347 RR	253	public:
9d653e81 VZ	254	wxMBConvUTF7() { }
	255
	256	// compiler-generated copy ctor, assignment operator and dtor are ok
	257	// (assuming it's ok to copy the shift state -- not really sure about it)
	258
	259	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	260	const char *src, size_t srcLen = wxNO_LEN) const;
	261	virtual size_t FromWChar(char *dst, size_t dstLen,
	262	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
d36c9347 VZ	263
d36c9347 VZ	264	virtual wxMBConv *Clone() const { return new wxMBConvUTF7; }
9d653e81 VZ	265
	266	private:
	267	// UTF-7 decoder/encoder may be in direct mode or in shifted mode after a
	268	// '+' (and until the '-' or any other non-base64 character)
1bc82105	269	struct StateMode
9d653e81	270	{
1bc82105 VZ	271	enum Mode
	272	{
	273	Direct, // pass through state
	274	Shifted // after a '+' (and before '-')
	275	};
9d653e81 VZ	276	};
	277
	278	// the current decoder state: this is only used by ToWChar() if srcLen
	279	// parameter is not wxNO_LEN, when working on the entire NUL-terminated
	280	// strings we neither update nor use the state
5c69ef61	281	class DecoderState : private StateMode
9d653e81 VZ	282	{
	283	private:
	284	// current state: this one is private as we want to enforce the use of
	285	// ToDirect/ToShifted() methods below
	286	Mode mode;
	287
	288	public:
	289	// the initial state is direct
	290	DecoderState() { mode = Direct; }
	291
	292	// switch to/from shifted mode
	293	void ToDirect() { mode = Direct; }
	294	void ToShifted() { mode = Shifted; accum = bit = 0; isLSB = false; }
	295
	296	bool IsDirect() const { return mode == Direct; }
	297	bool IsShifted() const { return mode == Shifted; }
	298
	299
	300	// these variables are only used in shifted mode
	301
	302	unsigned int accum; // accumulator of the bits we've already got
	303	unsigned int bit; // the number of bits consumed mod 8
	304	unsigned char msb; // the high byte of UTF-16 word
	305	bool isLSB; // whether we're decoding LSB or MSB of UTF-16 word
	306	};
	307
	308	DecoderState m_stateDecoder;
	309
	310
	311	// encoder state is simpler as we always receive entire Unicode characters
	312	// on input
5c69ef61	313	class EncoderState : private StateMode
9d653e81 VZ	314	{
	315	private:
	316	Mode mode;
	317
	318	public:
	319	EncoderState() { mode = Direct; }
	320
	321	void ToDirect() { mode = Direct; }
	322	void ToShifted() { mode = Shifted; accum = bit = 0; }
	323
	324	bool IsDirect() const { return mode == Direct; }
	325	bool IsShifted() const { return mode == Shifted; }
	326
	327	unsigned int accum;
	328	unsigned int bit;
	329	};
	330
	331	EncoderState m_stateEncoder;
6001e347 RR	332	};
6001e347 RR	333
e90c1d2a	334	// ----------------------------------------------------------------------------
6001e347	335	// wxMBConvUTF8 (for conversion using UTF8 encoding)
e90c1d2a	336	// ----------------------------------------------------------------------------
6001e347	337
0286d08d VZ	338	// this is the real UTF-8 conversion class, it has to be called "strict UTF-8"
	339	// for compatibility reasons: the wxMBConvUTF8 class below also supports lossy
	340	// conversions if it is created with non default options
	341	class WXDLLIMPEXP_BASE wxMBConvStrictUTF8 : public wxMBConv
	342	{
	343	public:
	344	// compiler-generated default ctor and other methods are ok
	345
	346	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	347	const char *src, size_t srcLen = wxNO_LEN) const;
	348	virtual size_t FromWChar(char *dst, size_t dstLen,
	349	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
	350
	351	virtual wxMBConv *Clone() const { return new wxMBConvStrictUTF8(); }
	352
	353	#if wxUSE_UNICODE_UTF8
	354	// NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
	355	// take the shortcut in that case
	356	virtual bool IsUTF8() const { return true; }
	357	#endif
	358	};
	359
	360	class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConvStrictUTF8
6001e347 RR	361	{
6001e347 RR	362	public:
d36c9347 VZ	363	enum
d36c9347 VZ	364	{
ea8ce907 RR	365	MAP_INVALID_UTF8_NOT = 0,
	366	MAP_INVALID_UTF8_TO_PUA = 1,
	367	MAP_INVALID_UTF8_TO_OCTAL = 2
	368	};
	369
	370	wxMBConvUTF8(int options = MAP_INVALID_UTF8_NOT) : m_options(options) { }
d16d0917 VZ	371
	372	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	373	const char *src, size_t srcLen = wxNO_LEN) const;
	374	virtual size_t FromWChar(char *dst, size_t dstLen,
	375	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
eec47cc6	376
d36c9347 VZ	377	virtual wxMBConv *Clone() const { return new wxMBConvUTF8(m_options); }
d36c9347 VZ	378
111d9948 VS	379	#if wxUSE_UNICODE_UTF8
	380	// NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
	381	// take the shortcut in that case
	382	virtual bool IsUTF8() const { return m_options == MAP_INVALID_UTF8_NOT; }
	383	#endif
	384
ea8ce907 RR	385	private:
ea8ce907 RR	386	int m_options;
6001e347 RR	387	};
6001e347 RR	388
eec47cc6 VZ	389	// ----------------------------------------------------------------------------
	390	// wxMBConvUTF16Base: for both LE and BE variants
	391	// ----------------------------------------------------------------------------
	392
	393	class WXDLLIMPEXP_BASE wxMBConvUTF16Base : public wxMBConv
	394	{
7ef3ab50	395	public:
467e0479 VZ	396	enum { BYTES_PER_CHAR = 2 };
	397
	398	virtual size_t GetMBNulLen() const { return BYTES_PER_CHAR; }
	399
	400	protected:
	401	// return the length of the buffer using srcLen if it's not wxNO_LEN and
	402	// computing the length ourselves if it is; also checks that the length is
	403	// even if specified as we need an entire number of UTF-16 characters and
	404	// returns wxNO_LEN which indicates error if it is odd
	405	static size_t GetLength(const char *src, size_t srcLen);
eec47cc6 VZ	406	};
eec47cc6 VZ	407
e90c1d2a	408	// ----------------------------------------------------------------------------
c91830cb VZ	409	// wxMBConvUTF16LE (for conversion using UTF16 Little Endian encoding)
	410	// ----------------------------------------------------------------------------
	411
eec47cc6	412	class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConvUTF16Base
c91830cb VZ	413	{
c91830cb VZ	414	public:
467e0479 VZ	415	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	416	const char *src, size_t srcLen = wxNO_LEN) const;
	417	virtual size_t FromWChar(char *dst, size_t dstLen,
	418	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
d36c9347	419	virtual wxMBConv *Clone() const { return new wxMBConvUTF16LE; }
c91830cb VZ	420	};
	421
	422	// ----------------------------------------------------------------------------
	423	// wxMBConvUTF16BE (for conversion using UTF16 Big Endian encoding)
	424	// ----------------------------------------------------------------------------
	425
eec47cc6	426	class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConvUTF16Base
c91830cb VZ	427	{
c91830cb VZ	428	public:
467e0479 VZ	429	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	430	const char *src, size_t srcLen = wxNO_LEN) const;
	431	virtual size_t FromWChar(char *dst, size_t dstLen,
	432	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
d36c9347	433	virtual wxMBConv *Clone() const { return new wxMBConvUTF16BE; }
c91830cb VZ	434	};
c91830cb VZ	435
eec47cc6 VZ	436	// ----------------------------------------------------------------------------
	437	// wxMBConvUTF32Base: base class for both LE and BE variants
	438	// ----------------------------------------------------------------------------
	439
	440	class WXDLLIMPEXP_BASE wxMBConvUTF32Base : public wxMBConv
	441	{
7ef3ab50	442	public:
467e0479 VZ	443	enum { BYTES_PER_CHAR = 4 };
	444
	445	virtual size_t GetMBNulLen() const { return BYTES_PER_CHAR; }
	446
	447	protected:
	448	// this is similar to wxMBConvUTF16Base method with the same name except
	449	// that, of course, it verifies that length is divisible by 4 if given and
	450	// not by 2
	451	static size_t GetLength(const char *src, size_t srcLen);
eec47cc6 VZ	452	};
eec47cc6 VZ	453
c91830cb	454	// ----------------------------------------------------------------------------
8b9e1f43	455	// wxMBConvUTF32LE (for conversion using UTF32 Little Endian encoding)
c91830cb VZ	456	// ----------------------------------------------------------------------------
c91830cb VZ	457
eec47cc6	458	class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConvUTF32Base
c91830cb VZ	459	{
c91830cb VZ	460	public:
467e0479 VZ	461	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	462	const char *src, size_t srcLen = wxNO_LEN) const;
	463	virtual size_t FromWChar(char *dst, size_t dstLen,
	464	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
d36c9347	465	virtual wxMBConv *Clone() const { return new wxMBConvUTF32LE; }
c91830cb VZ	466	};
	467
	468	// ----------------------------------------------------------------------------
8b9e1f43	469	// wxMBConvUTF32BE (for conversion using UTF32 Big Endian encoding)
c91830cb VZ	470	// ----------------------------------------------------------------------------
c91830cb VZ	471
eec47cc6	472	class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConvUTF32Base
c91830cb VZ	473	{
c91830cb VZ	474	public:
467e0479 VZ	475	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	476	const char *src, size_t srcLen = wxNO_LEN) const;
	477	virtual size_t FromWChar(char *dst, size_t dstLen,
	478	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
d36c9347	479	virtual wxMBConv *Clone() const { return new wxMBConvUTF32BE; }
c91830cb VZ	480	};
	481
	482	// ----------------------------------------------------------------------------
e90c1d2a VZ	483	// wxCSConv (for conversion based on loadable char sets)
e90c1d2a VZ	484	// ----------------------------------------------------------------------------
6001e347	485
8b04d4c4 VZ	486	#include "wx/fontenc.h"
8b04d4c4 VZ	487
bddd7a8d	488	class WXDLLIMPEXP_BASE wxCSConv : public wxMBConv
6001e347	489	{
6001e347	490	public:
e95354ec VZ	491	// we can be created either from charset name or from an encoding constant
e95354ec VZ	492	// but we can't have both at once
86501081	493	wxCSConv(const wxString& charset);
8b04d4c4	494	wxCSConv(wxFontEncoding encoding);
e95354ec	495
54380f29	496	wxCSConv(const wxCSConv& conv);
e90c1d2a VZ	497	virtual ~wxCSConv();
e90c1d2a VZ	498
54380f29	499	wxCSConv& operator=(const wxCSConv& conv);
2b5f62a0	500
1c714a5d VZ	501	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	502	const char *src, size_t srcLen = wxNO_LEN) const;
	503	virtual size_t FromWChar(char *dst, size_t dstLen,
	504	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
7ef3ab50	505	virtual size_t GetMBNulLen() const;
1c714a5d	506
ba98e032 VS	507	#if wxUSE_UNICODE_UTF8
	508	virtual bool IsUTF8() const;
	509	#endif
	510
d36c9347	511	virtual wxMBConv *Clone() const { return new wxCSConv(*this); }
e90c1d2a	512
d36c9347	513	void Clear();
65e50848	514
a08a37d0	515	// return true if the conversion could be initialized successfully
0f0298b1	516	bool IsOk() const;
0f0298b1	517
e90c1d2a	518	private:
8b04d4c4 VZ	519	// common part of all ctors
	520	void Init();
	521
6c4d607e VZ	522	// Creates the conversion to use, called from all ctors to initialize
6c4d607e VZ	523	// m_convReal.
e95354ec VZ	524	wxMBConv *DoCreate() const;
e95354ec VZ	525
6c4d607e VZ	526	// Set the name (may be only called when m_name == NULL), makes copy of
6c4d607e VZ	527	// the charset string.
86501081	528	void SetName(const char *charset);
e90c1d2a	529
6c4d607e VZ	530	// Set m_encoding field respecting the rules below, i.e. making sure it has
	531	// a valid value if m_name == NULL (thus this should be always called after
	532	// SetName()).
a08a37d0	533	//
6c4d607e VZ	534	// Input encoding may be valid or not.
6c4d607e VZ	535	void SetEncoding(wxFontEncoding encoding);
a08a37d0	536
6c4d607e VZ	537
	538	// The encoding we use is specified by the two fields below:
	539	//
	540	// 1. If m_name != NULL, m_encoding corresponds to it if it's one of
	541	// encodings we know about (i.e. member of wxFontEncoding) or is
	542	// wxFONTENCODING_SYSTEM otherwise.
a08a37d0	543	//
6c4d607e VZ	544	// 2. If m_name == NULL, m_encoding is always valid, i.e. not one of
	545	// wxFONTENCODING_{SYSTEM,DEFAULT,MAX}.
	546	char *m_name;
8b04d4c4	547	wxFontEncoding m_encoding;
e95354ec	548
6c4d607e VZ	549	// The conversion object for our encoding or NULL if we failed to create it
6c4d607e VZ	550	// in which case we fall back to hard-coded ISO8859-1 conversion.
e95354ec	551	wxMBConv *m_convReal;
6001e347 RR	552	};
6001e347 RR	553
c3c1a9a9	554
f5a1953b VZ	555	// ----------------------------------------------------------------------------
	556	// declare predefined conversion objects
	557	// ----------------------------------------------------------------------------
d5c8817c	558
1e50d914 VS	559	// Note: this macro is an implementation detail (see the comment in
	560	// strconv.cpp). The wxGet_XXX() and wxGet_XXXPtr() functions shouldn't be
	561	// used by user code and neither should XXXPtr, use the wxConvXXX macro
	562	// instead.
	563	#define WX_DECLARE_GLOBAL_CONV(klass, name) \
	564	extern WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr; \
092ee46f	565	extern WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr(); \
1e50d914 VS	566	inline klass& wxGet_##name() \
	567	{ \
	568	if ( !name##Ptr ) \
	569	name##Ptr = wxGet_##name##Ptr(); \
	570	return *name##Ptr; \
	571	}
	572
	573
f5a1953b VZ	574	// conversion to be used with all standard functions affected by locale, e.g.
f5a1953b VZ	575	// strtol(), strftime(), ...
1e50d914 VS	576	WX_DECLARE_GLOBAL_CONV(wxMBConv, wxConvLibc)
1e50d914 VS	577	#define wxConvLibc wxGet_wxConvLibc()
f5a1953b VZ	578
f5a1953b VZ	579	// conversion ISO-8859-1/UTF-7/UTF-8 <-> wchar_t
1e50d914 VS	580	WX_DECLARE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1)
	581	#define wxConvISO8859_1 wxGet_wxConvISO8859_1()
	582
0286d08d	583	WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8)
1e50d914 VS	584	#define wxConvUTF8 wxGet_wxConvUTF8()
	585
	586	WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7)
	587	#define wxConvUTF7 wxGet_wxConvUTF7()
f5a1953b VZ	588
	589	// conversion used for the file names on the systems where they're not Unicode
	590	// (basically anything except Windows)
	591	//
	592	// this is used by all file functions, can be changed by the application
	593	//
	594	// by default UTF-8 under Mac OS X and wxConvLibc elsewhere (but it's not used
	595	// under Windows normally)
	596	extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName;
	597
	598	// backwards compatible define
	599	#define wxConvFile (*wxConvFileName)
	600
	601	// the current conversion object, may be set to any conversion, is used by
	602	// default in a couple of places inside wx (initially same as wxConvLibc)
16cba29d	603	extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent;
6001e347	604
0e052272	605	// the conversion corresponding to the current locale
1e50d914 VS	606	WX_DECLARE_GLOBAL_CONV(wxCSConv, wxConvLocal)
1e50d914 VS	607	#define wxConvLocal wxGet_wxConvLocal()
f5a1953b	608
d5bef0a3 VZ	609	// the conversion corresponding to the encoding of the standard UI elements
	610	//
	611	// by default this is the same as wxConvLocal but may be changed if the program
	612	// needs to use a fixed encoding
	613	extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI;
	614
1e50d914 VS	615	#undef WX_DECLARE_GLOBAL_CONV
1e50d914 VS	616
e95354ec VZ	617	// ----------------------------------------------------------------------------
	618	// endianness-dependent conversions
	619	// ----------------------------------------------------------------------------
	620
	621	#ifdef WORDS_BIGENDIAN
	622	typedef wxMBConvUTF16BE wxMBConvUTF16;
	623	typedef wxMBConvUTF32BE wxMBConvUTF32;
	624	#else
	625	typedef wxMBConvUTF16LE wxMBConvUTF16;
	626	typedef wxMBConvUTF32LE wxMBConvUTF32;
	627	#endif
	628
e90c1d2a	629	// ----------------------------------------------------------------------------
6001e347	630	// filename conversion macros
e90c1d2a	631	// ----------------------------------------------------------------------------
6001e347	632
bc4b4779	633	// filenames are multibyte on Unix and widechar on Windows
80df4d31	634	#if wxMBFILES && wxUSE_UNICODE
f5a1953b	635	#define wxFNCONV(name) wxConvFileName->cWX2MB(name)
e90c1d2a	636	#define wxFNSTRINGCAST wxMBSTRINGCAST
d5c8817c	637	#else
0b6a49c2	638	#if defined( __WXOSX_OR_COCOA__ ) && wxMBFILES
f5a1953b	639	#define wxFNCONV(name) wxConvFileName->cWC2MB( wxConvLocal.cWX2WC(name) )
6001e347	640	#else
e90c1d2a	641	#define wxFNCONV(name) name
d5c8817c	642	#endif
e90c1d2a	643	#define wxFNSTRINGCAST WXSTRINGCAST
6001e347 RR	644	#endif
6001e347 RR	645
e90c1d2a VZ	646	// ----------------------------------------------------------------------------
	647	// macros for the most common conversions
	648	// ----------------------------------------------------------------------------
	649
	650	#if wxUSE_UNICODE
	651	#define wxConvertWX2MB(s) wxConvCurrent->cWX2MB(s)
	652	#define wxConvertMB2WX(s) wxConvCurrent->cMB2WX(s)
69c928ef VZ	653
	654	// these functions should be used when the conversions really, really have
	655	// to succeed (usually because we pass their results to a standard C
	656	// function which would crash if we passed NULL to it), so these functions
	657	// always return a valid pointer if their argument is non-NULL
	658
	659	// this function safety is achieved by trying wxConvLibc first, wxConvUTF8
	660	// next if it fails and, finally, wxConvISO8859_1 which always succeeds
	661	extern WXDLLIMPEXP_BASE wxWCharBuffer wxSafeConvertMB2WX(const char *s);
	662
	663	// this function uses wxConvLibc and wxConvUTF8(MAP_INVALID_UTF8_TO_OCTAL)
	664	// if it fails
	665	extern WXDLLIMPEXP_BASE wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws);
e90c1d2a VZ	666	#else // ANSI
	667	// no conversions to do
	668	#define wxConvertWX2MB(s) (s)
	669	#define wxConvertMB2WX(s) (s)
69c928ef VZ	670	#define wxSafeConvertMB2WX(s) (s)
69c928ef VZ	671	#define wxSafeConvertWX2MB(s) (s)
e90c1d2a VZ	672	#endif // Unicode/ANSI
e90c1d2a VZ	673
d36c9347	674	#endif // _WX_STRCONV_H_
6001e347	675