git.saurik.com Git - wxWidgets.git/blame_incremental

... / ...

Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: strconv.h
	3	// Purpose: conversion routines for char sets and Unicode
	4	// Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin
	5	// Modified by:
	6	// Created: 29/01/98
	7	// RCS-ID: $Id$
	8	// Copyright: (c) 1998 Ove Kaaven, Robert Roebling
	9	// (c) 1998-2006 Vadim Zeitlin
	10	// Licence: wxWindows licence
	11	///////////////////////////////////////////////////////////////////////////////
	12
	13	#ifndef _WX_STRCONV_H_
	14	#define _WX_STRCONV_H_
	15
	16	#include "wx/defs.h"
	17	#include "wx/wxchar.h"
	18	#include "wx/buffer.h"
	19
	20	#ifdef __DIGITALMARS__
	21	#include "typeinfo.h"
	22	#endif
	23
	24	#if defined(__VISAGECPP__) && __IBMCPP__ >= 400
	25	# undef __BSEXCPT__
	26	#endif
	27
	28	#include <stdlib.h>
	29
	30	#if wxUSE_WCHAR_T
	31
	32	// the error value returned by the wxMBConv conversion methods on failure
	33	#define wxCONV_FAILED ((size_t)-1)
	34
	35	// the default value for some length parameters meaning that the string is
	36	// NUL-terminated (note: numerically identical to wxCONV_FAILED)
	37	#define wxNO_LEN ((size_t)-1)
	38
	39	// ----------------------------------------------------------------------------
	40	// wxMBConv (abstract base class for conversions)
	41	// ----------------------------------------------------------------------------
	42
	43	// When deriving a new class from wxMBConv you must reimplement ToWChar() and
	44	// FromWChar() methods which are not pure virtual only for historical reasons,
	45	// don't let the fact that the existing classes implement MB2WC/WC2MB() instead
	46	// confuse you.
	47	//
	48	// You also have to implement Clone() to allow copying the conversions
	49	// polymorphically.
	50	//
	51	// And you might need to override GetMBNulLen() as well.
	52	class WXDLLIMPEXP_BASE wxMBConv
	53	{
	54	public:
	55	// The functions doing actual conversion from/to narrow to/from wide
	56	// character strings.
	57	//
	58	// On success, the return value is the length (i.e. the number of
	59	// characters, not bytes) of the converted string including any trailing
	60	// L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if
	61	// there is not enough space for everything, including the trailing NUL
	62	// character(s), in the output buffer, wxCONV_FAILED is returned.
	63	//
	64	// In the special case when dstLen is 0 (dst may be NULL then) the
	65	// return value is the length of the needed buffer but nothing happens
	66	// otherwise. If srcLen is wxNO_LEN, the entire string, up to and
	67	// including the trailing NUL(s), is converted, otherwise exactly srcLen
	68	// bytes are.
	69	//
	70	// Typical usage:
	71	//
	72	// size_t dstLen = conv.ToWChar(NULL, 0, src);
	73	// if ( dstLen == wxCONV_FAILED )
	74	// ... handle error ...
	75	// wchar_t *wbuf = new wchar_t[dstLen];
	76	// conv.ToWChar(wbuf, dstLen, src);
	77	//
	78	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	79	const char *src, size_t srcLen = wxNO_LEN) const;
	80
	81	virtual size_t FromWChar(char *dst, size_t dstLen,
	82	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
	83
	84
	85	// Convenience functions for translating NUL-terminated strings: returns
	86	// the buffer containing the converted string or NULL pointer if the
	87	// conversion failed.
	88	const wxWCharBuffer cMB2WC(const char *in) const;
	89	const wxCharBuffer cWC2MB(const wchar_t *in) const;
	90
	91	// Convenience functions for converting strings which may contain embedded
	92	// NULs and don't have to be NUL-terminated.
	93	//
	94	// inLen is the length of the buffer including trailing NUL if any: if the
	95	// last 4 bytes of the buffer are all NULs, these functions are more
	96	// efficient as they avoid copying the string, but otherwise a copy is made
	97	// internally which could be quite bad for (very) long strings.
	98	//
	99	// outLen receives, if not NULL, the length of the converted string or 0 if
	100	// the conversion failed (returning 0 and not -1 in this case makes it
	101	// difficult to distinguish between failed conversion and empty input but
	102	// this is done for backwards compatibility)
	103	const wxWCharBuffer
	104	cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
	105	const wxCharBuffer
	106	cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const;
	107
	108	// convenience functions for converting MB or WC to/from wxWin default
	109	#if wxUSE_UNICODE
	110	const wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); }
	111	const wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); }
	112	const wchar_t* cWC2WX(const wchar_t *psz) const { return psz; }
	113	const wchar_t* cWX2WC(const wchar_t *psz) const { return psz; }
	114	#else // ANSI
	115	const char* cMB2WX(const char *psz) const { return psz; }
	116	const char* cWX2MB(const char *psz) const { return psz; }
	117	const wxCharBuffer cWC2WX(const wchar_t *psz) const { return cWC2MB(psz); }
	118	const wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); }
	119	#endif // Unicode/ANSI
	120
	121	// this function is used in the implementation of cMB2WC() to distinguish
	122	// between the following cases:
	123	//
	124	// a) var width encoding with strings terminated by a single NUL
	125	// (usual multibyte encodings): return 1 in this case
	126	// b) fixed width encoding with 2 bytes/char and so terminated by
	127	// 2 NULs (UTF-16/UCS-2 and variants): return 2 in this case
	128	// c) fixed width encoding with 4 bytes/char and so terminated by
	129	// 4 NULs (UTF-32/UCS-4 and variants): return 4 in this case
	130	//
	131	// anything else is not supported currently and (size_t)-1 should be returned
	132	virtual size_t GetMBNulLen() const { return 1; }
	133
	134	// return the maximal value currently returned by GetMBNulLen() for any
	135	// encoding
	136	static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; }
	137
	138
	139	// The old conversion functions. The existing classes currently mostly
	140	// implement these ones but we're in transition to using To/FromWChar()
	141	// instead and any new classes should implement just the new functions.
	142	// For now, however, we provide default implementation of To/FromWChar() in
	143	// this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all
	144	// the conversions at once.
	145	//
	146	// On success, the return value is the length (i.e. the number of
	147	// characters, not bytes) not counting the trailing NUL(s) of the converted
	148	// string. On failure, (size_t)-1 is returned. In the special case when
	149	// the output buffer (out) is NULL the return value is the same one but
	150	// nothing is written to the buffer.
	151	//
	152	// Note that outLen is the length of the output buffer, not the length of
	153	// the input (which is always supposed to be terminated by one or more
	154	// NULs, as appropriate for the encoding)!
	155	virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const;
	156	virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const;
	157
	158
	159	// make a heap-allocated copy of this object
	160	virtual wxMBConv *Clone() const = 0;
	161
	162	// virtual dtor for any base class
	163	virtual ~wxMBConv();
	164	};
	165
	166	// ----------------------------------------------------------------------------
	167	// wxMBConvLibc uses standard mbstowcs() and wcstombs() functions for
	168	// conversion (hence it depends on the current locale)
	169	// ----------------------------------------------------------------------------
	170
	171	class WXDLLIMPEXP_BASE wxMBConvLibc : public wxMBConv
	172	{
	173	public:
	174	virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
	175	virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
	176	// polymorphic copy: the class declares no data members to duplicate
	177	virtual wxMBConv *Clone() const { return new wxMBConvLibc; }
	178	};
	179
	180	#ifdef __UNIX__
	181
	182	// ----------------------------------------------------------------------------
	183	// wxConvBrokenFileNames is made for Unix in Unicode mode when
	184	// files are accidentally written in an encoding which is not
	185	// the system encoding. Typically, the system encoding will be
	186	// UTF8 but there might be files stored in ISO8859-1 on disk.
	187	// ----------------------------------------------------------------------------
	188
	189	class WXDLLIMPEXP_BASE wxConvBrokenFileNames : public wxMBConv
	190	{
	191	public:
	192	wxConvBrokenFileNames(const wxChar *charset);
	193	wxConvBrokenFileNames(const wxConvBrokenFileNames& conv)
	194	: wxMBConv(),
	195	m_conv(conv.m_conv ? conv.m_conv->Clone() : NULL)
	196	{
	197	}
	198	virtual ~wxConvBrokenFileNames() { delete m_conv; }
	199	// NOTE(review): the forwarders below use m_conv unchecked although the copy ctor allows NULL
	200	virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const
	201	{
	202	return m_conv->MB2WC(out, in, outLen);
	203	}
	204
	205	virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const
	206	{
	207	return m_conv->WC2MB(out, in, outLen);
	208	}
	209
	210	virtual size_t GetMBNulLen() const
	211	{
	212	// the terminator length is whatever the wrapped conversion reports
	213	return m_conv->GetMBNulLen();
	214	}
	215	// deep copy: the copy ctor clones the wrapped conversion
	216	virtual wxMBConv *Clone() const { return new wxConvBrokenFileNames(*this); }
	217
	218	private:
	219	// the conversion object we forward to (owned, deleted in the dtor)
	220	wxMBConv *m_conv;
	221
	222	DECLARE_NO_ASSIGN_CLASS(wxConvBrokenFileNames)
	223	};
	224
	225	#endif // __UNIX__
	226
	227	// ----------------------------------------------------------------------------
	228	// wxMBConvUTF7 (for conversion using UTF7 encoding)
	229	// ----------------------------------------------------------------------------
	230
	231	class WXDLLIMPEXP_BASE wxMBConvUTF7 : public wxMBConv
	232	{
	233	public:
	234	virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
	235	virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
	236	// polymorphic copy: the class declares no data members to duplicate
	237	virtual wxMBConv *Clone() const { return new wxMBConvUTF7; }
	238	};
	239
	240	// ----------------------------------------------------------------------------
	241	// wxMBConvUTF8 (for conversion using UTF8 encoding)
	242	// ----------------------------------------------------------------------------
	243
	244	class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConv
	245	{
	246	public:
	247	enum
	248	{
	249	MAP_INVALID_UTF8_NOT = 0,
	250	MAP_INVALID_UTF8_TO_PUA = 1,
	251	MAP_INVALID_UTF8_TO_OCTAL = 2
	252	};
	253	// options (MAP_INVALID_UTF8_NOT by default) is remembered in m_options
	254	wxMBConvUTF8(int options = MAP_INVALID_UTF8_NOT) : m_options(options) { }
	255	virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
	256	virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
	257	// the copy is created with the same options as this object
	258	virtual wxMBConv *Clone() const { return new wxMBConvUTF8(m_options); }
	259
	260	private:
	261	int m_options;
	262	};
	263
	264	// ----------------------------------------------------------------------------
	265	// wxMBConvUTF16Base: common base class of the UTF-16 LE and BE conversions
	266	// ----------------------------------------------------------------------------
	267
	268	class WXDLLIMPEXP_BASE wxMBConvUTF16Base : public wxMBConv
	269	{
	270	public:
	271	enum { BYTES_PER_CHAR = 2 };
	272	// the terminating NUL takes BYTES_PER_CHAR (2) bytes in this encoding
	273	virtual size_t GetMBNulLen() const { return BYTES_PER_CHAR; }
	274
	275	protected:
	276	// return the length of the buffer using srcLen if it's not wxNO_LEN and
	277	// computing the length ourselves if it is; also checks that the length is
	278	// even if specified as we need an entire number of UTF-16 characters and
	279	// returns wxNO_LEN which indicates error if it is odd
	280	static size_t GetLength(const char *src, size_t srcLen);
	281	};
	282
	283	// ----------------------------------------------------------------------------
	284	// wxMBConvUTF16LE (for conversion using UTF16 Little Endian encoding)
	285	// ----------------------------------------------------------------------------
	286	// Declares ToWChar()/FromWChar() overrides rather than the old MB2WC()/WC2MB().
	287	class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConvUTF16Base
	288	{
	289	public:
	290	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	291	const char *src, size_t srcLen = wxNO_LEN) const;
	292	virtual size_t FromWChar(char *dst, size_t dstLen,
	293	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
	294	virtual wxMBConv *Clone() const { return new wxMBConvUTF16LE; }
	295	};
	296
	297	// ----------------------------------------------------------------------------
	298	// wxMBConvUTF16BE (for conversion using UTF16 Big Endian encoding)
	299	// ----------------------------------------------------------------------------
	300	// Declares ToWChar()/FromWChar() overrides rather than the old MB2WC()/WC2MB().
	301	class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConvUTF16Base
	302	{
	303	public:
	304	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	305	const char *src, size_t srcLen = wxNO_LEN) const;
	306	virtual size_t FromWChar(char *dst, size_t dstLen,
	307	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
	308	virtual wxMBConv *Clone() const { return new wxMBConvUTF16BE; }
	309	};
	310
	311	// ----------------------------------------------------------------------------
	312	// wxMBConvUTF32Base: common base class of the UTF-32 LE and BE conversions
	313	// ----------------------------------------------------------------------------
	314
	315	class WXDLLIMPEXP_BASE wxMBConvUTF32Base : public wxMBConv
	316	{
	317	public:
	318	enum { BYTES_PER_CHAR = 4 };
	319	// the terminating NUL takes BYTES_PER_CHAR (4) bytes in this encoding
	320	virtual size_t GetMBNulLen() const { return BYTES_PER_CHAR; }
	321
	322	protected:
	323	// this is similar to wxMBConvUTF16Base method with the same name except
	324	// that, of course, it verifies that length is divisible by 4 if given and
	325	// not by 2
	326	static size_t GetLength(const char *src, size_t srcLen);
	327	};
	328
	329	// ----------------------------------------------------------------------------
	330	// wxMBConvUTF32LE (for conversion using UTF32 Little Endian encoding)
	331	// ----------------------------------------------------------------------------
	332	// Declares ToWChar()/FromWChar() overrides rather than the old MB2WC()/WC2MB().
	333	class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConvUTF32Base
	334	{
	335	public:
	336	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	337	const char *src, size_t srcLen = wxNO_LEN) const;
	338	virtual size_t FromWChar(char *dst, size_t dstLen,
	339	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
	340	virtual wxMBConv *Clone() const { return new wxMBConvUTF32LE; }
	341	};
	342
	343	// ----------------------------------------------------------------------------
	344	// wxMBConvUTF32BE (for conversion using UTF32 Big Endian encoding)
	345	// ----------------------------------------------------------------------------
	346	// Declares ToWChar()/FromWChar() overrides rather than the old MB2WC()/WC2MB().
	347	class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConvUTF32Base
	348	{
	349	public:
	350	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	351	const char *src, size_t srcLen = wxNO_LEN) const;
	352	virtual size_t FromWChar(char *dst, size_t dstLen,
	353	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
	354	virtual wxMBConv *Clone() const { return new wxMBConvUTF32BE; }
	355	};
	356
	357	// ----------------------------------------------------------------------------
	358	// wxCSConv (for conversion based on loadable char sets)
	359	// ----------------------------------------------------------------------------
	360
	361	#include "wx/fontenc.h"
	362
	363	class WXDLLIMPEXP_BASE wxCSConv : public wxMBConv
	364	{
	365	public:
	366	// we can be created either from charset name or from an encoding constant
	367	// but we can't have both at once
	368	wxCSConv(const wxChar *charset);
	369	wxCSConv(wxFontEncoding encoding);
	370	// copy ctor, dtor and assignment operator are all user-declared
	371	wxCSConv(const wxCSConv& conv);
	372	virtual ~wxCSConv();
	373
	374	wxCSConv& operator=(const wxCSConv& conv);
	375
	376	virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
	377	const char *src, size_t srcLen = wxNO_LEN) const;
	378	virtual size_t FromWChar(char *dst, size_t dstLen,
	379	const wchar_t *src, size_t srcLen = wxNO_LEN) const;
	380	virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
	381	virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
	382	virtual size_t GetMBNulLen() const;
	383	// polymorphic copy made with the copy ctor
	384	virtual wxMBConv *Clone() const { return new wxCSConv(*this); }
	385
	386	void Clear();
	387
	388	// return true if the conversion could be initialized successfully
	389	bool IsOk() const;
	390
	391	private:
	392	// common part of all ctors
	393	void Init();
	394
	395	// creates m_convReal if necessary
	396	void CreateConvIfNeeded() const;
	397
	398	// do create m_convReal (unconditionally)
	399	wxMBConv *DoCreate() const;
	400
	401	// set the name (may be only called when m_name == NULL), makes copy of
	402	// charset string
	403	void SetName(const wxChar *charset);
	404
	405
	406	// note that we can't use wxString here because of compilation
	407	// dependencies: we're included from wx/string.h
	408	wxChar *m_name;
	409	wxFontEncoding m_encoding;
	410
	411	// use CreateConvIfNeeded() before accessing m_convReal!
	412	wxMBConv *m_convReal;
	413	bool m_deferred;
	414	};
	415
	416
	417	// ----------------------------------------------------------------------------
	418	// declare predefined conversion objects
	419	// ----------------------------------------------------------------------------
	420
	421	// conversion to be used with all standard functions affected by locale, e.g.
	422	// strtol(), strftime(), ...
	423	extern WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc;
	424
	425	// conversions between ISO-8859-1/UTF-7/UTF-8 and wchar_t
	426	extern WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1;
	427	extern WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7;
	428	extern WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8;
	429
	430	// conversion used for the file names on the systems where they're not Unicode
	431	// (basically anything except Windows)
	432	//
	433	// this is used by all file functions and can be changed by the application
	434	//
	435	// by default UTF-8 under Mac OS X and wxConvLibc elsewhere (but it's not used
	436	// under Windows normally)
	437	extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName;
	438
	439	// backwards compatible define: the object wxConvFileName points to
	440	#define wxConvFile (*wxConvFileName)
	441
	442	// the current conversion object: it may be set to any conversion and is used
	443	// by default in a couple of places inside wx (initially same as wxConvLibc)
	444	extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent;
	445
	446	// the conversion corresponding to the current locale
	447	extern WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal;
	448
	449	// the conversion corresponding to the encoding of the standard UI elements
	450	//
	451	// by default this is the same as wxConvLocal but may be changed if the program
	452	// needs to use a fixed encoding
	453	extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI;
	454
	455	// ----------------------------------------------------------------------------
	456	// endianness-dependent conversions: BE classes if WORDS_BIGENDIAN, else LE
	457	// ----------------------------------------------------------------------------
	458
	459	#ifdef WORDS_BIGENDIAN
	460	typedef wxMBConvUTF16BE wxMBConvUTF16;
	461	typedef wxMBConvUTF32BE wxMBConvUTF32;
	462	#else
	463	typedef wxMBConvUTF16LE wxMBConvUTF16;
	464	typedef wxMBConvUTF32LE wxMBConvUTF32;
	465	#endif
	466
	467	// ----------------------------------------------------------------------------
	468	// filename conversion macros
	469	// ----------------------------------------------------------------------------
	470
	471	// filenames are multibyte on Unix and widechar on Windows
	472	#if defined(__UNIX__) || defined(__WXMAC__)
	473	#define wxMBFILES 1
	474	#else
	475	#define wxMBFILES 0
	476	#endif
	477	// wxFNCONV(name) yields the file name in the form used for the file functions
	478	#if wxMBFILES && wxUSE_UNICODE
	479	#define wxFNCONV(name) wxConvFileName->cWX2MB(name)
	480	#define wxFNSTRINGCAST wxMBSTRINGCAST
	481	#else
	482	#if defined( __WXOSX__ ) && wxMBFILES
	483	#define wxFNCONV(name) wxConvFileName->cWC2MB( wxConvLocal.cWX2WC(name) )
	484	#else
	485	#define wxFNCONV(name) name
	486	#endif
	487	#define wxFNSTRINGCAST WXSTRINGCAST
	488	#endif
	489
	490	#else // !wxUSE_WCHAR_T
	491
	492	// ----------------------------------------------------------------------------
	493	// stand-ins in absence of wchar_t
	494	// ----------------------------------------------------------------------------
	495	// minimal wxMBConv whose "conversions" return their argument unchanged
	496	class WXDLLIMPEXP_BASE wxMBConv
	497	{
	498	public:
	499	const char* cMB2WX(const char *psz) const { return psz; }
	500	const char* cWX2MB(const char *psz) const { return psz; }
	501	};
	502	// in this branch wxConvFile is simply another name for wxConvLocal
	503	#define wxConvFile wxConvLocal
	504	// here the predefined conversions are plain objects, not references
	505	extern WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
	506	wxConvLocal,
	507	wxConvISO8859_1,
	508	wxConvUTF8;
	509	extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent;
	510	// file names are passed through unconverted in this branch
	511	#define wxFNCONV(name) name
	512	#define wxFNSTRINGCAST WXSTRINGCAST
	513
	514	#endif
	515	// wxUSE_WCHAR_T
	516
	517	// ----------------------------------------------------------------------------
	518	// macros for the most common conversions
	519	// ----------------------------------------------------------------------------
	520
	521	#if wxUSE_UNICODE
	522	#define wxConvertWX2MB(s) wxConvCurrent->cWX2MB(s)
	523	#define wxConvertMB2WX(s) wxConvCurrent->cMB2WX(s)
	524
	525	// these functions should be used when the conversions really, really have
	526	// to succeed (usually because we pass their results to a standard C
	527	// function which would crash if we passed NULL to it), so these functions
	528	// always return a valid pointer if their argument is non-NULL
	529
	530	// the safety of this function is achieved by trying wxConvLibc first, wxConvUTF8
	531	// next if it fails and, finally, wxConvISO8859_1 which always succeeds
	532	extern WXDLLIMPEXP_BASE wxWCharBuffer wxSafeConvertMB2WX(const char *s);
	533
	534	// this function tries wxConvLibc first and falls back to
	535	// wxConvUTF8(MAP_INVALID_UTF8_TO_OCTAL) if that fails
	536	extern WXDLLIMPEXP_BASE wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws);
	537	#else // ANSI
	538	// no conversions to do: all four macros expand to their argument
	539	#define wxConvertWX2MB(s) (s)
	540	#define wxConvertMB2WX(s) (s)
	541	#define wxSafeConvertMB2WX(s) (s)
	542	#define wxSafeConvertWX2MB(s) (s)
	543	#endif // Unicode/ANSI
	544
	545	#endif // _WX_STRCONV_H_
	546