[wxWidgets.git] / interface / encconv.h

/////////////////////////////////////////////////////////////////////////////
// Name:        encconv.h
// Purpose:     interface of wxEncodingConverter
// Author:      wxWidgets team
// RCS-ID:      $Id$
// Licence:     wxWindows license
/////////////////////////////////////////////////////////////////////////////

/**
    @class wxEncodingConverter
    @wxheader{encconv.h}

    This class converts strings between two 8-bit encodings/charsets. It can
    also convert from/to Unicode (but only if you compiled wxWidgets with
    wxUSE_WCHAR_T set to 1).

    Only a limited subset of encodings is supported by wxEncodingConverter:
    @c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and
    @c wxFONTENCODING_KOI8.

    @library{wxbase}
    @category{misc}

    @see wxFontMapper, wxMBConv,
         @ref overview_nonenglishoverview "Writing non-English applications"
*/
class wxEncodingConverter : public wxObject
{
public:
    /**
        Constructor.

        Init() must be called before the object can be used for conversion.
    */
    wxEncodingConverter();

    /**
        Return @true if (any text in) multibyte encoding @a encIn can be
        converted to another one (@a encOut) losslessly.

        Do not call this method with @c wxFONTENCODING_UNICODE as either
        parameter: it doesn't make sense (the conversion always works in one
        direction and always depends on the text to convert in the other).

        @param encIn
            Encoding of the text to be converted.
        @param encOut
            Encoding the text would be converted to.
    */
    static bool CanConvert(wxFontEncoding encIn,
                           wxFontEncoding encOut);

    //@{
    /**
        Convert a string from the input encoding to the output encoding that
        were selected by the last call to Init().

        Three forms are provided:
        @li the two-pointer overloads read the string from @a input and write
            the converted string to @a output;
        @li the single-pointer overloads convert @a str in place;
        @li the wxString overload returns the converted text as a new
            wxString object.

        You must call Init() before calling any of these overloads. The
        converted string always has the same length as the original one, so
        @a output must be at least as large as the input string.

        @return
            The pointer overloads return @true if the conversion succeeded and
            @false otherwise; the wxString overload returns the converted
            string.
    */
    bool Convert(const char* input, char* output) const;
    bool Convert(const wchar_t* input, wchar_t* output) const;
    bool Convert(const char* input, wchar_t* output) const;
    bool Convert(const wchar_t* input, char* output) const;
    bool Convert(char* str) const;
    bool Convert(wchar_t* str) const;
    wxString Convert(const wxString& input) const;
    //@}

    /**
        Similar to GetPlatformEquivalents(), but this one will return ALL
        equivalent encodings, regardless of the platform, and including
        @a enc itself.

        This platform's encodings come before the others in the array. If
        @a enc is in the array, it is the very first item in it.

        @param enc
            Encoding whose equivalents are requested.
    */
    static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);

    /**
        Return the equivalents of the given font encoding that are used under
        the given platform.

        Supported platforms:
        @li wxPLATFORM_UNIX
        @li wxPLATFORM_WINDOWS
        @li wxPLATFORM_OS2
        @li wxPLATFORM_MAC
        @li wxPLATFORM_CURRENT

        wxPLATFORM_CURRENT means the platform this binary was compiled for.

        Equivalence is defined in terms of convertibility: two encodings are
        equivalent if you can convert text between them without losing
        information (it may - and will - happen that you lose special chars
        like quotation marks or em-dashes, but you shouldn't lose any
        diacritics and language-specific characters when converting between
        equivalent encodings).

        Remember that this function does @b NOT check for the presence of
        fonts in the system. It only tells you which encodings are the most
        suitable ones. (It usually returns only one encoding.)

        @param enc
            Encoding whose platform equivalents are requested.
        @param platform
            One of the platform constants listed above.

        @todo The usage examples that originally followed the platform list
              are missing from this comment and should be restored.
    */
    static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc,
                                                      int platform = wxPLATFORM_CURRENT);

    /**
        Initialize the conversion.

        Either the input or the output encoding may be
        wxFONTENCODING_UNICODE, but only if wxUSE_ENCODING is set to 1.

        All subsequent calls to Convert() will interpret their argument as a
        string in @a input_enc encoding and will output a string in
        @a output_enc encoding.

        You must call this method before calling Convert(). You may call it
        more than once in order to switch to another conversion.

        @a method affects the behaviour of Convert() when an input character
        cannot be converted because it does not exist in the output encoding:

        @li @b wxCONVERT_STRICT: follow the behaviour of GNU Recode - just
            copy unconvertible characters to the output and don't change them
            (their integer value will stay the same).
        @li @b wxCONVERT_SUBSTITUTE: try some (lossy) substitutions - e.g.
            replace unconvertible latin capitals with acute by ordinary
            capitals, replace en-dash or em-dash by '-' etc.

        Both modes guarantee that the output string will have the same length
        as the input string.

        @param input_enc
            Encoding of the strings that will be passed to Convert().
        @param output_enc
            Encoding of the strings that Convert() will produce.
        @param method
            Either wxCONVERT_STRICT or wxCONVERT_SUBSTITUTE, see above.

        @return
            @true if the requested conversion can be performed, @false
            otherwise.
    */
    bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc,
              int method = wxCONVERT_STRICT);
};
Commit	Line	Data
23324ae1 FM	1	/////////////////////////////////////////////////////////////////////////////
23324ae1 FM	2	// Name: encconv.h
e54c96f1	3	// Purpose: interface of wxEncodingConverter
23324ae1 FM	4	// Author: wxWidgets team
	5	// RCS-ID: $Id$
	6	// Licence: wxWindows license
	7	/////////////////////////////////////////////////////////////////////////////
	8
	9	/**
	10	@class wxEncodingConverter
	11	@wxheader{encconv.h}
7c913512	12
23324ae1 FM	13	This class is capable of converting strings between two
	14	8-bit encodings/charsets. It can also convert from/to Unicode (but only
	15	if you compiled wxWidgets with wxUSE_WCHAR_T set to 1). Only a limited subset
	16	of encodings is supported by wxEncodingConverter:
7c913512	17	@c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and
23324ae1	18	@c wxFONTENCODING_KOI8.
7c913512	19
23324ae1 FM	20	@library{wxbase}
23324ae1 FM	21	@category{misc}
7c913512	22
e54c96f1 FM	23	@see wxFontMapper, wxMBConv, @ref overview_nonenglishoverview "Writing
e54c96f1 FM	24	non-English applications"
23324ae1 FM	25	*/
	26	class wxEncodingConverter : public wxObject
	27	{
	28	public:
	29	/**
	30	Constructor.
	31	*/
	32	wxEncodingConverter();
	33
	34	/**
4cc4bfaf	35	Return @true if (any text in) multibyte encoding @a encIn can be converted to
23324ae1	36	another one (@e encOut) losslessly.
23324ae1 FM	37	Do not call this method with @c wxFONTENCODING_UNICODE as either
	38	parameter, it doesn't make sense (always works in one sense and always depends
	39	on the text to convert in the other).
	40	*/
	41	static bool CanConvert(wxFontEncoding encIn,
	42	wxFontEncoding encOut);
	43
	44	//@{
	45	/**
	46	Convert wxString and return new wxString object.
	47	*/
328f5751 FM	48	bool Convert(const char* input, char* output) const;
	49	const bool Convert(const wchar_t* input, wchar_t* output) const;
	50	const bool Convert(const char* input, wchar_t* output) const;
	51	const bool Convert(const wchar_t* input, char* output) const;
	52	const bool Convert(char* str) const;
	53	const bool Convert(wchar_t* str) const;
	54	const wxString Convert(const wxString& input) const;
23324ae1 FM	55	//@}
	56
	57	/**
7c913512 FM	58	Similar to
	59	GetPlatformEquivalents(),
	60	but this one will return ALL
23324ae1	61	equivalent encodings, regardless of the platform, and including itself.
4cc4bfaf	62	This platform's encodings are before others in the array. And again, if @a enc
23324ae1 FM	63	is in the array,
	64	it is the very first item in it.
	65	*/
	66	static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
	67
	68	/**
	69	Return equivalents for given font that are used
	70	under given platform. Supported platforms:
23324ae1 FM	71	wxPLATFORM_UNIX
	72	wxPLATFORM_WINDOWS
	73	wxPLATFORM_OS2
	74	wxPLATFORM_MAC
	75	wxPLATFORM_CURRENT
23324ae1	76	wxPLATFORM_CURRENT means the platform this binary was compiled for.
23324ae1	77	Examples:
3c4f71cc	78
23324ae1 FM	79	Equivalence is defined in terms of convertibility:
	80	two encodings are equivalent if you can convert text between
	81	then without losing information (it may - and will - happen
	82	that you lose special chars like quotation marks or em-dashes
	83	but you shouldn't lose any diacritics and language-specific
	84	characters when converting between equivalent encodings).
23324ae1 FM	85	Remember that this function does @b NOT check for presence of
	86	fonts in system. It only tells you what are most suitable
	87	encodings. (It usually returns only one encoding.)
	88	*/
	89	static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc,
7c913512	90	int platform = wxPLATFORM_CURRENT);
23324ae1 FM	91
	92	/**
	93	Initialize conversion. Both output or input encoding may
	94	be wxFONTENCODING_UNICODE, but only if wxUSE_ENCODING is set to 1.
7c913512	95	All subsequent calls to Convert()
23324ae1	96	will interpret its argument
4cc4bfaf FM	97	as a string in @a input_enc encoding and will output string in
4cc4bfaf FM	98	@a output_enc encoding.
7c913512	99	You must call this method before calling Convert. You may call
23324ae1 FM	100	it more than once in order to switch to another conversion.
	101	@e Method affects behaviour of Convert() in case input character
	102	cannot be converted because it does not exist in output encoding:
3c4f71cc	103
23324ae1	104	@b wxCONVERT_STRICT
3c4f71cc	105
23324ae1	106	follow behaviour of GNU Recode -
7c913512	107	just copy unconvertible characters to output and don't change them
23324ae1	108	(its integer value will stay the same)
3c4f71cc	109
23324ae1	110	@b wxCONVERT_SUBSTITUTE
3c4f71cc	111
7c913512	112	try some (lossy) substitutions
23324ae1 FM	113	- e.g. replace unconvertible latin capitals with acute by ordinary
23324ae1 FM	114	capitals, replace en-dash or em-dash by '-' etc.
3c4f71cc	115
23324ae1 FM	116	Both modes guarantee that output string will have same length
	117	as input string.
	118	*/
	119	bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc,
	120	int method = wxCONVERT_STRICT);
	121	};
e54c96f1	122