]> git.saurik.com Git - wxWidgets.git/blame - interface/wx/encconv.h
Document ctors creating a wxString from repeated characters.
[wxWidgets.git] / interface / wx / encconv.h
CommitLineData
23324ae1
FM
1/////////////////////////////////////////////////////////////////////////////
2// Name: encconv.h
e54c96f1 3// Purpose: interface of wxEncodingConverter
23324ae1
FM
4// Author: wxWidgets team
5// RCS-ID: $Id$
6// Licence: wxWindows license
7/////////////////////////////////////////////////////////////////////////////
8
9/**
10 @class wxEncodingConverter
7c913512 11
1f1d2182
FM
12 This class is capable of converting strings between two 8-bit encodings/charsets.
13 It can also convert from/to Unicode (but only if you compiled wxWidgets
f1d5aa12 14 with @c wxUSE_WCHAR_T set to 1).
1f1d2182
FM
15
16 Only a limited subset of encodings is supported by wxEncodingConverter:
7c913512 17 @c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and
23324ae1 18 @c wxFONTENCODING_KOI8.
7c913512 19
1f1d2182 20 @note
1f1d2182
FM
21 Please use wxMBConv classes instead if possible. wxCSConv has much better
22 support for various encodings than wxEncodingConverter.
23 wxEncodingConverter is useful only if you rely on wxCONVERT_SUBSTITUTE mode
24 of operation (see wxEncodingConverter::Init()).
25
23324ae1 26 @library{wxbase}
3c99e2fd 27 @category{conv}
7c913512 28
1f1d2182 29 @see wxFontMapper, wxMBConv, @ref overview_nonenglish
23324ae1
FM
30*/
31class wxEncodingConverter : public wxObject
32{
33public:
34 /**
35 Constructor.
36 */
37 wxEncodingConverter();
38
39 /**
4cc4bfaf 40 Return @true if (any text in) multibyte encoding @a encIn can be converted to
1f1d2182
FM
41 another one (@a encOut) losslessly.
42
43 Do not call this method with @c wxFONTENCODING_UNICODE as either parameter,
44 it doesn't make sense (always works in one sense and always depends
23324ae1
FM
45 on the text to convert in the other).
46 */
47 static bool CanConvert(wxFontEncoding encIn,
48 wxFontEncoding encOut);
49
23324ae1 50 /**
1f1d2182
FM
51 @name Conversion functions
52
53 @{
54 */
55 /**
56 Convert input string according to settings passed to Init() and write
57 the result to output.
58
59 All the Convert() function overloads return @true if the conversion was
60 lossless and @false if at least one of the characters couldn't be converted
61 and was replaced with '?' in the output.
62
63 Note that if @c wxCONVERT_SUBSTITUTE was passed to Init(), substitution is
64 considered a lossless operation.
65
66 @note You must call Init() before using this method!
67
68 @note wchar_t versions of the method are not available if wxWidgets was
69 compiled with @c wxUSE_WCHAR_T set to 0.
23324ae1 70 */
328f5751 71 bool Convert(const char* input, char* output) const;
1f1d2182
FM
72 bool Convert(const wchar_t* input, wchar_t* output) const;
73 bool Convert(const char* input, wchar_t* output) const;
74 bool Convert(const wchar_t* input, char* output) const;
75
76 /**
77 Convert input string according to settings passed to Init() in-place,
78 i.e. write the result to the same memory area.
79
80 See the Convert(const char*,char*) const overload for more info.
81 */
82 bool Convert(char* str) const;
83 bool Convert(wchar_t* str) const;
84
85 /**
86 Convert a wxString and return a new wxString object.
87
88 See the Convert(const char*,char*) const overload for more info.
89 */
90 wxString Convert(const wxString& input) const;
23324ae1
FM
91 //@}
92
1f1d2182 93
23324ae1 94 /**
1f1d2182 95 Similar to GetPlatformEquivalents(), but this one will return ALL
23324ae1 96 equivalent encodings, regardless of the platform, and including itself.
1f1d2182
FM
97
98 This platform's encodings are before others in the array.
99 And again, if @a enc is in the array, it is the very first item in it.
23324ae1
FM
100 */
101 static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
102
103 /**
1f1d2182
FM
104 Return equivalents for given font that are used under given platform.
105
106 Supported platforms:
107 @li wxPLATFORM_UNIX
108 @li wxPLATFORM_WINDOWS
109 @li wxPLATFORM_OS2
110 @li wxPLATFORM_MAC
111 @li wxPLATFORM_CURRENT
112
23324ae1 113 wxPLATFORM_CURRENT means the platform this binary was compiled for.
1f1d2182 114
23324ae1 115 Examples:
3c4f71cc 116
1f1d2182
FM
117 @verbatim
118 current platform enc returned value
119 ----------------------------------------------
120 unix CP1250 {ISO8859_2}
121 unix ISO8859_2 {ISO8859_2}
122 windows ISO8859_2 {CP1250}
123 unix CP1252 {ISO8859_1,ISO8859_15}
124 @endverbatim
125
126 Equivalence is defined in terms of convertibility: two encodings are
127 equivalent if you can convert text between them without losing
128 information (it may - and will - happen that you lose special chars
129 like quotation marks or em-dashes but you shouldn't lose any diacritics
130 and language-specific characters when converting between equivalent encodings).
131
23324ae1
FM
132 Remember that this function does @b NOT check for the presence of
133 fonts in the system. It only tells you which encodings are the most suitable
134 encodings. (It usually returns only one encoding.)
1f1d2182
FM
135
136 @note The argument @a enc itself may be present in the returned array,
137 so that you can, as a side-effect, detect whether the encoding is
138 native for this platform or not.
139
140 @note Convert() is not limited to converting between equivalent encodings,
141 it can convert between two arbitrary encodings.
142
143 @note If @a enc is present in the returned array, then it is always the first
144 item of it.
145
146 @note Please note that the returned array may contain no items at all.
23324ae1
FM
147 */
148 static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc,
1f1d2182 149 int platform = wxPLATFORM_CURRENT);
23324ae1
FM
150
151 /**
1f1d2182
FM
152 Initialize the conversion.
153
154 Either the output or the input encoding may be wxFONTENCODING_UNICODE, but only
155 if wxUSE_ENCODING is set to 1.
156
157 All subsequent calls to Convert() will interpret its argument
4cc4bfaf
FM
158 as a string in @a input_enc encoding and will output string in
159 @a output_enc encoding.
1f1d2182 160
7c913512 161 You must call this method before calling Convert. You may call
23324ae1 162 it more than once in order to switch to another conversion.
3c4f71cc 163
1f1d2182
FM
164 @a method affects behaviour of Convert() in case input character
165 cannot be converted because it does not exist in output encoding:
3c4f71cc 166
1f1d2182
FM
167 @li @b wxCONVERT_STRICT: follow behaviour of GNU Recode - just copy
168 unconvertible characters to output and don't change them
169 (their integer values will stay the same)
170 @li @b wxCONVERT_SUBSTITUTE: try some (lossy) substitutions - e.g.
171 replace unconvertible latin capitals with acute by ordinary
172 capitals, replace en-dash or em-dash by '-' etc.
3c4f71cc 173
23324ae1
FM
174 Both modes guarantee that output string will have same length
175 as input string.
1f1d2182
FM
176
177 @return @false if given conversion is impossible, @true otherwise
178 (conversion may be impossible either if you try to convert
179 to Unicode with non-Unicode build of wxWidgets or if input
180 or output encoding is not supported).
23324ae1
FM
181 */
182 bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc,
183 int method = wxCONVERT_STRICT);
184};
e54c96f1 185