]> git.saurik.com Git - wxWidgets.git/blob - interface/wx/encconv.h
7bf0a48c535b7a5e3c87075675f8341f9539f1c3
[wxWidgets.git] / interface / wx / encconv.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.h
3 // Purpose: interface of wxEncodingConverter
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10 @class wxEncodingConverter
11
12 This class is capable of converting strings between two 8-bit encodings/charsets.
13 It can also convert from/to Unicode.
14
15 Only a limited subset of encodings is supported by wxEncodingConverter:
16 @c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and
17 @c wxFONTENCODING_KOI8.
18
19 @note
20 Please use wxMBConv classes instead if possible. wxCSConv has much better
21 support for various encodings than wxEncodingConverter.
22 wxEncodingConverter is useful only if you rely on wxCONVERT_SUBSTITUTE mode
23 of operation (see wxEncodingConverter::Init()).
24
25 @library{wxbase}
26 @category{conv}
27
28 @see wxFontMapper, wxMBConv, @ref overview_nonenglish
29 */
30 class wxEncodingConverter : public wxObject
31 {
32 public:
33 /**
34 Default constructor. Init() must be called before any Convert() overload is used.
35 */
36 wxEncodingConverter();
37
38 /**
39 Return @true if (any text in) multibyte encoding @a encIn can be converted to
40 another one (@a encOut) losslessly.
41
42 Do not call this method with @c wxFONTENCODING_UNICODE as either parameter:
43 it doesn't make sense (the conversion always works in one direction and
44 always depends on the text to convert in the other).
45 */
46 static bool CanConvert(wxFontEncoding encIn,
47 wxFontEncoding encOut);
48
49 /**
50 @name Conversion functions
51
52 @{
53 */
54 /**
55 Convert the input string according to the settings passed to Init() and
56 write the result to @a output.
57
58 All the Convert() function overloads return @true if the conversion was
59 lossless and @false if at least one of the characters couldn't be converted
60 and was replaced with '?' in the output.
61
62 Note that if @c wxCONVERT_SUBSTITUTE was passed to Init(), substitution is
63 considered a lossless operation.
64
65 @note You must call Init() before using this method!
66 */
67 bool Convert(const char* input, char* output) const;
68 bool Convert(const wchar_t* input, wchar_t* output) const;
69 bool Convert(const char* input, wchar_t* output) const;
70 bool Convert(const wchar_t* input, char* output) const;
71
72 /**
73 Convert the input string in-place according to the settings passed to Init().
74
75 With this overload, the conversion result is written to the same memory
76 area from which the input is read.
77
78 See the Convert(const char*,char*) const overload for more info.
79 */
80 bool Convert(char* str) const;
81
82 /**
83 Convert the input string in-place according to the settings passed to Init().
84
85 With this overload, the conversion result is written to the same memory
86 area from which the input is read.
87
88 See the Convert(const wchar_t*,wchar_t*) const overload for more info.
89 */
90 bool Convert(wchar_t* str) const;
91
92 /**
93 Convert a wxString and return the result as a new wxString object.
94
95 See the Convert(const char*,char*) const overload for more info.
96 */
97 wxString Convert(const wxString& input) const;
98 //@}
99
100
101 /**
102 Similar to GetPlatformEquivalents(), but this one will return ALL
103 equivalent encodings, regardless of the platform, and including @a enc itself.
104
105 The current platform's encodings come before the others in the array.
106 And again, if @a enc is in the array, it is the very first item in it.
107 */
108 static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
109
110 /**
111 Return equivalents for the given font encoding that are used under the given platform.
112
113 Supported platforms:
114 @li wxPLATFORM_UNIX
115 @li wxPLATFORM_WINDOWS
116 @li wxPLATFORM_OS2
117 @li wxPLATFORM_MAC
118 @li wxPLATFORM_CURRENT
119
120 wxPLATFORM_CURRENT means the platform this binary was compiled for.
121
122 Examples:
123
124 @verbatim
125 current platform enc returned value
126 ----------------------------------------------
127 unix CP1250 {ISO8859_2}
128 unix ISO8859_2 {ISO8859_2}
129 windows ISO8859_2 {CP1250}
130 unix CP1252 {ISO8859_1,ISO8859_15}
131 @endverbatim
132
133 Equivalence is defined in terms of convertibility: two encodings are
134 equivalent if you can convert text between them without losing
135 information (it may - and will - happen that you lose special chars
136 like quotation marks or em-dashes but you shouldn't lose any diacritics
137 and language-specific characters when converting between equivalent encodings).
138
139 Remember that this function does @b NOT check for the presence of
140 fonts in the system. It only tells you which encodings are the most
141 suitable. (It usually returns only one encoding.)
142
143 @note The argument @a enc itself may be present in the returned array,
144 so that you can, as a side-effect, detect whether the encoding is
145 native for this platform or not.
146
147 @note Convert() is not limited to converting between equivalent encodings,
148 it can convert between two arbitrary encodings.
149
150 @note If @a enc is present in the returned array, then it is always the first
151 item of it.
152
153 @note The returned array may contain no items at all.
154 */
155 static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc,
156 int platform = wxPLATFORM_CURRENT);
157
158 /**
159 Initialize the conversion.
160
161 Both the input and the output encoding may be wxFONTENCODING_UNICODE, but only
162 if wxUSE_ENCODING is set to 1.
163
164 All subsequent calls to Convert() will interpret their argument
165 as a string in @a input_enc encoding and will output a string in
166 @a output_enc encoding.
167
168 You must call this method before calling Convert(). You may call
169 it more than once in order to switch to another conversion.
170
171 @a method affects the behaviour of Convert() in case an input character
172 cannot be converted because it does not exist in the output encoding:
173
174 @li @b wxCONVERT_STRICT: follow the behaviour of GNU Recode - just copy
175 unconvertible characters to the output and don't change them
176 (their integer values will stay the same)
177 @li @b wxCONVERT_SUBSTITUTE: try some (lossy) substitutions - e.g.
178 replace unconvertible latin capitals with acute by ordinary
179 capitals, replace en-dash or em-dash by '-' etc.
180
181 Both modes guarantee that the output string will have the same length
182 as the input string.
183
184 @return @false if the given conversion is impossible, @true otherwise
185 (the conversion may be impossible either if you try to convert
186 to Unicode with a non-Unicode build of wxWidgets or if the input
187 or output encoding is not supported).
188 */
189 bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc,
190 int method = wxCONVERT_STRICT);
191 };
192