]>
Commit | Line | Data |
---|---|---|
23324ae1 FM |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: encconv.h | |
e54c96f1 | 3 | // Purpose: interface of wxEncodingConverter |
23324ae1 | 4 | // Author: wxWidgets team |
526954c5 | 5 | // Licence: wxWindows licence |
23324ae1 FM |
6 | ///////////////////////////////////////////////////////////////////////////// |
7 | ||
8 | /** | |
9 | @class wxEncodingConverter | |
7c913512 | 10 | |
1f1d2182 | 11 | This class is capable of converting strings between two 8-bit encodings/charsets. |
8d94819c | 12 | It can also convert from/to Unicode. |
1f1d2182 FM |
13 | |
14 | Only a limited subset of encodings is supported by wxEncodingConverter: | |
7c913512 | 15 | @c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and |
23324ae1 | 16 | @c wxFONTENCODING_KOI8. |
7c913512 | 17 | |
1f1d2182 | 18 | @note |
1f1d2182 FM |
19 | Please use wxMBConv classes instead if possible. wxCSConv has much better |
20 | support for various encodings than wxEncodingConverter. | |
21 | wxEncodingConverter is useful only if you rely on wxCONVERT_SUBSTITUTE mode | |
22 | of operation (see wxEncodingConverter::Init()). | |
23 | ||
23324ae1 | 24 | @library{wxbase} |
3c99e2fd | 25 | @category{conv} |
7c913512 | 26 | |
1f1d2182 | 27 | @see wxFontMapper, wxMBConv, @ref overview_nonenglish |
23324ae1 FM |
28 | */ |
29 | class wxEncodingConverter : public wxObject | |
30 | { | |
31 | public: | |
32 | /** | |
33 | Constructor. | |
34 | */ | |
35 | wxEncodingConverter(); | |
36 | ||
37 | /** | |
4cc4bfaf | 38 | Return @true if (any text in) multibyte encoding @a encIn can be converted to |
1f1d2182 FM |
39 | another one (@a encOut) losslessly. |
40 | ||
41 | Do not call this method with @c wxFONTENCODING_UNICODE as either parameter, | |
42 | it doesn't make sense (always works in one sense and always depends | |
23324ae1 FM |
43 | on the text to convert in the other). |
44 | */ | |
45 | static bool CanConvert(wxFontEncoding encIn, | |
46 | wxFontEncoding encOut); | |
47 | ||
23324ae1 | 48 | /** |
1f1d2182 FM |
49 | @name Conversion functions |
50 | ||
51 | @{ | |
52 | */ | |
53 | /** | |
54 | Convert input string according to settings passed to Init() and write | |
55 | the result to output. | |
56 | ||
57 | All the Convert() function overloads return @true if the conversion was | |
58 | lossless and @false if at least one of the characters couldn't be converted | |
59 | and was replaced with '?' in the output. | |
60 | ||
61 | Note that if @c wxCONVERT_SUBSTITUTE was passed to Init(), substitution is | |
62 | considered a lossless operation. | |
63 | ||
64 | @note You must call Init() before using this method! | |
23324ae1 | 65 | */ |
328f5751 | 66 | bool Convert(const char* input, char* output) const; |
1f1d2182 FM |
67 | bool Convert(const wchar_t* input, wchar_t* output) const; |
68 | bool Convert(const char* input, wchar_t* output) const; | |
69 | bool Convert(const wchar_t* input, char* output) const; | |
70 | ||
71 | /** | |
13b4df95 VZ |
72 | Convert input string according to settings passed to Init() in-place. |
73 | ||
74 | With this overload, the conversion result is written to the same memory | |
75 | area from which the input is read. | |
1f1d2182 FM |
76 | |
77 | See the Convert(const char*,char*) const overload for more info. | |
78 | */ | |
79 | bool Convert(char* str) const; | |
13b4df95 VZ |
80 | |
81 | /** | |
82 | Convert input string according to settings passed to Init() in-place. | |
83 | ||
84 | With this overload, the conversion result is written to the same memory | |
85 | area from which the input is read. | |
86 | ||
87 | See the Convert(const wchar_t*,wchar_t*) const overload for more info. | |
88 | */ | |
1f1d2182 FM |
89 | bool Convert(wchar_t* str) const; |
90 | ||
91 | /** | |
92 | Convert a wxString and return a new wxString object. | |
93 | ||
94 | See the Convert(const char*,char*) const overload for more info. | |
95 | */ | |
96 | wxString Convert(const wxString& input) const; | |
23324ae1 FM |
97 | //@} |
98 | ||
1f1d2182 | 99 | |
23324ae1 | 100 | /** |
1f1d2182 | 101 | Similar to GetPlatformEquivalents(), but this one will return ALL |
23324ae1 | 102 | equivalent encodings, regardless of the platform, and including itself. |
1f1d2182 FM |
103 | |
104 | This platform's encodings are before others in the array. | |
105 | And again, if @a enc is in the array, it is the very first item in it. | |
23324ae1 FM |
106 | */ |
107 | static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc); | |
108 | ||
109 | /** | |
1f1d2182 FM |
110 | Return equivalents for given font that are used under given platform. |
111 | ||
112 | Supported platforms: | |
113 | @li wxPLATFORM_UNIX | |
114 | @li wxPLATFORM_WINDOWS | |
115 | @li wxPLATFORM_OS2 | |
116 | @li wxPLATFORM_MAC | |
117 | @li wxPLATFORM_CURRENT | |
118 | ||
23324ae1 | 119 | wxPLATFORM_CURRENT means the platform this binary was compiled for. |
1f1d2182 | 120 | |
23324ae1 | 121 | Examples: |
3c4f71cc | 122 | |
1f1d2182 FM |
123 | @verbatim |
124 | current platform enc returned value | |
125 | ---------------------------------------------- | |
126 | unix CP1250 {ISO8859_2} | |
127 | unix ISO8859_2 {ISO8859_2} | |
128 | windows ISO8859_2 {CP1250} | |
129 | unix CP1252 {ISO8859_1,ISO8859_15} | |
130 | @endverbatim | |
131 | ||
132 | Equivalence is defined in terms of convertibility: two encodings are | |
133 | equivalent if you can convert text between them without losing | |
134 | information (it may - and will - happen that you lose special chars | |
135 | like quotation marks or em-dashes but you shouldn't lose any diacritics | |
136 | and language-specific characters when converting between equivalent encodings). | |
137 | ||
23324ae1 FM |
138 | Remember that this function does @b NOT check for presence of |
139 | fonts in the system. It only tells you what the most suitable | |
140 | encodings are. (It usually returns only one encoding.) | |
1f1d2182 FM |
141 | |
142 | @note The argument @a enc itself may be present in the returned array, | |
143 | so that you can, as a side-effect, detect whether the encoding is | |
144 | native for this platform or not. | |
145 | ||
146 | @note Convert() is not limited to converting between equivalent encodings, | |
147 | it can convert between two arbitrary encodings. | |
148 | ||
149 | @note If @a enc is present in the returned array, then it is always the first | |
150 | item of it. | |
151 | ||
152 | @note Please note that the returned array may contain no items at all. | |
23324ae1 FM |
153 | */ |
154 | static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc, | |
1f1d2182 | 155 | int platform = wxPLATFORM_CURRENT); |
23324ae1 FM |
156 | |
157 | /** | |
1f1d2182 FM |
158 | Initialize the conversion. |
159 | ||
160 | Either output or input encoding may be wxFONTENCODING_UNICODE, but only | |
161 | if wxUSE_ENCODING is set to 1. | |
162 | ||
163 | All subsequent calls to Convert() will interpret its argument | |
4cc4bfaf FM |
164 | as a string in @a input_enc encoding and will output string in |
165 | @a output_enc encoding. | |
1f1d2182 | 166 | |
7c913512 | 167 | You must call this method before calling Convert(). You may call |
23324ae1 | 168 | it more than once in order to switch to another conversion. |
3c4f71cc | 169 | |
1f1d2182 FM |
170 | @a method affects behaviour of Convert() in case input character |
171 | cannot be converted because it does not exist in output encoding: | |
3c4f71cc | 172 | |
1f1d2182 FM |
173 | @li @b wxCONVERT_STRICT: follow behaviour of GNU Recode - just copy |
174 | unconvertible characters to output and don't change them | |
175 | (its integer value will stay the same) | |
176 | @li @b wxCONVERT_SUBSTITUTE: try some (lossy) substitutions - e.g. | |
177 | replace unconvertible latin capitals with acute by ordinary | |
178 | capitals, replace en-dash or em-dash by '-' etc. | |
3c4f71cc | 179 | |
23324ae1 FM |
180 | Both modes guarantee that output string will have same length |
181 | as input string. | |
1f1d2182 FM |
182 | |
183 | @return @false if given conversion is impossible, @true otherwise | |
184 | (conversion may be impossible either if you try to convert | |
185 | to Unicode with non-Unicode build of wxWidgets or if input | |
186 | or output encoding is not supported). | |
23324ae1 FM |
187 | */ |
188 | bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc, | |
189 | int method = wxCONVERT_STRICT); | |
190 | }; | |
e54c96f1 | 191 |