]>
Commit | Line | Data |
---|---|---|
23324ae1 FM |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: strconv.h | |
e54c96f1 | 3 | // Purpose: interface of wxMBConvUTF7 |
23324ae1 FM |
4 | // Author: wxWidgets team |
5 | // RCS-ID: $Id$ | |
6 | // Licence: wxWindows license | |
7 | ///////////////////////////////////////////////////////////////////////////// | |
8 | ||
9 | /** | |
10 | @class wxMBConvUTF7 | |
11 | @wxheader{strconv.h} | |
7c913512 | 12 | |
23324ae1 FM |
13 | This class converts between the UTF-7 encoding and Unicode. |
14 | It has one predefined instance, @b wxConvUTF7. | |
7c913512 | 15 | |
23324ae1 | 16 | @b WARNING: this class is not implemented yet. |
7c913512 | 17 | |
23324ae1 FM |
18 | @library{wxbase} |
19 | @category{FIXME} | |
7c913512 | 20 | |
ee0b7af0 | 21 | @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview" |
23324ae1 FM |
22 | */ |
23 | class wxMBConvUTF7 : public wxMBConv | |
24 | { | |
25 | public: | |
26 | /** | |
27 | Converts from UTF-7 encoding to Unicode. Returns the size of the destination | |
28 | buffer. | |
29 | */ | |
328f5751 | 30 | size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const; |
23324ae1 FM |
31 | |
32 | /** | |
33 | Converts from Unicode to UTF-7 encoding. Returns the size of the destination | |
34 | buffer. | |
35 | */ | |
328f5751 | 36 | size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const; |
23324ae1 FM |
37 | }; |
38 | ||
39 | ||
e54c96f1 | 40 | |
23324ae1 FM |
41 | /** |
42 | @class wxMBConvUTF8 | |
43 | @wxheader{strconv.h} | |
7c913512 | 44 | |
23324ae1 FM |
45 | This class converts between the UTF-8 encoding and Unicode. |
46 | It has one predefined instance, @b wxConvUTF8. | |
7c913512 | 47 | |
23324ae1 FM |
48 | @library{wxbase} |
49 | @category{FIXME} | |
7c913512 | 50 | |
ee0b7af0 | 51 | @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview" |
23324ae1 FM |
52 | */ |
53 | class wxMBConvUTF8 : public wxMBConv | |
54 | { | |
55 | public: | |
56 | /** | |
57 | Converts from UTF-8 encoding to Unicode. Returns the size of the destination | |
58 | buffer. | |
59 | */ | |
328f5751 | 60 | size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const; |
23324ae1 FM |
61 | |
62 | /** | |
63 | Converts from Unicode to UTF-8 encoding. Returns the size of the destination | |
64 | buffer. | |
65 | */ | |
328f5751 | 66 | size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const; |
23324ae1 FM |
67 | }; |
68 | ||
69 | ||
e54c96f1 | 70 | |
23324ae1 FM |
71 | /** |
72 | @class wxMBConvUTF16 | |
73 | @wxheader{strconv.h} | |
7c913512 | 74 | |
23324ae1 | 75 | This class is used to convert between multibyte encodings and UTF-16 Unicode |
e54c96f1 | 76 | encoding (also known as UCS-2). Unlike UTF-8 encoding, |
23324ae1 FM |
77 | UTF-16 uses words and not bytes and hence depends on the byte ordering: |
78 | big or little endian. Hence this class is provided in two versions: | |
79 | wxMBConvUTF16LE and wxMBConvUTF16BE and wxMBConvUTF16 itself is just a typedef | |
80 | for one of them (native for the given platform, e.g. LE under Windows and BE | |
81 | under Mac). | |
7c913512 | 82 | |
23324ae1 FM |
83 | @library{wxbase} |
84 | @category{FIXME} | |
7c913512 | 85 | |
8c1cd030 | 86 | @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes overview" |
23324ae1 FM |
87 | */ |
88 | class wxMBConvUTF16 : public wxMBConv | |
89 | { | |
90 | public: | |
91 | /** | |
92 | Converts from UTF-16 encoding to Unicode. Returns the size of the destination | |
93 | buffer. | |
94 | */ | |
328f5751 | 95 | size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const; |
23324ae1 FM |
96 | |
97 | /** | |
98 | Converts from Unicode to UTF-16 encoding. Returns the size of the destination | |
99 | buffer. | |
100 | */ | |
328f5751 | 101 | size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const; |
23324ae1 FM |
102 | }; |
103 | ||
104 | ||
e54c96f1 | 105 | |
23324ae1 FM |
106 | /** |
107 | @class wxCSConv | |
108 | @wxheader{strconv.h} | |
7c913512 | 109 | |
23324ae1 FM |
110 | This class converts between any character sets and Unicode. |
111 | It has one predefined instance, @b wxConvLocal, for the | |
112 | default user character set. | |
7c913512 | 113 | |
23324ae1 FM |
114 | @library{wxbase} |
115 | @category{FIXME} | |
7c913512 | 116 | |
8c1cd030 | 117 | @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview" |
23324ae1 FM |
118 | */ |
119 | class wxCSConv : public wxMBConv | |
120 | { | |
121 | public: | |
23324ae1 | 122 | /** |
ee0b7af0 RR |
123 | Constructor. You can specify the name of the character set you want to |
124 | convert from/to. If the character set name is not recognized, ISO 8859-1 | |
125 | is used as fall back. | |
23324ae1 FM |
126 | */ |
127 | wxCSConv(const wxChar* charset); | |
ee0b7af0 RR |
128 | |
129 | /** | |
130 | Constructor. You can specify an encoding constant for the | |
131 | character set you want to convert from/to. If the encoding | |
132 | is not recognized, ISO 8859-1 is used as fall back. | |
133 | */ | |
7c913512 | 134 | wxCSConv(wxFontEncoding encoding); |
23324ae1 FM |
135 | |
136 | /** | |
137 | Destructor frees any resources needed to perform the conversion. | |
138 | */ | |
139 | ~wxCSConv(); | |
140 | ||
141 | /** | |
142 | Returns @true if the charset (or the encoding) given at constructor is really | |
143 | available to use. Returns @false if ISO 8859-1 will be used instead. | |
23324ae1 FM |
144 | Note this does not mean that a given string will be correctly converted. |
145 | A malformed string may still make conversion functions return @c wxCONV_FAILED. | |
3c4f71cc | 146 | |
1e24c2af | 147 | @since 2.8.2 |
23324ae1 | 148 | */ |
328f5751 | 149 | bool IsOk() const; |
23324ae1 FM |
150 | |
151 | /** | |
152 | Converts from the selected character set to Unicode. Returns length of string | |
153 | written to destination buffer. | |
154 | */ | |
328f5751 | 155 | size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const; |
23324ae1 FM |
156 | |
157 | /** | |
158 | Converts from Unicode to the selected character set. Returns length of string | |
159 | written to destination buffer. | |
160 | */ | |
328f5751 | 161 | size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const; |
23324ae1 FM |
162 | }; |
163 | ||
164 | ||
e54c96f1 | 165 | |
23324ae1 FM |
166 | /** |
167 | @class wxMBConvFile | |
168 | @wxheader{strconv.h} | |
7c913512 FM |
169 | |
170 | This class used to define the class instance | |
23324ae1 FM |
171 | @b wxConvFileName, but nowadays @b wxConvFileName is |
172 | either of type wxConvLibc (on most platforms) or wxConvUTF8 | |
7c913512 FM |
173 | (on MacOS X). @b wxConvFileName converts filenames between |
174 | filesystem multibyte encoding and Unicode. @b wxConvFileName | |
175 | can also be set to something else at run-time, which is used | |
176 | e.g. by wxGTK to use a class which checks the environment | |
177 | variable @b G_FILESYSTEM_ENCODING indicating that filenames | |
178 | should not be interpreted as UTF8 and also for converting | |
23324ae1 | 179 | invalid UTF8 characters (e.g. if there is a filename in iso8859_1) |
7c913512 FM |
180 | to strings with octal values. |
181 | ||
23324ae1 FM |
182 | Since some platforms (such as Win32) use Unicode in the filenames, |
183 | and others (such as Unix) use multibyte encodings, this class should only | |
184 | be used directly if wxMBFILES is defined to 1. A convenience macro, | |
185 | wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could | |
186 | use it like this: | |
7c913512 | 187 | |
23324ae1 FM |
188 | @code |
189 | wxChar *name = wxT("rawfile.doc"); | |
190 | FILE *fil = fopen(wxFNCONV(name), "r"); | |
191 | @endcode | |
7c913512 | 192 | |
23324ae1 | 193 | (although it would be better to use wxFopen(name, wxT("r")) in this case.) |
7c913512 | 194 | |
23324ae1 FM |
195 | @library{wxbase} |
196 | @category{FIXME} | |
7c913512 | 197 | |
ee0b7af0 | 198 | @see @ref overview_mbconv "wxMBConv classes overview" |
23324ae1 FM |
199 | */ |
200 | class wxMBConvFile : public wxMBConv | |
201 | { | |
202 | public: | |
203 | /** | |
204 | Converts from multibyte filename encoding to Unicode. Returns the size of the | |
205 | destination buffer. | |
206 | */ | |
328f5751 | 207 | size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const; |
23324ae1 FM |
208 | |
209 | /** | |
210 | Converts from Unicode to multibyte filename encoding. Returns the size of the | |
211 | destination buffer. | |
212 | */ | |
328f5751 | 213 | size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const; |
23324ae1 FM |
214 | }; |
215 | ||
216 | ||
e54c96f1 | 217 | |
23324ae1 FM |
218 | /** |
219 | @class wxMBConvUTF32 | |
220 | @wxheader{strconv.h} | |
7c913512 | 221 | |
23324ae1 | 222 | This class is used to convert between multibyte encodings and UTF-32 Unicode |
e54c96f1 | 223 | encoding (also known as UCS-4). Unlike UTF-8 encoding, |
23324ae1 FM |
224 | UTF-32 uses (double) words and not bytes and hence depends on the byte ordering: |
225 | big or little endian. Hence this class is provided in two versions: | |
226 | wxMBConvUTF32LE and wxMBConvUTF32BE and wxMBConvUTF32 itself is just a typedef | |
227 | for one of them (native for the given platform, e.g. LE under Windows and BE | |
228 | under Mac). | |
7c913512 | 229 | |
23324ae1 FM |
230 | @library{wxbase} |
231 | @category{FIXME} | |
7c913512 | 232 | |
8c1cd030 | 233 | @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes overview" |
23324ae1 FM |
234 | */ |
235 | class wxMBConvUTF32 : public wxMBConv | |
236 | { | |
237 | public: | |
238 | /** | |
239 | Converts from UTF-32 encoding to Unicode. Returns the size of the destination | |
240 | buffer. | |
241 | */ | |
328f5751 | 242 | size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const; |
23324ae1 FM |
243 | |
244 | /** | |
245 | Converts from Unicode to UTF-32 encoding. Returns the size of the destination | |
246 | buffer. | |
247 | */ | |
328f5751 | 248 | size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const; |
23324ae1 FM |
249 | }; |
250 | ||
251 | ||
e54c96f1 | 252 | |
23324ae1 FM |
253 | /** |
254 | @class wxMBConv | |
255 | @wxheader{strconv.h} | |
7c913512 | 256 | |
23324ae1 FM |
257 | This class is the base class of a hierarchy of classes capable of converting |
258 | text strings between multibyte (SBCS or DBCS) encodings and Unicode. | |
7c913512 FM |
259 | |
260 | In the documentation for this and related classes please notice that | |
23324ae1 FM |
261 | length of the string refers to the number of characters in the string |
262 | not counting the terminating @c NUL, if any. While the size of the string | |
263 | is the total number of bytes in the string, including any trailing @c NUL. | |
264 | Thus, length of wide character string @c L"foo" is 3 while its size can | |
265 | be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as | |
266 | under Windows) or 4 (Unix). | |
7c913512 | 267 | |
23324ae1 FM |
268 | @library{wxbase} |
269 | @category{FIXME} | |
7c913512 | 270 | |
8c1cd030 | 271 | @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview" |
23324ae1 | 272 | */ |
7c913512 | 273 | class wxMBConv |
23324ae1 FM |
274 | { |
275 | public: | |
276 | /** | |
277 | Trivial default constructor. | |
278 | */ | |
279 | wxMBConv(); | |
280 | ||
281 | /** | |
282 | This pure virtual function is overridden in each of the derived classes to | |
283 | return a new copy of the object it is called on. It is used for copying the | |
284 | conversion objects while preserving their dynamic type. | |
285 | */ | |
328f5751 | 286 | virtual wxMBConv* Clone() const; |
23324ae1 FM |
287 | |
288 | /** | |
7c913512 | 289 | This function has the same semantics as ToWChar() |
23324ae1 FM |
290 | except that it converts a wide string to multibyte one. |
291 | */ | |
4cc4bfaf FM |
292 | virtual size_t FromWChar(char* dst, size_t dstLen, |
293 | const wchar_t* src, | |
328f5751 | 294 | size_t srcLen = wxNO_LEN) const; |
23324ae1 FM |
295 | |
296 | /** | |
297 | This function returns 1 for most of the multibyte encodings in which the | |
298 | string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for | |
299 | which the string is terminated with 2 and 4 @c NUL characters respectively. | |
7c913512 | 300 | The other cases are not currently supported and @c wxCONV_FAILED |
23324ae1 FM |
301 | (defined as -1) is returned for them. |
302 | */ | |
328f5751 | 303 | size_t GetMBNulLen() const; |
23324ae1 FM |
304 | |
305 | /** | |
7c913512 | 306 | Returns the maximal value which can be returned by |
23324ae1 FM |
307 | GetMBNulLen() for any conversion object. Currently |
308 | this value is 4. | |
23324ae1 FM |
309 | This method can be used to allocate the buffer with enough space for the |
310 | trailing @c NUL characters for any encoding. | |
311 | */ | |
312 | const size_t GetMaxMBNulLen(); | |
313 | ||
314 | /** | |
315 | This function is deprecated, please use ToWChar() instead. | |
4cc4bfaf FM |
316 | Converts from a string @a in in multibyte encoding to Unicode putting up to |
317 | @a outLen characters into the buffer @e out. | |
318 | If @a out is @NULL, only the length of the string which would result from | |
23324ae1 FM |
319 | the conversion is calculated and returned. Note that this is the length and not |
320 | size, i.e. the returned value does not include the trailing @c NUL. But | |
4cc4bfaf | 321 | when the function is called with a non-@NULL @a out buffer, the @a outLen |
23324ae1 | 322 | parameter should be one more to allow to properly @c NUL-terminate the string. |
3c4f71cc | 323 | |
7c913512 | 324 | @param out |
4cc4bfaf FM |
325 | The output buffer, may be @NULL if the caller is only |
326 | interested in the length of the resulting string | |
7c913512 | 327 | @param in |
4cc4bfaf | 328 | The NUL-terminated input string, cannot be @NULL |
7c913512 | 329 | @param outLen |
4cc4bfaf FM |
330 | The length of the output buffer, including the trailing |
331 | NUL; ignored if @a out is @NULL | |
3c4f71cc | 332 | |
d29a9a8a | 333 | @return The length of the converted string excluding the trailing NUL. |
23324ae1 | 334 | */ |
4cc4bfaf | 335 | virtual size_t MB2WC(wchar_t* out, const char* in, |
328f5751 | 336 | size_t outLen) const; |
23324ae1 FM |
337 | |
338 | /** | |
339 | The most general function for converting a multibyte string to a wide string. | |
4cc4bfaf | 340 | The main case is when @a dst is not @NULL and @a srcLen is not |
23324ae1 | 341 | @c wxNO_LEN (which is defined as @c (size_t)-1): then |
4cc4bfaf | 342 | the function converts exactly @a srcLen bytes starting at @a src into |
23324ae1 | 343 | wide string which it outputs to @e dst. If the length of the resulting wide |
7c913512 | 344 | string is greater than @e dstLen, an error is returned. Note that if |
4cc4bfaf | 345 | @a srcLen bytes don't include @c NUL characters, the resulting wide string is |
23324ae1 | 346 | not @c NUL-terminated either. |
4cc4bfaf | 347 | If @a srcLen is @c wxNO_LEN, the function supposes that the string is |
7c913512 FM |
348 | properly (i.e. as necessary for the encoding handled by this conversion) |
349 | @c NUL-terminated and converts the entire string, including any trailing @c NUL | |
23324ae1 | 350 | bytes. In this case the wide string is also @c NUL-terminated. |
4cc4bfaf | 351 | Finally, if @a dst is @NULL, the function returns the length of the needed |
23324ae1 FM |
352 | buffer. |
353 | */ | |
4cc4bfaf FM |
354 | virtual size_t ToWChar(wchar_t* dst, size_t dstLen, |
355 | const char* src, | |
328f5751 | 356 | size_t srcLen = wxNO_LEN) const; |
23324ae1 FM |
357 | |
358 | /** | |
359 | This function is deprecated, please use FromWChar() instead. | |
23324ae1 | 360 | Converts from Unicode to multibyte encoding. The semantics of this function |
7c913512 | 361 | (including the return value meaning) is the same as for |
23324ae1 | 362 | wxMBConv::MB2WC. |
7c913512 | 363 | Notice that when the function is called with a non-@NULL buffer, the |
4cc4bfaf | 364 | @a n parameter should be the size of the buffer and so it should take |
23324ae1 FM |
365 | into account the trailing @c NUL, which might take two or four bytes for some |
366 | encodings (UTF-16 and UTF-32) and not one. | |
367 | */ | |
328f5751 | 368 | virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const; |
23324ae1 FM |
369 | |
370 | //@{ | |
371 | /** | |
7c913512 | 372 | Converts from multibyte encoding to Unicode by calling |
23324ae1 FM |
373 | wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold |
374 | the result. | |
23324ae1 FM |
375 | The first overload takes a @c NUL-terminated input string. The second one takes |
376 | a | |
377 | string of exactly the specified length and the string may include or not the | |
378 | trailing @c NUL character(s). If the string is not @c NUL-terminated, a | |
7c913512 FM |
379 | temporary |
380 | @c NUL-terminated copy of it suitable for passing to wxMBConv::MB2WC | |
23324ae1 FM |
381 | is made, so it is more efficient to ensure that the string does have the |
382 | appropriate number of @c NUL bytes (which is usually 1 but may be 2 or 4 | |
383 | for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen), | |
384 | especially for long strings. | |
4cc4bfaf | 385 | If @a outLen is not @NULL, it receives the length of the converted |
23324ae1 FM |
386 | string. |
387 | */ | |
328f5751 | 388 | const wxWCharBuffer cMB2WC(const char* in) const; |
ead152be | 389 | const wxWCharBuffer cMB2WC(const char* in, |
328f5751 FM |
390 | size_t inLen, |
391 | size_t outLen) const; | |
23324ae1 FM |
392 | //@} |
393 | ||
394 | //@{ | |
395 | /** | |
396 | Converts from multibyte encoding to the current wxChar type | |
397 | (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char, | |
398 | it returns the parameter unaltered. If wxChar is wchar_t, it returns the | |
399 | result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct | |
400 | return type (without const). | |
401 | */ | |
328f5751 | 402 | const char* cMB2WX(const char* psz) const; |
ead152be | 403 | const wxWCharBuffer cMB2WX(const char* psz) const; |
23324ae1 FM |
404 | //@} |
405 | ||
406 | //@{ | |
407 | /** | |
408 | Converts from Unicode to multibyte encoding by calling WC2MB, | |
409 | allocating a temporary wxCharBuffer to hold the result. | |
23324ae1 FM |
410 | The second overload of this function allows to convert a string of the given |
411 | length @e inLen, whether it is @c NUL-terminated or not (for wide character | |
412 | strings, unlike for the multibyte ones, a single @c NUL is always enough). | |
413 | But notice that just as with cMB2WC(), it is more | |
414 | efficient to pass an already terminated string to this function as otherwise a | |
415 | copy is made internally. | |
4cc4bfaf | 416 | If @a outLen is not @NULL, it receives the length of the converted |
23324ae1 FM |
417 | string. |
418 | */ | |
328f5751 | 419 | const wxCharBuffer cWC2MB(const wchar_t* in) const; |
ead152be | 420 | const wxCharBuffer cWC2MB(const wchar_t* in, |
328f5751 FM |
421 | size_t inLen, |
422 | size_t outLen) const; | |
23324ae1 FM |
423 | //@} |
424 | ||
425 | //@{ | |
426 | /** | |
427 | Converts from Unicode to the current wxChar type. If wxChar is wchar_t, | |
428 | it returns the parameter unaltered. If wxChar is char, it returns the | |
429 | result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct | |
430 | return type (without const). | |
431 | */ | |
328f5751 | 432 | const wchar_t* cWC2WX(const wchar_t* psz) const; |
ead152be | 433 | const wxCharBuffer cWC2WX(const wchar_t* psz) const; |
23324ae1 FM |
434 | //@} |
435 | ||
436 | //@{ | |
437 | /** | |
438 | Converts from the current wxChar type to multibyte encoding. If wxChar is char, | |
439 | it returns the parameter unaltered. If wxChar is wchar_t, it returns the | |
440 | result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct | |
441 | return type (without const). | |
442 | */ | |
328f5751 | 443 | const char* cWX2MB(const wxChar* psz) const; |
ead152be | 444 | const wxCharBuffer cWX2MB(const wxChar* psz) const; |
23324ae1 FM |
445 | //@} |
446 | ||
447 | //@{ | |
448 | /** | |
449 | Converts from the current wxChar type to Unicode. If wxChar is wchar_t, | |
450 | it returns the parameter unaltered. If wxChar is char, it returns the | |
451 | result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct | |
452 | return type (without const). | |
453 | */ | |
328f5751 | 454 | const wchar_t* cWX2WC(const wxChar* psz) const; |
ead152be | 455 | const wxWCharBuffer cWX2WC(const wxChar* psz) const; |
23324ae1 FM |
456 | //@} |
457 | }; | |
e54c96f1 | 458 |