]> git.saurik.com Git - wxWidgets.git/blob - interface/strconv.h
20cc3613980accc4fead7cbea8ec68ff926d6e70
[wxWidgets.git] / interface / strconv.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.h
3 // Purpose: interface of wxMBConvUTF7
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10 @class wxMBConvUTF7
11 @wxheader{strconv.h}
12
13 Conversion class translating between the UTF-7 encoding and Unicode.
14 A single predefined instance of it exists, @b wxConvUTF7.
15
16 @b WARNING: this class is not implemented yet.
17
18 @library{wxbase}
19 @category{FIXME}
20
21 @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview"
22 */
23 class wxMBConvUTF7 : public wxMBConv
24 {
25 public:
26 /**
27 Converts from UTF-7 encoding to Unicode. Has the same signature as the virtual wxMBConv::MB2WC(). Returns the size of the destination
28 buffer (NOTE(review): wxMBConv::MB2WC() documents its result as the length of the converted string; confirm which is meant).
29 */
30 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
31
32 /**
33 Converts from Unicode to UTF-7 encoding. Has the same signature as the virtual wxMBConv::WC2MB(). Returns the size of the destination
34 buffer (NOTE(review): wxMBConv::WC2MB() refers to wxMBConv::MB2WC() for the meaning of its return value; confirm which is meant).
35 */
36 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
37 };
38
39
40
41 /**
42 @class wxMBConvUTF8
43 @wxheader{strconv.h}
44
45 Conversion class translating between the UTF-8 encoding and Unicode.
46 A single predefined instance of it exists, @b wxConvUTF8.
47
48 @library{wxbase}
49 @category{FIXME}
50
51 @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview"
52 */
53 class wxMBConvUTF8 : public wxMBConv
54 {
55 public:
56 /**
57 Converts from UTF-8 encoding to Unicode. Has the same signature as the virtual wxMBConv::MB2WC(). Returns the size of the destination
58 buffer (NOTE(review): wxMBConv::MB2WC() documents its result as the length of the converted string; confirm which is meant).
59 */
60 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
61
62 /**
63 Converts from Unicode to UTF-8 encoding. Has the same signature as the virtual wxMBConv::WC2MB(). Returns the size of the destination
64 buffer (NOTE(review): wxMBConv::WC2MB() refers to wxMBConv::MB2WC() for the meaning of its return value; confirm which is meant).
65 */
66 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
67 };
68
69
70
71 /**
72 @class wxMBConvUTF16
73 @wxheader{strconv.h}
74
75 This class is used to convert between multibyte encodings and UTF-16 Unicode
76 encoding (also known as UCS-2). Unlike the UTF-8 encoding,
77 UTF-16 uses words and not bytes and hence depends on the byte ordering:
78 big or little endian. Hence this class is provided in two versions,
79 wxMBConvUTF16LE and wxMBConvUTF16BE, and wxMBConvUTF16 itself is just a typedef
80 for one of them (native for the given platform, e.g. LE under Windows and BE
81 under Mac).
82
83 @library{wxbase}
84 @category{FIXME}
85
86 @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes overview"
87
88 */
89 class wxMBConvUTF16 : public wxMBConv
90 {
91 public:
92 /**
93 Converts from UTF-16 encoding to Unicode. Has the same signature as the virtual wxMBConv::MB2WC(). Returns the size of the destination
94 buffer (NOTE(review): wxMBConv::MB2WC() documents its result as the length of the converted string; confirm which is meant).
95 */
96 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
97
98 /**
99 Converts from Unicode to UTF-16 encoding. Has the same signature as the virtual wxMBConv::WC2MB(). Returns the size of the destination
100 buffer (NOTE(review): wxMBConv::WC2MB() refers to wxMBConv::MB2WC() for the meaning of its return value; confirm which is meant).
101 */
102 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
103 };
104
105
106
107 /**
108 @class wxCSConv
109 @wxheader{strconv.h}
110
111 This class converts between any character set and Unicode.
112 It has one predefined instance, @b wxConvLocal, for the
113 default user character set.
114
115 @library{wxbase}
116 @category{FIXME}
117
118 @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
119
120 */
121 class wxCSConv : public wxMBConv
122 {
123 public:
124 /**
125 Constructor taking the name of the character set you want to
126 convert from/to. If the character set name is not recognized, ISO 8859-1
127 is used as a fallback.
128 */
129 wxCSConv(const wxChar* charset);
130
131 /**
132 Constructor taking an encoding constant identifying the
133 character set you want to convert from/to. If the encoding
134 is not recognized, ISO 8859-1 is used as a fallback.
135 */
136 wxCSConv(wxFontEncoding encoding);
137
138 /**
139 Destructor frees any resources needed to perform the conversion.
140 */
141 ~wxCSConv();
142
143 /**
144 Returns @true if the charset (or the encoding) given to the constructor is really
145 available for use. Returns @false if ISO 8859-1 will be used instead.
146 Note that this does not mean that a given string will be correctly converted:
147 a malformed string may still make the conversion functions return @c wxCONV_FAILED.
148
149 @since 2.8.2
150 */
151 bool IsOk() const;
152
153 /**
154 Converts from the selected character set to Unicode. Has the same signature as the virtual wxMBConv::MB2WC(). Returns the length of the string
155 written to the destination buffer.
156 */
157 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
158
159 /**
160 Converts from Unicode to the selected character set. Has the same signature as the virtual wxMBConv::WC2MB(). Returns the length of the string
161 written to the destination buffer.
162 */
163 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
164 };
165
166
167
168 /**
169 @class wxMBConvFile
170 @wxheader{strconv.h}
171
172 This class used to define the class instance
173 @b wxConvFileName, but nowadays @b wxConvFileName is
174 either of type wxConvLibc (on most platforms) or wxConvUTF8
175 (on MacOS X). @b wxConvFileName converts filenames between
176 filesystem multibyte encoding and Unicode. @b wxConvFileName
177 can also be set to something else at run-time, which is used
178 e.g. by wxGTK to use a class which checks the environment
179 variable @b G_FILESYSTEM_ENCODING indicating that filenames
180 should not be interpreted as UTF8 and also for converting
181 invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
182 to strings with octal values.
183
184 Since some platforms (such as Win32) use Unicode in the filenames,
185 and others (such as Unix) use multibyte encodings, this class should only
186 be used directly if wxMBFILES is defined to 1. A convenience macro,
187 wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could
188 use it like this:
189
190 @code
191 wxChar *name = wxT("rawfile.doc");
192 FILE *fil = fopen(wxFNCONV(name), "r");
193 @endcode
194
195 (although it would be better to use wxFopen(name, wxT("r")) in this case.)
196
197 @library{wxbase}
198 @category{FIXME}
199
200 @see @ref overview_mbconv "wxMBConv classes overview"
201 */
202 class wxMBConvFile : public wxMBConv
203 {
204 public:
205 /**
206 Converts from multibyte filename encoding to Unicode. Has the same signature as the virtual wxMBConv::MB2WC(). Returns the size of the
207 destination buffer (NOTE(review): wxMBConv::MB2WC() documents its result as the length of the converted string; confirm which is meant).
208 */
209 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
210
211 /**
212 Converts from Unicode to multibyte filename encoding. Has the same signature as the virtual wxMBConv::WC2MB(). Returns the size of the
213 destination buffer (NOTE(review): wxMBConv::WC2MB() refers to wxMBConv::MB2WC() for the meaning of its return value; confirm which is meant).
214 */
215 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
216 };
217
218
219
220 /**
221 @class wxMBConvUTF32
222 @wxheader{strconv.h}
223
224 This class is used to convert between multibyte encodings and UTF-32 Unicode
225 encoding (also known as UCS-4). Unlike the UTF-8 encoding,
226 UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
227 big or little endian. Hence this class is provided in two versions,
228 wxMBConvUTF32LE and wxMBConvUTF32BE, and wxMBConvUTF32 itself is just a typedef
229 for one of them (native for the given platform, e.g. LE under Windows and BE
230 under Mac).
231
232 @library{wxbase}
233 @category{FIXME}
234
235 @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes overview"
236
237 */
238 class wxMBConvUTF32 : public wxMBConv
239 {
240 public:
241 /**
242 Converts from UTF-32 encoding to Unicode. Has the same signature as the virtual wxMBConv::MB2WC(). Returns the size of the destination
243 buffer (NOTE(review): wxMBConv::MB2WC() documents its result as the length of the converted string; confirm which is meant).
244 */
245 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
246
247 /**
248 Converts from Unicode to UTF-32 encoding. Has the same signature as the virtual wxMBConv::WC2MB(). Returns the size of the destination
249 buffer (NOTE(review): wxMBConv::WC2MB() refers to wxMBConv::MB2WC() for the meaning of its return value; confirm which is meant).
250 */
251 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
252 };
253
254
255
256 /**
257 @class wxMBConv
258 @wxheader{strconv.h}
259
260 This class is the base class of a hierarchy of classes capable of converting
261 text strings between multibyte (SBCS or DBCS) encodings and Unicode.
262
263 In the documentation for this and related classes please notice that the
264 length of a string refers to the number of characters in the string,
265 not counting the terminating @c NUL, if any, while the size of the string
266 is the total number of bytes in the string, including any trailing @c NUL.
267 Thus, the length of the wide character string @c L"foo" is 3 while its size can
268 be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
269 under Windows) or 4 (Unix).
270
271 @library{wxbase}
272 @category{FIXME}
273
274 @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
275
276 */
277 class wxMBConv
278 {
279 public:
280 /**
281 Trivial default constructor.
282 */
283 wxMBConv();
284
285 /**
286 This pure virtual function is overridden in each of the derived classes to
287 return a new copy of the object it is called on. It is used for copying the
288 conversion objects while preserving their dynamic type.
289 */
290 virtual wxMBConv* Clone() const;
291
292 /**
293 This function has the same semantics as ToWChar()
294 except that it converts a wide string to a multibyte one.
295 */
296 virtual size_t FromWChar(char* dst, size_t dstLen,
297 const wchar_t* src,
298 size_t srcLen = wxNO_LEN) const;
299
300 /**
301 This function returns 1 for most of the multibyte encodings in which the
302 string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
303 which the string is terminated with 2 and 4 @c NUL characters respectively.
304 The other cases are not currently supported and @c wxCONV_FAILED
305 (defined as -1) is returned for them.
306 */
307 size_t GetMBNulLen() const;
308
309 /**
310 Returns the maximal value which can be returned by
311 GetMBNulLen() for any conversion object. Currently
312 this value is 4. The result does not depend on a particular object, hence the function is static.
313 This method can be used to allocate the buffer with enough space for the
314 trailing @c NUL characters for any encoding.
315 */
316 static size_t GetMaxMBNulLen();
317
318 /**
319 This function is deprecated, please use ToWChar() instead.
320 Converts from a string @a in in multibyte encoding to Unicode putting up to
321 @a outLen characters into the buffer @a out.
322 If @a out is @NULL, only the length of the string which would result from
323 the conversion is calculated and returned. Note that this is the length and not
324 size, i.e. the returned value does not include the trailing @c NUL. But
325 when the function is called with a non-@NULL @a out buffer, the @a outLen
326 parameter should be one more to allow to properly @c NUL-terminate the string.
327
328 @param out
329 The output buffer, may be @NULL if the caller is only
330 interested in the length of the resulting string
331 @param in
332 The NUL-terminated input string, cannot be @NULL
333 @param outLen
334 The length of the output buffer, including the trailing
335 NUL; ignored if @a out is @NULL
336
337 @returns The length of the converted string excluding the trailing NUL.
338 */
339 virtual size_t MB2WC(wchar_t* out, const char* in,
340 size_t outLen) const;
341
342 /**
343 The most general function for converting a multibyte string to a wide string.
344 The main case is when @a dst is not @NULL and @a srcLen is not
345 @c wxNO_LEN (which is defined as @c (size_t)-1): then
346 the function converts exactly @a srcLen bytes starting at @a src into a
347 wide string which it outputs to @a dst. If the length of the resulting wide
348 string is greater than @a dstLen, an error is returned. Note that if
349 @a srcLen bytes don't include @c NUL characters, the resulting wide string is
350 not @c NUL-terminated either.
351 If @a srcLen is @c wxNO_LEN, the function supposes that the string is
352 properly (i.e. as necessary for the encoding handled by this conversion)
353 @c NUL-terminated and converts the entire string, including any trailing @c NUL
354 bytes. In this case the wide string is also @c NUL-terminated.
355 Finally, if @a dst is @NULL, the function returns the length of the needed
356 buffer.
357 */
358 virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
359 const char* src,
360 size_t srcLen = wxNO_LEN) const;
361
362 /**
363 This function is deprecated, please use FromWChar() instead.
364 Converts from Unicode to multibyte encoding. The semantics of this function
365 (including the return value meaning) is the same as for
366 MB2WC().
367 Notice that when the function is called with a non-@NULL buffer, the
368 @a n parameter should be the size of the buffer and so it should take
369 into account the trailing @c NUL, which might take two or four bytes for some
370 encodings (UTF-16 and UTF-32) and not one.
371 */
372 virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
373
374 //@{
375 /**
376 Converts from multibyte encoding to Unicode by calling
377 MB2WC(), allocating a temporary wxWCharBuffer to hold
378 the result.
379 The first overload takes a @c NUL-terminated input string. The second one takes
380 a string of exactly the specified length @a inLen, and the string may or may
381 not include the trailing @c NUL character(s).
382 If the string is not @c NUL-terminated, a temporary
383 @c NUL-terminated copy of it
384 suitable for passing to MB2WC()
385 is made, so it is more efficient to ensure that the string does have the
386 appropriate number of @c NUL bytes (which is usually 1 but may be 2 or 4
387 for UTF-16 or UTF-32, see GetMBNulLen()),
388 especially for long strings.
389 If @a outLen is not @NULL, it receives the length of the converted
390 string.
391 */
392 const wxWCharBuffer cMB2WC(const char* in) const;
393 const wxWCharBuffer cMB2WC(const char* in,
394 size_t inLen,
395 size_t* outLen) const;
396 //@}
397
398 //@{
399 /**
400 Converts from multibyte encoding to the current wxChar type
401 (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
402 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
403 result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
404 return type (without const).
405 */
406 const char* cMB2WX(const char* psz) const;
407 const wxWCharBuffer cMB2WX(const char* psz) const;
408 //@}
409
410 //@{
411 /**
412 Converts from Unicode to multibyte encoding by calling WC2MB(),
413 allocating a temporary wxCharBuffer to hold the result.
414 The second overload of this function allows to convert a string of the given
415 length @a inLen, whether it is @c NUL-terminated or not (for wide character
416 strings, unlike for the multibyte ones, a single @c NUL is always enough).
417 But notice that just as with cMB2WC(), it is more
418 efficient to pass an already terminated string to this function as otherwise a
419 copy is made internally.
420 If @a outLen is not @NULL, it receives the length of the converted
421 string.
422 */
423 const wxCharBuffer cWC2MB(const wchar_t* in) const;
424 const wxCharBuffer cWC2MB(const wchar_t* in,
425 size_t inLen,
426 size_t* outLen) const;
427 //@}
428
429 //@{
430 /**
431 Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
432 it returns the parameter unaltered. If wxChar is char, it returns the
433 result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
434 return type (without const).
435 */
436 const wchar_t* cWC2WX(const wchar_t* psz) const;
437 const wxCharBuffer cWC2WX(const wchar_t* psz) const;
438 //@}
439
440 //@{
441 /**
442 Converts from the current wxChar type to multibyte encoding. If wxChar is char,
443 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
444 result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
445 return type (without const).
446 */
447 const char* cWX2MB(const wxChar* psz) const;
448 const wxCharBuffer cWX2MB(const wxChar* psz) const;
449 //@}
450
451 //@{
452 /**
453 Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
454 it returns the parameter unaltered. If wxChar is char, it returns the
455 result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
456 return type (without const).
457 */
458 const wchar_t* cWX2WC(const wxChar* psz) const;
459 const wxWCharBuffer cWX2WC(const wxChar* psz) const;
460 //@}
461 };
462