]> git.saurik.com Git - wxWidgets.git/blob - interface/strconv.h
e734ef3f2287cdf9a79158b5e9bc0937810bf91c
[wxWidgets.git] / interface / strconv.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.h
3 // Purpose: documentation for wxMBConvUTF7 class
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10 @class wxMBConvUTF7
11 @wxheader{strconv.h}
12
13 This class converts between the UTF-7 encoding and Unicode.
14 It has one predefined instance, @b wxConvUTF7.
15
16 @b WARNING: this class is not implemented yet.
17
18 @library{wxbase}
19 @category{FIXME}
20
21 @seealso
22 wxMBConvUTF8, @ref overview_mbconvclasses "wxMBConv classes overview"
23 */
24 class wxMBConvUTF7 : public wxMBConv
25 {
26 public:
27 /**
28 Converts from UTF-7 encoding to Unicode. Returns the size of the destination
29 buffer.
30 */
31 #define size_t MB2WC(wchar_t* buf, const char* psz, size_t n) /* implementation is private */
32
33 /**
34 Converts from Unicode to UTF-7 encoding. Returns the size of the destination
35 buffer.
36 */
37 #define size_t WC2MB(char* buf, const wchar_t* psz, size_t n) /* implementation is private */
38 };
39
40
41 /**
42 @class wxMBConvUTF8
43 @wxheader{strconv.h}
44
45 This class converts between the UTF-8 encoding and Unicode.
46 It has one predefined instance, @b wxConvUTF8.
47
48 @library{wxbase}
49 @category{FIXME}
50
51 @seealso
52 wxMBConvUTF7, @ref overview_mbconvclasses "wxMBConv classes overview"
53 */
54 class wxMBConvUTF8 : public wxMBConv
55 {
56 public:
57 /**
58 Converts from UTF-8 encoding to Unicode. Returns the size of the destination
59 buffer.
60 */
61 #define size_t MB2WC(wchar_t* buf, const char* psz, size_t n) /* implementation is private */
62
63 /**
64 Converts from Unicode to UTF-8 encoding. Returns the size of the destination
65 buffer.
66 */
67 #define size_t WC2MB(char* buf, const wchar_t* psz, size_t n) /* implementation is private */
68 };
69
70
71 /**
72 @class wxMBConvUTF16
73 @wxheader{strconv.h}
74
75 This class is used to convert between multibyte encodings and UTF-16 Unicode
76 encoding (also known as UCS-2). Unlike UTF-8 encoding,
77 UTF-16 uses words and not bytes and hence depends on the byte ordering:
78 big or little endian. Hence this class is provided in two versions:
79 wxMBConvUTF16LE and wxMBConvUTF16BE and wxMBConvUTF16 itself is just a typedef
80 for one of them (native for the given platform, e.g. LE under Windows and BE
81 under Mac).
82
83 @library{wxbase}
84 @category{FIXME}
85
86 @seealso
87 wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconvclasses "wxMBConv classes
88 overview"
89 */
90 class wxMBConvUTF16 : public wxMBConv
91 {
92 public:
93 /**
94 Converts from UTF-16 encoding to Unicode. Returns the size of the destination
95 buffer.
96 */
97 #define size_t MB2WC(wchar_t* buf, const char* psz, size_t n) /* implementation is private */
98
99 /**
100 Converts from Unicode to UTF-16 encoding. Returns the size of the destination
101 buffer.
102 */
103 #define size_t WC2MB(char* buf, const wchar_t* psz, size_t n) /* implementation is private */
104 };
105
106
107 /**
108 @class wxCSConv
109 @wxheader{strconv.h}
110
111 This class converts between any character sets and Unicode.
112 It has one predefined instance, @b wxConvLocal, for the
113 default user character set.
114
115 @library{wxbase}
116 @category{FIXME}
117
118 @seealso
119 wxMBConv, wxEncodingConverter, @ref overview_mbconvclasses "wxMBConv classes
120 overview"
121 */
122 class wxCSConv : public wxMBConv
123 {
124 public:
125 //@{
126 /**
127 Constructor. You may specify either the name of the character set you want to
128 convert from/to or an encoding constant. If the character set name (or the
129 encoding) is not recognized, ISO 8859-1 is used as fall back.
130 */
131 wxCSConv(const wxChar* charset);
132 wxCSConv(wxFontEncoding encoding);
133 //@}
134
135 /**
136 Destructor frees any resources needed to perform the conversion.
137 */
138 ~wxCSConv();
139
140 /**
141 Returns @true if the charset (or the encoding) given at constructor is really
142 available to use. Returns @false if ISO 8859-1 will be used instead.
143
144 Note this does not mean that a given string will be correctly converted.
145 A malformed string may still make conversion functions return @c wxCONV_FAILED.
146
147 This function is new since wxWidgets version 2.8.2
148 */
149 #define bool IsOk() /* implementation is private */
150
151 /**
152 Converts from the selected character set to Unicode. Returns length of string
153 written to destination buffer.
154 */
155 #define size_t MB2WC(wchar_t* buf, const char* psz, size_t n) /* implementation is private */
156
157 /**
158 Converts from Unicode to the selected character set. Returns length of string
159 written to destination buffer.
160 */
161 #define size_t WC2MB(char* buf, const wchar_t* psz, size_t n) /* implementation is private */
162 };
163
164
165 /**
166 @class wxMBConvFile
167 @wxheader{strconv.h}
168
169 This class used to define the class instance
170 @b wxConvFileName, but nowadays @b wxConvFileName is
171 either of type wxConvLibc (on most platforms) or wxConvUTF8
172 (on MacOS X). @b wxConvFileName converts filenames between
173 filesystem multibyte encoding and Unicode. @b wxConvFileName
174 can also be set to a something else at run-time which is used
175 e.g. by wxGTK to use a class which checks the environment
176 variable @b G_FILESYSTEM_ENCODING indicating that filenames
177 should not be interpreted as UTF8 and also for converting
178 invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
179 to strings with octal values.
180
181 Since some platforms (such as Win32) use Unicode in the filenames,
182 and others (such as Unix) use multibyte encodings, this class should only
183 be used directly if wxMBFILES is defined to 1. A convenience macro,
184 wxFNCONV, is defined to wxConvFileName-cWX2MB in this case. You could
185 use it like this:
186
187 @code
188 wxChar *name = wxT("rawfile.doc");
189 FILE *fil = fopen(wxFNCONV(name), "r");
190 @endcode
191
192 (although it would be better to use wxFopen(name, wxT("r")) in this case.)
193
194 @library{wxbase}
195 @category{FIXME}
196
197 @seealso
198 @ref overview_mbconvclasses "wxMBConv classes overview"
199 */
200 class wxMBConvFile : public wxMBConv
201 {
202 public:
203 /**
204 Converts from multibyte filename encoding to Unicode. Returns the size of the
205 destination buffer.
206 */
207 #define size_t MB2WC(wchar_t* buf, const char* psz, size_t n) /* implementation is private */
208
209 /**
210 Converts from Unicode to multibyte filename encoding. Returns the size of the
211 destination buffer.
212 */
213 #define size_t WC2MB(char* buf, const wchar_t* psz, size_t n) /* implementation is private */
214 };
215
216
217 /**
218 @class wxMBConvUTF32
219 @wxheader{strconv.h}
220
221 This class is used to convert between multibyte encodings and UTF-32 Unicode
222 encoding (also known as UCS-4). Unlike UTF-8 encoding,
223 UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
224 big or little endian. Hence this class is provided in two versions:
225 wxMBConvUTF32LE and wxMBConvUTF32BE and wxMBConvUTF32 itself is just a typedef
226 for one of them (native for the given platform, e.g. LE under Windows and BE
227 under Mac).
228
229 @library{wxbase}
230 @category{FIXME}
231
232 @seealso
233 wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconvclasses "wxMBConv classes
234 overview"
235 */
236 class wxMBConvUTF32 : public wxMBConv
237 {
238 public:
239 /**
240 Converts from UTF-32 encoding to Unicode. Returns the size of the destination
241 buffer.
242 */
243 #define size_t MB2WC(wchar_t* buf, const char* psz, size_t n) /* implementation is private */
244
245 /**
246 Converts from Unicode to UTF-32 encoding. Returns the size of the destination
247 buffer.
248 */
249 #define size_t WC2MB(char* buf, const wchar_t* psz, size_t n) /* implementation is private */
250 };
251
252
253 /**
254 @class wxMBConv
255 @wxheader{strconv.h}
256
257 This class is the base class of a hierarchy of classes capable of converting
258 text strings between multibyte (SBCS or DBCS) encodings and Unicode.
259
260 In the documentation for this and related classes please notice that
261 length of the string refers to the number of characters in the string
262 not counting the terminating @c NUL, if any. While the size of the string
263 is the total number of bytes in the string, including any trailing @c NUL.
264 Thus, length of wide character string @c L"foo" is 3 while its size can
265 be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
266 under Windows) or 4 (Unix).
267
268 @library{wxbase}
269 @category{FIXME}
270
271 @seealso
272 wxCSConv, wxEncodingConverter, @ref overview_mbconvclasses "wxMBConv classes
273 overview"
274 */
275 class wxMBConv
276 {
277 public:
278 /**
279 Trivial default constructor.
280 */
281 wxMBConv();
282
283 /**
284 This pure virtual function is overridden in each of the derived classes to
285 return a new copy of the object it is called on. It is used for copying the
286 conversion objects while preserving their dynamic type.
287 */
288 virtual wxMBConv * Clone();
289
290 /**
291 This function has the same semantics as ToWChar()
292 except that it converts a wide string to multibyte one.
293 */
294 virtual size_t FromWChar(char * dst, size_t dstLen,
295 const wchar_t * src,
296 size_t srcLen = wxNO_LEN);
297
298 /**
299 This function returns 1 for most of the multibyte encodings in which the
300 string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
301 which the string is terminated with 2 and 4 @c NUL characters respectively.
302 The other cases are not currently supported and @c wxCONV_FAILED
303 (defined as -1) is returned for them.
304 */
305 size_t GetMBNulLen();
306
307 /**
308 Returns the maximal value which can be returned by
309 GetMBNulLen() for any conversion object. Currently
310 this value is 4.
311
312 This method can be used to allocate the buffer with enough space for the
313 trailing @c NUL characters for any encoding.
314 */
315 const size_t GetMaxMBNulLen();
316
317 /**
318 This function is deprecated, please use ToWChar() instead
319
320 Converts from a string @e in in multibyte encoding to Unicode putting up to
321 @e outLen characters into the buffer @e out.
322
323 If @e out is @NULL, only the length of the string which would result from
324 the conversion is calculated and returned. Note that this is the length and not
325 size, i.e. the returned value does not include the trailing @c NUL. But
326 when the function is called with a non-@NULL @e out buffer, the @e outLen
327 parameter should be one more to allow to properly @c NUL-terminate the string.
328
329 @param out
330 The output buffer, may be @NULL if the caller is only
331 interested in the length of the resulting string
332
333 @param in
334 The NUL-terminated input string, cannot be @NULL
335
336 @param outLen
337 The length of the output buffer but including
338 NUL, ignored if out is @NULL
339
340 @returns The length of the converted string excluding the trailing NUL.
341 */
342 #define virtual size_t MB2WC(wchar_t * out, const char * in,
343 size_t outLen) /* implementation is private */
344
345 /**
346 The most general function for converting a multibyte string to a wide string.
347 The main case is when @e dst is not @NULL and @e srcLen is not
348 @c wxNO_LEN (which is defined as @c (size_t)-1): then
349 the function converts exactly @e srcLen bytes starting at @e src into
350 wide string which it output to @e dst. If the length of the resulting wide
351 string is greater than @e dstLen, an error is returned. Note that if
352 @e srcLen bytes don't include @c NUL characters, the resulting wide string is
353 not @c NUL-terminated neither.
354
355 If @e srcLen is @c wxNO_LEN, the function supposes that the string is
356 properly (i.e. as necessary for the encoding handled by this conversion)
357 @c NUL-terminated and converts the entire string, including any trailing @c NUL
358 bytes. In this case the wide string is also @c NUL-terminated.
359
360 Finally, if @e dst is @NULL, the function returns the length of the needed
361 buffer.
362 */
363 virtual size_t ToWChar(wchar_t * dst, size_t dstLen,
364 const char * src,
365 size_t srcLen = wxNO_LEN);
366
367 /**
368 This function is deprecated, please use FromWChar() instead
369
370 Converts from Unicode to multibyte encoding. The semantics of this function
371 (including the return value meaning) is the same as for
372 wxMBConv::MB2WC.
373
374 Notice that when the function is called with a non-@NULL buffer, the
375 @e n parameter should be the size of the buffer and so it should take
376 into account the trailing @c NUL, which might take two or four bytes for some
377 encodings (UTF-16 and UTF-32) and not one.
378 */
379 #define virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) /* implementation is private */
380
381 //@{
382 /**
383 Converts from multibyte encoding to Unicode by calling
384 wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold
385 the result.
386
387 The first overload takes a @c NUL-terminated input string. The second one takes
388 a
389 string of exactly the specified length and the string may include or not the
390 trailing @c NUL character(s). If the string is not @c NUL-terminated, a
391 temporary
392 @c NUL-terminated copy of it suitable for passing to wxMBConv::MB2WC
393 is made, so it is more efficient to ensure that the string is does have the
394 appropriate number of @c NUL bytes (which is usually 1 but may be 2 or 4
395 for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen),
396 especially for long strings.
397
398 If @e outLen is not-@NULL, it receives the length of the converted
399 string.
400 */
401 const wxWCharBuffer cMB2WC(const char * in);
402 const wxWCharBuffer cMB2WC(const char * in, size_t inLen,
403 size_t outLen);
404 //@}
405
406 //@{
407 /**
408 Converts from multibyte encoding to the current wxChar type
409 (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
410 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
411 result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
412 return type (without const).
413 */
414 const char* cMB2WX(const char* psz);
415 const wxWCharBuffer cMB2WX(const char* psz);
416 //@}
417
418 //@{
419 /**
420 Converts from Unicode to multibyte encoding by calling WC2MB,
421 allocating a temporary wxCharBuffer to hold the result.
422
423 The second overload of this function allows to convert a string of the given
424 length @e inLen, whether it is @c NUL-terminated or not (for wide character
425 strings, unlike for the multibyte ones, a single @c NUL is always enough).
426 But notice that just as with @ref wxMBConv::mb2wc cMB2WC, it is more
427 efficient to pass an already terminated string to this function as otherwise a
428 copy is made internally.
429
430 If @e outLen is not-@NULL, it receives the length of the converted
431 string.
432 */
433 const wxCharBuffer cWC2MB(const wchar_t* in);
434 const wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen,
435 size_t outLen);
436 //@}
437
438 //@{
439 /**
440 Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
441 it returns the parameter unaltered. If wxChar is char, it returns the
442 result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
443 return type (without const).
444 */
445 const wchar_t* cWC2WX(const wchar_t* psz);
446 const wxCharBuffer cWC2WX(const wchar_t* psz);
447 //@}
448
449 //@{
450 /**
451 Converts from the current wxChar type to multibyte encoding. If wxChar is char,
452 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
453 result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
454 return type (without const).
455 */
456 const char* cWX2MB(const wxChar* psz);
457 const wxCharBuffer cWX2MB(const wxChar* psz);
458 //@}
459
460 //@{
461 /**
462 Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
463 it returns the parameter unaltered. If wxChar is char, it returns the
464 result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
465 return type (without const).
466 */
467 const wchar_t* cWX2WC(const wxChar* psz);
468 const wxWCharBuffer cWX2WC(const wxChar* psz);
469 //@}
470 };