]> git.saurik.com Git - wxWidgets.git/blob - interface/strconv.h
c2076bc14c205a6637c09b027aa6485a882784b1
[wxWidgets.git] / interface / strconv.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.h
3 // Purpose: interface of wxMBConv and its derived conversion classes
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10 @class wxMBConvUTF7
11 @wxheader{strconv.h}
12
13 This class converts between the UTF-7 encoding and Unicode.
14 It has one predefined instance, @b wxConvUTF7.
15
16 @library{wxbase}
17 @category{conv}
18
19 @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview"
20 */
21 class wxMBConvUTF7 : public wxMBConv
22 {
23 public:
24 /**
25 Converts the UTF-7 encoded string @a psz to Unicode, storing the result in @a buf.
26 Returns the size of the destination buffer. See also wxMBConv::MB2WC().
27 */
28 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
29
30 /**
31 Converts the Unicode string @a psz to UTF-7 encoding, storing the result in @a buf.
32 Returns the size of the destination buffer. See also wxMBConv::WC2MB().
33 */
34 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
35 };
36
37
38
39 /**
40 @class wxMBConvUTF8
41 @wxheader{strconv.h}
42
43 This class converts between the UTF-8 encoding and Unicode.
44 It has one predefined instance, @b wxConvUTF8.
45
46 @library{wxbase}
47 @category{conv}
48
49 @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview"
50 */
51 class wxMBConvUTF8 : public wxMBConv
52 {
53 public:
54 /**
55 Converts the UTF-8 encoded string @a psz to Unicode, storing the result in @a buf.
56 Returns the size of the destination buffer. See also wxMBConv::MB2WC().
57 */
58 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
59
60 /**
61 Converts the Unicode string @a psz to UTF-8 encoding, storing the result in @a buf.
62 Returns the size of the destination buffer. See also wxMBConv::WC2MB().
63 */
64 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
65 };
66
67
68
69 /**
70 @class wxMBConvUTF16
71 @wxheader{strconv.h}
72
73 This class is used to convert between multibyte encodings and UTF-16 Unicode
74 encoding (also known as UCS-2). Unlike the UTF-8 encoding,
75 UTF-16 uses words and not bytes and hence depends on the byte ordering:
76 big or little endian. Hence this class is provided in two versions,
77 wxMBConvUTF16LE and wxMBConvUTF16BE, and wxMBConvUTF16 itself is just a typedef
78 for one of them (native for the given platform, e.g. LE under Windows and BE
79 under Mac).
80
81 @library{wxbase}
82 @category{conv}
83
84 @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes overview"
85 */
86 class wxMBConvUTF16 : public wxMBConv
87 {
88 public:
89 /**
90 Converts the UTF-16 encoded string @a psz to Unicode, storing the result in @a buf.
91 Returns the size of the destination buffer. See also wxMBConv::MB2WC().
92 */
93 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
94
95 /**
96 Converts the Unicode string @a psz to UTF-16 encoding, storing the result in @a buf.
97 Returns the size of the destination buffer. See also wxMBConv::WC2MB().
98 */
99 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
100 };
101
102
103
104 /**
105 @class wxCSConv
106 @wxheader{strconv.h}
107
108 This class converts between any character set and Unicode.
109 It has one predefined instance, @b wxConvLocal, for the
110 default user character set.
111
112 @library{wxbase}
113 @category{conv}
114
115 @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
116 */
117 class wxCSConv : public wxMBConv
118 {
119 public:
120 /**
121 Constructor. You can specify the name of the character set you want to
122 convert from/to. If the character set name is not recognized, ISO 8859-1
123 is used as a fallback.
124 */
125 wxCSConv(const wxChar* charset);
126
127 /**
128 Constructor. You can specify an encoding constant for the
129 character set you want to convert from/to. If the encoding
130 is not recognized, ISO 8859-1 is used as a fallback.
131 */
132 wxCSConv(wxFontEncoding encoding);
133
134 /**
135 Destructor frees any resources needed to perform the conversion.
136 */
137 ~wxCSConv();
138
139 /**
140 Returns @true if the charset (or the encoding) given to the constructor is really
141 available to use. Returns @false if ISO 8859-1 will be used instead.
142 Note that this does not mean that a given string will be correctly converted.
143 A malformed string may still make conversion functions return @c wxCONV_FAILED.
144
145 @since 2.8.2
146 */
147 bool IsOk() const;
148
149 /**
150 Converts from the selected character set to Unicode. Returns the length of the
151 string written to the destination buffer. See also wxMBConv::MB2WC().
152 */
153 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
154
155 /**
156 Converts from Unicode to the selected character set. Returns the length of the
157 string written to the destination buffer. See also wxMBConv::WC2MB().
158 */
159 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
160 };
161
162
163
164 /**
165 @class wxMBConvFile
166 @wxheader{strconv.h}
167
168 This class used to define the class instance
169 @b wxConvFileName, but nowadays @b wxConvFileName is
170 either of type wxConvLibc (on most platforms) or wxConvUTF8
171 (on MacOS X). @b wxConvFileName converts filenames between
172 filesystem multibyte encoding and Unicode. @b wxConvFileName
173 can also be set to something else at run-time, which is used
174 e.g. by wxGTK to use a class which checks the environment
175 variable @b G_FILESYSTEM_ENCODING indicating that filenames
176 should not be interpreted as UTF8 and also for converting
177 invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
178 to strings with octal values.
179
180 Since some platforms (such as Win32) use Unicode in the filenames,
181 and others (such as Unix) use multibyte encodings, this class should only
182 be used directly if wxMBFILES is defined to 1. A convenience macro,
183 wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could
184 use it like this:
185
186 @code
187 wxChar *name = wxT("rawfile.doc");
188 FILE *fil = fopen(wxFNCONV(name), "r");
189 @endcode
190
191 (although it would be better to use wxFopen(name, wxT("r")) in this case.)
192
193 @library{wxbase}
194 @category{conv}
195
196 @see @ref overview_mbconv "wxMBConv classes overview"
197 */
198 class wxMBConvFile : public wxMBConv
199 {
200 public:
201 /**
202 Converts from multibyte filename encoding to Unicode. Returns the size of the
203 destination buffer. See also wxMBConv::MB2WC().
204 */
205 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
206
207 /**
208 Converts from Unicode to multibyte filename encoding. Returns the size of the
209 destination buffer. See also wxMBConv::WC2MB().
210 */
211 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
212 };
213
214
215
216 /**
217 @class wxMBConvUTF32
218 @wxheader{strconv.h}
219
220 This class is used to convert between multibyte encodings and UTF-32 Unicode
221 encoding (also known as UCS-4). Unlike the UTF-8 encoding,
222 UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
223 big or little endian. Hence this class is provided in two versions,
224 wxMBConvUTF32LE and wxMBConvUTF32BE, and wxMBConvUTF32 itself is just a typedef
225 for one of them (native for the given platform, e.g. LE under Windows and BE
226 under Mac).
227
228 @library{wxbase}
229 @category{conv}
230
231 @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes overview"
232 */
233 class wxMBConvUTF32 : public wxMBConv
234 {
235 public:
236 /**
237 Converts the UTF-32 encoded string @a psz to Unicode, storing the result in @a buf.
238 Returns the size of the destination buffer. See also wxMBConv::MB2WC().
239 */
240 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
241
242 /**
243 Converts the Unicode string @a psz to UTF-32 encoding, storing the result in @a buf.
244 Returns the size of the destination buffer. See also wxMBConv::WC2MB().
245 */
246 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
247 };
248
249
250
251 /**
252 @class wxMBConv
253 @wxheader{strconv.h}
254
255 This class is the base class of a hierarchy of classes capable of converting
256 text strings between multibyte (SBCS or DBCS) encodings and Unicode.
257
258 In the documentation for this and related classes please notice that the
259 @e length of a string refers to the number of characters in the string,
260 not counting the terminating @c NUL, if any, whereas the @e size of a string
261 is the total number of bytes in the string, including any trailing @c NUL.
262 Thus, the length of the wide character string @c L"foo" is 3 while its size can
263 be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
264 under Windows) or 4 (Unix).
265
266 @library{wxbase}
267 @category{conv}
268
269 @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
270 */
271 class wxMBConv
272 {
273 public:
274 /**
275 Trivial default constructor.
276 */
277 wxMBConv();
278
279 /**
280 This pure virtual function is overridden in each of the derived classes to return a
281 new copy of the object it is called on, so copies preserve their dynamic type.
282 @todo Declared below without the "= 0" pure specifier this text implies -- confirm.
283 */
284 virtual wxMBConv* Clone() const;
285
286 /**
287 This function has the same semantics as ToWChar()
288 except that it converts a wide string to a multibyte one.
289 */
290 virtual size_t FromWChar(char* dst, size_t dstLen,
291 const wchar_t* src,
292 size_t srcLen = wxNO_LEN) const;
293
294 /**
295 This function returns 1 for most of the multibyte encodings in which the
296 string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
297 which the string is terminated with 2 and 4 @c NUL characters respectively.
298 The other cases are not currently supported and @c wxCONV_FAILED
299 (defined as -1) is returned for them.
300 */
301 size_t GetMBNulLen() const;
302
303 /**
304 Returns the maximal value which can be returned by GetMBNulLen() for any
305 conversion object. Currently this value is 4. This method can be used to allocate
306 the buffer with enough space for the trailing @c NUL characters for any encoding.
307 @todo This reads like a class-wide (static) query, yet it is declared below as a
308 non-static, non-const member returning @c const @c size_t -- confirm the signature.
309 */
310 const size_t GetMaxMBNulLen();
311
312 /**
313 @deprecated This function is deprecated, please use ToWChar() instead.
314
315 Converts the string @a in from multibyte encoding to Unicode, putting up to
316 @a outLen characters into the buffer @a out. If @a out is @NULL, only the length of
317 the string which would result from the conversion is calculated and returned. Note
318 that this is the length and not the size, i.e. the returned value does not include the
319 trailing @c NUL. But when the function is called with a non-@NULL @a out buffer, the
320 @a outLen parameter should be one more to allow properly @c NUL-terminating the string.
321
322 @param out
323 The output buffer, may be @NULL if the caller is only
324 interested in the length of the resulting string
325 @param in
326 The NUL-terminated input string, cannot be @NULL
327 @param outLen
328 The length of the output buffer but, unlike a string length,
329 including the trailing NUL; ignored if @a out is @NULL
330
331 @return The length of the converted string excluding the trailing NUL.
332 */
333 virtual size_t MB2WC(wchar_t* out, const char* in,
334 size_t outLen) const;
335
336 /**
337 The most general function for converting a multibyte string to a wide string.
338 The main case is when @a dst is not @NULL and @a srcLen is not
339 @c wxNO_LEN (which is defined as @c (size_t)-1): then
340 the function converts exactly @a srcLen bytes starting at @a src into a
341 wide string which it outputs to @a dst. If the length of the resulting wide
342 string is greater than @a dstLen, an error is returned. Note that if
343 @a srcLen bytes don't include @c NUL characters, the resulting wide string is
344 not @c NUL-terminated either.
345 If @a srcLen is @c wxNO_LEN, the function supposes that the string is
346 properly (i.e. as necessary for the encoding handled by this conversion)
347 @c NUL-terminated and converts the entire string, including any trailing @c NUL
348 bytes. In this case the wide string is also @c NUL-terminated.
349 Finally, if @a dst is @NULL, the function returns the length of the needed
350 buffer.
351 */
352 virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
353 const char* src,
354 size_t srcLen = wxNO_LEN) const;
355
356 /**
357 @deprecated This function is deprecated, please use FromWChar() instead.
358
359 Converts from Unicode to multibyte encoding. The semantics of this function
360 (including the return value meaning) are the same as for wxMBConv::MB2WC().
361 Notice that when the function is called with a non-@NULL buffer, the
362 @a n parameter should be the size of the buffer and so it should take
363 into account the trailing @c NUL, which might take two or four bytes for some
364 encodings (UTF-16 and UTF-32) and not one.
365 */
366 virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
367
368 //@{
369 /**
370 Converts from multibyte encoding to Unicode by calling wxMBConv::MB2WC(),
371 allocating a temporary wxWCharBuffer to hold the result.
372
373 The first overload takes a @c NUL-terminated input string. The second one takes a
374 string of exactly the specified length @a inLen, which may or may not include the
375 trailing @c NUL character(s). If the string is not @c NUL-terminated, a temporary
376 @c NUL-terminated copy of it suitable for passing to wxMBConv::MB2WC() is made,
377 so it is more efficient to ensure that the string does have the appropriate
378 number of @c NUL bytes (which is usually 1 but may be 2 or 4 for UTF-16 or
379 UTF-32, see wxMBConv::GetMBNulLen()), especially for long strings.
380
381 If @a outLen is not-@NULL, it receives the length of the converted string.
382
383 @todo @a outLen is described as a nullable output, yet the second overload
384 declares it as a plain @c size_t passed by value -- confirm the signature.
385 */
386 const wxWCharBuffer cMB2WC(const char* in) const;
387 const wxWCharBuffer cMB2WC(const char* in,
388 size_t inLen,
389 size_t outLen) const;
390 //@}
391
392 //@{
393 /**
394 Converts from multibyte encoding to the current wxChar type
395 (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
396 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
397 result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
398 return type (without const).
399 */
400 const char* cMB2WX(const char* psz) const;
401 const wxWCharBuffer cMB2WX(const char* psz) const;
402 //@}
403
404 //@{
405 /**
406 Converts from Unicode to multibyte encoding by calling WC2MB(), allocating a
407 temporary wxCharBuffer to hold the result.
408 The second overload of this function allows converting a string of the given
409 length @a inLen, whether it is @c NUL-terminated or not (for wide character
410 strings, unlike for the multibyte ones, a single @c NUL is always enough).
411 But notice that just as with cMB2WC(), it is more efficient to pass an already
412 terminated string to this function as otherwise a copy is made internally.
413 If @a outLen is not-@NULL, it receives the length of the converted string.
414 @todo @a outLen is described as a nullable output, yet the second overload
415 declares it as a plain @c size_t passed by value -- confirm the signature.
416 */
417 const wxCharBuffer cWC2MB(const wchar_t* in) const;
418 const wxCharBuffer cWC2MB(const wchar_t* in,
419 size_t inLen,
420 size_t outLen) const;
421 //@}
422
423 //@{
424 /**
425 Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
426 it returns the parameter unaltered. If wxChar is char, it returns the
427 result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
428 return type (without const).
429 */
430 const wchar_t* cWC2WX(const wchar_t* psz) const;
431 const wxCharBuffer cWC2WX(const wchar_t* psz) const;
432 //@}
433
434 //@{
435 /**
436 Converts from the current wxChar type to multibyte encoding. If wxChar is char,
437 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
438 result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
439 return type (without const).
440 */
441 const char* cWX2MB(const wxChar* psz) const;
442 const wxCharBuffer cWX2MB(const wxChar* psz) const;
443 //@}
444
445 //@{
446 /**
447 Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
448 it returns the parameter unaltered. If wxChar is char, it returns the
449 result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
450 return type (without const).
451 */
452 const wchar_t* cWX2WC(const wxChar* psz) const;
453 const wxWCharBuffer cWX2WC(const wxChar* psz) const;
454 //@}
455 };
456