]> git.saurik.com Git - wxWidgets.git/blob - interface/strconv.h
current locale in wxString means wxConvLibc
[wxWidgets.git] / interface / strconv.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.h
3 // Purpose: interface of wxMBConvUTF7
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10 @class wxMBConvUTF7
11 @wxheader{strconv.h}
12
13 This class converts between the UTF-7 encoding and Unicode (wide character
14 strings). It has one predefined instance, @b wxConvUTF7.
15
16 @b WARNING: this class is not implemented yet.
17
18 @library{wxbase}
19 @category{FIXME}
20
21 @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview"
22 */
23 class wxMBConvUTF7 : public wxMBConv
24 {
25 public:
26 /**
27 Converts the UTF-7 encoded string @a psz to Unicode, storing the result in
28 @a buf. Returns the size of the destination buffer.
29 */
30 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
31
32 /**
33 Converts the Unicode string @a psz to UTF-7 encoding, storing the result in
34 @a buf. Returns the size of the destination buffer.
35 */
36 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
37 };
38
39
40
41 /**
42 @class wxMBConvUTF8
43 @wxheader{strconv.h}
44
45 This class converts between the UTF-8 encoding and Unicode (wide character
46 strings). It has one predefined instance, @b wxConvUTF8.
47
48 @library{wxbase}
49 @category{FIXME}
50
51 @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview"
52 */
53 class wxMBConvUTF8 : public wxMBConv
54 {
55 public:
56 /**
57 Converts the UTF-8 encoded string @a psz to Unicode, storing the result in
58 @a buf. Returns the size of the destination buffer.
59 */
60 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
61
62 /**
63 Converts the Unicode string @a psz to UTF-8 encoding, storing the result in
64 @a buf. Returns the size of the destination buffer.
65 */
66 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
67 };
68
69
70
71 /**
72 @class wxMBConvUTF16
73 @wxheader{strconv.h}
74
75 This class is used to convert between multibyte encodings and the UTF-16
76 Unicode encoding (also known as UCS-2). Unlike the UTF-8 encoding,
77 UTF-16 uses words and not bytes and hence depends on the byte ordering:
78 big or little endian. Hence this class is provided in two versions,
79 wxMBConvUTF16LE and wxMBConvUTF16BE, and wxMBConvUTF16 itself is just a typedef
80 for one of them (native for the given platform, e.g. LE under Windows and BE
81 under Mac).
82
83 @library{wxbase}
84 @category{FIXME}
85
86 @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes overview"
87 */
88 class wxMBConvUTF16 : public wxMBConv
89 {
90 public:
91 /**
92 Converts the UTF-16 encoded string @a psz to Unicode, storing the result in
93 @a buf. Returns the size of the destination buffer.
94 */
95 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
96
97 /**
98 Converts the Unicode string @a psz to UTF-16 encoding, storing the result in
99 @a buf. Returns the size of the destination buffer.
100 */
101 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
102 };
103
104
105
106 /**
107 @class wxCSConv
108 @wxheader{strconv.h}
109
110 This class converts between any character set and Unicode.
111 It has one predefined instance, @b wxConvLocal, for the
112 default user character set.
113
114 @library{wxbase}
115 @category{FIXME}
116
117 @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
118 */
119 class wxCSConv : public wxMBConv
120 {
121 public:
122 /**
123 Constructor. You can specify the name of the character set you want to
124 convert from/to. If the character set name is not recognized, ISO 8859-1
125 is used as a fallback.
126 */
127 wxCSConv(const wxChar* charset);
128
129 /**
130 Constructor. You can specify an encoding constant for the
131 character set you want to convert from/to. If the encoding
132 is not recognized, ISO 8859-1 is used as a fallback.
133 */
134 wxCSConv(wxFontEncoding encoding);
135
136 /**
137 Destructor; frees any resources needed to perform the conversion.
138 */
139 ~wxCSConv();
140
141 /**
142 Returns @true if the charset (or the encoding) given to the constructor is really
143 available for use. Returns @false if ISO 8859-1 will be used instead.
144 Note that this does not mean that a given string will be correctly converted.
145 A malformed string may still make conversion functions return @c wxCONV_FAILED.
146
147 @since 2.8.2
148 */
149 bool IsOk() const;
150
151 /**
152 Converts from the selected character set to Unicode. Returns the length of the
153 string written to the destination buffer.
154 */
155 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
156
157 /**
158 Converts from Unicode to the selected character set. Returns the length of the
159 string written to the destination buffer.
160 */
161 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
162 };
163
164
165
166 /**
167 @class wxMBConvFile
168 @wxheader{strconv.h}
169
170 This class used to define the class instance
171 @b wxConvFileName, but nowadays @b wxConvFileName is
172 either of type wxConvLibc (on most platforms) or wxConvUTF8
173 (on MacOS X). @b wxConvFileName converts filenames between
174 filesystem multibyte encoding and Unicode. @b wxConvFileName
175 can also be set to something else at run-time, which is used
176 e.g. by wxGTK to use a class which checks the environment
177 variable @b G_FILESYSTEM_ENCODING indicating that filenames
178 should not be interpreted as UTF8 and also for converting
179 invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
180 to strings with octal values.
181
182 Since some platforms (such as Win32) use Unicode in the filenames,
183 and others (such as Unix) use multibyte encodings, this class should only
184 be used directly if wxMBFILES is defined to 1. A convenience macro,
185 wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could
186 use it like this:
187
188 @code
189 wxChar *name = wxT("rawfile.doc");
190 FILE *fil = fopen(wxFNCONV(name), "r");
191 @endcode
192
193 (although it would be better to use wxFopen(name, wxT("r")) in this case.)
194
195 @library{wxbase}
196 @category{FIXME}
197
198 @see @ref overview_mbconv "wxMBConv classes overview"
199 */
200 class wxMBConvFile : public wxMBConv
201 {
202 public:
203 /**
204 Converts the filename @a psz from the multibyte filename encoding to Unicode,
205 storing the result in @a buf. Returns the size of the destination buffer.
206 */
207 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
208
209 /**
210 Converts the filename @a psz from Unicode to the multibyte filename encoding,
211 storing the result in @a buf. Returns the size of the destination buffer.
212 */
213 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
214 };
215
216
217
218 /**
219 @class wxMBConvUTF32
220 @wxheader{strconv.h}
221
222 This class is used to convert between multibyte encodings and the UTF-32
223 Unicode encoding (also known as UCS-4). Unlike the UTF-8 encoding,
224 UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
225 big or little endian. Hence this class is provided in two versions,
226 wxMBConvUTF32LE and wxMBConvUTF32BE, and wxMBConvUTF32 itself is just a typedef
227 for one of them (native for the given platform, e.g. LE under Windows and BE
228 under Mac).
229
230 @library{wxbase}
231 @category{FIXME}
232
233 @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes overview"
234 */
235 class wxMBConvUTF32 : public wxMBConv
236 {
237 public:
238 /**
239 Converts the UTF-32 encoded string @a psz to Unicode, storing the result in
240 @a buf. Returns the size of the destination buffer.
241 */
242 size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
243
244 /**
245 Converts the Unicode string @a psz to UTF-32 encoding, storing the result in
246 @a buf. Returns the size of the destination buffer.
247 */
248 size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
249 };
250
251
252
253 /**
254 @class wxMBConv
255 @wxheader{strconv.h}
256
257 This class is the base class of a hierarchy of classes capable of converting
258 text strings between multibyte (SBCS or DBCS) encodings and Unicode.
259
260 In the documentation for this and related classes please notice that the
261 length of a string refers to the number of characters in the string,
262 not counting the terminating @c NUL, if any, whereas the size of a string
263 is the total number of bytes in the string, including any trailing @c NUL.
264 Thus, the length of the wide character string @c L"foo" is 3 while its size
265 can be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
266 under Windows) or 4 (Unix).
267
268 @library{wxbase}
269 @category{FIXME}
270
271 @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
272 */
273 class wxMBConv
274 {
275 public:
276 /**
277 Trivial default constructor.
278 */
279 wxMBConv();
280
281 /**
282 This pure virtual function is overridden in each of the derived classes to
283 return a new copy of the object it is called on. It is used for copying the
284 conversion objects while preserving their dynamic type.
285 */
286 virtual wxMBConv* Clone() const;
287
288 /**
289 This function has the same semantics as ToWChar()
290 except that it converts a wide string to a multibyte one.
291 */
292 virtual size_t FromWChar(char* dst, size_t dstLen,
293 const wchar_t* src,
294 size_t srcLen = wxNO_LEN) const;
295
296 /**
297 This function returns 1 for most of the multibyte encodings in which the
298 string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
299 which the string is terminated with 2 and 4 @c NUL characters respectively.
300 The other cases are not currently supported and @c wxCONV_FAILED
301 (defined as -1) is returned for them.
302 */
303 size_t GetMBNulLen() const;
304
305 /**
306 Returns the maximal value which can be returned by
307 GetMBNulLen() for any conversion object. Currently
308 this value is 4.
309 This method can be used to allocate a buffer with enough space for the
310 trailing @c NUL characters for any encoding.
311 */
312 const size_t GetMaxMBNulLen();
313
314 /**
315 This function is deprecated, please use ToWChar() instead.
316 Converts from a string @a in in multibyte encoding to Unicode putting up to
317 @a outLen characters into the buffer @a out.
318 If @a out is @NULL, only the length of the string which would result from
319 the conversion is calculated and returned. Note that this is the length and not
320 size, i.e. the returned value does not include the trailing @c NUL. But
321 when the function is called with a non-@NULL @a out buffer, the @a outLen
322 parameter should be one more to allow to properly @c NUL-terminate the string.
323
324 @param out
325 The output buffer, may be @NULL if the caller is only
326 interested in the length of the resulting string
327 @param in
328 The NUL-terminated input string, cannot be @NULL
329 @param outLen
330 The length of the output buffer, including the trailing
331 NUL; ignored if @a out is @NULL
332
333 @returns The length of the converted string excluding the trailing NUL.
334 */
335 virtual size_t MB2WC(wchar_t* out, const char* in,
336 size_t outLen) const;
337
338 /**
339 The most general function for converting a multibyte string to a wide string.
340 The main case is when @a dst is not @NULL and @a srcLen is not
341 @c wxNO_LEN (which is defined as @c (size_t)-1): then
342 the function converts exactly @a srcLen bytes starting at @a src into a
343 wide string which it outputs to @a dst. If the length of the resulting wide
344 string is greater than @a dstLen, an error is returned. Note that if
345 @a srcLen bytes don't include @c NUL characters, the resulting wide string is
346 not @c NUL-terminated either.
347 If @a srcLen is @c wxNO_LEN, the function supposes that the string is
348 properly (i.e. as necessary for the encoding handled by this conversion)
349 @c NUL-terminated and converts the entire string, including any trailing @c NUL
350 bytes. In this case the wide string is also @c NUL-terminated.
351 Finally, if @a dst is @NULL, the function returns the length of the needed
352 buffer.
353 */
354 virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
355 const char* src,
356 size_t srcLen = wxNO_LEN) const;
357
358 /**
359 This function is deprecated, please use FromWChar() instead.
360 Converts from Unicode to multibyte encoding. The semantics of this function
361 (including the return value meaning) is the same as for
362 wxMBConv::MB2WC.
363 Notice that when the function is called with a non-@NULL buffer, the
364 @a n parameter should be the size of the buffer and so it should take
365 into account the trailing @c NUL, which might take two or four bytes for some
366 encodings (UTF-16 and UTF-32) and not one.
367 */
368 virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
369
370 //@{
371 /**
372 Converts from multibyte encoding to Unicode by calling
373 wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold
374 the result.
375 The first overload takes a @c NUL-terminated input string. The second one
376 takes a string of exactly the specified length, and the string may or may
377 not include the trailing @c NUL character(s).
378 If the string is not @c NUL-terminated, a temporary
379 @c NUL-terminated copy of it suitable for passing to
380 wxMBConv::MB2WC is made, so it is more efficient to ensure
381 that the string does have the
382 appropriate number of @c NUL bytes (which is usually 1 but may be 2 or 4
383 for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen),
384 especially for long strings.
385 If @a outLen is non-@NULL, it receives the length of the converted
386 string.
387 */
388 const wxWCharBuffer cMB2WC(const char* in) const;
389 const wxWCharBuffer cMB2WC(const char* in,
390 size_t inLen,
391 size_t* outLen) const;
392 //@}
393
394 //@{
395 /**
396 Converts from multibyte encoding to the current wxChar type
397 (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
398 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
399 result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
400 return type (without const).
401 */
402 const char* cMB2WX(const char* psz) const;
403 const wxWCharBuffer cMB2WX(const char* psz) const;
404 //@}
405
406 //@{
407 /**
408 Converts from Unicode to multibyte encoding by calling WC2MB,
409 allocating a temporary wxCharBuffer to hold the result.
410 The second overload of this function allows to convert a string of the given
411 length @a inLen, whether it is @c NUL-terminated or not (for wide character
412 strings, unlike for the multibyte ones, a single @c NUL is always enough).
413 But notice that just as with cMB2WC(), it is more
414 efficient to pass an already terminated string to this function as otherwise a
415 copy is made internally.
416 If @a outLen is non-@NULL, it receives the length of the converted
417 string.
418 */
419 const wxCharBuffer cWC2MB(const wchar_t* in) const;
420 const wxCharBuffer cWC2MB(const wchar_t* in,
421 size_t inLen,
422 size_t* outLen) const;
423 //@}
424
425 //@{
426 /**
427 Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
428 it returns the parameter unaltered. If wxChar is char, it returns the
429 result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
430 return type (without const).
431 */
432 const wchar_t* cWC2WX(const wchar_t* psz) const;
433 const wxCharBuffer cWC2WX(const wchar_t* psz) const;
434 //@}
435
436 //@{
437 /**
438 Converts from the current wxChar type to multibyte encoding. If wxChar is char,
439 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
440 result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
441 return type (without const).
442 */
443 const char* cWX2MB(const wxChar* psz) const;
444 const wxCharBuffer cWX2MB(const wxChar* psz) const;
445 //@}
446
447 //@{
448 /**
449 Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
450 it returns the parameter unaltered. If wxChar is char, it returns the
451 result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
452 return type (without const).
453 */
454 const wchar_t* cWX2WC(const wxChar* psz) const;
455 const wxWCharBuffer cWX2WC(const wxChar* psz) const;
456 //@}
457 };
458