]>
git.saurik.com Git - wxWidgets.git/blob - interface/strconv.h
c2076bc14c205a6637c09b027aa6485a882784b1
1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: interface of wxMBConvUTF7
4 // Author: wxWidgets team
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
13 This class converts between the UTF-7 encoding and Unicode.
14 It has one predefined instance, @b wxConvUTF7.
19 @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview"
21 class wxMBConvUTF7
: public wxMBConv
25 Converts from UTF-7 encoding to Unicode. Returns the size of the destination
28 size_t MB2WC(wchar_t* buf
, const char* psz
, size_t n
) const;
31 Converts from Unicode to UTF-7 encoding. Returns the size of the destination
34 size_t WC2MB(char* buf
, const wchar_t* psz
, size_t n
) const;
43 This class converts between the UTF-8 encoding and Unicode.
44 It has one predefined instance, @b wxConvUTF8.
49 @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview"
51 class wxMBConvUTF8
: public wxMBConv
55 Converts from UTF-8 encoding to Unicode. Returns the size of the destination
58 size_t MB2WC(wchar_t* buf
, const char* psz
, size_t n
) const;
61 Converts from Unicode to UTF-8 encoding. Returns the size of the destination
64 size_t WC2MB(char* buf
, const wchar_t* psz
, size_t n
) const;
73 This class is used to convert between multibyte encodings and UTF-16 Unicode
74 encoding (also known as UCS-2). Unlike UTF-8 encoding,
75 UTF-16 uses words and not bytes and hence depends on the byte ordering:
76 big or little endian. Hence this class is provided in two versions:
77 wxMBConvUTF16LE and wxMBConvUTF16BE and wxMBConvUTF16 itself is just a typedef
78 for one of them (native for the given platform, e.g. LE under Windows and BE
84 @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes overview"
86 class wxMBConvUTF16
: public wxMBConv
90 Converts from UTF-16 encoding to Unicode. Returns the size of the destination
93 size_t MB2WC(wchar_t* buf
, const char* psz
, size_t n
) const;
96 Converts from Unicode to UTF-16 encoding. Returns the size of the destination
99 size_t WC2MB(char* buf
, const wchar_t* psz
, size_t n
) const;
108 This class converts between any character sets and Unicode.
109 It has one predefined instance, @b wxConvLocal, for the
110 default user character set.
115 @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
117 class wxCSConv
: public wxMBConv
121 Constructor. You can specify the name of the character set you want to
122 convert from/to. If the character set name is not recognized, ISO 8859-1
123 is used as fall back.
125 wxCSConv(const wxChar
* charset
);
128 Constructor. You can specify an encoding constant for the
129 character set you want to convert from/to. If the encoding
130 is not recognized, ISO 8859-1 is used as fall back.
132 wxCSConv(wxFontEncoding encoding
);
135 Destructor frees any resources needed to perform the conversion.
140 Returns @true if the charset (or the encoding) given at constructor is really
141 available to use. Returns @false if ISO 8859-1 will be used instead.
142 Note this does not mean that a given string will be correctly converted.
143 A malformed string may still make conversion functions return @c wxCONV_FAILED.
150 Converts from the selected character set to Unicode. Returns length of string
151 written to destination buffer.
153 size_t MB2WC(wchar_t* buf
, const char* psz
, size_t n
) const;
156 Converts from Unicode to the selected character set. Returns length of string
157 written to destination buffer.
159 size_t WC2MB(char* buf
, const wchar_t* psz
, size_t n
) const;
168 This class used to define the class instance
169 @b wxConvFileName, but nowadays @b wxConvFileName is
170 either of type wxConvLibc (on most platforms) or wxConvUTF8
171 (on MacOS X). @b wxConvFileName converts filenames between
172 filesystem multibyte encoding and Unicode. @b wxConvFileName
173 can also be set to something else at run-time which is used
174 e.g. by wxGTK to use a class which checks the environment
175 variable @b G_FILESYSTEM_ENCODING indicating that filenames
176 should not be interpreted as UTF8 and also for converting
177 invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
178 to strings with octal values.
180 Since some platforms (such as Win32) use Unicode in the filenames,
181 and others (such as Unix) use multibyte encodings, this class should only
182 be used directly if wxMBFILES is defined to 1. A convenience macro,
183 wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could
187 wxChar *name = wxT("rawfile.doc");
188 FILE *fil = fopen(wxFNCONV(name), "r");
191 (although it would be better to use wxFopen(name, wxT("r")) in this case.)
196 @see @ref overview_mbconv "wxMBConv classes overview"
198 class wxMBConvFile
: public wxMBConv
202 Converts from multibyte filename encoding to Unicode. Returns the size of the
205 size_t MB2WC(wchar_t* buf
, const char* psz
, size_t n
) const;
208 Converts from Unicode to multibyte filename encoding. Returns the size of the
211 size_t WC2MB(char* buf
, const wchar_t* psz
, size_t n
) const;
220 This class is used to convert between multibyte encodings and UTF-32 Unicode
221 encoding (also known as UCS-4). Unlike UTF-8 encoding,
222 UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
223 big or little endian. Hence this class is provided in two versions:
224 wxMBConvUTF32LE and wxMBConvUTF32BE and wxMBConvUTF32 itself is just a typedef
225 for one of them (native for the given platform, e.g. LE under Windows and BE
231 @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes overview"
233 class wxMBConvUTF32
: public wxMBConv
237 Converts from UTF-32 encoding to Unicode. Returns the size of the destination
240 size_t MB2WC(wchar_t* buf
, const char* psz
, size_t n
) const;
243 Converts from Unicode to UTF-32 encoding. Returns the size of the destination
246 size_t WC2MB(char* buf
, const wchar_t* psz
, size_t n
) const;
255 This class is the base class of a hierarchy of classes capable of converting
256 text strings between multibyte (SBCS or DBCS) encodings and Unicode.
258 In the documentation for this and related classes please notice that
259 length of the string refers to the number of characters in the string
260 not counting the terminating @c NUL, if any. While the size of the string
261 is the total number of bytes in the string, including any trailing @c NUL.
262 Thus, length of wide character string @c L"foo" is 3 while its size can
263 be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
264 under Windows) or 4 (Unix).
269 @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
275 Trivial default constructor.
280 This pure virtual function is overridden in each of the derived classes to
281 return a new copy of the object it is called on. It is used for copying the
282 conversion objects while preserving their dynamic type.
284 virtual wxMBConv
* Clone() const;
287 This function has the same semantics as ToWChar()
288 except that it converts a wide string to multibyte one.
290 virtual size_t FromWChar(char* dst
, size_t dstLen
,
292 size_t srcLen
= wxNO_LEN
) const;
295 This function returns 1 for most of the multibyte encodings in which the
296 string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
297 which the string is terminated with 2 and 4 @c NUL characters respectively.
298 The other cases are not currently supported and @c wxCONV_FAILED
299 (defined as -1) is returned for them.
301 size_t GetMBNulLen() const;
304 Returns the maximal value which can be returned by
305 GetMBNulLen() for any conversion object. Currently
307 This method can be used to allocate the buffer with enough space for the
308 trailing @c NUL characters for any encoding.
310 const size_t GetMaxMBNulLen();
313 This function is deprecated, please use ToWChar() instead.
314 Converts from a string @a in in multibyte encoding to Unicode putting up to
315 @a outLen characters into the buffer @e out.
316 If @a out is @NULL, only the length of the string which would result from
317 the conversion is calculated and returned. Note that this is the length and not
318 size, i.e. the returned value does not include the trailing @c NUL. But
319 when the function is called with a non-@NULL @a out buffer, the @a outLen
320 parameter should be one more to allow the string to be properly @c NUL-terminated.
323 The output buffer, may be @NULL if the caller is only
324 interested in the length of the resulting string
326 The NUL-terminated input string, cannot be @NULL
328 The length of the output buffer, including the trailing
329 NUL; ignored if @a out is @NULL
331 @return The length of the converted string excluding the trailing NUL.
333 virtual size_t MB2WC(wchar_t* out
, const char* in
,
334 size_t outLen
) const;
337 The most general function for converting a multibyte string to a wide string.
338 The main case is when @a dst is not @NULL and @a srcLen is not
339 @c wxNO_LEN (which is defined as @c (size_t)-1): then
340 the function converts exactly @a srcLen bytes starting at @a src into
341 wide string which it outputs to @e dst. If the length of the resulting wide
342 string is greater than @e dstLen, an error is returned. Note that if
343 @a srcLen bytes don't include @c NUL characters, the resulting wide string is
344 not @c NUL-terminated either.
345 If @a srcLen is @c wxNO_LEN, the function supposes that the string is
346 properly (i.e. as necessary for the encoding handled by this conversion)
347 @c NUL-terminated and converts the entire string, including any trailing @c NUL
348 bytes. In this case the wide string is also @c NUL-terminated.
349 Finally, if @a dst is @NULL, the function returns the length of the needed
352 virtual size_t ToWChar(wchar_t* dst
, size_t dstLen
,
354 size_t srcLen
= wxNO_LEN
) const;
357 This function is deprecated, please use FromWChar() instead.
358 Converts from Unicode to multibyte encoding. The semantics of this function
359 (including the return value meaning) is the same as for
361 Notice that when the function is called with a non-@NULL buffer, the
362 @a n parameter should be the size of the buffer and so it should take
363 into account the trailing @c NUL, which might take two or four bytes for some
364 encodings (UTF-16 and UTF-32) and not one.
366 virtual size_t WC2MB(char* buf
, const wchar_t* psz
, size_t n
) const;
370 Converts from multibyte encoding to Unicode by calling
371 wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold
373 The first overload takes a @c NUL-terminated input string. The second one takes
375 string of exactly the specified length and the string may include or not the
376 trailing @c NUL character(s). If the string is not @c NUL-terminated, a
378 @c NUL-terminated copy of it suitable for passing to wxMBConv::MB2WC
379 is made, so it is more efficient to ensure that the string does have the
380 appropriate number of @c NUL bytes (which is usually 1 but may be 2 or 4
381 for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen),
382 especially for long strings.
383 If @a outLen is non-@NULL, it receives the length of the converted
386 const wxWCharBuffer
cMB2WC(const char* in
) const;
387 const wxWCharBuffer
cMB2WC(const char* in
,
389 size_t outLen
) const;
394 Converts from multibyte encoding to the current wxChar type
395 (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
396 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
397 result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
398 return type (without const).
400 const char* cMB2WX(const char* psz
) const;
401 const wxWCharBuffer
cMB2WX(const char* psz
) const;
406 Converts from Unicode to multibyte encoding by calling WC2MB,
407 allocating a temporary wxCharBuffer to hold the result.
408 The second overload of this function allows converting a string of the given
409 length @e inLen, whether it is @c NUL-terminated or not (for wide character
410 strings, unlike for the multibyte ones, a single @c NUL is always enough).
411 But notice that just as with @ref wxMBConv::mb2wc cMB2WC, it is more
412 efficient to pass an already terminated string to this function as otherwise a
413 copy is made internally.
414 If @a outLen is non-@NULL, it receives the length of the converted
417 const wxCharBuffer
cWC2MB(const wchar_t* in
) const;
418 const wxCharBuffer
cWC2MB(const wchar_t* in
,
420 size_t outLen
) const;
425 Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
426 it returns the parameter unaltered. If wxChar is char, it returns the
427 result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
428 return type (without const).
430 const wchar_t* cWC2WX(const wchar_t* psz
) const;
431 const wxCharBuffer
cWC2WX(const wchar_t* psz
) const;
436 Converts from the current wxChar type to multibyte encoding. If wxChar is char,
437 it returns the parameter unaltered. If wxChar is wchar_t, it returns the
438 result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
439 return type (without const).
441 const char* cWX2MB(const wxChar
* psz
) const;
442 const wxCharBuffer
cWX2MB(const wxChar
* psz
) const;
447 Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
448 it returns the parameter unaltered. If wxChar is char, it returns the
449 result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
450 return type (without const).
452 const wchar_t* cWX2WC(const wxChar
* psz
) const;
453 const wxWCharBuffer
cWX2WC(const wxChar
* psz
) const;