interface/strconv.h

/////////////////////////////////////////////////////////////////////////////
// Name:        strconv.h
// Purpose:     documentation for wxMBConv and its derived conversion classes
// Author:      wxWidgets team
// RCS-ID:      $Id$
// Licence:     wxWindows license
/////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     @class wxMBConvUTF7
  11     @wxheader{strconv.h}
  12
  13     This class converts between the UTF-7 encoding and Unicode.
  14     It has one predefined instance, @b wxConvUTF7.
  15
  16     @b WARNING: this class is not implemented yet.
  17
  18     @library{wxbase}
  19     @category{FIXME}
  20
  21     @seealso
  22     wxMBConvUTF8, @ref overview_mbconvclasses "wxMBConv classes overview"
  23 */
  24 class wxMBConvUTF7 : public wxMBConv
  25 {
  26 public:
  27     /**
  28         Converts from UTF-7 encoding to Unicode. Returns the size of the destination
  29         buffer.
  30     */
  31     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  32
  33     /**
  34         Converts from Unicode to UTF-7 encoding. Returns the size of the destination
  35         buffer.
  36     */
  37     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  38 };
  39
  40
  41 /**
  42     @class wxMBConvUTF8
  43     @wxheader{strconv.h}
  44
  45     This class converts between the UTF-8 encoding and Unicode.
  46     It has one predefined instance, @b wxConvUTF8.
  47
  48     @library{wxbase}
  49     @category{FIXME}
  50
  51     @seealso
  52     wxMBConvUTF7, @ref overview_mbconvclasses "wxMBConv classes overview"
  53 */
  54 class wxMBConvUTF8 : public wxMBConv
  55 {
  56 public:
  57     /**
  58         Converts from UTF-8 encoding to Unicode. Returns the size of the destination
  59         buffer.
  60     */
  61     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  62
  63     /**
  64         Converts from Unicode to UTF-8 encoding. Returns the size of the destination
  65         buffer.
  66     */
  67     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  68 };
  69
  70
  71 /**
  72     @class wxMBConvUTF16
  73     @wxheader{strconv.h}
  74
  75     This class is used to convert between multibyte encodings and UTF-16 Unicode
  76     encoding (also known as UCS-2). Unlike UTF-8 encoding,
  77     UTF-16 uses words and not bytes and hence depends on the byte ordering:
  78     big or little endian. Hence this class is provided in two versions:
  79     wxMBConvUTF16LE and wxMBConvUTF16BE and wxMBConvUTF16 itself is just a typedef
  80     for one of them (native for the given platform, e.g. LE under Windows and BE
  81     under Mac).
  82
  83     @library{wxbase}
  84     @category{FIXME}
  85
  86     @seealso
  87     wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconvclasses "wxMBConv classes
  88     overview"
  89 */
  90 class wxMBConvUTF16 : public wxMBConv
  91 {
  92 public:
  93     /**
  94         Converts from UTF-16 encoding to Unicode. Returns the size of the destination
  95         buffer.
  96     */
  97     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  98
  99     /**
 100         Converts from Unicode to UTF-16 encoding. Returns the size of the destination
 101         buffer.
 102     */
 103     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 104 };
 105
 106
 107 /**
 108     @class wxCSConv
 109     @wxheader{strconv.h}
 110
 111     This class converts between any character sets and Unicode.
 112     It has one predefined instance, @b wxConvLocal, for the
 113     default user character set.
 114
 115     @library{wxbase}
 116     @category{FIXME}
 117
 118     @seealso
 119     wxMBConv, wxEncodingConverter, @ref overview_mbconvclasses "wxMBConv classes
 120     overview"
 121 */
 122 class wxCSConv : public wxMBConv
 123 {
 124 public:
 125     //@{
 126     /**
 127         Constructor. You may specify either the name of the character set you want to
 128         convert from/to or an encoding constant. If the character set name (or the
 129         encoding) is not recognized, ISO 8859-1 is used as fall back.
 130     */
 131     wxCSConv(const wxChar* charset);
 132     wxCSConv(wxFontEncoding encoding);
 133     //@}
 134
 135     /**
 136         Destructor frees any resources needed to perform the conversion.
 137     */
 138     ~wxCSConv();
 139
 140     /**
 141         Returns @true if the charset (or the encoding) given at constructor is really
 142         available to use. Returns @false if ISO 8859-1 will be used instead.
 143         Note this does not mean that a given string will be correctly converted.
 144         A malformed string may still make conversion functions return @c wxCONV_FAILED.
 145         This function is new since wxWidgets version 2.8.2
 146     */
 147     bool IsOk() const;
 148
 149     /**
 150         Converts from the selected character set to Unicode. Returns length of string
 151         written to destination buffer.
 152     */
 153     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 154
 155     /**
 156         Converts from Unicode to the selected character set. Returns length of string
 157         written to destination buffer.
 158     */
 159     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 160 };
 161
 162
 163 /**
 164     @class wxMBConvFile
 165     @wxheader{strconv.h}
 166
 167     This class used to define the class instance
 168     @b wxConvFileName, but nowadays @b wxConvFileName is
 169     either of type wxConvLibc (on most platforms) or wxConvUTF8
 170     (on MacOS X). @b wxConvFileName converts filenames between
 171     filesystem multibyte encoding and Unicode. @b wxConvFileName
 172     can also be set to a something else at run-time which is used
 173     e.g. by wxGTK to use a class which checks the environment
 174     variable @b G_FILESYSTEM_ENCODING indicating that filenames
 175     should not be interpreted as UTF8 and also for converting
 176     invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
 177     to strings with octal values.
 178
 179     Since some platforms (such as Win32) use Unicode in the filenames,
 180     and others (such as Unix) use multibyte encodings, this class should only
 181     be used directly if wxMBFILES is defined to 1. A convenience macro,
 182     wxFNCONV, is defined to wxConvFileName-cWX2MB in this case. You could
 183     use it like this:
 184
 185     @code
 186     wxChar *name = wxT("rawfile.doc");
 187     FILE *fil = fopen(wxFNCONV(name), "r");
 188     @endcode
 189
 190     (although it would be better to use wxFopen(name, wxT("r")) in this case.)
 191
 192     @library{wxbase}
 193     @category{FIXME}
 194
 195     @seealso
 196     @ref overview_mbconvclasses "wxMBConv classes overview"
 197 */
 198 class wxMBConvFile : public wxMBConv
 199 {
 200 public:
 201     /**
 202         Converts from multibyte filename encoding to Unicode. Returns the size of the
 203         destination buffer.
 204     */
 205     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 206
 207     /**
 208         Converts from Unicode to multibyte filename encoding. Returns the size of the
 209         destination buffer.
 210     */
 211     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 212 };
 213
 214
 215 /**
 216     @class wxMBConvUTF32
 217     @wxheader{strconv.h}
 218
 219     This class is used to convert between multibyte encodings and UTF-32 Unicode
 220     encoding (also known as UCS-4). Unlike UTF-8 encoding,
 221     UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
 222     big or little endian. Hence this class is provided in two versions:
 223     wxMBConvUTF32LE and wxMBConvUTF32BE and wxMBConvUTF32 itself is just a typedef
 224     for one of them (native for the given platform, e.g. LE under Windows and BE
 225     under Mac).
 226
 227     @library{wxbase}
 228     @category{FIXME}
 229
 230     @seealso
 231     wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconvclasses "wxMBConv classes
 232     overview"
 233 */
 234 class wxMBConvUTF32 : public wxMBConv
 235 {
 236 public:
 237     /**
 238         Converts from UTF-32 encoding to Unicode. Returns the size of the destination
 239         buffer.
 240     */
 241     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 242
 243     /**
 244         Converts from Unicode to UTF-32 encoding. Returns the size of the destination
 245         buffer.
 246     */
 247     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 248 };
 249
 250
 251 /**
 252     @class wxMBConv
 253     @wxheader{strconv.h}
 254
 255     This class is the base class of a hierarchy of classes capable of converting
 256     text strings between multibyte (SBCS or DBCS) encodings and Unicode.
 257
 258     In the documentation for this and related classes please notice that
 259     length of the string refers to the number of characters in the string
 260     not counting the terminating @c NUL, if any. While the size of the string
 261     is the total number of bytes in the string, including any trailing @c NUL.
 262     Thus, length of wide character string @c L"foo" is 3 while its size can
 263     be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
 264     under Windows) or 4 (Unix).
 265
 266     @library{wxbase}
 267     @category{FIXME}
 268
 269     @seealso
 270     wxCSConv, wxEncodingConverter, @ref overview_mbconvclasses "wxMBConv classes
 271     overview"
 272 */
 273 class wxMBConv
 274 {
 275 public:
 276     /**
 277         Trivial default constructor.
 278     */
 279     wxMBConv();
 280
 281     /**
 282         This pure virtual function is overridden in each of the derived classes to
 283         return a new copy of the object it is called on. It is used for copying the
 284         conversion objects while preserving their dynamic type.
 285     */
 286     virtual wxMBConv* Clone() const;
 287
 288     /**
 289         This function has the same semantics as ToWChar()
 290         except that it converts a wide string to multibyte one.
 291     */
 292     virtual size_t FromWChar(char* dst, size_t dstLen,
 293                              const wchar_t* src,
 294                              size_t srcLen = wxNO_LEN) const;
 295
 296     /**
 297         This function returns 1 for most of the multibyte encodings in which the
 298         string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
 299         which the string is terminated with 2 and 4 @c NUL characters respectively.
 300         The other cases are not currently supported and @c wxCONV_FAILED
 301         (defined as -1) is returned for them.
 302     */
 303     size_t GetMBNulLen() const;
 304
 305     /**
 306         Returns the maximal value which can be returned by
 307         GetMBNulLen() for any conversion object. Currently
 308         this value is 4.
 309         This method can be used to allocate the buffer with enough space for the
 310         trailing @c NUL characters for any encoding.
 311     */
 312     const size_t GetMaxMBNulLen();
 313
 314     /**
 315         This function is deprecated, please use ToWChar() instead
 316         Converts from a string @a in in multibyte encoding to Unicode putting up to
 317         @a outLen characters into the buffer @e out.
 318         If @a out is @NULL, only the length of the string which would result from
 319         the conversion is calculated and returned. Note that this is the length and not
 320         size, i.e. the returned value does not include the trailing @c NUL. But
 321         when the function is called with a non-@NULL @a out buffer, the @a outLen
 322         parameter should be one more to allow to properly @c NUL-terminate the string.
 323
 324         @param out
 325             The output buffer, may be @NULL if the caller is only
 326             interested in the length of the resulting string
 327         @param in
 328             The NUL-terminated input string, cannot be @NULL
 329         @param outLen
 330             The length of the output buffer but including
 331             NUL, ignored if out is @NULL
 332
 333         @returns The length of the converted string excluding the trailing NUL.
 334     */
 335     virtual size_t MB2WC(wchar_t* out, const char* in,
 336                          size_t outLen) const;
 337
 338     /**
 339         The most general function for converting a multibyte string to a wide string.
 340         The main case is when @a dst is not @NULL and @a srcLen is not
 341         @c wxNO_LEN (which is defined as @c (size_t)-1): then
 342         the function converts exactly @a srcLen bytes starting at @a src into
 343         wide string which it output to @e dst. If the length of the resulting wide
 344         string is greater than @e dstLen, an error is returned. Note that if
 345         @a srcLen bytes don't include @c NUL characters, the resulting wide string is
 346         not @c NUL-terminated neither.
 347         If @a srcLen is @c wxNO_LEN, the function supposes that the string is
 348         properly (i.e. as necessary for the encoding handled by this conversion)
 349         @c NUL-terminated and converts the entire string, including any trailing @c NUL
 350         bytes. In this case the wide string is also @c NUL-terminated.
 351         Finally, if @a dst is @NULL, the function returns the length of the needed
 352         buffer.
 353     */
 354     virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
 355                            const char* src,
 356                            size_t srcLen = wxNO_LEN) const;
 357
 358     /**
 359         This function is deprecated, please use FromWChar() instead
 360         Converts from Unicode to multibyte encoding. The semantics of this function
 361         (including the return value meaning) is the same as for
 362         wxMBConv::MB2WC.
 363         Notice that when the function is called with a non-@NULL buffer, the
 364         @a n parameter should be the size of the buffer and so it should take
 365         into account the trailing @c NUL, which might take two or four bytes for some
 366         encodings (UTF-16 and UTF-32) and not one.
 367     */
 368     virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 369
 370     //@{
 371     /**
 372         Converts from multibyte encoding to Unicode by calling
 373         wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold
 374         the result.
 375         The first overload takes a @c NUL-terminated input string. The second one takes
 376         a
 377         string of exactly the specified length and the string may include or not the
 378         trailing @c NUL character(s). If the string is not @c NUL-terminated, a
 379         temporary
 380         @c NUL-terminated copy of it suitable for passing to wxMBConv::MB2WC
 381         is made, so it is more efficient to ensure that the string is does have the
 382         appropriate number of @c NUL bytes (which is usually 1 but may be 2 or 4
 383         for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen),
 384         especially for long strings.
 385         If @a outLen is not-@NULL, it receives the length of the converted
 386         string.
 387     */
 388     const wxWCharBuffer cMB2WC(const char* in) const;
 389     const const wxWCharBuffer cMB2WC(const char* in,
 390                                      size_t inLen,
 391                                      size_t outLen) const;
 392     //@}
 393
 394     //@{
 395     /**
 396         Converts from multibyte encoding to the current wxChar type
 397         (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
 398         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 399         result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
 400         return type (without const).
 401     */
 402     const char* cMB2WX(const char* psz) const;
 403     const const wxWCharBuffer cMB2WX(const char* psz) const;
 404     //@}
 405
 406     //@{
 407     /**
 408         Converts from Unicode to multibyte encoding by calling WC2MB,
 409         allocating a temporary wxCharBuffer to hold the result.
 410         The second overload of this function allows to convert a string of the given
 411         length @e inLen, whether it is @c NUL-terminated or not (for wide character
 412         strings, unlike for the multibyte ones, a single @c NUL is always enough).
 413         But notice that just as with @ref wxMBConv::mb2wc cMB2WC, it is more
 414         efficient to pass an already terminated string to this function as otherwise a
 415         copy is made internally.
 416         If @a outLen is not-@NULL, it receives the length of the converted
 417         string.
 418     */
 419     const wxCharBuffer cWC2MB(const wchar_t* in) const;
 420     const const wxCharBuffer cWC2MB(const wchar_t* in,
 421                                     size_t inLen,
 422                                     size_t outLen) const;
 423     //@}
 424
 425     //@{
 426     /**
 427         Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
 428         it returns the parameter unaltered. If wxChar is char, it returns the
 429         result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
 430         return type (without const).
 431     */
 432     const wchar_t* cWC2WX(const wchar_t* psz) const;
 433     const const wxCharBuffer cWC2WX(const wchar_t* psz) const;
 434     //@}
 435
 436     //@{
 437     /**
 438         Converts from the current wxChar type to multibyte encoding. If wxChar is char,
 439         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 440         result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
 441         return type (without const).
 442     */
 443     const char* cWX2MB(const wxChar* psz) const;
 444     const const wxCharBuffer cWX2MB(const wxChar* psz) const;
 445     //@}
 446
 447     //@{
 448     /**
 449         Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
 450         it returns the parameter unaltered. If wxChar is char, it returns the
 451         result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
 452         return type (without const).
 453     */
 454     const wchar_t* cWX2WC(const wxChar* psz) const;
 455     const const wxWCharBuffer cWX2WC(const wxChar* psz) const;
 456     //@}
 457 };