interface/strconv.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.h
   3 // Purpose:     interface of wxMBConvUTF7
   4 // Author:      wxWidgets team
   5 // RCS-ID:      $Id$
   6 // Licence:     wxWindows license
   7 /////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     @class wxMBConvUTF7
  11     @wxheader{strconv.h}
  12
  13     This class converts between the UTF-7 encoding and Unicode.
  14     It has one predefined instance, @b wxConvUTF7.
  15
  16     @b WARNING: this class is not implemented yet.
  17
  18     @library{wxbase}
  19     @category{FIXME}
  20
  21     @see wxMBConvUTF8, @ref overview_mbconvclasses "wxMBConv classes overview"
  22 */
  23 class wxMBConvUTF7 : public wxMBConv
  24 {
  25 public:
  26     /**
  27         Converts the UTF-7 encoded string @a psz to Unicode, storing the result in
  28         @a buf. Returns the size of the destination buffer.
  29     */
  30     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  31
  32     /**
  33         Converts the Unicode string @a psz to UTF-7 encoding, storing the result in
  34         @a buf. Returns the size of the destination buffer.
  35     */
  36     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  37 };
  38
  39
  40
  41 /**
  42     @class wxMBConvUTF8
  43     @wxheader{strconv.h}
  44
  45     This class converts between the UTF-8 encoding and Unicode.
  46     It has one predefined instance, @b wxConvUTF8.
  47
  48     @library{wxbase}
  49     @category{FIXME}
  50
  51     @see wxMBConvUTF7, @ref overview_mbconvclasses "wxMBConv classes overview"
  52 */
  53 class wxMBConvUTF8 : public wxMBConv
  54 {
  55 public:
  56     /**
  57         Converts the UTF-8 encoded string @a psz to Unicode, storing the result in
  58         @a buf. Returns the size of the destination buffer.
  59     */
  60     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  61
  62     /**
  63         Converts the Unicode string @a psz to UTF-8 encoding, storing the result in
  64         @a buf. Returns the size of the destination buffer.
  65     */
  66     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  67 };
  68
  69
  70
  71 /**
  72     @class wxMBConvUTF16
  73     @wxheader{strconv.h}
  74
  75     This class is used to convert between multibyte encodings and UTF-16 Unicode
  76     encoding (also known as UCS-2). Unlike UTF-8 encoding,
  77     UTF-16 uses words and not bytes and hence depends on the byte ordering:
  78     big or little endian. Hence this class is provided in two versions:
  79     wxMBConvUTF16LE and wxMBConvUTF16BE and wxMBConvUTF16 itself is just a typedef
  80     for one of them (native for the given platform, e.g. LE under Windows and BE
  81     under Mac).
  82
  83     @library{wxbase}
  84     @category{FIXME}
  85
  86     @see wxMBConvUTF8, wxMBConvUTF32,
  87     @ref overview_mbconvclasses "wxMBConv classes overview"
  88 */
  89 class wxMBConvUTF16 : public wxMBConv
  90 {
  91 public:
  92     /**
  93         Converts the UTF-16 encoded string @a psz to Unicode, storing the result in
  94         @a buf. Returns the size of the destination buffer.
  95     */
  96     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  97
  98     /**
  99         Converts the Unicode string @a psz to UTF-16 encoding, storing the result in
 100         @a buf. Returns the size of the destination buffer.
 101     */
 102     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 103 };
 104
 105
 106
 107 /**
 108     @class wxCSConv
 109     @wxheader{strconv.h}
 110
 111     This class converts between any character sets and Unicode.
 112     It has one predefined instance, @b wxConvLocal, for the
 113     default user character set.
 114
 115     @library{wxbase}
 116     @category{FIXME}
 117
 118     @see wxMBConv, wxEncodingConverter,
 119     @ref overview_mbconvclasses "wxMBConv classes overview"
 120 */
 121 class wxCSConv : public wxMBConv
 122 {
 123 public:
 124     //@{
 125     /**
 126         Constructor. You may specify either the name of the character set you want to
 127         convert from/to or an encoding constant. If the character set name (or the
 128         encoding) is not recognized, ISO 8859-1 is used as a fallback; see IsOk().
 129     */
 130     wxCSConv(const wxChar* charset);
 131     wxCSConv(wxFontEncoding encoding);
 132     //@}
 133
 134     /**
 135         Destructor frees any resources needed to perform the conversion.
 136     */
 137     ~wxCSConv();
 138
 139     /**
 140         Returns @true if the charset (or the encoding) given to the constructor is
 141         really available to use. Returns @false if ISO 8859-1 will be used instead.
 142         Note this does not mean that a given string will be correctly converted.
 143         A malformed string may still make conversion functions return @c wxCONV_FAILED.
 144
 145         @since 2.8.2
 146     */
 147     bool IsOk() const;
 148
 149     /**
 150         Converts @a psz from the selected character set to Unicode, storing the result
 151         in @a buf. Returns length of string written to destination buffer.
 152     */
 153     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 154
 155     /**
 156         Converts @a psz from Unicode to the selected character set, storing the result
 157         in @a buf. Returns length of string written to destination buffer.
 158     */
 159     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 160 };
 161
 162
 163
 164 /**
 165     @class wxMBConvFile
 166     @wxheader{strconv.h}
 167
 168     This class used to define the class instance
 169     @b wxConvFileName, but nowadays @b wxConvFileName is
 170     either of type wxConvLibc (on most platforms) or wxConvUTF8
 171     (on MacOS X). @b wxConvFileName converts filenames between
 172     filesystem multibyte encoding and Unicode. @b wxConvFileName
 173     can also be set to something else at run-time, which is used
 174     e.g. by wxGTK to use a class which checks the environment
 175     variable @b G_FILESYSTEM_ENCODING indicating that filenames
 176     should not be interpreted as UTF8 and also for converting
 177     invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
 178     to strings with octal values.
 179
 180     Since some platforms (such as Win32) use Unicode in the filenames,
 181     and others (such as Unix) use multibyte encodings, this class should only
 182     be used directly if wxMBFILES is defined to 1. A convenience macro,
 183     wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could
 184     use it like this:
 185
 186     @code
 187     const wxChar *name = wxT("rawfile.doc");
 188     FILE *fil = fopen(wxFNCONV(name), "r");
 189     @endcode
 190
 191     (although it would be better to use wxFopen(name, wxT("r")) in this case.)
 192
 193     @library{wxbase}
 194     @category{FIXME}
 195
 196     @see @ref overview_mbconvclasses "wxMBConv classes overview"
 197 */
 198 class wxMBConvFile : public wxMBConv
 199 {
 200 public:
 201     /**
 202         Converts @a psz from multibyte filename encoding to Unicode, storing the
 203         result in @a buf. Returns the size of the destination buffer.
 204     */
 205     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 206
 207     /**
 208         Converts @a psz from Unicode to multibyte filename encoding, storing the
 209         result in @a buf. Returns the size of the destination buffer.
 210     */
 211     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 212 };
 213
 214
 215
 216 /**
 217     @class wxMBConvUTF32
 218     @wxheader{strconv.h}
 219
 220     This class is used to convert between multibyte encodings and UTF-32 Unicode
 221     encoding (also known as UCS-4). Unlike UTF-8 encoding,
 222     UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
 223     big or little endian. Hence this class is provided in two versions:
 224     wxMBConvUTF32LE and wxMBConvUTF32BE and wxMBConvUTF32 itself is just a typedef
 225     for one of them (native for the given platform, e.g. LE under Windows and BE
 226     under Mac).
 227
 228     @library{wxbase}
 229     @category{FIXME}
 230
 231     @see wxMBConvUTF8, wxMBConvUTF16,
 232     @ref overview_mbconvclasses "wxMBConv classes overview"
 233 */
 234 class wxMBConvUTF32 : public wxMBConv
 235 {
 236 public:
 237     /**
 238         Converts the UTF-32 encoded string @a psz to Unicode, storing the result in
 239         @a buf. Returns the size of the destination buffer.
 240     */
 241     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 242
 243     /**
 244         Converts the Unicode string @a psz to UTF-32 encoding, storing the result in
 245         @a buf. Returns the size of the destination buffer.
 246     */
 247     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 248 };
 249
 250
 251
 252 /**
 253     @class wxMBConv
 254     @wxheader{strconv.h}
 255
 256     This class is the base class of a hierarchy of classes capable of converting
 257     text strings between multibyte (SBCS or DBCS) encodings and Unicode.
 258
 259     In the documentation for this and related classes please notice that the
 260     length of the string refers to the number of characters in the string,
 261     not counting the terminating @c NUL, if any, while the size of the string
 262     is the total number of bytes in the string, including any trailing @c NUL.
 263     Thus, the length of the wide character string @c L"foo" is 3 while its size
 264     can be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
 265     under Windows) or 4 (Unix).
 266
 267     @library{wxbase}
 268     @category{FIXME}
 269
 270     @see wxCSConv, wxEncodingConverter,
 271     @ref overview_mbconvclasses "wxMBConv classes overview"
 272 */
 273 class wxMBConv
 274 {
 275 public:
 276     /**
 277         Trivial default constructor.
 278     */
 279     wxMBConv();
 280
 281     /**
 282         This pure virtual function is overridden in each of the derived classes to
 283         return a new copy of the object it is called on. It is used for copying the
 284         conversion objects while preserving their dynamic type.
 285     */
 286     virtual wxMBConv* Clone() const = 0;
 287
 288     /**
 289         This function has the same semantics as ToWChar()
 290         except that it converts a wide string to multibyte one.
 291     */
 292     virtual size_t FromWChar(char* dst, size_t dstLen,
 293                              const wchar_t* src,
 294                              size_t srcLen = wxNO_LEN) const;
 295
 296     /**
 297         This function returns 1 for most of the multibyte encodings in which the
 298         string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
 299         which the string is terminated with 2 and 4 @c NUL characters respectively.
 300         The other cases are not currently supported and @c wxCONV_FAILED
 301         (defined as -1) is returned for them.
 302     */
 303     size_t GetMBNulLen() const;
 304
 305     /**
 306         Returns the maximal value which can be returned by
 307         GetMBNulLen() for any conversion object. Currently
 308         this value is 4.
 309         This method can be used to allocate the buffer with enough space for the
 310         trailing @c NUL characters for any encoding.
 311     */
 312     static size_t GetMaxMBNulLen();
 313
 314     /**
 315         This function is deprecated, please use ToWChar() instead.
 316         Converts from a string @a in in multibyte encoding to Unicode putting up to
 317         @a outLen characters into the buffer @a out.
 318         If @a out is @NULL, only the length of the string which would result from
 319         the conversion is calculated and returned. Note that this is the length and not
 320         size, i.e. the returned value does not include the trailing @c NUL. But
 321         when the function is called with a non-@NULL @a out buffer, the @a outLen
 322         parameter should be one more to allow to properly @c NUL-terminate the string.
 323
 324         @param out
 325             The output buffer, may be @NULL if the caller is only
 326             interested in the length of the resulting string
 327         @param in
 328             The NUL-terminated input string, cannot be @NULL
 329         @param outLen
 330             The length of the output buffer including the trailing
 331             NUL, ignored if @a out is @NULL
 332
 333         @returns The length of the converted string excluding the trailing NUL.
 334     */
 335     virtual size_t MB2WC(wchar_t* out, const char* in,
 336                          size_t outLen) const;
 337
 338     /**
 339         The most general function for converting a multibyte string to a wide string.
 340         The main case is when @a dst is not @NULL and @a srcLen is not
 341         @c wxNO_LEN (which is defined as @c (size_t)-1): then
 342         the function converts exactly @a srcLen bytes starting at @a src into
 343         wide string which it outputs to @a dst. If the length of the resulting wide
 344         string is greater than @a dstLen, an error is returned. Note that if
 345         @a srcLen bytes don't include @c NUL characters, the resulting wide string is
 346         not @c NUL-terminated either.
 347         If @a srcLen is @c wxNO_LEN, the function supposes that the string is
 348         properly (i.e. as necessary for the encoding handled by this conversion)
 349         @c NUL-terminated and converts the entire string, including any trailing @c NUL
 350         bytes. In this case the wide string is also @c NUL-terminated.
 351         Finally, if @a dst is @NULL, the function returns the length of the needed
 352         buffer.
 353     */
 354     virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
 355                            const char* src,
 356                            size_t srcLen = wxNO_LEN) const;
 357
 358     /**
 359         This function is deprecated, please use FromWChar() instead.
 360         Converts from Unicode to multibyte encoding. The semantics of this function
 361         (including the return value meaning) is the same as for
 362         wxMBConv::MB2WC.
 363         Notice that when the function is called with a non-@NULL buffer, the
 364         @a n parameter should be the size of the buffer and so it should take
 365         into account the trailing @c NUL, which might take two or four bytes for some
 366         encodings (UTF-16 and UTF-32) and not one.
 367     */
 368     virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 369
 370     //@{
 371     /**
 372         Converts from multibyte encoding to Unicode by calling
 373         wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold
 374         the result.
 375
 376         The first overload takes a @c NUL-terminated input string. The second one
 377         takes a string of exactly the specified length and the string may include or
 378         not the trailing @c NUL character(s). If the string is not @c NUL-terminated,
 379         a temporary @c NUL-terminated copy of it suitable for passing to
 380         wxMBConv::MB2WC is made, so it is more efficient to ensure that the string
 381         does have the appropriate number of @c NUL bytes (which is usually 1 but may
 382         be 2 or 4 for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen), especially for
 383         long strings.
 384
 385         If @a outLen is not-@NULL, it receives the length of the converted
 386         string.
 387     */
 388     const wxWCharBuffer cMB2WC(const char* in) const;
 389     const wxWCharBuffer cMB2WC(const char* in,
 390                                size_t inLen,
 391                                size_t* outLen) const;
 392     //@}
 393
 394     //@{
 395     /**
 396         Converts from multibyte encoding to the current wxChar type
 397         (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
 398         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 399         result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
 400         return type (without const).
 401     */
 402     const char* cMB2WX(const char* psz) const;
 403     const wxWCharBuffer cMB2WX(const char* psz) const;
 404     //@}
 405
 406     //@{
 407     /**
 408         Converts from Unicode to multibyte encoding by calling WC2MB,
 409         allocating a temporary wxCharBuffer to hold the result.
 410         The second overload of this function allows to convert a string of the given
 411         length @a inLen, whether it is @c NUL-terminated or not (for wide character
 412         strings, unlike for the multibyte ones, a single @c NUL is always enough).
 413         But notice that just as with cMB2WC(), it is more
 414         efficient to pass an already terminated string to this function as otherwise a
 415         copy is made internally.
 416         If @a outLen is not-@NULL, it receives the length of the converted
 417         string.
 418     */
 419     const wxCharBuffer cWC2MB(const wchar_t* in) const;
 420     const wxCharBuffer cWC2MB(const wchar_t* in,
 421                               size_t inLen,
 422                               size_t* outLen) const;
 423     //@}
 424
 425     //@{
 426     /**
 427         Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
 428         it returns the parameter unaltered. If wxChar is char, it returns the
 429         result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
 430         return type (without const).
 431     */
 432     const wchar_t* cWC2WX(const wchar_t* psz) const;
 433     const wxCharBuffer cWC2WX(const wchar_t* psz) const;
 434     //@}
 435
 436     //@{
 437     /**
 438         Converts from the current wxChar type to multibyte encoding. If wxChar is char,
 439         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 440         result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
 441         return type (without const).
 442     */
 443     const char* cWX2MB(const wxChar* psz) const;
 444     const wxCharBuffer cWX2MB(const wxChar* psz) const;
 445     //@}
 446
 447     //@{
 448     /**
 449         Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
 450         it returns the parameter unaltered. If wxChar is char, it returns the
 451         result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
 452         return type (without const).
 453     */
 454     const wchar_t* cWX2WC(const wxChar* psz) const;
 455     const wxWCharBuffer cWX2WC(const wxChar* psz) const;
 456     //@}
 457 };
 458