interface/strconv.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.h
   3 // Purpose:     interface of wxMBConvUTF7
   4 // Author:      wxWidgets team
   5 // RCS-ID:      $Id$
   6 // Licence:     wxWindows license
   7 /////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     @class wxMBConvUTF7
  11     @wxheader{strconv.h}
  12
  13     Conversion class translating text between the UTF-7 encoding and Unicode.
  14     A single predefined instance of it, @b wxConvUTF7, is available.
  15
  16     @b WARNING: this class is not implemented yet.
  17
  18     @library{wxbase}
  19     @category{FIXME}
  20
  21     @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview"
  22 */
  23 class wxMBConvUTF7 : public wxMBConv
  24 {
  25 public:
  26     /**
  27         Converts the UTF-7 encoded string @a psz to Unicode, with @a buf as the
  28         destination buffer. Returns the size of the destination buffer.
  29     */
  30     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  31
  32     /**
  33         Converts the Unicode string @a psz to UTF-7, with @a buf as the
  34         destination buffer. Returns the size of the destination buffer.
  35     */
  36     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  37 };
  38
  39
  40
  41 /**
  42     @class wxMBConvUTF8
  43     @wxheader{strconv.h}
  44
  45     Conversion class translating text between the UTF-8 encoding and Unicode.
  46     A single predefined instance of it, @b wxConvUTF8, is available.
  47
  48     @library{wxbase}
  49     @category{FIXME}
  50
  51     @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview"
  52 */
  53 class wxMBConvUTF8 : public wxMBConv
  54 {
  55 public:
  56     /**
  57         Converts the UTF-8 encoded string @a psz to Unicode, with @a buf as the
  58         destination buffer. Returns the size of the destination buffer.
  59     */
  60     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  61
  62     /**
  63         Converts the Unicode string @a psz to UTF-8, with @a buf as the
  64         destination buffer. Returns the size of the destination buffer.
  65     */
  66     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  67 };
  68
  69
  70
  71 /**
  72     @class wxMBConvUTF16
  73     @wxheader{strconv.h}
  74
  75     This class is used to convert between multibyte encodings and UTF-16 Unicode
  76     encoding (also known as UCS-2). Unlike the UTF-8 encoding (see wxMBConvUTF8),
  77     UTF-16 uses words and not bytes and hence depends on the byte ordering:
  78     big or little endian. Hence this class is provided in two versions,
  79     wxMBConvUTF16LE and wxMBConvUTF16BE, and wxMBConvUTF16 itself is just a typedef
  80     for one of them (native for the given platform, e.g. LE under Windows and BE
  81     under Mac).
  82
  83     @library{wxbase}
  84     @category{FIXME}
  85
  86     @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes overview"
  87 */
  88 class wxMBConvUTF16 : public wxMBConv
  89 {
  90 public:
  91     /**
  92         Converts the UTF-16 encoded string @a psz to Unicode, with @a buf as the
  93         destination buffer. Returns the size of the destination buffer.
  94     */
  95     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  96
  97     /**
  98         Converts the Unicode string @a psz to UTF-16, with @a buf as the
  99         destination buffer. Returns the size of the destination buffer.
 100     */
 101     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 102 };
 103
 104
 105
 106 /**
 107     @class wxCSConv
 108     @wxheader{strconv.h}
 109
 110     This class converts between any character set and Unicode.
 111     It has one predefined instance, @b wxConvLocal, for the
 112     default user character set.
 113
 114     @library{wxbase}
 115     @category{FIXME}
 116
 117     @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
 118 */
 119 class wxCSConv : public wxMBConv
 120 {
 121 public:
 122     /**
 123         Constructor. You can specify the name of the character set you want to
 124         convert from/to. If the character set name is not recognized, ISO 8859-1
 125         is used as a fallback.
 126     */
 127     wxCSConv(const wxChar* charset);
 128
 129     /**
 130         Constructor. You can specify an encoding constant for the
 131         character set you want to convert from/to. If the encoding
 132         is not recognized, ISO 8859-1 is used as a fallback.
 133     */
 134     wxCSConv(wxFontEncoding encoding);
 135
 136     /**
 137         Destructor frees any resources needed to perform the conversion.
 138     */
 139     ~wxCSConv();
 140
 141     /**
 142         Returns @true if the charset (or the encoding) given to the constructor is
 143         really available to use. Returns @false if ISO 8859-1 will be used instead.
 144         Note that this does not mean that a given string will be correctly converted:
 145         a malformed string may still make conversion functions return @c wxCONV_FAILED.
 146
 147         @since 2.8.2
 148     */
 149     bool IsOk() const;
 150
 151     /**
 152         Converts from the selected character set to Unicode. Returns the length of
 153         the string written to the destination buffer.
 154     */
 155     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 156
 157     /**
 158         Converts from Unicode to the selected character set. Returns the length of
 159         the string written to the destination buffer.
 160     */
 161     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 162 };
 163
 164
 165
 166 /**
 167     @class wxMBConvFile
 168     @wxheader{strconv.h}
 169
 170     This class used to define the class instance
 171     @b wxConvFileName, but nowadays @b wxConvFileName is
 172     either of type wxConvLibc (on most platforms) or wxConvUTF8
 173     (on MacOS X). @b wxConvFileName converts filenames between
 174     filesystem multibyte encoding and Unicode. @b wxConvFileName
 175     can also be set to something else at run-time, which is used
 176     e.g. by wxGTK to use a class which checks the environment
 177     variable @b G_FILESYSTEM_ENCODING indicating that filenames
 178     should not be interpreted as UTF8 and also for converting
 179     invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
 180     to strings with octal values.
 181
 182     Since some platforms (such as Win32) use Unicode in the filenames,
 183     and others (such as Unix) use multibyte encodings, this class should only
 184     be used directly if wxMBFILES is defined to 1. A convenience macro,
 185     wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could
 186     use it like this:
 187
 188     @code
 189     const wxChar *name = wxT("rawfile.doc");
 190     FILE *fil = fopen(wxFNCONV(name), "r");
 191     @endcode
 192
 193     (although it would be better to use wxFopen(name, wxT("r")) in this case.)
 194
 195     @library{wxbase}
 196     @category{FIXME}
 197
 198     @see @ref overview_mbconv "wxMBConv classes overview"
 199 */
 200 class wxMBConvFile : public wxMBConv
 201 {
 202 public:
 203     /**
 204         Converts the filename @a psz from the multibyte filename encoding to Unicode,
 205         with @a buf as the destination. Returns the size of the destination buffer.
 206     */
 207     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 208
 209     /**
 210         Converts the filename @a psz from Unicode to the multibyte filename encoding,
 211         with @a buf as the destination. Returns the size of the destination buffer.
 212     */
 213     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 214 };
 215
 216
 217
 218 /**
 219     @class wxMBConvUTF32
 220     @wxheader{strconv.h}
 221
 222     This class is used to convert between multibyte encodings and UTF-32 Unicode
 223     encoding (also known as UCS-4). Unlike the UTF-8 encoding (see wxMBConvUTF8),
 224     UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
 225     big or little endian. Hence this class is provided in two versions,
 226     wxMBConvUTF32LE and wxMBConvUTF32BE, and wxMBConvUTF32 itself is just a typedef
 227     for one of them (native for the given platform, e.g. LE under Windows and BE
 228     under Mac).
 229
 230     @library{wxbase}
 231     @category{FIXME}
 232
 233     @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes overview"
 234 */
 235 class wxMBConvUTF32 : public wxMBConv
 236 {
 237 public:
 238     /**
 239         Converts the UTF-32 encoded string @a psz to Unicode, with @a buf as the
 240         destination buffer. Returns the size of the destination buffer.
 241     */
 242     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 243
 244     /**
 245         Converts the Unicode string @a psz to UTF-32, with @a buf as the
 246         destination buffer. Returns the size of the destination buffer.
 247     */
 248     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 249 };
 250
 251
 252
 253 /**
 254     @class wxMBConv
 255     @wxheader{strconv.h}
 256
 257     This class is the base class of a hierarchy of classes capable of converting
 258     text strings between multibyte (SBCS or DBCS) encodings and Unicode.
 259
 260     In the documentation for this and related classes please notice that the
 261     length of the string refers to the number of characters in the string
 262     not counting the terminating @c NUL, if any, while the size of the string
 263     is the total number of bytes in the string, including any trailing @c NUL.
 264     Thus, the length of the wide character string @c L"foo" is 3 while its size
 265     can be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
 266     under Windows) or 4 (Unix).
 267
 268     @library{wxbase}
 269     @category{FIXME}
 270
 271     @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
 272 */
 273 class wxMBConv
 274 {
 275 public:
 276     /**
 277         Trivial default constructor.
 278     */
 279     wxMBConv();
 280
 281     /**
 282         This pure virtual function is overridden in each of the derived classes to
 283         return a new copy of the object it is called on. It is used for copying the
 284         conversion objects while preserving their dynamic type.
 285     */
 286     virtual wxMBConv* Clone() const = 0;
 287
 288     /**
 289         This function has the same semantics as ToWChar()
 290         except that it converts a wide string to a multibyte one.
 291     */
 292     virtual size_t FromWChar(char* dst, size_t dstLen,
 293                              const wchar_t* src,
 294                              size_t srcLen = wxNO_LEN) const;
 295
 296     /**
 297         This function returns 1 for most of the multibyte encodings in which the
 298         string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
 299         which the string is terminated with 2 and 4 @c NUL characters respectively.
 300         The other cases are not currently supported and @c wxCONV_FAILED
 301         (defined as -1) is returned for them.
 302     */
 303     size_t GetMBNulLen() const;
 304
 305     /**
 306         Returns the maximal value which can be returned by GetMBNulLen()
 307         for any conversion object; currently this value is 4. Being the
 308         same for all objects, it is declared static.
 309         This method can be used to allocate the buffer with enough space for the
 310         trailing @c NUL characters for any encoding.
 311     */
 312     static size_t GetMaxMBNulLen();
 313
 314     /**
 315         This function is deprecated, please use ToWChar() instead.
 316         Converts from a string @a in in multibyte encoding to Unicode putting up to
 317         @a outLen characters into the buffer @a out.
 318         If @a out is @NULL, only the length of the string which would result from
 319         the conversion is calculated and returned. Note that this is the length and not
 320         size, i.e. the returned value does not include the trailing @c NUL. But
 321         when the function is called with a non-@NULL @a out buffer, the @a outLen
 322         parameter should be one more to allow to properly @c NUL-terminate the string.
 323
 324         @param out
 325             The output buffer, may be @NULL if the caller is only
 326             interested in the length of the resulting string
 327         @param in
 328             The NUL-terminated input string, cannot be @NULL
 329         @param outLen
 330             The length of the output buffer, including the trailing
 331             NUL, ignored if @a out is @NULL
 332
 333         @returns The length of the converted string excluding the trailing NUL.
 334     */
 335     virtual size_t MB2WC(wchar_t* out, const char* in,
 336                          size_t outLen) const;
 337
 338     /**
 339         The most general function for converting a multibyte string to a wide string.
 340         The main case is when @a dst is not @NULL and @a srcLen is not
 341         @c wxNO_LEN (which is defined as @c (size_t)-1): then
 342         the function converts exactly @a srcLen bytes starting at @a src into a
 343         wide string which it outputs to @a dst. If the length of the resulting wide
 344         string is greater than @a dstLen, an error is returned. Note that if
 345         @a srcLen bytes don't include @c NUL characters, the resulting wide string is
 346         not @c NUL-terminated either.
 347         If @a srcLen is @c wxNO_LEN, the function supposes that the string is
 348         properly (i.e. as necessary for the encoding handled by this conversion)
 349         @c NUL-terminated and converts the entire string, including any trailing @c NUL
 350         bytes. In this case the wide string is also @c NUL-terminated.
 351         Finally, if @a dst is @NULL, the function returns the length of the needed
 352         buffer.
 353     */
 354     virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
 355                            const char* src,
 356                            size_t srcLen = wxNO_LEN) const;
 357
 358     /**
 359         This function is deprecated, please use FromWChar() instead.
 360         Converts from Unicode to multibyte encoding. The semantics of this function
 361         (including the return value meaning) is the same as for
 362         wxMBConv::MB2WC.
 363         Notice that when the function is called with a non-@NULL buffer, the
 364         @a n parameter should be the size of the buffer and so it should take
 365         into account the trailing @c NUL, which might take two or four bytes for some
 366         encodings (UTF-16 and UTF-32) and not one.
 367     */
 368     virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 369
 370     //@{
 371     /**
 372         Converts from multibyte encoding to Unicode by calling
 373         wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold
 374         the result.
 375         The first overload takes a @c NUL-terminated input string.
 376
 377         The second one takes a string of exactly the specified length, @a inLen,
 378         and the string may or may not include the trailing @c NUL character(s).
 379         If the string is not @c NUL-terminated, a temporary @c NUL-terminated
 380         copy of it suitable for passing to wxMBConv::MB2WC is made, so it is
 381         more efficient, especially for long strings, to ensure that the string
 382         does have the appropriate number of @c NUL bytes (which is usually 1
 383         but may be 2 or 4 for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen).
 384
 385         If @a outLen is not @NULL, it receives the length of the converted
 386         string.
 387     */
 388     const wxWCharBuffer cMB2WC(const char* in) const;
 389     const wxWCharBuffer cMB2WC(const char* in,
 390                                size_t inLen,
 391                                size_t* outLen) const;
 392     //@}
 393
 394     //@{
 395     /**
 396         Converts from multibyte encoding to the current wxChar type
 397         (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
 398         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 399         result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
 400         return type (without const).
 401     */
 402     const char* cMB2WX(const char* psz) const;
 403     const wxWCharBuffer cMB2WX(const char* psz) const;
 404     //@}
 405
 406     //@{
 407     /**
 408         Converts from Unicode to multibyte encoding by calling WC2MB,
 409         allocating a temporary wxCharBuffer to hold the result.
 410         The second overload of this function allows to convert a string of the given
 411         length @a inLen, whether it is @c NUL-terminated or not (for wide character
 412         strings, unlike for the multibyte ones, a single @c NUL is always enough).
 413         But notice that just as with cMB2WC(), it is more
 414         efficient to pass an already terminated string to this function as otherwise a
 415         copy is made internally.
 416         If @a outLen is not @NULL, it receives the length of the converted
 417         string.
 418     */
 419     const wxCharBuffer cWC2MB(const wchar_t* in) const;
 420     const wxCharBuffer cWC2MB(const wchar_t* in,
 421                               size_t inLen,
 422                               size_t* outLen) const;
 423     //@}
 424
 425     //@{
 426     /**
 427         Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
 428         it returns the parameter unaltered. If wxChar is char, it returns the
 429         result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
 430         return type (without const).
 431     */
 432     const wchar_t* cWC2WX(const wchar_t* psz) const;
 433     const wxCharBuffer cWC2WX(const wchar_t* psz) const;
 434     //@}
 435
 436     //@{
 437     /**
 438         Converts from the current wxChar type to multibyte encoding. If wxChar is char,
 439         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 440         result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
 441         return type (without const).
 442     */
 443     const char* cWX2MB(const wxChar* psz) const;
 444     const wxCharBuffer cWX2MB(const wxChar* psz) const;
 445     //@}
 446
 447     //@{
 448     /**
 449         Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
 450         it returns the parameter unaltered. If wxChar is char, it returns the
 451         result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
 452         return type (without const).
 453     */
 454     const wchar_t* cWX2WC(const wxChar* psz) const;
 455     const wxWCharBuffer cWX2WC(const wxChar* psz) const;
 456     //@}
 457 };
 458