interface/strconv.h

/////////////////////////////////////////////////////////////////////////////
// Name:        strconv.h
// Purpose:     interface of wxMBConv and its derived conversion classes
// Author:      wxWidgets team
// RCS-ID:      $Id$
// Licence:     wxWindows license
/////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     @class wxMBConvUTF7
  11     @wxheader{strconv.h}
  12
  13     This class converts between the UTF-7 encoding and Unicode.
  14     It has one predefined instance, @b wxConvUTF7.
  15
  16     @b WARNING: this class is not implemented yet.
  17
  18     @library{wxbase}
  19     @category{FIXME}
  20
  21     @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview"
  22 */
  23 class wxMBConvUTF7 : public wxMBConv
  24 {
  25 public:
  26     /**
  27         Converts from UTF-7 encoding to Unicode. Returns the size of the destination
  28         buffer.
  29     */
  30     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  31
  32     /**
  33         Converts from Unicode to UTF-7 encoding. Returns the size of the destination
  34         buffer.
  35     */
  36     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  37 };
  38
  39
  40
  41 /**
  42     @class wxMBConvUTF8
  43     @wxheader{strconv.h}
  44
  45     This class converts between the UTF-8 encoding and Unicode.
  46     It has one predefined instance, @b wxConvUTF8.
  47
  48     @library{wxbase}
  49     @category{FIXME}
  50
  51     @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview"
  52 */
  53 class wxMBConvUTF8 : public wxMBConv
  54 {
  55 public:
  56     /**
  57         Converts from UTF-8 encoding to Unicode. Returns the size of the destination
  58         buffer.
  59     */
  60     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  61
  62     /**
  63         Converts from Unicode to UTF-8 encoding. Returns the size of the destination
  64         buffer.
  65     */
  66     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  67 };
  68
  69
  70
  71 /**
  72     @class wxMBConvUTF16
  73     @wxheader{strconv.h}
  74
  75     This class is used to convert between multibyte encodings and UTF-16 Unicode
  76     encoding (also known as UCS-2). Unlike UTF-8() encoding,
  77     UTF-16 uses words and not bytes and hence depends on the byte ordering:
  78     big or little endian. Hence this class is provided in two versions:
  79     wxMBConvUTF16LE and wxMBConvUTF16BE and wxMBConvUTF16 itself is just a typedef
  80     for one of them (native for the given platform, e.g. LE under Windows and BE
  81     under Mac).
  82
  83     @library{wxbase}
  84     @category{FIXME}
  85
  86     @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes
  87     overview"
  88 */
  89 class wxMBConvUTF16 : public wxMBConv
  90 {
  91 public:
  92     /**
  93         Converts from UTF-16 encoding to Unicode. Returns the size of the destination
  94         buffer.
  95     */
  96     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  97
  98     /**
  99         Converts from Unicode to UTF-16 encoding. Returns the size of the destination
 100         buffer.
 101     */
 102     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 103 };
 104
 105
 106
 107 /**
 108     @class wxCSConv
 109     @wxheader{strconv.h}
 110
 111     This class converts between any character sets and Unicode.
 112     It has one predefined instance, @b wxConvLocal, for the
 113     default user character set.
 114
 115     @library{wxbase}
 116     @category{FIXME}
 117
 118     @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv
 119     classes overview"
 120 */
 121 class wxCSConv : public wxMBConv
 122 {
 123 public:
 124     /**
 125         Constructor. You can specify the name of the character set you want to
 126         convert from/to. If the character set name is not recognized, ISO 8859-1
 127         is used as fall back.
 128     */
 129     wxCSConv(const wxChar* charset);
 130
 131     /**
 132         Constructor. You can specify an encoding constant for the
 133         character set you want to convert from/to or. If the encoding
 134         is not recognized, ISO 8859-1 is used as fall back.
 135     */
 136     wxCSConv(wxFontEncoding encoding);
 137
 138     /**
 139         Destructor frees any resources needed to perform the conversion.
 140     */
 141     ~wxCSConv();
 142
 143     /**
 144         Returns @true if the charset (or the encoding) given at constructor is really
 145         available to use. Returns @false if ISO 8859-1 will be used instead.
 146         Note this does not mean that a given string will be correctly converted.
 147         A malformed string may still make conversion functions return @c wxCONV_FAILED.
 148
 149         @since 2.8.2
 150     */
 151     bool IsOk() const;
 152
 153     /**
 154         Converts from the selected character set to Unicode. Returns length of string
 155         written to destination buffer.
 156     */
 157     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 158
 159     /**
 160         Converts from Unicode to the selected character set. Returns length of string
 161         written to destination buffer.
 162     */
 163     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 164 };
 165
 166
 167
 168 /**
 169     @class wxMBConvFile
 170     @wxheader{strconv.h}
 171
 172     This class used to define the class instance
 173     @b wxConvFileName, but nowadays @b wxConvFileName is
 174     either of type wxConvLibc (on most platforms) or wxConvUTF8
 175     (on MacOS X). @b wxConvFileName converts filenames between
 176     filesystem multibyte encoding and Unicode. @b wxConvFileName
 177     can also be set to a something else at run-time which is used
 178     e.g. by wxGTK to use a class which checks the environment
 179     variable @b G_FILESYSTEM_ENCODING indicating that filenames
 180     should not be interpreted as UTF8 and also for converting
 181     invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
 182     to strings with octal values.
 183
 184     Since some platforms (such as Win32) use Unicode in the filenames,
 185     and others (such as Unix) use multibyte encodings, this class should only
 186     be used directly if wxMBFILES is defined to 1. A convenience macro,
 187     wxFNCONV, is defined to wxConvFileName-cWX2MB in this case. You could
 188     use it like this:
 189
 190     @code
 191     wxChar *name = wxT("rawfile.doc");
 192     FILE *fil = fopen(wxFNCONV(name), "r");
 193     @endcode
 194
 195     (although it would be better to use wxFopen(name, wxT("r")) in this case.)
 196
 197     @library{wxbase}
 198     @category{FIXME}
 199
 200     @see @ref overview_mbconv "wxMBConv classes overview"
 201 */
 202 class wxMBConvFile : public wxMBConv
 203 {
 204 public:
 205     /**
 206         Converts from multibyte filename encoding to Unicode. Returns the size of the
 207         destination buffer.
 208     */
 209     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 210
 211     /**
 212         Converts from Unicode to multibyte filename encoding. Returns the size of the
 213         destination buffer.
 214     */
 215     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 216 };
 217
 218
 219
 220 /**
 221     @class wxMBConvUTF32
 222     @wxheader{strconv.h}
 223
 224     This class is used to convert between multibyte encodings and UTF-32 Unicode
 225     encoding (also known as UCS-4). Unlike UTF-8() encoding,
 226     UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
 227     big or little endian. Hence this class is provided in two versions:
 228     wxMBConvUTF32LE and wxMBConvUTF32BE and wxMBConvUTF32 itself is just a typedef
 229     for one of them (native for the given platform, e.g. LE under Windows and BE
 230     under Mac).
 231
 232     @library{wxbase}
 233     @category{FIXME}
 234
 235     @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes
 236     overview"
 237 */
 238 class wxMBConvUTF32 : public wxMBConv
 239 {
 240 public:
 241     /**
 242         Converts from UTF-32 encoding to Unicode. Returns the size of the destination
 243         buffer.
 244     */
 245     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 246
 247     /**
 248         Converts from Unicode to UTF-32 encoding. Returns the size of the destination
 249         buffer.
 250     */
 251     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 252 };
 253
 254
 255
 256 /**
 257     @class wxMBConv
 258     @wxheader{strconv.h}
 259
 260     This class is the base class of a hierarchy of classes capable of converting
 261     text strings between multibyte (SBCS or DBCS) encodings and Unicode.
 262
 263     In the documentation for this and related classes please notice that
 264     length of the string refers to the number of characters in the string
 265     not counting the terminating @c NUL, if any. While the size of the string
 266     is the total number of bytes in the string, including any trailing @c NUL.
 267     Thus, length of wide character string @c L"foo" is 3 while its size can
 268     be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
 269     under Windows) or 4 (Unix).
 270
 271     @library{wxbase}
 272     @category{FIXME}
 273
 274     @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv
 275     classes overview"
 276 */
 277 class wxMBConv
 278 {
 279 public:
 280     /**
 281         Trivial default constructor.
 282     */
 283     wxMBConv();
 284
 285     /**
 286         This pure virtual function is overridden in each of the derived classes to
 287         return a new copy of the object it is called on. It is used for copying the
 288         conversion objects while preserving their dynamic type.
 289     */
 290     virtual wxMBConv* Clone() const;
 291
 292     /**
 293         This function has the same semantics as ToWChar()
 294         except that it converts a wide string to multibyte one.
 295     */
 296     virtual size_t FromWChar(char* dst, size_t dstLen,
 297                              const wchar_t* src,
 298                              size_t srcLen = wxNO_LEN) const;
 299
 300     /**
 301         This function returns 1 for most of the multibyte encodings in which the
 302         string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
 303         which the string is terminated with 2 and 4 @c NUL characters respectively.
 304         The other cases are not currently supported and @c wxCONV_FAILED
 305         (defined as -1) is returned for them.
 306     */
 307     size_t GetMBNulLen() const;
 308
 309     /**
 310         Returns the maximal value which can be returned by
 311         GetMBNulLen() for any conversion object. Currently
 312         this value is 4.
 313         This method can be used to allocate the buffer with enough space for the
 314         trailing @c NUL characters for any encoding.
 315     */
 316     const size_t GetMaxMBNulLen();
 317
 318     /**
 319         This function is deprecated, please use ToWChar() instead
 320         Converts from a string @a in in multibyte encoding to Unicode putting up to
 321         @a outLen characters into the buffer @e out.
 322         If @a out is @NULL, only the length of the string which would result from
 323         the conversion is calculated and returned. Note that this is the length and not
 324         size, i.e. the returned value does not include the trailing @c NUL. But
 325         when the function is called with a non-@NULL @a out buffer, the @a outLen
 326         parameter should be one more to allow to properly @c NUL-terminate the string.
 327
 328         @param out
 329             The output buffer, may be @NULL if the caller is only
 330             interested in the length of the resulting string
 331         @param in
 332             The NUL-terminated input string, cannot be @NULL
 333         @param outLen
 334             The length of the output buffer but including
 335             NUL, ignored if out is @NULL
 336
 337         @returns The length of the converted string excluding the trailing NUL.
 338     */
 339     virtual size_t MB2WC(wchar_t* out, const char* in,
 340                          size_t outLen) const;
 341
 342     /**
 343         The most general function for converting a multibyte string to a wide string.
 344         The main case is when @a dst is not @NULL and @a srcLen is not
 345         @c wxNO_LEN (which is defined as @c (size_t)-1): then
 346         the function converts exactly @a srcLen bytes starting at @a src into
 347         wide string which it output to @e dst. If the length of the resulting wide
 348         string is greater than @e dstLen, an error is returned. Note that if
 349         @a srcLen bytes don't include @c NUL characters, the resulting wide string is
 350         not @c NUL-terminated neither.
 351         If @a srcLen is @c wxNO_LEN, the function supposes that the string is
 352         properly (i.e. as necessary for the encoding handled by this conversion)
 353         @c NUL-terminated and converts the entire string, including any trailing @c NUL
 354         bytes. In this case the wide string is also @c NUL-terminated.
 355         Finally, if @a dst is @NULL, the function returns the length of the needed
 356         buffer.
 357     */
 358     virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
 359                            const char* src,
 360                            size_t srcLen = wxNO_LEN) const;
 361
 362     /**
 363         This function is deprecated, please use FromWChar() instead
 364         Converts from Unicode to multibyte encoding. The semantics of this function
 365         (including the return value meaning) is the same as for
 366         wxMBConv::MB2WC.
 367         Notice that when the function is called with a non-@NULL buffer, the
 368         @a n parameter should be the size of the buffer and so it should take
 369         into account the trailing @c NUL, which might take two or four bytes for some
 370         encodings (UTF-16 and UTF-32) and not one.
 371     */
 372     virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 373
 374     //@{
 375     /**
 376         Converts from multibyte encoding to Unicode by calling
 377         wxMBConv::MB2WC, allocating a temporary wxWCharBuffer to hold
 378         the result.
 379         The first overload takes a @c NUL-terminated input string. The second one takes
 380         a
 381         string of exactly the specified length and the string may include or not the
 382         trailing @c NUL character(s). If the string is not @c NUL-terminated, a
 383         temporary
 384         @c NUL-terminated copy of it suitable for passing to wxMBConv::MB2WC
 385         is made, so it is more efficient to ensure that the string is does have the
 386         appropriate number of @c NUL bytes (which is usually 1 but may be 2 or 4
 387         for UTF-16 or UTF-32, see wxMBConv::GetMBNulLen),
 388         especially for long strings.
 389         If @a outLen is not-@NULL, it receives the length of the converted
 390         string.
 391     */
 392     const wxWCharBuffer cMB2WC(const char* in) const;
 393     const const wxWCharBuffer cMB2WC(const char* in,
 394                                      size_t inLen,
 395                                      size_t outLen) const;
 396     //@}
 397
 398     //@{
 399     /**
 400         Converts from multibyte encoding to the current wxChar type
 401         (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
 402         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 403         result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
 404         return type (without const).
 405     */
 406     const char* cMB2WX(const char* psz) const;
 407     const const wxWCharBuffer cMB2WX(const char* psz) const;
 408     //@}
 409
 410     //@{
 411     /**
 412         Converts from Unicode to multibyte encoding by calling WC2MB,
 413         allocating a temporary wxCharBuffer to hold the result.
 414         The second overload of this function allows to convert a string of the given
 415         length @e inLen, whether it is @c NUL-terminated or not (for wide character
 416         strings, unlike for the multibyte ones, a single @c NUL is always enough).
 417         But notice that just as with @ref wxMBConv::mb2wc cMB2WC, it is more
 418         efficient to pass an already terminated string to this function as otherwise a
 419         copy is made internally.
 420         If @a outLen is not-@NULL, it receives the length of the converted
 421         string.
 422     */
 423     const wxCharBuffer cWC2MB(const wchar_t* in) const;
 424     const const wxCharBuffer cWC2MB(const wchar_t* in,
 425                                     size_t inLen,
 426                                     size_t outLen) const;
 427     //@}
 428
 429     //@{
 430     /**
 431         Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
 432         it returns the parameter unaltered. If wxChar is char, it returns the
 433         result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
 434         return type (without const).
 435     */
 436     const wchar_t* cWC2WX(const wchar_t* psz) const;
 437     const const wxCharBuffer cWC2WX(const wchar_t* psz) const;
 438     //@}
 439
 440     //@{
 441     /**
 442         Converts from the current wxChar type to multibyte encoding. If wxChar is char,
 443         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 444         result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
 445         return type (without const).
 446     */
 447     const char* cWX2MB(const wxChar* psz) const;
 448     const const wxCharBuffer cWX2MB(const wxChar* psz) const;
 449     //@}
 450
 451     //@{
 452     /**
 453         Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
 454         it returns the parameter unaltered. If wxChar is char, it returns the
 455         result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
 456         return type (without const).
 457     */
 458     const wchar_t* cWX2WC(const wxChar* psz) const;
 459     const const wxWCharBuffer cWX2WC(const wxChar* psz) const;
 460     //@}
 461 };
 462