interface/strconv.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.h
   3 // Purpose:     interface of wxMBConvUTF7
   4 // Author:      wxWidgets team
   5 // RCS-ID:      $Id$
   6 // Licence:     wxWindows license
   7 /////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     @class wxMBConvUTF7
  11     @wxheader{strconv.h}
  12
  13     This class converts strings between the UTF-7 encoding and Unicode.
  14     It has one predefined instance, @b wxConvUTF7.
  15
  16     @library{wxbase}
  17     @category{conv}
  18
  19     @see wxMBConvUTF8, @ref overview_mbconv "wxMBConv classes overview"
  20 */
  21 class wxMBConvUTF7 : public wxMBConv
  22 {
  23 public:
  24     /**
  25         Converts the string @a psz from UTF-7 encoding to Unicode. Returns the size
  26         of the destination buffer. See wxMBConv::MB2WC() for the parameters.
  27     */
  28     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  29
  30     /**
  31         Converts the string @a psz from Unicode to UTF-7 encoding. Returns the size
  32         of the destination buffer. See wxMBConv::WC2MB() for the parameters.
  33     */
  34     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  35 };
  36
  37
  38
  39 /**
  40     @class wxMBConvUTF8
  41     @wxheader{strconv.h}
  42
  43     This class converts strings between the UTF-8 encoding and Unicode.
  44     It has one predefined instance, @b wxConvUTF8.
  45
  46     @library{wxbase}
  47     @category{conv}
  48
  49     @see wxMBConvUTF7, @ref overview_mbconv "wxMBConv classes overview"
  50 */
  51 class wxMBConvUTF8 : public wxMBConv
  52 {
  53 public:
  54     /**
  55         Converts the string @a psz from UTF-8 encoding to Unicode. Returns the size
  56         of the destination buffer. See wxMBConv::MB2WC() for the parameters.
  57     */
  58     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  59
  60     /**
  61         Converts the string @a psz from Unicode to UTF-8 encoding. Returns the size
  62         of the destination buffer. See wxMBConv::WC2MB() for the parameters.
  63     */
  64     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
  65 };
  66
  67
  68
  69 /**
  70     @class wxMBConvUTF16
  71     @wxheader{strconv.h}
  72
  73     This class is used to convert between multibyte encodings and UTF-16 Unicode
  74     encoding (also known as UCS-2). Unlike the UTF-8 encoding (see wxMBConvUTF8),
  75     UTF-16 uses words and not bytes and hence depends on the byte ordering:
  76     big or little endian. Hence this class is provided in two versions,
  77     wxMBConvUTF16LE and wxMBConvUTF16BE, and wxMBConvUTF16 itself is just a typedef
  78     for one of them (native for the given platform, e.g. LE under Windows and BE
  79     under Mac).
  80
  81     @library{wxbase}
  82     @category{conv}
  83
  84     @see wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconv "wxMBConv classes overview"
  85 */
  86 class wxMBConvUTF16 : public wxMBConv
  87 {
  88 public:
  89     /**
  90         Converts the string @a psz from UTF-16 encoding to Unicode. Returns the size
  91         of the destination buffer. See wxMBConv::MB2WC() for the parameters.
  92     */
  93     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
  94
  95     /**
  96         Converts the string @a psz from Unicode to UTF-16 encoding. Returns the size
  97         of the destination buffer. See wxMBConv::WC2MB() for the parameters.
  98     */
  99     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 100 };
 101
 102
 103
 104 /**
 105     @class wxCSConv
 106     @wxheader{strconv.h}
 107
 108     This class converts between any character set and Unicode.
 109     It has one predefined instance, @b wxConvLocal, for the
 110     default user character set.
 111
 112     @library{wxbase}
 113     @category{conv}
 114
 115     @see wxMBConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
 116 */
 117 class wxCSConv : public wxMBConv
 118 {
 119 public:
 120     /**
 121         Constructor. You can specify the name of the character set you want to
 122         convert from/to. If the character set name is not recognized, ISO 8859-1
 123         is used as a fallback; use IsOk() to check for this.
 124     */
 125     wxCSConv(const wxChar* charset);
 126
 127     /**
 128         Constructor. You can specify an encoding constant for the
 129         character set you want to convert from/to. If the encoding
 130         is not recognized, ISO 8859-1 is used as a fallback; see IsOk().
 131     */
 132     wxCSConv(wxFontEncoding encoding);
 133
 134     /**
 135         Destructor frees any resources needed to perform the conversion.
 136     */
 137     ~wxCSConv();
 138
 139     /**
 140         Returns @true if the charset (or the encoding) given to the constructor is
 141         really available for use. Returns @false if ISO 8859-1 will be used instead.
 142         Note that this does not mean that a given string will be correctly converted:
 143         a malformed string may still make conversion functions return @c wxCONV_FAILED.
 144
 145         @since 2.8.2
 146     */
 147     bool IsOk() const;
 148
 149     /**
 150         Converts from the selected character set to Unicode. Returns the length of
 151         the string written to the destination buffer. See also wxMBConv::MB2WC().
 152     */
 153     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 154
 155     /**
 156         Converts from Unicode to the selected character set. Returns the length of
 157         the string written to the destination buffer. See also wxMBConv::WC2MB().
 158     */
 159     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 160 };
 161
 162
 163
 164 /**
 165     @class wxMBConvFile
 166     @wxheader{strconv.h}
 167
 168     This class was formerly used to define the class instance
 169     @b wxConvFileName, but nowadays @b wxConvFileName is
 170     either of type wxConvLibc (on most platforms) or wxConvUTF8
 171     (on MacOS X). @b wxConvFileName converts filenames between
 172     filesystem multibyte encoding and Unicode. @b wxConvFileName
 173     can also be set to something else at run-time, which is used
 174     e.g. by wxGTK to use a class which checks the environment
 175     variable @b G_FILESYSTEM_ENCODING indicating that filenames
 176     should not be interpreted as UTF8 and also for converting
 177     invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
 178     to strings with octal values.
 179
 180     Since some platforms (such as Win32) use Unicode in the filenames,
 181     and others (such as Unix) use multibyte encodings, this class should only
 182     be used directly if wxMBFILES is defined to 1. A convenience macro,
 183     wxFNCONV, is defined to wxConvFileName->cWX2MB in this case. You could
 184     use it like this:
 185
 186     @code
 187     const wxChar *name = wxT("rawfile.doc");
 188     FILE *fil = fopen(wxFNCONV(name), "r");
 189     @endcode
 190
 191     (although it would be better to use wxFopen(name, wxT("r")) in this case.)
 192
 193     @library{wxbase}
 194     @category{conv}
 195
 196     @see @ref overview_mbconv "wxMBConv classes overview"
 197 */
 198 class wxMBConvFile : public wxMBConv
 199 {
 200 public:
 201     /**
 202         Converts from multibyte filename encoding to Unicode. Returns the size of the
 203         destination buffer. See wxMBConv::MB2WC() for the parameters.
 204     */
 205     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 206
 207     /**
 208         Converts from Unicode to multibyte filename encoding. Returns the size of the
 209         destination buffer. See wxMBConv::WC2MB() for the parameters.
 210     */
 211     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 212 };
 213
 214
 215
 216 /**
 217     @class wxMBConvUTF32
 218     @wxheader{strconv.h}
 219
 220     This class is used to convert between multibyte encodings and UTF-32 Unicode
 221     encoding (also known as UCS-4). Unlike the UTF-8 encoding (see wxMBConvUTF8),
 222     UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
 223     big or little endian. Hence this class is provided in two versions,
 224     wxMBConvUTF32LE and wxMBConvUTF32BE, and wxMBConvUTF32 itself is just a typedef
 225     for one of them (native for the given platform, e.g. LE under Windows and BE
 226     under Mac).
 227
 228     @library{wxbase}
 229     @category{conv}
 230
 231     @see wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconv "wxMBConv classes overview"
 232 */
 233 class wxMBConvUTF32 : public wxMBConv
 234 {
 235 public:
 236     /**
 237         Converts the string @a psz from UTF-32 encoding to Unicode. Returns the size
 238         of the destination buffer. See wxMBConv::MB2WC() for the parameters.
 239     */
 240     size_t MB2WC(wchar_t* buf, const char* psz, size_t n) const;
 241
 242     /**
 243         Converts the string @a psz from Unicode to UTF-32 encoding. Returns the size
 244         of the destination buffer. See wxMBConv::WC2MB() for the parameters.
 245     */
 246     size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 247 };
 248
 249
 250
 251 /**
 252     @class wxMBConv
 253     @wxheader{strconv.h}
 254
 255     This class is the base class of a hierarchy of classes capable of converting
 256     text strings between multibyte (SBCS or DBCS) encodings and Unicode.
 257
 258     In the documentation for this and related classes please notice that the
 259     @e length of a string refers to the number of characters in the string,
 260     not counting the terminating @c NUL, if any, while the @e size of a string
 261     is the total number of bytes in the string, including any trailing @c NUL.
 262     Thus, the length of the wide character string @c L"foo" is 3 while its size
 263     can be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
 264     under Windows) or 4 (Unix).
 265
 266     @library{wxbase}
 267     @category{conv}
 268
 269     @see wxCSConv, wxEncodingConverter, @ref overview_mbconv "wxMBConv classes overview"
 270 */
 271 class wxMBConv
 272 {
 273 public:
 274     /**
 275         Trivial default constructor.
 276     */
 277     wxMBConv();
 278
 279     /**
 280         This pure virtual function is overridden in each of the derived classes to
 281         return a new copy of the object it is called on. It is used for copying the
 282         conversion objects while preserving their dynamic type.
 283     */
 284     virtual wxMBConv* Clone() const = 0;
 285
 286     /**
 287         This function has the same semantics as ToWChar()
 288         except that it converts a wide string to a multibyte one.
 289     */
 290     virtual size_t FromWChar(char* dst, size_t dstLen,
 291                              const wchar_t* src,
 292                              size_t srcLen = wxNO_LEN) const;
 293
 294     /**
 295         This function returns 1 for most of the multibyte encodings in which the
 296         string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
 297         which the string is terminated with 2 and 4 @c NUL characters respectively.
 298         The other cases are not currently supported and @c wxCONV_FAILED
 299         (defined as -1) is returned for them.
 300     */
 301     size_t GetMBNulLen() const;
 302
 303     /**
 304         Returns the maximal value which can be returned by
 305         GetMBNulLen() for any conversion object. Currently
 306         this value is 4.
 307         This static method can be used to allocate a buffer with enough space for
 308         the trailing @c NUL characters of any encoding.
 309     */
 310     static size_t GetMaxMBNulLen();
 311
 312     /**
 313         This function is deprecated, please use ToWChar() instead.
 314         Converts from a string @a in in multibyte encoding to Unicode putting up to
 315         @a outLen characters into the buffer @a out.
 316         If @a out is @NULL, only the length of the string which would result from
 317         the conversion is calculated and returned. Note that this is the length and not
 318         size, i.e. the returned value does not include the trailing @c NUL. But
 319         when the function is called with a non-@NULL @a out buffer, the @a outLen
 320         parameter should be one more to allow to properly @c NUL-terminate the string.
 321
 322         @param out
 323             The output buffer, may be @NULL if the caller is only
 324             interested in the length of the resulting string
 325         @param in
 326             The NUL-terminated input string, cannot be @NULL
 327         @param outLen
 328             The length of the output buffer, including the
 329             NUL; ignored if @a out is @NULL
 330
 331         @return The length of the converted string excluding the trailing NUL.
 332     */
 333     virtual size_t MB2WC(wchar_t* out, const char* in,
 334                          size_t outLen) const;
 335
 336     /**
 337         The most general function for converting a multibyte string to a wide string.
 338         The main case is when @a dst is not @NULL and @a srcLen is not
 339         @c wxNO_LEN (which is defined as @c (size_t)-1): then
 340         the function converts exactly @a srcLen bytes starting at @a src into a
 341         wide string which it outputs to @a dst. If the length of the resulting wide
 342         string is greater than @a dstLen, an error is returned. Note that if
 343         @a srcLen bytes don't include @c NUL characters, the resulting wide string is
 344         not @c NUL-terminated either.
 345         If @a srcLen is @c wxNO_LEN, the function supposes that the string is
 346         properly (i.e. as necessary for the encoding handled by this conversion)
 347         @c NUL-terminated and converts the entire string, including any trailing @c NUL
 348         bytes. In this case the wide string is also @c NUL-terminated.
 349         Finally, if @a dst is @NULL, the function returns the length of the needed
 350         buffer.
 351     */
 352     virtual size_t ToWChar(wchar_t* dst, size_t dstLen,
 353                            const char* src,
 354                            size_t srcLen = wxNO_LEN) const;
 355
 356     /**
 357         This function is deprecated, please use FromWChar() instead.
 358         Converts from Unicode to multibyte encoding. The semantics of this function
 359         (including the return value meaning) is the same as for
 360         MB2WC().
 361         Notice that when the function is called with a non-@NULL buffer, the
 362         @a n parameter should be the size of the buffer and so it should take
 363         into account the trailing @c NUL, which might take two or four bytes for some
 364         encodings (UTF-16 and UTF-32) and not one.
 365     */
 366     virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n) const;
 367
 368     //@{
 369     /**
 370         Converts from multibyte encoding to Unicode by calling MB2WC(),
 371         allocating a temporary wxWCharBuffer to hold the result.
 372
 373         The first overload takes a @c NUL-terminated input string. The second one
 374         takes a string of exactly the specified length @a inLen, and the string
 375         may or may not include the trailing @c NUL character(s).
 376         If the string is not @c NUL-terminated, a temporary @c NUL-terminated
 377         copy of it suitable for passing to MB2WC() is made, so it is more
 378         efficient to ensure that the string does have the appropriate
 379         number of @c NUL bytes (which is usually 1 but may be 2 or 4
 380         for UTF-16 or UTF-32, see GetMBNulLen()),
 381         especially for long strings.
 382
 383         If @a outLen is not @NULL, it receives the length of the converted
 384         string.
 385     */
 386     const wxWCharBuffer cMB2WC(const char* in) const;
 387     const wxWCharBuffer cMB2WC(const char* in,
 388                                size_t inLen,
 389                                size_t* outLen) const;
 390     //@}
 391
 392     //@{
 393     /**
 394         Converts from multibyte encoding to the current wxChar type
 395         (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
 396         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 397         result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
 398         return type (without const).
 399     */
 400     const char* cMB2WX(const char* psz) const;
 401     const wxWCharBuffer cMB2WX(const char* psz) const;
 402     //@}
 403
 404     //@{
 405     /**
 406         Converts from Unicode to multibyte encoding by calling WC2MB(),
 407         allocating a temporary wxCharBuffer to hold the result.
 408         The second overload of this function allows to convert a string of the given
 409         length @a inLen, whether it is @c NUL-terminated or not (for wide character
 410         strings, unlike for the multibyte ones, a single @c NUL is always enough).
 411         But notice that just as with cMB2WC(), it is more
 412         efficient to pass an already terminated string to this function as otherwise a
 413         copy is made internally.
 414         If @a outLen is not @NULL, it receives the length of the converted
 415         string.
 416     */
 417     const wxCharBuffer cWC2MB(const wchar_t* in) const;
 418     const wxCharBuffer cWC2MB(const wchar_t* in,
 419                               size_t inLen,
 420                               size_t* outLen) const;
 421     //@}
 422
 423     //@{
 424     /**
 425         Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
 426         it returns the parameter unaltered. If wxChar is char, it returns the
 427         result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
 428         return type (without const).
 429     */
 430     const wchar_t* cWC2WX(const wchar_t* psz) const;
 431     const wxCharBuffer cWC2WX(const wchar_t* psz) const;
 432     //@}
 433
 434     //@{
 435     /**
 436         Converts from the current wxChar type to multibyte encoding. If wxChar is char,
 437         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 438         result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
 439         return type (without const).
 440     */
 441     const char* cWX2MB(const wxChar* psz) const;
 442     const wxCharBuffer cWX2MB(const wxChar* psz) const;
 443     //@}
 444
 445     //@{
 446     /**
 447         Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
 448         it returns the parameter unaltered. If wxChar is char, it returns the
 449         result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
 450         return type (without const).
 451     */
 452     const wchar_t* cWX2WC(const wxChar* psz) const;
 453     const wxWCharBuffer cWX2WC(const wxChar* psz) const;
 454     //@}
 455 };
 456