interface/strconv.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.h
   3 // Purpose:     documentation for wxMBConv and its derived classes
   4 // Author:      wxWidgets team
   5 // RCS-ID:      $Id$
   6 // Licence:     wxWindows license
   7 /////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     @class wxMBConvUTF7
  11     @wxheader{strconv.h}
  12
  13     This class converts between the UTF-7 encoding and Unicode.
  14     It has one predefined instance, @b wxConvUTF7.
  15
  16     @b WARNING: this class is not implemented yet.
  17
  18     @library{wxbase}
  19     @category{FIXME}
  20
  21     @seealso
  22     wxMBConvUTF8, @ref overview_mbconvclasses "wxMBConv classes overview"
  23 */
  24 class wxMBConvUTF7 : public wxMBConv
  25 {
  26 public:
  27     /**
  28         Converts from UTF-7 encoding to Unicode. The parameters and the return
  29         value have the same meaning as for wxMBConv::MB2WC().
  30     */
  31     size_t MB2WC(wchar_t* buf, const char* psz, size_t n);
  32
  33     /**
  34         Converts from Unicode to UTF-7 encoding. The parameters and the return
  35         value have the same meaning as for wxMBConv::WC2MB().
  36     */
  37     size_t WC2MB(char* buf, const wchar_t* psz, size_t n);
  38 };
  39
  40
  41 /**
  42     @class wxMBConvUTF8
  43     @wxheader{strconv.h}
  44
  45     This class converts between the UTF-8 encoding and Unicode.
  46     It has one predefined instance, @b wxConvUTF8.
  47
  48     @library{wxbase}
  49     @category{FIXME}
  50
  51     @seealso
  52     wxMBConvUTF7, @ref overview_mbconvclasses "wxMBConv classes overview"
  53 */
  54 class wxMBConvUTF8 : public wxMBConv
  55 {
  56 public:
  57     /**
  58         Converts from UTF-8 encoding to Unicode. The parameters and the return
  59         value have the same meaning as for wxMBConv::MB2WC().
  60     */
  61     size_t MB2WC(wchar_t* buf, const char* psz, size_t n);
  62
  63     /**
  64         Converts from Unicode to UTF-8 encoding. The parameters and the return
  65         value have the same meaning as for wxMBConv::WC2MB().
  66     */
  67     size_t WC2MB(char* buf, const wchar_t* psz, size_t n);
  68 };
  69
  70
  71 /**
  72     @class wxMBConvUTF16
  73     @wxheader{strconv.h}
  74
  75     This class is used to convert between multibyte encodings and UTF-16 Unicode
  76     encoding (also known as UCS-2). Unlike UTF-8 encoding,
  77     UTF-16 uses words and not bytes and hence depends on the byte ordering:
  78     big or little endian. Hence this class is provided in two versions:
  79     wxMBConvUTF16LE and wxMBConvUTF16BE and wxMBConvUTF16 itself is just a typedef
  80     for one of them (native for the given platform, e.g. LE under Windows and BE
  81     under Mac).
  82
  83     @library{wxbase}
  84     @category{FIXME}
  85
  86     @seealso
  87     wxMBConvUTF8, wxMBConvUTF32, @ref overview_mbconvclasses "wxMBConv classes
  88     overview"
  89 */
  90 class wxMBConvUTF16 : public wxMBConv
  91 {
  92 public:
  93     /**
  94         Converts from UTF-16 encoding to Unicode. The parameters and the return
  95         value have the same meaning as for wxMBConv::MB2WC().
  96     */
  97     size_t MB2WC(wchar_t* buf, const char* psz, size_t n);
  98
  99     /**
 100         Converts from Unicode to UTF-16 encoding. The parameters and the return
 101         value have the same meaning as for wxMBConv::WC2MB().
 102     */
 103     size_t WC2MB(char* buf, const wchar_t* psz, size_t n);
 104 };
 105
 106
 107 /**
 108     @class wxCSConv
 109     @wxheader{strconv.h}
 110
 111     This class converts between any character sets and Unicode.
 112     It has one predefined instance, @b wxConvLocal, for the
 113     default user character set.
 114
 115     @library{wxbase}
 116     @category{FIXME}
 117
 118     @seealso
 119     wxMBConv, wxEncodingConverter, @ref overview_mbconvclasses "wxMBConv classes
 120     overview"
 121 */
 122 class wxCSConv : public wxMBConv
 123 {
 124 public:
 125     //@{
 126     /**
 127         Constructor. You may specify either the name of the character set you want to
 128         convert from/to or an encoding constant. If the character set name (or the
 129         encoding) is not recognized, ISO 8859-1 is used as a fallback.
 130     */
 131     wxCSConv(const wxChar* charset);
 132     wxCSConv(wxFontEncoding encoding);
 133     //@}
 134
 135     /**
 136         Destructor frees any resources needed to perform the conversion.
 137     */
 138     ~wxCSConv();
 139
 140     /**
 141         Returns @true if the charset (or the encoding) given to the constructor is
 142         really available to use. Returns @false if ISO 8859-1 will be used instead.
 143
 144         Note this does not mean that a given string will be correctly converted.
 145         A malformed string may still make conversion functions return @c wxCONV_FAILED.
 146
 147         @since 2.8.2
 148     */
 149     bool IsOk();
 150
 151     /**
 152         Converts from the selected character set to Unicode. Returns the length of
 153         the string written to the destination buffer.
 154     */
 155     size_t MB2WC(wchar_t* buf, const char* psz, size_t n);
 156
 157     /**
 158         Converts from Unicode to the selected character set. Returns the length of
 159         the string written to the destination buffer.
 160     */
 161     size_t WC2MB(char* buf, const wchar_t* psz, size_t n);
 162 };
 163
 164
 165 /**
 166     @class wxMBConvFile
 167     @wxheader{strconv.h}
 168
 169     This class used to define the class instance
 170     @b wxConvFileName, but nowadays @b wxConvFileName is
 171     either of type wxConvLibc (on most platforms) or wxConvUTF8
 172     (on MacOS X). @b wxConvFileName converts filenames between
 173     filesystem multibyte encoding and Unicode. @b wxConvFileName
 174     can also be set to something else at run-time, which is used
 175     e.g. by wxGTK to use a class which checks the environment
 176     variable @b G_FILESYSTEM_ENCODING indicating that filenames
 177     should not be interpreted as UTF8 and also for converting
 178     invalid UTF8 characters (e.g. if there is a filename in iso8859_1)
 179     to strings with octal values.
 180
 181     Since some platforms (such as Win32) use Unicode in the filenames,
 182     and others (such as Unix) use multibyte encodings, this class should only
 183     be used directly if wxMBFILES is defined to 1. A convenience macro,
 184     wxFNCONV, is defined as wxConvFileName->cWX2MB in this case. You could
 185     use it like this:
 186
 187     @code
 188     wxChar *name = wxT("rawfile.doc");
 189     FILE *fil = fopen(wxFNCONV(name), "r");
 190     @endcode
 191
 192     (although it would be better to use wxFopen(name, wxT("r")) in this case.)
 193
 194     @library{wxbase}
 195     @category{FIXME}
 196
 197     @seealso
 198     @ref overview_mbconvclasses "wxMBConv classes overview"
 199 */
 200 class wxMBConvFile : public wxMBConv
 201 {
 202 public:
 203     /**
 204         Converts from multibyte filename encoding to Unicode. The parameters and
 205         the return value have the same meaning as for wxMBConv::MB2WC().
 206     */
 207     size_t MB2WC(wchar_t* buf, const char* psz, size_t n);
 208
 209     /**
 210         Converts from Unicode to multibyte filename encoding. The parameters and
 211         the return value have the same meaning as for wxMBConv::WC2MB().
 212     */
 213     size_t WC2MB(char* buf, const wchar_t* psz, size_t n);
 214 };
 215
 216
 217 /**
 218     @class wxMBConvUTF32
 219     @wxheader{strconv.h}
 220
 221     This class is used to convert between multibyte encodings and UTF-32 Unicode
 222     encoding (also known as UCS-4). Unlike UTF-8 encoding,
 223     UTF-32 uses (double) words and not bytes and hence depends on the byte ordering:
 224     big or little endian. Hence this class is provided in two versions:
 225     wxMBConvUTF32LE and wxMBConvUTF32BE and wxMBConvUTF32 itself is just a typedef
 226     for one of them (native for the given platform, e.g. LE under Windows and BE
 227     under Mac).
 228
 229     @library{wxbase}
 230     @category{FIXME}
 231
 232     @seealso
 233     wxMBConvUTF8, wxMBConvUTF16, @ref overview_mbconvclasses "wxMBConv classes
 234     overview"
 235 */
 236 class wxMBConvUTF32 : public wxMBConv
 237 {
 238 public:
 239     /**
 240         Converts from UTF-32 encoding to Unicode. The parameters and the return
 241         value have the same meaning as for wxMBConv::MB2WC().
 242     */
 243     size_t MB2WC(wchar_t* buf, const char* psz, size_t n);
 244
 245     /**
 246         Converts from Unicode to UTF-32 encoding. The parameters and the return
 247         value have the same meaning as for wxMBConv::WC2MB().
 248     */
 249     size_t WC2MB(char* buf, const wchar_t* psz, size_t n);
 250 };
 251
 252
 253 /**
 254     @class wxMBConv
 255     @wxheader{strconv.h}
 256
 257     This class is the base class of a hierarchy of classes capable of converting
 258     text strings between multibyte (SBCS or DBCS) encodings and Unicode.
 259
 260     In the documentation for this and related classes please notice that the
 261     @e length of a string refers to the number of characters in the string,
 262     not counting the terminating @c NUL, if any, while the @e size of a string
 263     is the total number of bytes in the string, including any trailing @c NUL.
 264     Thus, length of wide character string @c L"foo" is 3 while its size can
 265     be either 8 or 16 depending on whether @c wchar_t is 2 bytes (as
 266     under Windows) or 4 (Unix).
 267
 268     @library{wxbase}
 269     @category{FIXME}
 270
 271     @seealso
 272     wxCSConv, wxEncodingConverter, @ref overview_mbconvclasses "wxMBConv classes
 273     overview"
 274 */
 275 class wxMBConv
 276 {
 277 public:
 278     /**
 279         Trivial default constructor.
 280     */
 281     wxMBConv();
 282
 283     /**
 284         This pure virtual function is overridden in each of the derived classes to
 285         return a new copy of the object it is called on. It is used for copying the
 286         conversion objects while preserving their dynamic type.
 287     */
 288     virtual wxMBConv * Clone() = 0;
 289
 290     /**
 291         This function has the same semantics as ToWChar()
 292         except that it converts a wide string to a multibyte one.
 293     */
 294     virtual size_t FromWChar(char * dst, size_t dstLen,
 295                              const wchar_t * src,
 296                              size_t srcLen = wxNO_LEN);
 297
 298     /**
 299         This function returns 1 for most of the multibyte encodings in which the
 300         string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for
 301         which the string is terminated with 2 and 4 @c NUL characters respectively.
 302         The other cases are not currently supported and @c wxCONV_FAILED
 303         (defined as -1) is returned for them.
 304     */
 305     size_t GetMBNulLen();
 306
 307     /**
 308         Returns the maximal value which can be returned by
 309         GetMBNulLen() for any conversion object. Currently
 310         this value is 4.
 311
 312         This method can be used to allocate the buffer with enough space for the
 313         trailing @c NUL characters for any encoding.
 314     */
 315     const size_t GetMaxMBNulLen();
 316
 317     /**
 318         @deprecated This function is deprecated, please use ToWChar() instead.
 319
 320         Converts from a string @e in in multibyte encoding to Unicode putting up to
 321         @e outLen characters into the buffer @e out.
 322
 323         If @e out is @NULL, only the length of the string which would result from
 324         the conversion is calculated and returned. Note that this is the length and not
 325         size, i.e. the returned value does not include the trailing @c NUL. But
 326         when the function is called with a non-@NULL @e out buffer, the @e outLen
 327         parameter should be one more to allow to properly @c NUL-terminate the string.
 328
 329         @param out
 330         The output buffer, may be @NULL if the caller is only
 331         interested in the length of the resulting string
 332
 333         @param in
 334         The NUL-terminated input string, cannot be @NULL
 335
 336         @param outLen
 337         The length of the output buffer, including the trailing
 338         NUL; ignored if @e out is @NULL
 339
 340         @returns The length of the converted string excluding the trailing NUL.
 341     */
 342     virtual size_t MB2WC(wchar_t * out, const char * in,
 343                          size_t outLen);
 344
 345     /**
 346         The most general function for converting a multibyte string to a wide string.
 347         The main case is when @e dst is not @NULL and @e srcLen is not
 348         @c wxNO_LEN (which is defined as @c (size_t)-1): then
 349         the function converts exactly @e srcLen bytes starting at @e src into
 350         a wide string which it outputs to @e dst. If the length of the resulting wide
 351         string is greater than @e dstLen, an error is returned. Note that if
 352         @e srcLen bytes don't include @c NUL characters, the resulting wide string is
 353         not @c NUL-terminated either.
 354
 355         If @e srcLen is @c wxNO_LEN, the function supposes that the string is
 356         properly (i.e. as necessary for the encoding handled by this conversion)
 357         @c NUL-terminated and converts the entire string, including any trailing @c NUL
 358         bytes. In this case the wide string is also @c NUL-terminated.
 359
 360         Finally, if @e dst is @NULL, the function returns the length of the needed
 361         buffer.
 362     */
 363     virtual size_t ToWChar(wchar_t * dst, size_t dstLen,
 364                            const char * src,
 365                            size_t srcLen = wxNO_LEN);
 366
 367     /**
 368         @deprecated This function is deprecated, please use FromWChar() instead.
 369
 370         Converts from Unicode to multibyte encoding. The semantics of this function
 371         (including the return value meaning) is the same as for
 372         MB2WC().
 373
 374         Notice that when the function is called with a non-@NULL buffer, the
 375         @e n parameter should be the size of the buffer and so it should take
 376         into account the trailing @c NUL, which might take two or four bytes for some
 377         encodings (UTF-16 and UTF-32) and not one.
 378     */
 379     virtual size_t WC2MB(char* buf, const wchar_t* psz, size_t n);
 380
 381     //@{
 382     /**
 383         Converts from multibyte encoding to Unicode by calling
 384         MB2WC(), allocating a temporary wxWCharBuffer to hold
 385         the result.
 386
 387         The first overload takes a @c NUL-terminated input string.
 388
 389         The second overload takes a string of exactly the specified length
 390         @e inLen, which may or may not include the trailing @c NUL
 391         character(s). If the string is not @c NUL-terminated, a temporary
 392         @c NUL-terminated copy of it suitable for passing to MB2WC() is
 393         made, so it is more efficient to ensure that the string does have
 394         the appropriate number of @c NUL bytes (which is usually 1 but may
 395         be 2 or 4 for UTF-16 or UTF-32, see GetMBNulLen()), especially for
 396         long strings.
 397
 398         If @e outLen is not @NULL, it receives the length of the converted
 399         string.
 400     */
 401     const wxWCharBuffer cMB2WC(const char * in);
 402     const wxWCharBuffer cMB2WC(const char * in, size_t inLen,
 403                                size_t * outLen);
 404     //@}
 405
 406     //@{
 407     /**
 408         Converts from multibyte encoding to the current wxChar type
 409         (which depends on whether wxUSE_UNICODE is set to 1). If wxChar is char,
 410         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 411         result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
 412         return type (without const).
 413     */
 414     const char* cMB2WX(const char* psz);
 415     const wxWCharBuffer cMB2WX(const char* psz);
 416     //@}
 417
 418     //@{
 419     /**
 420         Converts from Unicode to multibyte encoding by calling WC2MB(),
 421         allocating a temporary wxCharBuffer to hold the result.
 422
 423         The second overload of this function allows to convert a string of the given
 424         length @e inLen, whether it is @c NUL-terminated or not (for wide character
 425         strings, unlike for the multibyte ones, a single @c NUL is always enough).
 426         But notice that just as with cMB2WC(), it is more
 427         efficient to pass an already terminated string to this function as otherwise a
 428         copy is made internally.
 429
 430         If @e outLen is not @NULL, it receives the length of the converted
 431         string.
 432     */
 433     const wxCharBuffer cWC2MB(const wchar_t* in);
 434     const wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen,
 435                               size_t * outLen);
 436     //@}
 437
 438     //@{
 439     /**
 440         Converts from Unicode to the current wxChar type. If wxChar is wchar_t,
 441         it returns the parameter unaltered. If wxChar is char, it returns the
 442         result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
 443         return type (without const).
 444     */
 445     const wchar_t* cWC2WX(const wchar_t* psz);
 446     const wxCharBuffer cWC2WX(const wchar_t* psz);
 447     //@}
 448
 449     //@{
 450     /**
 451         Converts from the current wxChar type to multibyte encoding. If wxChar is char,
 452         it returns the parameter unaltered. If wxChar is wchar_t, it returns the
 453         result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
 454         return type (without const).
 455     */
 456     const char* cWX2MB(const wxChar* psz);
 457     const wxCharBuffer cWX2MB(const wxChar* psz);
 458     //@}
 459
 460     //@{
 461     /**
 462         Converts from the current wxChar type to Unicode. If wxChar is wchar_t,
 463         it returns the parameter unaltered. If wxChar is char, it returns the
 464         result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
 465         return type (without const).
 466     */
 467     const wchar_t* cWX2WC(const wxChar* psz);
 468     const wxWCharBuffer cWX2WC(const wxChar* psz);
 469     //@}
 470 };