| 1 | /////////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: wx/strconv.h |
| 3 | // Purpose: conversion routines for char sets and Unicode |
| 4 | // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin |
| 5 | // Modified by: |
| 6 | // Created: 29/01/98 |
| 7 | // RCS-ID: $Id$ |
| 8 | // Copyright: (c) 1998 Ove Kaaven, Robert Roebling |
| 9 | // (c) 1998-2006 Vadim Zeitlin |
| 10 | // Licence: wxWindows licence |
| 11 | /////////////////////////////////////////////////////////////////////////////// |
| 12 | |
| 13 | #ifndef _WX_STRCONV_H_ |
| 14 | #define _WX_STRCONV_H_ |
| 15 | |
| 16 | #include "wx/defs.h" |
| 17 | #include "wx/chartype.h" |
| 18 | #include "wx/buffer.h" |
| 19 | |
| 20 | #ifdef __DIGITALMARS__ |
| 21 | #include "typeinfo.h" |
| 22 | #endif |
| 23 | |
| 24 | #if defined(__VISAGECPP__) && __IBMCPP__ >= 400 |
| 25 | # undef __BSEXCPT__ |
| 26 | #endif |
| 27 | |
| 28 | #ifndef __WXPALMOS5__ |
| 29 | #include <stdlib.h> |
| 30 | #endif // ! __WXPALMOS5__ |
| 31 | |
| 32 | class WXDLLIMPEXP_FWD_BASE wxString; |
| 33 | |
| 34 | // the error value returned by wxMBConv methods |
| 35 | #define wxCONV_FAILED ((size_t)-1) |
| 36 | |
| 37 | // ---------------------------------------------------------------------------- |
| 38 | // wxMBConv (abstract base class for conversions) |
| 39 | // ---------------------------------------------------------------------------- |
| 40 | |
| 41 | // When deriving a new class from wxMBConv you must reimplement ToWChar() and |
| 42 | // FromWChar() methods which are not pure virtual only for historical reasons, |
| 43 | // don't let the fact that the existing classes implement MB2WC/WC2MB() instead |
| 44 | // confuse you. |
| 45 | // |
| 46 | // You also have to implement Clone() to allow copying the conversions |
| 47 | // polymorphically. |
| 48 | // |
| 49 | // And you might need to override GetMBNulLen() as well. |
| 50 | class WXDLLIMPEXP_BASE wxMBConv |
| 51 | { |
| 52 | public: |
| 53 | // The functions doing actual conversion from/to narrow to/from wide |
| 54 | // character strings. |
| 55 | // |
| 56 | // On success, the return value is the length (i.e. the number of |
| 57 | // characters, not bytes) of the converted string including any trailing |
| 58 | // L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if |
| 59 | // there is not enough space for everything, including the trailing NUL |
| 60 | // character(s), in the output buffer, wxCONV_FAILED is returned. |
| 61 | // |
| 62 | // In the special case when dst is NULL (the value of dstLen is ignored |
| 63 | // then) the return value is the length of the needed buffer but nothing |
| 64 | // happens otherwise. If srcLen is wxNO_LEN, the entire string, up to and |
| 65 | // including the trailing NUL(s), is converted, otherwise exactly srcLen |
| 66 | // bytes are. |
| 67 | // |
| 68 | // Typical usage: |
| 69 | // |
| 70 | // size_t dstLen = conv.ToWChar(NULL, 0, src); |
| 71 | // if ( dstLen == wxCONV_FAILED ) |
| 72 | // ... handle error ... |
| 73 | // wchar_t *wbuf = new wchar_t[dstLen]; |
| 74 | // conv.ToWChar(wbuf, dstLen, src); |
| 75 | // ... work with wbuf ... |
| 76 | // delete [] wbuf; |
| 77 | // |
| 78 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 79 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 80 | |
| 81 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 82 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 83 | |
| 84 | |
| 85 | // Convenience functions for translating NUL-terminated strings: returns |
| 86 | // the buffer containing the converted string or NULL pointer if the |
| 87 | // conversion failed. |
| 88 | const wxWCharBuffer cMB2WC(const char *in) const; |
| 89 | const wxCharBuffer cWC2MB(const wchar_t *in) const; |
| 90 | |
| 91 | // Convenience functions for converting strings which may contain embedded |
| 92 | // NULs and don't have to be NUL-terminated. |
| 93 | // |
| 94 | // inLen is the length of the buffer including trailing NUL if any or |
| 95 | // wxNO_LEN if the input is NUL-terminated. |
| 96 | // |
| 97 | // outLen receives, if not NULL, the length of the converted string or 0 if |
| 98 | // the conversion failed (returning 0 and not -1 in this case makes it |
| 99 | // difficult to distinguish between failed conversion and empty input but |
| 100 | // this is done for backwards compatibility). Notice that the rules for |
| 101 | // whether outLen accounts or not for the last NUL are the same as for |
| 102 | // To/FromWChar() above: if inLen is specified, outLen is exactly the |
| 103 | // number of characters converted, whether the last one of them was NUL or |
| 104 | // not. But if inLen == wxNO_LEN then outLen doesn't account for the last |
| 105 | // NUL even though it is present. |
| 106 | const wxWCharBuffer |
| 107 | cMB2WC(const char *in, size_t inLen, size_t *outLen) const; |
| 108 | const wxCharBuffer |
| 109 | cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const; |
| 110 | |
| 111 | // And yet more convenience functions for converting the entire buffers: |
| 112 | // these are the simplest and least error-prone as you never need to bother |
| 113 | // with lengths/sizes directly. |
| 114 | const wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const; |
| 115 | const wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const; |
| 116 | |
| 117 | // convenience functions for converting MB or WC to/from wxWin default |
| 118 | #if wxUSE_UNICODE |
| 119 | const wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); } |
| 120 | const wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); } |
| 121 | const wchar_t* cWC2WX(const wchar_t *psz) const { return psz; } |
| 122 | const wchar_t* cWX2WC(const wchar_t *psz) const { return psz; } |
| 123 | #else // ANSI |
| 124 | const char* cMB2WX(const char *psz) const { return psz; } |
| 125 | const char* cWX2MB(const char *psz) const { return psz; } |
| 126 | const wxCharBuffer cWC2WX(const wchar_t *psz) const { return cWC2MB(psz); } |
| 127 | const wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); } |
| 128 | #endif // Unicode/ANSI |
| 129 | |
| 130 | // this function is used in the implementation of cMB2WC() to distinguish |
| 131 | // between the following cases: |
| 132 | // |
| 133 | // a) var width encoding with strings terminated by a single NUL |
| 134 | // (usual multibyte encodings): return 1 in this case |
| 135 | // b) fixed width encoding with 2 bytes/char and so terminated by |
| 136 | // 2 NULs (UTF-16/UCS-2 and variants): return 2 in this case |
| 137 | // c) fixed width encoding with 4 bytes/char and so terminated by |
| 138 | // 4 NULs (UTF-32/UCS-4 and variants): return 4 in this case |
| 139 | // |
| 140 | // anything else is not supported currently and -1 should be returned |
| 141 | virtual size_t GetMBNulLen() const { return 1; } |
| 142 | |
| 143 | // return the maximal value currently returned by GetMBNulLen() for any |
| 144 | // encoding |
| 145 | static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; } |
| 146 | |
| 147 | #if wxUSE_UNICODE_UTF8 |
| 148 | // return true if the converter's charset is UTF-8, i.e. char* strings |
| 149 | // decoded using this object can be directly copied to wxString's internal |
| 150 | // storage without converting to WC and than back to UTF-8 MB string |
| 151 | virtual bool IsUTF8() const { return false; } |
| 152 | #endif |
| 153 | |
| 154 | // The old conversion functions. The existing classes currently mostly |
| 155 | // implement these ones but we're in transition to using To/FromWChar() |
| 156 | // instead and any new classes should implement just the new functions. |
| 157 | // For now, however, we provide default implementation of To/FromWChar() in |
| 158 | // this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all |
| 159 | // the conversions at once. |
| 160 | // |
| 161 | // On success, the return value is the length (i.e. the number of |
| 162 | // characters, not bytes) not counting the trailing NUL(s) of the converted |
| 163 | // string. On failure, (size_t)-1 is returned. In the special case when |
| 164 | // outputBuf is NULL the return value is the same one but nothing is |
| 165 | // written to the buffer. |
| 166 | // |
| 167 | // Note that outLen is the length of the output buffer, not the length of |
| 168 | // the input (which is always supposed to be terminated by one or more |
| 169 | // NULs, as appropriate for the encoding)! |
| 170 | virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const; |
| 171 | virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const; |
| 172 | |
| 173 | |
| 174 | // make a heap-allocated copy of this object |
| 175 | virtual wxMBConv *Clone() const = 0; |
| 176 | |
| 177 | // virtual dtor for any base class |
| 178 | virtual ~wxMBConv(); |
| 179 | }; |
| 180 | |
| 181 | // ---------------------------------------------------------------------------- |
| 182 | // wxMBConvLibc uses standard mbstowcs() and wcstombs() functions for |
| 183 | // conversion (hence it depends on the current locale) |
| 184 | // ---------------------------------------------------------------------------- |
| 185 | |
| 186 | class WXDLLIMPEXP_BASE wxMBConvLibc : public wxMBConv |
| 187 | { |
| 188 | public: |
| 189 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
| 190 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; |
| 191 | |
| 192 | virtual wxMBConv *Clone() const { return new wxMBConvLibc; } |
| 193 | |
| 194 | #if wxUSE_UNICODE_UTF8 |
| 195 | virtual bool IsUTF8() const { return wxLocaleIsUtf8; } |
| 196 | #endif |
| 197 | }; |
| 198 | |
| 199 | #ifdef __UNIX__ |
| 200 | |
| 201 | // ---------------------------------------------------------------------------- |
| 202 | // wxConvBrokenFileNames is made for Unix in Unicode mode when |
| 203 | // files are accidentally written in an encoding which is not |
| 204 | // the system encoding. Typically, the system encoding will be |
| 205 | // UTF8 but there might be files stored in ISO8859-1 on disk. |
| 206 | // ---------------------------------------------------------------------------- |
| 207 | |
| 208 | class WXDLLIMPEXP_BASE wxConvBrokenFileNames : public wxMBConv |
| 209 | { |
| 210 | public: |
| 211 | wxConvBrokenFileNames(const wxString& charset); |
| 212 | wxConvBrokenFileNames(const wxConvBrokenFileNames& conv) |
| 213 | : wxMBConv(), |
| 214 | m_conv(conv.m_conv ? conv.m_conv->Clone() : NULL) |
| 215 | { |
| 216 | } |
| 217 | virtual ~wxConvBrokenFileNames() { delete m_conv; } |
| 218 | |
| 219 | virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const |
| 220 | { |
| 221 | return m_conv->MB2WC(out, in, outLen); |
| 222 | } |
| 223 | |
| 224 | virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const |
| 225 | { |
| 226 | return m_conv->WC2MB(out, in, outLen); |
| 227 | } |
| 228 | |
| 229 | virtual size_t GetMBNulLen() const |
| 230 | { |
| 231 | // cast needed to call a private function |
| 232 | return m_conv->GetMBNulLen(); |
| 233 | } |
| 234 | |
| 235 | #if wxUSE_UNICODE_UTF8 |
| 236 | virtual bool IsUTF8() const { return m_conv->IsUTF8(); } |
| 237 | #endif |
| 238 | |
| 239 | virtual wxMBConv *Clone() const { return new wxConvBrokenFileNames(*this); } |
| 240 | |
| 241 | private: |
| 242 | // the conversion object we forward to |
| 243 | wxMBConv *m_conv; |
| 244 | |
| 245 | wxDECLARE_NO_ASSIGN_CLASS(wxConvBrokenFileNames); |
| 246 | }; |
| 247 | |
| 248 | #endif // __UNIX__ |
| 249 | |
| 250 | // ---------------------------------------------------------------------------- |
| 251 | // wxMBConvUTF7 (for conversion using UTF7 encoding) |
| 252 | // ---------------------------------------------------------------------------- |
| 253 | |
| 254 | class WXDLLIMPEXP_BASE wxMBConvUTF7 : public wxMBConv |
| 255 | { |
| 256 | public: |
| 257 | wxMBConvUTF7() { } |
| 258 | |
| 259 | // compiler-generated copy ctor, assignment operator and dtor are ok |
| 260 | // (assuming it's ok to copy the shift state -- not really sure about it) |
| 261 | |
| 262 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 263 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 264 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 265 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 266 | |
| 267 | virtual wxMBConv *Clone() const { return new wxMBConvUTF7; } |
| 268 | |
| 269 | private: |
| 270 | // UTF-7 decoder/encoder may be in direct mode or in shifted mode after a |
| 271 | // '+' (and until the '-' or any other non-base64 character) |
| 272 | struct StateMode |
| 273 | { |
| 274 | enum Mode |
| 275 | { |
| 276 | Direct, // pass through state |
| 277 | Shifted // after a '+' (and before '-') |
| 278 | }; |
| 279 | }; |
| 280 | |
| 281 | // the current decoder state: this is only used by ToWChar() if srcLen |
| 282 | // parameter is not wxNO_LEN, when working on the entire NUL-terminated |
| 283 | // strings we neither update nor use the state |
| 284 | class DecoderState : private StateMode |
| 285 | { |
| 286 | private: |
| 287 | // current state: this one is private as we want to enforce the use of |
| 288 | // ToDirect/ToShifted() methods below |
| 289 | Mode mode; |
| 290 | |
| 291 | public: |
| 292 | // the initial state is direct |
| 293 | DecoderState() { mode = Direct; } |
| 294 | |
| 295 | // switch to/from shifted mode |
| 296 | void ToDirect() { mode = Direct; } |
| 297 | void ToShifted() { mode = Shifted; accum = bit = 0; isLSB = false; } |
| 298 | |
| 299 | bool IsDirect() const { return mode == Direct; } |
| 300 | bool IsShifted() const { return mode == Shifted; } |
| 301 | |
| 302 | |
| 303 | // these variables are only used in shifted mode |
| 304 | |
| 305 | unsigned int accum; // accumulator of the bit we've already got |
| 306 | unsigned int bit; // the number of bits consumed mod 8 |
| 307 | unsigned char msb; // the high byte of UTF-16 word |
| 308 | bool isLSB; // whether we're decoding LSB or MSB of UTF-16 word |
| 309 | }; |
| 310 | |
| 311 | DecoderState m_stateDecoder; |
| 312 | |
| 313 | |
| 314 | // encoder state is simpler as we always receive entire Unicode characters |
| 315 | // on input |
| 316 | class EncoderState : private StateMode |
| 317 | { |
| 318 | private: |
| 319 | Mode mode; |
| 320 | |
| 321 | public: |
| 322 | EncoderState() { mode = Direct; } |
| 323 | |
| 324 | void ToDirect() { mode = Direct; } |
| 325 | void ToShifted() { mode = Shifted; accum = bit = 0; } |
| 326 | |
| 327 | bool IsDirect() const { return mode == Direct; } |
| 328 | bool IsShifted() const { return mode == Shifted; } |
| 329 | |
| 330 | unsigned int accum; |
| 331 | unsigned int bit; |
| 332 | }; |
| 333 | |
| 334 | EncoderState m_stateEncoder; |
| 335 | }; |
| 336 | |
| 337 | // ---------------------------------------------------------------------------- |
| 338 | // wxMBConvUTF8 (for conversion using UTF8 encoding) |
| 339 | // ---------------------------------------------------------------------------- |
| 340 | |
| 341 | // this is the real UTF-8 conversion class, it has to be called "strict UTF-8" |
| 342 | // for compatibility reasons: the wxMBConvUTF8 class below also supports lossy |
| 343 | // conversions if it is created with non default options |
| 344 | class WXDLLIMPEXP_BASE wxMBConvStrictUTF8 : public wxMBConv |
| 345 | { |
| 346 | public: |
| 347 | // compiler-generated default ctor and other methods are ok |
| 348 | |
| 349 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 350 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 351 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 352 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 353 | |
| 354 | virtual wxMBConv *Clone() const { return new wxMBConvStrictUTF8(); } |
| 355 | |
| 356 | #if wxUSE_UNICODE_UTF8 |
| 357 | // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't |
| 358 | // take the shortcut in that case |
| 359 | virtual bool IsUTF8() const { return true; } |
| 360 | #endif |
| 361 | }; |
| 362 | |
| 363 | class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConvStrictUTF8 |
| 364 | { |
| 365 | public: |
| 366 | enum |
| 367 | { |
| 368 | MAP_INVALID_UTF8_NOT = 0, |
| 369 | MAP_INVALID_UTF8_TO_PUA = 1, |
| 370 | MAP_INVALID_UTF8_TO_OCTAL = 2 |
| 371 | }; |
| 372 | |
| 373 | wxMBConvUTF8(int options = MAP_INVALID_UTF8_NOT) : m_options(options) { } |
| 374 | |
| 375 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 376 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 377 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 378 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 379 | |
| 380 | virtual wxMBConv *Clone() const { return new wxMBConvUTF8(m_options); } |
| 381 | |
| 382 | #if wxUSE_UNICODE_UTF8 |
| 383 | // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't |
| 384 | // take the shortcut in that case |
| 385 | virtual bool IsUTF8() const { return m_options == MAP_INVALID_UTF8_NOT; } |
| 386 | #endif |
| 387 | |
| 388 | private: |
| 389 | int m_options; |
| 390 | }; |
| 391 | |
| 392 | // ---------------------------------------------------------------------------- |
| 393 | // wxMBConvUTF16Base: for both LE and BE variants |
| 394 | // ---------------------------------------------------------------------------- |
| 395 | |
| 396 | class WXDLLIMPEXP_BASE wxMBConvUTF16Base : public wxMBConv |
| 397 | { |
| 398 | public: |
| 399 | enum { BYTES_PER_CHAR = 2 }; |
| 400 | |
| 401 | virtual size_t GetMBNulLen() const { return BYTES_PER_CHAR; } |
| 402 | |
| 403 | protected: |
| 404 | // return the length of the buffer using srcLen if it's not wxNO_LEN and |
| 405 | // computing the length ourselves if it is; also checks that the length is |
| 406 | // even if specified as we need an entire number of UTF-16 characters and |
| 407 | // returns wxNO_LEN which indicates error if it is odd |
| 408 | static size_t GetLength(const char *src, size_t srcLen); |
| 409 | }; |
| 410 | |
| 411 | // ---------------------------------------------------------------------------- |
| 412 | // wxMBConvUTF16LE (for conversion using UTF16 Little Endian encoding) |
| 413 | // ---------------------------------------------------------------------------- |
| 414 | |
| 415 | class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConvUTF16Base |
| 416 | { |
| 417 | public: |
| 418 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 419 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 420 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 421 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 422 | virtual wxMBConv *Clone() const { return new wxMBConvUTF16LE; } |
| 423 | }; |
| 424 | |
| 425 | // ---------------------------------------------------------------------------- |
| 426 | // wxMBConvUTF16BE (for conversion using UTF16 Big Endian encoding) |
| 427 | // ---------------------------------------------------------------------------- |
| 428 | |
| 429 | class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConvUTF16Base |
| 430 | { |
| 431 | public: |
| 432 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 433 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 434 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 435 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 436 | virtual wxMBConv *Clone() const { return new wxMBConvUTF16BE; } |
| 437 | }; |
| 438 | |
| 439 | // ---------------------------------------------------------------------------- |
| 440 | // wxMBConvUTF32Base: base class for both LE and BE variants |
| 441 | // ---------------------------------------------------------------------------- |
| 442 | |
| 443 | class WXDLLIMPEXP_BASE wxMBConvUTF32Base : public wxMBConv |
| 444 | { |
| 445 | public: |
| 446 | enum { BYTES_PER_CHAR = 4 }; |
| 447 | |
| 448 | virtual size_t GetMBNulLen() const { return BYTES_PER_CHAR; } |
| 449 | |
| 450 | protected: |
| 451 | // this is similar to wxMBConvUTF16Base method with the same name except |
| 452 | // that, of course, it verifies that length is divisible by 4 if given and |
| 453 | // not by 2 |
| 454 | static size_t GetLength(const char *src, size_t srcLen); |
| 455 | }; |
| 456 | |
| 457 | // ---------------------------------------------------------------------------- |
| 458 | // wxMBConvUTF32LE (for conversion using UTF32 Little Endian encoding) |
| 459 | // ---------------------------------------------------------------------------- |
| 460 | |
| 461 | class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConvUTF32Base |
| 462 | { |
| 463 | public: |
| 464 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 465 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 466 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 467 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 468 | virtual wxMBConv *Clone() const { return new wxMBConvUTF32LE; } |
| 469 | }; |
| 470 | |
| 471 | // ---------------------------------------------------------------------------- |
| 472 | // wxMBConvUTF32BE (for conversion using UTF32 Big Endian encoding) |
| 473 | // ---------------------------------------------------------------------------- |
| 474 | |
| 475 | class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConvUTF32Base |
| 476 | { |
| 477 | public: |
| 478 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 479 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 480 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 481 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 482 | virtual wxMBConv *Clone() const { return new wxMBConvUTF32BE; } |
| 483 | }; |
| 484 | |
| 485 | // ---------------------------------------------------------------------------- |
| 486 | // wxCSConv (for conversion based on loadable char sets) |
| 487 | // ---------------------------------------------------------------------------- |
| 488 | |
| 489 | #include "wx/fontenc.h" |
| 490 | |
| 491 | class WXDLLIMPEXP_BASE wxCSConv : public wxMBConv |
| 492 | { |
| 493 | public: |
| 494 | // we can be created either from charset name or from an encoding constant |
| 495 | // but we can't have both at once |
| 496 | wxCSConv(const wxString& charset); |
| 497 | wxCSConv(wxFontEncoding encoding); |
| 498 | |
| 499 | wxCSConv(const wxCSConv& conv); |
| 500 | virtual ~wxCSConv(); |
| 501 | |
| 502 | wxCSConv& operator=(const wxCSConv& conv); |
| 503 | |
| 504 | virtual size_t ToWChar(wchar_t *dst, size_t dstLen, |
| 505 | const char *src, size_t srcLen = wxNO_LEN) const; |
| 506 | virtual size_t FromWChar(char *dst, size_t dstLen, |
| 507 | const wchar_t *src, size_t srcLen = wxNO_LEN) const; |
| 508 | virtual size_t GetMBNulLen() const; |
| 509 | |
| 510 | #if wxUSE_UNICODE_UTF8 |
| 511 | virtual bool IsUTF8() const; |
| 512 | #endif |
| 513 | |
| 514 | virtual wxMBConv *Clone() const { return new wxCSConv(*this); } |
| 515 | |
| 516 | void Clear(); |
| 517 | |
| 518 | // return true if the conversion could be initialized successfully |
| 519 | bool IsOk() const; |
| 520 | |
| 521 | private: |
| 522 | // common part of all ctors |
| 523 | void Init(); |
| 524 | |
| 525 | // Creates the conversion to use, called from all ctors to initialize |
| 526 | // m_convReal. |
| 527 | wxMBConv *DoCreate() const; |
| 528 | |
| 529 | // Set the name (may be only called when m_name == NULL), makes copy of |
| 530 | // the charset string. |
| 531 | void SetName(const char *charset); |
| 532 | |
| 533 | // Set m_encoding field respecting the rules below, i.e. making sure it has |
| 534 | // a valid value if m_name == NULL (thus this should be always called after |
| 535 | // SetName()). |
| 536 | // |
| 537 | // Input encoding may be valid or not. |
| 538 | void SetEncoding(wxFontEncoding encoding); |
| 539 | |
| 540 | |
| 541 | // The encoding we use is specified by the two fields below: |
| 542 | // |
| 543 | // 1. If m_name != NULL, m_encoding corresponds to it if it's one of |
| 544 | // encodings we know about (i.e. member of wxFontEncoding) or is |
| 545 | // wxFONTENCODING_SYSTEM otherwise. |
| 546 | // |
| 547 | // 2. If m_name == NULL, m_encoding is always valid, i.e. not one of |
| 548 | // wxFONTENCODING_{SYSTEM,DEFAULT,MAX}. |
| 549 | char *m_name; |
| 550 | wxFontEncoding m_encoding; |
| 551 | |
| 552 | // The conversion object for our encoding or NULL if we failed to create it |
| 553 | // in which case we fall back to hard-coded ISO8859-1 conversion. |
| 554 | wxMBConv *m_convReal; |
| 555 | }; |
| 556 | |
| 557 | |
| 558 | // ---------------------------------------------------------------------------- |
| 559 | // declare predefined conversion objects |
| 560 | // ---------------------------------------------------------------------------- |
| 561 | |
// Note: this macro is an implementation detail (the comment in strconv.cpp
//       explains it). User code should use neither the wxGet_XXX() and
//       wxGet_XXXPtr() functions nor XXXPtr itself: use the wxConvXXX macro
//       instead.
//
// For the given class and name the macro declares the name##Ptr variable
// and the wxGet_##name##Ptr() function and defines the inline
// wxGet_##name() accessor, which fills in name##Ptr from
// wxGet_##name##Ptr() if it is still NULL and returns the object the
// pointer refers to.
#define WX_DECLARE_GLOBAL_CONV(klass, name)                                   \
    extern WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr;                           \
    extern WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr();                       \
    inline klass& wxGet_##name()                                              \
    {                                                                         \
        if ( name##Ptr )                                                      \
            return *name##Ptr;                                                \
                                                                              \
        name##Ptr = wxGet_##name##Ptr();                                      \
        return *name##Ptr;                                                    \
    }
| 575 | |
| 576 | |
| 577 | // conversion to be used with all standard functions affected by locale, e.g. |
| 578 | // strtol(), strftime(), ... |
| 579 | WX_DECLARE_GLOBAL_CONV(wxMBConv, wxConvLibc) |
| 580 | #define wxConvLibc wxGet_wxConvLibc() |
| 581 | |
| 582 | // conversion ISO-8859-1/UTF-7/UTF-8 <-> wchar_t |
| 583 | WX_DECLARE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1) |
| 584 | #define wxConvISO8859_1 wxGet_wxConvISO8859_1() |
| 585 | |
| 586 | WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8) |
| 587 | #define wxConvUTF8 wxGet_wxConvUTF8() |
| 588 | |
| 589 | WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7) |
| 590 | #define wxConvUTF7 wxGet_wxConvUTF7() |
| 591 | |
| 592 | // conversion used for the file names on the systems where they're not Unicode |
| 593 | // (basically anything except Windows) |
| 594 | // |
| 595 | // this is used by all file functions, can be changed by the application |
| 596 | // |
| 597 | // by default UTF-8 under Mac OS X and wxConvLibc elsewhere (but it's not used |
| 598 | // under Windows normally) |
| 599 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName; |
| 600 | |
| 601 | // backwards compatible define |
| 602 | #define wxConvFile (*wxConvFileName) |
| 603 | |
| 604 | // the current conversion object, may be set to any conversion, is used by |
| 605 | // default in a couple of places inside wx (initially same as wxConvLibc) |
| 606 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent; |
| 607 | |
| 608 | // the conversion corresponding to the current locale |
| 609 | WX_DECLARE_GLOBAL_CONV(wxCSConv, wxConvLocal) |
| 610 | #define wxConvLocal wxGet_wxConvLocal() |
| 611 | |
| 612 | // the conversion corresponding to the encoding of the standard UI elements |
| 613 | // |
| 614 | // by default this is the same as wxConvLocal but may be changed if the program |
| 615 | // needs to use a fixed encoding |
| 616 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI; |
| 617 | |
| 618 | #undef WX_DECLARE_GLOBAL_CONV |
| 619 | |
| 620 | // ---------------------------------------------------------------------------- |
| 621 | // endianness-dependent conversions |
| 622 | // ---------------------------------------------------------------------------- |
| 623 | |
| 624 | #ifdef WORDS_BIGENDIAN |
| 625 | typedef wxMBConvUTF16BE wxMBConvUTF16; |
| 626 | typedef wxMBConvUTF32BE wxMBConvUTF32; |
| 627 | #else |
| 628 | typedef wxMBConvUTF16LE wxMBConvUTF16; |
| 629 | typedef wxMBConvUTF32LE wxMBConvUTF32; |
| 630 | #endif |
| 631 | |
| 632 | // ---------------------------------------------------------------------------- |
| 633 | // filename conversion macros |
| 634 | // ---------------------------------------------------------------------------- |
| 635 | |
| 636 | // filenames are multibyte on Unix and widechar on Windows |
// Three cases are distinguished:
//  - multibyte file names in a Unicode build: convert using wxConvFileName
//  - __WXOSX_OR_COCOA__ with multibyte file names otherwise: go through
//    wxConvLocal first and then through wxConvFileName
//  - anything else: the name is used as is
#if wxMBFILES && wxUSE_UNICODE
    #define wxFNCONV(name) wxConvFileName->cWX2MB(name)
    #define wxFNSTRINGCAST wxMBSTRINGCAST
#elif defined( __WXOSX_OR_COCOA__ ) && wxMBFILES
    #define wxFNCONV(name) wxConvFileName->cWC2MB( wxConvLocal.cWX2WC(name) )
    #define wxFNSTRINGCAST WXSTRINGCAST
#else
    #define wxFNCONV(name) name
    #define wxFNSTRINGCAST WXSTRINGCAST
#endif
| 648 | |
| 649 | // ---------------------------------------------------------------------------- |
| 650 | // macros for the most common conversions |
| 651 | // ---------------------------------------------------------------------------- |
| 652 | |
#if !wxUSE_UNICODE
    // ANSI build: there are no conversions to do, so all the macros just
    // evaluate to their argument
    #define wxConvertWX2MB(s) (s)
    #define wxConvertMB2WX(s) (s)
    #define wxSafeConvertMB2WX(s) (s)
    #define wxSafeConvertWX2MB(s) (s)
#else // Unicode
    // convert using the current conversion object
    #define wxConvertWX2MB(s) wxConvCurrent->cWX2MB(s)
    #define wxConvertMB2WX(s) wxConvCurrent->cMB2WX(s)

    // The two functions below are for the cases when the conversion really,
    // really has to succeed (usually because the result is passed to a
    // standard C function which would crash if it were given NULL): they
    // always return a valid pointer if their argument is non-NULL.

    // This one achieves its safety by trying wxConvLibc first, then
    // wxConvUTF8 if it fails and, finally, wxConvISO8859_1 which always
    // succeeds.
    extern WXDLLIMPEXP_BASE wxWCharBuffer wxSafeConvertMB2WX(const char *s);

    // This one uses wxConvLibc and, if it fails,
    // wxConvUTF8(MAP_INVALID_UTF8_TO_OCTAL).
    extern WXDLLIMPEXP_BASE wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws);
#endif // ANSI/Unicode
| 676 | |
| 677 | #endif // _WX_STRCONV_H_ |
| 678 | |