src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif  //__BORLANDC__
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/intl.h"
  24     #include "wx/log.h"
  25     #include "wx/utils.h"
  26     #include "wx/hashmap.h"
  27 #endif
  28
  29 #include "wx/strconv.h"
  30
  31 #ifndef __WXWINCE__
  32 #include <errno.h>
  33 #endif
  34
  35 #include <ctype.h>
  36 #include <string.h>
  37 #include <stdlib.h>
  38
  39 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42     #define wxHAVE_WIN32_MB2WC
  43 #endif
  44
  45 #ifdef HAVE_ICONV
  46     #include <iconv.h>
  47     #include "wx/thread.h"
  48 #endif
  49
  50 #include "wx/encconv.h"
  51 #include "wx/fontmap.h"
  52
  53 #ifdef __DARWIN__
  54 #include "wx/osx/core/private/strconv_cf.h"
  55 #endif //def __DARWIN__
  56
  57
  58 #define TRACE_STRCONV wxT("strconv")
  59
  60 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
  61 // be 4 bytes
  62 #if SIZEOF_WCHAR_T == 2
  63     #define WC_UTF16
  64 #endif
  65
  66
  67 // ============================================================================
  68 // implementation
  69 // ============================================================================
  70
  71 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  72 static bool NotAllNULs(const char *p, size_t n)
  73 {
  74     while ( n && *p++ == '\0' )
  75         n--;
  76
  77     return n != 0;
  78 }
  79
  80 // ----------------------------------------------------------------------------
  81 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
  82 // ----------------------------------------------------------------------------
  83
  84 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  85 {
  86     if (input <= 0xffff)
  87     {
  88         if (output)
  89             *output = (wxUint16) input;
  90
  91         return 1;
  92     }
  93     else if (input >= 0x110000)
  94     {
  95         return wxCONV_FAILED;
  96     }
  97     else
  98     {
  99         if (output)
 100         {
 101             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 102             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 103         }
 104
 105         return 2;
 106     }
 107 }
 108
 109 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 110 {
 111     if ((*input < 0xd800) || (*input > 0xdfff))
 112     {
 113         output = *input;
 114         return 1;
 115     }
 116     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 117     {
 118         output = *input;
 119         return wxCONV_FAILED;
 120     }
 121     else
 122     {
 123         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 124         return 2;
 125     }
 126 }
 127
 128 #ifdef WC_UTF16
 129     typedef wchar_t wxDecodeSurrogate_t;
 130 #else // !WC_UTF16
 131     typedef wxUint16 wxDecodeSurrogate_t;
 132 #endif // WC_UTF16/!WC_UTF16
 133
 134 // returns the next UTF-32 character from the wchar_t buffer and advances the
 135 // pointer to the character after this one
 136 //
 137 // if an invalid character is found, *pSrc is set to NULL, the caller must
 138 // check for this
 139 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 140 {
 141     wxUint32 out;
 142     const size_t
 143         n = decode_utf16(reinterpret_cast<const wxUint16 *>(*pSrc), out);
 144     if ( n == wxCONV_FAILED )
 145         *pSrc = NULL;
 146     else
 147         *pSrc += n;
 148
 149     return out;
 150 }
 151
 152 // ----------------------------------------------------------------------------
 153 // wxMBConv
 154 // ----------------------------------------------------------------------------
 155
 156 size_t
 157 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
 158                   const char *src, size_t srcLen) const
 159 {
 160     // although new conversion classes are supposed to implement this function
 161     // directly, the existing ones only implement the old MB2WC() and so, to
 162     // avoid to have to rewrite all conversion classes at once, we provide a
 163     // default (but not efficient) implementation of this one in terms of the
 164     // old function by copying the input to ensure that it's NUL-terminated and
 165     // then using MB2WC() to convert it
 166     //
 167     // moreover, some conversion classes simply can't implement ToWChar()
 168     // directly, the primary example is wxConvLibc: mbstowcs() only handles
 169     // NUL-terminated strings
 170
 171     // the number of chars [which would be] written to dst [if it were not NULL]
 172     size_t dstWritten = 0;
 173
 174     // the number of NULs terminating this string
 175     size_t nulLen = 0;  // not really needed, but just to avoid warnings
 176
 177     // if we were not given the input size we just have to assume that the
 178     // string is properly terminated as we have no way of knowing how long it
 179     // is anyhow, but if we do have the size check whether there are enough
 180     // NULs at the end
 181     wxCharBuffer bufTmp;
 182     const char *srcEnd;
 183     if ( srcLen != wxNO_LEN )
 184     {
 185         // we need to know how to find the end of this string
 186         nulLen = GetMBNulLen();
 187         if ( nulLen == wxCONV_FAILED )
 188             return wxCONV_FAILED;
 189
 190         // if there are enough NULs we can avoid the copy
 191         if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
 192         {
 193             // make a copy in order to properly NUL-terminate the string
 194             bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
 195             char * const p = bufTmp.data();
 196             memcpy(p, src, srcLen);
 197             for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
 198                 *s = '\0';
 199
 200             src = bufTmp;
 201         }
 202
 203         srcEnd = src + srcLen;
 204     }
 205     else // quit after the first loop iteration
 206     {
 207         srcEnd = NULL;
 208     }
 209
 210     // the idea of this code is straightforward: it converts a NUL-terminated
 211     // chunk of the string during each iteration and updates the output buffer
 212     // with the result
 213     //
 214     // all the complication come from the fact that this function, for
 215     // historical reasons, must behave in 2 subtly different ways when it's
 216     // called with a fixed number of characters and when it's called for the
 217     // entire NUL-terminated string: in the former case (srcEnd != NULL) we
 218     // must count all characters we convert, NUL or not; but in the latter we
 219     // do not count the trailing NUL -- but still count all the NULs inside the
 220     // string
 221     //
 222     // so for the (simple) former case we just always count the trailing NUL,
 223     // but for the latter we need to wait until we see if there is going to be
 224     // another loop iteration and only count it then
 225     for ( ;; )
 226     {
 227         // try to convert the current chunk
 228         size_t lenChunk = MB2WC(NULL, src, 0);
 229         if ( lenChunk == wxCONV_FAILED )
 230             return wxCONV_FAILED;
 231
 232         dstWritten += lenChunk;
 233         if ( !srcEnd )
 234             dstWritten++;
 235
 236         if ( !lenChunk )
 237         {
 238             // nothing left in the input string, conversion succeeded
 239             break;
 240         }
 241
 242         if ( dst )
 243         {
 244             if ( dstWritten > dstLen )
 245                 return wxCONV_FAILED;
 246
 247             // +1 is for trailing NUL
 248             if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
 249                 return wxCONV_FAILED;
 250
 251             dst += lenChunk;
 252             if ( !srcEnd )
 253                 dst++;
 254         }
 255
 256         if ( !srcEnd )
 257         {
 258             // we convert just one chunk in this case as this is the entire
 259             // string anyhow (and we don't count the trailing NUL in this case)
 260             break;
 261         }
 262
 263         // advance the input pointer past the end of this chunk: notice that we
 264         // will always stop before srcEnd because we know that the chunk is
 265         // always properly NUL-terminated
 266         while ( NotAllNULs(src, nulLen) )
 267         {
 268             // notice that we must skip over multiple bytes here as we suppose
 269             // that if NUL takes 2 or 4 bytes, then all the other characters do
 270             // too and so if advanced by a single byte we might erroneously
 271             // detect sequences of NUL bytes in the middle of the input
 272             src += nulLen;
 273         }
 274
 275         // if the buffer ends before this NUL, we shouldn't count it in our
 276         // output so skip the code below
 277         if ( src == srcEnd )
 278             break;
 279
 280         // do count this terminator as it's inside the buffer we convert
 281         dstWritten++;
 282         if ( dst )
 283             dst++;
 284
 285         src += nulLen; // skip the terminator itself
 286
 287         if ( src >= srcEnd )
 288             break;
 289     }
 290
 291     return dstWritten;
 292 }
 293
 294 size_t
 295 wxMBConv::FromWChar(char *dst, size_t dstLen,
 296                     const wchar_t *src, size_t srcLen) const
 297 {
 298     // the number of chars [which would be] written to dst [if it were not NULL]
 299     size_t dstWritten = 0;
 300
 301     // if we don't know its length we have no choice but to assume that it is
 302     // NUL-terminated (notice that it can still be NUL-terminated even if
 303     // explicit length is given but it doesn't change our return value)
 304     const bool isNulTerminated = srcLen == wxNO_LEN;
 305
 306     // make a copy of the input string unless it is already properly
 307     // NUL-terminated
 308     wxWCharBuffer bufTmp;
 309     if ( isNulTerminated )
 310     {
 311         srcLen = wxWcslen(src) + 1;
 312     }
 313     else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
 314     {
 315         // make a copy in order to properly NUL-terminate the string
 316         bufTmp = wxWCharBuffer(srcLen);
 317         memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
 318         src = bufTmp;
 319     }
 320
 321     const size_t lenNul = GetMBNulLen();
 322     for ( const wchar_t * const srcEnd = src + srcLen;
 323           src < srcEnd;
 324           src++ /* skip L'\0' too */ )
 325     {
 326         // try to convert the current chunk
 327         size_t lenChunk = WC2MB(NULL, src, 0);
 328         if ( lenChunk == wxCONV_FAILED )
 329             return wxCONV_FAILED;
 330
 331         dstWritten += lenChunk;
 332
 333         const wchar_t * const
 334             chunkEnd = isNulTerminated ? srcEnd - 1 : src + wxWcslen(src);
 335
 336         // our return value accounts for the trailing NUL(s), unlike that of
 337         // WC2MB(), however don't do it for the last NUL we artificially added
 338         // ourselves above
 339         if ( chunkEnd < srcEnd )
 340             dstWritten += lenNul;
 341
 342         if ( dst )
 343         {
 344             if ( dstWritten > dstLen )
 345                 return wxCONV_FAILED;
 346
 347             // if we know that there is enough space in the destination buffer
 348             // (because we accounted for lenNul in dstWritten above), we can
 349             // convert directly in place -- but otherwise we need another
 350             // temporary buffer to ensure that we don't overwrite the output
 351             wxCharBuffer dstBuf;
 352             char *dstTmp;
 353             if ( chunkEnd == srcEnd )
 354             {
 355                 dstBuf = wxCharBuffer(lenChunk + lenNul - 1);
 356                 dstTmp = dstBuf.data();
 357             }
 358             else
 359             {
 360                 dstTmp = dst;
 361             }
 362
 363             if ( WC2MB(dstTmp, src, lenChunk + lenNul) == wxCONV_FAILED )
 364                 return wxCONV_FAILED;
 365
 366             if ( dstTmp != dst )
 367             {
 368                 // copy everything up to but excluding the terminating NUL(s)
 369                 // into the real output buffer
 370                 memcpy(dst, dstTmp, lenChunk);
 371
 372                 // micro-optimization: if dstTmp != dst it means that chunkEnd
 373                 // == srcEnd and so we're done, no need to update anything below
 374                 break;
 375             }
 376
 377             dst += lenChunk;
 378             if ( chunkEnd < srcEnd )
 379                 dst += lenNul;
 380         }
 381
 382         src = chunkEnd;
 383     }
 384
 385     return dstWritten;
 386 }
 387
 388 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 389 {
 390     size_t rc = ToWChar(outBuff, outLen, inBuff);
 391     if ( rc != wxCONV_FAILED )
 392     {
 393         // ToWChar() returns the buffer length, i.e. including the trailing
 394         // NUL, while this method doesn't take it into account
 395         rc--;
 396     }
 397
 398     return rc;
 399 }
 400
 401 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 402 {
 403     size_t rc = FromWChar(outBuff, outLen, inBuff);
 404     if ( rc != wxCONV_FAILED )
 405     {
 406         rc -= GetMBNulLen();
 407     }
 408
 409     return rc;
 410 }
 411
 412 wxMBConv::~wxMBConv()
 413 {
 414     // nothing to do here (necessary for Darwin linking probably)
 415 }
 416
 417 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 418 {
 419     if ( psz )
 420     {
 421         // calculate the length of the buffer needed first
 422         const size_t nLen = ToWChar(NULL, 0, psz);
 423         if ( nLen != wxCONV_FAILED )
 424         {
 425             // now do the actual conversion
 426             wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */);
 427
 428             // +1 for the trailing NULL
 429             if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED )
 430                 return buf;
 431         }
 432     }
 433
 434     return wxWCharBuffer();
 435 }
 436
 437 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 438 {
 439     if ( pwz )
 440     {
 441         const size_t nLen = FromWChar(NULL, 0, pwz);
 442         if ( nLen != wxCONV_FAILED )
 443         {
 444             wxCharBuffer buf(nLen - 1);
 445             if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED )
 446                 return buf;
 447         }
 448     }
 449
 450     return wxCharBuffer();
 451 }
 452
 453 const wxWCharBuffer
 454 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 455 {
 456     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 457     if ( dstLen != wxCONV_FAILED )
 458     {
 459         // notice that we allocate space for dstLen+1 wide characters here
 460         // because we want the buffer to always be NUL-terminated, even if the
 461         // input isn't (as otherwise the caller has no way to know its length)
 462         wxWCharBuffer wbuf(dstLen);
 463         wbuf.data()[dstLen] = L'\0';
 464         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 465         {
 466             if ( outLen )
 467             {
 468                 *outLen = dstLen;
 469
 470                 // we also need to handle NUL-terminated input strings
 471                 // specially: for them the output is the length of the string
 472                 // excluding the trailing NUL, however if we're asked to
 473                 // convert a specific number of characters we return the length
 474                 // of the resulting output even if it's NUL-terminated
 475                 if ( inLen == wxNO_LEN )
 476                     (*outLen)--;
 477             }
 478
 479             return wbuf;
 480         }
 481     }
 482
 483     if ( outLen )
 484         *outLen = 0;
 485
 486     return wxWCharBuffer();
 487 }
 488
 489 const wxCharBuffer
 490 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 491 {
 492     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 493     if ( dstLen != wxCONV_FAILED )
 494     {
 495         const size_t nulLen = GetMBNulLen();
 496
 497         // as above, ensure that the buffer is always NUL-terminated, even if
 498         // the input is not
 499         wxCharBuffer buf(dstLen + nulLen - 1);
 500         memset(buf.data() + dstLen, 0, nulLen);
 501         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 502         {
 503             if ( outLen )
 504             {
 505                 *outLen = dstLen;
 506
 507                 if ( inLen == wxNO_LEN )
 508                 {
 509                     // in this case both input and output are NUL-terminated
 510                     // and we're not supposed to count NUL
 511                     *outLen -= nulLen;
 512                 }
 513             }
 514
 515             return buf;
 516         }
 517     }
 518
 519     if ( outLen )
 520         *outLen = 0;
 521
 522     return wxCharBuffer();
 523 }
 524
 525 const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const
 526 {
 527     const size_t srcLen = buf.length();
 528     if ( srcLen )
 529     {
 530         const size_t dstLen = ToWChar(NULL, 0, buf, srcLen);
 531         if ( dstLen != wxCONV_FAILED )
 532         {
 533             wxWCharBuffer wbuf(dstLen);
 534             wbuf.data()[dstLen] = L'\0';
 535             if ( ToWChar(wbuf.data(), dstLen, buf, srcLen) != wxCONV_FAILED )
 536                 return wbuf;
 537         }
 538     }
 539
 540     return wxScopedWCharBuffer::CreateNonOwned(L"", 0);
 541 }
 542
 543 const wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const
 544 {
 545     const size_t srcLen = wbuf.length();
 546     if ( srcLen )
 547     {
 548         const size_t dstLen = FromWChar(NULL, 0, wbuf, srcLen);
 549         if ( dstLen != wxCONV_FAILED )
 550         {
 551             wxCharBuffer buf(dstLen);
 552             buf.data()[dstLen] = '\0';
 553             if ( FromWChar(buf.data(), dstLen, wbuf, srcLen) != wxCONV_FAILED )
 554                 return buf;
 555         }
 556     }
 557
 558     return wxScopedCharBuffer::CreateNonOwned("", 0);
 559 }
 560
 561 // ----------------------------------------------------------------------------
 562 // wxMBConvLibc
 563 // ----------------------------------------------------------------------------
 564
 565 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 566 {
 567     return wxMB2WC(buf, psz, n);
 568 }
 569
 570 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 571 {
 572     return wxWC2MB(buf, psz, n);
 573 }
 574
 575 // ----------------------------------------------------------------------------
 576 // wxConvBrokenFileNames
 577 // ----------------------------------------------------------------------------
 578
 579 #ifdef __UNIX__
 580
 581 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
 582 {
 583     if ( wxStricmp(charset, wxT("UTF-8")) == 0 ||
 584          wxStricmp(charset, wxT("UTF8")) == 0  )
 585         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
 586     else
 587         m_conv = new wxCSConv(charset);
 588 }
 589
 590 #endif // __UNIX__
 591
 592 // ----------------------------------------------------------------------------
 593 // UTF-7
 594 // ----------------------------------------------------------------------------
 595
 596 // Implementation (C) 2004 Fredrik Roubert
 597 //
 598 // Changes to work in streaming mode (C) 2008 Vadim Zeitlin
 599
 600 //
 601 // BASE64 decoding table
 602 //
 603 static const unsigned char utf7unb64[] =
 604 {
 605     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 606     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 607     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 608     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 609     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 610     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 611     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 612     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 613     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 614     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 615     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 616     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 617     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 618     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 619     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 620     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 621     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 622     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 623     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 624     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 625     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 626     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 627     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 628     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 629     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 630     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 631     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 632     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 633     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 634     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 635     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 636     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 637 };
 638
 639 size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
 640                              const char *src, size_t srcLen) const
 641 {
 642     DecoderState stateOrig,
 643                 *statePtr;
 644     if ( srcLen == wxNO_LEN )
 645     {
 646         // convert the entire string, up to and including the trailing NUL
 647         srcLen = strlen(src) + 1;
 648
 649         // when working on the entire strings we don't update nor use the shift
 650         // state from the previous call
 651         statePtr = &stateOrig;
 652     }
 653     else // when working with partial strings we do use the shift state
 654     {
 655         statePtr = const_cast<DecoderState *>(&m_stateDecoder);
 656
 657         // also save the old state to be able to rollback to it on error
 658         stateOrig = m_stateDecoder;
 659     }
 660
 661     // but to simplify the code below we use this variable in both cases
 662     DecoderState& state = *statePtr;
 663
 664
 665     // number of characters [which would have been] written to dst [if it were
 666     // not NULL]
 667     size_t len = 0;
 668
 669     const char * const srcEnd = src + srcLen;
 670
 671     while ( (src < srcEnd) && (!dst || (len < dstLen)) )
 672     {
 673         const unsigned char cc = *src++;
 674
 675         if ( state.IsShifted() )
 676         {
 677             const unsigned char dc = utf7unb64[cc];
 678             if ( dc == 0xff )
 679             {
 680                 // end of encoded part, check that nothing was left: there can
 681                 // be up to 4 bits of 0 padding but nothing else (we also need
 682                 // to check isLSB as we count bits modulo 8 while a valid UTF-7
 683                 // encoded sequence must contain an integral number of UTF-16
 684                 // characters)
 685                 if ( state.isLSB || state.bit > 4 ||
 686                         (state.accum & ((1 << state.bit) - 1)) )
 687                 {
 688                     if ( !len )
 689                         state = stateOrig;
 690
 691                     return wxCONV_FAILED;
 692                 }
 693
 694                 state.ToDirect();
 695
 696                 // re-parse this character normally below unless it's '-' which
 697                 // is consumed by the decoder
 698                 if ( cc == '-' )
 699                     continue;
 700             }
 701             else // valid encoded character
 702             {
 703                 // mini base64 decoder: each character is 6 bits
 704                 state.bit += 6;
 705                 state.accum <<= 6;
 706                 state.accum += dc;
 707
 708                 if ( state.bit >= 8 )
 709                 {
 710                     // got the full byte, consume it
 711                     state.bit -= 8;
 712                     unsigned char b = (state.accum >> state.bit) & 0x00ff;
 713
 714                     if ( state.isLSB )
 715                     {
 716                         // we've got the full word, output it
 717                         if ( dst )
 718                             *dst++ = (state.msb << 8) | b;
 719                         len++;
 720                         state.isLSB = false;
 721                     }
 722                     else // MSB
 723                     {
 724                         // just store it while we wait for LSB
 725                         state.msb = b;
 726                         state.isLSB = true;
 727                     }
 728                 }
 729             }
 730         }
 731
 732         if ( state.IsDirect() )
 733         {
 734             // start of an encoded segment?
 735             if ( cc == '+' )
 736             {
 737                 if ( *src == '-' )
 738                 {
 739                     // just the encoded plus sign, don't switch to shifted mode
 740                     if ( dst )
 741                         *dst++ = '+';
 742                     len++;
 743                     src++;
 744                 }
 745                 else if ( utf7unb64[(unsigned)*src] == 0xff )
 746                 {
 747                     // empty encoded chunks are not allowed
 748                     if ( !len )
 749                         state = stateOrig;
 750
 751                     return wxCONV_FAILED;
 752                 }
 753                 else // base-64 encoded chunk follows
 754                 {
 755                     state.ToShifted();
 756                 }
 757             }
 758             else // not '+'
 759             {
 760                 // only printable 7 bit ASCII characters (with the exception of
 761                 // NUL, TAB, CR and LF) can be used directly
 762                 if ( cc >= 0x7f || (cc < ' ' &&
 763                       !(cc == '\0' || cc == '\t' || cc == '\r' || cc == '\n')) )
 764                     return wxCONV_FAILED;
 765
 766                 if ( dst )
 767                     *dst++ = cc;
 768                 len++;
 769             }
 770         }
 771     }
 772
 773     if ( !len )
 774     {
 775         // as we didn't read any characters we should be called with the same
 776         // data (followed by some more new data) again later so don't save our
 777         // state
 778         state = stateOrig;
 779
 780         return wxCONV_FAILED;
 781     }
 782
 783     return len;
 784 }
 785
 786 //
 787 // BASE64 encoding table
 788 //
 789 static const unsigned char utf7enb64[] =
 790 {
 791     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 792     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 793     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 794     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 795     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 796     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 797     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 798     '4', '5', '6', '7', '8', '9', '+', '/'
 799 };
 800
 801 //
 802 // UTF-7 encoding table
 803 //
 804 // 0 - Set D (directly encoded characters)
 805 // 1 - Set O (optional direct characters)
 806 // 2 - whitespace characters (optional)
 807 // 3 - special characters
 808 //
 809 static const unsigned char utf7encode[128] =
 810 {
 811     0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 812     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 813     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 814     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 815     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 816     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 817     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 818     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 819 };
 820
 821 static inline bool wxIsUTF7Direct(wchar_t wc)
 822 {
 823     return wc < 0x80 && utf7encode[wc] < 1;
 824 }
 825
 826 size_t wxMBConvUTF7::FromWChar(char *dst, size_t dstLen,
 827                                const wchar_t *src, size_t srcLen) const
 828 {
 829     EncoderState stateOrig,
 830                 *statePtr;
 831     if ( srcLen == wxNO_LEN )
 832     {
 833         // we don't apply the stored state when operating on entire strings at
 834         // once
 835         statePtr = &stateOrig;
 836
 837         srcLen = wxWcslen(src) + 1;
 838     }
 839     else // do use the mode we left the output in previously
 840     {
 841         stateOrig = m_stateEncoder;
 842         statePtr = const_cast<EncoderState *>(&m_stateEncoder);
 843     }
 844
 845     EncoderState& state = *statePtr;
 846
 847
 848     size_t len = 0;
 849
 850     const wchar_t * const srcEnd = src + srcLen;
 851     while ( src < srcEnd && (!dst || len < dstLen) )
 852     {
 853         wchar_t cc = *src++;
 854         if ( wxIsUTF7Direct(cc) )
 855         {
 856             if ( state.IsShifted() )
 857             {
 858                 // pad with zeros the last encoded block if necessary
 859                 if ( state.bit )
 860                 {
 861                     if ( dst )
 862                         *dst++ = utf7enb64[((state.accum % 16) << (6 - state.bit)) % 64];
 863                     len++;
 864                 }
 865
 866                 state.ToDirect();
 867
 868                 if ( dst )
 869                     *dst++ = '-';
 870                 len++;
 871             }
 872
 873             if ( dst )
 874                 *dst++ = (char)cc;
 875             len++;
 876         }
 877         else if ( cc == '+' && state.IsDirect() )
 878         {
 879             if ( dst )
 880             {
 881                 *dst++ = '+';
 882                 *dst++ = '-';
 883             }
 884
 885             len += 2;
 886         }
 887 #ifndef WC_UTF16
 888         else if (((wxUint32)cc) > 0xffff)
 889         {
 890             // no surrogate pair generation (yet?)
 891             return wxCONV_FAILED;
 892         }
 893 #endif
 894         else
 895         {
 896             if ( state.IsDirect() )
 897             {
 898                 state.ToShifted();
 899
 900                 if ( dst )
 901                     *dst++ = '+';
 902                 len++;
 903             }
 904
 905             // BASE64 encode string
 906             for ( ;; )
 907             {
 908                 for ( unsigned lsb = 0; lsb < 2; lsb++ )
 909                 {
 910                     state.accum <<= 8;
 911                     state.accum += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 912
 913                     for (state.bit += 8; state.bit >= 6; )
 914                     {
 915                         state.bit -= 6;
 916                         if ( dst )
 917                             *dst++ = utf7enb64[(state.accum >> state.bit) % 64];
 918                         len++;
 919                     }
 920                 }
 921
 922                 if ( src == srcEnd || wxIsUTF7Direct(cc = *src) )
 923                     break;
 924
 925                 src++;
 926             }
 927         }
 928     }
 929
 930     // we need to restore the original encoder state if we were called just to
 931     // calculate the amount of space needed as we will presumably be called
 932     // again to really convert the data now
 933     if ( !dst )
 934         state = stateOrig;
 935
 936     return len;
 937 }
 938
 939 // ----------------------------------------------------------------------------
 940 // UTF-8
 941 // ----------------------------------------------------------------------------
 942
 943 static const wxUint32 utf8_max[]=
 944     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 945
 946 // boundaries of the private use area we use to (temporarily) remap invalid
 947 // characters invalid in a UTF-8 encoded string
 948 const wxUint32 wxUnicodePUA = 0x100000;
 949 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 950
 951 // this table gives the length of the UTF-8 encoding from its first character:
 952 const unsigned char tableUtf8Lengths[256] = {
 953     // single-byte sequences (ASCII):
 954     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
 955     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
 956     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
 957     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
 958     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
 959     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
 960     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
 961     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F
 962
 963     // these are invalid:
 964     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
 965     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
 966     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
 967     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
 968     0, 0,                                            // C0,C1
 969
 970     // two-byte sequences:
 971           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
 972     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF
 973
 974     // three-byte sequences:
 975     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF
 976
 977     // four-byte sequences:
 978     4, 4, 4, 4, 4,                                   // F0..F4
 979
 980     // these are invalid again (5- or 6-byte
 981     // sequences and sequences for code points
 982     // above U+10FFFF, as restricted by RFC 3629):
 983                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF
 984 };
 985
 986 size_t
 987 wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
 988                             const char *src, size_t srcLen) const
 989 {
 990     wchar_t *out = dstLen ? dst : NULL;
 991     size_t written = 0;
 992
 993     if ( srcLen == wxNO_LEN )
 994         srcLen = strlen(src) + 1;
 995
 996     for ( const char *p = src; ; p++ )
 997     {
 998         if ( !(srcLen == wxNO_LEN ? *p : srcLen) )
 999         {
1000             // all done successfully, just add the trailing NULL if we are not
1001             // using explicit length
1002             if ( srcLen == wxNO_LEN )
1003             {
1004                 if ( out )
1005                 {
1006                     if ( !dstLen )
1007                         break;
1008
1009                     *out = L'\0';
1010                 }
1011
1012                 written++;
1013             }
1014
1015             return written;
1016         }
1017
1018         if ( out && !dstLen-- )
1019             break;
1020
1021         wxUint32 code;
1022         unsigned char c = *p;
1023
1024         if ( c < 0x80 )
1025         {
1026             if ( srcLen == 0 ) // the test works for wxNO_LEN too
1027                 break;
1028
1029             if ( srcLen != wxNO_LEN )
1030                 srcLen--;
1031
1032             code = c;
1033         }
1034         else
1035         {
1036             unsigned len = tableUtf8Lengths[c];
1037             if ( !len )
1038                 break;
1039
1040             if ( srcLen < len ) // the test works for wxNO_LEN too
1041                 break;
1042
1043             if ( srcLen != wxNO_LEN )
1044                 srcLen -= len;
1045
1046             //   Char. number range   |        UTF-8 octet sequence
1047             //      (hexadecimal)     |              (binary)
1048             //  ----------------------+----------------------------------------
1049             //  0000 0000 - 0000 007F | 0xxxxxxx
1050             //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
1051             //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
1052             //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1053             //
1054             //  Code point value is stored in bits marked with 'x',
1055             //  lowest-order bit of the value on the right side in the diagram
1056             //  above.                                         (from RFC 3629)
1057
1058             // mask to extract lead byte's value ('x' bits above), by sequence
1059             // length:
1060             static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
1061
1062             // mask and value of lead byte's most significant bits, by length:
1063             static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
1064             static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
1065
1066             len--; // it's more convenient to work with 0-based length here
1067
1068             // extract the lead byte's value bits:
1069             if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
1070                 break;
1071
1072             code = c & leadValueMask[len];
1073
1074             // all remaining bytes, if any, are handled in the same way
1075             // regardless of sequence's length:
1076             for ( ; len; --len )
1077             {
1078                 c = *++p;
1079                 if ( (c & 0xC0) != 0x80 )
1080                     return wxCONV_FAILED;
1081
1082                 code <<= 6;
1083                 code |= c & 0x3F;
1084             }
1085         }
1086
1087 #ifdef WC_UTF16
1088         // cast is ok because wchar_t == wxUint16 if WC_UTF16
1089         if ( encode_utf16(code, (wxUint16 *)out) == 2 )
1090         {
1091             if ( out )
1092                 out++;
1093             written++;
1094         }
1095 #else // !WC_UTF16
1096         if ( out )
1097             *out = code;
1098 #endif // WC_UTF16/!WC_UTF16
1099
1100         if ( out )
1101             out++;
1102
1103         written++;
1104     }
1105
1106     return wxCONV_FAILED;
1107 }
1108
1109 size_t
1110 wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
1111                               const wchar_t *src, size_t srcLen) const
1112 {
1113     char *out = dstLen ? dst : NULL;
1114     size_t written = 0;
1115
1116     for ( const wchar_t *wp = src; ; wp++ )
1117     {
1118         if ( !(srcLen == wxNO_LEN ? *wp : srcLen) )
1119         {
1120             // all done successfully, just add the trailing NULL if we are not
1121             // using explicit length
1122             if ( srcLen == wxNO_LEN )
1123             {
1124                 if ( out )
1125                 {
1126                     if ( !dstLen )
1127                         break;
1128
1129                     *out = '\0';
1130                 }
1131
1132                 written++;
1133             }
1134
1135             return written;
1136         }
1137
1138         if ( srcLen != wxNO_LEN )
1139             srcLen--;
1140
1141         wxUint32 code;
1142 #ifdef WC_UTF16
1143         // cast is ok for WC_UTF16
1144         if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
1145         {
1146             // skip the next char too as we decoded a surrogate
1147             wp++;
1148         }
1149 #else // wchar_t is UTF-32
1150         code = *wp & 0x7fffffff;
1151 #endif
1152
1153         unsigned len;
1154         if ( code <= 0x7F )
1155         {
1156             len = 1;
1157             if ( out )
1158             {
1159                 if ( dstLen < len )
1160                     break;
1161
1162                 out[0] = (char)code;
1163             }
1164         }
1165         else if ( code <= 0x07FF )
1166         {
1167             len = 2;
1168             if ( out )
1169             {
1170                 if ( dstLen < len )
1171                     break;
1172
1173                 // NB: this line takes 6 least significant bits, encodes them as
1174                 // 10xxxxxx and discards them so that the next byte can be encoded:
1175                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1176                 out[0] = 0xC0 | code;
1177             }
1178         }
1179         else if ( code < 0xFFFF )
1180         {
1181             len = 3;
1182             if ( out )
1183             {
1184                 if ( dstLen < len )
1185                     break;
1186
1187                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1188                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1189                 out[0] = 0xE0 | code;
1190             }
1191         }
1192         else if ( code <= 0x10FFFF )
1193         {
1194             len = 4;
1195             if ( out )
1196             {
1197                 if ( dstLen < len )
1198                     break;
1199
1200                 out[3] = 0x80 | (code & 0x3F);  code >>= 6;
1201                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1202                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1203                 out[0] = 0xF0 | code;
1204             }
1205         }
1206         else
1207         {
1208             wxFAIL_MSG( wxT("trying to encode undefined Unicode character") );
1209             break;
1210         }
1211
1212         if ( out )
1213         {
1214             out += len;
1215             dstLen -= len;
1216         }
1217
1218         written += len;
1219     }
1220
1221     // we only get here if an error occurs during decoding
1222     return wxCONV_FAILED;
1223 }
1224
1225 size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
1226                              const char *psz, size_t srcLen) const
1227 {
1228     if ( m_options == MAP_INVALID_UTF8_NOT )
1229         return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
1230
1231     size_t len = 0;
1232
1233     while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
1234     {
1235         const char *opsz = psz;
1236         bool invalid = false;
1237         unsigned char cc = *psz++, fc = cc;
1238         unsigned cnt;
1239         for (cnt = 0; fc & 0x80; cnt++)
1240             fc <<= 1;
1241
1242         if (!cnt)
1243         {
1244             // plain ASCII char
1245             if (buf)
1246                 *buf++ = cc;
1247             len++;
1248
1249             // escape the escape character for octal escapes
1250             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
1251                     && cc == '\\' && (!buf || len < n))
1252             {
1253                 if (buf)
1254                     *buf++ = cc;
1255                 len++;
1256             }
1257         }
1258         else
1259         {
1260             cnt--;
1261             if (!cnt)
1262             {
1263                 // invalid UTF-8 sequence
1264                 invalid = true;
1265             }
1266             else
1267             {
1268                 unsigned ocnt = cnt - 1;
1269                 wxUint32 res = cc & (0x3f >> cnt);
1270                 while (cnt--)
1271                 {
1272                     cc = *psz;
1273                     if ((cc & 0xC0) != 0x80)
1274                     {
1275                         // invalid UTF-8 sequence
1276                         invalid = true;
1277                         break;
1278                     }
1279
1280                     psz++;
1281                     res = (res << 6) | (cc & 0x3f);
1282                 }
1283
1284                 if (invalid || res <= utf8_max[ocnt])
1285                 {
1286                     // illegal UTF-8 encoding
1287                     invalid = true;
1288                 }
1289                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
1290                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
1291                 {
1292                     // if one of our PUA characters turns up externally
1293                     // it must also be treated as an illegal sequence
1294                     // (a bit like you have to escape an escape character)
1295                     invalid = true;
1296                 }
1297                 else
1298                 {
1299 #ifdef WC_UTF16
1300                     // cast is ok because wchar_t == wxUint16 if WC_UTF16
1301                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
1302                     if (pa == wxCONV_FAILED)
1303                     {
1304                         invalid = true;
1305                     }
1306                     else
1307                     {
1308                         if (buf)
1309                             buf += pa;
1310                         len += pa;
1311                     }
1312 #else // !WC_UTF16
1313                     if (buf)
1314                         *buf++ = (wchar_t)res;
1315                     len++;
1316 #endif // WC_UTF16/!WC_UTF16
1317                 }
1318             }
1319
1320             if (invalid)
1321             {
1322                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
1323                 {
1324                     while (opsz < psz && (!buf || len < n))
1325                     {
1326 #ifdef WC_UTF16
1327                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
1328                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
1329                         wxASSERT(pa != wxCONV_FAILED);
1330                         if (buf)
1331                             buf += pa;
1332                         opsz++;
1333                         len += pa;
1334 #else
1335                         if (buf)
1336                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
1337                         opsz++;
1338                         len++;
1339 #endif
1340                     }
1341                 }
1342                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1343                 {
1344                     while (opsz < psz && (!buf || len < n))
1345                     {
1346                         if ( buf && len + 3 < n )
1347                         {
1348                             unsigned char on = *opsz;
1349                             *buf++ = L'\\';
1350                             *buf++ = (wchar_t)( L'0' + on / 0100 );
1351                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
1352                             *buf++ = (wchar_t)( L'0' + on % 010 );
1353                         }
1354
1355                         opsz++;
1356                         len += 4;
1357                     }
1358                 }
1359                 else // MAP_INVALID_UTF8_NOT
1360                 {
1361                     return wxCONV_FAILED;
1362                 }
1363             }
1364         }
1365     }
1366
1367     if (srcLen == wxNO_LEN && buf && (len < n))
1368         *buf = 0;
1369
1370     return len + 1;
1371 }
1372
// Return true if wch is one of the octal digits, i.e. is in L'0'..L'7' range.
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
1377
1378 size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
1379                                const wchar_t *psz, size_t srcLen) const
1380 {
1381     if ( m_options == MAP_INVALID_UTF8_NOT )
1382         return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
1383
1384     size_t len = 0;
1385
1386     while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
1387     {
1388         wxUint32 cc;
1389
1390 #ifdef WC_UTF16
1391         // cast is ok for WC_UTF16
1392         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1393         psz += (pa == wxCONV_FAILED) ? 1 : pa;
1394 #else
1395         cc = (*psz++) & 0x7fffffff;
1396 #endif
1397
1398         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
1399                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
1400         {
1401             if (buf)
1402                 *buf++ = (char)(cc - wxUnicodePUA);
1403             len++;
1404         }
1405         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1406                     && cc == L'\\' && psz[0] == L'\\' )
1407         {
1408             if (buf)
1409                 *buf++ = (char)cc;
1410             psz++;
1411             len++;
1412         }
1413         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
1414                     cc == L'\\' &&
1415                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
1416         {
1417             if (buf)
1418             {
1419                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
1420                                  (psz[1] - L'0') * 010 +
1421                                  (psz[2] - L'0'));
1422             }
1423
1424             psz += 3;
1425             len++;
1426         }
1427         else
1428         {
1429             unsigned cnt;
1430             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
1431             {
1432             }
1433
1434             if (!cnt)
1435             {
1436                 // plain ASCII char
1437                 if (buf)
1438                     *buf++ = (char) cc;
1439                 len++;
1440             }
1441             else
1442             {
1443                 len += cnt + 1;
1444                 if (buf)
1445                 {
1446                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
1447                     while (cnt--)
1448                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
1449                 }
1450             }
1451         }
1452     }
1453
1454     if (srcLen == wxNO_LEN && buf && (len < n))
1455         *buf = 0;
1456
1457     return len + 1;
1458 }
1459
1460 // ============================================================================
1461 // UTF-16
1462 // ============================================================================
1463
// "straight" is the UTF-16 converter using the same byte order as this
// platform and "swap" the one using the opposite byte order
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#else // WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#endif // !WORDS_BIGENDIAN/WORDS_BIGENDIAN
1471
1472 /* static */
1473 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
1474 {
1475     if ( srcLen == wxNO_LEN )
1476     {
1477         // count the number of bytes in input, including the trailing NULs
1478         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1479         for ( srcLen = 1; *inBuff++; srcLen++ )
1480             ;
1481
1482         srcLen *= BYTES_PER_CHAR;
1483     }
1484     else // we already have the length
1485     {
1486         // we can only convert an entire number of UTF-16 characters
1487         if ( srcLen % BYTES_PER_CHAR )
1488             return wxCONV_FAILED;
1489     }
1490
1491     return srcLen;
1492 }
1493
1494 // case when in-memory representation is UTF-16 too
1495 #ifdef WC_UTF16
1496
1497 // ----------------------------------------------------------------------------
1498 // conversions without endianness change
1499 // ----------------------------------------------------------------------------
1500
1501 size_t
1502 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1503                                const char *src, size_t srcLen) const
1504 {
1505     // set up the scene for using memcpy() (which is presumably more efficient
1506     // than copying the bytes one by one)
1507     srcLen = GetLength(src, srcLen);
1508     if ( srcLen == wxNO_LEN )
1509         return wxCONV_FAILED;
1510
1511     const size_t inLen = srcLen / BYTES_PER_CHAR;
1512     if ( dst )
1513     {
1514         if ( dstLen < inLen )
1515             return wxCONV_FAILED;
1516
1517         memcpy(dst, src, srcLen);
1518     }
1519
1520     return inLen;
1521 }
1522
1523 size_t
1524 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1525                                  const wchar_t *src, size_t srcLen) const
1526 {
1527     if ( srcLen == wxNO_LEN )
1528         srcLen = wxWcslen(src) + 1;
1529
1530     srcLen *= BYTES_PER_CHAR;
1531
1532     if ( dst )
1533     {
1534         if ( dstLen < srcLen )
1535             return wxCONV_FAILED;
1536
1537         memcpy(dst, src, srcLen);
1538     }
1539
1540     return srcLen;
1541 }
1542
1543 // ----------------------------------------------------------------------------
1544 // endian-reversing conversions
1545 // ----------------------------------------------------------------------------
1546
1547 size_t
1548 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1549                            const char *src, size_t srcLen) const
1550 {
1551     srcLen = GetLength(src, srcLen);
1552     if ( srcLen == wxNO_LEN )
1553         return wxCONV_FAILED;
1554
1555     srcLen /= BYTES_PER_CHAR;
1556
1557     if ( dst )
1558     {
1559         if ( dstLen < srcLen )
1560             return wxCONV_FAILED;
1561
1562         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1563         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1564         {
1565             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1566         }
1567     }
1568
1569     return srcLen;
1570 }
1571
1572 size_t
1573 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1574                              const wchar_t *src, size_t srcLen) const
1575 {
1576     if ( srcLen == wxNO_LEN )
1577         srcLen = wxWcslen(src) + 1;
1578
1579     srcLen *= BYTES_PER_CHAR;
1580
1581     if ( dst )
1582     {
1583         if ( dstLen < srcLen )
1584             return wxCONV_FAILED;
1585
1586         wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1587         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1588         {
1589             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1590         }
1591     }
1592
1593     return srcLen;
1594 }
1595
1596 #else // !WC_UTF16: wchar_t is UTF-32
1597
1598 // ----------------------------------------------------------------------------
1599 // conversions without endianness change
1600 // ----------------------------------------------------------------------------
1601
1602 size_t
1603 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1604                                const char *src, size_t srcLen) const
1605 {
1606     srcLen = GetLength(src, srcLen);
1607     if ( srcLen == wxNO_LEN )
1608         return wxCONV_FAILED;
1609
1610     const size_t inLen = srcLen / BYTES_PER_CHAR;
1611     if ( !dst )
1612     {
1613         // optimization: return maximal space which could be needed for this
1614         // string even if the real size could be smaller if the buffer contains
1615         // any surrogates
1616         return inLen;
1617     }
1618
1619     size_t outLen = 0;
1620     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1621     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1622     {
1623         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1624         if ( !inBuff )
1625             return wxCONV_FAILED;
1626
1627         if ( ++outLen > dstLen )
1628             return wxCONV_FAILED;
1629
1630         *dst++ = ch;
1631     }
1632
1633
1634     return outLen;
1635 }
1636
1637 size_t
1638 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1639                                  const wchar_t *src, size_t srcLen) const
1640 {
1641     if ( srcLen == wxNO_LEN )
1642         srcLen = wxWcslen(src) + 1;
1643
1644     size_t outLen = 0;
1645     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1646     for ( size_t n = 0; n < srcLen; n++ )
1647     {
1648         wxUint16 cc[2];
1649         const size_t numChars = encode_utf16(*src++, cc);
1650         if ( numChars == wxCONV_FAILED )
1651             return wxCONV_FAILED;
1652
1653         outLen += numChars * BYTES_PER_CHAR;
1654         if ( outBuff )
1655         {
1656             if ( outLen > dstLen )
1657                 return wxCONV_FAILED;
1658
1659             *outBuff++ = cc[0];
1660             if ( numChars == 2 )
1661             {
1662                 // second character of a surrogate
1663                 *outBuff++ = cc[1];
1664             }
1665         }
1666     }
1667
1668     return outLen;
1669 }
1670
1671 // ----------------------------------------------------------------------------
1672 // endian-reversing conversions
1673 // ----------------------------------------------------------------------------
1674
1675 size_t
1676 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1677                            const char *src, size_t srcLen) const
1678 {
1679     srcLen = GetLength(src, srcLen);
1680     if ( srcLen == wxNO_LEN )
1681         return wxCONV_FAILED;
1682
1683     const size_t inLen = srcLen / BYTES_PER_CHAR;
1684     if ( !dst )
1685     {
1686         // optimization: return maximal space which could be needed for this
1687         // string even if the real size could be smaller if the buffer contains
1688         // any surrogates
1689         return inLen;
1690     }
1691
1692     size_t outLen = 0;
1693     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1694     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1695     {
1696         wxUint32 ch;
1697         wxUint16 tmp[2];
1698
1699         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1700         inBuff++;
1701         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1702
1703         const size_t numChars = decode_utf16(tmp, ch);
1704         if ( numChars == wxCONV_FAILED )
1705             return wxCONV_FAILED;
1706
1707         if ( numChars == 2 )
1708             inBuff++;
1709
1710         if ( ++outLen > dstLen )
1711             return wxCONV_FAILED;
1712
1713         *dst++ = ch;
1714     }
1715
1716
1717     return outLen;
1718 }
1719
1720 size_t
1721 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1722                              const wchar_t *src, size_t srcLen) const
1723 {
1724     if ( srcLen == wxNO_LEN )
1725         srcLen = wxWcslen(src) + 1;
1726
1727     size_t outLen = 0;
1728     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1729     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1730     {
1731         wxUint16 cc[2];
1732         const size_t numChars = encode_utf16(*src, cc);
1733         if ( numChars == wxCONV_FAILED )
1734             return wxCONV_FAILED;
1735
1736         outLen += numChars * BYTES_PER_CHAR;
1737         if ( outBuff )
1738         {
1739             if ( outLen > dstLen )
1740                 return wxCONV_FAILED;
1741
1742             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1743             if ( numChars == 2 )
1744             {
1745                 // second character of a surrogate
1746                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1747             }
1748         }
1749     }
1750
1751     return outLen;
1752 }
1753
1754 #endif // WC_UTF16/!WC_UTF16
1755
1756
1757 // ============================================================================
1758 // UTF-32
1759 // ============================================================================
1760
1761 #ifdef WORDS_BIGENDIAN
1762     #define wxMBConvUTF32straight  wxMBConvUTF32BE
1763     #define wxMBConvUTF32swap      wxMBConvUTF32LE
1764 #else
1765     #define wxMBConvUTF32swap      wxMBConvUTF32BE
1766     #define wxMBConvUTF32straight  wxMBConvUTF32LE
1767 #endif
1768
1769
1770 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1771 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1772
1773 /* static */
1774 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1775 {
1776     if ( srcLen == wxNO_LEN )
1777     {
1778         // count the number of bytes in input, including the trailing NULs
1779         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1780         for ( srcLen = 1; *inBuff++; srcLen++ )
1781             ;
1782
1783         srcLen *= BYTES_PER_CHAR;
1784     }
1785     else // we already have the length
1786     {
1787         // we can only convert an entire number of UTF-32 characters
1788         if ( srcLen % BYTES_PER_CHAR )
1789             return wxCONV_FAILED;
1790     }
1791
1792     return srcLen;
1793 }
1794
1795 // case when in-memory representation is UTF-16
1796 #ifdef WC_UTF16
1797
1798 // ----------------------------------------------------------------------------
1799 // conversions without endianness change
1800 // ----------------------------------------------------------------------------
1801
1802 size_t
1803 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1804                                const char *src, size_t srcLen) const
1805 {
1806     srcLen = GetLength(src, srcLen);
1807     if ( srcLen == wxNO_LEN )
1808         return wxCONV_FAILED;
1809
1810     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1811     const size_t inLen = srcLen / BYTES_PER_CHAR;
1812     size_t outLen = 0;
1813     for ( size_t n = 0; n < inLen; n++ )
1814     {
1815         wxUint16 cc[2];
1816         const size_t numChars = encode_utf16(*inBuff++, cc);
1817         if ( numChars == wxCONV_FAILED )
1818             return wxCONV_FAILED;
1819
1820         outLen += numChars;
1821         if ( dst )
1822         {
1823             if ( outLen > dstLen )
1824                 return wxCONV_FAILED;
1825
1826             *dst++ = cc[0];
1827             if ( numChars == 2 )
1828             {
1829                 // second character of a surrogate
1830                 *dst++ = cc[1];
1831             }
1832         }
1833     }
1834
1835     return outLen;
1836 }
1837
1838 size_t
1839 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1840                                  const wchar_t *src, size_t srcLen) const
1841 {
1842     if ( srcLen == wxNO_LEN )
1843         srcLen = wxWcslen(src) + 1;
1844
1845     if ( !dst )
1846     {
1847         // optimization: return maximal space which could be needed for this
1848         // string instead of the exact amount which could be less if there are
1849         // any surrogates in the input
1850         //
1851         // we consider that surrogates are rare enough to make it worthwhile to
1852         // avoid running the loop below at the cost of slightly extra memory
1853         // consumption
1854         return srcLen * BYTES_PER_CHAR;
1855     }
1856
1857     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1858     size_t outLen = 0;
1859     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1860     {
1861         const wxUint32 ch = wxDecodeSurrogate(&src);
1862         if ( !src )
1863             return wxCONV_FAILED;
1864
1865         outLen += BYTES_PER_CHAR;
1866
1867         if ( outLen > dstLen )
1868             return wxCONV_FAILED;
1869
1870         *outBuff++ = ch;
1871     }
1872
1873     return outLen;
1874 }
1875
1876 // ----------------------------------------------------------------------------
1877 // endian-reversing conversions
1878 // ----------------------------------------------------------------------------
1879
1880 size_t
1881 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1882                            const char *src, size_t srcLen) const
1883 {
1884     srcLen = GetLength(src, srcLen);
1885     if ( srcLen == wxNO_LEN )
1886         return wxCONV_FAILED;
1887
1888     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1889     const size_t inLen = srcLen / BYTES_PER_CHAR;
1890     size_t outLen = 0;
1891     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1892     {
1893         wxUint16 cc[2];
1894         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1895         if ( numChars == wxCONV_FAILED )
1896             return wxCONV_FAILED;
1897
1898         outLen += numChars;
1899         if ( dst )
1900         {
1901             if ( outLen > dstLen )
1902                 return wxCONV_FAILED;
1903
1904             *dst++ = cc[0];
1905             if ( numChars == 2 )
1906             {
1907                 // second character of a surrogate
1908                 *dst++ = cc[1];
1909             }
1910         }
1911     }
1912
1913     return outLen;
1914 }
1915
1916 size_t
1917 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1918                              const wchar_t *src, size_t srcLen) const
1919 {
1920     if ( srcLen == wxNO_LEN )
1921         srcLen = wxWcslen(src) + 1;
1922
1923     if ( !dst )
1924     {
1925         // optimization: return maximal space which could be needed for this
1926         // string instead of the exact amount which could be less if there are
1927         // any surrogates in the input
1928         //
1929         // we consider that surrogates are rare enough to make it worthwhile to
1930         // avoid running the loop below at the cost of slightly extra memory
1931         // consumption
1932         return srcLen*BYTES_PER_CHAR;
1933     }
1934
1935     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1936     size_t outLen = 0;
1937     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1938     {
1939         const wxUint32 ch = wxDecodeSurrogate(&src);
1940         if ( !src )
1941             return wxCONV_FAILED;
1942
1943         outLen += BYTES_PER_CHAR;
1944
1945         if ( outLen > dstLen )
1946             return wxCONV_FAILED;
1947
1948         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1949     }
1950
1951     return outLen;
1952 }
1953
1954 #else // !WC_UTF16: wchar_t is UTF-32
1955
1956 // ----------------------------------------------------------------------------
1957 // conversions without endianness change
1958 // ----------------------------------------------------------------------------
1959
1960 size_t
1961 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1962                                const char *src, size_t srcLen) const
1963 {
1964     // use memcpy() as it should be much faster than hand-written loop
1965     srcLen = GetLength(src, srcLen);
1966     if ( srcLen == wxNO_LEN )
1967         return wxCONV_FAILED;
1968
1969     const size_t inLen = srcLen/BYTES_PER_CHAR;
1970     if ( dst )
1971     {
1972         if ( dstLen < inLen )
1973             return wxCONV_FAILED;
1974
1975         memcpy(dst, src, srcLen);
1976     }
1977
1978     return inLen;
1979 }
1980
1981 size_t
1982 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1983                                  const wchar_t *src, size_t srcLen) const
1984 {
1985     if ( srcLen == wxNO_LEN )
1986         srcLen = wxWcslen(src) + 1;
1987
1988     srcLen *= BYTES_PER_CHAR;
1989
1990     if ( dst )
1991     {
1992         if ( dstLen < srcLen )
1993             return wxCONV_FAILED;
1994
1995         memcpy(dst, src, srcLen);
1996     }
1997
1998     return srcLen;
1999 }
2000
2001 // ----------------------------------------------------------------------------
2002 // endian-reversing conversions
2003 // ----------------------------------------------------------------------------
2004
2005 size_t
2006 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
2007                            const char *src, size_t srcLen) const
2008 {
2009     srcLen = GetLength(src, srcLen);
2010     if ( srcLen == wxNO_LEN )
2011         return wxCONV_FAILED;
2012
2013     srcLen /= BYTES_PER_CHAR;
2014
2015     if ( dst )
2016     {
2017         if ( dstLen < srcLen )
2018             return wxCONV_FAILED;
2019
2020         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
2021         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
2022         {
2023             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
2024         }
2025     }
2026
2027     return srcLen;
2028 }
2029
2030 size_t
2031 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
2032                              const wchar_t *src, size_t srcLen) const
2033 {
2034     if ( srcLen == wxNO_LEN )
2035         srcLen = wxWcslen(src) + 1;
2036
2037     srcLen *= BYTES_PER_CHAR;
2038
2039     if ( dst )
2040     {
2041         if ( dstLen < srcLen )
2042             return wxCONV_FAILED;
2043
2044         wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
2045         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
2046         {
2047             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
2048         }
2049     }
2050
2051     return srcLen;
2052 }
2053
2054 #endif // WC_UTF16/!WC_UTF16
2055
2056
2057 // ============================================================================
2058 // The classes doing conversion using the iconv_xxx() functions
2059 // ============================================================================
2060
2061 #ifdef HAVE_ICONV
2062
2063 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
2064 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
2065 //     (unless there's yet another bug in glibc) the only case when iconv()
2066 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
2067 //     left in the input buffer -- when _real_ error occurs,
2068 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
2069 //     iconv() failure.
2070 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned cres
// and left bufLeft unconverted input bytes really failed.
//
// The macro parameters are parenthesized so that arbitrary expressions (e.g.
// a conditional expression) can be safely passed to it.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type used for the second
// iconv() parameter (ICONV_CONST is defined outside of this section)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value which the code in this section uses for an unusable iconv_t
#define ICONV_T_INVALID ((iconv_t)-1)
2081
2082 #if SIZEOF_WCHAR_T == 4
2083     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
2084     #define WC_ENC      wxFONTENCODING_UTF32
2085 #elif SIZEOF_WCHAR_T == 2
2086     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
2087     #define WC_ENC      wxFONTENCODING_UTF16
2088 #else // sizeof(wchar_t) != 2 nor 4
2089     // does this ever happen?
2090     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
2091 #endif
2092
2093 // ----------------------------------------------------------------------------
2094 // wxMBConv_iconv: encapsulates an iconv character set
2095 // ----------------------------------------------------------------------------
2096
2097 class wxMBConv_iconv : public wxMBConv
2098 {
2099 public:
2100     wxMBConv_iconv(const char *name);
2101     virtual ~wxMBConv_iconv();
2102
2103     // implement base class virtual methods
2104     virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
2105                            const char *src, size_t srcLen = wxNO_LEN) const;
2106     virtual size_t FromWChar(char *dst, size_t dstLen,
2107                              const wchar_t *src, size_t srcLen = wxNO_LEN) const;
2108     virtual size_t GetMBNulLen() const;
2109
2110 #if wxUSE_UNICODE_UTF8
2111     virtual bool IsUTF8() const;
2112 #endif
2113
2114     virtual wxMBConv *Clone() const
2115     {
2116         wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
2117         p->m_minMBCharWidth = m_minMBCharWidth;
2118         return p;
2119     }
2120
2121     bool IsOk() const
2122         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
2123
2124 protected:
2125     // the iconv handlers used to translate from multibyte
2126     // to wide char and in the other direction
2127     iconv_t m2w,
2128             w2m;
2129
2130 #if wxUSE_THREADS
2131     // guards access to m2w and w2m objects
2132     wxMutex m_iconvMutex;
2133 #endif
2134
2135 private:
2136     // the name (for iconv_open()) of a wide char charset -- if none is
2137     // available on this machine, it will remain NULL
2138     static wxString ms_wcCharsetName;
2139
2140     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
2141     // different endian-ness than the native one
2142     static bool ms_wcNeedsSwap;
2143
2144
2145     // name of the encoding handled by this conversion
2146     wxString m_name;
2147
2148     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
2149     // initially
2150     size_t m_minMBCharWidth;
2151 };
2152
2153 // make the constructor available for unit testing
2154 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
2155 {
2156     wxMBConv_iconv* result = new wxMBConv_iconv( name );
2157     if ( !result->IsOk() )
2158     {
2159         delete result;
2160         return 0;
2161     }
2162
2163     return result;
2164 }
2165
2166 wxString wxMBConv_iconv::ms_wcCharsetName;
2167 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
2168
2169 wxMBConv_iconv::wxMBConv_iconv(const char *name)
2170               : m_name(name)
2171 {
2172     m_minMBCharWidth = 0;
2173
2174     // check for charset that represents wchar_t:
2175     if ( ms_wcCharsetName.empty() )
2176     {
2177         wxLogTrace(TRACE_STRCONV, wxT("Looking for wide char codeset:"));
2178
2179 #if wxUSE_FONTMAP
2180         const wxChar *const *names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
2181 #else // !wxUSE_FONTMAP
2182         static const wxChar *const names_static[] =
2183         {
2184 #if SIZEOF_WCHAR_T == 4
2185             wxT("UCS-4"),
2186 #elif SIZEOF_WCHAR_T = 2
2187             wxT("UCS-2"),
2188 #endif
2189             NULL
2190         };
2191         const wxChar *const *names = names_static;
2192 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2193
2194         for ( ; *names && ms_wcCharsetName.empty(); ++names )
2195         {
2196             const wxString nameCS(*names);
2197
2198             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
2199             wxString nameXE(nameCS);
2200
2201 #ifdef WORDS_BIGENDIAN
2202                 nameXE += wxT("BE");
2203 #else // little endian
2204                 nameXE += wxT("LE");
2205 #endif
2206
2207             wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2208                        nameXE.c_str());
2209
2210             m2w = iconv_open(nameXE.ToAscii(), name);
2211             if ( m2w == ICONV_T_INVALID )
2212             {
2213                 // try charset w/o bytesex info (e.g. "UCS4")
2214                 wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2215                            nameCS.c_str());
2216                 m2w = iconv_open(nameCS.ToAscii(), name);
2217
2218                 // and check for bytesex ourselves:
2219                 if ( m2w != ICONV_T_INVALID )
2220                 {
2221                     char    buf[2], *bufPtr;
2222                     wchar_t wbuf[2];
2223                     size_t  insz, outsz;
2224                     size_t  res;
2225
2226                     buf[0] = 'A';
2227                     buf[1] = 0;
2228                     wbuf[0] = 0;
2229                     insz = 2;
2230                     outsz = SIZEOF_WCHAR_T * 2;
2231                     char* wbufPtr = (char*)wbuf;
2232                     bufPtr = buf;
2233
2234                     res = iconv(
2235                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
2236                         &wbufPtr, &outsz);
2237
2238                     if (ICONV_FAILED(res, insz))
2239                     {
2240                         wxLogLastError(wxT("iconv"));
2241                         wxLogError(_("Conversion to charset '%s' doesn't work."),
2242                                    nameCS.c_str());
2243                     }
2244                     else // ok, can convert to this encoding, remember it
2245                     {
2246                         ms_wcCharsetName = nameCS;
2247                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
2248                     }
2249                 }
2250             }
2251             else // use charset not requiring byte swapping
2252             {
2253                 ms_wcCharsetName = nameXE;
2254             }
2255         }
2256
2257         wxLogTrace(TRACE_STRCONV,
2258                    wxT("iconv wchar_t charset is \"%s\"%s"),
2259                    ms_wcCharsetName.empty() ? wxString("<none>")
2260                                             : ms_wcCharsetName,
2261                    ms_wcNeedsSwap ? wxT(" (needs swap)")
2262                                   : wxT(""));
2263     }
2264     else // we already have ms_wcCharsetName
2265     {
2266         m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
2267     }
2268
2269     if ( ms_wcCharsetName.empty() )
2270     {
2271         w2m = ICONV_T_INVALID;
2272     }
2273     else
2274     {
2275         w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
2276         if ( w2m == ICONV_T_INVALID )
2277         {
2278             wxLogTrace(TRACE_STRCONV,
2279                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
2280                        ms_wcCharsetName.c_str(), name);
2281         }
2282     }
2283 }
2284
2285 wxMBConv_iconv::~wxMBConv_iconv()
2286 {
2287     if ( m2w != ICONV_T_INVALID )
2288         iconv_close(m2w);
2289     if ( w2m != ICONV_T_INVALID )
2290         iconv_close(w2m);
2291 }
2292
2293 size_t
2294 wxMBConv_iconv::ToWChar(wchar_t *dst, size_t dstLen,
2295                         const char *src, size_t srcLen) const
2296 {
2297     if ( srcLen == wxNO_LEN )
2298     {
2299         // find the string length: notice that must be done differently for
2300         // NUL-terminated strings and UTF-16/32 which are terminated with 2/4
2301         // consecutive NULs
2302         const size_t nulLen = GetMBNulLen();
2303         switch ( nulLen )
2304         {
2305             default:
2306                 return wxCONV_FAILED;
2307
2308             case 1:
2309                 srcLen = strlen(src); // arguably more optimized than our version
2310                 break;
2311
2312             case 2:
2313             case 4:
2314                 // for UTF-16/32 not only we need to have 2/4 consecutive NULs
2315                 // but they also have to start at character boundary and not
2316                 // span two adjacent characters
2317                 const char *p;
2318                 for ( p = src; NotAllNULs(p, nulLen); p += nulLen )
2319                     ;
2320                 srcLen = p - src;
2321                 break;
2322         }
2323
2324         // when we're determining the length of the string ourselves we count
2325         // the terminating NUL(s) as part of it and always NUL-terminate the
2326         // output
2327         srcLen += nulLen;
2328     }
2329
2330     // we express length in the number of (wide) characters but iconv always
2331     // counts buffer sizes it in bytes
2332     dstLen *= SIZEOF_WCHAR_T;
2333
2334 #if wxUSE_THREADS
2335     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
2336     //     Unfortunately there are a couple of global wxCSConv objects such as
2337     //     wxConvLocal that are used all over wx code, so we have to make sure
2338     //     the handle is used by at most one thread at the time. Otherwise
2339     //     only a few wx classes would be safe to use from non-main threads
2340     //     as MB<->WC conversion would fail "randomly".
2341     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2342 #endif // wxUSE_THREADS
2343
2344     size_t res, cres;
2345     const char *pszPtr = src;
2346
2347     if ( dst )
2348     {
2349         char* bufPtr = (char*)dst;
2350
2351         // have destination buffer, convert there
2352         size_t dstLenOrig = dstLen;
2353         cres = iconv(m2w,
2354                      ICONV_CHAR_CAST(&pszPtr), &srcLen,
2355                      &bufPtr, &dstLen);
2356
2357         // convert the number of bytes converted as returned by iconv to the
2358         // number of (wide) characters converted that we need
2359         res = (dstLenOrig - dstLen) / SIZEOF_WCHAR_T;
2360
2361         if (ms_wcNeedsSwap)
2362         {
2363             // convert to native endianness
2364             for ( unsigned i = 0; i < res; i++ )
2365                 dst[i] = WC_BSWAP(dst[i]);
2366         }
2367     }
2368     else // no destination buffer
2369     {
2370         // convert using temp buffer to calculate the size of the buffer needed
2371         wchar_t tbuf[256];
2372         res = 0;
2373
2374         do
2375         {
2376             char* bufPtr = (char*)tbuf;
2377             dstLen = 8 * SIZEOF_WCHAR_T;
2378
2379             cres = iconv(m2w,
2380                          ICONV_CHAR_CAST(&pszPtr), &srcLen,
2381                          &bufPtr, &dstLen );
2382
2383             res += 8 - (dstLen / SIZEOF_WCHAR_T);
2384         }
2385         while ((cres == (size_t)-1) && (errno == E2BIG));
2386     }
2387
2388     if (ICONV_FAILED(cres, srcLen))
2389     {
2390         //VS: it is ok if iconv fails, hence trace only
2391         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2392         return wxCONV_FAILED;
2393     }
2394
2395     return res;
2396 }
2397
2398 size_t wxMBConv_iconv::FromWChar(char *dst, size_t dstLen,
2399                                  const wchar_t *src, size_t srcLen) const
2400 {
2401 #if wxUSE_THREADS
2402     // NB: explained in MB2WC
2403     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2404 #endif
2405
2406     if ( srcLen == wxNO_LEN )
2407         srcLen = wxWcslen(src) + 1;
2408
2409     size_t inbuflen = srcLen * SIZEOF_WCHAR_T;
2410     size_t outbuflen = dstLen;
2411     size_t res, cres;
2412
2413     wchar_t *tmpbuf = 0;
2414
2415     if (ms_wcNeedsSwap)
2416     {
2417         // need to copy to temp buffer to switch endianness
2418         // (doing WC_BSWAP twice on the original buffer won't work, as it
2419         //  could be in read-only memory, or be accessed in some other thread)
2420         tmpbuf = (wchar_t *)malloc(inbuflen);
2421         for ( size_t i = 0; i < srcLen; i++ )
2422             tmpbuf[i] = WC_BSWAP(src[i]);
2423
2424         src = tmpbuf;
2425     }
2426
2427     char* inbuf = (char*)src;
2428     if ( dst )
2429     {
2430         // have destination buffer, convert there
2431         cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2432
2433         res = dstLen - outbuflen;
2434     }
2435     else // no destination buffer
2436     {
2437         // convert using temp buffer to calculate the size of the buffer needed
2438         char tbuf[256];
2439         res = 0;
2440         do
2441         {
2442             dst = tbuf;
2443             outbuflen = WXSIZEOF(tbuf);
2444
2445             cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2446
2447             res += WXSIZEOF(tbuf) - outbuflen;
2448         }
2449         while ((cres == (size_t)-1) && (errno == E2BIG));
2450     }
2451
2452     if (ms_wcNeedsSwap)
2453     {
2454         free(tmpbuf);
2455     }
2456
2457     if (ICONV_FAILED(cres, inbuflen))
2458     {
2459         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2460         return wxCONV_FAILED;
2461     }
2462
2463     return res;
2464 }
2465
2466 size_t wxMBConv_iconv::GetMBNulLen() const
2467 {
2468     if ( m_minMBCharWidth == 0 )
2469     {
2470         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
2471
2472 #if wxUSE_THREADS
2473         // NB: explained in MB2WC
2474         wxMutexLocker lock(self->m_iconvMutex);
2475 #endif
2476
2477         const wchar_t *wnul = L"";
2478         char buf[8]; // should be enough for NUL in any encoding
2479         size_t inLen = sizeof(wchar_t),
2480                outLen = WXSIZEOF(buf);
2481         char *inBuff = (char *)wnul;
2482         char *outBuff = buf;
2483         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
2484         {
2485             self->m_minMBCharWidth = (size_t)-1;
2486         }
2487         else // ok
2488         {
2489             self->m_minMBCharWidth = outBuff - buf;
2490         }
2491     }
2492
2493     return m_minMBCharWidth;
2494 }
2495
2496 #if wxUSE_UNICODE_UTF8
2497 bool wxMBConv_iconv::IsUTF8() const
2498 {
2499     return wxStricmp(m_name, "UTF-8") == 0 ||
2500            wxStricmp(m_name, "UTF8") == 0;
2501 }
2502 #endif
2503
2504 #endif // HAVE_ICONV
2505
2506
2507 // ============================================================================
2508 // Win32 conversion classes
2509 // ============================================================================
2510
2511 #ifdef wxHAVE_WIN32_MB2WC
2512
2513 // from utils.cpp
2514 #if wxUSE_FONTMAP
2515 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
2516 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
2517 #endif
2518
2519 class wxMBConv_win32 : public wxMBConv
2520 {
2521 public:
2522     wxMBConv_win32()
2523     {
2524         m_CodePage = CP_ACP;
2525         m_minMBCharWidth = 0;
2526     }
2527
2528     wxMBConv_win32(const wxMBConv_win32& conv)
2529         : wxMBConv()
2530     {
2531         m_CodePage = conv.m_CodePage;
2532         m_minMBCharWidth = conv.m_minMBCharWidth;
2533     }
2534
2535 #if wxUSE_FONTMAP
2536     wxMBConv_win32(const char* name)
2537     {
2538         m_CodePage = wxCharsetToCodepage(name);
2539         m_minMBCharWidth = 0;
2540     }
2541
2542     wxMBConv_win32(wxFontEncoding encoding)
2543     {
2544         m_CodePage = wxEncodingToCodepage(encoding);
2545         m_minMBCharWidth = 0;
2546     }
2547 #endif // wxUSE_FONTMAP
2548
2549     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2550     {
2551         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2552         // the behaviour is not compatible with the Unix version (using iconv)
2553         // and break the library itself, e.g. wxTextInputStream::NextChar()
2554         // wouldn't work if reading an incomplete MB char didn't result in an
2555         // error
2556         //
2557         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2558         // Win XP or newer and it is not supported for UTF-[78] so we always
2559         // use our own conversions in this case. See
2560         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2561         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2562         if ( m_CodePage == CP_UTF8 )
2563         {
2564             return wxMBConvUTF8().MB2WC(buf, psz, n);
2565         }
2566
2567         if ( m_CodePage == CP_UTF7 )
2568         {
2569             return wxMBConvUTF7().MB2WC(buf, psz, n);
2570         }
2571
2572         int flags = 0;
2573         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2574                 IsAtLeastWin2kSP4() )
2575         {
2576             flags = MB_ERR_INVALID_CHARS;
2577         }
2578
2579         const size_t len = ::MultiByteToWideChar
2580                              (
2581                                 m_CodePage,     // code page
2582                                 flags,          // flags: fall on error
2583                                 psz,            // input string
2584                                 -1,             // its length (NUL-terminated)
2585                                 buf,            // output string
2586                                 buf ? n : 0     // size of output buffer
2587                              );
2588         if ( !len )
2589         {
2590             // function totally failed
2591             return wxCONV_FAILED;
2592         }
2593
2594         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2595         // check if we succeeded, by doing a double trip:
2596         if ( !flags && buf )
2597         {
2598             const size_t mbLen = strlen(psz);
2599             wxCharBuffer mbBuf(mbLen);
2600             if ( ::WideCharToMultiByte
2601                    (
2602                       m_CodePage,
2603                       0,
2604                       buf,
2605                       -1,
2606                       mbBuf.data(),
2607                       mbLen + 1,        // size in bytes, not length
2608                       NULL,
2609                       NULL
2610                    ) == 0 ||
2611                   strcmp(mbBuf, psz) != 0 )
2612             {
2613                 // we didn't obtain the same thing we started from, hence
2614                 // the conversion was lossy and we consider that it failed
2615                 return wxCONV_FAILED;
2616             }
2617         }
2618
2619         // note that it returns count of written chars for buf != NULL and size
2620         // of the needed buffer for buf == NULL so in either case the length of
2621         // the string (which never includes the terminating NUL) is one less
2622         return len - 1;
2623     }
2624
2625     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2626     {
2627         /*
2628             we have a problem here: by default, WideCharToMultiByte() may
2629             replace characters unrepresentable in the target code page with bad
2630             quality approximations such as turning "1/2" symbol (U+00BD) into
2631             "1" for the code pages which don't have it and we, obviously, want
2632             to avoid this at any price
2633
2634             the trouble is that this function does it _silently_, i.e. it won't
2635             even tell us whether it did or not... Win98/2000 and higher provide
2636             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2637             we have to resort to a round trip, i.e. check that converting back
2638             results in the same string -- this is, of course, expensive but
2639             otherwise we simply can't be sure to not garble the data.
2640          */
2641
2642         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2643         // it doesn't work with CJK encodings (which we test for rather roughly
2644         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2645         // supporting it
2646         BOOL usedDef wxDUMMY_INITIALIZE(false);
2647         BOOL *pUsedDef;
2648         int flags;
2649         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2650         {
2651             // it's our lucky day
2652             flags = WC_NO_BEST_FIT_CHARS;
2653             pUsedDef = &usedDef;
2654         }
2655         else // old system or unsupported encoding
2656         {
2657             flags = 0;
2658             pUsedDef = NULL;
2659         }
2660
2661         const size_t len = ::WideCharToMultiByte
2662                              (
2663                                 m_CodePage,     // code page
2664                                 flags,          // either none or no best fit
2665                                 pwz,            // input string
2666                                 -1,             // it is (wide) NUL-terminated
2667                                 buf,            // output buffer
2668                                 buf ? n : 0,    // and its size
2669                                 NULL,           // default "replacement" char
2670                                 pUsedDef        // [out] was it used?
2671                              );
2672
2673         if ( !len )
2674         {
2675             // function totally failed
2676             return wxCONV_FAILED;
2677         }
2678
2679         // we did something, check if we really succeeded
2680         if ( flags )
2681         {
2682             // check if the conversion failed, i.e. if any replacements
2683             // were done
2684             if ( usedDef )
2685                 return wxCONV_FAILED;
2686         }
2687         else // we must resort to double tripping...
2688         {
2689             // first we need to ensure that we really have the MB data: this is
2690             // not the case if we're called with NULL buffer, in which case we
2691             // need to do the conversion yet again
2692             wxCharBuffer bufDef;
2693             if ( !buf )
2694             {
2695                 bufDef = wxCharBuffer(len);
2696                 buf = bufDef.data();
2697                 if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
2698                                             buf, len, NULL, NULL) )
2699                     return wxCONV_FAILED;
2700             }
2701
2702             if ( !n )
2703                 n = wcslen(pwz);
2704             wxWCharBuffer wcBuf(n);
2705             if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED ||
2706                     wcscmp(wcBuf, pwz) != 0 )
2707             {
2708                 // we didn't obtain the same thing we started from, hence
2709                 // the conversion was lossy and we consider that it failed
2710                 return wxCONV_FAILED;
2711             }
2712         }
2713
2714         // see the comment above for the reason of "len - 1"
2715         return len - 1;
2716     }
2717
2718     virtual size_t GetMBNulLen() const
2719     {
2720         if ( m_minMBCharWidth == 0 )
2721         {
2722             int len = ::WideCharToMultiByte
2723                         (
2724                             m_CodePage,     // code page
2725                             0,              // no flags
2726                             L"",            // input string
2727                             1,              // translate just the NUL
2728                             NULL,           // output buffer
2729                             0,              // and its size
2730                             NULL,           // no replacement char
2731                             NULL            // [out] don't care if it was used
2732                         );
2733
2734             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2735             switch ( len )
2736             {
2737                 default:
2738                     wxLogDebug(wxT("Unexpected NUL length %d"), len);
2739                     self->m_minMBCharWidth = (size_t)-1;
2740                     break;
2741
2742                 case 0:
2743                     self->m_minMBCharWidth = (size_t)-1;
2744                     break;
2745
2746                 case 1:
2747                 case 2:
2748                 case 4:
2749                     self->m_minMBCharWidth = len;
2750                     break;
2751             }
2752         }
2753
2754         return m_minMBCharWidth;
2755     }
2756
2757     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2758
2759     bool IsOk() const { return m_CodePage != -1; }
2760
2761 private:
2762     static bool CanUseNoBestFit()
2763     {
2764         static int s_isWin98Or2k = -1;
2765
2766         if ( s_isWin98Or2k == -1 )
2767         {
2768             int verMaj, verMin;
2769             switch ( wxGetOsVersion(&verMaj, &verMin) )
2770             {
2771                 case wxOS_WINDOWS_9X:
2772                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2773                     break;
2774
2775                 case wxOS_WINDOWS_NT:
2776                     s_isWin98Or2k = verMaj >= 5;
2777                     break;
2778
2779                 default:
2780                     // unknown: be conservative by default
2781                     s_isWin98Or2k = 0;
2782                     break;
2783             }
2784
2785             wxASSERT_MSG( s_isWin98Or2k != -1, wxT("should be set above") );
2786         }
2787
2788         return s_isWin98Or2k == 1;
2789     }
2790
2791     static bool IsAtLeastWin2kSP4()
2792     {
2793 #ifdef __WXWINCE__
2794         return false;
2795 #else
2796         static int s_isAtLeastWin2kSP4 = -1;
2797
2798         if ( s_isAtLeastWin2kSP4 == -1 )
2799         {
2800             OSVERSIONINFOEX ver;
2801
2802             memset(&ver, 0, sizeof(ver));
2803             ver.dwOSVersionInfoSize = sizeof(ver);
2804             GetVersionEx((OSVERSIONINFO*)&ver);
2805
2806             s_isAtLeastWin2kSP4 =
2807               ((ver.dwMajorVersion > 5) || // Vista+
2808                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2809                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2810                ver.wServicePackMajor >= 4)) // 2000 SP4+
2811               ? 1 : 0;
2812         }
2813
2814         return s_isAtLeastWin2kSP4 == 1;
2815 #endif
2816     }
2817
2818
2819     // the code page we're working with
2820     long m_CodePage;
2821
2822     // cached result of GetMBNulLen(), set to 0 initially meaning
2823     // "unknown"
2824     size_t m_minMBCharWidth;
2825 };
2826
2827 #endif // wxHAVE_WIN32_MB2WC
2828
2829
2830 // ============================================================================
2831 // wxEncodingConverter based conversion classes
2832 // ============================================================================
2833
2834 #if wxUSE_FONTMAP
2835
2836 class wxMBConv_wxwin : public wxMBConv
2837 {
2838 private:
2839     void Init()
2840     {
2841         // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2842         // The wxMBConv_cf class does a better job.
2843         m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
2844                m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2845                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2846     }
2847
2848 public:
2849     // temporarily just use wxEncodingConverter stuff,
2850     // so that it works while a better implementation is built
2851     wxMBConv_wxwin(const char* name)
2852     {
2853         if (name)
2854             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2855         else
2856             m_enc = wxFONTENCODING_SYSTEM;
2857
2858         Init();
2859     }
2860
2861     wxMBConv_wxwin(wxFontEncoding enc)
2862     {
2863         m_enc = enc;
2864
2865         Init();
2866     }
2867
2868     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2869     {
2870         size_t inbuf = strlen(psz);
2871         if (buf)
2872         {
2873             if (!m2w.Convert(psz, buf))
2874                 return wxCONV_FAILED;
2875         }
2876         return inbuf;
2877     }
2878
2879     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2880     {
2881         const size_t inbuf = wxWcslen(psz);
2882         if (buf)
2883         {
2884             if (!w2m.Convert(psz, buf))
2885                 return wxCONV_FAILED;
2886         }
2887
2888         return inbuf;
2889     }
2890
2891     virtual size_t GetMBNulLen() const
2892     {
2893         switch ( m_enc )
2894         {
2895             case wxFONTENCODING_UTF16BE:
2896             case wxFONTENCODING_UTF16LE:
2897                 return 2;
2898
2899             case wxFONTENCODING_UTF32BE:
2900             case wxFONTENCODING_UTF32LE:
2901                 return 4;
2902
2903             default:
2904                 return 1;
2905         }
2906     }
2907
2908     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2909
2910     bool IsOk() const { return m_ok; }
2911
2912 public:
2913     wxFontEncoding m_enc;
2914     wxEncodingConverter m2w, w2m;
2915
2916 private:
2917     // were we initialized successfully?
2918     bool m_ok;
2919
2920     wxDECLARE_NO_COPY_CLASS(wxMBConv_wxwin);
2921 };
2922
2923 // make the constructors available for unit testing
2924 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
2925 {
2926     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2927     if ( !result->IsOk() )
2928     {
2929         delete result;
2930         return 0;
2931     }
2932
2933     return result;
2934 }
2935
2936 #endif // wxUSE_FONTMAP
2937
2938 // ============================================================================
2939 // wxCSConv implementation
2940 // ============================================================================
2941
2942 void wxCSConv::Init()
2943 {
2944     m_name = NULL;
2945     m_convReal =  NULL;
2946     m_deferred = true;
2947 }
2948
2949 wxCSConv::wxCSConv(const wxString& charset)
2950 {
2951     Init();
2952
2953     if ( !charset.empty() )
2954     {
2955         SetName(charset.ToAscii());
2956     }
2957
2958 #if wxUSE_FONTMAP
2959     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2960     if ( m_encoding == wxFONTENCODING_MAX )
2961     {
2962         // set to unknown/invalid value
2963         m_encoding = wxFONTENCODING_SYSTEM;
2964     }
2965     else if ( m_encoding == wxFONTENCODING_DEFAULT )
2966     {
2967         // wxFONTENCODING_DEFAULT is same as US-ASCII in this context
2968         m_encoding = wxFONTENCODING_ISO8859_1;
2969     }
2970 #else
2971     m_encoding = wxFONTENCODING_SYSTEM;
2972 #endif
2973 }
2974
2975 wxCSConv::wxCSConv(wxFontEncoding encoding)
2976 {
2977     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2978     {
2979         wxFAIL_MSG( wxT("invalid encoding value in wxCSConv ctor") );
2980
2981         encoding = wxFONTENCODING_SYSTEM;
2982     }
2983
2984     Init();
2985
2986     m_encoding = encoding;
2987 }
2988
2989 wxCSConv::~wxCSConv()
2990 {
2991     Clear();
2992 }
2993
2994 wxCSConv::wxCSConv(const wxCSConv& conv)
2995         : wxMBConv()
2996 {
2997     Init();
2998
2999     SetName(conv.m_name);
3000     m_encoding = conv.m_encoding;
3001 }
3002
3003 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3004 {
3005     Clear();
3006
3007     SetName(conv.m_name);
3008     m_encoding = conv.m_encoding;
3009
3010     return *this;
3011 }
3012
3013 void wxCSConv::Clear()
3014 {
3015     free(m_name);
3016     wxDELETE(m_convReal);
3017
3018     m_name = NULL;
3019 }
3020
3021 void wxCSConv::SetName(const char *charset)
3022 {
3023     if (charset)
3024     {
3025         m_name = wxStrdup(charset);
3026         m_deferred = true;
3027     }
3028 }
3029
3030 #if wxUSE_FONTMAP
3031
// Hash map type associating a charset name with a font encoding.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache used by wxCSConv::DoCreate() when iconv is available: for each
// encoding already tried it stores the name for which the iconv converter
// could be created or an empty string if it couldn't be created for any of
// the names of this encoding.
static wxEncodingNameCache gs_nameCache;
3036 #endif
3037
3038 wxMBConv *wxCSConv::DoCreate() const
3039 {
3040 #if wxUSE_FONTMAP
3041     wxLogTrace(TRACE_STRCONV,
3042                wxT("creating conversion for %s"),
3043                (m_name ? m_name
3044                        : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
3045 #endif // wxUSE_FONTMAP
3046
3047     // check for the special case of ASCII or ISO8859-1 charset: as we have
3048     // special knowledge of it anyhow, we don't need to create a special
3049     // conversion object
3050     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3051             m_encoding == wxFONTENCODING_DEFAULT )
3052     {
3053         // don't convert at all
3054         return NULL;
3055     }
3056
3057     // we trust OS to do conversion better than we can so try external
3058     // conversion methods first
3059     //
3060     // the full order is:
3061     //      1. OS conversion (iconv() under Unix or Win32 API)
3062     //      2. hard coded conversions for UTF
3063     //      3. wxEncodingConverter as fall back
3064
3065     // step (1)
3066 #ifdef HAVE_ICONV
3067 #if !wxUSE_FONTMAP
3068     if ( m_name )
3069 #endif // !wxUSE_FONTMAP
3070     {
3071 #if wxUSE_FONTMAP
3072         wxFontEncoding encoding(m_encoding);
3073 #endif
3074
3075         if ( m_name )
3076         {
3077             wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
3078             if ( conv->IsOk() )
3079                 return conv;
3080
3081             delete conv;
3082
3083 #if wxUSE_FONTMAP
3084             encoding =
3085                 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3086 #endif // wxUSE_FONTMAP
3087         }
3088 #if wxUSE_FONTMAP
3089         {
3090             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3091             if ( it != gs_nameCache.end() )
3092             {
3093                 if ( it->second.empty() )
3094                     return NULL;
3095
3096                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
3097                 if ( conv->IsOk() )
3098                     return conv;
3099
3100                 delete conv;
3101             }
3102
3103             const wxChar* const* names = wxFontMapperBase::GetAllEncodingNames(encoding);
3104             // CS : in case this does not return valid names (eg for MacRoman)
3105             // encoding got a 'failure' entry in the cache all the same,
3106             // although it just has to be created using a different method, so
3107             // only store failed iconv creation attempts (or perhaps we
3108             // shoulnd't do this at all ?)
3109             if ( names[0] != NULL )
3110             {
3111                 for ( ; *names; ++names )
3112                 {
3113                     // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3114                     //             will need changes that will obsolete this
3115                     wxString name(*names);
3116                     wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3117                     if ( conv->IsOk() )
3118                     {
3119                         gs_nameCache[encoding] = *names;
3120                         return conv;
3121                     }
3122
3123                     delete conv;
3124                 }
3125
3126                 gs_nameCache[encoding] = wxT(""); // cache the failure
3127             }
3128         }
3129 #endif // wxUSE_FONTMAP
3130     }
3131 #endif // HAVE_ICONV
3132
3133 #ifdef wxHAVE_WIN32_MB2WC
3134     {
3135 #if wxUSE_FONTMAP
3136         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3137                                       : new wxMBConv_win32(m_encoding);
3138         if ( conv->IsOk() )
3139             return conv;
3140
3141         delete conv;
3142 #else
3143         return NULL;
3144 #endif
3145     }
3146 #endif // wxHAVE_WIN32_MB2WC
3147
3148 #ifdef __DARWIN__
3149     {
3150         // leave UTF16 and UTF32 to the built-ins of wx
3151         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3152             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3153         {
3154 #if wxUSE_FONTMAP
3155             wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
3156                                           : new wxMBConv_cf(m_encoding);
3157 #else
3158             wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
3159 #endif
3160
3161             if ( conv->IsOk() )
3162                  return conv;
3163
3164             delete conv;
3165         }
3166     }
3167 #endif // __DARWIN__
3168
3169     // step (2)
3170     wxFontEncoding enc = m_encoding;
3171 #if wxUSE_FONTMAP
3172     if ( enc == wxFONTENCODING_SYSTEM && m_name )
3173     {
3174         // use "false" to suppress interactive dialogs -- we can be called from
3175         // anywhere and popping up a dialog from here is the last thing we want to
3176         // do
3177         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3178     }
3179 #endif // wxUSE_FONTMAP
3180
3181     switch ( enc )
3182     {
3183         case wxFONTENCODING_UTF7:
3184              return new wxMBConvUTF7;
3185
3186         case wxFONTENCODING_UTF8:
3187              return new wxMBConvUTF8;
3188
3189         case wxFONTENCODING_UTF16BE:
3190              return new wxMBConvUTF16BE;
3191
3192         case wxFONTENCODING_UTF16LE:
3193              return new wxMBConvUTF16LE;
3194
3195         case wxFONTENCODING_UTF32BE:
3196              return new wxMBConvUTF32BE;
3197
3198         case wxFONTENCODING_UTF32LE:
3199              return new wxMBConvUTF32LE;
3200
3201         default:
3202              // nothing to do but put here to suppress gcc warnings
3203              break;
3204     }
3205
3206     // step (3)
3207 #if wxUSE_FONTMAP
3208     {
3209         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3210                                       : new wxMBConv_wxwin(m_encoding);
3211         if ( conv->IsOk() )
3212             return conv;
3213
3214         delete conv;
3215     }
3216
3217     wxLogTrace(TRACE_STRCONV,
3218                wxT("encoding \"%s\" is not supported by this system"),
3219                (m_name ? wxString(m_name)
3220                        : wxFontMapperBase::GetEncodingName(m_encoding)));
3221 #endif // wxUSE_FONTMAP
3222
3223     return NULL;
3224 }
3225
3226 void wxCSConv::CreateConvIfNeeded() const
3227 {
3228     if ( m_deferred )
3229     {
3230         wxCSConv *self = (wxCSConv *)this; // const_cast
3231
3232         // if we don't have neither the name nor the encoding, use the default
3233         // encoding for this system
3234         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3235         {
3236 #if wxUSE_INTL
3237             self->m_encoding = wxLocale::GetSystemEncoding();
3238 #else
3239             // fallback to some reasonable default:
3240             self->m_encoding = wxFONTENCODING_ISO8859_1;
3241 #endif // wxUSE_INTL
3242         }
3243
3244         self->m_convReal = DoCreate();
3245         self->m_deferred = false;
3246     }
3247 }
3248
3249 bool wxCSConv::IsOk() const
3250 {
3251     CreateConvIfNeeded();
3252
3253     // special case: no convReal created for wxFONTENCODING_ISO8859_1
3254     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3255         return true; // always ok as we do it ourselves
3256
3257     // m_convReal->IsOk() is called at its own creation, so we know it must
3258     // be ok if m_convReal is non-NULL
3259     return m_convReal != NULL;
3260 }
3261
3262 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3263                          const char *src, size_t srcLen) const
3264 {
3265     CreateConvIfNeeded();
3266
3267     if (m_convReal)
3268         return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3269
3270     // latin-1 (direct)
3271     if ( srcLen == wxNO_LEN )
3272         srcLen = strlen(src) + 1; // take trailing NUL too
3273
3274     if ( dst )
3275     {
3276         if ( dstLen < srcLen )
3277             return wxCONV_FAILED;
3278
3279         for ( size_t n = 0; n < srcLen; n++ )
3280             dst[n] = (unsigned char)(src[n]);
3281     }
3282
3283     return srcLen;
3284 }
3285
3286 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3287                            const wchar_t *src, size_t srcLen) const
3288 {
3289     CreateConvIfNeeded();
3290
3291     if (m_convReal)
3292         return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3293
3294     // latin-1 (direct)
3295     if ( srcLen == wxNO_LEN )
3296         srcLen = wxWcslen(src) + 1;
3297
3298     if ( dst )
3299     {
3300         if ( dstLen < srcLen )
3301             return wxCONV_FAILED;
3302
3303         for ( size_t n = 0; n < srcLen; n++ )
3304         {
3305             if ( src[n] > 0xFF )
3306                 return wxCONV_FAILED;
3307
3308             dst[n] = (char)src[n];
3309         }
3310
3311     }
3312     else // still need to check the input validity
3313     {
3314         for ( size_t n = 0; n < srcLen; n++ )
3315         {
3316             if ( src[n] > 0xFF )
3317                 return wxCONV_FAILED;
3318         }
3319     }
3320
3321     return srcLen;
3322 }
3323
3324 size_t wxCSConv::GetMBNulLen() const
3325 {
3326     CreateConvIfNeeded();
3327
3328     if ( m_convReal )
3329     {
3330         return m_convReal->GetMBNulLen();
3331     }
3332
3333     // otherwise, we are ISO-8859-1
3334     return 1;
3335 }
3336
3337 #if wxUSE_UNICODE_UTF8
3338 bool wxCSConv::IsUTF8() const
3339 {
3340     CreateConvIfNeeded();
3341
3342     if ( m_convReal )
3343     {
3344         return m_convReal->IsUTF8();
3345     }
3346
3347     // otherwise, we are ISO-8859-1
3348     return false;
3349 }
3350 #endif
3351
3352
3353 #if wxUSE_UNICODE
3354
3355 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3356 {
3357     if ( !s )
3358         return wxWCharBuffer();
3359
3360     wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3361     if ( !wbuf )
3362         wbuf = wxMBConvUTF8().cMB2WX(s);
3363     if ( !wbuf )
3364         wbuf = wxConvISO8859_1.cMB2WX(s);
3365
3366     return wbuf;
3367 }
3368
3369 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3370 {
3371     if ( !ws )
3372         return wxCharBuffer();
3373
3374     wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3375     if ( !buf )
3376         buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3377
3378     return buf;
3379 }
3380
3381 #endif // wxUSE_UNICODE
3382
3383 // ----------------------------------------------------------------------------
3384 // globals
3385 // ----------------------------------------------------------------------------
3386
// NB: The reason why we create converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object). In other
//     words, possibly _before_ the converter global object would be
//     initialized.
3393
3394 #undef wxConvLibc
3395 #undef wxConvUTF8
3396 #undef wxConvUTF7
3397 #undef wxConvLocal
3398 #undef wxConvISO8859_1
3399
// Define the global converter called "name":
//
//  - klass is the type of the pointers giving access to the converter
//  - impl_klass is the type of the converter object itself
//  - ctor_args is pasted directly after the object name in its declaration
//
// This defines the name##Ptr global pointer (initially NULL), the accessor
// function wxGet_##name##Ptr() returning the address of the converter, which
// is a static object local to this function, and a static variable
// initialized by calling this accessor.
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// Simplified version of the macro above for the common case when the type of
// the converter object is the same as the type of the pointers to it.
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3414
3415 #ifdef __INTELC__
3416     // disable warning "variable 'xxx' was declared but never referenced"
3417     #pragma warning(disable: 177)
3418 #endif // Intel C++
3419
// wxConvLibc is a default-constructed wxMBConv_win32 under Windows and a
// default-constructed wxMBConvLibc elsewhere (the wxMBConv_cf branch is
// disabled by "#elif 0").
#ifdef __WINDOWS__
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#elif 0 // defined(__WXOSX__)
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_cf, wxConvLibc,  (wxFONTENCODING_UTF8));
#else
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif
3427
// NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
//     passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
//     provokes an error message about "not enough macro parameters"; and we
//     can't use "()" here as the name##Obj declaration would be parsed as a
//     function declaration then, so use a semicolon and live with an extra
//     empty statement (and hope that no compiler warns about this)
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);

// converters for the system encoding and for ISO-8859-1
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));

// these pointers initially refer to the wxConvLibc and wxConvLocal objects
// respectively
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
3442
#ifdef __DARWIN__
// The xnu kernel always communicates file paths in decomposed UTF-8.
// WARNING: Are we sure that CFString's conversion will cause decomposition?
static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
#endif

// wxConvFileName initially points to the UTF-8 wxMBConv_cf object defined
// above under Darwin and to the same object as wxConvLibc on all the other
// platforms.
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
#ifdef __DARWIN__
                                    &wxConvMacUTF8DObj;
#else // !__DARWIN__
                                    wxGet_wxConvLibcPtr();
#endif // __DARWIN__/!__DARWIN__
3454 #endif // __DARWIN__/!__DARWIN__