src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif  //__BORLANDC__
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/intl.h"
  24     #include "wx/log.h"
  25     #include "wx/utils.h"
  26     #include "wx/hashmap.h"
  27 #endif
  28
  29 #include "wx/strconv.h"
  30
  31 #if wxUSE_WCHAR_T
  32
  33 #ifndef __WXWINCE__
  34 #include <errno.h>
  35 #endif
  36
  37 #include <ctype.h>
  38 #include <string.h>
  39 #include <stdlib.h>
  40
  41 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  42     #include "wx/msw/private.h"
  43     #include "wx/msw/missing.h"
  44     #define wxHAVE_WIN32_MB2WC
  45 #endif
  46
  47 #ifdef HAVE_ICONV
  48     #include <iconv.h>
  49     #include "wx/thread.h"
  50 #endif
  51
  52 #include "wx/encconv.h"
  53 #include "wx/fontmap.h"
  54
  55 #ifdef __DARWIN__
  56 #include "wx/osx/core/private/strconv_cf.h"
  57 #endif //def __DARWIN__
  58
  59
  60 #define TRACE_STRCONV wxT("strconv")
  61
// WC_UTF16 is defined only if sizeof(wchar_t) == 2, i.e. when wchar_t holds
// UTF-16 code units; otherwise wchar_t is supposed to be 4 bytes wide
  64 #if SIZEOF_WCHAR_T == 2
  65     #define WC_UTF16
  66 #endif
  67
  68
  69 // ============================================================================
  70 // implementation
  71 // ============================================================================
  72
  73 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  74 static bool NotAllNULs(const char *p, size_t n)
  75 {
  76     while ( n && *p++ == '\0' )
  77         n--;
  78
  79     return n != 0;
  80 }
  81
  82 // ----------------------------------------------------------------------------
  83 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
  84 // ----------------------------------------------------------------------------
  85
  86 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  87 {
  88     if (input <= 0xffff)
  89     {
  90         if (output)
  91             *output = (wxUint16) input;
  92
  93         return 1;
  94     }
  95     else if (input >= 0x110000)
  96     {
  97         return wxCONV_FAILED;
  98     }
  99     else
 100     {
 101         if (output)
 102         {
 103             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 104             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 105         }
 106
 107         return 2;
 108     }
 109 }
 110
 111 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 112 {
 113     if ((*input < 0xd800) || (*input > 0xdfff))
 114     {
 115         output = *input;
 116         return 1;
 117     }
 118     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 119     {
 120         output = *input;
 121         return wxCONV_FAILED;
 122     }
 123     else
 124     {
 125         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 126         return 2;
 127     }
 128 }
 129
// type of the elements of the buffer passed to wxDecodeSurrogate(): wchar_t
// itself when WC_UTF16 is defined and wxUint16 otherwise
#ifdef WC_UTF16
    typedef wchar_t wxDecodeSurrogate_t;
#else // !WC_UTF16
    typedef wxUint16 wxDecodeSurrogate_t;
#endif // WC_UTF16/!WC_UTF16
 135
 136 // returns the next UTF-32 character from the wchar_t buffer and advances the
 137 // pointer to the character after this one
 138 //
 139 // if an invalid character is found, *pSrc is set to NULL, the caller must
 140 // check for this
 141 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 142 {
 143     wxUint32 out;
 144     const size_t
 145         n = decode_utf16(reinterpret_cast<const wxUint16 *>(*pSrc), out);
 146     if ( n == wxCONV_FAILED )
 147         *pSrc = NULL;
 148     else
 149         *pSrc += n;
 150
 151     return out;
 152 }
 153
 154 // ----------------------------------------------------------------------------
 155 // wxMBConv
 156 // ----------------------------------------------------------------------------
 157
// Default implementation of ToWChar() in terms of the old MB2WC() function.
//
// Parameters:
//  dst     the output buffer or NULL to only compute the required size
//  dstLen  the size of dst in wide characters, only checked if dst != NULL
//  src     the multibyte input
//  srcLen  the input length in bytes or wxNO_LEN if src is NUL-terminated
//
// Returns the number of wide characters [which would be] written to dst or
// wxCONV_FAILED if GetMBNulLen() or MB2WC() fails or if dst is too small.
size_t
wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
                  const char *src, size_t srcLen) const
{
    // although new conversion classes are supposed to implement this function
    // directly, the existing ones only implement the old MB2WC() and so, to
    // avoid having to rewrite all conversion classes at once, we provide a
    // default (but not efficient) implementation of this one in terms of the
    // old function by copying the input to ensure that it's NUL-terminated and
    // then using MB2WC() to convert it
    //
    // moreover, some conversion classes simply can't implement ToWChar()
    // directly, the primary example is wxConvLibc: mbstowcs() only handles
    // NUL-terminated strings

    // the number of chars [which would be] written to dst [if it were not NULL]
    size_t dstWritten = 0;

    // the number of NULs terminating this string
    size_t nulLen = 0;  // not really needed, but just to avoid warnings

    // if we were not given the input size we just have to assume that the
    // string is properly terminated as we have no way of knowing how long it
    // is anyhow, but if we do have the size check whether there are enough
    // NULs at the end
    wxCharBuffer bufTmp;
    const char *srcEnd;
    if ( srcLen != wxNO_LEN )
    {
        // we need to know how to find the end of this string
        nulLen = GetMBNulLen();
        if ( nulLen == wxCONV_FAILED )
            return wxCONV_FAILED;

        // if there are enough NULs we can avoid the copy, so only make it if
        // the input is too short or doesn't end with nulLen NUL bytes
        if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
        {
            // make a copy in order to properly NUL-terminate the string
            bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
            char * const p = bufTmp.data();
            memcpy(p, src, srcLen);
            for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
                *s = '\0';

            src = bufTmp;
        }

        // notice that this is the end of the original data, any terminator we
        // appended ourselves in the copy above starts exactly here
        srcEnd = src + srcLen;
    }
    else // quit after the first loop iteration
    {
        srcEnd = NULL;
    }

    // the idea of this code is straightforward: it converts a NUL-terminated
    // chunk of the string during each iteration and updates the output buffer
    // with the result
    //
    // all the complications come from the fact that this function, for
    // historical reasons, must behave in 2 subtly different ways when it's
    // called with a fixed number of characters and when it's called for the
    // entire NUL-terminated string: in the former case (srcEnd != NULL) we
    // must count all characters we convert, NUL or not; but in the latter we
    // do not count the trailing NUL -- but still count all the NULs inside the
    // string
    //
    // so for the (simple) former case we just always count the trailing NUL,
    // but for the latter we need to wait until we see if there is going to be
    // another loop iteration and only count it then
    //
    // NOTE(review): as written, the code below adds 1 for the terminator when
    // srcEnd is NULL and counts the NULs found before srcEnd otherwise, which
    // doesn't seem to match the wording above -- confirm the intended contract
    for ( ;; )
    {
        // try to convert the current chunk: passing NULL output buffer only
        // computes its length
        size_t lenChunk = MB2WC(NULL, src, 0);
        if ( lenChunk == wxCONV_FAILED )
            return wxCONV_FAILED;

        dstWritten += lenChunk;
        if ( !srcEnd )
            dstWritten++;

        if ( !lenChunk )
        {
            // nothing left in the input string, conversion succeeded
            //
            // NOTE(review): this is also reached for an empty chunk in the
            // middle of a fixed size input (e.g. 2 consecutive NULs) and
            // nothing is stored in dst in this case -- confirm it's intended
            break;
        }

        if ( dst )
        {
            if ( dstWritten > dstLen )
                return wxCONV_FAILED;

            // +1 is for trailing NUL
            if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
                return wxCONV_FAILED;

            dst += lenChunk;
            if ( !srcEnd )
                dst++;
        }

        if ( !srcEnd )
        {
            // we convert just one chunk in this case as this is the entire
            // string anyhow (and we don't count the trailing NUL in this case)
            break;
        }

        // advance the input pointer past the end of this chunk: notice that we
        // will always stop before srcEnd because we know that the chunk is
        // always properly NUL-terminated
        while ( NotAllNULs(src, nulLen) )
        {
            // notice that we must skip over multiple bytes here as we suppose
            // that if NUL takes 2 or 4 bytes, then all the other characters do
            // too and so if advanced by a single byte we might erroneously
            // detect sequences of NUL bytes in the middle of the input
            src += nulLen;
        }

        // if the buffer ends before this NUL, we shouldn't count it in our
        // output so skip the code below
        if ( src == srcEnd )
            break;

        // do count this terminator as it's inside the buffer we convert
        dstWritten++;
        if ( dst )
            dst++;

        src += nulLen; // skip the terminator itself

        // stop if this terminator was the last thing in the buffer
        if ( src >= srcEnd )
            break;
    }

    return dstWritten;
}
 295
// Default implementation of FromWChar() in terms of the old WC2MB() function.
//
// Parameters:
//  dst     the output buffer or NULL to only compute the required size
//  dstLen  the size of dst in bytes, only checked if dst != NULL
//  src     the wide character input
//  srcLen  the input length in wide characters or wxNO_LEN if src is
//          NUL-terminated
//
// Returns the number of bytes [which would be] written to dst or
// wxCONV_FAILED if WC2MB() fails or if dst is too small.
size_t
wxMBConv::FromWChar(char *dst, size_t dstLen,
                    const wchar_t *src, size_t srcLen) const
{
    // the number of chars [which would be] written to dst [if it were not NULL]
    size_t dstWritten = 0;

    // if we don't know its length we have no choice but to assume that it is
    // NUL-terminated (notice that it can still be NUL-terminated even if
    // explicit length is given but it doesn't change our return value)
    const bool isNulTerminated = srcLen == wxNO_LEN;

    // make a copy of the input string unless it is already properly
    // NUL-terminated
    wxWCharBuffer bufTmp;
    if ( isNulTerminated )
    {
        // the terminating NUL is converted as well in this case
        srcLen = wxWcslen(src) + 1;
    }
    else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
    {
        // make a copy in order to properly NUL-terminate the string
        //
        // NOTE(review): this relies on wxWCharBuffer(srcLen) providing a
        // NUL-initialized extra element after the srcLen copied ones --
        // confirm against the buffer class
        bufTmp = wxWCharBuffer(srcLen);
        memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
        src = bufTmp;
    }

    // NOTE(review): unlike in ToWChar(), the result of GetMBNulLen() is not
    // checked for wxCONV_FAILED here
    const size_t lenNul = GetMBNulLen();
    for ( const wchar_t * const srcEnd = src + srcLen;
          src < srcEnd;
          src++ /* skip L'\0' too */ )
    {
        // try to convert the current chunk: passing NULL output buffer only
        // computes its length
        size_t lenChunk = WC2MB(NULL, src, 0);
        if ( lenChunk == wxCONV_FAILED )
            return wxCONV_FAILED;

        dstWritten += lenChunk;

        // the position of the NUL terminating this chunk: it is inside the
        // input unless it's the one from our own copy made above
        const wchar_t * const
            chunkEnd = isNulTerminated ? srcEnd - 1 : src + wxWcslen(src);

        // our return value accounts for the trailing NUL(s), unlike that of
        // WC2MB(), however don't do it for the last NUL we artificially added
        // ourselves above
        if ( chunkEnd < srcEnd )
            dstWritten += lenNul;

        if ( dst )
        {
            if ( dstWritten > dstLen )
                return wxCONV_FAILED;

            // if we know that there is enough space in the destination buffer
            // (because we accounted for lenNul in dstWritten above), we can
            // convert directly in place -- but otherwise we need another
            // temporary buffer to ensure that we don't overwrite the output
            wxCharBuffer dstBuf;
            char *dstTmp;
            if ( chunkEnd == srcEnd )
            {
                dstBuf = wxCharBuffer(lenChunk + lenNul - 1);
                dstTmp = dstBuf.data();
            }
            else
            {
                dstTmp = dst;
            }

            if ( WC2MB(dstTmp, src, lenChunk + lenNul) == wxCONV_FAILED )
                return wxCONV_FAILED;

            if ( dstTmp != dst )
            {
                // copy everything up to but excluding the terminating NUL(s)
                // into the real output buffer
                memcpy(dst, dstTmp, lenChunk);

                // micro-optimization: if dstTmp != dst it means that chunkEnd
                // == srcEnd and so we're done, no need to update anything below
                break;
            }

            dst += lenChunk;
            if ( chunkEnd < srcEnd )
                dst += lenNul;
        }

        // continue from the terminator of this chunk, the loop increment then
        // skips over it
        src = chunkEnd;
    }

    return dstWritten;
}
 389
 390 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 391 {
 392     size_t rc = ToWChar(outBuff, outLen, inBuff);
 393     if ( rc != wxCONV_FAILED )
 394     {
 395         // ToWChar() returns the buffer length, i.e. including the trailing
 396         // NUL, while this method doesn't take it into account
 397         rc--;
 398     }
 399
 400     return rc;
 401 }
 402
 403 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 404 {
 405     size_t rc = FromWChar(outBuff, outLen, inBuff);
 406     if ( rc != wxCONV_FAILED )
 407     {
 408         rc -= GetMBNulLen();
 409     }
 410
 411     return rc;
 412 }
 413
// out of line destructor with an intentionally empty body
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 418
 419 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 420 {
 421     if ( psz )
 422     {
 423         // calculate the length of the buffer needed first
 424         const size_t nLen = ToWChar(NULL, 0, psz);
 425         if ( nLen != wxCONV_FAILED )
 426         {
 427             // now do the actual conversion
 428             wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */);
 429
 430             // +1 for the trailing NULL
 431             if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED )
 432                 return buf;
 433         }
 434     }
 435
 436     return wxWCharBuffer();
 437 }
 438
 439 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 440 {
 441     if ( pwz )
 442     {
 443         const size_t nLen = FromWChar(NULL, 0, pwz);
 444         if ( nLen != wxCONV_FAILED )
 445         {
 446             wxCharBuffer buf(nLen - 1);
 447             if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED )
 448                 return buf;
 449         }
 450     }
 451
 452     return wxCharBuffer();
 453 }
 454
 455 const wxWCharBuffer
 456 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 457 {
 458     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 459     if ( dstLen != wxCONV_FAILED )
 460     {
 461         // notice that we allocate space for dstLen+1 wide characters here
 462         // because we want the buffer to always be NUL-terminated, even if the
 463         // input isn't (as otherwise the caller has no way to know its length)
 464         wxWCharBuffer wbuf(dstLen);
 465         wbuf.data()[dstLen] = L'\0';
 466         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 467         {
 468             if ( outLen )
 469             {
 470                 *outLen = dstLen;
 471
 472                 // we also need to handle NUL-terminated input strings
 473                 // specially: for them the output is the length of the string
 474                 // excluding the trailing NUL, however if we're asked to
 475                 // convert a specific number of characters we return the length
 476                 // of the resulting output even if it's NUL-terminated
 477                 if ( inLen == wxNO_LEN )
 478                     (*outLen)--;
 479             }
 480
 481             return wbuf;
 482         }
 483     }
 484
 485     if ( outLen )
 486         *outLen = 0;
 487
 488     return wxWCharBuffer();
 489 }
 490
 491 const wxCharBuffer
 492 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 493 {
 494     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 495     if ( dstLen != wxCONV_FAILED )
 496     {
 497         const size_t nulLen = GetMBNulLen();
 498
 499         // as above, ensure that the buffer is always NUL-terminated, even if
 500         // the input is not
 501         wxCharBuffer buf(dstLen + nulLen - 1);
 502         memset(buf.data() + dstLen, 0, nulLen);
 503         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 504         {
 505             if ( outLen )
 506             {
 507                 *outLen = dstLen;
 508
 509                 if ( inLen == wxNO_LEN )
 510                 {
 511                     // in this case both input and output are NUL-terminated
 512                     // and we're not supposed to count NUL
 513                     *outLen -= nulLen;
 514                 }
 515             }
 516
 517             return buf;
 518         }
 519     }
 520
 521     if ( outLen )
 522         *outLen = 0;
 523
 524     return wxCharBuffer();
 525 }
 526
 527 const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const
 528 {
 529     const size_t srcLen = buf.length();
 530     if ( srcLen )
 531     {
 532         const size_t dstLen = ToWChar(NULL, 0, buf, srcLen);
 533         if ( dstLen != wxCONV_FAILED )
 534         {
 535             wxWCharBuffer wbuf(dstLen);
 536             wbuf.data()[dstLen] = L'\0';
 537             if ( ToWChar(wbuf.data(), dstLen, buf, srcLen) != wxCONV_FAILED )
 538                 return wbuf;
 539         }
 540     }
 541
 542     return wxWCharBuffer();
 543 }
 544
 545 const wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const
 546 {
 547     const size_t srcLen = wbuf.length();
 548     if ( srcLen )
 549     {
 550         const size_t dstLen = FromWChar(NULL, 0, wbuf, srcLen);
 551         if ( dstLen != wxCONV_FAILED )
 552         {
 553             wxCharBuffer buf(dstLen);
 554             buf.data()[dstLen] = '\0';
 555             if ( FromWChar(buf.data(), dstLen, wbuf, srcLen) != wxCONV_FAILED )
 556                 return buf;
 557         }
 558     }
 559
 560     return wxCharBuffer();
 561 }
 562
 563 // ----------------------------------------------------------------------------
 564 // wxMBConvLibc
 565 // ----------------------------------------------------------------------------
 566
// multibyte to wide character conversion: simply forwards all its arguments
// to wxMB2WC() and returns its result
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 571
// wide character to multibyte conversion: simply forwards all its arguments
// to wxWC2MB() and returns its result
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 576
 577 // ----------------------------------------------------------------------------
 578 // wxConvBrokenFileNames
 579 // ----------------------------------------------------------------------------
 580
 581 #ifdef __UNIX__
 582
 583 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
 584 {
 585     if ( wxStricmp(charset, wxT("UTF-8")) == 0 ||
 586          wxStricmp(charset, wxT("UTF8")) == 0  )
 587         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
 588     else
 589         m_conv = new wxCSConv(charset);
 590 }
 591
 592 #endif // __UNIX__
 593
 594 // ----------------------------------------------------------------------------
 595 // UTF-7
 596 // ----------------------------------------------------------------------------
 597
 598 // Implementation (C) 2004 Fredrik Roubert
 599 //
 600 // Changes to work in streaming mode (C) 2008 Vadim Zeitlin
 601
//
// BASE64 decoding table
//
// It has 256 entries and is indexed by the byte value: the entries for
// 'A'..'Z', 'a'..'z', '0'..'9', '+' and '/' contain the corresponding 6 bit
// value (0x00..0x3f) and all the other ones contain 0xff.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // '+' and '/'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // '0'..'7'
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '8' and '9'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A'..'G'
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // 'X'..'Z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 'a'..'g'
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // 'x'..'z'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 640
 641 size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
 642                              const char *src, size_t srcLen) const
 643 {
 644     DecoderState stateOrig,
 645                 *statePtr;
 646     if ( srcLen == wxNO_LEN )
 647     {
 648         // convert the entire string, up to and including the trailing NUL
 649         srcLen = strlen(src) + 1;
 650
 651         // when working on the entire strings we don't update nor use the shift
 652         // state from the previous call
 653         statePtr = &stateOrig;
 654     }
 655     else // when working with partial strings we do use the shift state
 656     {
 657         statePtr = const_cast<DecoderState *>(&m_stateDecoder);
 658
 659         // also save the old state to be able to rollback to it on error
 660         stateOrig = m_stateDecoder;
 661     }
 662
 663     // but to simplify the code below we use this variable in both cases
 664     DecoderState& state = *statePtr;
 665
 666
 667     // number of characters [which would have been] written to dst [if it were
 668     // not NULL]
 669     size_t len = 0;
 670
 671     const char * const srcEnd = src + srcLen;
 672
 673     while ( (src < srcEnd) && (!dst || (len < dstLen)) )
 674     {
 675         const unsigned char cc = *src++;
 676
 677         if ( state.IsShifted() )
 678         {
 679             const unsigned char dc = utf7unb64[cc];
 680             if ( dc == 0xff )
 681             {
 682                 // end of encoded part, check that nothing was left: there can
 683                 // be up to 4 bits of 0 padding but nothing else (we also need
 684                 // to check isLSB as we count bits modulo 8 while a valid UTF-7
 685                 // encoded sequence must contain an integral number of UTF-16
 686                 // characters)
 687                 if ( state.isLSB || state.bit > 4 ||
 688                         (state.accum & ((1 << state.bit) - 1)) )
 689                 {
 690                     if ( !len )
 691                         state = stateOrig;
 692
 693                     return wxCONV_FAILED;
 694                 }
 695
 696                 state.ToDirect();
 697
 698                 // re-parse this character normally below unless it's '-' which
 699                 // is consumed by the decoder
 700                 if ( cc == '-' )
 701                     continue;
 702             }
 703             else // valid encoded character
 704             {
 705                 // mini base64 decoder: each character is 6 bits
 706                 state.bit += 6;
 707                 state.accum <<= 6;
 708                 state.accum += dc;
 709
 710                 if ( state.bit >= 8 )
 711                 {
 712                     // got the full byte, consume it
 713                     state.bit -= 8;
 714                     unsigned char b = (state.accum >> state.bit) & 0x00ff;
 715
 716                     if ( state.isLSB )
 717                     {
 718                         // we've got the full word, output it
 719                         if ( dst )
 720                             *dst++ = (state.msb << 8) | b;
 721                         len++;
 722                         state.isLSB = false;
 723                     }
 724                     else // MSB
 725                     {
 726                         // just store it while we wait for LSB
 727                         state.msb = b;
 728                         state.isLSB = true;
 729                     }
 730                 }
 731             }
 732         }
 733
 734         if ( state.IsDirect() )
 735         {
 736             // start of an encoded segment?
 737             if ( cc == '+' )
 738             {
 739                 if ( *src == '-' )
 740                 {
 741                     // just the encoded plus sign, don't switch to shifted mode
 742                     if ( dst )
 743                         *dst++ = '+';
 744                     len++;
 745                     src++;
 746                 }
 747                 else if ( utf7unb64[(unsigned)*src] == 0xff )
 748                 {
 749                     // empty encoded chunks are not allowed
 750                     if ( !len )
 751                         state = stateOrig;
 752
 753                     return wxCONV_FAILED;
 754                 }
 755                 else // base-64 encoded chunk follows
 756                 {
 757                     state.ToShifted();
 758                 }
 759             }
 760             else // not '+'
 761             {
 762                 // only printable 7 bit ASCII characters (with the exception of
 763                 // NUL, TAB, CR and LF) can be used directly
 764                 if ( cc >= 0x7f || (cc < ' ' &&
 765                       !(cc == '\0' || cc == '\t' || cc == '\r' || cc == '\n')) )
 766                     return wxCONV_FAILED;
 767
 768                 if ( dst )
 769                     *dst++ = cc;
 770                 len++;
 771             }
 772         }
 773     }
 774
 775     if ( !len )
 776     {
 777         // as we didn't read any characters we should be called with the same
 778         // data (followed by some more new data) again later so don't save our
 779         // state
 780         state = stateOrig;
 781
 782         return wxCONV_FAILED;
 783     }
 784
 785     return len;
 786 }
 787
//
// BASE64 encoding table
//
// It has 64 entries and maps each 6 bit value to the character used to
// represent it: 'A'..'Z', 'a'..'z', '0'..'9', '+' and '/', in this order.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 802
 803 //
 804 // UTF-7 encoding table
 805 //
 806 // 0 - Set D (directly encoded characters)
 807 // 1 - Set O (optional direct characters)
 808 // 2 - whitespace characters (optional)
 809 // 3 - special characters
 810 //
 811 static const unsigned char utf7encode[128] =
 812 {
 813     0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 814     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 815     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 816     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 817     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 818     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 819     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 820     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 821 };
 822
 823 static inline bool wxIsUTF7Direct(wchar_t wc)
 824 {
 825     return wc < 0x80 && utf7encode[wc] < 1;
 826 }
 827
// Encodes wide characters as UTF-7.
//
// Parameters:
//  dst     the output buffer or NULL to only compute the required size
//  dstLen  the size of dst in bytes, only checked at the start of each loop
//          iteration and only if dst != NULL
//  src     the wide character input
//  srcLen  the input length in wide characters or wxNO_LEN to encode the
//          entire NUL-terminated string, including its trailing NUL
//
// If srcLen is wxNO_LEN, a local default-constructed encoder state is used,
// otherwise encoding continues from m_stateEncoder which is updated unless
// dst is NULL.
//
// Returns the number of bytes [which would have been] written to dst or, if
// WC_UTF16 is not defined, wxCONV_FAILED if a character above 0xffff is
// encountered.
size_t wxMBConvUTF7::FromWChar(char *dst, size_t dstLen,
                               const wchar_t *src, size_t srcLen) const
{
    EncoderState stateOrig,
                *statePtr;
    if ( srcLen == wxNO_LEN )
    {
        // we don't apply the stored state when operating on entire strings at
        // once
        statePtr = &stateOrig;

        srcLen = wxWcslen(src) + 1;
    }
    else // do use the mode we left the output in previously
    {
        stateOrig = m_stateEncoder;
        statePtr = const_cast<EncoderState *>(&m_stateEncoder);
    }

    EncoderState& state = *statePtr;


    // number of bytes [which would have been] written to dst [if it were not
    // NULL]
    size_t len = 0;

    const wchar_t * const srcEnd = src + srcLen;
    while ( src < srcEnd && (!dst || len < dstLen) )
    {
        wchar_t cc = *src++;
        if ( wxIsUTF7Direct(cc) )
        {
            if ( state.IsShifted() )
            {
                // pad with zeros the last encoded block if necessary
                if ( state.bit )
                {
                    if ( dst )
                        *dst++ = utf7enb64[((state.accum % 16) << (6 - state.bit)) % 64];
                    len++;
                }

                state.ToDirect();

                // explicitly terminate the encoded block
                if ( dst )
                    *dst++ = '-';
                len++;
            }

            if ( dst )
                *dst++ = (char)cc;
            len++;
        }
        else if ( cc == '+' && state.IsDirect() )
        {
            // the plus sign itself is encoded as "+-" in direct mode
            if ( dst )
            {
                *dst++ = '+';
                *dst++ = '-';
            }

            len += 2;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return wxCONV_FAILED;
        }
#endif
        else
        {
            if ( state.IsDirect() )
            {
                state.ToShifted();

                // start the encoded block
                if ( dst )
                    *dst++ = '+';
                len++;
            }

            // BASE64 encode string
            for ( ;; )
            {
                // process the 2 bytes of this character, most significant
                // byte first
                for ( unsigned lsb = 0; lsb < 2; lsb++ )
                {
                    state.accum <<= 8;
                    state.accum += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                    // output all the complete groups of 6 bits we have now
                    for (state.bit += 8; state.bit >= 6; )
                    {
                        state.bit -= 6;
                        if ( dst )
                            *dst++ = utf7enb64[(state.accum >> state.bit) % 64];
                        len++;
                    }
                }

                // stay in this loop until the end of input or the next
                // character which can be output directly: it is not consumed
                // here but handled by the next iteration of the outer loop
                if ( src == srcEnd || wxIsUTF7Direct(cc = *src) )
                    break;

                src++;
            }
        }
    }

    // we need to restore the original encoder state if we were called just to
    // calculate the amount of space needed as we will presumably be called
    // again to really convert the data now
    if ( !dst )
        state = stateOrig;

    return len;
}
 940
 941 // ----------------------------------------------------------------------------
 942 // UTF-8
 943 // ----------------------------------------------------------------------------
 944
 945 static const wxUint32 utf8_max[]=
 946     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 947
 948 // boundaries of the private use area we use to (temporarily) remap invalid
 949 // characters invalid in a UTF-8 encoded string
 950 const wxUint32 wxUnicodePUA = 0x100000;
 951 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 952
 953 // this table gives the length of the UTF-8 encoding from its first character:
 954 const unsigned char tableUtf8Lengths[256] = {
 955     // single-byte sequences (ASCII):
 956     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
 957     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
 958     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
 959     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
 960     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
 961     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
 962     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
 963     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F
 964
 965     // these are invalid:
 966     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
 967     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
 968     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
 969     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
 970     0, 0,                                            // C0,C1
 971
 972     // two-byte sequences:
 973           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
 974     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF
 975
 976     // three-byte sequences:
 977     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF
 978
 979     // four-byte sequences:
 980     4, 4, 4, 4, 4,                                   // F0..F4
 981
 982     // these are invalid again (5- or 6-byte
 983     // sequences and sequences for code points
 984     // above U+10FFFF, as restricted by RFC 3629):
 985                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF
 986 };
 987
 988 size_t
 989 wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
 990                             const char *src, size_t srcLen) const
 991 {
 992     wchar_t *out = dstLen ? dst : NULL;
 993     size_t written = 0;
 994
 995     if ( srcLen == wxNO_LEN )
 996         srcLen = strlen(src) + 1;
 997
 998     for ( const char *p = src; ; p++ )
 999     {
1000         if ( !(srcLen == wxNO_LEN ? *p : srcLen) )
1001         {
1002             // all done successfully, just add the trailing NULL if we are not
1003             // using explicit length
1004             if ( srcLen == wxNO_LEN )
1005             {
1006                 if ( out )
1007                 {
1008                     if ( !dstLen )
1009                         break;
1010
1011                     *out = L'\0';
1012                 }
1013
1014                 written++;
1015             }
1016
1017             return written;
1018         }
1019
1020         if ( out && !dstLen-- )
1021             break;
1022
1023         wxUint32 code;
1024         unsigned char c = *p;
1025
1026         if ( c < 0x80 )
1027         {
1028             if ( srcLen == 0 ) // the test works for wxNO_LEN too
1029                 break;
1030
1031             if ( srcLen != wxNO_LEN )
1032                 srcLen--;
1033
1034             code = c;
1035         }
1036         else
1037         {
1038             unsigned len = tableUtf8Lengths[c];
1039             if ( !len )
1040                 break;
1041
1042             if ( srcLen < len ) // the test works for wxNO_LEN too
1043                 break;
1044
1045             if ( srcLen != wxNO_LEN )
1046                 srcLen -= len;
1047
1048             //   Char. number range   |        UTF-8 octet sequence
1049             //      (hexadecimal)     |              (binary)
1050             //  ----------------------+----------------------------------------
1051             //  0000 0000 - 0000 007F | 0xxxxxxx
1052             //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
1053             //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
1054             //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1055             //
1056             //  Code point value is stored in bits marked with 'x',
1057             //  lowest-order bit of the value on the right side in the diagram
1058             //  above.                                         (from RFC 3629)
1059
1060             // mask to extract lead byte's value ('x' bits above), by sequence
1061             // length:
1062             static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
1063
1064             // mask and value of lead byte's most significant bits, by length:
1065             static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
1066             static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
1067
1068             len--; // it's more convenient to work with 0-based length here
1069
1070             // extract the lead byte's value bits:
1071             if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
1072                 break;
1073
1074             code = c & leadValueMask[len];
1075
1076             // all remaining bytes, if any, are handled in the same way
1077             // regardless of sequence's length:
1078             for ( ; len; --len )
1079             {
1080                 c = *++p;
1081                 if ( (c & 0xC0) != 0x80 )
1082                     return wxCONV_FAILED;
1083
1084                 code <<= 6;
1085                 code |= c & 0x3F;
1086             }
1087         }
1088
1089 #ifdef WC_UTF16
1090         // cast is ok because wchar_t == wxUint16 if WC_UTF16
1091         if ( encode_utf16(code, (wxUint16 *)out) == 2 )
1092         {
1093             if ( out )
1094                 out++;
1095             written++;
1096         }
1097 #else // !WC_UTF16
1098         if ( out )
1099             *out = code;
1100 #endif // WC_UTF16/!WC_UTF16
1101
1102         if ( out )
1103             out++;
1104
1105         written++;
1106     }
1107
1108     return wxCONV_FAILED;
1109 }
1110
1111 size_t
1112 wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
1113                               const wchar_t *src, size_t srcLen) const
1114 {
1115     char *out = dstLen ? dst : NULL;
1116     size_t written = 0;
1117
1118     for ( const wchar_t *wp = src; ; wp++ )
1119     {
1120         if ( !(srcLen == wxNO_LEN ? *wp : srcLen) )
1121         {
1122             // all done successfully, just add the trailing NULL if we are not
1123             // using explicit length
1124             if ( srcLen == wxNO_LEN )
1125             {
1126                 if ( out )
1127                 {
1128                     if ( !dstLen )
1129                         break;
1130
1131                     *out = '\0';
1132                 }
1133
1134                 written++;
1135             }
1136
1137             return written;
1138         }
1139
1140         if ( srcLen != wxNO_LEN )
1141             srcLen--;
1142
1143         wxUint32 code;
1144 #ifdef WC_UTF16
1145         // cast is ok for WC_UTF16
1146         if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
1147         {
1148             // skip the next char too as we decoded a surrogate
1149             wp++;
1150         }
1151 #else // wchar_t is UTF-32
1152         code = *wp & 0x7fffffff;
1153 #endif
1154
1155         unsigned len;
1156         if ( code <= 0x7F )
1157         {
1158             len = 1;
1159             if ( out )
1160             {
1161                 if ( dstLen < len )
1162                     break;
1163
1164                 out[0] = (char)code;
1165             }
1166         }
1167         else if ( code <= 0x07FF )
1168         {
1169             len = 2;
1170             if ( out )
1171             {
1172                 if ( dstLen < len )
1173                     break;
1174
1175                 // NB: this line takes 6 least significant bits, encodes them as
1176                 // 10xxxxxx and discards them so that the next byte can be encoded:
1177                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1178                 out[0] = 0xC0 | code;
1179             }
1180         }
1181         else if ( code < 0xFFFF )
1182         {
1183             len = 3;
1184             if ( out )
1185             {
1186                 if ( dstLen < len )
1187                     break;
1188
1189                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1190                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1191                 out[0] = 0xE0 | code;
1192             }
1193         }
1194         else if ( code <= 0x10FFFF )
1195         {
1196             len = 4;
1197             if ( out )
1198             {
1199                 if ( dstLen < len )
1200                     break;
1201
1202                 out[3] = 0x80 | (code & 0x3F);  code >>= 6;
1203                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1204                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1205                 out[0] = 0xF0 | code;
1206             }
1207         }
1208         else
1209         {
1210             wxFAIL_MSG( wxT("trying to encode undefined Unicode character") );
1211             break;
1212         }
1213
1214         if ( out )
1215         {
1216             out += len;
1217             dstLen -= len;
1218         }
1219
1220         written += len;
1221     }
1222
1223     // we only get here if an error occurs during decoding
1224     return wxCONV_FAILED;
1225 }
1226
1227 size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
1228                              const char *psz, size_t srcLen) const
1229 {
1230     if ( m_options == MAP_INVALID_UTF8_NOT )
1231         return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
1232
1233     size_t len = 0;
1234
1235     while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
1236     {
1237         const char *opsz = psz;
1238         bool invalid = false;
1239         unsigned char cc = *psz++, fc = cc;
1240         unsigned cnt;
1241         for (cnt = 0; fc & 0x80; cnt++)
1242             fc <<= 1;
1243
1244         if (!cnt)
1245         {
1246             // plain ASCII char
1247             if (buf)
1248                 *buf++ = cc;
1249             len++;
1250
1251             // escape the escape character for octal escapes
1252             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
1253                     && cc == '\\' && (!buf || len < n))
1254             {
1255                 if (buf)
1256                     *buf++ = cc;
1257                 len++;
1258             }
1259         }
1260         else
1261         {
1262             cnt--;
1263             if (!cnt)
1264             {
1265                 // invalid UTF-8 sequence
1266                 invalid = true;
1267             }
1268             else
1269             {
1270                 unsigned ocnt = cnt - 1;
1271                 wxUint32 res = cc & (0x3f >> cnt);
1272                 while (cnt--)
1273                 {
1274                     cc = *psz;
1275                     if ((cc & 0xC0) != 0x80)
1276                     {
1277                         // invalid UTF-8 sequence
1278                         invalid = true;
1279                         break;
1280                     }
1281
1282                     psz++;
1283                     res = (res << 6) | (cc & 0x3f);
1284                 }
1285
1286                 if (invalid || res <= utf8_max[ocnt])
1287                 {
1288                     // illegal UTF-8 encoding
1289                     invalid = true;
1290                 }
1291                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
1292                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
1293                 {
1294                     // if one of our PUA characters turns up externally
1295                     // it must also be treated as an illegal sequence
1296                     // (a bit like you have to escape an escape character)
1297                     invalid = true;
1298                 }
1299                 else
1300                 {
1301 #ifdef WC_UTF16
1302                     // cast is ok because wchar_t == wxUint16 if WC_UTF16
1303                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
1304                     if (pa == wxCONV_FAILED)
1305                     {
1306                         invalid = true;
1307                     }
1308                     else
1309                     {
1310                         if (buf)
1311                             buf += pa;
1312                         len += pa;
1313                     }
1314 #else // !WC_UTF16
1315                     if (buf)
1316                         *buf++ = (wchar_t)res;
1317                     len++;
1318 #endif // WC_UTF16/!WC_UTF16
1319                 }
1320             }
1321
1322             if (invalid)
1323             {
1324                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
1325                 {
1326                     while (opsz < psz && (!buf || len < n))
1327                     {
1328 #ifdef WC_UTF16
1329                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
1330                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
1331                         wxASSERT(pa != wxCONV_FAILED);
1332                         if (buf)
1333                             buf += pa;
1334                         opsz++;
1335                         len += pa;
1336 #else
1337                         if (buf)
1338                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
1339                         opsz++;
1340                         len++;
1341 #endif
1342                     }
1343                 }
1344                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1345                 {
1346                     while (opsz < psz && (!buf || len < n))
1347                     {
1348                         if ( buf && len + 3 < n )
1349                         {
1350                             unsigned char on = *opsz;
1351                             *buf++ = L'\\';
1352                             *buf++ = (wchar_t)( L'0' + on / 0100 );
1353                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
1354                             *buf++ = (wchar_t)( L'0' + on % 010 );
1355                         }
1356
1357                         opsz++;
1358                         len += 4;
1359                     }
1360                 }
1361                 else // MAP_INVALID_UTF8_NOT
1362                 {
1363                     return wxCONV_FAILED;
1364                 }
1365             }
1366         }
1367     }
1368
1369     if (srcLen == wxNO_LEN && buf && (len < n))
1370         *buf = 0;
1371
1372     return len + 1;
1373 }
1374
// Return true if wch is an octal digit, i.e. is in the '0'..'7' range.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
1379
1380 size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
1381                                const wchar_t *psz, size_t srcLen) const
1382 {
1383     if ( m_options == MAP_INVALID_UTF8_NOT )
1384         return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
1385
1386     size_t len = 0;
1387
1388     while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
1389     {
1390         wxUint32 cc;
1391
1392 #ifdef WC_UTF16
1393         // cast is ok for WC_UTF16
1394         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1395         psz += (pa == wxCONV_FAILED) ? 1 : pa;
1396 #else
1397         cc = (*psz++) & 0x7fffffff;
1398 #endif
1399
1400         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
1401                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
1402         {
1403             if (buf)
1404                 *buf++ = (char)(cc - wxUnicodePUA);
1405             len++;
1406         }
1407         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1408                     && cc == L'\\' && psz[0] == L'\\' )
1409         {
1410             if (buf)
1411                 *buf++ = (char)cc;
1412             psz++;
1413             len++;
1414         }
1415         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
1416                     cc == L'\\' &&
1417                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
1418         {
1419             if (buf)
1420             {
1421                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
1422                                  (psz[1] - L'0') * 010 +
1423                                  (psz[2] - L'0'));
1424             }
1425
1426             psz += 3;
1427             len++;
1428         }
1429         else
1430         {
1431             unsigned cnt;
1432             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
1433             {
1434             }
1435
1436             if (!cnt)
1437             {
1438                 // plain ASCII char
1439                 if (buf)
1440                     *buf++ = (char) cc;
1441                 len++;
1442             }
1443             else
1444             {
1445                 len += cnt + 1;
1446                 if (buf)
1447                 {
1448                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
1449                     while (cnt--)
1450                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
1451                 }
1452             }
1453         }
1454     }
1455
1456     if (srcLen == wxNO_LEN && buf && (len < n))
1457         *buf = 0;
1458
1459     return len + 1;
1460 }
1461
1462 // ============================================================================
1463 // UTF-16
1464 // ============================================================================
1465
// "straight" is the converter for the UTF-16 variant using the byte order
// selected by WORDS_BIGENDIAN and "swap" is the one for the opposite order
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#else // WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#endif // !WORDS_BIGENDIAN/WORDS_BIGENDIAN
1473
1474 /* static */
1475 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
1476 {
1477     if ( srcLen == wxNO_LEN )
1478     {
1479         // count the number of bytes in input, including the trailing NULs
1480         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1481         for ( srcLen = 1; *inBuff++; srcLen++ )
1482             ;
1483
1484         srcLen *= BYTES_PER_CHAR;
1485     }
1486     else // we already have the length
1487     {
1488         // we can only convert an entire number of UTF-16 characters
1489         if ( srcLen % BYTES_PER_CHAR )
1490             return wxCONV_FAILED;
1491     }
1492
1493     return srcLen;
1494 }
1495
1496 // case when in-memory representation is UTF-16 too
1497 #ifdef WC_UTF16
1498
1499 // ----------------------------------------------------------------------------
1500 // conversions without endianness change
1501 // ----------------------------------------------------------------------------
1502
1503 size_t
1504 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1505                                const char *src, size_t srcLen) const
1506 {
1507     // set up the scene for using memcpy() (which is presumably more efficient
1508     // than copying the bytes one by one)
1509     srcLen = GetLength(src, srcLen);
1510     if ( srcLen == wxNO_LEN )
1511         return wxCONV_FAILED;
1512
1513     const size_t inLen = srcLen / BYTES_PER_CHAR;
1514     if ( dst )
1515     {
1516         if ( dstLen < inLen )
1517             return wxCONV_FAILED;
1518
1519         memcpy(dst, src, srcLen);
1520     }
1521
1522     return inLen;
1523 }
1524
1525 size_t
1526 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1527                                  const wchar_t *src, size_t srcLen) const
1528 {
1529     if ( srcLen == wxNO_LEN )
1530         srcLen = wxWcslen(src) + 1;
1531
1532     srcLen *= BYTES_PER_CHAR;
1533
1534     if ( dst )
1535     {
1536         if ( dstLen < srcLen )
1537             return wxCONV_FAILED;
1538
1539         memcpy(dst, src, srcLen);
1540     }
1541
1542     return srcLen;
1543 }
1544
1545 // ----------------------------------------------------------------------------
1546 // endian-reversing conversions
1547 // ----------------------------------------------------------------------------
1548
1549 size_t
1550 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1551                            const char *src, size_t srcLen) const
1552 {
1553     srcLen = GetLength(src, srcLen);
1554     if ( srcLen == wxNO_LEN )
1555         return wxCONV_FAILED;
1556
1557     srcLen /= BYTES_PER_CHAR;
1558
1559     if ( dst )
1560     {
1561         if ( dstLen < srcLen )
1562             return wxCONV_FAILED;
1563
1564         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1565         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1566         {
1567             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1568         }
1569     }
1570
1571     return srcLen;
1572 }
1573
1574 size_t
1575 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1576                              const wchar_t *src, size_t srcLen) const
1577 {
1578     if ( srcLen == wxNO_LEN )
1579         srcLen = wxWcslen(src) + 1;
1580
1581     srcLen *= BYTES_PER_CHAR;
1582
1583     if ( dst )
1584     {
1585         if ( dstLen < srcLen )
1586             return wxCONV_FAILED;
1587
1588         wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1589         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1590         {
1591             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1592         }
1593     }
1594
1595     return srcLen;
1596 }
1597
1598 #else // !WC_UTF16: wchar_t is UTF-32
1599
1600 // ----------------------------------------------------------------------------
1601 // conversions without endianness change
1602 // ----------------------------------------------------------------------------
1603
1604 size_t
1605 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1606                                const char *src, size_t srcLen) const
1607 {
1608     srcLen = GetLength(src, srcLen);
1609     if ( srcLen == wxNO_LEN )
1610         return wxCONV_FAILED;
1611
1612     const size_t inLen = srcLen / BYTES_PER_CHAR;
1613     if ( !dst )
1614     {
1615         // optimization: return maximal space which could be needed for this
1616         // string even if the real size could be smaller if the buffer contains
1617         // any surrogates
1618         return inLen;
1619     }
1620
1621     size_t outLen = 0;
1622     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1623     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1624     {
1625         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1626         if ( !inBuff )
1627             return wxCONV_FAILED;
1628
1629         if ( ++outLen > dstLen )
1630             return wxCONV_FAILED;
1631
1632         *dst++ = ch;
1633     }
1634
1635
1636     return outLen;
1637 }
1638
1639 size_t
1640 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1641                                  const wchar_t *src, size_t srcLen) const
1642 {
1643     if ( srcLen == wxNO_LEN )
1644         srcLen = wxWcslen(src) + 1;
1645
1646     size_t outLen = 0;
1647     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1648     for ( size_t n = 0; n < srcLen; n++ )
1649     {
1650         wxUint16 cc[2];
1651         const size_t numChars = encode_utf16(*src++, cc);
1652         if ( numChars == wxCONV_FAILED )
1653             return wxCONV_FAILED;
1654
1655         outLen += numChars * BYTES_PER_CHAR;
1656         if ( outBuff )
1657         {
1658             if ( outLen > dstLen )
1659                 return wxCONV_FAILED;
1660
1661             *outBuff++ = cc[0];
1662             if ( numChars == 2 )
1663             {
1664                 // second character of a surrogate
1665                 *outBuff++ = cc[1];
1666             }
1667         }
1668     }
1669
1670     return outLen;
1671 }
1672
1673 // ----------------------------------------------------------------------------
1674 // endian-reversing conversions
1675 // ----------------------------------------------------------------------------
1676
1677 size_t
1678 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1679                            const char *src, size_t srcLen) const
1680 {
1681     srcLen = GetLength(src, srcLen);
1682     if ( srcLen == wxNO_LEN )
1683         return wxCONV_FAILED;
1684
1685     const size_t inLen = srcLen / BYTES_PER_CHAR;
1686     if ( !dst )
1687     {
1688         // optimization: return maximal space which could be needed for this
1689         // string even if the real size could be smaller if the buffer contains
1690         // any surrogates
1691         return inLen;
1692     }
1693
1694     size_t outLen = 0;
1695     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1696     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1697     {
1698         wxUint32 ch;
1699         wxUint16 tmp[2];
1700
1701         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1702         inBuff++;
1703         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1704
1705         const size_t numChars = decode_utf16(tmp, ch);
1706         if ( numChars == wxCONV_FAILED )
1707             return wxCONV_FAILED;
1708
1709         if ( numChars == 2 )
1710             inBuff++;
1711
1712         if ( ++outLen > dstLen )
1713             return wxCONV_FAILED;
1714
1715         *dst++ = ch;
1716     }
1717
1718
1719     return outLen;
1720 }
1721
1722 size_t
1723 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1724                              const wchar_t *src, size_t srcLen) const
1725 {
1726     if ( srcLen == wxNO_LEN )
1727         srcLen = wxWcslen(src) + 1;
1728
1729     size_t outLen = 0;
1730     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1731     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1732     {
1733         wxUint16 cc[2];
1734         const size_t numChars = encode_utf16(*src, cc);
1735         if ( numChars == wxCONV_FAILED )
1736             return wxCONV_FAILED;
1737
1738         outLen += numChars * BYTES_PER_CHAR;
1739         if ( outBuff )
1740         {
1741             if ( outLen > dstLen )
1742                 return wxCONV_FAILED;
1743
1744             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1745             if ( numChars == 2 )
1746             {
1747                 // second character of a surrogate
1748                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1749             }
1750         }
1751     }
1752
1753     return outLen;
1754 }
1755
1756 #endif // WC_UTF16/!WC_UTF16
1757
1758
1759 // ============================================================================
1760 // UTF-32
1761 // ============================================================================
1762
1763 #ifdef WORDS_BIGENDIAN
1764     #define wxMBConvUTF32straight  wxMBConvUTF32BE
1765     #define wxMBConvUTF32swap      wxMBConvUTF32LE
1766 #else
1767     #define wxMBConvUTF32swap      wxMBConvUTF32BE
1768     #define wxMBConvUTF32straight  wxMBConvUTF32LE
1769 #endif
1770
1771
1772 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1773 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1774
1775 /* static */
1776 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1777 {
1778     if ( srcLen == wxNO_LEN )
1779     {
1780         // count the number of bytes in input, including the trailing NULs
1781         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1782         for ( srcLen = 1; *inBuff++; srcLen++ )
1783             ;
1784
1785         srcLen *= BYTES_PER_CHAR;
1786     }
1787     else // we already have the length
1788     {
1789         // we can only convert an entire number of UTF-32 characters
1790         if ( srcLen % BYTES_PER_CHAR )
1791             return wxCONV_FAILED;
1792     }
1793
1794     return srcLen;
1795 }
1796
1797 // case when in-memory representation is UTF-16
1798 #ifdef WC_UTF16
1799
1800 // ----------------------------------------------------------------------------
1801 // conversions without endianness change
1802 // ----------------------------------------------------------------------------
1803
1804 size_t
1805 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1806                                const char *src, size_t srcLen) const
1807 {
1808     srcLen = GetLength(src, srcLen);
1809     if ( srcLen == wxNO_LEN )
1810         return wxCONV_FAILED;
1811
1812     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1813     const size_t inLen = srcLen / BYTES_PER_CHAR;
1814     size_t outLen = 0;
1815     for ( size_t n = 0; n < inLen; n++ )
1816     {
1817         wxUint16 cc[2];
1818         const size_t numChars = encode_utf16(*inBuff++, cc);
1819         if ( numChars == wxCONV_FAILED )
1820             return wxCONV_FAILED;
1821
1822         outLen += numChars;
1823         if ( dst )
1824         {
1825             if ( outLen > dstLen )
1826                 return wxCONV_FAILED;
1827
1828             *dst++ = cc[0];
1829             if ( numChars == 2 )
1830             {
1831                 // second character of a surrogate
1832                 *dst++ = cc[1];
1833             }
1834         }
1835     }
1836
1837     return outLen;
1838 }
1839
1840 size_t
1841 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1842                                  const wchar_t *src, size_t srcLen) const
1843 {
1844     if ( srcLen == wxNO_LEN )
1845         srcLen = wxWcslen(src) + 1;
1846
1847     if ( !dst )
1848     {
1849         // optimization: return maximal space which could be needed for this
1850         // string instead of the exact amount which could be less if there are
1851         // any surrogates in the input
1852         //
1853         // we consider that surrogates are rare enough to make it worthwhile to
1854         // avoid running the loop below at the cost of slightly extra memory
1855         // consumption
1856         return srcLen * BYTES_PER_CHAR;
1857     }
1858
1859     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1860     size_t outLen = 0;
1861     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1862     {
1863         const wxUint32 ch = wxDecodeSurrogate(&src);
1864         if ( !src )
1865             return wxCONV_FAILED;
1866
1867         outLen += BYTES_PER_CHAR;
1868
1869         if ( outLen > dstLen )
1870             return wxCONV_FAILED;
1871
1872         *outBuff++ = ch;
1873     }
1874
1875     return outLen;
1876 }
1877
1878 // ----------------------------------------------------------------------------
1879 // endian-reversing conversions
1880 // ----------------------------------------------------------------------------
1881
1882 size_t
1883 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1884                            const char *src, size_t srcLen) const
1885 {
1886     srcLen = GetLength(src, srcLen);
1887     if ( srcLen == wxNO_LEN )
1888         return wxCONV_FAILED;
1889
1890     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1891     const size_t inLen = srcLen / BYTES_PER_CHAR;
1892     size_t outLen = 0;
1893     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1894     {
1895         wxUint16 cc[2];
1896         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1897         if ( numChars == wxCONV_FAILED )
1898             return wxCONV_FAILED;
1899
1900         outLen += numChars;
1901         if ( dst )
1902         {
1903             if ( outLen > dstLen )
1904                 return wxCONV_FAILED;
1905
1906             *dst++ = cc[0];
1907             if ( numChars == 2 )
1908             {
1909                 // second character of a surrogate
1910                 *dst++ = cc[1];
1911             }
1912         }
1913     }
1914
1915     return outLen;
1916 }
1917
1918 size_t
1919 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1920                              const wchar_t *src, size_t srcLen) const
1921 {
1922     if ( srcLen == wxNO_LEN )
1923         srcLen = wxWcslen(src) + 1;
1924
1925     if ( !dst )
1926     {
1927         // optimization: return maximal space which could be needed for this
1928         // string instead of the exact amount which could be less if there are
1929         // any surrogates in the input
1930         //
1931         // we consider that surrogates are rare enough to make it worthwhile to
1932         // avoid running the loop below at the cost of slightly extra memory
1933         // consumption
1934         return srcLen*BYTES_PER_CHAR;
1935     }
1936
1937     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1938     size_t outLen = 0;
1939     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1940     {
1941         const wxUint32 ch = wxDecodeSurrogate(&src);
1942         if ( !src )
1943             return wxCONV_FAILED;
1944
1945         outLen += BYTES_PER_CHAR;
1946
1947         if ( outLen > dstLen )
1948             return wxCONV_FAILED;
1949
1950         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1951     }
1952
1953     return outLen;
1954 }
1955
1956 #else // !WC_UTF16: wchar_t is UTF-32
1957
1958 // ----------------------------------------------------------------------------
1959 // conversions without endianness change
1960 // ----------------------------------------------------------------------------
1961
1962 size_t
1963 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1964                                const char *src, size_t srcLen) const
1965 {
1966     // use memcpy() as it should be much faster than hand-written loop
1967     srcLen = GetLength(src, srcLen);
1968     if ( srcLen == wxNO_LEN )
1969         return wxCONV_FAILED;
1970
1971     const size_t inLen = srcLen/BYTES_PER_CHAR;
1972     if ( dst )
1973     {
1974         if ( dstLen < inLen )
1975             return wxCONV_FAILED;
1976
1977         memcpy(dst, src, srcLen);
1978     }
1979
1980     return inLen;
1981 }
1982
1983 size_t
1984 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1985                                  const wchar_t *src, size_t srcLen) const
1986 {
1987     if ( srcLen == wxNO_LEN )
1988         srcLen = wxWcslen(src) + 1;
1989
1990     srcLen *= BYTES_PER_CHAR;
1991
1992     if ( dst )
1993     {
1994         if ( dstLen < srcLen )
1995             return wxCONV_FAILED;
1996
1997         memcpy(dst, src, srcLen);
1998     }
1999
2000     return srcLen;
2001 }
2002
2003 // ----------------------------------------------------------------------------
2004 // endian-reversing conversions
2005 // ----------------------------------------------------------------------------
2006
2007 size_t
2008 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
2009                            const char *src, size_t srcLen) const
2010 {
2011     srcLen = GetLength(src, srcLen);
2012     if ( srcLen == wxNO_LEN )
2013         return wxCONV_FAILED;
2014
2015     srcLen /= BYTES_PER_CHAR;
2016
2017     if ( dst )
2018     {
2019         if ( dstLen < srcLen )
2020             return wxCONV_FAILED;
2021
2022         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
2023         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
2024         {
2025             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
2026         }
2027     }
2028
2029     return srcLen;
2030 }
2031
2032 size_t
2033 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
2034                              const wchar_t *src, size_t srcLen) const
2035 {
2036     if ( srcLen == wxNO_LEN )
2037         srcLen = wxWcslen(src) + 1;
2038
2039     srcLen *= BYTES_PER_CHAR;
2040
2041     if ( dst )
2042     {
2043         if ( dstLen < srcLen )
2044             return wxCONV_FAILED;
2045
2046         wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
2047         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
2048         {
2049             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
2050         }
2051     }
2052
2053     return srcLen;
2054 }
2055
2056 #endif // WC_UTF16/!WC_UTF16
2057
2058
2059 // ============================================================================
2060 // The classes doing conversion using the iconv_xxx() functions
2061 // ============================================================================
2062
2063 #ifdef HAVE_ICONV
2064
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if the output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns (size_t)-1 (which means error) while reporting 0 bytes left in
//     the input buffer -- when a _real_ error occurs, the number of bytes
//     left in the input buffer is non-zero. Hence this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) takes the value returned by iconv() and the
// number of input bytes left after the call and evaluates to true if the
// conversion really failed. The macro parameters are parenthesized so that
// arbitrary expressions can safely be passed as arguments.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type expected for the second
// iconv() argument (ICONV_CONST is expected to be defined by the build)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value of a conversion descriptor which couldn't be opened
#define ICONV_T_INVALID ((iconv_t)-1)
2083
2084 #if SIZEOF_WCHAR_T == 4
2085     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
2086     #define WC_ENC      wxFONTENCODING_UTF32
2087 #elif SIZEOF_WCHAR_T == 2
2088     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
2089     #define WC_ENC      wxFONTENCODING_UTF16
2090 #else // sizeof(wchar_t) != 2 nor 4
2091     // does this ever happen?
2092     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
2093 #endif
2094
2095 // ----------------------------------------------------------------------------
2096 // wxMBConv_iconv: encapsulates an iconv character set
2097 // ----------------------------------------------------------------------------
2098
2099 class wxMBConv_iconv : public wxMBConv
2100 {
2101 public:
2102     wxMBConv_iconv(const char *name);
2103     virtual ~wxMBConv_iconv();
2104
2105     // implement base class virtual methods
2106     virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
2107                            const char *src, size_t srcLen = wxNO_LEN) const;
2108     virtual size_t FromWChar(char *dst, size_t dstLen,
2109                              const wchar_t *src, size_t srcLen = wxNO_LEN) const;
2110     virtual size_t GetMBNulLen() const;
2111
2112 #if wxUSE_UNICODE_UTF8
2113     virtual bool IsUTF8() const;
2114 #endif
2115
2116     virtual wxMBConv *Clone() const
2117     {
2118         wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
2119         p->m_minMBCharWidth = m_minMBCharWidth;
2120         return p;
2121     }
2122
2123     bool IsOk() const
2124         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
2125
2126 protected:
2127     // the iconv handlers used to translate from multibyte
2128     // to wide char and in the other direction
2129     iconv_t m2w,
2130             w2m;
2131
2132 #if wxUSE_THREADS
2133     // guards access to m2w and w2m objects
2134     wxMutex m_iconvMutex;
2135 #endif
2136
2137 private:
2138     // the name (for iconv_open()) of a wide char charset -- if none is
2139     // available on this machine, it will remain NULL
2140     static wxString ms_wcCharsetName;
2141
2142     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
2143     // different endian-ness than the native one
2144     static bool ms_wcNeedsSwap;
2145
2146
2147     // name of the encoding handled by this conversion
2148     wxString m_name;
2149
2150     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
2151     // initially
2152     size_t m_minMBCharWidth;
2153 };
2154
2155 // make the constructor available for unit testing
2156 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
2157 {
2158     wxMBConv_iconv* result = new wxMBConv_iconv( name );
2159     if ( !result->IsOk() )
2160     {
2161         delete result;
2162         return 0;
2163     }
2164
2165     return result;
2166 }
2167
2168 wxString wxMBConv_iconv::ms_wcCharsetName;
2169 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
2170
2171 wxMBConv_iconv::wxMBConv_iconv(const char *name)
2172               : m_name(name)
2173 {
2174     m_minMBCharWidth = 0;
2175
2176     // check for charset that represents wchar_t:
2177     if ( ms_wcCharsetName.empty() )
2178     {
2179         wxLogTrace(TRACE_STRCONV, wxT("Looking for wide char codeset:"));
2180
2181 #if wxUSE_FONTMAP
2182         const wxChar *const *names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
2183 #else // !wxUSE_FONTMAP
2184         static const wxChar *const names_static[] =
2185         {
2186 #if SIZEOF_WCHAR_T == 4
2187             wxT("UCS-4"),
2188 #elif SIZEOF_WCHAR_T = 2
2189             wxT("UCS-2"),
2190 #endif
2191             NULL
2192         };
2193         const wxChar *const *names = names_static;
2194 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2195
2196         for ( ; *names && ms_wcCharsetName.empty(); ++names )
2197         {
2198             const wxString nameCS(*names);
2199
2200             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
2201             wxString nameXE(nameCS);
2202
2203 #ifdef WORDS_BIGENDIAN
2204                 nameXE += wxT("BE");
2205 #else // little endian
2206                 nameXE += wxT("LE");
2207 #endif
2208
2209             wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2210                        nameXE.c_str());
2211
2212             m2w = iconv_open(nameXE.ToAscii(), name);
2213             if ( m2w == ICONV_T_INVALID )
2214             {
2215                 // try charset w/o bytesex info (e.g. "UCS4")
2216                 wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2217                            nameCS.c_str());
2218                 m2w = iconv_open(nameCS.ToAscii(), name);
2219
2220                 // and check for bytesex ourselves:
2221                 if ( m2w != ICONV_T_INVALID )
2222                 {
2223                     char    buf[2], *bufPtr;
2224                     wchar_t wbuf[2];
2225                     size_t  insz, outsz;
2226                     size_t  res;
2227
2228                     buf[0] = 'A';
2229                     buf[1] = 0;
2230                     wbuf[0] = 0;
2231                     insz = 2;
2232                     outsz = SIZEOF_WCHAR_T * 2;
2233                     char* wbufPtr = (char*)wbuf;
2234                     bufPtr = buf;
2235
2236                     res = iconv(
2237                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
2238                         &wbufPtr, &outsz);
2239
2240                     if (ICONV_FAILED(res, insz))
2241                     {
2242                         wxLogLastError(wxT("iconv"));
2243                         wxLogError(_("Conversion to charset '%s' doesn't work."),
2244                                    nameCS.c_str());
2245                     }
2246                     else // ok, can convert to this encoding, remember it
2247                     {
2248                         ms_wcCharsetName = nameCS;
2249                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
2250                     }
2251                 }
2252             }
2253             else // use charset not requiring byte swapping
2254             {
2255                 ms_wcCharsetName = nameXE;
2256             }
2257         }
2258
2259         wxLogTrace(TRACE_STRCONV,
2260                    wxT("iconv wchar_t charset is \"%s\"%s"),
2261                    ms_wcCharsetName.empty() ? wxString("<none>")
2262                                             : ms_wcCharsetName,
2263                    ms_wcNeedsSwap ? wxT(" (needs swap)")
2264                                   : wxT(""));
2265     }
2266     else // we already have ms_wcCharsetName
2267     {
2268         m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
2269     }
2270
2271     if ( ms_wcCharsetName.empty() )
2272     {
2273         w2m = ICONV_T_INVALID;
2274     }
2275     else
2276     {
2277         w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
2278         if ( w2m == ICONV_T_INVALID )
2279         {
2280             wxLogTrace(TRACE_STRCONV,
2281                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
2282                        ms_wcCharsetName.c_str(), name);
2283         }
2284     }
2285 }
2286
2287 wxMBConv_iconv::~wxMBConv_iconv()
2288 {
2289     if ( m2w != ICONV_T_INVALID )
2290         iconv_close(m2w);
2291     if ( w2m != ICONV_T_INVALID )
2292         iconv_close(w2m);
2293 }
2294
2295 size_t
2296 wxMBConv_iconv::ToWChar(wchar_t *dst, size_t dstLen,
2297                         const char *src, size_t srcLen) const
2298 {
2299     if ( srcLen == wxNO_LEN )
2300     {
2301         // find the string length: notice that must be done differently for
2302         // NUL-terminated strings and UTF-16/32 which are terminated with 2/4
2303         // consecutive NULs
2304         const size_t nulLen = GetMBNulLen();
2305         switch ( nulLen )
2306         {
2307             default:
2308                 return wxCONV_FAILED;
2309
2310             case 1:
2311                 srcLen = strlen(src); // arguably more optimized than our version
2312                 break;
2313
2314             case 2:
2315             case 4:
2316                 // for UTF-16/32 not only we need to have 2/4 consecutive NULs
2317                 // but they also have to start at character boundary and not
2318                 // span two adjacent characters
2319                 const char *p;
2320                 for ( p = src; NotAllNULs(p, nulLen); p += nulLen )
2321                     ;
2322                 srcLen = p - src;
2323                 break;
2324         }
2325
2326         // when we're determining the length of the string ourselves we count
2327         // the terminating NUL(s) as part of it and always NUL-terminate the
2328         // output
2329         srcLen += nulLen;
2330     }
2331
2332     // we express length in the number of (wide) characters but iconv always
2333     // counts buffer sizes it in bytes
2334     dstLen *= SIZEOF_WCHAR_T;
2335
2336 #if wxUSE_THREADS
2337     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
2338     //     Unfortunately there are a couple of global wxCSConv objects such as
2339     //     wxConvLocal that are used all over wx code, so we have to make sure
2340     //     the handle is used by at most one thread at the time. Otherwise
2341     //     only a few wx classes would be safe to use from non-main threads
2342     //     as MB<->WC conversion would fail "randomly".
2343     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2344 #endif // wxUSE_THREADS
2345
2346     size_t res, cres;
2347     const char *pszPtr = src;
2348
2349     if ( dst )
2350     {
2351         char* bufPtr = (char*)dst;
2352
2353         // have destination buffer, convert there
2354         size_t dstLenOrig = dstLen;
2355         cres = iconv(m2w,
2356                      ICONV_CHAR_CAST(&pszPtr), &srcLen,
2357                      &bufPtr, &dstLen);
2358
2359         // convert the number of bytes converted as returned by iconv to the
2360         // number of (wide) characters converted that we need
2361         res = (dstLenOrig - dstLen) / SIZEOF_WCHAR_T;
2362
2363         if (ms_wcNeedsSwap)
2364         {
2365             // convert to native endianness
2366             for ( unsigned i = 0; i < res; i++ )
2367                 dst[i] = WC_BSWAP(dst[i]);
2368         }
2369     }
2370     else // no destination buffer
2371     {
2372         // convert using temp buffer to calculate the size of the buffer needed
2373         wchar_t tbuf[256];
2374         res = 0;
2375
2376         do
2377         {
2378             char* bufPtr = (char*)tbuf;
2379             dstLen = 8 * SIZEOF_WCHAR_T;
2380
2381             cres = iconv(m2w,
2382                          ICONV_CHAR_CAST(&pszPtr), &srcLen,
2383                          &bufPtr, &dstLen );
2384
2385             res += 8 - (dstLen / SIZEOF_WCHAR_T);
2386         }
2387         while ((cres == (size_t)-1) && (errno == E2BIG));
2388     }
2389
2390     if (ICONV_FAILED(cres, srcLen))
2391     {
2392         //VS: it is ok if iconv fails, hence trace only
2393         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2394         return wxCONV_FAILED;
2395     }
2396
2397     return res;
2398 }
2399
2400 size_t wxMBConv_iconv::FromWChar(char *dst, size_t dstLen,
2401                                  const wchar_t *src, size_t srcLen) const
2402 {
2403 #if wxUSE_THREADS
2404     // NB: explained in MB2WC
2405     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2406 #endif
2407
2408     if ( srcLen == wxNO_LEN )
2409         srcLen = wxWcslen(src) + 1;
2410
2411     size_t inbuflen = srcLen * SIZEOF_WCHAR_T;
2412     size_t outbuflen = dstLen;
2413     size_t res, cres;
2414
2415     wchar_t *tmpbuf = 0;
2416
2417     if (ms_wcNeedsSwap)
2418     {
2419         // need to copy to temp buffer to switch endianness
2420         // (doing WC_BSWAP twice on the original buffer won't work, as it
2421         //  could be in read-only memory, or be accessed in some other thread)
2422         tmpbuf = (wchar_t *)malloc(inbuflen);
2423         for ( size_t i = 0; i < srcLen; i++ )
2424             tmpbuf[i] = WC_BSWAP(src[i]);
2425
2426         src = tmpbuf;
2427     }
2428
2429     char* inbuf = (char*)src;
2430     if ( dst )
2431     {
2432         // have destination buffer, convert there
2433         cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2434
2435         res = dstLen - outbuflen;
2436     }
2437     else // no destination buffer
2438     {
2439         // convert using temp buffer to calculate the size of the buffer needed
2440         char tbuf[256];
2441         res = 0;
2442         do
2443         {
2444             dst = tbuf;
2445             outbuflen = WXSIZEOF(tbuf);
2446
2447             cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2448
2449             res += WXSIZEOF(tbuf) - outbuflen;
2450         }
2451         while ((cres == (size_t)-1) && (errno == E2BIG));
2452     }
2453
2454     if (ms_wcNeedsSwap)
2455     {
2456         free(tmpbuf);
2457     }
2458
2459     if (ICONV_FAILED(cres, inbuflen))
2460     {
2461         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2462         return wxCONV_FAILED;
2463     }
2464
2465     return res;
2466 }
2467
2468 size_t wxMBConv_iconv::GetMBNulLen() const
2469 {
2470     if ( m_minMBCharWidth == 0 )
2471     {
2472         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
2473
2474 #if wxUSE_THREADS
2475         // NB: explained in MB2WC
2476         wxMutexLocker lock(self->m_iconvMutex);
2477 #endif
2478
2479         const wchar_t *wnul = L"";
2480         char buf[8]; // should be enough for NUL in any encoding
2481         size_t inLen = sizeof(wchar_t),
2482                outLen = WXSIZEOF(buf);
2483         char *inBuff = (char *)wnul;
2484         char *outBuff = buf;
2485         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
2486         {
2487             self->m_minMBCharWidth = (size_t)-1;
2488         }
2489         else // ok
2490         {
2491             self->m_minMBCharWidth = outBuff - buf;
2492         }
2493     }
2494
2495     return m_minMBCharWidth;
2496 }
2497
2498 #if wxUSE_UNICODE_UTF8
2499 bool wxMBConv_iconv::IsUTF8() const
2500 {
2501     return wxStricmp(m_name, "UTF-8") == 0 ||
2502            wxStricmp(m_name, "UTF8") == 0;
2503 }
2504 #endif
2505
2506 #endif // HAVE_ICONV
2507
2508
2509 // ============================================================================
2510 // Win32 conversion classes
2511 // ============================================================================
2512
2513 #ifdef wxHAVE_WIN32_MB2WC
2514
2515 // from utils.cpp
2516 #if wxUSE_FONTMAP
2517 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
2518 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
2519 #endif
2520
2521 class wxMBConv_win32 : public wxMBConv
2522 {
2523 public:
2524     wxMBConv_win32()
2525     {
2526         m_CodePage = CP_ACP;
2527         m_minMBCharWidth = 0;
2528     }
2529
2530     wxMBConv_win32(const wxMBConv_win32& conv)
2531         : wxMBConv()
2532     {
2533         m_CodePage = conv.m_CodePage;
2534         m_minMBCharWidth = conv.m_minMBCharWidth;
2535     }
2536
2537 #if wxUSE_FONTMAP
2538     wxMBConv_win32(const char* name)
2539     {
2540         m_CodePage = wxCharsetToCodepage(name);
2541         m_minMBCharWidth = 0;
2542     }
2543
2544     wxMBConv_win32(wxFontEncoding encoding)
2545     {
2546         m_CodePage = wxEncodingToCodepage(encoding);
2547         m_minMBCharWidth = 0;
2548     }
2549 #endif // wxUSE_FONTMAP
2550
2551     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2552     {
2553         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2554         // the behaviour is not compatible with the Unix version (using iconv)
2555         // and break the library itself, e.g. wxTextInputStream::NextChar()
2556         // wouldn't work if reading an incomplete MB char didn't result in an
2557         // error
2558         //
2559         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2560         // Win XP or newer and it is not supported for UTF-[78] so we always
2561         // use our own conversions in this case. See
2562         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2563         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2564         if ( m_CodePage == CP_UTF8 )
2565         {
2566             return wxMBConvUTF8().MB2WC(buf, psz, n);
2567         }
2568
2569         if ( m_CodePage == CP_UTF7 )
2570         {
2571             return wxMBConvUTF7().MB2WC(buf, psz, n);
2572         }
2573
2574         int flags = 0;
2575         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2576                 IsAtLeastWin2kSP4() )
2577         {
2578             flags = MB_ERR_INVALID_CHARS;
2579         }
2580
2581         const size_t len = ::MultiByteToWideChar
2582                              (
2583                                 m_CodePage,     // code page
2584                                 flags,          // flags: fall on error
2585                                 psz,            // input string
2586                                 -1,             // its length (NUL-terminated)
2587                                 buf,            // output string
2588                                 buf ? n : 0     // size of output buffer
2589                              );
2590         if ( !len )
2591         {
2592             // function totally failed
2593             return wxCONV_FAILED;
2594         }
2595
2596         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2597         // check if we succeeded, by doing a double trip:
2598         if ( !flags && buf )
2599         {
2600             const size_t mbLen = strlen(psz);
2601             wxCharBuffer mbBuf(mbLen);
2602             if ( ::WideCharToMultiByte
2603                    (
2604                       m_CodePage,
2605                       0,
2606                       buf,
2607                       -1,
2608                       mbBuf.data(),
2609                       mbLen + 1,        // size in bytes, not length
2610                       NULL,
2611                       NULL
2612                    ) == 0 ||
2613                   strcmp(mbBuf, psz) != 0 )
2614             {
2615                 // we didn't obtain the same thing we started from, hence
2616                 // the conversion was lossy and we consider that it failed
2617                 return wxCONV_FAILED;
2618             }
2619         }
2620
2621         // note that it returns count of written chars for buf != NULL and size
2622         // of the needed buffer for buf == NULL so in either case the length of
2623         // the string (which never includes the terminating NUL) is one less
2624         return len - 1;
2625     }
2626
2627     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2628     {
2629         /*
2630             we have a problem here: by default, WideCharToMultiByte() may
2631             replace characters unrepresentable in the target code page with bad
2632             quality approximations such as turning "1/2" symbol (U+00BD) into
2633             "1" for the code pages which don't have it and we, obviously, want
2634             to avoid this at any price
2635
2636             the trouble is that this function does it _silently_, i.e. it won't
2637             even tell us whether it did or not... Win98/2000 and higher provide
2638             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2639             we have to resort to a round trip, i.e. check that converting back
2640             results in the same string -- this is, of course, expensive but
2641             otherwise we simply can't be sure to not garble the data.
2642          */
2643
2644         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2645         // it doesn't work with CJK encodings (which we test for rather roughly
2646         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2647         // supporting it
2648         BOOL usedDef wxDUMMY_INITIALIZE(false);
2649         BOOL *pUsedDef;
2650         int flags;
2651         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2652         {
2653             // it's our lucky day
2654             flags = WC_NO_BEST_FIT_CHARS;
2655             pUsedDef = &usedDef;
2656         }
2657         else // old system or unsupported encoding
2658         {
2659             flags = 0;
2660             pUsedDef = NULL;
2661         }
2662
2663         const size_t len = ::WideCharToMultiByte
2664                              (
2665                                 m_CodePage,     // code page
2666                                 flags,          // either none or no best fit
2667                                 pwz,            // input string
2668                                 -1,             // it is (wide) NUL-terminated
2669                                 buf,            // output buffer
2670                                 buf ? n : 0,    // and its size
2671                                 NULL,           // default "replacement" char
2672                                 pUsedDef        // [out] was it used?
2673                              );
2674
2675         if ( !len )
2676         {
2677             // function totally failed
2678             return wxCONV_FAILED;
2679         }
2680
2681         // we did something, check if we really succeeded
2682         if ( flags )
2683         {
2684             // check if the conversion failed, i.e. if any replacements
2685             // were done
2686             if ( usedDef )
2687                 return wxCONV_FAILED;
2688         }
2689         else // we must resort to double tripping...
2690         {
2691             // first we need to ensure that we really have the MB data: this is
2692             // not the case if we're called with NULL buffer, in which case we
2693             // need to do the conversion yet again
2694             wxCharBuffer bufDef;
2695             if ( !buf )
2696             {
2697                 bufDef = wxCharBuffer(len);
2698                 buf = bufDef.data();
2699                 if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
2700                                             buf, len, NULL, NULL) )
2701                     return wxCONV_FAILED;
2702             }
2703
2704             if ( !n )
2705                 n = wcslen(pwz);
2706             wxWCharBuffer wcBuf(n);
2707             if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED ||
2708                     wcscmp(wcBuf, pwz) != 0 )
2709             {
2710                 // we didn't obtain the same thing we started from, hence
2711                 // the conversion was lossy and we consider that it failed
2712                 return wxCONV_FAILED;
2713             }
2714         }
2715
2716         // see the comment above for the reason of "len - 1"
2717         return len - 1;
2718     }
2719
2720     virtual size_t GetMBNulLen() const
2721     {
2722         if ( m_minMBCharWidth == 0 )
2723         {
2724             int len = ::WideCharToMultiByte
2725                         (
2726                             m_CodePage,     // code page
2727                             0,              // no flags
2728                             L"",            // input string
2729                             1,              // translate just the NUL
2730                             NULL,           // output buffer
2731                             0,              // and its size
2732                             NULL,           // no replacement char
2733                             NULL            // [out] don't care if it was used
2734                         );
2735
2736             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2737             switch ( len )
2738             {
2739                 default:
2740                     wxLogDebug(wxT("Unexpected NUL length %d"), len);
2741                     self->m_minMBCharWidth = (size_t)-1;
2742                     break;
2743
2744                 case 0:
2745                     self->m_minMBCharWidth = (size_t)-1;
2746                     break;
2747
2748                 case 1:
2749                 case 2:
2750                 case 4:
2751                     self->m_minMBCharWidth = len;
2752                     break;
2753             }
2754         }
2755
2756         return m_minMBCharWidth;
2757     }
2758
2759     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2760
2761     bool IsOk() const { return m_CodePage != -1; }
2762
2763 private:
2764     static bool CanUseNoBestFit()
2765     {
2766         static int s_isWin98Or2k = -1;
2767
2768         if ( s_isWin98Or2k == -1 )
2769         {
2770             int verMaj, verMin;
2771             switch ( wxGetOsVersion(&verMaj, &verMin) )
2772             {
2773                 case wxOS_WINDOWS_9X:
2774                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2775                     break;
2776
2777                 case wxOS_WINDOWS_NT:
2778                     s_isWin98Or2k = verMaj >= 5;
2779                     break;
2780
2781                 default:
2782                     // unknown: be conservative by default
2783                     s_isWin98Or2k = 0;
2784                     break;
2785             }
2786
2787             wxASSERT_MSG( s_isWin98Or2k != -1, wxT("should be set above") );
2788         }
2789
2790         return s_isWin98Or2k == 1;
2791     }
2792
2793     static bool IsAtLeastWin2kSP4()
2794     {
2795 #ifdef __WXWINCE__
2796         return false;
2797 #else
2798         static int s_isAtLeastWin2kSP4 = -1;
2799
2800         if ( s_isAtLeastWin2kSP4 == -1 )
2801         {
2802             OSVERSIONINFOEX ver;
2803
2804             memset(&ver, 0, sizeof(ver));
2805             ver.dwOSVersionInfoSize = sizeof(ver);
2806             GetVersionEx((OSVERSIONINFO*)&ver);
2807
2808             s_isAtLeastWin2kSP4 =
2809               ((ver.dwMajorVersion > 5) || // Vista+
2810                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2811                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2812                ver.wServicePackMajor >= 4)) // 2000 SP4+
2813               ? 1 : 0;
2814         }
2815
2816         return s_isAtLeastWin2kSP4 == 1;
2817 #endif
2818     }
2819
2820
2821     // the code page we're working with
2822     long m_CodePage;
2823
2824     // cached result of GetMBNulLen(), set to 0 initially meaning
2825     // "unknown"
2826     size_t m_minMBCharWidth;
2827 };
2828
2829 #endif // wxHAVE_WIN32_MB2WC
2830
2831
2832 // ============================================================================
2833 // wxEncodingConverter based conversion classes
2834 // ============================================================================
2835
2836 #if wxUSE_FONTMAP
2837
2838 class wxMBConv_wxwin : public wxMBConv
2839 {
2840 private:
2841     void Init()
2842     {
2843         // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2844         // The wxMBConv_cf class does a better job.
2845         m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
2846                m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2847                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2848     }
2849
2850 public:
2851     // temporarily just use wxEncodingConverter stuff,
2852     // so that it works while a better implementation is built
2853     wxMBConv_wxwin(const char* name)
2854     {
2855         if (name)
2856             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2857         else
2858             m_enc = wxFONTENCODING_SYSTEM;
2859
2860         Init();
2861     }
2862
2863     wxMBConv_wxwin(wxFontEncoding enc)
2864     {
2865         m_enc = enc;
2866
2867         Init();
2868     }
2869
2870     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2871     {
2872         size_t inbuf = strlen(psz);
2873         if (buf)
2874         {
2875             if (!m2w.Convert(psz, buf))
2876                 return wxCONV_FAILED;
2877         }
2878         return inbuf;
2879     }
2880
2881     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2882     {
2883         const size_t inbuf = wxWcslen(psz);
2884         if (buf)
2885         {
2886             if (!w2m.Convert(psz, buf))
2887                 return wxCONV_FAILED;
2888         }
2889
2890         return inbuf;
2891     }
2892
2893     virtual size_t GetMBNulLen() const
2894     {
2895         switch ( m_enc )
2896         {
2897             case wxFONTENCODING_UTF16BE:
2898             case wxFONTENCODING_UTF16LE:
2899                 return 2;
2900
2901             case wxFONTENCODING_UTF32BE:
2902             case wxFONTENCODING_UTF32LE:
2903                 return 4;
2904
2905             default:
2906                 return 1;
2907         }
2908     }
2909
2910     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2911
2912     bool IsOk() const { return m_ok; }
2913
2914 public:
2915     wxFontEncoding m_enc;
2916     wxEncodingConverter m2w, w2m;
2917
2918 private:
2919     // were we initialized successfully?
2920     bool m_ok;
2921
2922     wxDECLARE_NO_COPY_CLASS(wxMBConv_wxwin);
2923 };
2924
2925 // make the constructors available for unit testing
2926 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
2927 {
2928     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2929     if ( !result->IsOk() )
2930     {
2931         delete result;
2932         return 0;
2933     }
2934
2935     return result;
2936 }
2937
2938 #endif // wxUSE_FONTMAP
2939
2940 // ============================================================================
2941 // wxCSConv implementation
2942 // ============================================================================
2943
2944 void wxCSConv::Init()
2945 {
2946     m_name = NULL;
2947     m_convReal =  NULL;
2948     m_deferred = true;
2949 }
2950
2951 wxCSConv::wxCSConv(const wxString& charset)
2952 {
2953     Init();
2954
2955     if ( !charset.empty() )
2956     {
2957         SetName(charset.ToAscii());
2958     }
2959
2960 #if wxUSE_FONTMAP
2961     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2962     if ( m_encoding == wxFONTENCODING_MAX )
2963     {
2964         // set to unknown/invalid value
2965         m_encoding = wxFONTENCODING_SYSTEM;
2966     }
2967     else if ( m_encoding == wxFONTENCODING_DEFAULT )
2968     {
2969         // wxFONTENCODING_DEFAULT is same as US-ASCII in this context
2970         m_encoding = wxFONTENCODING_ISO8859_1;
2971     }
2972 #else
2973     m_encoding = wxFONTENCODING_SYSTEM;
2974 #endif
2975 }
2976
2977 wxCSConv::wxCSConv(wxFontEncoding encoding)
2978 {
2979     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2980     {
2981         wxFAIL_MSG( wxT("invalid encoding value in wxCSConv ctor") );
2982
2983         encoding = wxFONTENCODING_SYSTEM;
2984     }
2985
2986     Init();
2987
2988     m_encoding = encoding;
2989 }
2990
2991 wxCSConv::~wxCSConv()
2992 {
2993     Clear();
2994 }
2995
2996 wxCSConv::wxCSConv(const wxCSConv& conv)
2997         : wxMBConv()
2998 {
2999     Init();
3000
3001     SetName(conv.m_name);
3002     m_encoding = conv.m_encoding;
3003 }
3004
3005 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3006 {
3007     Clear();
3008
3009     SetName(conv.m_name);
3010     m_encoding = conv.m_encoding;
3011
3012     return *this;
3013 }
3014
3015 void wxCSConv::Clear()
3016 {
3017     free(m_name);
3018     delete m_convReal;
3019
3020     m_name = NULL;
3021     m_convReal = NULL;
3022 }
3023
3024 void wxCSConv::SetName(const char *charset)
3025 {
3026     if (charset)
3027     {
3028         m_name = wxStrdup(charset);
3029         m_deferred = true;
3030     }
3031 }
3032
3033 #if wxUSE_FONTMAP
3034
// Map from an encoding to a charset name, see gs_nameCache below.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache used by wxCSConv::DoCreate() when creating iconv-based converters:
// for each encoding already tried it stores either the charset name which
// worked with iconv or an empty string if none of its names did.
static wxEncodingNameCache gs_nameCache;
3039 #endif
3040
// Create the real converter object for our charset name and/or encoding.
//
// Returns a new heap-allocated converter (CreateConvIfNeeded() stores it in
// m_convReal) or NULL. NULL is returned both for ISO8859-1, which needs no
// converter object at all, and when no converter could be created; in either
// case the other wxCSConv methods then fall back to the direct Latin-1
// conversion.
//
// Which of the steps below are compiled in depends on HAVE_ICONV,
// wxHAVE_WIN32_MB2WC, __DARWIN__ and wxUSE_FONTMAP.
wxMBConv *wxCSConv::DoCreate() const
{
#if wxUSE_FONTMAP
    wxLogTrace(TRACE_STRCONV,
               wxT("creating conversion for %s"),
               (m_name ? m_name
                       : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
#endif // wxUSE_FONTMAP

    // check for the special case of ASCII or ISO8859-1 charset: as we have
    // special knowledge of it anyhow, we don't need to create a special
    // conversion object
    if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
            m_encoding == wxFONTENCODING_DEFAULT )
    {
        // don't convert at all
        return NULL;
    }

    // we trust OS to do conversion better than we can so try external
    // conversion methods first
    //
    // the full order is:
    //      1. OS conversion (iconv() under Unix or Win32 API)
    //      2. hard coded conversions for UTF
    //      3. wxEncodingConverter as fall back

    // step (1)
#ifdef HAVE_ICONV
#if !wxUSE_FONTMAP
    // without the font mapper we can only use iconv if we have a name
    if ( m_name )
#endif // !wxUSE_FONTMAP
    {
#if wxUSE_FONTMAP
        wxFontEncoding encoding(m_encoding);
#endif

        if ( m_name )
        {
            // first try the charset name exactly as it was given to us
            wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
            if ( conv->IsOk() )
                return conv;

            delete conv;

#if wxUSE_FONTMAP
            // iconv doesn't know this name, map it to an encoding to be able
            // to try the other names of this encoding below
            encoding =
                wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
#endif // wxUSE_FONTMAP
        }
#if wxUSE_FONTMAP
        {
            // check if we had already looked for a name working with iconv
            // for this encoding
            const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
            if ( it != gs_nameCache.end() )
            {
                // empty string indicates a cached failure
                //
                // NOTE(review): this returns immediately, without trying the
                // steps below which are still tried when the failure is
                // detected (and cached) for the first time
                if ( it->second.empty() )
                    return NULL;

                wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
                if ( conv->IsOk() )
                    return conv;

                delete conv;
            }

            const wxChar* const* names = wxFontMapperBase::GetAllEncodingNames(encoding);
            // CS : in case this does not return valid names (eg for MacRoman)
            // encoding got a 'failure' entry in the cache all the same,
            // although it just has to be created using a different method, so
            // only store failed iconv creation attempts (or perhaps we
            // shouldn't do this at all ?)
            if ( names[0] != NULL )
            {
                // try all the names in turn and remember the first one which
                // works
                for ( ; *names; ++names )
                {
                    // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
                    //             will need changes that will obsolete this
                    wxString name(*names);
                    wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
                    if ( conv->IsOk() )
                    {
                        gs_nameCache[encoding] = *names;
                        return conv;
                    }

                    delete conv;
                }

                gs_nameCache[encoding] = wxT(""); // cache the failure
            }
        }
#endif // wxUSE_FONTMAP
    }
#endif // HAVE_ICONV

#ifdef wxHAVE_WIN32_MB2WC
    {
#if wxUSE_FONTMAP
        // prefer the name to the encoding if we have both
        wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
                                      : new wxMBConv_win32(m_encoding);
        if ( conv->IsOk() )
            return conv;

        delete conv;
#else
        // NOTE(review): in this configuration none of the steps below is
        // ever reached
        return NULL;
#endif
    }
#endif // wxHAVE_WIN32_MB2WC

#ifdef __DARWIN__
    {
        // leave UTF16 and UTF32 to the built-ins of wx
        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
            ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
        {
#if wxUSE_FONTMAP
            wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
                                          : new wxMBConv_cf(m_encoding);
#else
            wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
#endif

            if ( conv->IsOk() )
                 return conv;

            delete conv;
        }
    }
#endif // __DARWIN__

    // step (2)
    wxFontEncoding enc = m_encoding;
#if wxUSE_FONTMAP
    if ( enc == wxFONTENCODING_SYSTEM && m_name )
    {
        // use "false" to suppress interactive dialogs -- we can be called from
        // anywhere and popping up a dialog from here is the last thing we want to
        // do
        enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
    }
#endif // wxUSE_FONTMAP

    // the UTF encodings are handled by the converter classes built into wx
    switch ( enc )
    {
        case wxFONTENCODING_UTF7:
             return new wxMBConvUTF7;

        case wxFONTENCODING_UTF8:
             return new wxMBConvUTF8;

        case wxFONTENCODING_UTF16BE:
             return new wxMBConvUTF16BE;

        case wxFONTENCODING_UTF16LE:
             return new wxMBConvUTF16LE;

        case wxFONTENCODING_UTF32BE:
             return new wxMBConvUTF32BE;

        case wxFONTENCODING_UTF32LE:
             return new wxMBConvUTF32LE;

        default:
             // nothing to do but put here to suppress gcc warnings
             break;
    }

    // step (3)
#if wxUSE_FONTMAP
    {
        wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
                                      : new wxMBConv_wxwin(m_encoding);
        if ( conv->IsOk() )
            return conv;

        delete conv;
    }

    wxLogTrace(TRACE_STRCONV,
               wxT("encoding \"%s\" is not supported by this system"),
               (m_name ? wxString(m_name)
                       : wxFontMapperBase::GetEncodingName(m_encoding)));
#endif // wxUSE_FONTMAP

    // nothing worked
    return NULL;
}
3228
3229 void wxCSConv::CreateConvIfNeeded() const
3230 {
3231     if ( m_deferred )
3232     {
3233         wxCSConv *self = (wxCSConv *)this; // const_cast
3234
3235         // if we don't have neither the name nor the encoding, use the default
3236         // encoding for this system
3237         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3238         {
3239 #if wxUSE_INTL
3240             self->m_encoding = wxLocale::GetSystemEncoding();
3241 #else
3242             // fallback to some reasonable default:
3243             self->m_encoding = wxFONTENCODING_ISO8859_1;
3244 #endif // wxUSE_INTL
3245         }
3246
3247         self->m_convReal = DoCreate();
3248         self->m_deferred = false;
3249     }
3250 }
3251
3252 bool wxCSConv::IsOk() const
3253 {
3254     CreateConvIfNeeded();
3255
3256     // special case: no convReal created for wxFONTENCODING_ISO8859_1
3257     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3258         return true; // always ok as we do it ourselves
3259
3260     // m_convReal->IsOk() is called at its own creation, so we know it must
3261     // be ok if m_convReal is non-NULL
3262     return m_convReal != NULL;
3263 }
3264
3265 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3266                          const char *src, size_t srcLen) const
3267 {
3268     CreateConvIfNeeded();
3269
3270     if (m_convReal)
3271         return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3272
3273     // latin-1 (direct)
3274     if ( srcLen == wxNO_LEN )
3275         srcLen = strlen(src) + 1; // take trailing NUL too
3276
3277     if ( dst )
3278     {
3279         if ( dstLen < srcLen )
3280             return wxCONV_FAILED;
3281
3282         for ( size_t n = 0; n < srcLen; n++ )
3283             dst[n] = (unsigned char)(src[n]);
3284     }
3285
3286     return srcLen;
3287 }
3288
3289 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3290                            const wchar_t *src, size_t srcLen) const
3291 {
3292     CreateConvIfNeeded();
3293
3294     if (m_convReal)
3295         return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3296
3297     // latin-1 (direct)
3298     if ( srcLen == wxNO_LEN )
3299         srcLen = wxWcslen(src) + 1;
3300
3301     if ( dst )
3302     {
3303         if ( dstLen < srcLen )
3304             return wxCONV_FAILED;
3305
3306         for ( size_t n = 0; n < srcLen; n++ )
3307         {
3308             if ( src[n] > 0xFF )
3309                 return wxCONV_FAILED;
3310
3311             dst[n] = (char)src[n];
3312         }
3313
3314     }
3315     else // still need to check the input validity
3316     {
3317         for ( size_t n = 0; n < srcLen; n++ )
3318         {
3319             if ( src[n] > 0xFF )
3320                 return wxCONV_FAILED;
3321         }
3322     }
3323
3324     return srcLen;
3325 }
3326
3327 size_t wxCSConv::GetMBNulLen() const
3328 {
3329     CreateConvIfNeeded();
3330
3331     if ( m_convReal )
3332     {
3333         return m_convReal->GetMBNulLen();
3334     }
3335
3336     // otherwise, we are ISO-8859-1
3337     return 1;
3338 }
3339
3340 #if wxUSE_UNICODE_UTF8
3341 bool wxCSConv::IsUTF8() const
3342 {
3343     CreateConvIfNeeded();
3344
3345     if ( m_convReal )
3346     {
3347         return m_convReal->IsUTF8();
3348     }
3349
3350     // otherwise, we are ISO-8859-1
3351     return false;
3352 }
3353 #endif
3354
3355
3356 #if wxUSE_UNICODE
3357
3358 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3359 {
3360     if ( !s )
3361         return wxWCharBuffer();
3362
3363     wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3364     if ( !wbuf )
3365         wbuf = wxMBConvUTF8().cMB2WX(s);
3366     if ( !wbuf )
3367         wbuf = wxConvISO8859_1.cMB2WX(s);
3368
3369     return wbuf;
3370 }
3371
3372 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3373 {
3374     if ( !ws )
3375         return wxCharBuffer();
3376
3377     wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3378     if ( !buf )
3379         buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3380
3381     return buf;
3382 }
3383
3384 #endif // wxUSE_UNICODE
3385
3386 // ----------------------------------------------------------------------------
3387 // globals
3388 // ----------------------------------------------------------------------------
3389
// NB: The reason why we create converter objects in this convoluted way,
3391 //     using a factory function instead of global variable, is that they
3392 //     may be used at static initialization time (some of them are used by
3393 //     wxString ctors and there may be a global wxString object). In other
3394 //     words, possibly _before_ the converter global object would be
3395 //     initialized.
3396
3397 #undef wxConvLibc
3398 #undef wxConvUTF8
3399 #undef wxConvUTF7
3400 #undef wxConvLocal
3401 #undef wxConvISO8859_1
3402
// Define the global converter called "name": klass is the type under which
// the converter is exposed, impl_klass the type of the object really created
// and ctor_args the text following the object name in its declaration (so it
// can be the ctor arguments in parentheses or be empty).
//
// The macro expands to the definitions of:
//  - the pointer name##Ptr, initialized to NULL here
//    NOTE(review): it is not assigned anywhere in this macro, presumably
//    this is done by the code using it in the header -- confirm
//  - the function wxGet_##name##Ptr() returning the address of the
//    converter, which is a static object local to this function
//  - a static variable initialized by calling this function, see the
//    comment inside the macro for the reason it is needed
//
// There is intentionally no semicolon at the end, it must be provided when
// using the macro.
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// Simplified version of the macro above for the common case when the object
// created is of the same type as the one exposed.
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3417
3418 #ifdef __INTELC__
3419     // disable warning "variable 'xxx' was declared but never referenced"
3420     #pragma warning(disable: 177)
3421 #endif // Intel C++
3422
// wxConvLibc is exposed as a plain wxMBConv but is implemented by
// wxMBConv_win32 under Windows and by wxMBConvLibc elsewhere; the branch
// using wxMBConv_cf is currently disabled by "#elif 0".
#ifdef __WINDOWS__
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#elif 0 // defined(__WXOSX__)
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_cf, wxConvLibc,  (wxFONTENCODING_UTF8));
#else
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif
3430
3431 // NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
3432 //     passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
3433 //     provokes an error message about "not enough macro parameters"; and we
3434 //     can't use "()" here as the name##Obj declaration would be parsed as a
3435 //     function declaration then, so use a semicolon and live with an extra
//     empty statement (and hope that no compiler warns about this)
3437 WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
3438 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
3439
// wxConvLocal is a wxCSConv created for wxFONTENCODING_SYSTEM and
// wxConvISO8859_1 a wxCSConv created for wxFONTENCODING_ISO8859_1
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));

// initially wxConvCurrent is the same converter as wxConvLibc and wxConvUI
// the same one as wxConvLocal
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
3445
3446 #ifdef __DARWIN__
3447 // The xnu kernel always communicates file paths in decomposed UTF-8.
3448 // WARNING: Are we sure that CFString's conversion will cause decomposition?
3449 static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
3450 #endif
3451
3452 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
3453 #ifdef __DARWIN__
3454                                     &wxConvMacUTF8DObj;
3455 #else // !__DARWIN__
3456                                     wxGet_wxConvLibcPtr();
3457 #endif // __DARWIN__/!__DARWIN__
3458
3459 #else // !wxUSE_WCHAR_T
3460
3461 // FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
3462 // stand-ins in absence of wchar_t
3463 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3464                                 wxConvISO8859_1,
3465                                 wxConvLocal,
3466                                 wxConvUTF8;
3467
3468 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T