src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif  //__BORLANDC__
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/intl.h"
  24     #include "wx/log.h"
  25     #include "wx/utils.h"
  26     #include "wx/hashmap.h"
  27 #endif
  28
  29 #include "wx/strconv.h"
  30
  31 #ifndef __WXWINCE__
  32 #include <errno.h>
  33 #endif
  34
  35 #include <ctype.h>
  36 #include <string.h>
  37 #include <stdlib.h>
  38
  39 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42     #define wxHAVE_WIN32_MB2WC
  43 #endif
  44
  45 #ifdef HAVE_ICONV
  46     #include <iconv.h>
  47     #include "wx/thread.h"
  48 #endif
  49
  50 #include "wx/encconv.h"
  51 #include "wx/fontmap.h"
  52
  53 #ifdef __DARWIN__
  54 #include "wx/osx/core/private/strconv_cf.h"
  55 #endif //def __DARWIN__
  56
  57
  58 #define TRACE_STRCONV wxT("strconv")
  59
  60 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
  61 // be 4 bytes
  62 #if SIZEOF_WCHAR_T == 2
  63     #define WC_UTF16
  64 #endif
  65
  66
  67 // ============================================================================
  68 // implementation
  69 // ============================================================================
  70
  71 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  72 static bool NotAllNULs(const char *p, size_t n)
  73 {
  74     while ( n && *p++ == '\0' )
  75         n--;
  76
  77     return n != 0;
  78 }
  79
  80 // ----------------------------------------------------------------------------
  81 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
  82 // ----------------------------------------------------------------------------
  83
  84 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  85 {
  86     if (input <= 0xffff)
  87     {
  88         if (output)
  89             *output = (wxUint16) input;
  90
  91         return 1;
  92     }
  93     else if (input >= 0x110000)
  94     {
  95         return wxCONV_FAILED;
  96     }
  97     else
  98     {
  99         if (output)
 100         {
 101             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 102             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 103         }
 104
 105         return 2;
 106     }
 107 }
 108
 109 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 110 {
 111     if ((*input < 0xd800) || (*input > 0xdfff))
 112     {
 113         output = *input;
 114         return 1;
 115     }
 116     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 117     {
 118         output = *input;
 119         return wxCONV_FAILED;
 120     }
 121     else
 122     {
 123         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 124         return 2;
 125     }
 126 }
 127
 128 #ifdef WC_UTF16
 129     typedef wchar_t wxDecodeSurrogate_t;
 130 #else // !WC_UTF16
 131     typedef wxUint16 wxDecodeSurrogate_t;
 132 #endif // WC_UTF16/!WC_UTF16
 133
 134 // returns the next UTF-32 character from the wchar_t buffer and advances the
 135 // pointer to the character after this one
 136 //
 137 // if an invalid character is found, *pSrc is set to NULL, the caller must
 138 // check for this
 139 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 140 {
 141     wxUint32 out;
 142     const size_t
 143         n = decode_utf16(reinterpret_cast<const wxUint16 *>(*pSrc), out);
 144     if ( n == wxCONV_FAILED )
 145         *pSrc = NULL;
 146     else
 147         *pSrc += n;
 148
 149     return out;
 150 }
 151
 152 // ----------------------------------------------------------------------------
 153 // wxMBConv
 154 // ----------------------------------------------------------------------------
 155
// Default implementation of multibyte to wide character conversion written in
// terms of MB2WC().
//
// dst:    output buffer, may be NULL in which case only the required length
//         is computed
// dstLen: size of dst in wide characters (only checked if dst is not NULL)
// src:    input string
// srcLen: length of src in bytes or wxNO_LEN if src is NUL-terminated
//
// Returns the number of wide characters written (or which would be written
// if dst is NULL); if srcLen is wxNO_LEN this count includes the trailing
// NUL. Returns wxCONV_FAILED if GetMBNulLen() or MB2WC() fails or if dst is
// too small.
size_t
wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
                  const char *src, size_t srcLen) const
{
    // although new conversion classes are supposed to implement this function
    // directly, the existing ones only implement the old MB2WC() and so, to
    // avoid to have to rewrite all conversion classes at once, we provide a
    // default (but not efficient) implementation of this one in terms of the
    // old function by copying the input to ensure that it's NUL-terminated and
    // then using MB2WC() to convert it
    //
    // moreover, some conversion classes simply can't implement ToWChar()
    // directly, the primary example is wxConvLibc: mbstowcs() only handles
    // NUL-terminated strings

    // the number of chars [which would be] written to dst [if it were not NULL]
    size_t dstWritten = 0;

    // the number of NULs terminating this string
    size_t nulLen = 0;  // not really needed, but just to avoid warnings

    // if we were not given the input size we just have to assume that the
    // string is properly terminated as we have no way of knowing how long it
    // is anyhow, but if we do have the size check whether there are enough
    // NULs at the end
    wxCharBuffer bufTmp;
    const char *srcEnd;
    if ( srcLen != wxNO_LEN )
    {
        // we need to know how to find the end of this string
        nulLen = GetMBNulLen();
        if ( nulLen == wxCONV_FAILED )
            return wxCONV_FAILED;

        // if there are enough NULs we can avoid the copy
        if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
        {
            // make a copy in order to properly NUL-terminate the string
            bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
            char * const p = bufTmp.data();
            memcpy(p, src, srcLen);
            for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
                *s = '\0';

            // from now on we work with our own terminated copy
            src = bufTmp;
        }

        srcEnd = src + srcLen;
    }
    else // quit after the first loop iteration
    {
        // NULL srcEnd is used below as the "input is NUL-terminated" flag
        srcEnd = NULL;
    }

    // the idea of this code is straightforward: it converts a NUL-terminated
    // chunk of the string during each iteration and updates the output buffer
    // with the result
    //
    // all the complications come from the fact that this function, for
    // historical reasons, must behave in 2 subtly different ways when it's
    // called with a fixed number of characters and when it's called for the
    // entire NUL-terminated string: in the former case (srcEnd != NULL) we
    // must count all characters we convert, NUL or not; but in the latter we
    // do not count the trailing NUL -- but still count all the NULs inside the
    // string
    //
    // so for the (simple) former case we just always count the trailing NUL,
    // but for the latter we need to wait until we see if there is going to be
    // another loop iteration and only count it then
    for ( ;; )
    {
        // try to convert the current chunk: calling MB2WC() with NULL buffer
        // only computes the length of the result (without the trailing NUL)
        size_t lenChunk = MB2WC(NULL, src, 0);
        if ( lenChunk == wxCONV_FAILED )
            return wxCONV_FAILED;

        dstWritten += lenChunk;
        if ( !srcEnd )
            dstWritten++;

        if ( !lenChunk )
        {
            // nothing left in the input string, conversion succeeded
            break;
        }

        if ( dst )
        {
            // check that the output fits before actually writing anything
            if ( dstWritten > dstLen )
                return wxCONV_FAILED;

            // +1 is for trailing NUL
            if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
                return wxCONV_FAILED;

            dst += lenChunk;
            if ( !srcEnd )
                dst++;
        }

        if ( !srcEnd )
        {
            // we convert just one chunk in this case as this is the entire
            // string anyhow (and we don't count the trailing NUL in this case)
            break;
        }

        // advance the input pointer past the end of this chunk: notice that we
        // will always stop before srcEnd because we know that the chunk is
        // always properly NUL-terminated
        while ( NotAllNULs(src, nulLen) )
        {
            // notice that we must skip over multiple bytes here as we suppose
            // that if NUL takes 2 or 4 bytes, then all the other characters do
            // too and so if advanced by a single byte we might erroneously
            // detect sequences of NUL bytes in the middle of the input
            src += nulLen;
        }

        // if the buffer ends before this NUL, we shouldn't count it in our
        // output so skip the code below
        if ( src == srcEnd )
            break;

        // do count this terminator as it's inside the buffer we convert
        dstWritten++;
        if ( dst )
            dst++;

        src += nulLen; // skip the terminator itself

        // stop if this embedded NUL was the last thing in the buffer
        if ( src >= srcEnd )
            break;
    }

    return dstWritten;
}
 293
// Default implementation of wide character to multibyte conversion written in
// terms of WC2MB().
//
// dst:    output buffer, may be NULL in which case only the required length
//         is computed
// dstLen: size of dst in bytes (only checked if dst is not NULL)
// src:    input wide string
// srcLen: length of src in wide characters or wxNO_LEN if src is
//         NUL-terminated
//
// Returns the number of bytes written (or which would be written if dst is
// NULL), including the terminating NUL(s) of every chunk whose terminator
// lies inside the converted range. Returns wxCONV_FAILED if WC2MB() fails or
// if dst is too small.
size_t
wxMBConv::FromWChar(char *dst, size_t dstLen,
                    const wchar_t *src, size_t srcLen) const
{
    // the number of chars [which would be] written to dst [if it were not NULL]
    size_t dstWritten = 0;

    // if we don't know its length we have no choice but to assume that it is
    // NUL-terminated (notice that it can still be NUL-terminated even if
    // explicit length is given but it doesn't change our return value)
    const bool isNulTerminated = srcLen == wxNO_LEN;

    // make a copy of the input string unless it is already properly
    // NUL-terminated
    wxWCharBuffer bufTmp;
    if ( isNulTerminated )
    {
        // the terminating NUL is converted too in this case
        srcLen = wxWcslen(src) + 1;
    }
    else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
    {
        // make a copy in order to properly NUL-terminate the string
        //
        // NOTE(review): this relies on wxWCharBuffer(len) allocating len + 1
        // elements and NUL-terminating them -- confirm in wx/buffer.h
        bufTmp = wxWCharBuffer(srcLen);
        memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
        src = bufTmp;
    }

    // NOTE(review): unlike in ToWChar(), the result of GetMBNulLen() is not
    // compared with wxCONV_FAILED here -- confirm this can't happen
    const size_t lenNul = GetMBNulLen();
    for ( const wchar_t * const srcEnd = src + srcLen;
          src < srcEnd;
          src++ /* skip L'\0' too */ )
    {
        // try to convert the current chunk: with NULL buffer WC2MB() only
        // computes the length of the result
        size_t lenChunk = WC2MB(NULL, src, 0);
        if ( lenChunk == wxCONV_FAILED )
            return wxCONV_FAILED;

        dstWritten += lenChunk;

        // position of the NUL terminating the current chunk
        const wchar_t * const
            chunkEnd = isNulTerminated ? srcEnd - 1 : src + wxWcslen(src);

        // our return value accounts for the trailing NUL(s), unlike that of
        // WC2MB(), however don't do it for the last NUL we artificially added
        // ourselves above
        if ( chunkEnd < srcEnd )
            dstWritten += lenNul;

        if ( dst )
        {
            if ( dstWritten > dstLen )
                return wxCONV_FAILED;

            // if we know that there is enough space in the destination buffer
            // (because we accounted for lenNul in dstWritten above), we can
            // convert directly in place -- but otherwise we need another
            // temporary buffer to ensure that we don't overwrite the output
            wxCharBuffer dstBuf;
            char *dstTmp;
            if ( chunkEnd == srcEnd )
            {
                dstBuf = wxCharBuffer(lenChunk + lenNul - 1);
                dstTmp = dstBuf.data();
            }
            else
            {
                dstTmp = dst;
            }

            if ( WC2MB(dstTmp, src, lenChunk + lenNul) == wxCONV_FAILED )
                return wxCONV_FAILED;

            if ( dstTmp != dst )
            {
                // copy everything up to but excluding the terminating NUL(s)
                // into the real output buffer
                memcpy(dst, dstTmp, lenChunk);

                // micro-optimization: if dstTmp != dst it means that chunkEnd
                // == srcEnd and so we're done, no need to update anything below
                break;
            }

            dst += lenChunk;
            if ( chunkEnd < srcEnd )
                dst += lenNul;
        }

        // continue after the end of this chunk (the loop increment skips the
        // NUL itself)
        src = chunkEnd;
    }

    return dstWritten;
}
 387
 388 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 389 {
 390     size_t rc = ToWChar(outBuff, outLen, inBuff);
 391     if ( rc != wxCONV_FAILED )
 392     {
 393         // ToWChar() returns the buffer length, i.e. including the trailing
 394         // NUL, while this method doesn't take it into account
 395         rc--;
 396     }
 397
 398     return rc;
 399 }
 400
 401 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 402 {
 403     size_t rc = FromWChar(outBuff, outLen, inBuff);
 404     if ( rc != wxCONV_FAILED )
 405     {
 406         rc -= GetMBNulLen();
 407     }
 408
 409     return rc;
 410 }
 411
// Intentionally empty out-of-line destructor.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 416
 417 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 418 {
 419     if ( psz )
 420     {
 421         // calculate the length of the buffer needed first
 422         const size_t nLen = ToWChar(NULL, 0, psz);
 423         if ( nLen != wxCONV_FAILED )
 424         {
 425             // now do the actual conversion
 426             wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */);
 427
 428             // +1 for the trailing NULL
 429             if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED )
 430                 return buf;
 431         }
 432     }
 433
 434     return wxWCharBuffer();
 435 }
 436
 437 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 438 {
 439     if ( pwz )
 440     {
 441         const size_t nLen = FromWChar(NULL, 0, pwz);
 442         if ( nLen != wxCONV_FAILED )
 443         {
 444             wxCharBuffer buf(nLen - 1);
 445             if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED )
 446                 return buf;
 447         }
 448     }
 449
 450     return wxCharBuffer();
 451 }
 452
 453 const wxWCharBuffer
 454 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 455 {
 456     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 457     if ( dstLen != wxCONV_FAILED )
 458     {
 459         // notice that we allocate space for dstLen+1 wide characters here
 460         // because we want the buffer to always be NUL-terminated, even if the
 461         // input isn't (as otherwise the caller has no way to know its length)
 462         wxWCharBuffer wbuf(dstLen);
 463         wbuf.data()[dstLen] = L'\0';
 464         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 465         {
 466             if ( outLen )
 467             {
 468                 *outLen = dstLen;
 469
 470                 // we also need to handle NUL-terminated input strings
 471                 // specially: for them the output is the length of the string
 472                 // excluding the trailing NUL, however if we're asked to
 473                 // convert a specific number of characters we return the length
 474                 // of the resulting output even if it's NUL-terminated
 475                 if ( inLen == wxNO_LEN )
 476                     (*outLen)--;
 477             }
 478
 479             return wbuf;
 480         }
 481     }
 482
 483     if ( outLen )
 484         *outLen = 0;
 485
 486     return wxWCharBuffer();
 487 }
 488
 489 const wxCharBuffer
 490 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 491 {
 492     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 493     if ( dstLen != wxCONV_FAILED )
 494     {
 495         const size_t nulLen = GetMBNulLen();
 496
 497         // as above, ensure that the buffer is always NUL-terminated, even if
 498         // the input is not
 499         wxCharBuffer buf(dstLen + nulLen - 1);
 500         memset(buf.data() + dstLen, 0, nulLen);
 501         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 502         {
 503             if ( outLen )
 504             {
 505                 *outLen = dstLen;
 506
 507                 if ( inLen == wxNO_LEN )
 508                 {
 509                     // in this case both input and output are NUL-terminated
 510                     // and we're not supposed to count NUL
 511                     *outLen -= nulLen;
 512                 }
 513             }
 514
 515             return buf;
 516         }
 517     }
 518
 519     if ( outLen )
 520         *outLen = 0;
 521
 522     return wxCharBuffer();
 523 }
 524
 525 const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const
 526 {
 527     const size_t srcLen = buf.length();
 528     if ( srcLen )
 529     {
 530         const size_t dstLen = ToWChar(NULL, 0, buf, srcLen);
 531         if ( dstLen != wxCONV_FAILED )
 532         {
 533             wxWCharBuffer wbuf(dstLen);
 534             wbuf.data()[dstLen] = L'\0';
 535             if ( ToWChar(wbuf.data(), dstLen, buf, srcLen) != wxCONV_FAILED )
 536                 return wbuf;
 537         }
 538     }
 539
 540     return wxScopedWCharBuffer::CreateNonOwned(L"", 0);
 541 }
 542
 543 const wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const
 544 {
 545     const size_t srcLen = wbuf.length();
 546     if ( srcLen )
 547     {
 548         const size_t dstLen = FromWChar(NULL, 0, wbuf, srcLen);
 549         if ( dstLen != wxCONV_FAILED )
 550         {
 551             wxCharBuffer buf(dstLen);
 552             buf.data()[dstLen] = '\0';
 553             if ( FromWChar(buf.data(), dstLen, wbuf, srcLen) != wxCONV_FAILED )
 554                 return buf;
 555         }
 556     }
 557
 558     return wxScopedCharBuffer::CreateNonOwned("", 0);
 559 }
 560
 561 // ----------------------------------------------------------------------------
 562 // wxMBConvLibc
 563 // ----------------------------------------------------------------------------
 564
 565 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 566 {
 567     return wxMB2WC(buf, psz, n);
 568 }
 569
 570 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 571 {
 572     return wxWC2MB(buf, psz, n);
 573 }
 574
 575 // ----------------------------------------------------------------------------
 576 // wxConvBrokenFileNames
 577 // ----------------------------------------------------------------------------
 578
 579 #ifdef __UNIX__
 580
 581 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
 582 {
 583     if ( wxStricmp(charset, wxT("UTF-8")) == 0 ||
 584          wxStricmp(charset, wxT("UTF8")) == 0  )
 585         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
 586     else
 587         m_conv = new wxCSConv(charset);
 588 }
 589
 590 #endif // __UNIX__
 591
 592 // ----------------------------------------------------------------------------
 593 // UTF-7
 594 // ----------------------------------------------------------------------------
 595
 596 // Implementation (C) 2004 Fredrik Roubert
 597 //
 598 // Changes to work in streaming mode (C) 2008 Vadim Zeitlin
 599
//
// BASE64 decoding table
//
// The table is indexed by the byte value and has 256 entries, so any unsigned
// char can be used as index. Entries for the base64 alphabet ('A'-'Z',
// 'a'-'z', '0'-'9', '+' and '/') contain the corresponding 6 bit value (0 to
// 0x3f), all the others contain 0xff to indicate that the byte is not a
// base64 character.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 638
 639 size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
 640                              const char *src, size_t srcLen) const
 641 {
 642     DecoderState stateOrig,
 643                 *statePtr;
 644     if ( srcLen == wxNO_LEN )
 645     {
 646         // convert the entire string, up to and including the trailing NUL
 647         srcLen = strlen(src) + 1;
 648
 649         // when working on the entire strings we don't update nor use the shift
 650         // state from the previous call
 651         statePtr = &stateOrig;
 652     }
 653     else // when working with partial strings we do use the shift state
 654     {
 655         statePtr = const_cast<DecoderState *>(&m_stateDecoder);
 656
 657         // also save the old state to be able to rollback to it on error
 658         stateOrig = m_stateDecoder;
 659     }
 660
 661     // but to simplify the code below we use this variable in both cases
 662     DecoderState& state = *statePtr;
 663
 664
 665     // number of characters [which would have been] written to dst [if it were
 666     // not NULL]
 667     size_t len = 0;
 668
 669     const char * const srcEnd = src + srcLen;
 670
 671     while ( (src < srcEnd) && (!dst || (len < dstLen)) )
 672     {
 673         const unsigned char cc = *src++;
 674
 675         if ( state.IsShifted() )
 676         {
 677             const unsigned char dc = utf7unb64[cc];
 678             if ( dc == 0xff )
 679             {
 680                 // end of encoded part, check that nothing was left: there can
 681                 // be up to 4 bits of 0 padding but nothing else (we also need
 682                 // to check isLSB as we count bits modulo 8 while a valid UTF-7
 683                 // encoded sequence must contain an integral number of UTF-16
 684                 // characters)
 685                 if ( state.isLSB || state.bit > 4 ||
 686                         (state.accum & ((1 << state.bit) - 1)) )
 687                 {
 688                     if ( !len )
 689                         state = stateOrig;
 690
 691                     return wxCONV_FAILED;
 692                 }
 693
 694                 state.ToDirect();
 695
 696                 // re-parse this character normally below unless it's '-' which
 697                 // is consumed by the decoder
 698                 if ( cc == '-' )
 699                     continue;
 700             }
 701             else // valid encoded character
 702             {
 703                 // mini base64 decoder: each character is 6 bits
 704                 state.bit += 6;
 705                 state.accum <<= 6;
 706                 state.accum += dc;
 707
 708                 if ( state.bit >= 8 )
 709                 {
 710                     // got the full byte, consume it
 711                     state.bit -= 8;
 712                     unsigned char b = (state.accum >> state.bit) & 0x00ff;
 713
 714                     if ( state.isLSB )
 715                     {
 716                         // we've got the full word, output it
 717                         if ( dst )
 718                             *dst++ = (state.msb << 8) | b;
 719                         len++;
 720                         state.isLSB = false;
 721                     }
 722                     else // MSB
 723                     {
 724                         // just store it while we wait for LSB
 725                         state.msb = b;
 726                         state.isLSB = true;
 727                     }
 728                 }
 729             }
 730         }
 731
 732         if ( state.IsDirect() )
 733         {
 734             // start of an encoded segment?
 735             if ( cc == '+' )
 736             {
 737                 if ( *src == '-' )
 738                 {
 739                     // just the encoded plus sign, don't switch to shifted mode
 740                     if ( dst )
 741                         *dst++ = '+';
 742                     len++;
 743                     src++;
 744                 }
 745                 else if ( utf7unb64[(unsigned)*src] == 0xff )
 746                 {
 747                     // empty encoded chunks are not allowed
 748                     if ( !len )
 749                         state = stateOrig;
 750
 751                     return wxCONV_FAILED;
 752                 }
 753                 else // base-64 encoded chunk follows
 754                 {
 755                     state.ToShifted();
 756                 }
 757             }
 758             else // not '+'
 759             {
 760                 // only printable 7 bit ASCII characters (with the exception of
 761                 // NUL, TAB, CR and LF) can be used directly
 762                 if ( cc >= 0x7f || (cc < ' ' &&
 763                       !(cc == '\0' || cc == '\t' || cc == '\r' || cc == '\n')) )
 764                     return wxCONV_FAILED;
 765
 766                 if ( dst )
 767                     *dst++ = cc;
 768                 len++;
 769             }
 770         }
 771     }
 772
 773     if ( !len )
 774     {
 775         // as we didn't read any characters we should be called with the same
 776         // data (followed by some more new data) again later so don't save our
 777         // state
 778         state = stateOrig;
 779
 780         return wxCONV_FAILED;
 781     }
 782
 783     return len;
 784 }
 785
//
// BASE64 encoding table
//
// Maps a 6 bit value (0 to 63), used as index, to the character of the base64
// alphabet representing it.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 800
 801 //
 802 // UTF-7 encoding table
 803 //
 804 // 0 - Set D (directly encoded characters)
 805 // 1 - Set O (optional direct characters)
 806 // 2 - whitespace characters (optional)
 807 // 3 - special characters
 808 //
 809 static const unsigned char utf7encode[128] =
 810 {
 811     0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 812     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 813     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 814     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 815     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 816     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 817     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 818     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 819 };
 820
 821 static inline bool wxIsUTF7Direct(wchar_t wc)
 822 {
 823     return wc < 0x80 && utf7encode[wc] < 1;
 824 }
 825
// Encodes wide characters as UTF-7.
//
// dst:    output buffer, may be NULL in which case only the number of bytes
//         needed is computed
// dstLen: size of dst in bytes (only used if dst is not NULL)
// src:    input wide string
// srcLen: length of src in wide characters or wxNO_LEN if src is
//         NUL-terminated, in which case the trailing NUL is encoded too
//
// When srcLen is wxNO_LEN a local encoder state is used, otherwise the state
// stored in m_stateEncoder is used and, unless dst is NULL, updated.
//
// Returns the number of bytes produced or, if wchar_t is not 16 bit, returns
// wxCONV_FAILED for characters greater than 0xffff.
size_t wxMBConvUTF7::FromWChar(char *dst, size_t dstLen,
                               const wchar_t *src, size_t srcLen) const
{
    EncoderState stateOrig,
                *statePtr;
    if ( srcLen == wxNO_LEN )
    {
        // we don't apply the stored state when operating on entire strings at
        // once
        statePtr = &stateOrig;

        srcLen = wxWcslen(src) + 1;
    }
    else // do use the mode we left the output in previously
    {
        stateOrig = m_stateEncoder;
        statePtr = const_cast<EncoderState *>(&m_stateEncoder);
    }

    EncoderState& state = *statePtr;


    // number of bytes [which would have been] written to dst
    size_t len = 0;

    // NOTE(review): dstLen is only tested here, at the top of the loop, while
    // a single iteration may write several bytes to dst -- confirm that the
    // callers always size dst using a previous call with NULL dst
    const wchar_t * const srcEnd = src + srcLen;
    while ( src < srcEnd && (!dst || len < dstLen) )
    {
        wchar_t cc = *src++;
        if ( wxIsUTF7Direct(cc) )
        {
            if ( state.IsShifted() )
            {
                // pad with zeros the last encoded block if necessary
                if ( state.bit )
                {
                    if ( dst )
                        *dst++ = utf7enb64[((state.accum % 16) << (6 - state.bit)) % 64];
                    len++;
                }

                state.ToDirect();

                // explicitly terminate the encoded sequence
                if ( dst )
                    *dst++ = '-';
                len++;
            }

            // the character itself is output as is
            if ( dst )
                *dst++ = (char)cc;
            len++;
        }
        else if ( cc == '+' && state.IsDirect() )
        {
            // plus sign starts the encoded sequences and so must be escaped
            // as "+-" itself
            if ( dst )
            {
                *dst++ = '+';
                *dst++ = '-';
            }

            len += 2;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return wxCONV_FAILED;
        }
#endif
        else
        {
            if ( state.IsDirect() )
            {
                state.ToShifted();

                // '+' starts the encoded sequence
                if ( dst )
                    *dst++ = '+';
                len++;
            }

            // BASE64 encode string
            for ( ;; )
            {
                // each character is encoded as 2 bytes, most significant
                // byte first
                for ( unsigned lsb = 0; lsb < 2; lsb++ )
                {
                    state.accum <<= 8;
                    state.accum += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                    // output a base64 character for every 6 bits available
                    for (state.bit += 8; state.bit >= 6; )
                    {
                        state.bit -= 6;
                        if ( dst )
                            *dst++ = utf7enb64[(state.accum >> state.bit) % 64];
                        len++;
                    }
                }

                // continue with the next character unless we reached the end
                // of input or it can be output directly: in this case leave
                // it to be processed by the next iteration of the outer loop
                //
                // NOTE(review): unlike at the top of the outer loop, a
                // character above 0xffff is not rejected here when wchar_t is
                // not 16 bit -- confirm whether this is intentional
                if ( src == srcEnd || wxIsUTF7Direct(cc = *src) )
                    break;

                src++;
            }
        }
    }

    // we need to restore the original encoder state if we were called just to
    // calculate the amount of space needed as we will presumably be called
    // again to really convert the data now
    if ( !dst )
        state = stateOrig;

    return len;
}
 938
 939 // ----------------------------------------------------------------------------
 940 // UTF-8
 941 // ----------------------------------------------------------------------------
 942
 943 static const wxUint32 utf8_max[]=
 944     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 945
// boundaries of the private use area range which we use to (temporarily)
// remap bytes that are invalid in a UTF-8 encoded string: each such byte b is
// represented by the code point wxUnicodePUA + b, hence the range covers
// exactly 256 values, [wxUnicodePUA, wxUnicodePUAEnd)
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 950
// this table gives the length of the UTF-8 encoding from its first character:
// it is indexed by the value of the lead byte and contains the total number
// of bytes in the sequence (1 to 4) or 0 if this byte can't start a valid
// sequence
const unsigned char tableUtf8Lengths[256] = {
    // single-byte sequences (ASCII):
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F

    // these are invalid as lead bytes: 80..BF are continuation bytes and
    // C0 and C1 could only start overlong encodings of ASCII characters
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
    0, 0,                                            // C0,C1

    // two-byte sequences:
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF

    // three-byte sequences:
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF

    // four-byte sequences:
    4, 4, 4, 4, 4,                                   // F0..F4

    // these are invalid again (5- or 6-byte
    // sequences and sequences for code points
    // above U+10FFFF, as restricted by RFC 3629):
                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF
};
 985
 986 size_t
 987 wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
 988                             const char *src, size_t srcLen) const
 989 {
 990     wchar_t *out = dstLen ? dst : NULL;
 991     size_t written = 0;
 992
 993     if ( srcLen == wxNO_LEN )
 994         srcLen = strlen(src) + 1;
 995
 996     for ( const char *p = src; ; p++ )
 997     {
 998         if ( (srcLen == wxNO_LEN ? !*p : !srcLen) )
 999         {
1000             // all done successfully, just add the trailing NULL if we are not
1001             // using explicit length
1002             if ( srcLen == wxNO_LEN )
1003             {
1004                 if ( out )
1005                 {
1006                     if ( !dstLen )
1007                         break;
1008
1009                     *out = L'\0';
1010                 }
1011
1012                 written++;
1013             }
1014
1015             return written;
1016         }
1017
1018         if ( out && !dstLen-- )
1019             break;
1020
1021         wxUint32 code;
1022         unsigned char c = *p;
1023
1024         if ( c < 0x80 )
1025         {
1026             if ( srcLen == 0 ) // the test works for wxNO_LEN too
1027                 break;
1028
1029             if ( srcLen != wxNO_LEN )
1030                 srcLen--;
1031
1032             code = c;
1033         }
1034         else
1035         {
1036             unsigned len = tableUtf8Lengths[c];
1037             if ( !len )
1038                 break;
1039
1040             if ( srcLen < len ) // the test works for wxNO_LEN too
1041                 break;
1042
1043             if ( srcLen != wxNO_LEN )
1044                 srcLen -= len;
1045
1046             //   Char. number range   |        UTF-8 octet sequence
1047             //      (hexadecimal)     |              (binary)
1048             //  ----------------------+----------------------------------------
1049             //  0000 0000 - 0000 007F | 0xxxxxxx
1050             //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
1051             //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
1052             //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1053             //
1054             //  Code point value is stored in bits marked with 'x',
1055             //  lowest-order bit of the value on the right side in the diagram
1056             //  above.                                         (from RFC 3629)
1057
1058             // mask to extract lead byte's value ('x' bits above), by sequence
1059             // length:
1060             static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
1061
1062             // mask and value of lead byte's most significant bits, by length:
1063             static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
1064             static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
1065
1066             len--; // it's more convenient to work with 0-based length here
1067
1068             // extract the lead byte's value bits:
1069             if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
1070                 break;
1071
1072             code = c & leadValueMask[len];
1073
1074             // all remaining bytes, if any, are handled in the same way
1075             // regardless of sequence's length:
1076             for ( ; len; --len )
1077             {
1078                 c = *++p;
1079                 if ( (c & 0xC0) != 0x80 )
1080                     return wxCONV_FAILED;
1081
1082                 code <<= 6;
1083                 code |= c & 0x3F;
1084             }
1085         }
1086
1087 #ifdef WC_UTF16
1088         // cast is ok because wchar_t == wxUint16 if WC_UTF16
1089         if ( encode_utf16(code, (wxUint16 *)out) == 2 )
1090         {
1091             if ( out )
1092                 out++;
1093             written++;
1094         }
1095 #else // !WC_UTF16
1096         if ( out )
1097             *out = code;
1098 #endif // WC_UTF16/!WC_UTF16
1099
1100         if ( out )
1101             out++;
1102
1103         written++;
1104     }
1105
1106     return wxCONV_FAILED;
1107 }
1108
1109 size_t
1110 wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
1111                               const wchar_t *src, size_t srcLen) const
1112 {
1113     char *out = dstLen ? dst : NULL;
1114     size_t written = 0;
1115
1116     for ( const wchar_t *wp = src; ; wp++ )
1117     {
1118         if ( (srcLen == wxNO_LEN ? !*wp : !srcLen) )
1119         {
1120             // all done successfully, just add the trailing NULL if we are not
1121             // using explicit length
1122             if ( srcLen == wxNO_LEN )
1123             {
1124                 if ( out )
1125                 {
1126                     if ( !dstLen )
1127                         break;
1128
1129                     *out = '\0';
1130                 }
1131
1132                 written++;
1133             }
1134
1135             return written;
1136         }
1137
1138         if ( srcLen != wxNO_LEN )
1139             srcLen--;
1140
1141         wxUint32 code;
1142 #ifdef WC_UTF16
1143         // cast is ok for WC_UTF16
1144         if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
1145         {
1146             // skip the next char too as we decoded a surrogate
1147             wp++;
1148             if ( srcLen != wxNO_LEN )
1149                 srcLen--;
1150         }
1151 #else // wchar_t is UTF-32
1152         code = *wp & 0x7fffffff;
1153 #endif
1154
1155         unsigned len;
1156         if ( code <= 0x7F )
1157         {
1158             len = 1;
1159             if ( out )
1160             {
1161                 if ( dstLen < len )
1162                     break;
1163
1164                 out[0] = (char)code;
1165             }
1166         }
1167         else if ( code <= 0x07FF )
1168         {
1169             len = 2;
1170             if ( out )
1171             {
1172                 if ( dstLen < len )
1173                     break;
1174
1175                 // NB: this line takes 6 least significant bits, encodes them as
1176                 // 10xxxxxx and discards them so that the next byte can be encoded:
1177                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1178                 out[0] = 0xC0 | code;
1179             }
1180         }
1181         else if ( code < 0xFFFF )
1182         {
1183             len = 3;
1184             if ( out )
1185             {
1186                 if ( dstLen < len )
1187                     break;
1188
1189                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1190                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1191                 out[0] = 0xE0 | code;
1192             }
1193         }
1194         else if ( code <= 0x10FFFF )
1195         {
1196             len = 4;
1197             if ( out )
1198             {
1199                 if ( dstLen < len )
1200                     break;
1201
1202                 out[3] = 0x80 | (code & 0x3F);  code >>= 6;
1203                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1204                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1205                 out[0] = 0xF0 | code;
1206             }
1207         }
1208         else
1209         {
1210             wxFAIL_MSG( wxT("trying to encode undefined Unicode character") );
1211             break;
1212         }
1213
1214         if ( out )
1215         {
1216             out += len;
1217             dstLen -= len;
1218         }
1219
1220         written += len;
1221     }
1222
1223     // we only get here if an error occurs during decoding
1224     return wxCONV_FAILED;
1225 }
1226
1227 size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
1228                              const char *psz, size_t srcLen) const
1229 {
1230     if ( m_options == MAP_INVALID_UTF8_NOT )
1231         return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
1232
1233     size_t len = 0;
1234
1235     // The length can be either given explicitly or computed implicitly for the
1236     // NUL-terminated strings.
1237     const bool isNulTerminated = srcLen == wxNO_LEN;
1238     while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n)))
1239     {
1240         const char *opsz = psz;
1241         bool invalid = false;
1242         unsigned char cc = *psz++, fc = cc;
1243         unsigned cnt;
1244         for (cnt = 0; fc & 0x80; cnt++)
1245             fc <<= 1;
1246
1247         if (!cnt)
1248         {
1249             // plain ASCII char
1250             if (buf)
1251                 *buf++ = cc;
1252             len++;
1253
1254             // escape the escape character for octal escapes
1255             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
1256                     && cc == '\\' && (!buf || len < n))
1257             {
1258                 if (buf)
1259                     *buf++ = cc;
1260                 len++;
1261             }
1262         }
1263         else
1264         {
1265             cnt--;
1266             if (!cnt)
1267             {
1268                 // invalid UTF-8 sequence
1269                 invalid = true;
1270             }
1271             else
1272             {
1273                 unsigned ocnt = cnt - 1;
1274                 wxUint32 res = cc & (0x3f >> cnt);
1275                 while (cnt--)
1276                 {
1277                     cc = *psz;
1278                     if ((cc & 0xC0) != 0x80)
1279                     {
1280                         // invalid UTF-8 sequence
1281                         invalid = true;
1282                         break;
1283                     }
1284
1285                     psz++;
1286                     res = (res << 6) | (cc & 0x3f);
1287                 }
1288
1289                 if (invalid || res <= utf8_max[ocnt])
1290                 {
1291                     // illegal UTF-8 encoding
1292                     invalid = true;
1293                 }
1294                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
1295                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
1296                 {
1297                     // if one of our PUA characters turns up externally
1298                     // it must also be treated as an illegal sequence
1299                     // (a bit like you have to escape an escape character)
1300                     invalid = true;
1301                 }
1302                 else
1303                 {
1304 #ifdef WC_UTF16
1305                     // cast is ok because wchar_t == wxUint16 if WC_UTF16
1306                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
1307                     if (pa == wxCONV_FAILED)
1308                     {
1309                         invalid = true;
1310                     }
1311                     else
1312                     {
1313                         if (buf)
1314                             buf += pa;
1315                         len += pa;
1316                     }
1317 #else // !WC_UTF16
1318                     if (buf)
1319                         *buf++ = (wchar_t)res;
1320                     len++;
1321 #endif // WC_UTF16/!WC_UTF16
1322                 }
1323             }
1324
1325             if (invalid)
1326             {
1327                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
1328                 {
1329                     while (opsz < psz && (!buf || len < n))
1330                     {
1331 #ifdef WC_UTF16
1332                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
1333                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
1334                         wxASSERT(pa != wxCONV_FAILED);
1335                         if (buf)
1336                             buf += pa;
1337                         opsz++;
1338                         len += pa;
1339 #else
1340                         if (buf)
1341                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
1342                         opsz++;
1343                         len++;
1344 #endif
1345                     }
1346                 }
1347                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1348                 {
1349                     while (opsz < psz && (!buf || len < n))
1350                     {
1351                         if ( buf && len + 3 < n )
1352                         {
1353                             unsigned char on = *opsz;
1354                             *buf++ = L'\\';
1355                             *buf++ = (wchar_t)( L'0' + on / 0100 );
1356                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
1357                             *buf++ = (wchar_t)( L'0' + on % 010 );
1358                         }
1359
1360                         opsz++;
1361                         len += 4;
1362                     }
1363                 }
1364                 else // MAP_INVALID_UTF8_NOT
1365                 {
1366                     return wxCONV_FAILED;
1367                 }
1368             }
1369         }
1370     }
1371
1372     if ( isNulTerminated )
1373     {
1374         // Add the trailing NUL in this case if we have a large enough buffer.
1375         if ( buf && (len < n) )
1376             *buf = 0;
1377
1378         // And count it in any case.
1379         len++;
1380     }
1381
1382     return len;
1383 }
1384
// return true if the given character is one of the octal digits '0'..'7'
static inline bool isoctal(wchar_t wch)
{
    switch ( wch )
    {
        case L'0':
        case L'1':
        case L'2':
        case L'3':
        case L'4':
        case L'5':
        case L'6':
        case L'7':
            return true;

        default:
            return false;
    }
}
1389
1390 size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
1391                                const wchar_t *psz, size_t srcLen) const
1392 {
1393     if ( m_options == MAP_INVALID_UTF8_NOT )
1394         return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
1395
1396     size_t len = 0;
1397
1398     while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
1399     {
1400         wxUint32 cc;
1401
1402 #ifdef WC_UTF16
1403         // cast is ok for WC_UTF16
1404         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1405         psz += (pa == wxCONV_FAILED) ? 1 : pa;
1406 #else
1407         cc = (*psz++) & 0x7fffffff;
1408 #endif
1409
1410         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
1411                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
1412         {
1413             if (buf)
1414                 *buf++ = (char)(cc - wxUnicodePUA);
1415             len++;
1416         }
1417         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1418                     && cc == L'\\' && psz[0] == L'\\' )
1419         {
1420             if (buf)
1421                 *buf++ = (char)cc;
1422             psz++;
1423             len++;
1424         }
1425         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
1426                     cc == L'\\' &&
1427                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
1428         {
1429             if (buf)
1430             {
1431                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
1432                                  (psz[1] - L'0') * 010 +
1433                                  (psz[2] - L'0'));
1434             }
1435
1436             psz += 3;
1437             len++;
1438         }
1439         else
1440         {
1441             unsigned cnt;
1442             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
1443             {
1444             }
1445
1446             if (!cnt)
1447             {
1448                 // plain ASCII char
1449                 if (buf)
1450                     *buf++ = (char) cc;
1451                 len++;
1452             }
1453             else
1454             {
1455                 len += cnt + 1;
1456                 if (buf)
1457                 {
1458                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
1459                     while (cnt--)
1460                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
1461                 }
1462             }
1463         }
1464     }
1465
1466     if (srcLen == wxNO_LEN && buf && (len < n))
1467         *buf = 0;
1468
1469     return len + 1;
1470 }
1471
1472 // ============================================================================
1473 // UTF-16
1474 // ============================================================================
1475
// "straight" is the UTF-16 converter using the same byte order as this
// machine and "swap" is the one using the opposite byte order
#if defined(WORDS_BIGENDIAN)
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else // little endian
    #define wxMBConvUTF16straight wxMBConvUTF16LE
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
1483
1484 /* static */
1485 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
1486 {
1487     if ( srcLen == wxNO_LEN )
1488     {
1489         // count the number of bytes in input, including the trailing NULs
1490         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1491         for ( srcLen = 1; *inBuff++; srcLen++ )
1492             ;
1493
1494         srcLen *= BYTES_PER_CHAR;
1495     }
1496     else // we already have the length
1497     {
1498         // we can only convert an entire number of UTF-16 characters
1499         if ( srcLen % BYTES_PER_CHAR )
1500             return wxCONV_FAILED;
1501     }
1502
1503     return srcLen;
1504 }
1505
1506 // case when in-memory representation is UTF-16 too
1507 #ifdef WC_UTF16
1508
1509 // ----------------------------------------------------------------------------
1510 // conversions without endianness change
1511 // ----------------------------------------------------------------------------
1512
1513 size_t
1514 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1515                                const char *src, size_t srcLen) const
1516 {
1517     // set up the scene for using memcpy() (which is presumably more efficient
1518     // than copying the bytes one by one)
1519     srcLen = GetLength(src, srcLen);
1520     if ( srcLen == wxNO_LEN )
1521         return wxCONV_FAILED;
1522
1523     const size_t inLen = srcLen / BYTES_PER_CHAR;
1524     if ( dst )
1525     {
1526         if ( dstLen < inLen )
1527             return wxCONV_FAILED;
1528
1529         memcpy(dst, src, srcLen);
1530     }
1531
1532     return inLen;
1533 }
1534
1535 size_t
1536 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1537                                  const wchar_t *src, size_t srcLen) const
1538 {
1539     if ( srcLen == wxNO_LEN )
1540         srcLen = wxWcslen(src) + 1;
1541
1542     srcLen *= BYTES_PER_CHAR;
1543
1544     if ( dst )
1545     {
1546         if ( dstLen < srcLen )
1547             return wxCONV_FAILED;
1548
1549         memcpy(dst, src, srcLen);
1550     }
1551
1552     return srcLen;
1553 }
1554
1555 // ----------------------------------------------------------------------------
1556 // endian-reversing conversions
1557 // ----------------------------------------------------------------------------
1558
1559 size_t
1560 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1561                            const char *src, size_t srcLen) const
1562 {
1563     srcLen = GetLength(src, srcLen);
1564     if ( srcLen == wxNO_LEN )
1565         return wxCONV_FAILED;
1566
1567     srcLen /= BYTES_PER_CHAR;
1568
1569     if ( dst )
1570     {
1571         if ( dstLen < srcLen )
1572             return wxCONV_FAILED;
1573
1574         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1575         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1576         {
1577             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1578         }
1579     }
1580
1581     return srcLen;
1582 }
1583
1584 size_t
1585 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1586                              const wchar_t *src, size_t srcLen) const
1587 {
1588     if ( srcLen == wxNO_LEN )
1589         srcLen = wxWcslen(src) + 1;
1590
1591     srcLen *= BYTES_PER_CHAR;
1592
1593     if ( dst )
1594     {
1595         if ( dstLen < srcLen )
1596             return wxCONV_FAILED;
1597
1598         wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1599         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1600         {
1601             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1602         }
1603     }
1604
1605     return srcLen;
1606 }
1607
1608 #else // !WC_UTF16: wchar_t is UTF-32
1609
1610 // ----------------------------------------------------------------------------
1611 // conversions without endianness change
1612 // ----------------------------------------------------------------------------
1613
1614 size_t
1615 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1616                                const char *src, size_t srcLen) const
1617 {
1618     srcLen = GetLength(src, srcLen);
1619     if ( srcLen == wxNO_LEN )
1620         return wxCONV_FAILED;
1621
1622     const size_t inLen = srcLen / BYTES_PER_CHAR;
1623     if ( !dst )
1624     {
1625         // optimization: return maximal space which could be needed for this
1626         // string even if the real size could be smaller if the buffer contains
1627         // any surrogates
1628         return inLen;
1629     }
1630
1631     size_t outLen = 0;
1632     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1633     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1634     {
1635         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1636         if ( !inBuff )
1637             return wxCONV_FAILED;
1638
1639         if ( ++outLen > dstLen )
1640             return wxCONV_FAILED;
1641
1642         *dst++ = ch;
1643     }
1644
1645
1646     return outLen;
1647 }
1648
1649 size_t
1650 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1651                                  const wchar_t *src, size_t srcLen) const
1652 {
1653     if ( srcLen == wxNO_LEN )
1654         srcLen = wxWcslen(src) + 1;
1655
1656     size_t outLen = 0;
1657     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1658     for ( size_t n = 0; n < srcLen; n++ )
1659     {
1660         wxUint16 cc[2] = { 0 };
1661         const size_t numChars = encode_utf16(*src++, cc);
1662         if ( numChars == wxCONV_FAILED )
1663             return wxCONV_FAILED;
1664
1665         outLen += numChars * BYTES_PER_CHAR;
1666         if ( outBuff )
1667         {
1668             if ( outLen > dstLen )
1669                 return wxCONV_FAILED;
1670
1671             *outBuff++ = cc[0];
1672             if ( numChars == 2 )
1673             {
1674                 // second character of a surrogate
1675                 *outBuff++ = cc[1];
1676             }
1677         }
1678     }
1679
1680     return outLen;
1681 }
1682
1683 // ----------------------------------------------------------------------------
1684 // endian-reversing conversions
1685 // ----------------------------------------------------------------------------
1686
1687 size_t
1688 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1689                            const char *src, size_t srcLen) const
1690 {
1691     srcLen = GetLength(src, srcLen);
1692     if ( srcLen == wxNO_LEN )
1693         return wxCONV_FAILED;
1694
1695     const size_t inLen = srcLen / BYTES_PER_CHAR;
1696     if ( !dst )
1697     {
1698         // optimization: return maximal space which could be needed for this
1699         // string even if the real size could be smaller if the buffer contains
1700         // any surrogates
1701         return inLen;
1702     }
1703
1704     size_t outLen = 0;
1705     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1706     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1707     {
1708         wxUint32 ch;
1709         wxUint16 tmp[2];
1710
1711         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1712         inBuff++;
1713         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1714
1715         const size_t numChars = decode_utf16(tmp, ch);
1716         if ( numChars == wxCONV_FAILED )
1717             return wxCONV_FAILED;
1718
1719         if ( numChars == 2 )
1720             inBuff++;
1721
1722         if ( ++outLen > dstLen )
1723             return wxCONV_FAILED;
1724
1725         *dst++ = ch;
1726     }
1727
1728
1729     return outLen;
1730 }
1731
1732 size_t
1733 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1734                              const wchar_t *src, size_t srcLen) const
1735 {
1736     if ( srcLen == wxNO_LEN )
1737         srcLen = wxWcslen(src) + 1;
1738
1739     size_t outLen = 0;
1740     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1741     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1742     {
1743         wxUint16 cc[2] = { 0 };
1744         const size_t numChars = encode_utf16(*src, cc);
1745         if ( numChars == wxCONV_FAILED )
1746             return wxCONV_FAILED;
1747
1748         outLen += numChars * BYTES_PER_CHAR;
1749         if ( outBuff )
1750         {
1751             if ( outLen > dstLen )
1752                 return wxCONV_FAILED;
1753
1754             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1755             if ( numChars == 2 )
1756             {
1757                 // second character of a surrogate
1758                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1759             }
1760         }
1761     }
1762
1763     return outLen;
1764 }
1765
1766 #endif // WC_UTF16/!WC_UTF16
1767
1768
1769 // ============================================================================
1770 // UTF-32
1771 // ============================================================================
1772
// "straight" is the UTF-32 converter using the same byte order as this
// machine and "swap" is the one using the opposite byte order
#if defined(WORDS_BIGENDIAN)
    #define wxMBConvUTF32straight  wxMBConvUTF32BE
    #define wxMBConvUTF32swap      wxMBConvUTF32LE
#else // little endian
    #define wxMBConvUTF32straight  wxMBConvUTF32LE
    #define wxMBConvUTF32swap      wxMBConvUTF32BE
#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
1780
1781
// global converter objects, one for each UTF-32 byte order
// NOTE(review): WXDLLIMPEXP_DATA_BASE presumably adds the DLL export/import
//               decoration of the base library -- confirm against its
//               definition
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1784
1785 /* static */
1786 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1787 {
1788     if ( srcLen == wxNO_LEN )
1789     {
1790         // count the number of bytes in input, including the trailing NULs
1791         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1792         for ( srcLen = 1; *inBuff++; srcLen++ )
1793             ;
1794
1795         srcLen *= BYTES_PER_CHAR;
1796     }
1797     else // we already have the length
1798     {
1799         // we can only convert an entire number of UTF-32 characters
1800         if ( srcLen % BYTES_PER_CHAR )
1801             return wxCONV_FAILED;
1802     }
1803
1804     return srcLen;
1805 }
1806
1807 // case when in-memory representation is UTF-16
1808 #ifdef WC_UTF16
1809
1810 // ----------------------------------------------------------------------------
1811 // conversions without endianness change
1812 // ----------------------------------------------------------------------------
1813
1814 size_t
1815 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1816                                const char *src, size_t srcLen) const
1817 {
1818     srcLen = GetLength(src, srcLen);
1819     if ( srcLen == wxNO_LEN )
1820         return wxCONV_FAILED;
1821
1822     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1823     const size_t inLen = srcLen / BYTES_PER_CHAR;
1824     size_t outLen = 0;
1825     for ( size_t n = 0; n < inLen; n++ )
1826     {
1827         wxUint16 cc[2] = { 0 };
1828         const size_t numChars = encode_utf16(*inBuff++, cc);
1829         if ( numChars == wxCONV_FAILED )
1830             return wxCONV_FAILED;
1831
1832         outLen += numChars;
1833         if ( dst )
1834         {
1835             if ( outLen > dstLen )
1836                 return wxCONV_FAILED;
1837
1838             *dst++ = cc[0];
1839             if ( numChars == 2 )
1840             {
1841                 // second character of a surrogate
1842                 *dst++ = cc[1];
1843             }
1844         }
1845     }
1846
1847     return outLen;
1848 }
1849
1850 size_t
1851 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1852                                  const wchar_t *src, size_t srcLen) const
1853 {
1854     if ( srcLen == wxNO_LEN )
1855         srcLen = wxWcslen(src) + 1;
1856
1857     if ( !dst )
1858     {
1859         // optimization: return maximal space which could be needed for this
1860         // string instead of the exact amount which could be less if there are
1861         // any surrogates in the input
1862         //
1863         // we consider that surrogates are rare enough to make it worthwhile to
1864         // avoid running the loop below at the cost of slightly extra memory
1865         // consumption
1866         return srcLen * BYTES_PER_CHAR;
1867     }
1868
1869     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1870     size_t outLen = 0;
1871     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1872     {
1873         const wxUint32 ch = wxDecodeSurrogate(&src);
1874         if ( !src )
1875             return wxCONV_FAILED;
1876
1877         outLen += BYTES_PER_CHAR;
1878
1879         if ( outLen > dstLen )
1880             return wxCONV_FAILED;
1881
1882         *outBuff++ = ch;
1883     }
1884
1885     return outLen;
1886 }
1887
1888 // ----------------------------------------------------------------------------
1889 // endian-reversing conversions
1890 // ----------------------------------------------------------------------------
1891
1892 size_t
1893 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1894                            const char *src, size_t srcLen) const
1895 {
1896     srcLen = GetLength(src, srcLen);
1897     if ( srcLen == wxNO_LEN )
1898         return wxCONV_FAILED;
1899
1900     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1901     const size_t inLen = srcLen / BYTES_PER_CHAR;
1902     size_t outLen = 0;
1903     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1904     {
1905         wxUint16 cc[2] = { 0 };
1906         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1907         if ( numChars == wxCONV_FAILED )
1908             return wxCONV_FAILED;
1909
1910         outLen += numChars;
1911         if ( dst )
1912         {
1913             if ( outLen > dstLen )
1914                 return wxCONV_FAILED;
1915
1916             *dst++ = cc[0];
1917             if ( numChars == 2 )
1918             {
1919                 // second character of a surrogate
1920                 *dst++ = cc[1];
1921             }
1922         }
1923     }
1924
1925     return outLen;
1926 }
1927
1928 size_t
1929 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1930                              const wchar_t *src, size_t srcLen) const
1931 {
1932     if ( srcLen == wxNO_LEN )
1933         srcLen = wxWcslen(src) + 1;
1934
1935     if ( !dst )
1936     {
1937         // optimization: return maximal space which could be needed for this
1938         // string instead of the exact amount which could be less if there are
1939         // any surrogates in the input
1940         //
1941         // we consider that surrogates are rare enough to make it worthwhile to
1942         // avoid running the loop below at the cost of slightly extra memory
1943         // consumption
1944         return srcLen*BYTES_PER_CHAR;
1945     }
1946
1947     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1948     size_t outLen = 0;
1949     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1950     {
1951         const wxUint32 ch = wxDecodeSurrogate(&src);
1952         if ( !src )
1953             return wxCONV_FAILED;
1954
1955         outLen += BYTES_PER_CHAR;
1956
1957         if ( outLen > dstLen )
1958             return wxCONV_FAILED;
1959
1960         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1961     }
1962
1963     return outLen;
1964 }
1965
1966 #else // !WC_UTF16: wchar_t is UTF-32
1967
1968 // ----------------------------------------------------------------------------
1969 // conversions without endianness change
1970 // ----------------------------------------------------------------------------
1971
1972 size_t
1973 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1974                                const char *src, size_t srcLen) const
1975 {
1976     // use memcpy() as it should be much faster than hand-written loop
1977     srcLen = GetLength(src, srcLen);
1978     if ( srcLen == wxNO_LEN )
1979         return wxCONV_FAILED;
1980
1981     const size_t inLen = srcLen/BYTES_PER_CHAR;
1982     if ( dst )
1983     {
1984         if ( dstLen < inLen )
1985             return wxCONV_FAILED;
1986
1987         memcpy(dst, src, srcLen);
1988     }
1989
1990     return inLen;
1991 }
1992
1993 size_t
1994 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1995                                  const wchar_t *src, size_t srcLen) const
1996 {
1997     if ( srcLen == wxNO_LEN )
1998         srcLen = wxWcslen(src) + 1;
1999
2000     srcLen *= BYTES_PER_CHAR;
2001
2002     if ( dst )
2003     {
2004         if ( dstLen < srcLen )
2005             return wxCONV_FAILED;
2006
2007         memcpy(dst, src, srcLen);
2008     }
2009
2010     return srcLen;
2011 }
2012
2013 // ----------------------------------------------------------------------------
2014 // endian-reversing conversions
2015 // ----------------------------------------------------------------------------
2016
2017 size_t
2018 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
2019                            const char *src, size_t srcLen) const
2020 {
2021     srcLen = GetLength(src, srcLen);
2022     if ( srcLen == wxNO_LEN )
2023         return wxCONV_FAILED;
2024
2025     srcLen /= BYTES_PER_CHAR;
2026
2027     if ( dst )
2028     {
2029         if ( dstLen < srcLen )
2030             return wxCONV_FAILED;
2031
2032         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
2033         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
2034         {
2035             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
2036         }
2037     }
2038
2039     return srcLen;
2040 }
2041
2042 size_t
2043 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
2044                              const wchar_t *src, size_t srcLen) const
2045 {
2046     if ( srcLen == wxNO_LEN )
2047         srcLen = wxWcslen(src) + 1;
2048
2049     srcLen *= BYTES_PER_CHAR;
2050
2051     if ( dst )
2052     {
2053         if ( dstLen < srcLen )
2054             return wxCONV_FAILED;
2055
2056         wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
2057         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
2058         {
2059             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
2060         }
2061     }
2062
2063     return srcLen;
2064 }
2065
2066 #endif // WC_UTF16/!WC_UTF16
2067
2068
2069 // ============================================================================
2070 // The classes doing conversion using the iconv_xxx() functions
2071 // ============================================================================
2072
2073 #ifdef HAVE_ICONV
2074
2075 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
2076 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
2077 //     (unless there's yet another bug in glibc) the only case when iconv()
2078 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
2079 //     left in the input buffer -- when _real_ error occurs,
2080 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
2081 //     iconv() failure.
2082 //     [This bug does not appear in glibc 2.2.]
2083 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
2084 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
2085                                      (errno != E2BIG || bufLeft != 0))
2086 #else
2087 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
2088 #endif
2089
2090 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
2091
2092 #define ICONV_T_INVALID ((iconv_t)-1)
2093
2094 #if SIZEOF_WCHAR_T == 4
2095     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
2096     #define WC_ENC      wxFONTENCODING_UTF32
2097 #elif SIZEOF_WCHAR_T == 2
2098     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
2099     #define WC_ENC      wxFONTENCODING_UTF16
2100 #else // sizeof(wchar_t) != 2 nor 4
2101     // does this ever happen?
2102     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
2103 #endif
2104
2105 // ----------------------------------------------------------------------------
2106 // wxMBConv_iconv: encapsulates an iconv character set
2107 // ----------------------------------------------------------------------------
2108
2109 class wxMBConv_iconv : public wxMBConv
2110 {
2111 public:
2112     wxMBConv_iconv(const char *name);
2113     virtual ~wxMBConv_iconv();
2114
2115     // implement base class virtual methods
2116     virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
2117                            const char *src, size_t srcLen = wxNO_LEN) const;
2118     virtual size_t FromWChar(char *dst, size_t dstLen,
2119                              const wchar_t *src, size_t srcLen = wxNO_LEN) const;
2120     virtual size_t GetMBNulLen() const;
2121
2122 #if wxUSE_UNICODE_UTF8
2123     virtual bool IsUTF8() const;
2124 #endif
2125
2126     virtual wxMBConv *Clone() const
2127     {
2128         wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
2129         p->m_minMBCharWidth = m_minMBCharWidth;
2130         return p;
2131     }
2132
2133     bool IsOk() const
2134         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
2135
2136 protected:
2137     // the iconv handlers used to translate from multibyte
2138     // to wide char and in the other direction
2139     iconv_t m2w,
2140             w2m;
2141
2142 #if wxUSE_THREADS
2143     // guards access to m2w and w2m objects
2144     wxMutex m_iconvMutex;
2145 #endif
2146
2147 private:
2148     // the name (for iconv_open()) of a wide char charset -- if none is
2149     // available on this machine, it will remain NULL
2150     static wxString ms_wcCharsetName;
2151
2152     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
2153     // different endian-ness than the native one
2154     static bool ms_wcNeedsSwap;
2155
2156
2157     // name of the encoding handled by this conversion
2158     const char *m_name;
2159
2160     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
2161     // initially
2162     size_t m_minMBCharWidth;
2163 };
2164
2165 // make the constructor available for unit testing
2166 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
2167 {
2168     wxMBConv_iconv* result = new wxMBConv_iconv( name );
2169     if ( !result->IsOk() )
2170     {
2171         delete result;
2172         return 0;
2173     }
2174
2175     return result;
2176 }
2177
2178 wxString wxMBConv_iconv::ms_wcCharsetName;
2179 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
2180
2181 wxMBConv_iconv::wxMBConv_iconv(const char *name)
2182               : m_name(wxStrdup(name))
2183 {
2184     m_minMBCharWidth = 0;
2185
2186     // check for charset that represents wchar_t:
2187     if ( ms_wcCharsetName.empty() )
2188     {
2189         wxLogTrace(TRACE_STRCONV, wxT("Looking for wide char codeset:"));
2190
2191 #if wxUSE_FONTMAP
2192         const wxChar *const *names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
2193 #else // !wxUSE_FONTMAP
2194         static const wxChar *const names_static[] =
2195         {
2196 #if SIZEOF_WCHAR_T == 4
2197             wxT("UCS-4"),
2198 #elif SIZEOF_WCHAR_T == 2
2199             wxT("UCS-2"),
2200 #endif
2201             NULL
2202         };
2203         const wxChar *const *names = names_static;
2204 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2205
2206         for ( ; *names && ms_wcCharsetName.empty(); ++names )
2207         {
2208             const wxString nameCS(*names);
2209
2210             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
2211             wxString nameXE(nameCS);
2212
2213 #ifdef WORDS_BIGENDIAN
2214                 nameXE += wxT("BE");
2215 #else // little endian
2216                 nameXE += wxT("LE");
2217 #endif
2218
2219             wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2220                        nameXE.c_str());
2221
2222             m2w = iconv_open(nameXE.ToAscii(), name);
2223             if ( m2w == ICONV_T_INVALID )
2224             {
2225                 // try charset w/o bytesex info (e.g. "UCS4")
2226                 wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2227                            nameCS.c_str());
2228                 m2w = iconv_open(nameCS.ToAscii(), name);
2229
2230                 // and check for bytesex ourselves:
2231                 if ( m2w != ICONV_T_INVALID )
2232                 {
2233                     char    buf[2], *bufPtr;
2234                     wchar_t wbuf[2];
2235                     size_t  insz, outsz;
2236                     size_t  res;
2237
2238                     buf[0] = 'A';
2239                     buf[1] = 0;
2240                     wbuf[0] = 0;
2241                     insz = 2;
2242                     outsz = SIZEOF_WCHAR_T * 2;
2243                     char* wbufPtr = (char*)wbuf;
2244                     bufPtr = buf;
2245
2246                     res = iconv(
2247                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
2248                         &wbufPtr, &outsz);
2249
2250                     if (ICONV_FAILED(res, insz))
2251                     {
2252                         wxLogLastError(wxT("iconv"));
2253                         wxLogError(_("Conversion to charset '%s' doesn't work."),
2254                                    nameCS.c_str());
2255                     }
2256                     else // ok, can convert to this encoding, remember it
2257                     {
2258                         ms_wcCharsetName = nameCS;
2259                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
2260                     }
2261                 }
2262             }
2263             else // use charset not requiring byte swapping
2264             {
2265                 ms_wcCharsetName = nameXE;
2266             }
2267         }
2268
2269         wxLogTrace(TRACE_STRCONV,
2270                    wxT("iconv wchar_t charset is \"%s\"%s"),
2271                    ms_wcCharsetName.empty() ? wxString("<none>")
2272                                             : ms_wcCharsetName,
2273                    ms_wcNeedsSwap ? wxT(" (needs swap)")
2274                                   : wxT(""));
2275     }
2276     else // we already have ms_wcCharsetName
2277     {
2278         m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
2279     }
2280
2281     if ( ms_wcCharsetName.empty() )
2282     {
2283         w2m = ICONV_T_INVALID;
2284     }
2285     else
2286     {
2287         w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
2288         if ( w2m == ICONV_T_INVALID )
2289         {
2290             wxLogTrace(TRACE_STRCONV,
2291                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
2292                        ms_wcCharsetName.c_str(), name);
2293         }
2294     }
2295 }
2296
2297 wxMBConv_iconv::~wxMBConv_iconv()
2298 {
2299     free(const_cast<char *>(m_name));
2300
2301     if ( m2w != ICONV_T_INVALID )
2302         iconv_close(m2w);
2303     if ( w2m != ICONV_T_INVALID )
2304         iconv_close(w2m);
2305 }
2306
2307 size_t
2308 wxMBConv_iconv::ToWChar(wchar_t *dst, size_t dstLen,
2309                         const char *src, size_t srcLen) const
2310 {
2311     if ( srcLen == wxNO_LEN )
2312     {
2313         // find the string length: notice that must be done differently for
2314         // NUL-terminated strings and UTF-16/32 which are terminated with 2/4
2315         // consecutive NULs
2316         const size_t nulLen = GetMBNulLen();
2317         switch ( nulLen )
2318         {
2319             default:
2320                 return wxCONV_FAILED;
2321
2322             case 1:
2323                 srcLen = strlen(src); // arguably more optimized than our version
2324                 break;
2325
2326             case 2:
2327             case 4:
2328                 // for UTF-16/32 not only we need to have 2/4 consecutive NULs
2329                 // but they also have to start at character boundary and not
2330                 // span two adjacent characters
2331                 const char *p;
2332                 for ( p = src; NotAllNULs(p, nulLen); p += nulLen )
2333                     ;
2334                 srcLen = p - src;
2335                 break;
2336         }
2337
2338         // when we're determining the length of the string ourselves we count
2339         // the terminating NUL(s) as part of it and always NUL-terminate the
2340         // output
2341         srcLen += nulLen;
2342     }
2343
2344     // we express length in the number of (wide) characters but iconv always
2345     // counts buffer sizes it in bytes
2346     dstLen *= SIZEOF_WCHAR_T;
2347
2348 #if wxUSE_THREADS
2349     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
2350     //     Unfortunately there are a couple of global wxCSConv objects such as
2351     //     wxConvLocal that are used all over wx code, so we have to make sure
2352     //     the handle is used by at most one thread at the time. Otherwise
2353     //     only a few wx classes would be safe to use from non-main threads
2354     //     as MB<->WC conversion would fail "randomly".
2355     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2356 #endif // wxUSE_THREADS
2357
2358     size_t res, cres;
2359     const char *pszPtr = src;
2360
2361     if ( dst )
2362     {
2363         char* bufPtr = (char*)dst;
2364
2365         // have destination buffer, convert there
2366         size_t dstLenOrig = dstLen;
2367         cres = iconv(m2w,
2368                      ICONV_CHAR_CAST(&pszPtr), &srcLen,
2369                      &bufPtr, &dstLen);
2370
2371         // convert the number of bytes converted as returned by iconv to the
2372         // number of (wide) characters converted that we need
2373         res = (dstLenOrig - dstLen) / SIZEOF_WCHAR_T;
2374
2375         if (ms_wcNeedsSwap)
2376         {
2377             // convert to native endianness
2378             for ( unsigned i = 0; i < res; i++ )
2379                 dst[i] = WC_BSWAP(dst[i]);
2380         }
2381     }
2382     else // no destination buffer
2383     {
2384         // convert using temp buffer to calculate the size of the buffer needed
2385         wchar_t tbuf[256];
2386         res = 0;
2387
2388         do
2389         {
2390             char* bufPtr = (char*)tbuf;
2391             dstLen = 8 * SIZEOF_WCHAR_T;
2392
2393             cres = iconv(m2w,
2394                          ICONV_CHAR_CAST(&pszPtr), &srcLen,
2395                          &bufPtr, &dstLen );
2396
2397             res += 8 - (dstLen / SIZEOF_WCHAR_T);
2398         }
2399         while ((cres == (size_t)-1) && (errno == E2BIG));
2400     }
2401
2402     if (ICONV_FAILED(cres, srcLen))
2403     {
2404         //VS: it is ok if iconv fails, hence trace only
2405         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2406         return wxCONV_FAILED;
2407     }
2408
2409     return res;
2410 }
2411
2412 size_t wxMBConv_iconv::FromWChar(char *dst, size_t dstLen,
2413                                  const wchar_t *src, size_t srcLen) const
2414 {
2415 #if wxUSE_THREADS
2416     // NB: explained in MB2WC
2417     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2418 #endif
2419
2420     if ( srcLen == wxNO_LEN )
2421         srcLen = wxWcslen(src) + 1;
2422
2423     size_t inbuflen = srcLen * SIZEOF_WCHAR_T;
2424     size_t outbuflen = dstLen;
2425     size_t res, cres;
2426
2427     wchar_t *tmpbuf = 0;
2428
2429     if (ms_wcNeedsSwap)
2430     {
2431         // need to copy to temp buffer to switch endianness
2432         // (doing WC_BSWAP twice on the original buffer won't work, as it
2433         //  could be in read-only memory, or be accessed in some other thread)
2434         tmpbuf = (wchar_t *)malloc(inbuflen);
2435         for ( size_t i = 0; i < srcLen; i++ )
2436             tmpbuf[i] = WC_BSWAP(src[i]);
2437
2438         src = tmpbuf;
2439     }
2440
2441     char* inbuf = (char*)src;
2442     if ( dst )
2443     {
2444         // have destination buffer, convert there
2445         cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2446
2447         res = dstLen - outbuflen;
2448     }
2449     else // no destination buffer
2450     {
2451         // convert using temp buffer to calculate the size of the buffer needed
2452         char tbuf[256];
2453         res = 0;
2454         do
2455         {
2456             dst = tbuf;
2457             outbuflen = WXSIZEOF(tbuf);
2458
2459             cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2460
2461             res += WXSIZEOF(tbuf) - outbuflen;
2462         }
2463         while ((cres == (size_t)-1) && (errno == E2BIG));
2464     }
2465
2466     if (ms_wcNeedsSwap)
2467     {
2468         free(tmpbuf);
2469     }
2470
2471     if (ICONV_FAILED(cres, inbuflen))
2472     {
2473         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2474         return wxCONV_FAILED;
2475     }
2476
2477     return res;
2478 }
2479
2480 size_t wxMBConv_iconv::GetMBNulLen() const
2481 {
2482     if ( m_minMBCharWidth == 0 )
2483     {
2484         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
2485
2486 #if wxUSE_THREADS
2487         // NB: explained in MB2WC
2488         wxMutexLocker lock(self->m_iconvMutex);
2489 #endif
2490
2491         const wchar_t *wnul = L"";
2492         char buf[8]; // should be enough for NUL in any encoding
2493         size_t inLen = sizeof(wchar_t),
2494                outLen = WXSIZEOF(buf);
2495         char *inBuff = (char *)wnul;
2496         char *outBuff = buf;
2497         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
2498         {
2499             self->m_minMBCharWidth = (size_t)-1;
2500         }
2501         else // ok
2502         {
2503             self->m_minMBCharWidth = outBuff - buf;
2504         }
2505     }
2506
2507     return m_minMBCharWidth;
2508 }
2509
2510 #if wxUSE_UNICODE_UTF8
2511 bool wxMBConv_iconv::IsUTF8() const
2512 {
2513     return wxStricmp(m_name, "UTF-8") == 0 ||
2514            wxStricmp(m_name, "UTF8") == 0;
2515 }
2516 #endif
2517
2518 #endif // HAVE_ICONV
2519
2520
2521 // ============================================================================
2522 // Win32 conversion classes
2523 // ============================================================================
2524
2525 #ifdef wxHAVE_WIN32_MB2WC
2526
2527 // from utils.cpp
2528 #if wxUSE_FONTMAP
2529 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
2530 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
2531 #endif
2532
2533 class wxMBConv_win32 : public wxMBConv
2534 {
2535 public:
2536     wxMBConv_win32()
2537     {
2538         m_CodePage = CP_ACP;
2539         m_minMBCharWidth = 0;
2540     }
2541
2542     wxMBConv_win32(const wxMBConv_win32& conv)
2543         : wxMBConv()
2544     {
2545         m_CodePage = conv.m_CodePage;
2546         m_minMBCharWidth = conv.m_minMBCharWidth;
2547     }
2548
2549 #if wxUSE_FONTMAP
2550     wxMBConv_win32(const char* name)
2551     {
2552         m_CodePage = wxCharsetToCodepage(name);
2553         m_minMBCharWidth = 0;
2554     }
2555
2556     wxMBConv_win32(wxFontEncoding encoding)
2557     {
2558         m_CodePage = wxEncodingToCodepage(encoding);
2559         m_minMBCharWidth = 0;
2560     }
2561 #endif // wxUSE_FONTMAP
2562
2563     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2564     {
2565         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2566         // the behaviour is not compatible with the Unix version (using iconv)
2567         // and break the library itself, e.g. wxTextInputStream::NextChar()
2568         // wouldn't work if reading an incomplete MB char didn't result in an
2569         // error
2570         //
2571         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2572         // Win XP or newer and it is not supported for UTF-[78] so we always
2573         // use our own conversions in this case. See
2574         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2575         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2576         if ( m_CodePage == CP_UTF8 )
2577         {
2578             return wxMBConvUTF8().MB2WC(buf, psz, n);
2579         }
2580
2581         if ( m_CodePage == CP_UTF7 )
2582         {
2583             return wxMBConvUTF7().MB2WC(buf, psz, n);
2584         }
2585
2586         int flags = 0;
2587         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2588                 IsAtLeastWin2kSP4() )
2589         {
2590             flags = MB_ERR_INVALID_CHARS;
2591         }
2592
2593         const size_t len = ::MultiByteToWideChar
2594                              (
2595                                 m_CodePage,     // code page
2596                                 flags,          // flags: fall on error
2597                                 psz,            // input string
2598                                 -1,             // its length (NUL-terminated)
2599                                 buf,            // output string
2600                                 buf ? n : 0     // size of output buffer
2601                              );
2602         if ( !len )
2603         {
2604             // function totally failed
2605             return wxCONV_FAILED;
2606         }
2607
2608         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2609         // check if we succeeded, by doing a double trip:
2610         if ( !flags && buf )
2611         {
2612             const size_t mbLen = strlen(psz);
2613             wxCharBuffer mbBuf(mbLen);
2614             if ( ::WideCharToMultiByte
2615                    (
2616                       m_CodePage,
2617                       0,
2618                       buf,
2619                       -1,
2620                       mbBuf.data(),
2621                       mbLen + 1,        // size in bytes, not length
2622                       NULL,
2623                       NULL
2624                    ) == 0 ||
2625                   strcmp(mbBuf, psz) != 0 )
2626             {
2627                 // we didn't obtain the same thing we started from, hence
2628                 // the conversion was lossy and we consider that it failed
2629                 return wxCONV_FAILED;
2630             }
2631         }
2632
2633         // note that it returns count of written chars for buf != NULL and size
2634         // of the needed buffer for buf == NULL so in either case the length of
2635         // the string (which never includes the terminating NUL) is one less
2636         return len - 1;
2637     }
2638
2639     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2640     {
2641         /*
2642             we have a problem here: by default, WideCharToMultiByte() may
2643             replace characters unrepresentable in the target code page with bad
2644             quality approximations such as turning "1/2" symbol (U+00BD) into
2645             "1" for the code pages which don't have it and we, obviously, want
2646             to avoid this at any price
2647
2648             the trouble is that this function does it _silently_, i.e. it won't
2649             even tell us whether it did or not... Win98/2000 and higher provide
2650             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2651             we have to resort to a round trip, i.e. check that converting back
2652             results in the same string -- this is, of course, expensive but
2653             otherwise we simply can't be sure to not garble the data.
2654          */
2655
2656         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2657         // it doesn't work with CJK encodings (which we test for rather roughly
2658         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2659         // supporting it
2660         BOOL usedDef wxDUMMY_INITIALIZE(false);
2661         BOOL *pUsedDef;
2662         int flags;
2663         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2664         {
2665             // it's our lucky day
2666             flags = WC_NO_BEST_FIT_CHARS;
2667             pUsedDef = &usedDef;
2668         }
2669         else // old system or unsupported encoding
2670         {
2671             flags = 0;
2672             pUsedDef = NULL;
2673         }
2674
2675         const size_t len = ::WideCharToMultiByte
2676                              (
2677                                 m_CodePage,     // code page
2678                                 flags,          // either none or no best fit
2679                                 pwz,            // input string
2680                                 -1,             // it is (wide) NUL-terminated
2681                                 buf,            // output buffer
2682                                 buf ? n : 0,    // and its size
2683                                 NULL,           // default "replacement" char
2684                                 pUsedDef        // [out] was it used?
2685                              );
2686
2687         if ( !len )
2688         {
2689             // function totally failed
2690             return wxCONV_FAILED;
2691         }
2692
2693         // we did something, check if we really succeeded
2694         if ( flags )
2695         {
2696             // check if the conversion failed, i.e. if any replacements
2697             // were done
2698             if ( usedDef )
2699                 return wxCONV_FAILED;
2700         }
2701         else // we must resort to double tripping...
2702         {
2703             // first we need to ensure that we really have the MB data: this is
2704             // not the case if we're called with NULL buffer, in which case we
2705             // need to do the conversion yet again
2706             wxCharBuffer bufDef;
2707             if ( !buf )
2708             {
2709                 bufDef = wxCharBuffer(len);
2710                 buf = bufDef.data();
2711                 if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
2712                                             buf, len, NULL, NULL) )
2713                     return wxCONV_FAILED;
2714             }
2715
2716             if ( !n )
2717                 n = wcslen(pwz);
2718             wxWCharBuffer wcBuf(n);
2719             if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED ||
2720                     wcscmp(wcBuf, pwz) != 0 )
2721             {
2722                 // we didn't obtain the same thing we started from, hence
2723                 // the conversion was lossy and we consider that it failed
2724                 return wxCONV_FAILED;
2725             }
2726         }
2727
2728         // see the comment above for the reason of "len - 1"
2729         return len - 1;
2730     }
2731
2732     virtual size_t GetMBNulLen() const
2733     {
2734         if ( m_minMBCharWidth == 0 )
2735         {
2736             int len = ::WideCharToMultiByte
2737                         (
2738                             m_CodePage,     // code page
2739                             0,              // no flags
2740                             L"",            // input string
2741                             1,              // translate just the NUL
2742                             NULL,           // output buffer
2743                             0,              // and its size
2744                             NULL,           // no replacement char
2745                             NULL            // [out] don't care if it was used
2746                         );
2747
2748             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2749             switch ( len )
2750             {
2751                 default:
2752                     wxLogDebug(wxT("Unexpected NUL length %d"), len);
2753                     self->m_minMBCharWidth = (size_t)-1;
2754                     break;
2755
2756                 case 0:
2757                     self->m_minMBCharWidth = (size_t)-1;
2758                     break;
2759
2760                 case 1:
2761                 case 2:
2762                 case 4:
2763                     self->m_minMBCharWidth = len;
2764                     break;
2765             }
2766         }
2767
2768         return m_minMBCharWidth;
2769     }
2770
2771     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2772
2773     bool IsOk() const { return m_CodePage != -1; }
2774
2775 private:
2776     static bool CanUseNoBestFit()
2777     {
2778         static int s_isWin98Or2k = -1;
2779
2780         if ( s_isWin98Or2k == -1 )
2781         {
2782             int verMaj, verMin;
2783             switch ( wxGetOsVersion(&verMaj, &verMin) )
2784             {
2785                 case wxOS_WINDOWS_9X:
2786                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2787                     break;
2788
2789                 case wxOS_WINDOWS_NT:
2790                     s_isWin98Or2k = verMaj >= 5;
2791                     break;
2792
2793                 default:
2794                     // unknown: be conservative by default
2795                     s_isWin98Or2k = 0;
2796                     break;
2797             }
2798
2799             wxASSERT_MSG( s_isWin98Or2k != -1, wxT("should be set above") );
2800         }
2801
2802         return s_isWin98Or2k == 1;
2803     }
2804
2805     static bool IsAtLeastWin2kSP4()
2806     {
2807 #ifdef __WXWINCE__
2808         return false;
2809 #else
2810         static int s_isAtLeastWin2kSP4 = -1;
2811
2812         if ( s_isAtLeastWin2kSP4 == -1 )
2813         {
2814             OSVERSIONINFOEX ver;
2815
2816             memset(&ver, 0, sizeof(ver));
2817             ver.dwOSVersionInfoSize = sizeof(ver);
2818             GetVersionEx((OSVERSIONINFO*)&ver);
2819
2820             s_isAtLeastWin2kSP4 =
2821               ((ver.dwMajorVersion > 5) || // Vista+
2822                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2823                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2824                ver.wServicePackMajor >= 4)) // 2000 SP4+
2825               ? 1 : 0;
2826         }
2827
2828         return s_isAtLeastWin2kSP4 == 1;
2829 #endif
2830     }
2831
2832
2833     // the code page we're working with
2834     long m_CodePage;
2835
2836     // cached result of GetMBNulLen(), set to 0 initially meaning
2837     // "unknown"
2838     size_t m_minMBCharWidth;
2839 };
2840
2841 #endif // wxHAVE_WIN32_MB2WC
2842
2843
2844 // ============================================================================
2845 // wxEncodingConverter based conversion classes
2846 // ============================================================================
2847
2848 #if wxUSE_FONTMAP
2849
2850 class wxMBConv_wxwin : public wxMBConv
2851 {
2852 private:
2853     void Init()
2854     {
2855         // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2856         // The wxMBConv_cf class does a better job.
2857         m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
2858                m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2859                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2860     }
2861
2862 public:
2863     // temporarily just use wxEncodingConverter stuff,
2864     // so that it works while a better implementation is built
2865     wxMBConv_wxwin(const char* name)
2866     {
2867         if (name)
2868             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2869         else
2870             m_enc = wxFONTENCODING_SYSTEM;
2871
2872         Init();
2873     }
2874
2875     wxMBConv_wxwin(wxFontEncoding enc)
2876     {
2877         m_enc = enc;
2878
2879         Init();
2880     }
2881
2882     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2883     {
2884         size_t inbuf = strlen(psz);
2885         if (buf)
2886         {
2887             if (!m2w.Convert(psz, buf))
2888                 return wxCONV_FAILED;
2889         }
2890         return inbuf;
2891     }
2892
2893     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2894     {
2895         const size_t inbuf = wxWcslen(psz);
2896         if (buf)
2897         {
2898             if (!w2m.Convert(psz, buf))
2899                 return wxCONV_FAILED;
2900         }
2901
2902         return inbuf;
2903     }
2904
2905     virtual size_t GetMBNulLen() const
2906     {
2907         switch ( m_enc )
2908         {
2909             case wxFONTENCODING_UTF16BE:
2910             case wxFONTENCODING_UTF16LE:
2911                 return 2;
2912
2913             case wxFONTENCODING_UTF32BE:
2914             case wxFONTENCODING_UTF32LE:
2915                 return 4;
2916
2917             default:
2918                 return 1;
2919         }
2920     }
2921
2922     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2923
2924     bool IsOk() const { return m_ok; }
2925
2926 public:
2927     wxFontEncoding m_enc;
2928     wxEncodingConverter m2w, w2m;
2929
2930 private:
2931     // were we initialized successfully?
2932     bool m_ok;
2933
2934     wxDECLARE_NO_COPY_CLASS(wxMBConv_wxwin);
2935 };
2936
2937 // make the constructors available for unit testing
2938 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
2939 {
2940     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2941     if ( !result->IsOk() )
2942     {
2943         delete result;
2944         return 0;
2945     }
2946
2947     return result;
2948 }
2949
2950 #endif // wxUSE_FONTMAP
2951
2952 // ============================================================================
2953 // wxCSConv implementation
2954 // ============================================================================
2955
2956 void wxCSConv::Init()
2957 {
2958     m_name = NULL;
2959     m_convReal =  NULL;
2960 }
2961
2962 void wxCSConv::SetEncoding(wxFontEncoding encoding)
2963 {
2964     switch ( encoding )
2965     {
2966         case wxFONTENCODING_MAX:
2967         case wxFONTENCODING_SYSTEM:
2968             if ( m_name )
2969             {
2970                 // It's ok to not have encoding value if we have a name for it.
2971                 m_encoding = wxFONTENCODING_SYSTEM;
2972             }
2973             else // No name neither.
2974             {
2975                 // Fall back to the system default encoding in this case (not
2976                 // sure how much sense does this make but this is how the old
2977                 // code used to behave).
2978 #if wxUSE_INTL
2979                 m_encoding = wxLocale::GetSystemEncoding();
2980                 if ( m_encoding == wxFONTENCODING_SYSTEM )
2981 #endif // wxUSE_INTL
2982                     m_encoding = wxFONTENCODING_ISO8859_1;
2983             }
2984             break;
2985
2986         case wxFONTENCODING_DEFAULT:
2987             // wxFONTENCODING_DEFAULT is same as US-ASCII in this context
2988             m_encoding = wxFONTENCODING_ISO8859_1;
2989             break;
2990
2991         default:
2992             // Just use the provided encoding.
2993             m_encoding = encoding;
2994     }
2995 }
2996
2997 wxCSConv::wxCSConv(const wxString& charset)
2998 {
2999     Init();
3000
3001     if ( !charset.empty() )
3002     {
3003         SetName(charset.ToAscii());
3004     }
3005
3006 #if wxUSE_FONTMAP
3007     SetEncoding(wxFontMapperBase::GetEncodingFromName(charset));
3008 #else
3009     SetEncoding(wxFONTENCODING_SYSTEM);
3010 #endif
3011
3012     m_convReal = DoCreate();
3013 }
3014
3015 wxCSConv::wxCSConv(wxFontEncoding encoding)
3016 {
3017     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3018     {
3019         wxFAIL_MSG( wxT("invalid encoding value in wxCSConv ctor") );
3020
3021         encoding = wxFONTENCODING_SYSTEM;
3022     }
3023
3024     Init();
3025
3026     SetEncoding(encoding);
3027
3028     m_convReal = DoCreate();
3029 }
3030
// Release the copy of the name and the real converter, both via Clear().
wxCSConv::~wxCSConv()
{
    Clear();
}
3035
// Copy constructor: the real converter of the source object is not shared or
// cloned, a new one is created from the source name and encoding instead.
wxCSConv::wxCSConv(const wxCSConv& conv)
        : wxMBConv()
{
    Init();

    // the name must be set first as SetEncoding() checks for its presence
    SetName(conv.m_name);
    SetEncoding(conv.m_encoding);

    m_convReal = DoCreate();
}
3046
3047 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3048 {
3049     Clear();
3050
3051     SetName(conv.m_name);
3052     SetEncoding(conv.m_encoding);
3053
3054     m_convReal = DoCreate();
3055
3056     return *this;
3057 }
3058
3059 void wxCSConv::Clear()
3060 {
3061     free(m_name);
3062     m_name = NULL;
3063
3064     wxDELETE(m_convReal);
3065 }
3066
3067 void wxCSConv::SetName(const char *charset)
3068 {
3069     if ( charset )
3070         m_name = wxStrdup(charset);
3071 }
3072
#if wxUSE_FONTMAP

// Hash map type with wxFontEncoding keys and wxString values.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache filled by wxCSConv::DoCreate() in iconv builds: for each encoding it
// stores the name which iconv accepted or an empty string if none of the
// known names worked.
static wxEncodingNameCache gs_nameCache;
#endif
3080
3081 wxMBConv *wxCSConv::DoCreate() const
3082 {
3083 #if wxUSE_FONTMAP
3084     wxLogTrace(TRACE_STRCONV,
3085                wxT("creating conversion for %s"),
3086                (m_name ? m_name
3087                        : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
3088 #endif // wxUSE_FONTMAP
3089
3090     // check for the special case of ASCII or ISO8859-1 charset: as we have
3091     // special knowledge of it anyhow, we don't need to create a special
3092     // conversion object
3093     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3094     {
3095         // don't convert at all
3096         return NULL;
3097     }
3098
3099     // we trust OS to do conversion better than we can so try external
3100     // conversion methods first
3101     //
3102     // the full order is:
3103     //      1. OS conversion (iconv() under Unix or Win32 API)
3104     //      2. hard coded conversions for UTF
3105     //      3. wxEncodingConverter as fall back
3106
3107     // step (1)
3108 #ifdef HAVE_ICONV
3109 #if !wxUSE_FONTMAP
3110     if ( m_name )
3111 #endif // !wxUSE_FONTMAP
3112     {
3113 #if wxUSE_FONTMAP
3114         wxFontEncoding encoding(m_encoding);
3115 #endif
3116
3117         if ( m_name )
3118         {
3119             wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
3120             if ( conv->IsOk() )
3121                 return conv;
3122
3123             delete conv;
3124
3125 #if wxUSE_FONTMAP
3126             encoding =
3127                 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3128 #endif // wxUSE_FONTMAP
3129         }
3130 #if wxUSE_FONTMAP
3131         {
3132             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3133             if ( it != gs_nameCache.end() )
3134             {
3135                 if ( it->second.empty() )
3136                     return NULL;
3137
3138                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
3139                 if ( conv->IsOk() )
3140                     return conv;
3141
3142                 delete conv;
3143             }
3144
3145             const wxChar* const* names = wxFontMapperBase::GetAllEncodingNames(encoding);
3146             // CS : in case this does not return valid names (eg for MacRoman)
3147             // encoding got a 'failure' entry in the cache all the same,
3148             // although it just has to be created using a different method, so
3149             // only store failed iconv creation attempts (or perhaps we
3150             // shoulnd't do this at all ?)
3151             if ( names[0] != NULL )
3152             {
3153                 for ( ; *names; ++names )
3154                 {
3155                     // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3156                     //             will need changes that will obsolete this
3157                     wxString name(*names);
3158                     wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3159                     if ( conv->IsOk() )
3160                     {
3161                         gs_nameCache[encoding] = *names;
3162                         return conv;
3163                     }
3164
3165                     delete conv;
3166                 }
3167
3168                 gs_nameCache[encoding] = wxT(""); // cache the failure
3169             }
3170         }
3171 #endif // wxUSE_FONTMAP
3172     }
3173 #endif // HAVE_ICONV
3174
3175 #ifdef wxHAVE_WIN32_MB2WC
3176     {
3177 #if wxUSE_FONTMAP
3178         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3179                                       : new wxMBConv_win32(m_encoding);
3180         if ( conv->IsOk() )
3181             return conv;
3182
3183         delete conv;
3184 #else
3185         return NULL;
3186 #endif
3187     }
3188 #endif // wxHAVE_WIN32_MB2WC
3189
3190 #ifdef __DARWIN__
3191     {
3192         // leave UTF16 and UTF32 to the built-ins of wx
3193         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3194             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3195         {
3196 #if wxUSE_FONTMAP
3197             wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
3198                                           : new wxMBConv_cf(m_encoding);
3199 #else
3200             wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
3201 #endif
3202
3203             if ( conv->IsOk() )
3204                  return conv;
3205
3206             delete conv;
3207         }
3208     }
3209 #endif // __DARWIN__
3210
3211     // step (2)
3212     wxFontEncoding enc = m_encoding;
3213 #if wxUSE_FONTMAP
3214     if ( enc == wxFONTENCODING_SYSTEM && m_name )
3215     {
3216         // use "false" to suppress interactive dialogs -- we can be called from
3217         // anywhere and popping up a dialog from here is the last thing we want to
3218         // do
3219         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3220     }
3221 #endif // wxUSE_FONTMAP
3222
3223     switch ( enc )
3224     {
3225         case wxFONTENCODING_UTF7:
3226              return new wxMBConvUTF7;
3227
3228         case wxFONTENCODING_UTF8:
3229              return new wxMBConvUTF8;
3230
3231         case wxFONTENCODING_UTF16BE:
3232              return new wxMBConvUTF16BE;
3233
3234         case wxFONTENCODING_UTF16LE:
3235              return new wxMBConvUTF16LE;
3236
3237         case wxFONTENCODING_UTF32BE:
3238              return new wxMBConvUTF32BE;
3239
3240         case wxFONTENCODING_UTF32LE:
3241              return new wxMBConvUTF32LE;
3242
3243         default:
3244              // nothing to do but put here to suppress gcc warnings
3245              break;
3246     }
3247
3248     // step (3)
3249 #if wxUSE_FONTMAP
3250     {
3251         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3252                                       : new wxMBConv_wxwin(m_encoding);
3253         if ( conv->IsOk() )
3254             return conv;
3255
3256         delete conv;
3257     }
3258
3259     wxLogTrace(TRACE_STRCONV,
3260                wxT("encoding \"%s\" is not supported by this system"),
3261                (m_name ? wxString(m_name)
3262                        : wxFontMapperBase::GetEncodingName(m_encoding)));
3263 #endif // wxUSE_FONTMAP
3264
3265     return NULL;
3266 }
3267
3268 bool wxCSConv::IsOk() const
3269 {
3270     // special case: no convReal created for wxFONTENCODING_ISO8859_1
3271     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3272         return true; // always ok as we do it ourselves
3273
3274     // m_convReal->IsOk() is called at its own creation, so we know it must
3275     // be ok if m_convReal is non-NULL
3276     return m_convReal != NULL;
3277 }
3278
3279 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3280                          const char *src, size_t srcLen) const
3281 {
3282     if (m_convReal)
3283         return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3284
3285     // latin-1 (direct)
3286     if ( srcLen == wxNO_LEN )
3287         srcLen = strlen(src) + 1; // take trailing NUL too
3288
3289     if ( dst )
3290     {
3291         if ( dstLen < srcLen )
3292             return wxCONV_FAILED;
3293
3294         for ( size_t n = 0; n < srcLen; n++ )
3295             dst[n] = (unsigned char)(src[n]);
3296     }
3297
3298     return srcLen;
3299 }
3300
3301 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3302                            const wchar_t *src, size_t srcLen) const
3303 {
3304     if (m_convReal)
3305         return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3306
3307     // latin-1 (direct)
3308     if ( srcLen == wxNO_LEN )
3309         srcLen = wxWcslen(src) + 1;
3310
3311     if ( dst )
3312     {
3313         if ( dstLen < srcLen )
3314             return wxCONV_FAILED;
3315
3316         for ( size_t n = 0; n < srcLen; n++ )
3317         {
3318             if ( src[n] > 0xFF )
3319                 return wxCONV_FAILED;
3320
3321             dst[n] = (char)src[n];
3322         }
3323
3324     }
3325     else // still need to check the input validity
3326     {
3327         for ( size_t n = 0; n < srcLen; n++ )
3328         {
3329             if ( src[n] > 0xFF )
3330                 return wxCONV_FAILED;
3331         }
3332     }
3333
3334     return srcLen;
3335 }
3336
3337 size_t wxCSConv::GetMBNulLen() const
3338 {
3339     if ( m_convReal )
3340         return m_convReal->GetMBNulLen();
3341
3342     // otherwise, we are ISO-8859-1
3343     return 1;
3344 }
3345
3346 #if wxUSE_UNICODE_UTF8
3347 bool wxCSConv::IsUTF8() const
3348 {
3349     if ( m_convReal )
3350         return m_convReal->IsUTF8();
3351
3352     // otherwise, we are ISO-8859-1
3353     return false;
3354 }
3355 #endif
3356
3357
3358 #if wxUSE_UNICODE
3359
3360 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3361 {
3362     if ( !s )
3363         return wxWCharBuffer();
3364
3365     wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3366     if ( !wbuf )
3367         wbuf = wxMBConvUTF8().cMB2WX(s);
3368     if ( !wbuf )
3369         wbuf = wxConvISO8859_1.cMB2WX(s);
3370
3371     return wbuf;
3372 }
3373
3374 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3375 {
3376     if ( !ws )
3377         return wxCharBuffer();
3378
3379     wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3380     if ( !buf )
3381         buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3382
3383     return buf;
3384 }
3385
3386 #endif // wxUSE_UNICODE
3387
3388 // ----------------------------------------------------------------------------
3389 // globals
3390 // ----------------------------------------------------------------------------
3391
3392 // NB: The reason why we create converted objects in this convoluted way,
3393 //     using a factory function instead of global variable, is that they
3394 //     may be used at static initialization time (some of them are used by
3395 //     wxString ctors and there may be a global wxString object). In other
3396 //     words, possibly _before_ the converter global object would be
3397 //     initialized.
3398
// Get rid of any macros with the names of the global converters (presumably
// defined by wx/strconv.h -- TODO confirm) so that these names can be used
// as plain identifiers and for token pasting by the macros below.
#undef wxConvLibc
#undef wxConvUTF8
#undef wxConvUTF7
#undef wxConvLocal
#undef wxConvISO8859_1

// Define everything needed for the global converter called "name":
//  - name##Ptr: exported pointer of type klass*, initially NULL
//  - wxGet_##name##Ptr(): exported function returning the address of its
//    function-local static object of type impl_klass, which is constructed
//    with ctor_args the first time the function is called
//  - gs_##name##instance: file-local pointer initialized by calling this
//    function, see the comment inside the macro for the reason
//
// The macro must be followed by a semicolon.
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// Simplified version for the common case when the object is of the same
// class as the pointer to it.
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3419
#ifdef __INTELC__
    // disable warning "variable 'xxx' was declared but never referenced"
    #pragma warning(disable: 177)
#endif // Intel C++

// wxConvLibc is a wxMBConv_win32 object under Windows and a wxMBConvLibc one
// elsewhere, both constructed without arguments. The wxMBConv_cf variant is
// currently disabled by the "#elif 0" below.
#ifdef __WINDOWS__
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#elif 0 // defined(__WXOSX__)
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_cf, wxConvLibc,  (wxFONTENCODING_UTF8));
#else
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif
3432
// NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
//     passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
//     provokes an error message about "not enough macro parameters"; and we
//     can't use "()" here as the name##Obj declaration would be parsed as a
//     function declaration then, so use a semicolon and live with an extra
//     empty statement (and hope that no compiler warns about this)
// default-constructed UTF-8 and UTF-7 converters (the ";" argument makes the
// objects be declared without any constructor arguments)
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);

// wxCSConv-based converters for the system encoding and for ISO8859-1
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));

// these pointers initially refer to the wxConvLibc and wxConvLocal objects
// respectively, obtained through their accessor functions
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
3447
#ifdef __DARWIN__
// It is important to use this conversion object under Darwin as it ensures
// that Unicode strings are (re)composed correctly even though xnu kernel uses
// decomposed form internally (at least for the file names).
static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
#endif

// The converter used for the file names: the special object defined above
// under Darwin, the same object as wxConvLibc everywhere else.
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
#ifdef __DARWIN__
                                    &wxConvMacUTF8DObj;
#else // !__DARWIN__
                                    wxGet_wxConvLibcPtr();
#endif // __DARWIN__/!__DARWIN__