src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif  //__BORLANDC__
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/intl.h"
  24     #include "wx/log.h"
  25     #include "wx/utils.h"
  26     #include "wx/hashmap.h"
  27 #endif
  28
  29 #include "wx/strconv.h"
  30
  31 #ifndef __WXWINCE__
  32 #include <errno.h>
  33 #endif
  34
  35 #include <ctype.h>
  36 #include <string.h>
  37 #include <stdlib.h>
  38
  39 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42     #define wxHAVE_WIN32_MB2WC
  43 #endif
  44
  45 #ifdef HAVE_ICONV
  46     #include <iconv.h>
  47     #include "wx/thread.h"
  48 #endif
  49
  50 #include "wx/encconv.h"
  51 #include "wx/fontmap.h"
  52
  53 #ifdef __DARWIN__
  54 #include "wx/osx/core/private/strconv_cf.h"
  55 #endif //def __DARWIN__
  56
  57
  58 #define TRACE_STRCONV wxT("strconv")
  59
  60 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
  61 // be 4 bytes
  62 #if SIZEOF_WCHAR_T == 2
  63     #define WC_UTF16
  64 #endif
  65
  66
  67 // ============================================================================
  68 // implementation
  69 // ============================================================================
  70
  71 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  72 static bool NotAllNULs(const char *p, size_t n)
  73 {
  74     while ( n && *p++ == '\0' )
  75         n--;
  76
  77     return n != 0;
  78 }
  79
  80 // ----------------------------------------------------------------------------
  81 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
  82 // ----------------------------------------------------------------------------
  83
  84 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  85 {
  86     if (input <= 0xffff)
  87     {
  88         if (output)
  89             *output = (wxUint16) input;
  90
  91         return 1;
  92     }
  93     else if (input >= 0x110000)
  94     {
  95         return wxCONV_FAILED;
  96     }
  97     else
  98     {
  99         if (output)
 100         {
 101             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 102             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 103         }
 104
 105         return 2;
 106     }
 107 }
 108
 109 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 110 {
 111     if ((*input < 0xd800) || (*input > 0xdfff))
 112     {
 113         output = *input;
 114         return 1;
 115     }
 116     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 117     {
 118         output = *input;
 119         return wxCONV_FAILED;
 120     }
 121     else
 122     {
 123         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 124         return 2;
 125     }
 126 }
 127
 128 #ifdef WC_UTF16
 129     typedef wchar_t wxDecodeSurrogate_t;
 130 #else // !WC_UTF16
 131     typedef wxUint16 wxDecodeSurrogate_t;
 132 #endif // WC_UTF16/!WC_UTF16
 133
 134 // returns the next UTF-32 character from the wchar_t buffer and advances the
 135 // pointer to the character after this one
 136 //
 137 // if an invalid character is found, *pSrc is set to NULL, the caller must
 138 // check for this
 139 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 140 {
 141     wxUint32 out;
 142     const size_t
 143         n = decode_utf16(reinterpret_cast<const wxUint16 *>(*pSrc), out);
 144     if ( n == wxCONV_FAILED )
 145         *pSrc = NULL;
 146     else
 147         *pSrc += n;
 148
 149     return out;
 150 }
 151
 152 // ----------------------------------------------------------------------------
 153 // wxMBConv
 154 // ----------------------------------------------------------------------------
 155
 156 size_t
 157 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
 158                   const char *src, size_t srcLen) const
 159 {
 160     // although new conversion classes are supposed to implement this function
 161     // directly, the existing ones only implement the old MB2WC() and so, to
 162     // avoid to have to rewrite all conversion classes at once, we provide a
 163     // default (but not efficient) implementation of this one in terms of the
 164     // old function by copying the input to ensure that it's NUL-terminated and
 165     // then using MB2WC() to convert it
 166     //
 167     // moreover, some conversion classes simply can't implement ToWChar()
 168     // directly, the primary example is wxConvLibc: mbstowcs() only handles
 169     // NUL-terminated strings
 170
 171     // the number of chars [which would be] written to dst [if it were not NULL]
 172     size_t dstWritten = 0;
 173
 174     // the number of NULs terminating this string
 175     size_t nulLen = 0;  // not really needed, but just to avoid warnings
 176
 177     // if we were not given the input size we just have to assume that the
 178     // string is properly terminated as we have no way of knowing how long it
 179     // is anyhow, but if we do have the size check whether there are enough
 180     // NULs at the end
 181     wxCharBuffer bufTmp;
 182     const char *srcEnd;
 183     if ( srcLen != wxNO_LEN )
 184     {
 185         // we need to know how to find the end of this string
 186         nulLen = GetMBNulLen();
 187         if ( nulLen == wxCONV_FAILED )
 188             return wxCONV_FAILED;
 189
 190         // if there are enough NULs we can avoid the copy
 191         if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
 192         {
 193             // make a copy in order to properly NUL-terminate the string
 194             bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
 195             char * const p = bufTmp.data();
 196             memcpy(p, src, srcLen);
 197             for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
 198                 *s = '\0';
 199
 200             src = bufTmp;
 201         }
 202
 203         srcEnd = src + srcLen;
 204     }
 205     else // quit after the first loop iteration
 206     {
 207         srcEnd = NULL;
 208     }
 209
 210     // the idea of this code is straightforward: it converts a NUL-terminated
 211     // chunk of the string during each iteration and updates the output buffer
 212     // with the result
 213     //
 214     // all the complication come from the fact that this function, for
 215     // historical reasons, must behave in 2 subtly different ways when it's
 216     // called with a fixed number of characters and when it's called for the
 217     // entire NUL-terminated string: in the former case (srcEnd != NULL) we
 218     // must count all characters we convert, NUL or not; but in the latter we
 219     // do not count the trailing NUL -- but still count all the NULs inside the
 220     // string
 221     //
 222     // so for the (simple) former case we just always count the trailing NUL,
 223     // but for the latter we need to wait until we see if there is going to be
 224     // another loop iteration and only count it then
 225     for ( ;; )
 226     {
 227         // try to convert the current chunk
 228         size_t lenChunk = MB2WC(NULL, src, 0);
 229         if ( lenChunk == wxCONV_FAILED )
 230             return wxCONV_FAILED;
 231
 232         dstWritten += lenChunk;
 233         if ( !srcEnd )
 234             dstWritten++;
 235
 236         if ( !lenChunk )
 237         {
 238             // nothing left in the input string, conversion succeeded
 239             break;
 240         }
 241
 242         if ( dst )
 243         {
 244             if ( dstWritten > dstLen )
 245                 return wxCONV_FAILED;
 246
 247             // +1 is for trailing NUL
 248             if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
 249                 return wxCONV_FAILED;
 250
 251             dst += lenChunk;
 252             if ( !srcEnd )
 253                 dst++;
 254         }
 255
 256         if ( !srcEnd )
 257         {
 258             // we convert just one chunk in this case as this is the entire
 259             // string anyhow (and we don't count the trailing NUL in this case)
 260             break;
 261         }
 262
 263         // advance the input pointer past the end of this chunk: notice that we
 264         // will always stop before srcEnd because we know that the chunk is
 265         // always properly NUL-terminated
 266         while ( NotAllNULs(src, nulLen) )
 267         {
 268             // notice that we must skip over multiple bytes here as we suppose
 269             // that if NUL takes 2 or 4 bytes, then all the other characters do
 270             // too and so if advanced by a single byte we might erroneously
 271             // detect sequences of NUL bytes in the middle of the input
 272             src += nulLen;
 273         }
 274
 275         // if the buffer ends before this NUL, we shouldn't count it in our
 276         // output so skip the code below
 277         if ( src == srcEnd )
 278             break;
 279
 280         // do count this terminator as it's inside the buffer we convert
 281         dstWritten++;
 282         if ( dst )
 283             dst++;
 284
 285         src += nulLen; // skip the terminator itself
 286
 287         if ( src >= srcEnd )
 288             break;
 289     }
 290
 291     return dstWritten;
 292 }
 293
 294 size_t
 295 wxMBConv::FromWChar(char *dst, size_t dstLen,
 296                     const wchar_t *src, size_t srcLen) const
 297 {
 298     // the number of chars [which would be] written to dst [if it were not NULL]
 299     size_t dstWritten = 0;
 300
 301     // if we don't know its length we have no choice but to assume that it is
 302     // NUL-terminated (notice that it can still be NUL-terminated even if
 303     // explicit length is given but it doesn't change our return value)
 304     const bool isNulTerminated = srcLen == wxNO_LEN;
 305
 306     // make a copy of the input string unless it is already properly
 307     // NUL-terminated
 308     wxWCharBuffer bufTmp;
 309     if ( isNulTerminated )
 310     {
 311         srcLen = wxWcslen(src) + 1;
 312     }
 313     else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
 314     {
 315         // make a copy in order to properly NUL-terminate the string
 316         bufTmp = wxWCharBuffer(srcLen);
 317         memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
 318         src = bufTmp;
 319     }
 320
 321     const size_t lenNul = GetMBNulLen();
 322     for ( const wchar_t * const srcEnd = src + srcLen;
 323           src < srcEnd;
 324           src++ /* skip L'\0' too */ )
 325     {
 326         // try to convert the current chunk
 327         size_t lenChunk = WC2MB(NULL, src, 0);
 328         if ( lenChunk == wxCONV_FAILED )
 329             return wxCONV_FAILED;
 330
 331         dstWritten += lenChunk;
 332
 333         const wchar_t * const
 334             chunkEnd = isNulTerminated ? srcEnd - 1 : src + wxWcslen(src);
 335
 336         // our return value accounts for the trailing NUL(s), unlike that of
 337         // WC2MB(), however don't do it for the last NUL we artificially added
 338         // ourselves above
 339         if ( chunkEnd < srcEnd )
 340             dstWritten += lenNul;
 341
 342         if ( dst )
 343         {
 344             if ( dstWritten > dstLen )
 345                 return wxCONV_FAILED;
 346
 347             // if we know that there is enough space in the destination buffer
 348             // (because we accounted for lenNul in dstWritten above), we can
 349             // convert directly in place -- but otherwise we need another
 350             // temporary buffer to ensure that we don't overwrite the output
 351             wxCharBuffer dstBuf;
 352             char *dstTmp;
 353             if ( chunkEnd == srcEnd )
 354             {
 355                 dstBuf = wxCharBuffer(lenChunk + lenNul - 1);
 356                 dstTmp = dstBuf.data();
 357             }
 358             else
 359             {
 360                 dstTmp = dst;
 361             }
 362
 363             if ( WC2MB(dstTmp, src, lenChunk + lenNul) == wxCONV_FAILED )
 364                 return wxCONV_FAILED;
 365
 366             if ( dstTmp != dst )
 367             {
 368                 // copy everything up to but excluding the terminating NUL(s)
 369                 // into the real output buffer
 370                 memcpy(dst, dstTmp, lenChunk);
 371
 372                 // micro-optimization: if dstTmp != dst it means that chunkEnd
 373                 // == srcEnd and so we're done, no need to update anything below
 374                 break;
 375             }
 376
 377             dst += lenChunk;
 378             if ( chunkEnd < srcEnd )
 379                 dst += lenNul;
 380         }
 381
 382         src = chunkEnd;
 383     }
 384
 385     return dstWritten;
 386 }
 387
 388 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 389 {
 390     size_t rc = ToWChar(outBuff, outLen, inBuff);
 391     if ( rc != wxCONV_FAILED )
 392     {
 393         // ToWChar() returns the buffer length, i.e. including the trailing
 394         // NUL, while this method doesn't take it into account
 395         rc--;
 396     }
 397
 398     return rc;
 399 }
 400
 401 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 402 {
 403     size_t rc = FromWChar(outBuff, outLen, inBuff);
 404     if ( rc != wxCONV_FAILED )
 405     {
 406         rc -= GetMBNulLen();
 407     }
 408
 409     return rc;
 410 }
 411
 412 wxMBConv::~wxMBConv()
 413 {
 414     // nothing to do here (necessary for Darwin linking probably)
 415 }
 416
 417 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 418 {
 419     if ( psz )
 420     {
 421         // calculate the length of the buffer needed first
 422         const size_t nLen = ToWChar(NULL, 0, psz);
 423         if ( nLen != wxCONV_FAILED )
 424         {
 425             // now do the actual conversion
 426             wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */);
 427
 428             // +1 for the trailing NULL
 429             if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED )
 430                 return buf;
 431         }
 432     }
 433
 434     return wxWCharBuffer();
 435 }
 436
 437 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 438 {
 439     if ( pwz )
 440     {
 441         const size_t nLen = FromWChar(NULL, 0, pwz);
 442         if ( nLen != wxCONV_FAILED )
 443         {
 444             wxCharBuffer buf(nLen - 1);
 445             if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED )
 446                 return buf;
 447         }
 448     }
 449
 450     return wxCharBuffer();
 451 }
 452
 453 const wxWCharBuffer
 454 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 455 {
 456     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 457     if ( dstLen != wxCONV_FAILED )
 458     {
 459         // notice that we allocate space for dstLen+1 wide characters here
 460         // because we want the buffer to always be NUL-terminated, even if the
 461         // input isn't (as otherwise the caller has no way to know its length)
 462         wxWCharBuffer wbuf(dstLen);
 463         wbuf.data()[dstLen] = L'\0';
 464         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 465         {
 466             if ( outLen )
 467             {
 468                 *outLen = dstLen;
 469
 470                 // we also need to handle NUL-terminated input strings
 471                 // specially: for them the output is the length of the string
 472                 // excluding the trailing NUL, however if we're asked to
 473                 // convert a specific number of characters we return the length
 474                 // of the resulting output even if it's NUL-terminated
 475                 if ( inLen == wxNO_LEN )
 476                     (*outLen)--;
 477             }
 478
 479             return wbuf;
 480         }
 481     }
 482
 483     if ( outLen )
 484         *outLen = 0;
 485
 486     return wxWCharBuffer();
 487 }
 488
 489 const wxCharBuffer
 490 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 491 {
 492     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 493     if ( dstLen != wxCONV_FAILED )
 494     {
 495         const size_t nulLen = GetMBNulLen();
 496
 497         // as above, ensure that the buffer is always NUL-terminated, even if
 498         // the input is not
 499         wxCharBuffer buf(dstLen + nulLen - 1);
 500         memset(buf.data() + dstLen, 0, nulLen);
 501         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 502         {
 503             if ( outLen )
 504             {
 505                 *outLen = dstLen;
 506
 507                 if ( inLen == wxNO_LEN )
 508                 {
 509                     // in this case both input and output are NUL-terminated
 510                     // and we're not supposed to count NUL
 511                     *outLen -= nulLen;
 512                 }
 513             }
 514
 515             return buf;
 516         }
 517     }
 518
 519     if ( outLen )
 520         *outLen = 0;
 521
 522     return wxCharBuffer();
 523 }
 524
 525 const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const
 526 {
 527     const size_t srcLen = buf.length();
 528     if ( srcLen )
 529     {
 530         const size_t dstLen = ToWChar(NULL, 0, buf, srcLen);
 531         if ( dstLen != wxCONV_FAILED )
 532         {
 533             wxWCharBuffer wbuf(dstLen);
 534             wbuf.data()[dstLen] = L'\0';
 535             if ( ToWChar(wbuf.data(), dstLen, buf, srcLen) != wxCONV_FAILED )
 536                 return wbuf;
 537         }
 538     }
 539
 540     return wxScopedWCharBuffer::CreateNonOwned(L"", 0);
 541 }
 542
 543 const wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const
 544 {
 545     const size_t srcLen = wbuf.length();
 546     if ( srcLen )
 547     {
 548         const size_t dstLen = FromWChar(NULL, 0, wbuf, srcLen);
 549         if ( dstLen != wxCONV_FAILED )
 550         {
 551             wxCharBuffer buf(dstLen);
 552             buf.data()[dstLen] = '\0';
 553             if ( FromWChar(buf.data(), dstLen, wbuf, srcLen) != wxCONV_FAILED )
 554                 return buf;
 555         }
 556     }
 557
 558     return wxScopedCharBuffer::CreateNonOwned("", 0);
 559 }
 560
 561 // ----------------------------------------------------------------------------
 562 // wxMBConvLibc
 563 // ----------------------------------------------------------------------------
 564
 565 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 566 {
 567     return wxMB2WC(buf, psz, n);
 568 }
 569
 570 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 571 {
 572     return wxWC2MB(buf, psz, n);
 573 }
 574
 575 // ----------------------------------------------------------------------------
 576 // wxConvBrokenFileNames
 577 // ----------------------------------------------------------------------------
 578
 579 #ifdef __UNIX__
 580
 581 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
 582 {
 583     if ( wxStricmp(charset, wxT("UTF-8")) == 0 ||
 584          wxStricmp(charset, wxT("UTF8")) == 0  )
 585         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
 586     else
 587         m_conv = new wxCSConv(charset);
 588 }
 589
 590 #endif // __UNIX__
 591
 592 // ----------------------------------------------------------------------------
 593 // UTF-7
 594 // ----------------------------------------------------------------------------
 595
 596 // Implementation (C) 2004 Fredrik Roubert
 597 //
 598 // Changes to work in streaming mode (C) 2008 Vadim Zeitlin
 599
 600 //
 601 // BASE64 decoding table
 602 //
 603 static const unsigned char utf7unb64[] =
 604 {
 605     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 606     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 607     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 608     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 609     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 610     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 611     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 612     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 613     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 614     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 615     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 616     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 617     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 618     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 619     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 620     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 621     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 622     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 623     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 624     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 625     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 626     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 627     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 628     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 629     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 630     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 631     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 632     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 633     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 634     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 635     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 636     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 637 };
 638
 639 size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
 640                              const char *src, size_t srcLen) const
 641 {
 642     DecoderState stateOrig,
 643                 *statePtr;
 644     if ( srcLen == wxNO_LEN )
 645     {
 646         // convert the entire string, up to and including the trailing NUL
 647         srcLen = strlen(src) + 1;
 648
 649         // when working on the entire strings we don't update nor use the shift
 650         // state from the previous call
 651         statePtr = &stateOrig;
 652     }
 653     else // when working with partial strings we do use the shift state
 654     {
 655         statePtr = const_cast<DecoderState *>(&m_stateDecoder);
 656
 657         // also save the old state to be able to rollback to it on error
 658         stateOrig = m_stateDecoder;
 659     }
 660
 661     // but to simplify the code below we use this variable in both cases
 662     DecoderState& state = *statePtr;
 663
 664
 665     // number of characters [which would have been] written to dst [if it were
 666     // not NULL]
 667     size_t len = 0;
 668
 669     const char * const srcEnd = src + srcLen;
 670
 671     while ( (src < srcEnd) && (!dst || (len < dstLen)) )
 672     {
 673         const unsigned char cc = *src++;
 674
 675         if ( state.IsShifted() )
 676         {
 677             const unsigned char dc = utf7unb64[cc];
 678             if ( dc == 0xff )
 679             {
 680                 // end of encoded part, check that nothing was left: there can
 681                 // be up to 4 bits of 0 padding but nothing else (we also need
 682                 // to check isLSB as we count bits modulo 8 while a valid UTF-7
 683                 // encoded sequence must contain an integral number of UTF-16
 684                 // characters)
 685                 if ( state.isLSB || state.bit > 4 ||
 686                         (state.accum & ((1 << state.bit) - 1)) )
 687                 {
 688                     if ( !len )
 689                         state = stateOrig;
 690
 691                     return wxCONV_FAILED;
 692                 }
 693
 694                 state.ToDirect();
 695
 696                 // re-parse this character normally below unless it's '-' which
 697                 // is consumed by the decoder
 698                 if ( cc == '-' )
 699                     continue;
 700             }
 701             else // valid encoded character
 702             {
 703                 // mini base64 decoder: each character is 6 bits
 704                 state.bit += 6;
 705                 state.accum <<= 6;
 706                 state.accum += dc;
 707
 708                 if ( state.bit >= 8 )
 709                 {
 710                     // got the full byte, consume it
 711                     state.bit -= 8;
 712                     unsigned char b = (state.accum >> state.bit) & 0x00ff;
 713
 714                     if ( state.isLSB )
 715                     {
 716                         // we've got the full word, output it
 717                         if ( dst )
 718                             *dst++ = (state.msb << 8) | b;
 719                         len++;
 720                         state.isLSB = false;
 721                     }
 722                     else // MSB
 723                     {
 724                         // just store it while we wait for LSB
 725                         state.msb = b;
 726                         state.isLSB = true;
 727                     }
 728                 }
 729             }
 730         }
 731
 732         if ( state.IsDirect() )
 733         {
 734             // start of an encoded segment?
 735             if ( cc == '+' )
 736             {
 737                 if ( *src == '-' )
 738                 {
 739                     // just the encoded plus sign, don't switch to shifted mode
 740                     if ( dst )
 741                         *dst++ = '+';
 742                     len++;
 743                     src++;
 744                 }
 745                 else if ( utf7unb64[(unsigned)*src] == 0xff )
 746                 {
 747                     // empty encoded chunks are not allowed
 748                     if ( !len )
 749                         state = stateOrig;
 750
 751                     return wxCONV_FAILED;
 752                 }
 753                 else // base-64 encoded chunk follows
 754                 {
 755                     state.ToShifted();
 756                 }
 757             }
 758             else // not '+'
 759             {
 760                 // only printable 7 bit ASCII characters (with the exception of
 761                 // NUL, TAB, CR and LF) can be used directly
 762                 if ( cc >= 0x7f || (cc < ' ' &&
 763                       !(cc == '\0' || cc == '\t' || cc == '\r' || cc == '\n')) )
 764                     return wxCONV_FAILED;
 765
 766                 if ( dst )
 767                     *dst++ = cc;
 768                 len++;
 769             }
 770         }
 771     }
 772
 773     if ( !len )
 774     {
 775         // as we didn't read any characters we should be called with the same
 776         // data (followed by some more new data) again later so don't save our
 777         // state
 778         state = stateOrig;
 779
 780         return wxCONV_FAILED;
 781     }
 782
 783     return len;
 784 }
 785
 786 //
 787 // BASE64 encoding table
 788 //
 789 static const unsigned char utf7enb64[] =
 790 {
 791     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 792     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 793     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 794     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 795     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 796     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 797     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 798     '4', '5', '6', '7', '8', '9', '+', '/'
 799 };
 800
 801 //
 802 // UTF-7 encoding table
 803 //
 804 // 0 - Set D (directly encoded characters)
 805 // 1 - Set O (optional direct characters)
 806 // 2 - whitespace characters (optional)
 807 // 3 - special characters
 808 //
 809 static const unsigned char utf7encode[128] =
 810 {
 811     0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 812     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 813     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 814     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 815     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 816     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 817     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 818     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 819 };
 820
 821 static inline bool wxIsUTF7Direct(wchar_t wc)
 822 {
 823     return wc < 0x80 && utf7encode[wc] < 1;
 824 }
 825
 826 size_t wxMBConvUTF7::FromWChar(char *dst, size_t dstLen,
 827                                const wchar_t *src, size_t srcLen) const
 828 {
 829     EncoderState stateOrig,
 830                 *statePtr;
 831     if ( srcLen == wxNO_LEN )
 832     {
 833         // we don't apply the stored state when operating on entire strings at
 834         // once
 835         statePtr = &stateOrig;
 836
 837         srcLen = wxWcslen(src) + 1;
 838     }
 839     else // do use the mode we left the output in previously
 840     {
 841         stateOrig = m_stateEncoder;
 842         statePtr = const_cast<EncoderState *>(&m_stateEncoder);
 843     }
 844
 845     EncoderState& state = *statePtr;
 846
 847
 848     size_t len = 0;
 849
 850     const wchar_t * const srcEnd = src + srcLen;
 851     while ( src < srcEnd && (!dst || len < dstLen) )
 852     {
 853         wchar_t cc = *src++;
 854         if ( wxIsUTF7Direct(cc) )
 855         {
 856             if ( state.IsShifted() )
 857             {
 858                 // pad with zeros the last encoded block if necessary
 859                 if ( state.bit )
 860                 {
 861                     if ( dst )
 862                         *dst++ = utf7enb64[((state.accum % 16) << (6 - state.bit)) % 64];
 863                     len++;
 864                 }
 865
 866                 state.ToDirect();
 867
 868                 if ( dst )
 869                     *dst++ = '-';
 870                 len++;
 871             }
 872
 873             if ( dst )
 874                 *dst++ = (char)cc;
 875             len++;
 876         }
 877         else if ( cc == '+' && state.IsDirect() )
 878         {
 879             if ( dst )
 880             {
 881                 *dst++ = '+';
 882                 *dst++ = '-';
 883             }
 884
 885             len += 2;
 886         }
 887 #ifndef WC_UTF16
 888         else if (((wxUint32)cc) > 0xffff)
 889         {
 890             // no surrogate pair generation (yet?)
 891             return wxCONV_FAILED;
 892         }
 893 #endif
 894         else
 895         {
 896             if ( state.IsDirect() )
 897             {
 898                 state.ToShifted();
 899
 900                 if ( dst )
 901                     *dst++ = '+';
 902                 len++;
 903             }
 904
 905             // BASE64 encode string
 906             for ( ;; )
 907             {
 908                 for ( unsigned lsb = 0; lsb < 2; lsb++ )
 909                 {
 910                     state.accum <<= 8;
 911                     state.accum += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 912
 913                     for (state.bit += 8; state.bit >= 6; )
 914                     {
 915                         state.bit -= 6;
 916                         if ( dst )
 917                             *dst++ = utf7enb64[(state.accum >> state.bit) % 64];
 918                         len++;
 919                     }
 920                 }
 921
 922                 if ( src == srcEnd || wxIsUTF7Direct(cc = *src) )
 923                     break;
 924
 925                 src++;
 926             }
 927         }
 928     }
 929
 930     // we need to restore the original encoder state if we were called just to
 931     // calculate the amount of space needed as we will presumably be called
 932     // again to really convert the data now
 933     if ( !dst )
 934         state = stateOrig;
 935
 936     return len;
 937 }
 938
 939 // ----------------------------------------------------------------------------
 940 // UTF-8
 941 // ----------------------------------------------------------------------------
 942
// utf8_max[k] is the largest value which can be encoded by a UTF-8 sequence
// of k + 1 bytes (using the original definition of UTF-8 allowing sequences
// of up to 6 bytes); the last element is just a sentinel which can never be
// exceeded by a 32 bit value and so stops any search in this table
static const wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the range [wxUnicodePUA, wxUnicodePUAEnd) in the private use
// area to which we (temporarily) remap the bytes which are invalid in a UTF-8
// encoded string: there is one code point for each of 256 possible byte values
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 950
// this table gives the length of the UTF-8 encoding from its first character:
// it is indexed by the value of the first byte of the sequence and contains
// the total number of bytes in the sequence, including the first one, or 0
// if this byte can't start a valid sequence
const unsigned char tableUtf8Lengths[256] = {
    // single-byte sequences (ASCII):
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F

    // these are invalid (80..BF are continuation bytes which can't appear
    // in the first position and C0 and C1 could only start an overlong
    // encoding of an ASCII character):
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
    0, 0,                                            // C0,C1

    // two-byte sequences:
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF

    // three-byte sequences:
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF

    // four-byte sequences:
    4, 4, 4, 4, 4,                                   // F0..F4

    // these are invalid again (5- or 6-byte
    // sequences and sequences for code points
    // above U+10FFFF, as restricted by RFC 3629):
                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF
};
 985
 986 size_t
 987 wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
 988                             const char *src, size_t srcLen) const
 989 {
 990     wchar_t *out = dstLen ? dst : NULL;
 991     size_t written = 0;
 992
 993     if ( srcLen == wxNO_LEN )
 994         srcLen = strlen(src) + 1;
 995
 996     for ( const char *p = src; ; p++ )
 997     {
 998         if ( (srcLen == wxNO_LEN ? !*p : !srcLen) )
 999         {
1000             // all done successfully, just add the trailing NULL if we are not
1001             // using explicit length
1002             if ( srcLen == wxNO_LEN )
1003             {
1004                 if ( out )
1005                 {
1006                     if ( !dstLen )
1007                         break;
1008
1009                     *out = L'\0';
1010                 }
1011
1012                 written++;
1013             }
1014
1015             return written;
1016         }
1017
1018         if ( out && !dstLen-- )
1019             break;
1020
1021         wxUint32 code;
1022         unsigned char c = *p;
1023
1024         if ( c < 0x80 )
1025         {
1026             if ( srcLen == 0 ) // the test works for wxNO_LEN too
1027                 break;
1028
1029             if ( srcLen != wxNO_LEN )
1030                 srcLen--;
1031
1032             code = c;
1033         }
1034         else
1035         {
1036             unsigned len = tableUtf8Lengths[c];
1037             if ( !len )
1038                 break;
1039
1040             if ( srcLen < len ) // the test works for wxNO_LEN too
1041                 break;
1042
1043             if ( srcLen != wxNO_LEN )
1044                 srcLen -= len;
1045
1046             //   Char. number range   |        UTF-8 octet sequence
1047             //      (hexadecimal)     |              (binary)
1048             //  ----------------------+----------------------------------------
1049             //  0000 0000 - 0000 007F | 0xxxxxxx
1050             //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
1051             //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
1052             //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1053             //
1054             //  Code point value is stored in bits marked with 'x',
1055             //  lowest-order bit of the value on the right side in the diagram
1056             //  above.                                         (from RFC 3629)
1057
1058             // mask to extract lead byte's value ('x' bits above), by sequence
1059             // length:
1060             static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
1061
1062             // mask and value of lead byte's most significant bits, by length:
1063             static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
1064             static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
1065
1066             len--; // it's more convenient to work with 0-based length here
1067
1068             // extract the lead byte's value bits:
1069             if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
1070                 break;
1071
1072             code = c & leadValueMask[len];
1073
1074             // all remaining bytes, if any, are handled in the same way
1075             // regardless of sequence's length:
1076             for ( ; len; --len )
1077             {
1078                 c = *++p;
1079                 if ( (c & 0xC0) != 0x80 )
1080                     return wxCONV_FAILED;
1081
1082                 code <<= 6;
1083                 code |= c & 0x3F;
1084             }
1085         }
1086
1087 #ifdef WC_UTF16
1088         // cast is ok because wchar_t == wxUint16 if WC_UTF16
1089         if ( encode_utf16(code, (wxUint16 *)out) == 2 )
1090         {
1091             if ( out )
1092                 out++;
1093             written++;
1094         }
1095 #else // !WC_UTF16
1096         if ( out )
1097             *out = code;
1098 #endif // WC_UTF16/!WC_UTF16
1099
1100         if ( out )
1101             out++;
1102
1103         written++;
1104     }
1105
1106     return wxCONV_FAILED;
1107 }
1108
1109 size_t
1110 wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
1111                               const wchar_t *src, size_t srcLen) const
1112 {
1113     char *out = dstLen ? dst : NULL;
1114     size_t written = 0;
1115
1116     for ( const wchar_t *wp = src; ; wp++ )
1117     {
1118         if ( (srcLen == wxNO_LEN ? !*wp : !srcLen) )
1119         {
1120             // all done successfully, just add the trailing NULL if we are not
1121             // using explicit length
1122             if ( srcLen == wxNO_LEN )
1123             {
1124                 if ( out )
1125                 {
1126                     if ( !dstLen )
1127                         break;
1128
1129                     *out = '\0';
1130                 }
1131
1132                 written++;
1133             }
1134
1135             return written;
1136         }
1137
1138         if ( srcLen != wxNO_LEN )
1139             srcLen--;
1140
1141         wxUint32 code;
1142 #ifdef WC_UTF16
1143         // cast is ok for WC_UTF16
1144         if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
1145         {
1146             // skip the next char too as we decoded a surrogate
1147             wp++;
1148         }
1149 #else // wchar_t is UTF-32
1150         code = *wp & 0x7fffffff;
1151 #endif
1152
1153         unsigned len;
1154         if ( code <= 0x7F )
1155         {
1156             len = 1;
1157             if ( out )
1158             {
1159                 if ( dstLen < len )
1160                     break;
1161
1162                 out[0] = (char)code;
1163             }
1164         }
1165         else if ( code <= 0x07FF )
1166         {
1167             len = 2;
1168             if ( out )
1169             {
1170                 if ( dstLen < len )
1171                     break;
1172
1173                 // NB: this line takes 6 least significant bits, encodes them as
1174                 // 10xxxxxx and discards them so that the next byte can be encoded:
1175                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1176                 out[0] = 0xC0 | code;
1177             }
1178         }
1179         else if ( code < 0xFFFF )
1180         {
1181             len = 3;
1182             if ( out )
1183             {
1184                 if ( dstLen < len )
1185                     break;
1186
1187                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1188                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1189                 out[0] = 0xE0 | code;
1190             }
1191         }
1192         else if ( code <= 0x10FFFF )
1193         {
1194             len = 4;
1195             if ( out )
1196             {
1197                 if ( dstLen < len )
1198                     break;
1199
1200                 out[3] = 0x80 | (code & 0x3F);  code >>= 6;
1201                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1202                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1203                 out[0] = 0xF0 | code;
1204             }
1205         }
1206         else
1207         {
1208             wxFAIL_MSG( wxT("trying to encode undefined Unicode character") );
1209             break;
1210         }
1211
1212         if ( out )
1213         {
1214             out += len;
1215             dstLen -= len;
1216         }
1217
1218         written += len;
1219     }
1220
1221     // we only get here if an error occurs during decoding
1222     return wxCONV_FAILED;
1223 }
1224
1225 size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
1226                              const char *psz, size_t srcLen) const
1227 {
1228     if ( m_options == MAP_INVALID_UTF8_NOT )
1229         return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
1230
1231     size_t len = 0;
1232
1233     // The length can be either given explicitly or computed implicitly for the
1234     // NUL-terminated strings.
1235     const bool isNulTerminated = srcLen == wxNO_LEN;
1236     while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n)))
1237     {
1238         const char *opsz = psz;
1239         bool invalid = false;
1240         unsigned char cc = *psz++, fc = cc;
1241         unsigned cnt;
1242         for (cnt = 0; fc & 0x80; cnt++)
1243             fc <<= 1;
1244
1245         if (!cnt)
1246         {
1247             // plain ASCII char
1248             if (buf)
1249                 *buf++ = cc;
1250             len++;
1251
1252             // escape the escape character for octal escapes
1253             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
1254                     && cc == '\\' && (!buf || len < n))
1255             {
1256                 if (buf)
1257                     *buf++ = cc;
1258                 len++;
1259             }
1260         }
1261         else
1262         {
1263             cnt--;
1264             if (!cnt)
1265             {
1266                 // invalid UTF-8 sequence
1267                 invalid = true;
1268             }
1269             else
1270             {
1271                 unsigned ocnt = cnt - 1;
1272                 wxUint32 res = cc & (0x3f >> cnt);
1273                 while (cnt--)
1274                 {
1275                     cc = *psz;
1276                     if ((cc & 0xC0) != 0x80)
1277                     {
1278                         // invalid UTF-8 sequence
1279                         invalid = true;
1280                         break;
1281                     }
1282
1283                     psz++;
1284                     res = (res << 6) | (cc & 0x3f);
1285                 }
1286
1287                 if (invalid || res <= utf8_max[ocnt])
1288                 {
1289                     // illegal UTF-8 encoding
1290                     invalid = true;
1291                 }
1292                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
1293                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
1294                 {
1295                     // if one of our PUA characters turns up externally
1296                     // it must also be treated as an illegal sequence
1297                     // (a bit like you have to escape an escape character)
1298                     invalid = true;
1299                 }
1300                 else
1301                 {
1302 #ifdef WC_UTF16
1303                     // cast is ok because wchar_t == wxUint16 if WC_UTF16
1304                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
1305                     if (pa == wxCONV_FAILED)
1306                     {
1307                         invalid = true;
1308                     }
1309                     else
1310                     {
1311                         if (buf)
1312                             buf += pa;
1313                         len += pa;
1314                     }
1315 #else // !WC_UTF16
1316                     if (buf)
1317                         *buf++ = (wchar_t)res;
1318                     len++;
1319 #endif // WC_UTF16/!WC_UTF16
1320                 }
1321             }
1322
1323             if (invalid)
1324             {
1325                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
1326                 {
1327                     while (opsz < psz && (!buf || len < n))
1328                     {
1329 #ifdef WC_UTF16
1330                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
1331                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
1332                         wxASSERT(pa != wxCONV_FAILED);
1333                         if (buf)
1334                             buf += pa;
1335                         opsz++;
1336                         len += pa;
1337 #else
1338                         if (buf)
1339                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
1340                         opsz++;
1341                         len++;
1342 #endif
1343                     }
1344                 }
1345                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1346                 {
1347                     while (opsz < psz && (!buf || len < n))
1348                     {
1349                         if ( buf && len + 3 < n )
1350                         {
1351                             unsigned char on = *opsz;
1352                             *buf++ = L'\\';
1353                             *buf++ = (wchar_t)( L'0' + on / 0100 );
1354                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
1355                             *buf++ = (wchar_t)( L'0' + on % 010 );
1356                         }
1357
1358                         opsz++;
1359                         len += 4;
1360                     }
1361                 }
1362                 else // MAP_INVALID_UTF8_NOT
1363                 {
1364                     return wxCONV_FAILED;
1365                 }
1366             }
1367         }
1368     }
1369
1370     if ( isNulTerminated )
1371     {
1372         // Add the trailing NUL in this case if we have a large enough buffer.
1373         if ( buf && (len < n) )
1374             *buf = 0;
1375
1376         // And count it in any case.
1377         len++;
1378     }
1379
1380     return len;
1381 }
1382
// return true if the given wide character is an octal digit, i.e. one of the
// characters from '0' to '7' inclusive
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
1387
1388 size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
1389                                const wchar_t *psz, size_t srcLen) const
1390 {
1391     if ( m_options == MAP_INVALID_UTF8_NOT )
1392         return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
1393
1394     size_t len = 0;
1395
1396     while ((srcLen == wxNO_LEN ? *psz : srcLen--) && ((!buf) || (len < n)))
1397     {
1398         wxUint32 cc;
1399
1400 #ifdef WC_UTF16
1401         // cast is ok for WC_UTF16
1402         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1403         psz += (pa == wxCONV_FAILED) ? 1 : pa;
1404 #else
1405         cc = (*psz++) & 0x7fffffff;
1406 #endif
1407
1408         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
1409                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
1410         {
1411             if (buf)
1412                 *buf++ = (char)(cc - wxUnicodePUA);
1413             len++;
1414         }
1415         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1416                     && cc == L'\\' && psz[0] == L'\\' )
1417         {
1418             if (buf)
1419                 *buf++ = (char)cc;
1420             psz++;
1421             len++;
1422         }
1423         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
1424                     cc == L'\\' &&
1425                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
1426         {
1427             if (buf)
1428             {
1429                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
1430                                  (psz[1] - L'0') * 010 +
1431                                  (psz[2] - L'0'));
1432             }
1433
1434             psz += 3;
1435             len++;
1436         }
1437         else
1438         {
1439             unsigned cnt;
1440             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
1441             {
1442             }
1443
1444             if (!cnt)
1445             {
1446                 // plain ASCII char
1447                 if (buf)
1448                     *buf++ = (char) cc;
1449                 len++;
1450             }
1451             else
1452             {
1453                 len += cnt + 1;
1454                 if (buf)
1455                 {
1456                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
1457                     while (cnt--)
1458                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
1459                 }
1460             }
1461         }
1462     }
1463
1464     if (srcLen == wxNO_LEN && buf && (len < n))
1465         *buf = 0;
1466
1467     return len + 1;
1468 }
1469
1470 // ============================================================================
1471 // UTF-16
1472 // ============================================================================
1473
// wxMBConvUTF16straight is the name used below for the UTF-16 converter whose
// byte order is the same as the one of the machine we are compiled for (as
// indicated by WORDS_BIGENDIAN) and wxMBConvUTF16swap for the converter using
// the opposite byte order: map them to the BE and LE classes accordingly
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
1481
1482 /* static */
1483 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
1484 {
1485     if ( srcLen == wxNO_LEN )
1486     {
1487         // count the number of bytes in input, including the trailing NULs
1488         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1489         for ( srcLen = 1; *inBuff++; srcLen++ )
1490             ;
1491
1492         srcLen *= BYTES_PER_CHAR;
1493     }
1494     else // we already have the length
1495     {
1496         // we can only convert an entire number of UTF-16 characters
1497         if ( srcLen % BYTES_PER_CHAR )
1498             return wxCONV_FAILED;
1499     }
1500
1501     return srcLen;
1502 }
1503
1504 // case when in-memory representation is UTF-16 too
1505 #ifdef WC_UTF16
1506
1507 // ----------------------------------------------------------------------------
1508 // conversions without endianness change
1509 // ----------------------------------------------------------------------------
1510
1511 size_t
1512 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1513                                const char *src, size_t srcLen) const
1514 {
1515     // set up the scene for using memcpy() (which is presumably more efficient
1516     // than copying the bytes one by one)
1517     srcLen = GetLength(src, srcLen);
1518     if ( srcLen == wxNO_LEN )
1519         return wxCONV_FAILED;
1520
1521     const size_t inLen = srcLen / BYTES_PER_CHAR;
1522     if ( dst )
1523     {
1524         if ( dstLen < inLen )
1525             return wxCONV_FAILED;
1526
1527         memcpy(dst, src, srcLen);
1528     }
1529
1530     return inLen;
1531 }
1532
1533 size_t
1534 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1535                                  const wchar_t *src, size_t srcLen) const
1536 {
1537     if ( srcLen == wxNO_LEN )
1538         srcLen = wxWcslen(src) + 1;
1539
1540     srcLen *= BYTES_PER_CHAR;
1541
1542     if ( dst )
1543     {
1544         if ( dstLen < srcLen )
1545             return wxCONV_FAILED;
1546
1547         memcpy(dst, src, srcLen);
1548     }
1549
1550     return srcLen;
1551 }
1552
1553 // ----------------------------------------------------------------------------
1554 // endian-reversing conversions
1555 // ----------------------------------------------------------------------------
1556
1557 size_t
1558 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1559                            const char *src, size_t srcLen) const
1560 {
1561     srcLen = GetLength(src, srcLen);
1562     if ( srcLen == wxNO_LEN )
1563         return wxCONV_FAILED;
1564
1565     srcLen /= BYTES_PER_CHAR;
1566
1567     if ( dst )
1568     {
1569         if ( dstLen < srcLen )
1570             return wxCONV_FAILED;
1571
1572         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1573         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1574         {
1575             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1576         }
1577     }
1578
1579     return srcLen;
1580 }
1581
1582 size_t
1583 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1584                              const wchar_t *src, size_t srcLen) const
1585 {
1586     if ( srcLen == wxNO_LEN )
1587         srcLen = wxWcslen(src) + 1;
1588
1589     srcLen *= BYTES_PER_CHAR;
1590
1591     if ( dst )
1592     {
1593         if ( dstLen < srcLen )
1594             return wxCONV_FAILED;
1595
1596         wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1597         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1598         {
1599             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1600         }
1601     }
1602
1603     return srcLen;
1604 }
1605
1606 #else // !WC_UTF16: wchar_t is UTF-32
1607
1608 // ----------------------------------------------------------------------------
1609 // conversions without endianness change
1610 // ----------------------------------------------------------------------------
1611
1612 size_t
1613 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1614                                const char *src, size_t srcLen) const
1615 {
1616     srcLen = GetLength(src, srcLen);
1617     if ( srcLen == wxNO_LEN )
1618         return wxCONV_FAILED;
1619
1620     const size_t inLen = srcLen / BYTES_PER_CHAR;
1621     if ( !dst )
1622     {
1623         // optimization: return maximal space which could be needed for this
1624         // string even if the real size could be smaller if the buffer contains
1625         // any surrogates
1626         return inLen;
1627     }
1628
1629     size_t outLen = 0;
1630     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1631     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1632     {
1633         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1634         if ( !inBuff )
1635             return wxCONV_FAILED;
1636
1637         if ( ++outLen > dstLen )
1638             return wxCONV_FAILED;
1639
1640         *dst++ = ch;
1641     }
1642
1643
1644     return outLen;
1645 }
1646
1647 size_t
1648 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1649                                  const wchar_t *src, size_t srcLen) const
1650 {
1651     if ( srcLen == wxNO_LEN )
1652         srcLen = wxWcslen(src) + 1;
1653
1654     size_t outLen = 0;
1655     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1656     for ( size_t n = 0; n < srcLen; n++ )
1657     {
1658         wxUint16 cc[2] = { 0 };
1659         const size_t numChars = encode_utf16(*src++, cc);
1660         if ( numChars == wxCONV_FAILED )
1661             return wxCONV_FAILED;
1662
1663         outLen += numChars * BYTES_PER_CHAR;
1664         if ( outBuff )
1665         {
1666             if ( outLen > dstLen )
1667                 return wxCONV_FAILED;
1668
1669             *outBuff++ = cc[0];
1670             if ( numChars == 2 )
1671             {
1672                 // second character of a surrogate
1673                 *outBuff++ = cc[1];
1674             }
1675         }
1676     }
1677
1678     return outLen;
1679 }
1680
1681 // ----------------------------------------------------------------------------
1682 // endian-reversing conversions
1683 // ----------------------------------------------------------------------------
1684
1685 size_t
1686 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1687                            const char *src, size_t srcLen) const
1688 {
1689     srcLen = GetLength(src, srcLen);
1690     if ( srcLen == wxNO_LEN )
1691         return wxCONV_FAILED;
1692
1693     const size_t inLen = srcLen / BYTES_PER_CHAR;
1694     if ( !dst )
1695     {
1696         // optimization: return maximal space which could be needed for this
1697         // string even if the real size could be smaller if the buffer contains
1698         // any surrogates
1699         return inLen;
1700     }
1701
1702     size_t outLen = 0;
1703     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1704     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1705     {
1706         wxUint32 ch;
1707         wxUint16 tmp[2];
1708
1709         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1710         inBuff++;
1711         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1712
1713         const size_t numChars = decode_utf16(tmp, ch);
1714         if ( numChars == wxCONV_FAILED )
1715             return wxCONV_FAILED;
1716
1717         if ( numChars == 2 )
1718             inBuff++;
1719
1720         if ( ++outLen > dstLen )
1721             return wxCONV_FAILED;
1722
1723         *dst++ = ch;
1724     }
1725
1726
1727     return outLen;
1728 }
1729
1730 size_t
1731 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1732                              const wchar_t *src, size_t srcLen) const
1733 {
1734     if ( srcLen == wxNO_LEN )
1735         srcLen = wxWcslen(src) + 1;
1736
1737     size_t outLen = 0;
1738     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1739     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1740     {
1741         wxUint16 cc[2] = { 0 };
1742         const size_t numChars = encode_utf16(*src, cc);
1743         if ( numChars == wxCONV_FAILED )
1744             return wxCONV_FAILED;
1745
1746         outLen += numChars * BYTES_PER_CHAR;
1747         if ( outBuff )
1748         {
1749             if ( outLen > dstLen )
1750                 return wxCONV_FAILED;
1751
1752             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1753             if ( numChars == 2 )
1754             {
1755                 // second character of a surrogate
1756                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1757             }
1758         }
1759     }
1760
1761     return outLen;
1762 }
1763
1764 #endif // WC_UTF16/!WC_UTF16
1765
1766
1767 // ============================================================================
1768 // UTF-32
1769 // ============================================================================
1770
// wxMBConvUTF32straight is the name used below for the UTF-32 converter whose
// byte order is the same as the one of the machine we are compiled for (as
// indicated by WORDS_BIGENDIAN) and wxMBConvUTF32swap for the converter using
// the opposite byte order: map them to the BE and LE classes accordingly
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF32straight  wxMBConvUTF32BE
    #define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
    #define wxMBConvUTF32swap      wxMBConvUTF32BE
    #define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the little and big endian UTF-32 converter objects (presumably exported
// from the base library, judging by the name of the macro used here)
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1782
1783 /* static */
1784 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1785 {
1786     if ( srcLen == wxNO_LEN )
1787     {
1788         // count the number of bytes in input, including the trailing NULs
1789         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1790         for ( srcLen = 1; *inBuff++; srcLen++ )
1791             ;
1792
1793         srcLen *= BYTES_PER_CHAR;
1794     }
1795     else // we already have the length
1796     {
1797         // we can only convert an entire number of UTF-32 characters
1798         if ( srcLen % BYTES_PER_CHAR )
1799             return wxCONV_FAILED;
1800     }
1801
1802     return srcLen;
1803 }
1804
1805 // case when in-memory representation is UTF-16
1806 #ifdef WC_UTF16
1807
1808 // ----------------------------------------------------------------------------
1809 // conversions without endianness change
1810 // ----------------------------------------------------------------------------
1811
1812 size_t
1813 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1814                                const char *src, size_t srcLen) const
1815 {
1816     srcLen = GetLength(src, srcLen);
1817     if ( srcLen == wxNO_LEN )
1818         return wxCONV_FAILED;
1819
1820     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1821     const size_t inLen = srcLen / BYTES_PER_CHAR;
1822     size_t outLen = 0;
1823     for ( size_t n = 0; n < inLen; n++ )
1824     {
1825         wxUint16 cc[2] = { 0 };
1826         const size_t numChars = encode_utf16(*inBuff++, cc);
1827         if ( numChars == wxCONV_FAILED )
1828             return wxCONV_FAILED;
1829
1830         outLen += numChars;
1831         if ( dst )
1832         {
1833             if ( outLen > dstLen )
1834                 return wxCONV_FAILED;
1835
1836             *dst++ = cc[0];
1837             if ( numChars == 2 )
1838             {
1839                 // second character of a surrogate
1840                 *dst++ = cc[1];
1841             }
1842         }
1843     }
1844
1845     return outLen;
1846 }
1847
1848 size_t
1849 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1850                                  const wchar_t *src, size_t srcLen) const
1851 {
1852     if ( srcLen == wxNO_LEN )
1853         srcLen = wxWcslen(src) + 1;
1854
1855     if ( !dst )
1856     {
1857         // optimization: return maximal space which could be needed for this
1858         // string instead of the exact amount which could be less if there are
1859         // any surrogates in the input
1860         //
1861         // we consider that surrogates are rare enough to make it worthwhile to
1862         // avoid running the loop below at the cost of slightly extra memory
1863         // consumption
1864         return srcLen * BYTES_PER_CHAR;
1865     }
1866
1867     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1868     size_t outLen = 0;
1869     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1870     {
1871         const wxUint32 ch = wxDecodeSurrogate(&src);
1872         if ( !src )
1873             return wxCONV_FAILED;
1874
1875         outLen += BYTES_PER_CHAR;
1876
1877         if ( outLen > dstLen )
1878             return wxCONV_FAILED;
1879
1880         *outBuff++ = ch;
1881     }
1882
1883     return outLen;
1884 }
1885
1886 // ----------------------------------------------------------------------------
1887 // endian-reversing conversions
1888 // ----------------------------------------------------------------------------
1889
1890 size_t
1891 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1892                            const char *src, size_t srcLen) const
1893 {
1894     srcLen = GetLength(src, srcLen);
1895     if ( srcLen == wxNO_LEN )
1896         return wxCONV_FAILED;
1897
1898     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1899     const size_t inLen = srcLen / BYTES_PER_CHAR;
1900     size_t outLen = 0;
1901     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1902     {
1903         wxUint16 cc[2] = { 0 };
1904         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1905         if ( numChars == wxCONV_FAILED )
1906             return wxCONV_FAILED;
1907
1908         outLen += numChars;
1909         if ( dst )
1910         {
1911             if ( outLen > dstLen )
1912                 return wxCONV_FAILED;
1913
1914             *dst++ = cc[0];
1915             if ( numChars == 2 )
1916             {
1917                 // second character of a surrogate
1918                 *dst++ = cc[1];
1919             }
1920         }
1921     }
1922
1923     return outLen;
1924 }
1925
1926 size_t
1927 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1928                              const wchar_t *src, size_t srcLen) const
1929 {
1930     if ( srcLen == wxNO_LEN )
1931         srcLen = wxWcslen(src) + 1;
1932
1933     if ( !dst )
1934     {
1935         // optimization: return maximal space which could be needed for this
1936         // string instead of the exact amount which could be less if there are
1937         // any surrogates in the input
1938         //
1939         // we consider that surrogates are rare enough to make it worthwhile to
1940         // avoid running the loop below at the cost of slightly extra memory
1941         // consumption
1942         return srcLen*BYTES_PER_CHAR;
1943     }
1944
1945     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1946     size_t outLen = 0;
1947     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1948     {
1949         const wxUint32 ch = wxDecodeSurrogate(&src);
1950         if ( !src )
1951             return wxCONV_FAILED;
1952
1953         outLen += BYTES_PER_CHAR;
1954
1955         if ( outLen > dstLen )
1956             return wxCONV_FAILED;
1957
1958         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1959     }
1960
1961     return outLen;
1962 }
1963
1964 #else // !WC_UTF16: wchar_t is UTF-32
1965
1966 // ----------------------------------------------------------------------------
1967 // conversions without endianness change
1968 // ----------------------------------------------------------------------------
1969
1970 size_t
1971 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1972                                const char *src, size_t srcLen) const
1973 {
1974     // use memcpy() as it should be much faster than hand-written loop
1975     srcLen = GetLength(src, srcLen);
1976     if ( srcLen == wxNO_LEN )
1977         return wxCONV_FAILED;
1978
1979     const size_t inLen = srcLen/BYTES_PER_CHAR;
1980     if ( dst )
1981     {
1982         if ( dstLen < inLen )
1983             return wxCONV_FAILED;
1984
1985         memcpy(dst, src, srcLen);
1986     }
1987
1988     return inLen;
1989 }
1990
1991 size_t
1992 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1993                                  const wchar_t *src, size_t srcLen) const
1994 {
1995     if ( srcLen == wxNO_LEN )
1996         srcLen = wxWcslen(src) + 1;
1997
1998     srcLen *= BYTES_PER_CHAR;
1999
2000     if ( dst )
2001     {
2002         if ( dstLen < srcLen )
2003             return wxCONV_FAILED;
2004
2005         memcpy(dst, src, srcLen);
2006     }
2007
2008     return srcLen;
2009 }
2010
2011 // ----------------------------------------------------------------------------
2012 // endian-reversing conversions
2013 // ----------------------------------------------------------------------------
2014
2015 size_t
2016 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
2017                            const char *src, size_t srcLen) const
2018 {
2019     srcLen = GetLength(src, srcLen);
2020     if ( srcLen == wxNO_LEN )
2021         return wxCONV_FAILED;
2022
2023     srcLen /= BYTES_PER_CHAR;
2024
2025     if ( dst )
2026     {
2027         if ( dstLen < srcLen )
2028             return wxCONV_FAILED;
2029
2030         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
2031         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
2032         {
2033             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
2034         }
2035     }
2036
2037     return srcLen;
2038 }
2039
2040 size_t
2041 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
2042                              const wchar_t *src, size_t srcLen) const
2043 {
2044     if ( srcLen == wxNO_LEN )
2045         srcLen = wxWcslen(src) + 1;
2046
2047     srcLen *= BYTES_PER_CHAR;
2048
2049     if ( dst )
2050     {
2051         if ( dstLen < srcLen )
2052             return wxCONV_FAILED;
2053
2054         wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
2055         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
2056         {
2057             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
2058         }
2059     }
2060
2061     return srcLen;
2062 }
2063
2064 #endif // WC_UTF16/!WC_UTF16
2065
2066
2067 // ============================================================================
2068 // The classes doing conversion using the iconv_xxx() functions
2069 // ============================================================================
2070
2071 #ifdef HAVE_ICONV
2072
2073 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
2074 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
2075 //     (unless there's yet another bug in glibc) the only case when iconv()
2076 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
2077 //     left in the input buffer -- when _real_ error occurs,
2078 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
2079 //     iconv() failure.
2080 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned cres
// and left bufLeft bytes unconsumed in its input buffer must be treated as
// having failed.
//
// Both arguments are parenthesized in the expansion so that compound
// expressions (e.g. "a ? b : c") can be passed safely.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif
2087
2088 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
2089
2090 #define ICONV_T_INVALID ((iconv_t)-1)
2091
2092 #if SIZEOF_WCHAR_T == 4
2093     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
2094     #define WC_ENC      wxFONTENCODING_UTF32
2095 #elif SIZEOF_WCHAR_T == 2
2096     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
2097     #define WC_ENC      wxFONTENCODING_UTF16
2098 #else // sizeof(wchar_t) != 2 nor 4
2099     // does this ever happen?
2100     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
2101 #endif
2102
2103 // ----------------------------------------------------------------------------
2104 // wxMBConv_iconv: encapsulates an iconv character set
2105 // ----------------------------------------------------------------------------
2106
2107 class wxMBConv_iconv : public wxMBConv
2108 {
2109 public:
2110     wxMBConv_iconv(const char *name);
2111     virtual ~wxMBConv_iconv();
2112
2113     // implement base class virtual methods
2114     virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
2115                            const char *src, size_t srcLen = wxNO_LEN) const;
2116     virtual size_t FromWChar(char *dst, size_t dstLen,
2117                              const wchar_t *src, size_t srcLen = wxNO_LEN) const;
2118     virtual size_t GetMBNulLen() const;
2119
2120 #if wxUSE_UNICODE_UTF8
2121     virtual bool IsUTF8() const;
2122 #endif
2123
2124     virtual wxMBConv *Clone() const
2125     {
2126         wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
2127         p->m_minMBCharWidth = m_minMBCharWidth;
2128         return p;
2129     }
2130
2131     bool IsOk() const
2132         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
2133
2134 protected:
2135     // the iconv handlers used to translate from multibyte
2136     // to wide char and in the other direction
2137     iconv_t m2w,
2138             w2m;
2139
2140 #if wxUSE_THREADS
2141     // guards access to m2w and w2m objects
2142     wxMutex m_iconvMutex;
2143 #endif
2144
2145 private:
2146     // the name (for iconv_open()) of a wide char charset -- if none is
2147     // available on this machine, it will remain NULL
2148     static wxString ms_wcCharsetName;
2149
2150     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
2151     // different endian-ness than the native one
2152     static bool ms_wcNeedsSwap;
2153
2154
2155     // name of the encoding handled by this conversion
2156     const char *m_name;
2157
2158     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
2159     // initially
2160     size_t m_minMBCharWidth;
2161 };
2162
2163 // make the constructor available for unit testing
2164 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
2165 {
2166     wxMBConv_iconv* result = new wxMBConv_iconv( name );
2167     if ( !result->IsOk() )
2168     {
2169         delete result;
2170         return 0;
2171     }
2172
2173     return result;
2174 }
2175
2176 wxString wxMBConv_iconv::ms_wcCharsetName;
2177 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
2178
2179 wxMBConv_iconv::wxMBConv_iconv(const char *name)
2180               : m_name(wxStrdup(name))
2181 {
2182     m_minMBCharWidth = 0;
2183
2184     // check for charset that represents wchar_t:
2185     if ( ms_wcCharsetName.empty() )
2186     {
2187         wxLogTrace(TRACE_STRCONV, wxT("Looking for wide char codeset:"));
2188
2189 #if wxUSE_FONTMAP
2190         const wxChar *const *names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
2191 #else // !wxUSE_FONTMAP
2192         static const wxChar *const names_static[] =
2193         {
2194 #if SIZEOF_WCHAR_T == 4
2195             wxT("UCS-4"),
2196 #elif SIZEOF_WCHAR_T == 2
2197             wxT("UCS-2"),
2198 #endif
2199             NULL
2200         };
2201         const wxChar *const *names = names_static;
2202 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2203
2204         for ( ; *names && ms_wcCharsetName.empty(); ++names )
2205         {
2206             const wxString nameCS(*names);
2207
2208             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
2209             wxString nameXE(nameCS);
2210
2211 #ifdef WORDS_BIGENDIAN
2212                 nameXE += wxT("BE");
2213 #else // little endian
2214                 nameXE += wxT("LE");
2215 #endif
2216
2217             wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2218                        nameXE.c_str());
2219
2220             m2w = iconv_open(nameXE.ToAscii(), name);
2221             if ( m2w == ICONV_T_INVALID )
2222             {
2223                 // try charset w/o bytesex info (e.g. "UCS4")
2224                 wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2225                            nameCS.c_str());
2226                 m2w = iconv_open(nameCS.ToAscii(), name);
2227
2228                 // and check for bytesex ourselves:
2229                 if ( m2w != ICONV_T_INVALID )
2230                 {
2231                     char    buf[2], *bufPtr;
2232                     wchar_t wbuf[2];
2233                     size_t  insz, outsz;
2234                     size_t  res;
2235
2236                     buf[0] = 'A';
2237                     buf[1] = 0;
2238                     wbuf[0] = 0;
2239                     insz = 2;
2240                     outsz = SIZEOF_WCHAR_T * 2;
2241                     char* wbufPtr = (char*)wbuf;
2242                     bufPtr = buf;
2243
2244                     res = iconv(
2245                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
2246                         &wbufPtr, &outsz);
2247
2248                     if (ICONV_FAILED(res, insz))
2249                     {
2250                         wxLogLastError(wxT("iconv"));
2251                         wxLogError(_("Conversion to charset '%s' doesn't work."),
2252                                    nameCS.c_str());
2253                     }
2254                     else // ok, can convert to this encoding, remember it
2255                     {
2256                         ms_wcCharsetName = nameCS;
2257                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
2258                     }
2259                 }
2260             }
2261             else // use charset not requiring byte swapping
2262             {
2263                 ms_wcCharsetName = nameXE;
2264             }
2265         }
2266
2267         wxLogTrace(TRACE_STRCONV,
2268                    wxT("iconv wchar_t charset is \"%s\"%s"),
2269                    ms_wcCharsetName.empty() ? wxString("<none>")
2270                                             : ms_wcCharsetName,
2271                    ms_wcNeedsSwap ? wxT(" (needs swap)")
2272                                   : wxT(""));
2273     }
2274     else // we already have ms_wcCharsetName
2275     {
2276         m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
2277     }
2278
2279     if ( ms_wcCharsetName.empty() )
2280     {
2281         w2m = ICONV_T_INVALID;
2282     }
2283     else
2284     {
2285         w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
2286         if ( w2m == ICONV_T_INVALID )
2287         {
2288             wxLogTrace(TRACE_STRCONV,
2289                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
2290                        ms_wcCharsetName.c_str(), name);
2291         }
2292     }
2293 }
2294
2295 wxMBConv_iconv::~wxMBConv_iconv()
2296 {
2297     free(const_cast<char *>(m_name));
2298
2299     if ( m2w != ICONV_T_INVALID )
2300         iconv_close(m2w);
2301     if ( w2m != ICONV_T_INVALID )
2302         iconv_close(w2m);
2303 }
2304
2305 size_t
2306 wxMBConv_iconv::ToWChar(wchar_t *dst, size_t dstLen,
2307                         const char *src, size_t srcLen) const
2308 {
2309     if ( srcLen == wxNO_LEN )
2310     {
2311         // find the string length: notice that must be done differently for
2312         // NUL-terminated strings and UTF-16/32 which are terminated with 2/4
2313         // consecutive NULs
2314         const size_t nulLen = GetMBNulLen();
2315         switch ( nulLen )
2316         {
2317             default:
2318                 return wxCONV_FAILED;
2319
2320             case 1:
2321                 srcLen = strlen(src); // arguably more optimized than our version
2322                 break;
2323
2324             case 2:
2325             case 4:
2326                 // for UTF-16/32 not only we need to have 2/4 consecutive NULs
2327                 // but they also have to start at character boundary and not
2328                 // span two adjacent characters
2329                 const char *p;
2330                 for ( p = src; NotAllNULs(p, nulLen); p += nulLen )
2331                     ;
2332                 srcLen = p - src;
2333                 break;
2334         }
2335
2336         // when we're determining the length of the string ourselves we count
2337         // the terminating NUL(s) as part of it and always NUL-terminate the
2338         // output
2339         srcLen += nulLen;
2340     }
2341
2342     // we express length in the number of (wide) characters but iconv always
2343     // counts buffer sizes it in bytes
2344     dstLen *= SIZEOF_WCHAR_T;
2345
2346 #if wxUSE_THREADS
2347     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
2348     //     Unfortunately there are a couple of global wxCSConv objects such as
2349     //     wxConvLocal that are used all over wx code, so we have to make sure
2350     //     the handle is used by at most one thread at the time. Otherwise
2351     //     only a few wx classes would be safe to use from non-main threads
2352     //     as MB<->WC conversion would fail "randomly".
2353     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2354 #endif // wxUSE_THREADS
2355
2356     size_t res, cres;
2357     const char *pszPtr = src;
2358
2359     if ( dst )
2360     {
2361         char* bufPtr = (char*)dst;
2362
2363         // have destination buffer, convert there
2364         size_t dstLenOrig = dstLen;
2365         cres = iconv(m2w,
2366                      ICONV_CHAR_CAST(&pszPtr), &srcLen,
2367                      &bufPtr, &dstLen);
2368
2369         // convert the number of bytes converted as returned by iconv to the
2370         // number of (wide) characters converted that we need
2371         res = (dstLenOrig - dstLen) / SIZEOF_WCHAR_T;
2372
2373         if (ms_wcNeedsSwap)
2374         {
2375             // convert to native endianness
2376             for ( unsigned i = 0; i < res; i++ )
2377                 dst[i] = WC_BSWAP(dst[i]);
2378         }
2379     }
2380     else // no destination buffer
2381     {
2382         // convert using temp buffer to calculate the size of the buffer needed
2383         wchar_t tbuf[256];
2384         res = 0;
2385
2386         do
2387         {
2388             char* bufPtr = (char*)tbuf;
2389             dstLen = 8 * SIZEOF_WCHAR_T;
2390
2391             cres = iconv(m2w,
2392                          ICONV_CHAR_CAST(&pszPtr), &srcLen,
2393                          &bufPtr, &dstLen );
2394
2395             res += 8 - (dstLen / SIZEOF_WCHAR_T);
2396         }
2397         while ((cres == (size_t)-1) && (errno == E2BIG));
2398     }
2399
2400     if (ICONV_FAILED(cres, srcLen))
2401     {
2402         //VS: it is ok if iconv fails, hence trace only
2403         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2404         return wxCONV_FAILED;
2405     }
2406
2407     return res;
2408 }
2409
2410 size_t wxMBConv_iconv::FromWChar(char *dst, size_t dstLen,
2411                                  const wchar_t *src, size_t srcLen) const
2412 {
2413 #if wxUSE_THREADS
2414     // NB: explained in MB2WC
2415     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2416 #endif
2417
2418     if ( srcLen == wxNO_LEN )
2419         srcLen = wxWcslen(src) + 1;
2420
2421     size_t inbuflen = srcLen * SIZEOF_WCHAR_T;
2422     size_t outbuflen = dstLen;
2423     size_t res, cres;
2424
2425     wchar_t *tmpbuf = 0;
2426
2427     if (ms_wcNeedsSwap)
2428     {
2429         // need to copy to temp buffer to switch endianness
2430         // (doing WC_BSWAP twice on the original buffer won't work, as it
2431         //  could be in read-only memory, or be accessed in some other thread)
2432         tmpbuf = (wchar_t *)malloc(inbuflen);
2433         for ( size_t i = 0; i < srcLen; i++ )
2434             tmpbuf[i] = WC_BSWAP(src[i]);
2435
2436         src = tmpbuf;
2437     }
2438
2439     char* inbuf = (char*)src;
2440     if ( dst )
2441     {
2442         // have destination buffer, convert there
2443         cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2444
2445         res = dstLen - outbuflen;
2446     }
2447     else // no destination buffer
2448     {
2449         // convert using temp buffer to calculate the size of the buffer needed
2450         char tbuf[256];
2451         res = 0;
2452         do
2453         {
2454             dst = tbuf;
2455             outbuflen = WXSIZEOF(tbuf);
2456
2457             cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2458
2459             res += WXSIZEOF(tbuf) - outbuflen;
2460         }
2461         while ((cres == (size_t)-1) && (errno == E2BIG));
2462     }
2463
2464     if (ms_wcNeedsSwap)
2465     {
2466         free(tmpbuf);
2467     }
2468
2469     if (ICONV_FAILED(cres, inbuflen))
2470     {
2471         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2472         return wxCONV_FAILED;
2473     }
2474
2475     return res;
2476 }
2477
2478 size_t wxMBConv_iconv::GetMBNulLen() const
2479 {
2480     if ( m_minMBCharWidth == 0 )
2481     {
2482         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
2483
2484 #if wxUSE_THREADS
2485         // NB: explained in MB2WC
2486         wxMutexLocker lock(self->m_iconvMutex);
2487 #endif
2488
2489         const wchar_t *wnul = L"";
2490         char buf[8]; // should be enough for NUL in any encoding
2491         size_t inLen = sizeof(wchar_t),
2492                outLen = WXSIZEOF(buf);
2493         char *inBuff = (char *)wnul;
2494         char *outBuff = buf;
2495         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
2496         {
2497             self->m_minMBCharWidth = (size_t)-1;
2498         }
2499         else // ok
2500         {
2501             self->m_minMBCharWidth = outBuff - buf;
2502         }
2503     }
2504
2505     return m_minMBCharWidth;
2506 }
2507
2508 #if wxUSE_UNICODE_UTF8
2509 bool wxMBConv_iconv::IsUTF8() const
2510 {
2511     return wxStricmp(m_name, "UTF-8") == 0 ||
2512            wxStricmp(m_name, "UTF8") == 0;
2513 }
2514 #endif
2515
2516 #endif // HAVE_ICONV
2517
2518
2519 // ============================================================================
2520 // Win32 conversion classes
2521 // ============================================================================
2522
2523 #ifdef wxHAVE_WIN32_MB2WC
2524
2525 // from utils.cpp
2526 #if wxUSE_FONTMAP
2527 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
2528 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
2529 #endif
2530
2531 class wxMBConv_win32 : public wxMBConv
2532 {
2533 public:
2534     wxMBConv_win32()
2535     {
2536         m_CodePage = CP_ACP;
2537         m_minMBCharWidth = 0;
2538     }
2539
2540     wxMBConv_win32(const wxMBConv_win32& conv)
2541         : wxMBConv()
2542     {
2543         m_CodePage = conv.m_CodePage;
2544         m_minMBCharWidth = conv.m_minMBCharWidth;
2545     }
2546
2547 #if wxUSE_FONTMAP
2548     wxMBConv_win32(const char* name)
2549     {
2550         m_CodePage = wxCharsetToCodepage(name);
2551         m_minMBCharWidth = 0;
2552     }
2553
2554     wxMBConv_win32(wxFontEncoding encoding)
2555     {
2556         m_CodePage = wxEncodingToCodepage(encoding);
2557         m_minMBCharWidth = 0;
2558     }
2559 #endif // wxUSE_FONTMAP
2560
2561     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2562     {
2563         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2564         // the behaviour is not compatible with the Unix version (using iconv)
2565         // and break the library itself, e.g. wxTextInputStream::NextChar()
2566         // wouldn't work if reading an incomplete MB char didn't result in an
2567         // error
2568         //
2569         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2570         // Win XP or newer and it is not supported for UTF-[78] so we always
2571         // use our own conversions in this case. See
2572         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2573         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2574         if ( m_CodePage == CP_UTF8 )
2575         {
2576             return wxMBConvUTF8().MB2WC(buf, psz, n);
2577         }
2578
2579         if ( m_CodePage == CP_UTF7 )
2580         {
2581             return wxMBConvUTF7().MB2WC(buf, psz, n);
2582         }
2583
2584         int flags = 0;
2585         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2586                 IsAtLeastWin2kSP4() )
2587         {
2588             flags = MB_ERR_INVALID_CHARS;
2589         }
2590
2591         const size_t len = ::MultiByteToWideChar
2592                              (
2593                                 m_CodePage,     // code page
2594                                 flags,          // flags: fall on error
2595                                 psz,            // input string
2596                                 -1,             // its length (NUL-terminated)
2597                                 buf,            // output string
2598                                 buf ? n : 0     // size of output buffer
2599                              );
2600         if ( !len )
2601         {
2602             // function totally failed
2603             return wxCONV_FAILED;
2604         }
2605
2606         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2607         // check if we succeeded, by doing a double trip:
2608         if ( !flags && buf )
2609         {
2610             const size_t mbLen = strlen(psz);
2611             wxCharBuffer mbBuf(mbLen);
2612             if ( ::WideCharToMultiByte
2613                    (
2614                       m_CodePage,
2615                       0,
2616                       buf,
2617                       -1,
2618                       mbBuf.data(),
2619                       mbLen + 1,        // size in bytes, not length
2620                       NULL,
2621                       NULL
2622                    ) == 0 ||
2623                   strcmp(mbBuf, psz) != 0 )
2624             {
2625                 // we didn't obtain the same thing we started from, hence
2626                 // the conversion was lossy and we consider that it failed
2627                 return wxCONV_FAILED;
2628             }
2629         }
2630
2631         // note that it returns count of written chars for buf != NULL and size
2632         // of the needed buffer for buf == NULL so in either case the length of
2633         // the string (which never includes the terminating NUL) is one less
2634         return len - 1;
2635     }
2636
2637     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2638     {
2639         /*
2640             we have a problem here: by default, WideCharToMultiByte() may
2641             replace characters unrepresentable in the target code page with bad
2642             quality approximations such as turning "1/2" symbol (U+00BD) into
2643             "1" for the code pages which don't have it and we, obviously, want
2644             to avoid this at any price
2645
2646             the trouble is that this function does it _silently_, i.e. it won't
2647             even tell us whether it did or not... Win98/2000 and higher provide
2648             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2649             we have to resort to a round trip, i.e. check that converting back
2650             results in the same string -- this is, of course, expensive but
2651             otherwise we simply can't be sure to not garble the data.
2652          */
2653
2654         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2655         // it doesn't work with CJK encodings (which we test for rather roughly
2656         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2657         // supporting it
2658         BOOL usedDef wxDUMMY_INITIALIZE(false);
2659         BOOL *pUsedDef;
2660         int flags;
2661         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2662         {
2663             // it's our lucky day
2664             flags = WC_NO_BEST_FIT_CHARS;
2665             pUsedDef = &usedDef;
2666         }
2667         else // old system or unsupported encoding
2668         {
2669             flags = 0;
2670             pUsedDef = NULL;
2671         }
2672
2673         const size_t len = ::WideCharToMultiByte
2674                              (
2675                                 m_CodePage,     // code page
2676                                 flags,          // either none or no best fit
2677                                 pwz,            // input string
2678                                 -1,             // it is (wide) NUL-terminated
2679                                 buf,            // output buffer
2680                                 buf ? n : 0,    // and its size
2681                                 NULL,           // default "replacement" char
2682                                 pUsedDef        // [out] was it used?
2683                              );
2684
2685         if ( !len )
2686         {
2687             // function totally failed
2688             return wxCONV_FAILED;
2689         }
2690
2691         // we did something, check if we really succeeded
2692         if ( flags )
2693         {
2694             // check if the conversion failed, i.e. if any replacements
2695             // were done
2696             if ( usedDef )
2697                 return wxCONV_FAILED;
2698         }
2699         else // we must resort to double tripping...
2700         {
2701             // first we need to ensure that we really have the MB data: this is
2702             // not the case if we're called with NULL buffer, in which case we
2703             // need to do the conversion yet again
2704             wxCharBuffer bufDef;
2705             if ( !buf )
2706             {
2707                 bufDef = wxCharBuffer(len);
2708                 buf = bufDef.data();
2709                 if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
2710                                             buf, len, NULL, NULL) )
2711                     return wxCONV_FAILED;
2712             }
2713
2714             if ( !n )
2715                 n = wcslen(pwz);
2716             wxWCharBuffer wcBuf(n);
2717             if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED ||
2718                     wcscmp(wcBuf, pwz) != 0 )
2719             {
2720                 // we didn't obtain the same thing we started from, hence
2721                 // the conversion was lossy and we consider that it failed
2722                 return wxCONV_FAILED;
2723             }
2724         }
2725
2726         // see the comment above for the reason of "len - 1"
2727         return len - 1;
2728     }
2729
2730     virtual size_t GetMBNulLen() const
2731     {
2732         if ( m_minMBCharWidth == 0 )
2733         {
2734             int len = ::WideCharToMultiByte
2735                         (
2736                             m_CodePage,     // code page
2737                             0,              // no flags
2738                             L"",            // input string
2739                             1,              // translate just the NUL
2740                             NULL,           // output buffer
2741                             0,              // and its size
2742                             NULL,           // no replacement char
2743                             NULL            // [out] don't care if it was used
2744                         );
2745
2746             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2747             switch ( len )
2748             {
2749                 default:
2750                     wxLogDebug(wxT("Unexpected NUL length %d"), len);
2751                     self->m_minMBCharWidth = (size_t)-1;
2752                     break;
2753
2754                 case 0:
2755                     self->m_minMBCharWidth = (size_t)-1;
2756                     break;
2757
2758                 case 1:
2759                 case 2:
2760                 case 4:
2761                     self->m_minMBCharWidth = len;
2762                     break;
2763             }
2764         }
2765
2766         return m_minMBCharWidth;
2767     }
2768
2769     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2770
2771     bool IsOk() const { return m_CodePage != -1; }
2772
2773 private:
2774     static bool CanUseNoBestFit()
2775     {
2776         static int s_isWin98Or2k = -1;
2777
2778         if ( s_isWin98Or2k == -1 )
2779         {
2780             int verMaj, verMin;
2781             switch ( wxGetOsVersion(&verMaj, &verMin) )
2782             {
2783                 case wxOS_WINDOWS_9X:
2784                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2785                     break;
2786
2787                 case wxOS_WINDOWS_NT:
2788                     s_isWin98Or2k = verMaj >= 5;
2789                     break;
2790
2791                 default:
2792                     // unknown: be conservative by default
2793                     s_isWin98Or2k = 0;
2794                     break;
2795             }
2796
2797             wxASSERT_MSG( s_isWin98Or2k != -1, wxT("should be set above") );
2798         }
2799
2800         return s_isWin98Or2k == 1;
2801     }
2802
2803     static bool IsAtLeastWin2kSP4()
2804     {
2805 #ifdef __WXWINCE__
2806         return false;
2807 #else
2808         static int s_isAtLeastWin2kSP4 = -1;
2809
2810         if ( s_isAtLeastWin2kSP4 == -1 )
2811         {
2812             OSVERSIONINFOEX ver;
2813
2814             memset(&ver, 0, sizeof(ver));
2815             ver.dwOSVersionInfoSize = sizeof(ver);
2816             GetVersionEx((OSVERSIONINFO*)&ver);
2817
2818             s_isAtLeastWin2kSP4 =
2819               ((ver.dwMajorVersion > 5) || // Vista+
2820                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2821                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2822                ver.wServicePackMajor >= 4)) // 2000 SP4+
2823               ? 1 : 0;
2824         }
2825
2826         return s_isAtLeastWin2kSP4 == 1;
2827 #endif
2828     }
2829
2830
2831     // the code page we're working with
2832     long m_CodePage;
2833
2834     // cached result of GetMBNulLen(), set to 0 initially meaning
2835     // "unknown"
2836     size_t m_minMBCharWidth;
2837 };
2838
2839 #endif // wxHAVE_WIN32_MB2WC
2840
2841
2842 // ============================================================================
2843 // wxEncodingConverter based conversion classes
2844 // ============================================================================
2845
2846 #if wxUSE_FONTMAP
2847
2848 class wxMBConv_wxwin : public wxMBConv
2849 {
2850 private:
2851     void Init()
2852     {
2853         // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2854         // The wxMBConv_cf class does a better job.
2855         m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
2856                m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2857                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2858     }
2859
2860 public:
2861     // temporarily just use wxEncodingConverter stuff,
2862     // so that it works while a better implementation is built
2863     wxMBConv_wxwin(const char* name)
2864     {
2865         if (name)
2866             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2867         else
2868             m_enc = wxFONTENCODING_SYSTEM;
2869
2870         Init();
2871     }
2872
2873     wxMBConv_wxwin(wxFontEncoding enc)
2874     {
2875         m_enc = enc;
2876
2877         Init();
2878     }
2879
2880     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2881     {
2882         size_t inbuf = strlen(psz);
2883         if (buf)
2884         {
2885             if (!m2w.Convert(psz, buf))
2886                 return wxCONV_FAILED;
2887         }
2888         return inbuf;
2889     }
2890
2891     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2892     {
2893         const size_t inbuf = wxWcslen(psz);
2894         if (buf)
2895         {
2896             if (!w2m.Convert(psz, buf))
2897                 return wxCONV_FAILED;
2898         }
2899
2900         return inbuf;
2901     }
2902
2903     virtual size_t GetMBNulLen() const
2904     {
2905         switch ( m_enc )
2906         {
2907             case wxFONTENCODING_UTF16BE:
2908             case wxFONTENCODING_UTF16LE:
2909                 return 2;
2910
2911             case wxFONTENCODING_UTF32BE:
2912             case wxFONTENCODING_UTF32LE:
2913                 return 4;
2914
2915             default:
2916                 return 1;
2917         }
2918     }
2919
2920     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2921
2922     bool IsOk() const { return m_ok; }
2923
2924 public:
2925     wxFontEncoding m_enc;
2926     wxEncodingConverter m2w, w2m;
2927
2928 private:
2929     // were we initialized successfully?
2930     bool m_ok;
2931
2932     wxDECLARE_NO_COPY_CLASS(wxMBConv_wxwin);
2933 };
2934
2935 // make the constructors available for unit testing
2936 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
2937 {
2938     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2939     if ( !result->IsOk() )
2940     {
2941         delete result;
2942         return 0;
2943     }
2944
2945     return result;
2946 }
2947
2948 #endif // wxUSE_FONTMAP
2949
2950 // ============================================================================
2951 // wxCSConv implementation
2952 // ============================================================================
2953
2954 void wxCSConv::Init()
2955 {
2956     m_name = NULL;
2957     m_convReal =  NULL;
2958 }
2959
2960 void wxCSConv::SetEncoding(wxFontEncoding encoding)
2961 {
2962     switch ( encoding )
2963     {
2964         case wxFONTENCODING_MAX:
2965         case wxFONTENCODING_SYSTEM:
2966             if ( m_name )
2967             {
2968                 // It's ok to not have encoding value if we have a name for it.
2969                 m_encoding = wxFONTENCODING_SYSTEM;
2970             }
2971             else // No name neither.
2972             {
2973                 // Fall back to the system default encoding in this case (not
2974                 // sure how much sense does this make but this is how the old
2975                 // code used to behave).
2976 #if wxUSE_INTL
2977                 m_encoding = wxLocale::GetSystemEncoding();
2978                 if ( m_encoding == wxFONTENCODING_SYSTEM )
2979 #endif // wxUSE_INTL
2980                     m_encoding = wxFONTENCODING_ISO8859_1;
2981             }
2982             break;
2983
2984         case wxFONTENCODING_DEFAULT:
2985             // wxFONTENCODING_DEFAULT is same as US-ASCII in this context
2986             m_encoding = wxFONTENCODING_ISO8859_1;
2987             break;
2988
2989         default:
2990             // Just use the provided encoding.
2991             m_encoding = encoding;
2992     }
2993 }
2994
2995 wxCSConv::wxCSConv(const wxString& charset)
2996 {
2997     Init();
2998
2999     if ( !charset.empty() )
3000     {
3001         SetName(charset.ToAscii());
3002     }
3003
3004 #if wxUSE_FONTMAP
3005     SetEncoding(wxFontMapperBase::GetEncodingFromName(charset));
3006 #else
3007     SetEncoding(wxFONTENCODING_SYSTEM);
3008 #endif
3009
3010     m_convReal = DoCreate();
3011 }
3012
3013 wxCSConv::wxCSConv(wxFontEncoding encoding)
3014 {
3015     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3016     {
3017         wxFAIL_MSG( wxT("invalid encoding value in wxCSConv ctor") );
3018
3019         encoding = wxFONTENCODING_SYSTEM;
3020     }
3021
3022     Init();
3023
3024     SetEncoding(encoding);
3025
3026     m_convReal = DoCreate();
3027 }
3028
3029 wxCSConv::~wxCSConv()
3030 {
3031     Clear();
3032 }
3033
3034 wxCSConv::wxCSConv(const wxCSConv& conv)
3035         : wxMBConv()
3036 {
3037     Init();
3038
3039     SetName(conv.m_name);
3040     SetEncoding(conv.m_encoding);
3041
3042     m_convReal = DoCreate();
3043 }
3044
3045 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3046 {
3047     Clear();
3048
3049     SetName(conv.m_name);
3050     SetEncoding(conv.m_encoding);
3051
3052     m_convReal = DoCreate();
3053
3054     return *this;
3055 }
3056
3057 void wxCSConv::Clear()
3058 {
3059     free(m_name);
3060     m_name = NULL;
3061
3062     wxDELETE(m_convReal);
3063 }
3064
3065 void wxCSConv::SetName(const char *charset)
3066 {
3067     if ( charset )
3068         m_name = wxStrdup(charset);
3069 }
3070
#if wxUSE_FONTMAP

// Hash map from an encoding to a charset name.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache used by wxCSConv::DoCreate(): for each encoding it stores the name
// with which a wxMBConv_iconv could be successfully created or an empty
// string if none of the names of this encoding worked.
static wxEncodingNameCache gs_nameCache;
#endif
3078
3079 wxMBConv *wxCSConv::DoCreate() const
3080 {
3081 #if wxUSE_FONTMAP
3082     wxLogTrace(TRACE_STRCONV,
3083                wxT("creating conversion for %s"),
3084                (m_name ? m_name
3085                        : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
3086 #endif // wxUSE_FONTMAP
3087
3088     // check for the special case of ASCII or ISO8859-1 charset: as we have
3089     // special knowledge of it anyhow, we don't need to create a special
3090     // conversion object
3091     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3092     {
3093         // don't convert at all
3094         return NULL;
3095     }
3096
3097     // we trust OS to do conversion better than we can so try external
3098     // conversion methods first
3099     //
3100     // the full order is:
3101     //      1. OS conversion (iconv() under Unix or Win32 API)
3102     //      2. hard coded conversions for UTF
3103     //      3. wxEncodingConverter as fall back
3104
3105     // step (1)
3106 #ifdef HAVE_ICONV
3107 #if !wxUSE_FONTMAP
3108     if ( m_name )
3109 #endif // !wxUSE_FONTMAP
3110     {
3111 #if wxUSE_FONTMAP
3112         wxFontEncoding encoding(m_encoding);
3113 #endif
3114
3115         if ( m_name )
3116         {
3117             wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
3118             if ( conv->IsOk() )
3119                 return conv;
3120
3121             delete conv;
3122
3123 #if wxUSE_FONTMAP
3124             encoding =
3125                 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3126 #endif // wxUSE_FONTMAP
3127         }
3128 #if wxUSE_FONTMAP
3129         {
3130             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3131             if ( it != gs_nameCache.end() )
3132             {
3133                 if ( it->second.empty() )
3134                     return NULL;
3135
3136                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
3137                 if ( conv->IsOk() )
3138                     return conv;
3139
3140                 delete conv;
3141             }
3142
3143             const wxChar* const* names = wxFontMapperBase::GetAllEncodingNames(encoding);
3144             // CS : in case this does not return valid names (eg for MacRoman)
3145             // encoding got a 'failure' entry in the cache all the same,
3146             // although it just has to be created using a different method, so
3147             // only store failed iconv creation attempts (or perhaps we
3148             // shoulnd't do this at all ?)
3149             if ( names[0] != NULL )
3150             {
3151                 for ( ; *names; ++names )
3152                 {
3153                     // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3154                     //             will need changes that will obsolete this
3155                     wxString name(*names);
3156                     wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3157                     if ( conv->IsOk() )
3158                     {
3159                         gs_nameCache[encoding] = *names;
3160                         return conv;
3161                     }
3162
3163                     delete conv;
3164                 }
3165
3166                 gs_nameCache[encoding] = wxT(""); // cache the failure
3167             }
3168         }
3169 #endif // wxUSE_FONTMAP
3170     }
3171 #endif // HAVE_ICONV
3172
3173 #ifdef wxHAVE_WIN32_MB2WC
3174     {
3175 #if wxUSE_FONTMAP
3176         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3177                                       : new wxMBConv_win32(m_encoding);
3178         if ( conv->IsOk() )
3179             return conv;
3180
3181         delete conv;
3182 #else
3183         return NULL;
3184 #endif
3185     }
3186 #endif // wxHAVE_WIN32_MB2WC
3187
3188 #ifdef __DARWIN__
3189     {
3190         // leave UTF16 and UTF32 to the built-ins of wx
3191         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3192             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3193         {
3194 #if wxUSE_FONTMAP
3195             wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
3196                                           : new wxMBConv_cf(m_encoding);
3197 #else
3198             wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
3199 #endif
3200
3201             if ( conv->IsOk() )
3202                  return conv;
3203
3204             delete conv;
3205         }
3206     }
3207 #endif // __DARWIN__
3208
3209     // step (2)
3210     wxFontEncoding enc = m_encoding;
3211 #if wxUSE_FONTMAP
3212     if ( enc == wxFONTENCODING_SYSTEM && m_name )
3213     {
3214         // use "false" to suppress interactive dialogs -- we can be called from
3215         // anywhere and popping up a dialog from here is the last thing we want to
3216         // do
3217         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3218     }
3219 #endif // wxUSE_FONTMAP
3220
3221     switch ( enc )
3222     {
3223         case wxFONTENCODING_UTF7:
3224              return new wxMBConvUTF7;
3225
3226         case wxFONTENCODING_UTF8:
3227              return new wxMBConvUTF8;
3228
3229         case wxFONTENCODING_UTF16BE:
3230              return new wxMBConvUTF16BE;
3231
3232         case wxFONTENCODING_UTF16LE:
3233              return new wxMBConvUTF16LE;
3234
3235         case wxFONTENCODING_UTF32BE:
3236              return new wxMBConvUTF32BE;
3237
3238         case wxFONTENCODING_UTF32LE:
3239              return new wxMBConvUTF32LE;
3240
3241         default:
3242              // nothing to do but put here to suppress gcc warnings
3243              break;
3244     }
3245
3246     // step (3)
3247 #if wxUSE_FONTMAP
3248     {
3249         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3250                                       : new wxMBConv_wxwin(m_encoding);
3251         if ( conv->IsOk() )
3252             return conv;
3253
3254         delete conv;
3255     }
3256
3257     wxLogTrace(TRACE_STRCONV,
3258                wxT("encoding \"%s\" is not supported by this system"),
3259                (m_name ? wxString(m_name)
3260                        : wxFontMapperBase::GetEncodingName(m_encoding)));
3261 #endif // wxUSE_FONTMAP
3262
3263     return NULL;
3264 }
3265
3266 bool wxCSConv::IsOk() const
3267 {
3268     // special case: no convReal created for wxFONTENCODING_ISO8859_1
3269     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3270         return true; // always ok as we do it ourselves
3271
3272     // m_convReal->IsOk() is called at its own creation, so we know it must
3273     // be ok if m_convReal is non-NULL
3274     return m_convReal != NULL;
3275 }
3276
3277 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3278                          const char *src, size_t srcLen) const
3279 {
3280     if (m_convReal)
3281         return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3282
3283     // latin-1 (direct)
3284     if ( srcLen == wxNO_LEN )
3285         srcLen = strlen(src) + 1; // take trailing NUL too
3286
3287     if ( dst )
3288     {
3289         if ( dstLen < srcLen )
3290             return wxCONV_FAILED;
3291
3292         for ( size_t n = 0; n < srcLen; n++ )
3293             dst[n] = (unsigned char)(src[n]);
3294     }
3295
3296     return srcLen;
3297 }
3298
3299 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3300                            const wchar_t *src, size_t srcLen) const
3301 {
3302     if (m_convReal)
3303         return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3304
3305     // latin-1 (direct)
3306     if ( srcLen == wxNO_LEN )
3307         srcLen = wxWcslen(src) + 1;
3308
3309     if ( dst )
3310     {
3311         if ( dstLen < srcLen )
3312             return wxCONV_FAILED;
3313
3314         for ( size_t n = 0; n < srcLen; n++ )
3315         {
3316             if ( src[n] > 0xFF )
3317                 return wxCONV_FAILED;
3318
3319             dst[n] = (char)src[n];
3320         }
3321
3322     }
3323     else // still need to check the input validity
3324     {
3325         for ( size_t n = 0; n < srcLen; n++ )
3326         {
3327             if ( src[n] > 0xFF )
3328                 return wxCONV_FAILED;
3329         }
3330     }
3331
3332     return srcLen;
3333 }
3334
3335 size_t wxCSConv::GetMBNulLen() const
3336 {
3337     if ( m_convReal )
3338         return m_convReal->GetMBNulLen();
3339
3340     // otherwise, we are ISO-8859-1
3341     return 1;
3342 }
3343
3344 #if wxUSE_UNICODE_UTF8
3345 bool wxCSConv::IsUTF8() const
3346 {
3347     if ( m_convReal )
3348         return m_convReal->IsUTF8();
3349
3350     // otherwise, we are ISO-8859-1
3351     return false;
3352 }
3353 #endif
3354
3355
3356 #if wxUSE_UNICODE
3357
3358 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3359 {
3360     if ( !s )
3361         return wxWCharBuffer();
3362
3363     wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3364     if ( !wbuf )
3365         wbuf = wxMBConvUTF8().cMB2WX(s);
3366     if ( !wbuf )
3367         wbuf = wxConvISO8859_1.cMB2WX(s);
3368
3369     return wbuf;
3370 }
3371
3372 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3373 {
3374     if ( !ws )
3375         return wxCharBuffer();
3376
3377     wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3378     if ( !buf )
3379         buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3380
3381     return buf;
3382 }
3383
3384 #endif // wxUSE_UNICODE
3385
3386 // ----------------------------------------------------------------------------
3387 // globals
3388 // ----------------------------------------------------------------------------
3389
// NB: The reason why we create converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object). In other
//     words, possibly _before_ the converter global object would be
//     initialized.
3396
3397 #undef wxConvLibc
3398 #undef wxConvUTF8
3399 #undef wxConvUTF7
3400 #undef wxConvLocal
3401 #undef wxConvISO8859_1
3402
// Define everything needed for the global converter "name":
//
//  - the exported pointer name##Ptr, initialized to NULL here
//  - the exported function wxGet_##name##Ptr() returning, as a klass
//    pointer, the address of a function-local static object of type
//    impl_klass constructed with ctor_args
//  - a file-local pointer initialized by calling this function, which
//    forces the creation of the object during static initialization
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// Same as WX_DEFINE_GLOBAL_CONV2 for the case when the class of the object
// is the same as the class of the pointer returned for it.
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3417
3418 #ifdef __INTELC__
3419     // disable warning "variable 'xxx' was declared but never referenced"
3420     #pragma warning(disable: 177)
3421 #endif // Intel C++
3422
// wxConvLibc is a default-constructed wxMBConv_win32 under Windows and a
// default-constructed wxMBConvLibc elsewhere; the wxMBConv_cf-based variant
// in the middle branch is currently disabled by "#elif 0".
#ifdef __WINDOWS__
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#elif 0 // defined(__WXOSX__)
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_cf, wxConvLibc,  (wxFONTENCODING_UTF8));
#else
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif
3430
// NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
//     passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
//     provokes an error message about "not enough macro parameters"; and we
//     can't use "()" here as the name##Obj declaration would be parsed as a
//     function declaration then, so use a semicolon and live with an extra
//     empty statement (and hope that no compiler warns about this)
// the UTF-8 and UTF-7 converters are default-constructed
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);

// wxCSConv objects for the system encoding and for ISO8859-1
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));

// wxConvCurrent initially points to the wxConvLibc object and wxConvUI to
// the wxConvLocal one
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
3445
#ifdef __DARWIN__
// It is important to use this conversion object under Darwin as it ensures
// that Unicode strings are (re)composed correctly even though xnu kernel uses
// decomposed form internally (at least for the file names).
static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
#endif

// wxConvFileName initially points to the wxMBConv_cf object defined above
// under Darwin and to the wxConvLibc object everywhere else.
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
#ifdef __DARWIN__
                                    &wxConvMacUTF8DObj;
#else // !__DARWIN__
                                    wxGet_wxConvLibcPtr();
#endif // __DARWIN__/!__DARWIN__