src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
   9 //              (c) 2000-2003 Vadim Zeitlin
  10 //              (c) 2004 Ryan Norton, Fredrik Roubert
  11 // Licence:     wxWindows licence
  12 /////////////////////////////////////////////////////////////////////////////
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18     #pragma hdrstop
  19 #endif  //__BORLANDC__
  20
  21 #ifndef WX_PRECOMP
  22     #include "wx/intl.h"
  23     #include "wx/log.h"
  24     #include "wx/utils.h"
  25     #include "wx/hashmap.h"
  26 #endif
  27
  28 #include "wx/strconv.h"
  29
  30 #ifndef __WXWINCE__
  31 #include <errno.h>
  32 #endif
  33
  34 #include <ctype.h>
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  39     #include "wx/msw/private.h"
  40     #include "wx/msw/missing.h"
  41     #define wxHAVE_WIN32_MB2WC
  42 #endif
  43
  44 #ifdef HAVE_ICONV
  45     #include <iconv.h>
  46     #include "wx/thread.h"
  47 #endif
  48
  49 #include "wx/encconv.h"
  50 #include "wx/fontmap.h"
  51
  52 #ifdef __DARWIN__
  53 #include "wx/osx/core/private/strconv_cf.h"
  54 #endif //def __DARWIN__
  55
  56
  57 #define TRACE_STRCONV wxT("strconv")
  58
  59 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
  60 // be 4 bytes
  61 #if SIZEOF_WCHAR_T == 2
  62     #define WC_UTF16
  63 #endif
  64
  65
  66 // ============================================================================
  67 // implementation
  68 // ============================================================================
  69
  70 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  71 static bool NotAllNULs(const char *p, size_t n)
  72 {
  73     while ( n && *p++ == '\0' )
  74         n--;
  75
  76     return n != 0;
  77 }
  78
  79 // ----------------------------------------------------------------------------
  80 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
  81 // ----------------------------------------------------------------------------
  82
  83 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  84 {
  85     if (input <= 0xffff)
  86     {
  87         if (output)
  88             *output = (wxUint16) input;
  89
  90         return 1;
  91     }
  92     else if (input >= 0x110000)
  93     {
  94         return wxCONV_FAILED;
  95     }
  96     else
  97     {
  98         if (output)
  99         {
 100             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 101             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 102         }
 103
 104         return 2;
 105     }
 106 }
 107
 108 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 109 {
 110     if ((*input < 0xd800) || (*input > 0xdfff))
 111     {
 112         output = *input;
 113         return 1;
 114     }
 115     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 116     {
 117         output = *input;
 118         return wxCONV_FAILED;
 119     }
 120     else
 121     {
 122         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 123         return 2;
 124     }
 125 }
 126
 127 #ifdef WC_UTF16
 128     typedef wchar_t wxDecodeSurrogate_t;
 129 #else // !WC_UTF16
 130     typedef wxUint16 wxDecodeSurrogate_t;
 131 #endif // WC_UTF16/!WC_UTF16
 132
 133 // returns the next UTF-32 character from the wchar_t buffer and advances the
 134 // pointer to the character after this one
 135 //
 136 // if an invalid character is found, *pSrc is set to NULL, the caller must
 137 // check for this
 138 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 139 {
 140     wxUint32 out;
 141     const size_t
 142         n = decode_utf16(reinterpret_cast<const wxUint16 *>(*pSrc), out);
 143     if ( n == wxCONV_FAILED )
 144         *pSrc = NULL;
 145     else
 146         *pSrc += n;
 147
 148     return out;
 149 }
 150
 151 // ----------------------------------------------------------------------------
 152 // wxMBConv
 153 // ----------------------------------------------------------------------------
 154
 155 size_t
 156 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
 157                   const char *src, size_t srcLen) const
 158 {
 159     // although new conversion classes are supposed to implement this function
 160     // directly, the existing ones only implement the old MB2WC() and so, to
 161     // avoid to have to rewrite all conversion classes at once, we provide a
 162     // default (but not efficient) implementation of this one in terms of the
 163     // old function by copying the input to ensure that it's NUL-terminated and
 164     // then using MB2WC() to convert it
 165     //
 166     // moreover, some conversion classes simply can't implement ToWChar()
 167     // directly, the primary example is wxConvLibc: mbstowcs() only handles
 168     // NUL-terminated strings
 169
 170     // the number of chars [which would be] written to dst [if it were not NULL]
 171     size_t dstWritten = 0;
 172
 173     // the number of NULs terminating this string
 174     size_t nulLen = 0;  // not really needed, but just to avoid warnings
 175
 176     // if we were not given the input size we just have to assume that the
 177     // string is properly terminated as we have no way of knowing how long it
 178     // is anyhow, but if we do have the size check whether there are enough
 179     // NULs at the end
 180     wxCharBuffer bufTmp;
 181     const char *srcEnd;
 182     if ( srcLen != wxNO_LEN )
 183     {
 184         // we need to know how to find the end of this string
 185         nulLen = GetMBNulLen();
 186         if ( nulLen == wxCONV_FAILED )
 187             return wxCONV_FAILED;
 188
 189         // if there are enough NULs we can avoid the copy
 190         if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
 191         {
 192             // make a copy in order to properly NUL-terminate the string
 193             bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
 194             char * const p = bufTmp.data();
 195             memcpy(p, src, srcLen);
 196             for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
 197                 *s = '\0';
 198
 199             src = bufTmp;
 200         }
 201
 202         srcEnd = src + srcLen;
 203     }
 204     else // quit after the first loop iteration
 205     {
 206         srcEnd = NULL;
 207     }
 208
 209     // the idea of this code is straightforward: it converts a NUL-terminated
 210     // chunk of the string during each iteration and updates the output buffer
 211     // with the result
 212     //
 213     // all the complication come from the fact that this function, for
 214     // historical reasons, must behave in 2 subtly different ways when it's
 215     // called with a fixed number of characters and when it's called for the
 216     // entire NUL-terminated string: in the former case (srcEnd != NULL) we
 217     // must count all characters we convert, NUL or not; but in the latter we
 218     // do not count the trailing NUL -- but still count all the NULs inside the
 219     // string
 220     //
 221     // so for the (simple) former case we just always count the trailing NUL,
 222     // but for the latter we need to wait until we see if there is going to be
 223     // another loop iteration and only count it then
 224     for ( ;; )
 225     {
 226         // try to convert the current chunk
 227         size_t lenChunk = MB2WC(NULL, src, 0);
 228         if ( lenChunk == wxCONV_FAILED )
 229             return wxCONV_FAILED;
 230
 231         dstWritten += lenChunk;
 232         if ( !srcEnd )
 233             dstWritten++;
 234
 235         if ( !lenChunk )
 236         {
 237             // nothing left in the input string, conversion succeeded
 238             break;
 239         }
 240
 241         if ( dst )
 242         {
 243             if ( dstWritten > dstLen )
 244                 return wxCONV_FAILED;
 245
 246             // +1 is for trailing NUL
 247             if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
 248                 return wxCONV_FAILED;
 249
 250             dst += lenChunk;
 251             if ( !srcEnd )
 252                 dst++;
 253         }
 254
 255         if ( !srcEnd )
 256         {
 257             // we convert just one chunk in this case as this is the entire
 258             // string anyhow (and we don't count the trailing NUL in this case)
 259             break;
 260         }
 261
 262         // advance the input pointer past the end of this chunk: notice that we
 263         // will always stop before srcEnd because we know that the chunk is
 264         // always properly NUL-terminated
 265         while ( NotAllNULs(src, nulLen) )
 266         {
 267             // notice that we must skip over multiple bytes here as we suppose
 268             // that if NUL takes 2 or 4 bytes, then all the other characters do
 269             // too and so if advanced by a single byte we might erroneously
 270             // detect sequences of NUL bytes in the middle of the input
 271             src += nulLen;
 272         }
 273
 274         // if the buffer ends before this NUL, we shouldn't count it in our
 275         // output so skip the code below
 276         if ( src == srcEnd )
 277             break;
 278
 279         // do count this terminator as it's inside the buffer we convert
 280         dstWritten++;
 281         if ( dst )
 282             dst++;
 283
 284         src += nulLen; // skip the terminator itself
 285
 286         if ( src >= srcEnd )
 287             break;
 288     }
 289
 290     return dstWritten;
 291 }
 292
 293 size_t
 294 wxMBConv::FromWChar(char *dst, size_t dstLen,
 295                     const wchar_t *src, size_t srcLen) const
 296 {
 297     // the number of chars [which would be] written to dst [if it were not NULL]
 298     size_t dstWritten = 0;
 299
 300     // if we don't know its length we have no choice but to assume that it is
 301     // NUL-terminated (notice that it can still be NUL-terminated even if
 302     // explicit length is given but it doesn't change our return value)
 303     const bool isNulTerminated = srcLen == wxNO_LEN;
 304
 305     // make a copy of the input string unless it is already properly
 306     // NUL-terminated
 307     wxWCharBuffer bufTmp;
 308     if ( isNulTerminated )
 309     {
 310         srcLen = wxWcslen(src) + 1;
 311     }
 312     else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
 313     {
 314         // make a copy in order to properly NUL-terminate the string
 315         bufTmp = wxWCharBuffer(srcLen);
 316         memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
 317         src = bufTmp;
 318     }
 319
 320     const size_t lenNul = GetMBNulLen();
 321     for ( const wchar_t * const srcEnd = src + srcLen;
 322           src < srcEnd;
 323           src++ /* skip L'\0' too */ )
 324     {
 325         // try to convert the current chunk
 326         size_t lenChunk = WC2MB(NULL, src, 0);
 327         if ( lenChunk == wxCONV_FAILED )
 328             return wxCONV_FAILED;
 329
 330         dstWritten += lenChunk;
 331
 332         const wchar_t * const
 333             chunkEnd = isNulTerminated ? srcEnd - 1 : src + wxWcslen(src);
 334
 335         // our return value accounts for the trailing NUL(s), unlike that of
 336         // WC2MB(), however don't do it for the last NUL we artificially added
 337         // ourselves above
 338         if ( chunkEnd < srcEnd )
 339             dstWritten += lenNul;
 340
 341         if ( dst )
 342         {
 343             if ( dstWritten > dstLen )
 344                 return wxCONV_FAILED;
 345
 346             // if we know that there is enough space in the destination buffer
 347             // (because we accounted for lenNul in dstWritten above), we can
 348             // convert directly in place -- but otherwise we need another
 349             // temporary buffer to ensure that we don't overwrite the output
 350             wxCharBuffer dstBuf;
 351             char *dstTmp;
 352             if ( chunkEnd == srcEnd )
 353             {
 354                 dstBuf = wxCharBuffer(lenChunk + lenNul - 1);
 355                 dstTmp = dstBuf.data();
 356             }
 357             else
 358             {
 359                 dstTmp = dst;
 360             }
 361
 362             if ( WC2MB(dstTmp, src, lenChunk + lenNul) == wxCONV_FAILED )
 363                 return wxCONV_FAILED;
 364
 365             if ( dstTmp != dst )
 366             {
 367                 // copy everything up to but excluding the terminating NUL(s)
 368                 // into the real output buffer
 369                 memcpy(dst, dstTmp, lenChunk);
 370
 371                 // micro-optimization: if dstTmp != dst it means that chunkEnd
 372                 // == srcEnd and so we're done, no need to update anything below
 373                 break;
 374             }
 375
 376             dst += lenChunk;
 377             if ( chunkEnd < srcEnd )
 378                 dst += lenNul;
 379         }
 380
 381         src = chunkEnd;
 382     }
 383
 384     return dstWritten;
 385 }
 386
 387 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 388 {
 389     size_t rc = ToWChar(outBuff, outLen, inBuff);
 390     if ( rc != wxCONV_FAILED )
 391     {
 392         // ToWChar() returns the buffer length, i.e. including the trailing
 393         // NUL, while this method doesn't take it into account
 394         rc--;
 395     }
 396
 397     return rc;
 398 }
 399
 400 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 401 {
 402     size_t rc = FromWChar(outBuff, outLen, inBuff);
 403     if ( rc != wxCONV_FAILED )
 404     {
 405         rc -= GetMBNulLen();
 406     }
 407
 408     return rc;
 409 }
 410
wxMBConv::~wxMBConv()
{
    // intentionally empty: there is nothing to clean up here, the destructor
    // is only defined out of line in this file (probably necessary for
    // linking under Darwin)
}
 415
 416 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 417 {
 418     if ( psz )
 419     {
 420         // calculate the length of the buffer needed first
 421         const size_t nLen = ToWChar(NULL, 0, psz);
 422         if ( nLen != wxCONV_FAILED )
 423         {
 424             // now do the actual conversion
 425             wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */);
 426
 427             // +1 for the trailing NULL
 428             if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED )
 429                 return buf;
 430         }
 431     }
 432
 433     return wxWCharBuffer();
 434 }
 435
 436 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 437 {
 438     if ( pwz )
 439     {
 440         const size_t nLen = FromWChar(NULL, 0, pwz);
 441         if ( nLen != wxCONV_FAILED )
 442         {
 443             wxCharBuffer buf(nLen - 1);
 444             if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED )
 445                 return buf;
 446         }
 447     }
 448
 449     return wxCharBuffer();
 450 }
 451
 452 const wxWCharBuffer
 453 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 454 {
 455     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 456     if ( dstLen != wxCONV_FAILED )
 457     {
 458         // notice that we allocate space for dstLen+1 wide characters here
 459         // because we want the buffer to always be NUL-terminated, even if the
 460         // input isn't (as otherwise the caller has no way to know its length)
 461         wxWCharBuffer wbuf(dstLen);
 462         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 463         {
 464             if ( outLen )
 465             {
 466                 *outLen = dstLen;
 467
 468                 // we also need to handle NUL-terminated input strings
 469                 // specially: for them the output is the length of the string
 470                 // excluding the trailing NUL, however if we're asked to
 471                 // convert a specific number of characters we return the length
 472                 // of the resulting output even if it's NUL-terminated
 473                 if ( inLen == wxNO_LEN )
 474                     (*outLen)--;
 475             }
 476
 477             return wbuf;
 478         }
 479     }
 480
 481     if ( outLen )
 482         *outLen = 0;
 483
 484     return wxWCharBuffer();
 485 }
 486
 487 const wxCharBuffer
 488 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 489 {
 490     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 491     if ( dstLen != wxCONV_FAILED )
 492     {
 493         const size_t nulLen = GetMBNulLen();
 494
 495         // as above, ensure that the buffer is always NUL-terminated, even if
 496         // the input is not
 497         wxCharBuffer buf(dstLen + nulLen - 1);
 498         memset(buf.data() + dstLen, 0, nulLen);
 499         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 500         {
 501             if ( outLen )
 502             {
 503                 *outLen = dstLen;
 504
 505                 if ( inLen == wxNO_LEN )
 506                 {
 507                     // in this case both input and output are NUL-terminated
 508                     // and we're not supposed to count NUL
 509                     *outLen -= nulLen;
 510                 }
 511             }
 512
 513             return buf;
 514         }
 515     }
 516
 517     if ( outLen )
 518         *outLen = 0;
 519
 520     return wxCharBuffer();
 521 }
 522
 523 const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const
 524 {
 525     const size_t srcLen = buf.length();
 526     if ( srcLen )
 527     {
 528         const size_t dstLen = ToWChar(NULL, 0, buf, srcLen);
 529         if ( dstLen != wxCONV_FAILED )
 530         {
 531             wxWCharBuffer wbuf(dstLen);
 532             wbuf.data()[dstLen] = L'\0';
 533             if ( ToWChar(wbuf.data(), dstLen, buf, srcLen) != wxCONV_FAILED )
 534                 return wbuf;
 535         }
 536     }
 537
 538     return wxScopedWCharBuffer::CreateNonOwned(L"", 0);
 539 }
 540
 541 const wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const
 542 {
 543     const size_t srcLen = wbuf.length();
 544     if ( srcLen )
 545     {
 546         const size_t dstLen = FromWChar(NULL, 0, wbuf, srcLen);
 547         if ( dstLen != wxCONV_FAILED )
 548         {
 549             wxCharBuffer buf(dstLen);
 550             buf.data()[dstLen] = '\0';
 551             if ( FromWChar(buf.data(), dstLen, wbuf, srcLen) != wxCONV_FAILED )
 552                 return buf;
 553         }
 554     }
 555
 556     return wxScopedCharBuffer::CreateNonOwned("", 0);
 557 }
 558
 559 // ----------------------------------------------------------------------------
 560 // wxMBConvLibc
 561 // ----------------------------------------------------------------------------
 562
// Multibyte to wide character conversion: forwards all the arguments
// unchanged to wxMB2WC() and returns its result.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 567
// Wide character to multibyte conversion: forwards all the arguments
// unchanged to wxWC2MB() and returns its result.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 572
 573 // ----------------------------------------------------------------------------
 574 // wxConvBrokenFileNames
 575 // ----------------------------------------------------------------------------
 576
 577 #ifdef __UNIX__
 578
 579 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
 580 {
 581     if ( wxStricmp(charset, wxT("UTF-8")) == 0 ||
 582          wxStricmp(charset, wxT("UTF8")) == 0  )
 583         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
 584     else
 585         m_conv = new wxCSConv(charset);
 586 }
 587
 588 #endif // __UNIX__
 589
 590 // ----------------------------------------------------------------------------
 591 // UTF-7
 592 // ----------------------------------------------------------------------------
 593
 594 // Implementation (C) 2004 Fredrik Roubert
 595 //
 596 // Changes to work in streaming mode (C) 2008 Vadim Zeitlin
 597
 598 //
 599 // BASE64 decoding table
 600 //
 601 static const unsigned char utf7unb64[] =
 602 {
 603     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 604     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 605     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 606     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 607     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 608     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 609     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 610     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 611     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 612     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 613     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 614     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 615     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 616     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 617     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 618     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 619     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 620     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 621     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 622     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 623     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 624     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 625     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 626     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 627     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 628     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 629     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 630     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 631     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 632     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 633     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 634     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 635 };
 636
 637 size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
 638                              const char *src, size_t srcLen) const
 639 {
 640     DecoderState stateOrig,
 641                 *statePtr;
 642     if ( srcLen == wxNO_LEN )
 643     {
 644         // convert the entire string, up to and including the trailing NUL
 645         srcLen = strlen(src) + 1;
 646
 647         // when working on the entire strings we don't update nor use the shift
 648         // state from the previous call
 649         statePtr = &stateOrig;
 650     }
 651     else // when working with partial strings we do use the shift state
 652     {
 653         statePtr = const_cast<DecoderState *>(&m_stateDecoder);
 654
 655         // also save the old state to be able to rollback to it on error
 656         stateOrig = m_stateDecoder;
 657     }
 658
 659     // but to simplify the code below we use this variable in both cases
 660     DecoderState& state = *statePtr;
 661
 662
 663     // number of characters [which would have been] written to dst [if it were
 664     // not NULL]
 665     size_t len = 0;
 666
 667     const char * const srcEnd = src + srcLen;
 668
 669     while ( (src < srcEnd) && (!dst || (len < dstLen)) )
 670     {
 671         const unsigned char cc = *src++;
 672
 673         if ( state.IsShifted() )
 674         {
 675             const unsigned char dc = utf7unb64[cc];
 676             if ( dc == 0xff )
 677             {
 678                 // end of encoded part, check that nothing was left: there can
 679                 // be up to 4 bits of 0 padding but nothing else (we also need
 680                 // to check isLSB as we count bits modulo 8 while a valid UTF-7
 681                 // encoded sequence must contain an integral number of UTF-16
 682                 // characters)
 683                 if ( state.isLSB || state.bit > 4 ||
 684                         (state.accum & ((1 << state.bit) - 1)) )
 685                 {
 686                     if ( !len )
 687                         state = stateOrig;
 688
 689                     return wxCONV_FAILED;
 690                 }
 691
 692                 state.ToDirect();
 693
 694                 // re-parse this character normally below unless it's '-' which
 695                 // is consumed by the decoder
 696                 if ( cc == '-' )
 697                     continue;
 698             }
 699             else // valid encoded character
 700             {
 701                 // mini base64 decoder: each character is 6 bits
 702                 state.bit += 6;
 703                 state.accum <<= 6;
 704                 state.accum += dc;
 705
 706                 if ( state.bit >= 8 )
 707                 {
 708                     // got the full byte, consume it
 709                     state.bit -= 8;
 710                     unsigned char b = (state.accum >> state.bit) & 0x00ff;
 711
 712                     if ( state.isLSB )
 713                     {
 714                         // we've got the full word, output it
 715                         if ( dst )
 716                             *dst++ = (state.msb << 8) | b;
 717                         len++;
 718                         state.isLSB = false;
 719                     }
 720                     else // MSB
 721                     {
 722                         // just store it while we wait for LSB
 723                         state.msb = b;
 724                         state.isLSB = true;
 725                     }
 726                 }
 727             }
 728         }
 729
 730         if ( state.IsDirect() )
 731         {
 732             // start of an encoded segment?
 733             if ( cc == '+' )
 734             {
 735                 if ( *src == '-' )
 736                 {
 737                     // just the encoded plus sign, don't switch to shifted mode
 738                     if ( dst )
 739                         *dst++ = '+';
 740                     len++;
 741                     src++;
 742                 }
 743                 else if ( utf7unb64[(unsigned)*src] == 0xff )
 744                 {
 745                     // empty encoded chunks are not allowed
 746                     if ( !len )
 747                         state = stateOrig;
 748
 749                     return wxCONV_FAILED;
 750                 }
 751                 else // base-64 encoded chunk follows
 752                 {
 753                     state.ToShifted();
 754                 }
 755             }
 756             else // not '+'
 757             {
 758                 // only printable 7 bit ASCII characters (with the exception of
 759                 // NUL, TAB, CR and LF) can be used directly
 760                 if ( cc >= 0x7f || (cc < ' ' &&
 761                       !(cc == '\0' || cc == '\t' || cc == '\r' || cc == '\n')) )
 762                     return wxCONV_FAILED;
 763
 764                 if ( dst )
 765                     *dst++ = cc;
 766                 len++;
 767             }
 768         }
 769     }
 770
 771     if ( !len )
 772     {
 773         // as we didn't read any characters we should be called with the same
 774         // data (followed by some more new data) again later so don't save our
 775         // state
 776         state = stateOrig;
 777
 778         return wxCONV_FAILED;
 779     }
 780
 781     return len;
 782 }
 783
 784 //
 785 // BASE64 encoding table
 786 //
 787 static const unsigned char utf7enb64[] =
 788 {
 789     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 790     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 791     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 792     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 793     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 794     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 795     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 796     '4', '5', '6', '7', '8', '9', '+', '/'
 797 };
 798
 799 //
 800 // UTF-7 encoding table
 801 //
 802 // 0 - Set D (directly encoded characters)
 803 // 1 - Set O (optional direct characters)
 804 // 2 - whitespace characters (optional)
 805 // 3 - special characters
 806 //
 807 static const unsigned char utf7encode[128] =
 808 {
 809     0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 810     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 811     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 812     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 813     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 814     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 815     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 816     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 817 };
 818
 819 static inline bool wxIsUTF7Direct(wchar_t wc)
 820 {
 821     return wc < 0x80 && utf7encode[wc] < 1;
 822 }
 823
 824 size_t wxMBConvUTF7::FromWChar(char *dst, size_t dstLen,
 825                                const wchar_t *src, size_t srcLen) const
 826 {
 827     EncoderState stateOrig,
 828                 *statePtr;
 829     if ( srcLen == wxNO_LEN )
 830     {
 831         // we don't apply the stored state when operating on entire strings at
 832         // once
 833         statePtr = &stateOrig;
 834
 835         srcLen = wxWcslen(src) + 1;
 836     }
 837     else // do use the mode we left the output in previously
 838     {
 839         stateOrig = m_stateEncoder;
 840         statePtr = const_cast<EncoderState *>(&m_stateEncoder);
 841     }
 842
 843     EncoderState& state = *statePtr;
 844
 845
 846     size_t len = 0;
 847
 848     const wchar_t * const srcEnd = src + srcLen;
 849     while ( src < srcEnd && (!dst || len < dstLen) )
 850     {
 851         wchar_t cc = *src++;
 852         if ( wxIsUTF7Direct(cc) )
 853         {
 854             if ( state.IsShifted() )
 855             {
 856                 // pad with zeros the last encoded block if necessary
 857                 if ( state.bit )
 858                 {
 859                     if ( dst )
 860                         *dst++ = utf7enb64[((state.accum % 16) << (6 - state.bit)) % 64];
 861                     len++;
 862                 }
 863
 864                 state.ToDirect();
 865
 866                 if ( dst )
 867                     *dst++ = '-';
 868                 len++;
 869             }
 870
 871             if ( dst )
 872                 *dst++ = (char)cc;
 873             len++;
 874         }
 875         else if ( cc == '+' && state.IsDirect() )
 876         {
 877             if ( dst )
 878             {
 879                 *dst++ = '+';
 880                 *dst++ = '-';
 881             }
 882
 883             len += 2;
 884         }
 885 #ifndef WC_UTF16
 886         else if (((wxUint32)cc) > 0xffff)
 887         {
 888             // no surrogate pair generation (yet?)
 889             return wxCONV_FAILED;
 890         }
 891 #endif
 892         else
 893         {
 894             if ( state.IsDirect() )
 895             {
 896                 state.ToShifted();
 897
 898                 if ( dst )
 899                     *dst++ = '+';
 900                 len++;
 901             }
 902
 903             // BASE64 encode string
 904             for ( ;; )
 905             {
 906                 for ( unsigned lsb = 0; lsb < 2; lsb++ )
 907                 {
 908                     state.accum <<= 8;
 909                     state.accum += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 910
 911                     for (state.bit += 8; state.bit >= 6; )
 912                     {
 913                         state.bit -= 6;
 914                         if ( dst )
 915                             *dst++ = utf7enb64[(state.accum >> state.bit) % 64];
 916                         len++;
 917                     }
 918                 }
 919
 920                 if ( src == srcEnd || wxIsUTF7Direct(cc = *src) )
 921                     break;
 922
 923                 src++;
 924             }
 925         }
 926     }
 927
 928     // we need to restore the original encoder state if we were called just to
 929     // calculate the amount of space needed as we will presumably be called
 930     // again to really convert the data now
 931     if ( !dst )
 932         state = stateOrig;
 933
 934     return len;
 935 }
 936
 937 // ----------------------------------------------------------------------------
 938 // UTF-8
 939 // ----------------------------------------------------------------------------
 940
// utf8_max[i] is the largest value fitting into the payload bits of a
// sequence of i + 1 bytes (7, 11, 16, 21, 26 and 31 bits respectively); the
// final 0xffffffff entry guarantees that looking for the first entry not
// smaller than a given 32 bit value always terminates
static const wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area range [wxUnicodePUA, wxUnicodePUAEnd)
// which we use to (temporarily) remap the bytes which are invalid in a UTF-8
// encoded string: it contains one code point for each possible byte value
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 948
 949 // this table gives the length of the UTF-8 encoding from its first character:
// this table gives the length of the UTF-8 encoding from its first character:
// it is indexed by the value of the lead byte and contains the total number of
// bytes in the sequence started by it (lead byte included) or 0 if this byte
// can't start a sequence at all
const unsigned char tableUtf8Lengths[256] = {
    // single-byte sequences (ASCII):
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F

    // these are invalid (continuation bytes 80..BF can't start a sequence
    // and C0 or C1 could only start an overlong encoding of an ASCII char):
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
    0, 0,                                            // C0,C1

    // two-byte sequences:
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF

    // three-byte sequences:
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF

    // four-byte sequences:
    4, 4, 4, 4, 4,                                   // F0..F4

    // these are invalid again (5- or 6-byte
    // sequences and sequences for code points
    // above U+10FFFF, as restricted by RFC 3629):
                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F5..FF
};
 983
// Strictly decode UTF-8 input into wide characters.
//
// dst and dstLen describe the output buffer: if dst is NULL or dstLen is 0
// nothing is written and only the number of wide characters which would be
// produced is computed. srcLen is the input length in bytes or wxNO_LEN if
// src is NUL-terminated, in which case the terminating NUL is converted too.
//
// Returns the number of wchar_t written (or needed) or wxCONV_FAILED if the
// input is malformed or the output buffer is too small.
size_t
wxMBConvStrictUTF8::ToWChar(wchar_t *dst, size_t dstLen,
                            const char *src, size_t srcLen) const
{
    // "out" remains NULL if we are only computing the required length
    wchar_t *out = dstLen ? dst : NULL;
    size_t written = 0;

    // NUL-terminated input is turned into explicit length input here: the
    // terminating NUL is included in the length and so is decoded as any
    // other ASCII character below, meaning that the wxNO_LEN tests in the
    // rest of this function are not expected to be true any more
    if ( srcLen == wxNO_LEN )
        srcLen = strlen(src) + 1;

    for ( const char *p = src; ; p++ )
    {
        if ( (srcLen == wxNO_LEN ? !*p : !srcLen) )
        {
            // all done successfully, just add the trailing NULL if we are not
            // using explicit length
            if ( srcLen == wxNO_LEN )
            {
                if ( out )
                {
                    if ( !dstLen )
                        break;

                    *out = L'\0';
                }

                written++;
            }

            return written;
        }

        // each decoded character uses up one element of the output buffer,
        // fail if there is no more space left in it
        if ( out && !dstLen-- )
            break;

        wxUint32 code;
        unsigned char c = *p;

        if ( c < 0x80 )
        {
            if ( srcLen == 0 ) // the test works for wxNO_LEN too
                break;

            if ( srcLen != wxNO_LEN )
                srcLen--;

            code = c;
        }
        else
        {
            // a 0 length means that this byte can't start a sequence
            unsigned len = tableUtf8Lengths[c];
            if ( !len )
                break;

            // the input must contain the entire sequence
            if ( srcLen < len ) // the test works for wxNO_LEN too
                break;

            if ( srcLen != wxNO_LEN )
                srcLen -= len;

            //   Char. number range   |        UTF-8 octet sequence
            //      (hexadecimal)     |              (binary)
            //  ----------------------+----------------------------------------
            //  0000 0000 - 0000 007F | 0xxxxxxx
            //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
            //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
            //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            //
            //  Code point value is stored in bits marked with 'x',
            //  lowest-order bit of the value on the right side in the diagram
            //  above.                                         (from RFC 3629)

            // mask to extract lead byte's value ('x' bits above), by sequence
            // length:
            static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };

            // mask and value of lead byte's most significant bits, by length:
            static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
            static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };

            len--; // it's more convenient to work with 0-based length here

            // extract the lead byte's value bits:
            if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
                break;

            code = c & leadValueMask[len];

            // all remaining bytes, if any, are handled in the same way
            // regardless of sequence's length: each of them must be of the
            // form 10xxxxxx and contributes 6 more bits to the code point
            for ( ; len; --len )
            {
                c = *++p;
                if ( (c & 0xC0) != 0x80 )
                    return wxCONV_FAILED;

                code <<= 6;
                code |= c & 0x3F;
            }
        }

        // NOTE(review): the decoded value is not checked for overlong
        // encodings, surrogates or values above U+10FFFF here -- confirm
        // whether this is intentional for a "strict" converter

#ifdef WC_UTF16
        // cast is ok because wchar_t == wxUint16 if WC_UTF16
        //
        // a result of 2 is accounted for as an extra output unit here,
        // presumably because the code point was stored as a surrogate pair
        //
        // NOTE(review): only one unit of dstLen was consumed above even in
        // this case -- confirm that the buffer can't be overrun here
        if ( encode_utf16(code, (wxUint16 *)out) == 2 )
        {
            if ( out )
                out++;
            written++;
        }
#else // !WC_UTF16
        if ( out )
            *out = code;
#endif // WC_UTF16/!WC_UTF16

        if ( out )
            out++;

        written++;
    }

    // we only get here (by breaking out of the loop above) if an error occurred
    return wxCONV_FAILED;
}
1106
1107 size_t
1108 wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
1109                               const wchar_t *src, size_t srcLen) const
1110 {
1111     char *out = dstLen ? dst : NULL;
1112     size_t written = 0;
1113
1114     for ( const wchar_t *wp = src; ; wp++ )
1115     {
1116         if ( (srcLen == wxNO_LEN ? !*wp : !srcLen) )
1117         {
1118             // all done successfully, just add the trailing NULL if we are not
1119             // using explicit length
1120             if ( srcLen == wxNO_LEN )
1121             {
1122                 if ( out )
1123                 {
1124                     if ( !dstLen )
1125                         break;
1126
1127                     *out = '\0';
1128                 }
1129
1130                 written++;
1131             }
1132
1133             return written;
1134         }
1135
1136         if ( srcLen != wxNO_LEN )
1137             srcLen--;
1138
1139         wxUint32 code;
1140 #ifdef WC_UTF16
1141         // cast is ok for WC_UTF16
1142         if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
1143         {
1144             // skip the next char too as we decoded a surrogate
1145             wp++;
1146             if ( srcLen != wxNO_LEN )
1147                 srcLen--;
1148         }
1149 #else // wchar_t is UTF-32
1150         code = *wp & 0x7fffffff;
1151 #endif
1152
1153         unsigned len;
1154         if ( code <= 0x7F )
1155         {
1156             len = 1;
1157             if ( out )
1158             {
1159                 if ( dstLen < len )
1160                     break;
1161
1162                 out[0] = (char)code;
1163             }
1164         }
1165         else if ( code <= 0x07FF )
1166         {
1167             len = 2;
1168             if ( out )
1169             {
1170                 if ( dstLen < len )
1171                     break;
1172
1173                 // NB: this line takes 6 least significant bits, encodes them as
1174                 // 10xxxxxx and discards them so that the next byte can be encoded:
1175                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1176                 out[0] = 0xC0 | code;
1177             }
1178         }
1179         else if ( code < 0xFFFF )
1180         {
1181             len = 3;
1182             if ( out )
1183             {
1184                 if ( dstLen < len )
1185                     break;
1186
1187                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1188                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1189                 out[0] = 0xE0 | code;
1190             }
1191         }
1192         else if ( code <= 0x10FFFF )
1193         {
1194             len = 4;
1195             if ( out )
1196             {
1197                 if ( dstLen < len )
1198                     break;
1199
1200                 out[3] = 0x80 | (code & 0x3F);  code >>= 6;
1201                 out[2] = 0x80 | (code & 0x3F);  code >>= 6;
1202                 out[1] = 0x80 | (code & 0x3F);  code >>= 6;
1203                 out[0] = 0xF0 | code;
1204             }
1205         }
1206         else
1207         {
1208             wxFAIL_MSG( wxT("trying to encode undefined Unicode character") );
1209             break;
1210         }
1211
1212         if ( out )
1213         {
1214             out += len;
1215             dstLen -= len;
1216         }
1217
1218         written += len;
1219     }
1220
1221     // we only get here if an error occurs during decoding
1222     return wxCONV_FAILED;
1223 }
1224
1225 size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n,
1226                              const char *psz, size_t srcLen) const
1227 {
1228     if ( m_options == MAP_INVALID_UTF8_NOT )
1229         return wxMBConvStrictUTF8::ToWChar(buf, n, psz, srcLen);
1230
1231     size_t len = 0;
1232
1233     // The length can be either given explicitly or computed implicitly for the
1234     // NUL-terminated strings.
1235     const bool isNulTerminated = srcLen == wxNO_LEN;
1236     while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n)))
1237     {
1238         const char *opsz = psz;
1239         bool invalid = false;
1240         unsigned char cc = *psz++, fc = cc;
1241         unsigned cnt;
1242         for (cnt = 0; fc & 0x80; cnt++)
1243             fc <<= 1;
1244
1245         if (!cnt)
1246         {
1247             // plain ASCII char
1248             if (buf)
1249                 *buf++ = cc;
1250             len++;
1251
1252             // escape the escape character for octal escapes
1253             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
1254                     && cc == '\\' && (!buf || len < n))
1255             {
1256                 if (buf)
1257                     *buf++ = cc;
1258                 len++;
1259             }
1260         }
1261         else
1262         {
1263             cnt--;
1264             if (!cnt)
1265             {
1266                 // invalid UTF-8 sequence
1267                 invalid = true;
1268             }
1269             else
1270             {
1271                 unsigned ocnt = cnt - 1;
1272                 wxUint32 res = cc & (0x3f >> cnt);
1273                 while (cnt--)
1274                 {
1275                     cc = *psz;
1276                     if ((cc & 0xC0) != 0x80)
1277                     {
1278                         // invalid UTF-8 sequence
1279                         invalid = true;
1280                         break;
1281                     }
1282
1283                     psz++;
1284                     res = (res << 6) | (cc & 0x3f);
1285                 }
1286
1287                 if (invalid || res <= utf8_max[ocnt])
1288                 {
1289                     // illegal UTF-8 encoding
1290                     invalid = true;
1291                 }
1292                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
1293                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
1294                 {
1295                     // if one of our PUA characters turns up externally
1296                     // it must also be treated as an illegal sequence
1297                     // (a bit like you have to escape an escape character)
1298                     invalid = true;
1299                 }
1300                 else
1301                 {
1302 #ifdef WC_UTF16
1303                     // cast is ok because wchar_t == wxUint16 if WC_UTF16
1304                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
1305                     if (pa == wxCONV_FAILED)
1306                     {
1307                         invalid = true;
1308                     }
1309                     else
1310                     {
1311                         if (buf)
1312                             buf += pa;
1313                         len += pa;
1314                     }
1315 #else // !WC_UTF16
1316                     if (buf)
1317                         *buf++ = (wchar_t)res;
1318                     len++;
1319 #endif // WC_UTF16/!WC_UTF16
1320                 }
1321             }
1322
1323             if (invalid)
1324             {
1325                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
1326                 {
1327                     while (opsz < psz && (!buf || len < n))
1328                     {
1329 #ifdef WC_UTF16
1330                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
1331                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
1332                         wxASSERT(pa != wxCONV_FAILED);
1333                         if (buf)
1334                             buf += pa;
1335                         opsz++;
1336                         len += pa;
1337 #else
1338                         if (buf)
1339                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
1340                         opsz++;
1341                         len++;
1342 #endif
1343                     }
1344                 }
1345                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1346                 {
1347                     while (opsz < psz && (!buf || len < n))
1348                     {
1349                         if ( buf && len + 3 < n )
1350                         {
1351                             unsigned char on = *opsz;
1352                             *buf++ = L'\\';
1353                             *buf++ = (wchar_t)( L'0' + on / 0100 );
1354                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
1355                             *buf++ = (wchar_t)( L'0' + on % 010 );
1356                         }
1357
1358                         opsz++;
1359                         len += 4;
1360                     }
1361                 }
1362                 else // MAP_INVALID_UTF8_NOT
1363                 {
1364                     return wxCONV_FAILED;
1365                 }
1366             }
1367         }
1368     }
1369
1370     if ( isNulTerminated )
1371     {
1372         // Add the trailing NUL in this case if we have a large enough buffer.
1373         if ( buf && (len < n) )
1374             *buf = 0;
1375
1376         // And count it in any case.
1377         len++;
1378     }
1379
1380     return len;
1381 }
1382
// Check if the given wide character is one of the octal digits '0'..'7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return !(wch > L'7');
}
1387
1388 size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
1389                                const wchar_t *psz, size_t srcLen) const
1390 {
1391     if ( m_options == MAP_INVALID_UTF8_NOT )
1392         return wxMBConvStrictUTF8::FromWChar(buf, n, psz, srcLen);
1393
1394     size_t len = 0;
1395
1396     // The length can be either given explicitly or computed implicitly for the
1397     // NUL-terminated strings.
1398     const bool isNulTerminated = srcLen == wxNO_LEN;
1399     while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n)))
1400     {
1401         wxUint32 cc;
1402
1403 #ifdef WC_UTF16
1404         // cast is ok for WC_UTF16
1405         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1406         psz += (pa == wxCONV_FAILED) ? 1 : pa;
1407 #else
1408         cc = (*psz++) & 0x7fffffff;
1409 #endif
1410
1411         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
1412                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
1413         {
1414             if (buf)
1415                 *buf++ = (char)(cc - wxUnicodePUA);
1416             len++;
1417         }
1418         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
1419                     && cc == L'\\' && psz[0] == L'\\' )
1420         {
1421             if (buf)
1422                 *buf++ = (char)cc;
1423             psz++;
1424             len++;
1425         }
1426         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
1427                     cc == L'\\' &&
1428                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
1429         {
1430             if (buf)
1431             {
1432                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
1433                                  (psz[1] - L'0') * 010 +
1434                                  (psz[2] - L'0'));
1435             }
1436
1437             psz += 3;
1438             len++;
1439         }
1440         else
1441         {
1442             unsigned cnt;
1443             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
1444             {
1445             }
1446
1447             if (!cnt)
1448             {
1449                 // plain ASCII char
1450                 if (buf)
1451                     *buf++ = (char) cc;
1452                 len++;
1453             }
1454             else
1455             {
1456                 len += cnt + 1;
1457                 if (buf)
1458                 {
1459                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
1460                     while (cnt--)
1461                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
1462                 }
1463             }
1464         }
1465     }
1466
1467     if ( isNulTerminated )
1468     {
1469         // Add the trailing NUL in this case if we have a large enough buffer.
1470         if ( buf && (len < n) )
1471             *buf = 0;
1472
1473         // And count it in any case.
1474         len++;
1475     }
1476
1477     return len;
1478 }
1479
1480 // ============================================================================
1481 // UTF-16
1482 // ============================================================================
1483
// wxMBConvUTF16straight is the name used in this file for the converter for
// the UTF-16 variant selected by WORDS_BIGENDIAN (big endian if it is defined,
// little endian otherwise) and wxMBConvUTF16swap is the name used for the
// converter for the other, byte-swapped, variant
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
1491
1492 /* static */
1493 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
1494 {
1495     if ( srcLen == wxNO_LEN )
1496     {
1497         // count the number of bytes in input, including the trailing NULs
1498         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1499         for ( srcLen = 1; *inBuff++; srcLen++ )
1500             ;
1501
1502         srcLen *= BYTES_PER_CHAR;
1503     }
1504     else // we already have the length
1505     {
1506         // we can only convert an entire number of UTF-16 characters
1507         if ( srcLen % BYTES_PER_CHAR )
1508             return wxCONV_FAILED;
1509     }
1510
1511     return srcLen;
1512 }
1513
1514 // case when in-memory representation is UTF-16 too
1515 #ifdef WC_UTF16
1516
1517 // ----------------------------------------------------------------------------
1518 // conversions without endianness change
1519 // ----------------------------------------------------------------------------
1520
1521 size_t
1522 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1523                                const char *src, size_t srcLen) const
1524 {
1525     // set up the scene for using memcpy() (which is presumably more efficient
1526     // than copying the bytes one by one)
1527     srcLen = GetLength(src, srcLen);
1528     if ( srcLen == wxNO_LEN )
1529         return wxCONV_FAILED;
1530
1531     const size_t inLen = srcLen / BYTES_PER_CHAR;
1532     if ( dst )
1533     {
1534         if ( dstLen < inLen )
1535             return wxCONV_FAILED;
1536
1537         memcpy(dst, src, srcLen);
1538     }
1539
1540     return inLen;
1541 }
1542
1543 size_t
1544 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1545                                  const wchar_t *src, size_t srcLen) const
1546 {
1547     if ( srcLen == wxNO_LEN )
1548         srcLen = wxWcslen(src) + 1;
1549
1550     srcLen *= BYTES_PER_CHAR;
1551
1552     if ( dst )
1553     {
1554         if ( dstLen < srcLen )
1555             return wxCONV_FAILED;
1556
1557         memcpy(dst, src, srcLen);
1558     }
1559
1560     return srcLen;
1561 }
1562
1563 // ----------------------------------------------------------------------------
1564 // endian-reversing conversions
1565 // ----------------------------------------------------------------------------
1566
1567 size_t
1568 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1569                            const char *src, size_t srcLen) const
1570 {
1571     srcLen = GetLength(src, srcLen);
1572     if ( srcLen == wxNO_LEN )
1573         return wxCONV_FAILED;
1574
1575     srcLen /= BYTES_PER_CHAR;
1576
1577     if ( dst )
1578     {
1579         if ( dstLen < srcLen )
1580             return wxCONV_FAILED;
1581
1582         const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1583         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1584         {
1585             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1586         }
1587     }
1588
1589     return srcLen;
1590 }
1591
1592 size_t
1593 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1594                              const wchar_t *src, size_t srcLen) const
1595 {
1596     if ( srcLen == wxNO_LEN )
1597         srcLen = wxWcslen(src) + 1;
1598
1599     srcLen *= BYTES_PER_CHAR;
1600
1601     if ( dst )
1602     {
1603         if ( dstLen < srcLen )
1604             return wxCONV_FAILED;
1605
1606         wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1607         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1608         {
1609             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1610         }
1611     }
1612
1613     return srcLen;
1614 }
1615
1616 #else // !WC_UTF16: wchar_t is UTF-32
1617
1618 // ----------------------------------------------------------------------------
1619 // conversions without endianness change
1620 // ----------------------------------------------------------------------------
1621
1622 size_t
1623 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1624                                const char *src, size_t srcLen) const
1625 {
1626     srcLen = GetLength(src, srcLen);
1627     if ( srcLen == wxNO_LEN )
1628         return wxCONV_FAILED;
1629
1630     const size_t inLen = srcLen / BYTES_PER_CHAR;
1631     if ( !dst )
1632     {
1633         // optimization: return maximal space which could be needed for this
1634         // string even if the real size could be smaller if the buffer contains
1635         // any surrogates
1636         return inLen;
1637     }
1638
1639     size_t outLen = 0;
1640     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1641     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1642     {
1643         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1644         if ( !inBuff )
1645             return wxCONV_FAILED;
1646
1647         if ( ++outLen > dstLen )
1648             return wxCONV_FAILED;
1649
1650         *dst++ = ch;
1651     }
1652
1653
1654     return outLen;
1655 }
1656
1657 size_t
1658 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1659                                  const wchar_t *src, size_t srcLen) const
1660 {
1661     if ( srcLen == wxNO_LEN )
1662         srcLen = wxWcslen(src) + 1;
1663
1664     size_t outLen = 0;
1665     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1666     for ( size_t n = 0; n < srcLen; n++ )
1667     {
1668         wxUint16 cc[2] = { 0 };
1669         const size_t numChars = encode_utf16(*src++, cc);
1670         if ( numChars == wxCONV_FAILED )
1671             return wxCONV_FAILED;
1672
1673         outLen += numChars * BYTES_PER_CHAR;
1674         if ( outBuff )
1675         {
1676             if ( outLen > dstLen )
1677                 return wxCONV_FAILED;
1678
1679             *outBuff++ = cc[0];
1680             if ( numChars == 2 )
1681             {
1682                 // second character of a surrogate
1683                 *outBuff++ = cc[1];
1684             }
1685         }
1686     }
1687
1688     return outLen;
1689 }
1690
1691 // ----------------------------------------------------------------------------
1692 // endian-reversing conversions
1693 // ----------------------------------------------------------------------------
1694
1695 size_t
1696 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1697                            const char *src, size_t srcLen) const
1698 {
1699     srcLen = GetLength(src, srcLen);
1700     if ( srcLen == wxNO_LEN )
1701         return wxCONV_FAILED;
1702
1703     const size_t inLen = srcLen / BYTES_PER_CHAR;
1704     if ( !dst )
1705     {
1706         // optimization: return maximal space which could be needed for this
1707         // string even if the real size could be smaller if the buffer contains
1708         // any surrogates
1709         return inLen;
1710     }
1711
1712     size_t outLen = 0;
1713     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
1714     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1715     {
1716         wxUint32 ch;
1717         wxUint16 tmp[2];
1718
1719         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1720         inBuff++;
1721         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1722
1723         const size_t numChars = decode_utf16(tmp, ch);
1724         if ( numChars == wxCONV_FAILED )
1725             return wxCONV_FAILED;
1726
1727         if ( numChars == 2 )
1728             inBuff++;
1729
1730         if ( ++outLen > dstLen )
1731             return wxCONV_FAILED;
1732
1733         *dst++ = ch;
1734     }
1735
1736
1737     return outLen;
1738 }
1739
1740 size_t
1741 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1742                              const wchar_t *src, size_t srcLen) const
1743 {
1744     if ( srcLen == wxNO_LEN )
1745         srcLen = wxWcslen(src) + 1;
1746
1747     size_t outLen = 0;
1748     wxUint16 *outBuff = reinterpret_cast<wxUint16 *>(dst);
1749     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1750     {
1751         wxUint16 cc[2] = { 0 };
1752         const size_t numChars = encode_utf16(*src, cc);
1753         if ( numChars == wxCONV_FAILED )
1754             return wxCONV_FAILED;
1755
1756         outLen += numChars * BYTES_PER_CHAR;
1757         if ( outBuff )
1758         {
1759             if ( outLen > dstLen )
1760                 return wxCONV_FAILED;
1761
1762             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1763             if ( numChars == 2 )
1764             {
1765                 // second character of a surrogate
1766                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1767             }
1768         }
1769     }
1770
1771     return outLen;
1772 }
1773
1774 #endif // WC_UTF16/!WC_UTF16
1775
1776
1777 // ============================================================================
1778 // UTF-32
1779 // ============================================================================
1780
// wxMBConvUTF32straight is the name used in this file for the converter for
// the UTF-32 variant selected by WORDS_BIGENDIAN (big endian if it is defined,
// little endian otherwise) and wxMBConvUTF32swap is the name used for the
// converter for the other, byte-swapped, variant
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF32straight  wxMBConvUTF32BE
    #define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
    #define wxMBConvUTF32swap      wxMBConvUTF32BE
    #define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// definitions of the global converter objects for both UTF-32 variants
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1792
1793 /* static */
1794 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1795 {
1796     if ( srcLen == wxNO_LEN )
1797     {
1798         // count the number of bytes in input, including the trailing NULs
1799         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1800         for ( srcLen = 1; *inBuff++; srcLen++ )
1801             ;
1802
1803         srcLen *= BYTES_PER_CHAR;
1804     }
1805     else // we already have the length
1806     {
1807         // we can only convert an entire number of UTF-32 characters
1808         if ( srcLen % BYTES_PER_CHAR )
1809             return wxCONV_FAILED;
1810     }
1811
1812     return srcLen;
1813 }
1814
1815 // case when in-memory representation is UTF-16
1816 #ifdef WC_UTF16
1817
1818 // ----------------------------------------------------------------------------
1819 // conversions without endianness change
1820 // ----------------------------------------------------------------------------
1821
1822 size_t
1823 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1824                                const char *src, size_t srcLen) const
1825 {
1826     srcLen = GetLength(src, srcLen);
1827     if ( srcLen == wxNO_LEN )
1828         return wxCONV_FAILED;
1829
1830     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1831     const size_t inLen = srcLen / BYTES_PER_CHAR;
1832     size_t outLen = 0;
1833     for ( size_t n = 0; n < inLen; n++ )
1834     {
1835         wxUint16 cc[2] = { 0 };
1836         const size_t numChars = encode_utf16(*inBuff++, cc);
1837         if ( numChars == wxCONV_FAILED )
1838             return wxCONV_FAILED;
1839
1840         outLen += numChars;
1841         if ( dst )
1842         {
1843             if ( outLen > dstLen )
1844                 return wxCONV_FAILED;
1845
1846             *dst++ = cc[0];
1847             if ( numChars == 2 )
1848             {
1849                 // second character of a surrogate
1850                 *dst++ = cc[1];
1851             }
1852         }
1853     }
1854
1855     return outLen;
1856 }
1857
1858 size_t
1859 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1860                                  const wchar_t *src, size_t srcLen) const
1861 {
1862     if ( srcLen == wxNO_LEN )
1863         srcLen = wxWcslen(src) + 1;
1864
1865     if ( !dst )
1866     {
1867         // optimization: return maximal space which could be needed for this
1868         // string instead of the exact amount which could be less if there are
1869         // any surrogates in the input
1870         //
1871         // we consider that surrogates are rare enough to make it worthwhile to
1872         // avoid running the loop below at the cost of slightly extra memory
1873         // consumption
1874         return srcLen * BYTES_PER_CHAR;
1875     }
1876
1877     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1878     size_t outLen = 0;
1879     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1880     {
1881         const wxUint32 ch = wxDecodeSurrogate(&src);
1882         if ( !src )
1883             return wxCONV_FAILED;
1884
1885         outLen += BYTES_PER_CHAR;
1886
1887         if ( outLen > dstLen )
1888             return wxCONV_FAILED;
1889
1890         *outBuff++ = ch;
1891     }
1892
1893     return outLen;
1894 }
1895
1896 // ----------------------------------------------------------------------------
1897 // endian-reversing conversions
1898 // ----------------------------------------------------------------------------
1899
1900 size_t
1901 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1902                            const char *src, size_t srcLen) const
1903 {
1904     srcLen = GetLength(src, srcLen);
1905     if ( srcLen == wxNO_LEN )
1906         return wxCONV_FAILED;
1907
1908     const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
1909     const size_t inLen = srcLen / BYTES_PER_CHAR;
1910     size_t outLen = 0;
1911     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1912     {
1913         wxUint16 cc[2] = { 0 };
1914         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1915         if ( numChars == wxCONV_FAILED )
1916             return wxCONV_FAILED;
1917
1918         outLen += numChars;
1919         if ( dst )
1920         {
1921             if ( outLen > dstLen )
1922                 return wxCONV_FAILED;
1923
1924             *dst++ = cc[0];
1925             if ( numChars == 2 )
1926             {
1927                 // second character of a surrogate
1928                 *dst++ = cc[1];
1929             }
1930         }
1931     }
1932
1933     return outLen;
1934 }
1935
1936 size_t
1937 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1938                              const wchar_t *src, size_t srcLen) const
1939 {
1940     if ( srcLen == wxNO_LEN )
1941         srcLen = wxWcslen(src) + 1;
1942
1943     if ( !dst )
1944     {
1945         // optimization: return maximal space which could be needed for this
1946         // string instead of the exact amount which could be less if there are
1947         // any surrogates in the input
1948         //
1949         // we consider that surrogates are rare enough to make it worthwhile to
1950         // avoid running the loop below at the cost of slightly extra memory
1951         // consumption
1952         return srcLen*BYTES_PER_CHAR;
1953     }
1954
1955     wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
1956     size_t outLen = 0;
1957     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1958     {
1959         const wxUint32 ch = wxDecodeSurrogate(&src);
1960         if ( !src )
1961             return wxCONV_FAILED;
1962
1963         outLen += BYTES_PER_CHAR;
1964
1965         if ( outLen > dstLen )
1966             return wxCONV_FAILED;
1967
1968         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1969     }
1970
1971     return outLen;
1972 }
1973
1974 #else // !WC_UTF16: wchar_t is UTF-32
1975
1976 // ----------------------------------------------------------------------------
1977 // conversions without endianness change
1978 // ----------------------------------------------------------------------------
1979
1980 size_t
1981 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1982                                const char *src, size_t srcLen) const
1983 {
1984     // use memcpy() as it should be much faster than hand-written loop
1985     srcLen = GetLength(src, srcLen);
1986     if ( srcLen == wxNO_LEN )
1987         return wxCONV_FAILED;
1988
1989     const size_t inLen = srcLen/BYTES_PER_CHAR;
1990     if ( dst )
1991     {
1992         if ( dstLen < inLen )
1993             return wxCONV_FAILED;
1994
1995         memcpy(dst, src, srcLen);
1996     }
1997
1998     return inLen;
1999 }
2000
2001 size_t
2002 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
2003                                  const wchar_t *src, size_t srcLen) const
2004 {
2005     if ( srcLen == wxNO_LEN )
2006         srcLen = wxWcslen(src) + 1;
2007
2008     srcLen *= BYTES_PER_CHAR;
2009
2010     if ( dst )
2011     {
2012         if ( dstLen < srcLen )
2013             return wxCONV_FAILED;
2014
2015         memcpy(dst, src, srcLen);
2016     }
2017
2018     return srcLen;
2019 }
2020
2021 // ----------------------------------------------------------------------------
2022 // endian-reversing conversions
2023 // ----------------------------------------------------------------------------
2024
2025 size_t
2026 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
2027                            const char *src, size_t srcLen) const
2028 {
2029     srcLen = GetLength(src, srcLen);
2030     if ( srcLen == wxNO_LEN )
2031         return wxCONV_FAILED;
2032
2033     srcLen /= BYTES_PER_CHAR;
2034
2035     if ( dst )
2036     {
2037         if ( dstLen < srcLen )
2038             return wxCONV_FAILED;
2039
2040         const wxUint32 *inBuff = reinterpret_cast<const wxUint32 *>(src);
2041         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
2042         {
2043             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
2044         }
2045     }
2046
2047     return srcLen;
2048 }
2049
2050 size_t
2051 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
2052                              const wchar_t *src, size_t srcLen) const
2053 {
2054     if ( srcLen == wxNO_LEN )
2055         srcLen = wxWcslen(src) + 1;
2056
2057     srcLen *= BYTES_PER_CHAR;
2058
2059     if ( dst )
2060     {
2061         if ( dstLen < srcLen )
2062             return wxCONV_FAILED;
2063
2064         wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst);
2065         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
2066         {
2067             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
2068         }
2069     }
2070
2071     return srcLen;
2072 }
2073
2074 #endif // WC_UTF16/!WC_UTF16
2075
2076
2077 // ============================================================================
2078 // The classes doing conversion using the iconv_xxx() functions
2079 // ============================================================================
2080
2081 #ifdef HAVE_ICONV
2082
// ICONV_FAILED(cres, bufLeft) checks whether an iconv() call returning cres
// and leaving bufLeft bytes in its input buffer has really failed.
//
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// cast applied to the address of the input buffer pointer passed to iconv();
// ICONV_CONST is not defined in this file, presumably the build configuration
// sets it to either "const" or nothing to match the system iconv() prototype
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// the value iconv_open() results are compared with to detect failure
#define ICONV_T_INVALID ((iconv_t)-1)

// WC_BSWAP reverses the byte order of one wchar_t and WC_ENC is the font
// encoding whose names are tried as the iconv charset representing wchar_t,
// both are selected according to the size of wchar_t
#if SIZEOF_WCHAR_T == 4
    #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF32
#elif SIZEOF_WCHAR_T == 2
    #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF16
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
2112
2113 // ----------------------------------------------------------------------------
2114 // wxMBConv_iconv: encapsulates an iconv character set
2115 // ----------------------------------------------------------------------------
2116
2117 class wxMBConv_iconv : public wxMBConv
2118 {
2119 public:
2120     wxMBConv_iconv(const char *name);
2121     virtual ~wxMBConv_iconv();
2122
2123     // implement base class virtual methods
2124     virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
2125                            const char *src, size_t srcLen = wxNO_LEN) const;
2126     virtual size_t FromWChar(char *dst, size_t dstLen,
2127                              const wchar_t *src, size_t srcLen = wxNO_LEN) const;
2128     virtual size_t GetMBNulLen() const;
2129
2130 #if wxUSE_UNICODE_UTF8
2131     virtual bool IsUTF8() const;
2132 #endif
2133
2134     virtual wxMBConv *Clone() const
2135     {
2136         wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
2137         p->m_minMBCharWidth = m_minMBCharWidth;
2138         return p;
2139     }
2140
2141     bool IsOk() const
2142         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
2143
2144 protected:
2145     // the iconv handlers used to translate from multibyte
2146     // to wide char and in the other direction
2147     iconv_t m2w,
2148             w2m;
2149
2150 #if wxUSE_THREADS
2151     // guards access to m2w and w2m objects
2152     wxMutex m_iconvMutex;
2153 #endif
2154
2155 private:
2156     // the name (for iconv_open()) of a wide char charset -- if none is
2157     // available on this machine, it will remain NULL
2158     static wxString ms_wcCharsetName;
2159
2160     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
2161     // different endian-ness than the native one
2162     static bool ms_wcNeedsSwap;
2163
2164
2165     // name of the encoding handled by this conversion
2166     const char *m_name;
2167
2168     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
2169     // initially
2170     size_t m_minMBCharWidth;
2171 };
2172
2173 // make the constructor available for unit testing
2174 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
2175 {
2176     wxMBConv_iconv* result = new wxMBConv_iconv( name );
2177     if ( !result->IsOk() )
2178     {
2179         delete result;
2180         return 0;
2181     }
2182
2183     return result;
2184 }
2185
// definitions of the static members shared by all wxMBConv_iconv objects: the
// charset name stays empty until a wxMBConv_iconv ctor finds the iconv name of
// the charset to use for wchar_t, the swap flag is determined at the same time
wxString wxMBConv_iconv::ms_wcCharsetName;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
2188
2189 wxMBConv_iconv::wxMBConv_iconv(const char *name)
2190               : m_name(wxStrdup(name))
2191 {
2192     m_minMBCharWidth = 0;
2193
2194     // check for charset that represents wchar_t:
2195     if ( ms_wcCharsetName.empty() )
2196     {
2197         wxLogTrace(TRACE_STRCONV, wxT("Looking for wide char codeset:"));
2198
2199 #if wxUSE_FONTMAP
2200         const wxChar *const *names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
2201 #else // !wxUSE_FONTMAP
2202         static const wxChar *const names_static[] =
2203         {
2204 #if SIZEOF_WCHAR_T == 4
2205             wxT("UCS-4"),
2206 #elif SIZEOF_WCHAR_T == 2
2207             wxT("UCS-2"),
2208 #endif
2209             NULL
2210         };
2211         const wxChar *const *names = names_static;
2212 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2213
2214         for ( ; *names && ms_wcCharsetName.empty(); ++names )
2215         {
2216             const wxString nameCS(*names);
2217
2218             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
2219             wxString nameXE(nameCS);
2220
2221 #ifdef WORDS_BIGENDIAN
2222                 nameXE += wxT("BE");
2223 #else // little endian
2224                 nameXE += wxT("LE");
2225 #endif
2226
2227             wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2228                        nameXE.c_str());
2229
2230             m2w = iconv_open(nameXE.ToAscii(), name);
2231             if ( m2w == ICONV_T_INVALID )
2232             {
2233                 // try charset w/o bytesex info (e.g. "UCS4")
2234                 wxLogTrace(TRACE_STRCONV, wxT("  trying charset \"%s\""),
2235                            nameCS.c_str());
2236                 m2w = iconv_open(nameCS.ToAscii(), name);
2237
2238                 // and check for bytesex ourselves:
2239                 if ( m2w != ICONV_T_INVALID )
2240                 {
2241                     char    buf[2], *bufPtr;
2242                     wchar_t wbuf[2];
2243                     size_t  insz, outsz;
2244                     size_t  res;
2245
2246                     buf[0] = 'A';
2247                     buf[1] = 0;
2248                     wbuf[0] = 0;
2249                     insz = 2;
2250                     outsz = SIZEOF_WCHAR_T * 2;
2251                     char* wbufPtr = (char*)wbuf;
2252                     bufPtr = buf;
2253
2254                     res = iconv(
2255                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
2256                         &wbufPtr, &outsz);
2257
2258                     if (ICONV_FAILED(res, insz))
2259                     {
2260                         wxLogLastError(wxT("iconv"));
2261                         wxLogError(_("Conversion to charset '%s' doesn't work."),
2262                                    nameCS.c_str());
2263                     }
2264                     else // ok, can convert to this encoding, remember it
2265                     {
2266                         ms_wcCharsetName = nameCS;
2267                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
2268                     }
2269                 }
2270             }
2271             else // use charset not requiring byte swapping
2272             {
2273                 ms_wcCharsetName = nameXE;
2274             }
2275         }
2276
2277         wxLogTrace(TRACE_STRCONV,
2278                    wxT("iconv wchar_t charset is \"%s\"%s"),
2279                    ms_wcCharsetName.empty() ? wxString("<none>")
2280                                             : ms_wcCharsetName,
2281                    ms_wcNeedsSwap ? wxT(" (needs swap)")
2282                                   : wxT(""));
2283     }
2284     else // we already have ms_wcCharsetName
2285     {
2286         m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
2287     }
2288
2289     if ( ms_wcCharsetName.empty() )
2290     {
2291         w2m = ICONV_T_INVALID;
2292     }
2293     else
2294     {
2295         w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
2296         if ( w2m == ICONV_T_INVALID )
2297         {
2298             wxLogTrace(TRACE_STRCONV,
2299                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
2300                        ms_wcCharsetName.c_str(), name);
2301         }
2302     }
2303 }
2304
2305 wxMBConv_iconv::~wxMBConv_iconv()
2306 {
2307     free(const_cast<char *>(m_name));
2308
2309     if ( m2w != ICONV_T_INVALID )
2310         iconv_close(m2w);
2311     if ( w2m != ICONV_T_INVALID )
2312         iconv_close(w2m);
2313 }
2314
2315 size_t
2316 wxMBConv_iconv::ToWChar(wchar_t *dst, size_t dstLen,
2317                         const char *src, size_t srcLen) const
2318 {
2319     if ( srcLen == wxNO_LEN )
2320     {
2321         // find the string length: notice that must be done differently for
2322         // NUL-terminated strings and UTF-16/32 which are terminated with 2/4
2323         // consecutive NULs
2324         const size_t nulLen = GetMBNulLen();
2325         switch ( nulLen )
2326         {
2327             default:
2328                 return wxCONV_FAILED;
2329
2330             case 1:
2331                 srcLen = strlen(src); // arguably more optimized than our version
2332                 break;
2333
2334             case 2:
2335             case 4:
2336                 // for UTF-16/32 not only we need to have 2/4 consecutive NULs
2337                 // but they also have to start at character boundary and not
2338                 // span two adjacent characters
2339                 const char *p;
2340                 for ( p = src; NotAllNULs(p, nulLen); p += nulLen )
2341                     ;
2342                 srcLen = p - src;
2343                 break;
2344         }
2345
2346         // when we're determining the length of the string ourselves we count
2347         // the terminating NUL(s) as part of it and always NUL-terminate the
2348         // output
2349         srcLen += nulLen;
2350     }
2351
2352     // we express length in the number of (wide) characters but iconv always
2353     // counts buffer sizes it in bytes
2354     dstLen *= SIZEOF_WCHAR_T;
2355
2356 #if wxUSE_THREADS
2357     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
2358     //     Unfortunately there are a couple of global wxCSConv objects such as
2359     //     wxConvLocal that are used all over wx code, so we have to make sure
2360     //     the handle is used by at most one thread at the time. Otherwise
2361     //     only a few wx classes would be safe to use from non-main threads
2362     //     as MB<->WC conversion would fail "randomly".
2363     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2364 #endif // wxUSE_THREADS
2365
2366     size_t res, cres;
2367     const char *pszPtr = src;
2368
2369     if ( dst )
2370     {
2371         char* bufPtr = (char*)dst;
2372
2373         // have destination buffer, convert there
2374         size_t dstLenOrig = dstLen;
2375         cres = iconv(m2w,
2376                      ICONV_CHAR_CAST(&pszPtr), &srcLen,
2377                      &bufPtr, &dstLen);
2378
2379         // convert the number of bytes converted as returned by iconv to the
2380         // number of (wide) characters converted that we need
2381         res = (dstLenOrig - dstLen) / SIZEOF_WCHAR_T;
2382
2383         if (ms_wcNeedsSwap)
2384         {
2385             // convert to native endianness
2386             for ( unsigned i = 0; i < res; i++ )
2387                 dst[i] = WC_BSWAP(dst[i]);
2388         }
2389     }
2390     else // no destination buffer
2391     {
2392         // convert using temp buffer to calculate the size of the buffer needed
2393         wchar_t tbuf[256];
2394         res = 0;
2395
2396         do
2397         {
2398             char* bufPtr = (char*)tbuf;
2399             dstLen = 8 * SIZEOF_WCHAR_T;
2400
2401             cres = iconv(m2w,
2402                          ICONV_CHAR_CAST(&pszPtr), &srcLen,
2403                          &bufPtr, &dstLen );
2404
2405             res += 8 - (dstLen / SIZEOF_WCHAR_T);
2406         }
2407         while ((cres == (size_t)-1) && (errno == E2BIG));
2408     }
2409
2410     if (ICONV_FAILED(cres, srcLen))
2411     {
2412         //VS: it is ok if iconv fails, hence trace only
2413         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2414         return wxCONV_FAILED;
2415     }
2416
2417     return res;
2418 }
2419
2420 size_t wxMBConv_iconv::FromWChar(char *dst, size_t dstLen,
2421                                  const wchar_t *src, size_t srcLen) const
2422 {
2423 #if wxUSE_THREADS
2424     // NB: explained in MB2WC
2425     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
2426 #endif
2427
2428     if ( srcLen == wxNO_LEN )
2429         srcLen = wxWcslen(src) + 1;
2430
2431     size_t inbuflen = srcLen * SIZEOF_WCHAR_T;
2432     size_t outbuflen = dstLen;
2433     size_t res, cres;
2434
2435     wchar_t *tmpbuf = 0;
2436
2437     if (ms_wcNeedsSwap)
2438     {
2439         // need to copy to temp buffer to switch endianness
2440         // (doing WC_BSWAP twice on the original buffer won't work, as it
2441         //  could be in read-only memory, or be accessed in some other thread)
2442         tmpbuf = (wchar_t *)malloc(inbuflen);
2443         for ( size_t i = 0; i < srcLen; i++ )
2444             tmpbuf[i] = WC_BSWAP(src[i]);
2445
2446         src = tmpbuf;
2447     }
2448
2449     char* inbuf = (char*)src;
2450     if ( dst )
2451     {
2452         // have destination buffer, convert there
2453         cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2454
2455         res = dstLen - outbuflen;
2456     }
2457     else // no destination buffer
2458     {
2459         // convert using temp buffer to calculate the size of the buffer needed
2460         char tbuf[256];
2461         res = 0;
2462         do
2463         {
2464             dst = tbuf;
2465             outbuflen = WXSIZEOF(tbuf);
2466
2467             cres = iconv(w2m, ICONV_CHAR_CAST(&inbuf), &inbuflen, &dst, &outbuflen);
2468
2469             res += WXSIZEOF(tbuf) - outbuflen;
2470         }
2471         while ((cres == (size_t)-1) && (errno == E2BIG));
2472     }
2473
2474     if (ms_wcNeedsSwap)
2475     {
2476         free(tmpbuf);
2477     }
2478
2479     if (ICONV_FAILED(cres, inbuflen))
2480     {
2481         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
2482         return wxCONV_FAILED;
2483     }
2484
2485     return res;
2486 }
2487
2488 size_t wxMBConv_iconv::GetMBNulLen() const
2489 {
2490     if ( m_minMBCharWidth == 0 )
2491     {
2492         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
2493
2494 #if wxUSE_THREADS
2495         // NB: explained in MB2WC
2496         wxMutexLocker lock(self->m_iconvMutex);
2497 #endif
2498
2499         const wchar_t *wnul = L"";
2500         char buf[8]; // should be enough for NUL in any encoding
2501         size_t inLen = sizeof(wchar_t),
2502                outLen = WXSIZEOF(buf);
2503         char *inBuff = (char *)wnul;
2504         char *outBuff = buf;
2505         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
2506         {
2507             self->m_minMBCharWidth = (size_t)-1;
2508         }
2509         else // ok
2510         {
2511             self->m_minMBCharWidth = outBuff - buf;
2512         }
2513     }
2514
2515     return m_minMBCharWidth;
2516 }
2517
2518 #if wxUSE_UNICODE_UTF8
2519 bool wxMBConv_iconv::IsUTF8() const
2520 {
2521     return wxStricmp(m_name, "UTF-8") == 0 ||
2522            wxStricmp(m_name, "UTF8") == 0;
2523 }
2524 #endif
2525
2526 #endif // HAVE_ICONV
2527
2528
2529 // ============================================================================
2530 // Win32 conversion classes
2531 // ============================================================================
2532
2533 #ifdef wxHAVE_WIN32_MB2WC
2534
// from utils.cpp
//
// these functions are used by the wxMBConv_win32 ctors taking a charset name
// or a font encoding to find the corresponding code page, they are only
// declared, and those ctors only exist, if wxUSE_FONTMAP is enabled
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
2540
2541 class wxMBConv_win32 : public wxMBConv
2542 {
2543 public:
2544     wxMBConv_win32()
2545     {
2546         m_CodePage = CP_ACP;
2547         m_minMBCharWidth = 0;
2548     }
2549
2550     wxMBConv_win32(const wxMBConv_win32& conv)
2551         : wxMBConv()
2552     {
2553         m_CodePage = conv.m_CodePage;
2554         m_minMBCharWidth = conv.m_minMBCharWidth;
2555     }
2556
2557 #if wxUSE_FONTMAP
2558     wxMBConv_win32(const char* name)
2559     {
2560         m_CodePage = wxCharsetToCodepage(name);
2561         m_minMBCharWidth = 0;
2562     }
2563
2564     wxMBConv_win32(wxFontEncoding encoding)
2565     {
2566         m_CodePage = wxEncodingToCodepage(encoding);
2567         m_minMBCharWidth = 0;
2568     }
2569 #endif // wxUSE_FONTMAP
2570
2571     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2572     {
2573         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2574         // the behaviour is not compatible with the Unix version (using iconv)
2575         // and break the library itself, e.g. wxTextInputStream::NextChar()
2576         // wouldn't work if reading an incomplete MB char didn't result in an
2577         // error
2578         //
2579         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2580         // Win XP or newer and it is not supported for UTF-[78] so we always
2581         // use our own conversions in this case. See
2582         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2583         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2584         if ( m_CodePage == CP_UTF8 )
2585         {
2586             return wxMBConvUTF8().MB2WC(buf, psz, n);
2587         }
2588
2589         if ( m_CodePage == CP_UTF7 )
2590         {
2591             return wxMBConvUTF7().MB2WC(buf, psz, n);
2592         }
2593
2594         int flags = 0;
2595         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2596                 IsAtLeastWin2kSP4() )
2597         {
2598             flags = MB_ERR_INVALID_CHARS;
2599         }
2600
2601         const size_t len = ::MultiByteToWideChar
2602                              (
2603                                 m_CodePage,     // code page
2604                                 flags,          // flags: fall on error
2605                                 psz,            // input string
2606                                 -1,             // its length (NUL-terminated)
2607                                 buf,            // output string
2608                                 buf ? n : 0     // size of output buffer
2609                              );
2610         if ( !len )
2611         {
2612             // function totally failed
2613             return wxCONV_FAILED;
2614         }
2615
2616         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2617         // check if we succeeded, by doing a double trip:
2618         if ( !flags && buf )
2619         {
2620             const size_t mbLen = strlen(psz);
2621             wxCharBuffer mbBuf(mbLen);
2622             if ( ::WideCharToMultiByte
2623                    (
2624                       m_CodePage,
2625                       0,
2626                       buf,
2627                       -1,
2628                       mbBuf.data(),
2629                       mbLen + 1,        // size in bytes, not length
2630                       NULL,
2631                       NULL
2632                    ) == 0 ||
2633                   strcmp(mbBuf, psz) != 0 )
2634             {
2635                 // we didn't obtain the same thing we started from, hence
2636                 // the conversion was lossy and we consider that it failed
2637                 return wxCONV_FAILED;
2638             }
2639         }
2640
2641         // note that it returns count of written chars for buf != NULL and size
2642         // of the needed buffer for buf == NULL so in either case the length of
2643         // the string (which never includes the terminating NUL) is one less
2644         return len - 1;
2645     }
2646
2647     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2648     {
2649         /*
2650             we have a problem here: by default, WideCharToMultiByte() may
2651             replace characters unrepresentable in the target code page with bad
2652             quality approximations such as turning "1/2" symbol (U+00BD) into
2653             "1" for the code pages which don't have it and we, obviously, want
2654             to avoid this at any price
2655
2656             the trouble is that this function does it _silently_, i.e. it won't
2657             even tell us whether it did or not... Win98/2000 and higher provide
2658             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2659             we have to resort to a round trip, i.e. check that converting back
2660             results in the same string -- this is, of course, expensive but
2661             otherwise we simply can't be sure to not garble the data.
2662          */
2663
2664         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2665         // it doesn't work with CJK encodings (which we test for rather roughly
2666         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2667         // supporting it
2668         BOOL usedDef wxDUMMY_INITIALIZE(false);
2669         BOOL *pUsedDef;
2670         int flags;
2671         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2672         {
2673             // it's our lucky day
2674             flags = WC_NO_BEST_FIT_CHARS;
2675             pUsedDef = &usedDef;
2676         }
2677         else // old system or unsupported encoding
2678         {
2679             flags = 0;
2680             pUsedDef = NULL;
2681         }
2682
2683         const size_t len = ::WideCharToMultiByte
2684                              (
2685                                 m_CodePage,     // code page
2686                                 flags,          // either none or no best fit
2687                                 pwz,            // input string
2688                                 -1,             // it is (wide) NUL-terminated
2689                                 buf,            // output buffer
2690                                 buf ? n : 0,    // and its size
2691                                 NULL,           // default "replacement" char
2692                                 pUsedDef        // [out] was it used?
2693                              );
2694
2695         if ( !len )
2696         {
2697             // function totally failed
2698             return wxCONV_FAILED;
2699         }
2700
2701         // we did something, check if we really succeeded
2702         if ( flags )
2703         {
2704             // check if the conversion failed, i.e. if any replacements
2705             // were done
2706             if ( usedDef )
2707                 return wxCONV_FAILED;
2708         }
2709         else // we must resort to double tripping...
2710         {
2711             // first we need to ensure that we really have the MB data: this is
2712             // not the case if we're called with NULL buffer, in which case we
2713             // need to do the conversion yet again
2714             wxCharBuffer bufDef;
2715             if ( !buf )
2716             {
2717                 bufDef = wxCharBuffer(len);
2718                 buf = bufDef.data();
2719                 if ( !::WideCharToMultiByte(m_CodePage, flags, pwz, -1,
2720                                             buf, len, NULL, NULL) )
2721                     return wxCONV_FAILED;
2722             }
2723
2724             if ( !n )
2725                 n = wcslen(pwz);
2726             wxWCharBuffer wcBuf(n);
2727             if ( MB2WC(wcBuf.data(), buf, n + 1) == wxCONV_FAILED ||
2728                     wcscmp(wcBuf, pwz) != 0 )
2729             {
2730                 // we didn't obtain the same thing we started from, hence
2731                 // the conversion was lossy and we consider that it failed
2732                 return wxCONV_FAILED;
2733             }
2734         }
2735
2736         // see the comment above for the reason of "len - 1"
2737         return len - 1;
2738     }
2739
2740     virtual size_t GetMBNulLen() const
2741     {
2742         if ( m_minMBCharWidth == 0 )
2743         {
2744             int len = ::WideCharToMultiByte
2745                         (
2746                             m_CodePage,     // code page
2747                             0,              // no flags
2748                             L"",            // input string
2749                             1,              // translate just the NUL
2750                             NULL,           // output buffer
2751                             0,              // and its size
2752                             NULL,           // no replacement char
2753                             NULL            // [out] don't care if it was used
2754                         );
2755
2756             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2757             switch ( len )
2758             {
2759                 default:
2760                     wxLogDebug(wxT("Unexpected NUL length %d"), len);
2761                     self->m_minMBCharWidth = (size_t)-1;
2762                     break;
2763
2764                 case 0:
2765                     self->m_minMBCharWidth = (size_t)-1;
2766                     break;
2767
2768                 case 1:
2769                 case 2:
2770                 case 4:
2771                     self->m_minMBCharWidth = len;
2772                     break;
2773             }
2774         }
2775
2776         return m_minMBCharWidth;
2777     }
2778
2779     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2780
2781     bool IsOk() const { return m_CodePage != -1; }
2782
2783 private:
2784     static bool CanUseNoBestFit()
2785     {
2786         static int s_isWin98Or2k = -1;
2787
2788         if ( s_isWin98Or2k == -1 )
2789         {
2790             int verMaj, verMin;
2791             switch ( wxGetOsVersion(&verMaj, &verMin) )
2792             {
2793                 case wxOS_WINDOWS_9X:
2794                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2795                     break;
2796
2797                 case wxOS_WINDOWS_NT:
2798                     s_isWin98Or2k = verMaj >= 5;
2799                     break;
2800
2801                 default:
2802                     // unknown: be conservative by default
2803                     s_isWin98Or2k = 0;
2804                     break;
2805             }
2806
2807             wxASSERT_MSG( s_isWin98Or2k != -1, wxT("should be set above") );
2808         }
2809
2810         return s_isWin98Or2k == 1;
2811     }
2812
2813     static bool IsAtLeastWin2kSP4()
2814     {
2815 #ifdef __WXWINCE__
2816         return false;
2817 #else
2818         static int s_isAtLeastWin2kSP4 = -1;
2819
2820         if ( s_isAtLeastWin2kSP4 == -1 )
2821         {
2822             OSVERSIONINFOEX ver;
2823
2824             memset(&ver, 0, sizeof(ver));
2825             ver.dwOSVersionInfoSize = sizeof(ver);
2826             GetVersionEx((OSVERSIONINFO*)&ver);
2827
2828             s_isAtLeastWin2kSP4 =
2829               ((ver.dwMajorVersion > 5) || // Vista+
2830                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2831                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2832                ver.wServicePackMajor >= 4)) // 2000 SP4+
2833               ? 1 : 0;
2834         }
2835
2836         return s_isAtLeastWin2kSP4 == 1;
2837 #endif
2838     }
2839
2840
2841     // the code page we're working with
2842     long m_CodePage;
2843
2844     // cached result of GetMBNulLen(), set to 0 initially meaning
2845     // "unknown"
2846     size_t m_minMBCharWidth;
2847 };
2848
2849 #endif // wxHAVE_WIN32_MB2WC
2850
2851
2852 // ============================================================================
2853 // wxEncodingConverter based conversion classes
2854 // ============================================================================
2855
2856 #if wxUSE_FONTMAP
2857
2858 class wxMBConv_wxwin : public wxMBConv
2859 {
2860 private:
2861     void Init()
2862     {
2863         // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2864         // The wxMBConv_cf class does a better job.
2865         m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
2866                m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2867                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2868     }
2869
2870 public:
2871     // temporarily just use wxEncodingConverter stuff,
2872     // so that it works while a better implementation is built
2873     wxMBConv_wxwin(const char* name)
2874     {
2875         if (name)
2876             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2877         else
2878             m_enc = wxFONTENCODING_SYSTEM;
2879
2880         Init();
2881     }
2882
2883     wxMBConv_wxwin(wxFontEncoding enc)
2884     {
2885         m_enc = enc;
2886
2887         Init();
2888     }
2889
2890     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2891     {
2892         size_t inbuf = strlen(psz);
2893         if (buf)
2894         {
2895             if (!m2w.Convert(psz, buf))
2896                 return wxCONV_FAILED;
2897         }
2898         return inbuf;
2899     }
2900
2901     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2902     {
2903         const size_t inbuf = wxWcslen(psz);
2904         if (buf)
2905         {
2906             if (!w2m.Convert(psz, buf))
2907                 return wxCONV_FAILED;
2908         }
2909
2910         return inbuf;
2911     }
2912
2913     virtual size_t GetMBNulLen() const
2914     {
2915         switch ( m_enc )
2916         {
2917             case wxFONTENCODING_UTF16BE:
2918             case wxFONTENCODING_UTF16LE:
2919                 return 2;
2920
2921             case wxFONTENCODING_UTF32BE:
2922             case wxFONTENCODING_UTF32LE:
2923                 return 4;
2924
2925             default:
2926                 return 1;
2927         }
2928     }
2929
2930     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2931
2932     bool IsOk() const { return m_ok; }
2933
2934 public:
2935     wxFontEncoding m_enc;
2936     wxEncodingConverter m2w, w2m;
2937
2938 private:
2939     // were we initialized successfully?
2940     bool m_ok;
2941
2942     wxDECLARE_NO_COPY_CLASS(wxMBConv_wxwin);
2943 };
2944
2945 // make the constructors available for unit testing
2946 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
2947 {
2948     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2949     if ( !result->IsOk() )
2950     {
2951         delete result;
2952         return 0;
2953     }
2954
2955     return result;
2956 }
2957
2958 #endif // wxUSE_FONTMAP
2959
2960 // ============================================================================
2961 // wxCSConv implementation
2962 // ============================================================================
2963
2964 void wxCSConv::Init()
2965 {
2966     m_name = NULL;
2967     m_convReal =  NULL;
2968 }
2969
2970 void wxCSConv::SetEncoding(wxFontEncoding encoding)
2971 {
2972     switch ( encoding )
2973     {
2974         case wxFONTENCODING_MAX:
2975         case wxFONTENCODING_SYSTEM:
2976             if ( m_name )
2977             {
2978                 // It's ok to not have encoding value if we have a name for it.
2979                 m_encoding = wxFONTENCODING_SYSTEM;
2980             }
2981             else // No name neither.
2982             {
2983                 // Fall back to the system default encoding in this case (not
2984                 // sure how much sense does this make but this is how the old
2985                 // code used to behave).
2986 #if wxUSE_INTL
2987                 m_encoding = wxLocale::GetSystemEncoding();
2988                 if ( m_encoding == wxFONTENCODING_SYSTEM )
2989 #endif // wxUSE_INTL
2990                     m_encoding = wxFONTENCODING_ISO8859_1;
2991             }
2992             break;
2993
2994         case wxFONTENCODING_DEFAULT:
2995             // wxFONTENCODING_DEFAULT is same as US-ASCII in this context
2996             m_encoding = wxFONTENCODING_ISO8859_1;
2997             break;
2998
2999         default:
3000             // Just use the provided encoding.
3001             m_encoding = encoding;
3002     }
3003 }
3004
3005 wxCSConv::wxCSConv(const wxString& charset)
3006 {
3007     Init();
3008
3009     if ( !charset.empty() )
3010     {
3011         SetName(charset.ToAscii());
3012     }
3013
3014 #if wxUSE_FONTMAP
3015     SetEncoding(wxFontMapperBase::GetEncodingFromName(charset));
3016 #else
3017     SetEncoding(wxFONTENCODING_SYSTEM);
3018 #endif
3019
3020     m_convReal = DoCreate();
3021 }
3022
3023 wxCSConv::wxCSConv(wxFontEncoding encoding)
3024 {
3025     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3026     {
3027         wxFAIL_MSG( wxT("invalid encoding value in wxCSConv ctor") );
3028
3029         encoding = wxFONTENCODING_SYSTEM;
3030     }
3031
3032     Init();
3033
3034     SetEncoding(encoding);
3035
3036     m_convReal = DoCreate();
3037 }
3038
3039 wxCSConv::~wxCSConv()
3040 {
3041     Clear();
3042 }
3043
3044 wxCSConv::wxCSConv(const wxCSConv& conv)
3045         : wxMBConv()
3046 {
3047     Init();
3048
3049     SetName(conv.m_name);
3050     SetEncoding(conv.m_encoding);
3051
3052     m_convReal = DoCreate();
3053 }
3054
3055 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3056 {
3057     Clear();
3058
3059     SetName(conv.m_name);
3060     SetEncoding(conv.m_encoding);
3061
3062     m_convReal = DoCreate();
3063
3064     return *this;
3065 }
3066
3067 void wxCSConv::Clear()
3068 {
3069     free(m_name);
3070     m_name = NULL;
3071
3072     wxDELETE(m_convReal);
3073 }
3074
3075 void wxCSConv::SetName(const char *charset)
3076 {
3077     if ( charset )
3078         m_name = wxStrdup(charset);
3079 }
3080
#if wxUSE_FONTMAP

// Map from an encoding to a charset name, used by wxCSConv::DoCreate() to
// remember which of the names of an encoding iconv accepted. An empty string
// is stored when none of them worked.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// The cache itself, shared by all wxCSConv objects.
static wxEncodingNameCache gs_nameCache;
#endif
3088
3089 wxMBConv *wxCSConv::DoCreate() const
3090 {
3091 #if wxUSE_FONTMAP
3092     wxLogTrace(TRACE_STRCONV,
3093                wxT("creating conversion for %s"),
3094                (m_name ? m_name
3095                        : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
3096 #endif // wxUSE_FONTMAP
3097
3098     // check for the special case of ASCII or ISO8859-1 charset: as we have
3099     // special knowledge of it anyhow, we don't need to create a special
3100     // conversion object
3101     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3102     {
3103         // don't convert at all
3104         return NULL;
3105     }
3106
3107     // we trust OS to do conversion better than we can so try external
3108     // conversion methods first
3109     //
3110     // the full order is:
3111     //      1. OS conversion (iconv() under Unix or Win32 API)
3112     //      2. hard coded conversions for UTF
3113     //      3. wxEncodingConverter as fall back
3114
3115     // step (1)
3116 #ifdef HAVE_ICONV
3117 #if !wxUSE_FONTMAP
3118     if ( m_name )
3119 #endif // !wxUSE_FONTMAP
3120     {
3121 #if wxUSE_FONTMAP
3122         wxFontEncoding encoding(m_encoding);
3123 #endif
3124
3125         if ( m_name )
3126         {
3127             wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
3128             if ( conv->IsOk() )
3129                 return conv;
3130
3131             delete conv;
3132
3133 #if wxUSE_FONTMAP
3134             encoding =
3135                 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3136 #endif // wxUSE_FONTMAP
3137         }
3138 #if wxUSE_FONTMAP
3139         {
3140             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3141             if ( it != gs_nameCache.end() )
3142             {
3143                 if ( it->second.empty() )
3144                     return NULL;
3145
3146                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
3147                 if ( conv->IsOk() )
3148                     return conv;
3149
3150                 delete conv;
3151             }
3152
3153             const wxChar* const* names = wxFontMapperBase::GetAllEncodingNames(encoding);
3154             // CS : in case this does not return valid names (eg for MacRoman)
3155             // encoding got a 'failure' entry in the cache all the same,
3156             // although it just has to be created using a different method, so
3157             // only store failed iconv creation attempts (or perhaps we
3158             // shoulnd't do this at all ?)
3159             if ( names[0] != NULL )
3160             {
3161                 for ( ; *names; ++names )
3162                 {
3163                     // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3164                     //             will need changes that will obsolete this
3165                     wxString name(*names);
3166                     wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3167                     if ( conv->IsOk() )
3168                     {
3169                         gs_nameCache[encoding] = *names;
3170                         return conv;
3171                     }
3172
3173                     delete conv;
3174                 }
3175
3176                 gs_nameCache[encoding] = wxT(""); // cache the failure
3177             }
3178         }
3179 #endif // wxUSE_FONTMAP
3180     }
3181 #endif // HAVE_ICONV
3182
3183 #ifdef wxHAVE_WIN32_MB2WC
3184     {
3185 #if wxUSE_FONTMAP
3186         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3187                                       : new wxMBConv_win32(m_encoding);
3188         if ( conv->IsOk() )
3189             return conv;
3190
3191         delete conv;
3192 #else
3193         return NULL;
3194 #endif
3195     }
3196 #endif // wxHAVE_WIN32_MB2WC
3197
3198 #ifdef __DARWIN__
3199     {
3200         // leave UTF16 and UTF32 to the built-ins of wx
3201         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3202             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3203         {
3204 #if wxUSE_FONTMAP
3205             wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
3206                                           : new wxMBConv_cf(m_encoding);
3207 #else
3208             wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
3209 #endif
3210
3211             if ( conv->IsOk() )
3212                  return conv;
3213
3214             delete conv;
3215         }
3216     }
3217 #endif // __DARWIN__
3218
3219     // step (2)
3220     wxFontEncoding enc = m_encoding;
3221 #if wxUSE_FONTMAP
3222     if ( enc == wxFONTENCODING_SYSTEM && m_name )
3223     {
3224         // use "false" to suppress interactive dialogs -- we can be called from
3225         // anywhere and popping up a dialog from here is the last thing we want to
3226         // do
3227         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3228     }
3229 #endif // wxUSE_FONTMAP
3230
3231     switch ( enc )
3232     {
3233         case wxFONTENCODING_UTF7:
3234              return new wxMBConvUTF7;
3235
3236         case wxFONTENCODING_UTF8:
3237              return new wxMBConvUTF8;
3238
3239         case wxFONTENCODING_UTF16BE:
3240              return new wxMBConvUTF16BE;
3241
3242         case wxFONTENCODING_UTF16LE:
3243              return new wxMBConvUTF16LE;
3244
3245         case wxFONTENCODING_UTF32BE:
3246              return new wxMBConvUTF32BE;
3247
3248         case wxFONTENCODING_UTF32LE:
3249              return new wxMBConvUTF32LE;
3250
3251         default:
3252              // nothing to do but put here to suppress gcc warnings
3253              break;
3254     }
3255
3256     // step (3)
3257 #if wxUSE_FONTMAP
3258     {
3259         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3260                                       : new wxMBConv_wxwin(m_encoding);
3261         if ( conv->IsOk() )
3262             return conv;
3263
3264         delete conv;
3265     }
3266
3267     wxLogTrace(TRACE_STRCONV,
3268                wxT("encoding \"%s\" is not supported by this system"),
3269                (m_name ? wxString(m_name)
3270                        : wxFontMapperBase::GetEncodingName(m_encoding)));
3271 #endif // wxUSE_FONTMAP
3272
3273     return NULL;
3274 }
3275
3276 bool wxCSConv::IsOk() const
3277 {
3278     // special case: no convReal created for wxFONTENCODING_ISO8859_1
3279     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3280         return true; // always ok as we do it ourselves
3281
3282     // m_convReal->IsOk() is called at its own creation, so we know it must
3283     // be ok if m_convReal is non-NULL
3284     return m_convReal != NULL;
3285 }
3286
3287 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3288                          const char *src, size_t srcLen) const
3289 {
3290     if (m_convReal)
3291         return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3292
3293     // latin-1 (direct)
3294     if ( srcLen == wxNO_LEN )
3295         srcLen = strlen(src) + 1; // take trailing NUL too
3296
3297     if ( dst )
3298     {
3299         if ( dstLen < srcLen )
3300             return wxCONV_FAILED;
3301
3302         for ( size_t n = 0; n < srcLen; n++ )
3303             dst[n] = (unsigned char)(src[n]);
3304     }
3305
3306     return srcLen;
3307 }
3308
3309 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3310                            const wchar_t *src, size_t srcLen) const
3311 {
3312     if (m_convReal)
3313         return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3314
3315     // latin-1 (direct)
3316     if ( srcLen == wxNO_LEN )
3317         srcLen = wxWcslen(src) + 1;
3318
3319     if ( dst )
3320     {
3321         if ( dstLen < srcLen )
3322             return wxCONV_FAILED;
3323
3324         for ( size_t n = 0; n < srcLen; n++ )
3325         {
3326             if ( src[n] > 0xFF )
3327                 return wxCONV_FAILED;
3328
3329             dst[n] = (char)src[n];
3330         }
3331
3332     }
3333     else // still need to check the input validity
3334     {
3335         for ( size_t n = 0; n < srcLen; n++ )
3336         {
3337             if ( src[n] > 0xFF )
3338                 return wxCONV_FAILED;
3339         }
3340     }
3341
3342     return srcLen;
3343 }
3344
3345 size_t wxCSConv::GetMBNulLen() const
3346 {
3347     if ( m_convReal )
3348         return m_convReal->GetMBNulLen();
3349
3350     // otherwise, we are ISO-8859-1
3351     return 1;
3352 }
3353
#if wxUSE_UNICODE_UTF8
// Return true if the real converter says that it converts to UTF-8; without
// a real converter we are ISO-8859-1 and so the answer is false.
bool wxCSConv::IsUTF8() const
{
    return m_convReal ? m_convReal->IsUTF8() : false;
}
#endif
3364
3365
3366 #if wxUSE_UNICODE
3367
3368 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3369 {
3370     if ( !s )
3371         return wxWCharBuffer();
3372
3373     wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3374     if ( !wbuf )
3375         wbuf = wxMBConvUTF8().cMB2WX(s);
3376     if ( !wbuf )
3377         wbuf = wxConvISO8859_1.cMB2WX(s);
3378
3379     return wbuf;
3380 }
3381
3382 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3383 {
3384     if ( !ws )
3385         return wxCharBuffer();
3386
3387     wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3388     if ( !buf )
3389         buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3390
3391     return buf;
3392 }
3393
3394 #endif // wxUSE_UNICODE
3395
3396 // ----------------------------------------------------------------------------
3397 // globals
3398 // ----------------------------------------------------------------------------
3399
// NB: The reason why we create converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object). In other
//     words, possibly _before_ the converter global object would be
//     initialized.
3406
3407 #undef wxConvLibc
3408 #undef wxConvUTF8
3409 #undef wxConvUTF7
3410 #undef wxConvLocal
3411 #undef wxConvISO8859_1
3412
// Define everything needed for the global converter called "name":
//
//  - the exported pointer name##Ptr, initialized to NULL;
//  - the exported accessor wxGet_##name##Ptr() returning, as klass*, the
//    address of a function-local static object of type impl_klass which is
//    declared with ctor_args following its name;
//  - the file-static pointer gs_##name##instance whose initialization calls
//    the accessor.
//
// The expansion doesn't end with a semicolon, it must be provided at the
// point of use.
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// Same as WX_DEFINE_GLOBAL_CONV2() for the common case when the class of the
// object is the same as the class of the pointer to it.
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3427
3428 #ifdef __INTELC__
3429     // disable warning "variable 'xxx' was declared but never referenced"
3430     #pragma warning(disable: 177)
3431 #endif // Intel C++
3432
// wxConvLibc is a default-constructed wxMBConv_win32 under Windows and a
// default-constructed wxMBConvLibc elsewhere (the wxMBConv_cf branch is
// disabled by "#elif 0").
#ifdef __WINDOWS__
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#elif 0 // defined(__WXOSX__)
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_cf, wxConvLibc,  (wxFONTENCODING_UTF8));
#else
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif

// NB: we can't use wxEMPTY_PARAMETER_VALUE as final argument here because it's
//     passed to WX_DEFINE_GLOBAL_CONV2 after a macro expansion and so still
//     provokes an error message about "not enough macro parameters"; and we
//     can't use "()" here as the name##Obj declaration would be parsed as a
//     function declaration then, so use a semicolon and live with an extra
//     empty statement (and hope that no compiler warns about this)
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);

// the converters for the system encoding and for ISO8859-1
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));

// these pointers initially refer to the wxConvLibc and wxConvLocal objects
// defined above
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();

#ifdef __DARWIN__
// It is important to use this conversion object under Darwin as it ensures
// that Unicode strings are (re)composed correctly even though xnu kernel uses
// decomposed form internally (at least for the file names).
static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
#endif

// the converter used for the file names: the object defined just above under
// Darwin and the same object as wxConvLibc everywhere else
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
#ifdef __DARWIN__
                                    &wxConvMacUTF8DObj;
#else // !__DARWIN__
                                    wxGet_wxConvLibcPtr();
#endif // __DARWIN__/!__DARWIN__