src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif  //__BORLANDC__
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/intl.h"
  24     #include "wx/log.h"
  25     #include "wx/utils.h"
  26     #include "wx/hashmap.h"
  27 #endif
  28
  29 #include "wx/strconv.h"
  30
  31 #if wxUSE_WCHAR_T
  32
  33 #ifndef __WXWINCE__
  34 #include <errno.h>
  35 #endif
  36
  37 #include <ctype.h>
  38 #include <string.h>
  39 #include <stdlib.h>
  40
  41 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  42     #include "wx/msw/private.h"
  43     #include "wx/msw/missing.h"
  44     #define wxHAVE_WIN32_MB2WC
  45 #endif
  46
  47 #ifdef __SALFORDC__
  48     #include <clib.h>
  49 #endif
  50
  51 #ifdef HAVE_ICONV
  52     #include <iconv.h>
  53     #include "wx/thread.h"
  54 #endif
  55
  56 #include "wx/encconv.h"
  57 #include "wx/fontmap.h"
  58
  59 #ifdef __WXMAC__
  60 #ifndef __DARWIN__
  61 #include <ATSUnicode.h>
  62 #include <TextCommon.h>
  63 #include <TextEncodingConverter.h>
  64 #endif
  65
  66 // includes Mac headers
  67 #include "wx/mac/private.h"
  68 #endif
  69
  70
  71 #define TRACE_STRCONV _T("strconv")
  72
  73 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
  74 // be 4 bytes
  75 #if SIZEOF_WCHAR_T == 2
  76     #define WC_UTF16
  77 #endif
  78
  79
  80 // ============================================================================
  81 // implementation
  82 // ============================================================================
  83
  84 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  85 static bool NotAllNULs(const char *p, size_t n)
  86 {
  87     while ( n && *p++ == '\0' )
  88         n--;
  89
  90     return n != 0;
  91 }
  92
  93 // ----------------------------------------------------------------------------
  94 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
  95 // ----------------------------------------------------------------------------
  96
  97 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  98 {
  99     if (input <= 0xffff)
 100     {
 101         if (output)
 102             *output = (wxUint16) input;
 103
 104         return 1;
 105     }
 106     else if (input >= 0x110000)
 107     {
 108         return wxCONV_FAILED;
 109     }
 110     else
 111     {
 112         if (output)
 113         {
 114             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 115             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 116         }
 117
 118         return 2;
 119     }
 120 }
 121
 122 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 123 {
 124     if ((*input < 0xd800) || (*input > 0xdfff))
 125     {
 126         output = *input;
 127         return 1;
 128     }
 129     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 130     {
 131         output = *input;
 132         return wxCONV_FAILED;
 133     }
 134     else
 135     {
 136         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 137         return 2;
 138     }
 139 }
 140
 141 #ifdef WC_UTF16
 142     typedef wchar_t wxDecodeSurrogate_t;
 143 #else // !WC_UTF16
 144     typedef wxUint16 wxDecodeSurrogate_t;
 145 #endif // WC_UTF16/!WC_UTF16
 146
 147 // returns the next UTF-32 character from the wchar_t buffer and advances the
 148 // pointer to the character after this one
 149 //
 150 // if an invalid character is found, *pSrc is set to NULL, the caller must
 151 // check for this
 152 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 153 {
 154     wxUint32 out;
 155     const size_t
 156         n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
 157     if ( n == wxCONV_FAILED )
 158         *pSrc = NULL;
 159     else
 160         *pSrc += n;
 161
 162     return out;
 163 }
 164
 165 // ----------------------------------------------------------------------------
 166 // wxMBConv
 167 // ----------------------------------------------------------------------------
 168
 169 size_t
 170 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
 171                   const char *src, size_t srcLen) const
 172 {
 173     // although new conversion classes are supposed to implement this function
 174     // directly, the existins ones only implement the old MB2WC() and so, to
 175     // avoid to have to rewrite all conversion classes at once, we provide a
 176     // default (but not efficient) implementation of this one in terms of the
 177     // old function by copying the input to ensure that it's NUL-terminated and
 178     // then using MB2WC() to convert it
 179
 180     // the number of chars [which would be] written to dst [if it were not NULL]
 181     size_t dstWritten = 0;
 182
 183     // the number of NULs terminating this string
 184     size_t nulLen = 0;  // not really needed, but just to avoid warnings
 185
 186     // if we were not given the input size we just have to assume that the
 187     // string is properly terminated as we have no way of knowing how long it
 188     // is anyhow, but if we do have the size check whether there are enough
 189     // NULs at the end
 190     wxCharBuffer bufTmp;
 191     const char *srcEnd;
 192     if ( srcLen != wxNO_LEN )
 193     {
 194         // we need to know how to find the end of this string
 195         nulLen = GetMBNulLen();
 196         if ( nulLen == wxCONV_FAILED )
 197             return wxCONV_FAILED;
 198
 199         // if there are enough NULs we can avoid the copy
 200         if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
 201         {
 202             // make a copy in order to properly NUL-terminate the string
 203             bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
 204             char * const p = bufTmp.data();
 205             memcpy(p, src, srcLen);
 206             for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
 207                 *s = '\0';
 208
 209             src = bufTmp;
 210         }
 211
 212         srcEnd = src + srcLen;
 213     }
 214     else // quit after the first loop iteration
 215     {
 216         srcEnd = NULL;
 217     }
 218
 219     for ( ;; )
 220     {
 221         // try to convert the current chunk
 222         size_t lenChunk = MB2WC(NULL, src, 0);
 223         if ( lenChunk == wxCONV_FAILED )
 224             return wxCONV_FAILED;
 225
 226         lenChunk++; // for the L'\0' at the end of this chunk
 227
 228         dstWritten += lenChunk;
 229
 230         if ( lenChunk == 1 )
 231         {
 232             // nothing left in the input string, conversion succeeded
 233             break;
 234         }
 235
 236         if ( dst )
 237         {
 238             if ( dstWritten > dstLen )
 239                 return wxCONV_FAILED;
 240
 241             if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
 242                 return wxCONV_FAILED;
 243
 244             dst += lenChunk;
 245         }
 246
 247         if ( !srcEnd )
 248         {
 249             // we convert just one chunk in this case as this is the entire
 250             // string anyhow
 251             break;
 252         }
 253
 254         // advance the input pointer past the end of this chunk
 255         while ( NotAllNULs(src, nulLen) )
 256         {
 257             // notice that we must skip over multiple bytes here as we suppose
 258             // that if NUL takes 2 or 4 bytes, then all the other characters do
 259             // too and so if advanced by a single byte we might erroneously
 260             // detect sequences of NUL bytes in the middle of the input
 261             src += nulLen;
 262         }
 263
 264         src += nulLen; // skipping over its terminator as well
 265
 266         // note that ">=" (and not just "==") is needed here as the terminator
 267         // we skipped just above could be inside or just after the buffer
 268         // delimited by inEnd
 269         if ( src >= srcEnd )
 270             break;
 271     }
 272
 273     return dstWritten;
 274 }
 275
 276 size_t
 277 wxMBConv::FromWChar(char *dst, size_t dstLen,
 278                     const wchar_t *src, size_t srcLen) const
 279 {
 280     // the number of chars [which would be] written to dst [if it were not NULL]
 281     size_t dstWritten = 0;
 282
 283     // make a copy of the input string unless it is already properly
 284     // NUL-terminated
 285     //
 286     // if we don't know its length we have no choice but to assume that it is,
 287     // indeed, properly terminated
 288     wxWCharBuffer bufTmp;
 289     if ( srcLen == wxNO_LEN )
 290     {
 291         srcLen = wxWcslen(src) + 1;
 292     }
 293     else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
 294     {
 295         // make a copy in order to properly NUL-terminate the string
 296         bufTmp = wxWCharBuffer(srcLen);
 297         memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
 298         src = bufTmp;
 299     }
 300
 301     const size_t lenNul = GetMBNulLen();
 302     for ( const wchar_t * const srcEnd = src + srcLen;
 303           src < srcEnd;
 304           src += wxWcslen(src) + 1 /* skip L'\0' too */ )
 305     {
 306         // try to convert the current chunk
 307         size_t lenChunk = WC2MB(NULL, src, 0);
 308
 309         if ( lenChunk == wxCONV_FAILED )
 310             return wxCONV_FAILED;
 311
 312         lenChunk += lenNul;
 313         dstWritten += lenChunk;
 314
 315         if ( dst )
 316         {
 317             if ( dstWritten > dstLen )
 318                 return wxCONV_FAILED;
 319
 320             if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
 321                 return wxCONV_FAILED;
 322
 323             dst += lenChunk;
 324         }
 325     }
 326
 327     return dstWritten;
 328 }
 329
 330 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 331 {
 332     size_t rc = ToWChar(outBuff, outLen, inBuff);
 333     if ( rc != wxCONV_FAILED )
 334     {
 335         // ToWChar() returns the buffer length, i.e. including the trailing
 336         // NUL, while this method doesn't take it into account
 337         rc--;
 338     }
 339
 340     return rc;
 341 }
 342
 343 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 344 {
 345     size_t rc = FromWChar(outBuff, outLen, inBuff);
 346     if ( rc != wxCONV_FAILED )
 347     {
 348         rc -= GetMBNulLen();
 349     }
 350
 351     return rc;
 352 }
 353
 354 wxMBConv::~wxMBConv()
 355 {
 356     // nothing to do here (necessary for Darwin linking probably)
 357 }
 358
 359 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 360 {
 361     if ( psz )
 362     {
 363         // calculate the length of the buffer needed first
 364         const size_t nLen = MB2WC(NULL, psz, 0);
 365         if ( nLen != wxCONV_FAILED )
 366         {
 367             // now do the actual conversion
 368             wxWCharBuffer buf(nLen /* +1 added implicitly */);
 369
 370             // +1 for the trailing NULL
 371             if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
 372                 return buf;
 373         }
 374     }
 375
 376     return wxWCharBuffer();
 377 }
 378
 379 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 380 {
 381     if ( pwz )
 382     {
 383         const size_t nLen = WC2MB(NULL, pwz, 0);
 384         if ( nLen != wxCONV_FAILED )
 385         {
 386             // extra space for trailing NUL(s)
 387             static const size_t extraLen = GetMaxMBNulLen();
 388
 389             wxCharBuffer buf(nLen + extraLen - 1);
 390             if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
 391                 return buf;
 392         }
 393     }
 394
 395     return wxCharBuffer();
 396 }
 397
 398 const wxWCharBuffer
 399 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 400 {
 401     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 402     if ( dstLen != wxCONV_FAILED )
 403     {
 404         wxWCharBuffer wbuf(dstLen - 1);
 405         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 406         {
 407             if ( outLen )
 408             {
 409                 *outLen = dstLen;
 410                 if ( wbuf[dstLen - 1] == L'\0' )
 411                     (*outLen)--;
 412             }
 413
 414             return wbuf;
 415         }
 416     }
 417
 418     if ( outLen )
 419         *outLen = 0;
 420
 421     return wxWCharBuffer();
 422 }
 423
 424 const wxCharBuffer
 425 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 426 {
 427     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 428     if ( dstLen != wxCONV_FAILED )
 429     {
 430         // special case of empty input: can't allocate 0 size buffer below as
 431         // wxCharBuffer insists on NUL-terminating it
 432         wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
 433         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 434         {
 435             if ( outLen )
 436             {
 437                 *outLen = dstLen;
 438
 439                 const size_t nulLen = GetMBNulLen();
 440                 if ( dstLen >= nulLen &&
 441                         !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
 442                 {
 443                     // in this case the output is NUL-terminated and we're not
 444                     // supposed to count NUL
 445                     *outLen -= nulLen;
 446                 }
 447             }
 448
 449             return buf;
 450         }
 451     }
 452
 453     if ( outLen )
 454         *outLen = 0;
 455
 456     return wxCharBuffer();
 457 }
 458
 459 // ----------------------------------------------------------------------------
 460 // wxMBConvLibc
 461 // ----------------------------------------------------------------------------
 462
 463 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 464 {
 465     return wxMB2WC(buf, psz, n);
 466 }
 467
 468 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 469 {
 470     return wxWC2MB(buf, psz, n);
 471 }
 472
 473 // ----------------------------------------------------------------------------
 474 // wxConvBrokenFileNames
 475 // ----------------------------------------------------------------------------
 476
 477 #ifdef __UNIX__
 478
 479 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
 480 {
 481     if ( wxStricmp(charset, _T("UTF-8")) == 0 ||
 482          wxStricmp(charset, _T("UTF8")) == 0  )
 483         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
 484     else
 485         m_conv = new wxCSConv(charset);
 486 }
 487
 488 #endif // __UNIX__
 489
 490 // ----------------------------------------------------------------------------
 491 // UTF-7
 492 // ----------------------------------------------------------------------------
 493
 494 // Implementation (C) 2004 Fredrik Roubert
 495
 496 //
 497 // BASE64 decoding table
 498 //
 499 static const unsigned char utf7unb64[] =
 500 {
 501     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 502     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 503     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 504     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 505     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 506     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 507     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 508     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 509     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 510     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 511     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 512     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 513     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 514     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 515     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 516     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 517     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 518     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 519     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 520     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 521     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 522     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 523     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 524     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 525     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 526     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 527     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 528     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 529     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 530     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 531     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 532     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 533 };
 534
 535 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 536 {
 537     size_t len = 0;
 538
 539     while ( *psz && (!buf || (len < n)) )
 540     {
 541         unsigned char cc = *psz++;
 542         if (cc != '+')
 543         {
 544             // plain ASCII char
 545             if (buf)
 546                 *buf++ = cc;
 547             len++;
 548         }
 549         else if (*psz == '-')
 550         {
 551             // encoded plus sign
 552             if (buf)
 553                 *buf++ = cc;
 554             len++;
 555             psz++;
 556         }
 557         else // start of BASE64 encoded string
 558         {
 559             bool lsb, ok;
 560             unsigned int d, l;
 561             for ( ok = lsb = false, d = 0, l = 0;
 562                   (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
 563                   psz++ )
 564             {
 565                 d <<= 6;
 566                 d += cc;
 567                 for (l += 6; l >= 8; lsb = !lsb)
 568                 {
 569                     unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
 570                     if (lsb)
 571                     {
 572                         if (buf)
 573                             *buf++ |= c;
 574                         len ++;
 575                     }
 576                     else
 577                     {
 578                         if (buf)
 579                             *buf = (wchar_t)(c << 8);
 580                     }
 581
 582                     ok = true;
 583                 }
 584             }
 585
 586             if ( !ok )
 587             {
 588                 // in valid UTF7 we should have valid characters after '+'
 589                 return wxCONV_FAILED;
 590             }
 591
 592             if (*psz == '-')
 593                 psz++;
 594         }
 595     }
 596
 597     if ( buf && (len < n) )
 598         *buf = '\0';
 599
 600     return len;
 601 }
 602
 603 //
 604 // BASE64 encoding table
 605 //
 606 static const unsigned char utf7enb64[] =
 607 {
 608     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 609     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 610     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 611     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 612     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 613     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 614     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 615     '4', '5', '6', '7', '8', '9', '+', '/'
 616 };
 617
 618 //
 619 // UTF-7 encoding table
 620 //
 621 // 0 - Set D (directly encoded characters)
 622 // 1 - Set O (optional direct characters)
 623 // 2 - whitespace characters (optional)
 624 // 3 - special characters
 625 //
 626 static const unsigned char utf7encode[128] =
 627 {
 628     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 629     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 630     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 631     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 632     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 633     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 634     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 635     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 636 };
 637
 638 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 639 {
 640     size_t len = 0;
 641
 642     while (*psz && ((!buf) || (len < n)))
 643     {
 644         wchar_t cc = *psz++;
 645         if (cc < 0x80 && utf7encode[cc] < 1)
 646         {
 647             // plain ASCII char
 648             if (buf)
 649                 *buf++ = (char)cc;
 650
 651             len++;
 652         }
 653 #ifndef WC_UTF16
 654         else if (((wxUint32)cc) > 0xffff)
 655         {
 656             // no surrogate pair generation (yet?)
 657             return wxCONV_FAILED;
 658         }
 659 #endif
 660         else
 661         {
 662             if (buf)
 663                 *buf++ = '+';
 664
 665             len++;
 666             if (cc != '+')
 667             {
 668                 // BASE64 encode string
 669                 unsigned int lsb, d, l;
 670                 for (d = 0, l = 0; /*nothing*/; psz++)
 671                 {
 672                     for (lsb = 0; lsb < 2; lsb ++)
 673                     {
 674                         d <<= 8;
 675                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 676
 677                         for (l += 8; l >= 6; )
 678                         {
 679                             l -= 6;
 680                             if (buf)
 681                                 *buf++ = utf7enb64[(d >> l) % 64];
 682                             len++;
 683                         }
 684                     }
 685
 686                     cc = *psz;
 687                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 688                         break;
 689                 }
 690
 691                 if (l != 0)
 692                 {
 693                     if (buf)
 694                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 695
 696                     len++;
 697                 }
 698             }
 699
 700             if (buf)
 701                 *buf++ = '-';
 702             len++;
 703         }
 704     }
 705
 706     if (buf && (len < n))
 707         *buf = 0;
 708
 709     return len;
 710 }
 711
 712 // ----------------------------------------------------------------------------
 713 // UTF-8
 714 // ----------------------------------------------------------------------------
 715
 716 static wxUint32 utf8_max[]=
 717     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 718
 719 // boundaries of the private use area we use to (temporarily) remap invalid
 720 // characters invalid in a UTF-8 encoded string
 721 const wxUint32 wxUnicodePUA = 0x100000;
 722 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 723
 724 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 725 {
 726     size_t len = 0;
 727
 728     while (*psz && ((!buf) || (len < n)))
 729     {
 730         const char *opsz = psz;
 731         bool invalid = false;
 732         unsigned char cc = *psz++, fc = cc;
 733         unsigned cnt;
 734         for (cnt = 0; fc & 0x80; cnt++)
 735             fc <<= 1;
 736
 737         if (!cnt)
 738         {
 739             // plain ASCII char
 740             if (buf)
 741                 *buf++ = cc;
 742             len++;
 743
 744             // escape the escape character for octal escapes
 745             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 746                     && cc == '\\' && (!buf || len < n))
 747             {
 748                 if (buf)
 749                     *buf++ = cc;
 750                 len++;
 751             }
 752         }
 753         else
 754         {
 755             cnt--;
 756             if (!cnt)
 757             {
 758                 // invalid UTF-8 sequence
 759                 invalid = true;
 760             }
 761             else
 762             {
 763                 unsigned ocnt = cnt - 1;
 764                 wxUint32 res = cc & (0x3f >> cnt);
 765                 while (cnt--)
 766                 {
 767                     cc = *psz;
 768                     if ((cc & 0xC0) != 0x80)
 769                     {
 770                         // invalid UTF-8 sequence
 771                         invalid = true;
 772                         break;
 773                     }
 774
 775                     psz++;
 776                     res = (res << 6) | (cc & 0x3f);
 777                 }
 778
 779                 if (invalid || res <= utf8_max[ocnt])
 780                 {
 781                     // illegal UTF-8 encoding
 782                     invalid = true;
 783                 }
 784                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 785                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 786                 {
 787                     // if one of our PUA characters turns up externally
 788                     // it must also be treated as an illegal sequence
 789                     // (a bit like you have to escape an escape character)
 790                     invalid = true;
 791                 }
 792                 else
 793                 {
 794 #ifdef WC_UTF16
 795                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 796                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 797                     if (pa == wxCONV_FAILED)
 798                     {
 799                         invalid = true;
 800                     }
 801                     else
 802                     {
 803                         if (buf)
 804                             buf += pa;
 805                         len += pa;
 806                     }
 807 #else // !WC_UTF16
 808                     if (buf)
 809                         *buf++ = (wchar_t)res;
 810                     len++;
 811 #endif // WC_UTF16/!WC_UTF16
 812                 }
 813             }
 814
 815             if (invalid)
 816             {
 817                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 818                 {
 819                     while (opsz < psz && (!buf || len < n))
 820                     {
 821 #ifdef WC_UTF16
 822                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 823                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 824                         wxASSERT(pa != wxCONV_FAILED);
 825                         if (buf)
 826                             buf += pa;
 827                         opsz++;
 828                         len += pa;
 829 #else
 830                         if (buf)
 831                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
 832                         opsz++;
 833                         len++;
 834 #endif
 835                     }
 836                 }
 837                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 838                 {
 839                     while (opsz < psz && (!buf || len < n))
 840                     {
 841                         if ( buf && len + 3 < n )
 842                         {
 843                             unsigned char on = *opsz;
 844                             *buf++ = L'\\';
 845                             *buf++ = (wchar_t)( L'0' + on / 0100 );
 846                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
 847                             *buf++ = (wchar_t)( L'0' + on % 010 );
 848                         }
 849
 850                         opsz++;
 851                         len += 4;
 852                     }
 853                 }
 854                 else // MAP_INVALID_UTF8_NOT
 855                 {
 856                     return wxCONV_FAILED;
 857                 }
 858             }
 859         }
 860     }
 861
 862     if (buf && (len < n))
 863         *buf = 0;
 864
 865     return len;
 866 }
 867
 868 static inline bool isoctal(wchar_t wch)
 869 {
 870     return L'0' <= wch && wch <= L'7';
 871 }
 872
 873 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 874 {
 875     size_t len = 0;
 876
 877     while (*psz && ((!buf) || (len < n)))
 878     {
 879         wxUint32 cc;
 880
 881 #ifdef WC_UTF16
 882         // cast is ok for WC_UTF16
 883         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 884         psz += (pa == wxCONV_FAILED) ? 1 : pa;
 885 #else
 886         cc = (*psz++) & 0x7fffffff;
 887 #endif
 888
 889         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 890                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 891         {
 892             if (buf)
 893                 *buf++ = (char)(cc - wxUnicodePUA);
 894             len++;
 895         }
 896         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 897                     && cc == L'\\' && psz[0] == L'\\' )
 898         {
 899             if (buf)
 900                 *buf++ = (char)cc;
 901             psz++;
 902             len++;
 903         }
 904         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 905                     cc == L'\\' &&
 906                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 907         {
 908             if (buf)
 909             {
 910                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
 911                                  (psz[1] - L'0') * 010 +
 912                                  (psz[2] - L'0'));
 913             }
 914
 915             psz += 3;
 916             len++;
 917         }
 918         else
 919         {
 920             unsigned cnt;
 921             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
 922             {
 923             }
 924
 925             if (!cnt)
 926             {
 927                 // plain ASCII char
 928                 if (buf)
 929                     *buf++ = (char) cc;
 930                 len++;
 931             }
 932             else
 933             {
 934                 len += cnt + 1;
 935                 if (buf)
 936                 {
 937                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 938                     while (cnt--)
 939                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 940                 }
 941             }
 942         }
 943     }
 944
 945     if (buf && (len < n))
 946         *buf = 0;
 947
 948     return len;
 949 }
 950
 951 // ============================================================================
 952 // UTF-16
 953 // ============================================================================
 954
 955 #ifdef WORDS_BIGENDIAN
 956     #define wxMBConvUTF16straight wxMBConvUTF16BE
 957     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 958 #else
 959     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 960     #define wxMBConvUTF16straight wxMBConvUTF16LE
 961 #endif
 962
 963 /* static */
 964 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
 965 {
 966     if ( srcLen == wxNO_LEN )
 967     {
 968         // count the number of bytes in input, including the trailing NULs
 969         const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
 970         for ( srcLen = 1; *inBuff++; srcLen++ )
 971             ;
 972
 973         srcLen *= BYTES_PER_CHAR;
 974     }
 975     else // we already have the length
 976     {
 977         // we can only convert an entire number of UTF-16 characters
 978         if ( srcLen % BYTES_PER_CHAR )
 979             return wxCONV_FAILED;
 980     }
 981
 982     return srcLen;
 983 }
 984
 985 // case when in-memory representation is UTF-16 too
 986 #ifdef WC_UTF16
 987
 988 // ----------------------------------------------------------------------------
 989 // conversions without endianness change
 990 // ----------------------------------------------------------------------------
 991
 992 size_t
 993 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
 994                                const char *src, size_t srcLen) const
 995 {
 996     // set up the scene for using memcpy() (which is presumably more efficient
 997     // than copying the bytes one by one)
 998     srcLen = GetLength(src, srcLen);
 999     if ( srcLen == wxNO_LEN )
1000         return wxCONV_FAILED;
1001
1002     const size_t inLen = srcLen / BYTES_PER_CHAR;
1003     if ( dst )
1004     {
1005         if ( dstLen < inLen )
1006             return wxCONV_FAILED;
1007
1008         memcpy(dst, src, srcLen);
1009     }
1010
1011     return inLen;
1012 }
1013
1014 size_t
1015 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1016                                  const wchar_t *src, size_t srcLen) const
1017 {
1018     if ( srcLen == wxNO_LEN )
1019         srcLen = wxWcslen(src) + 1;
1020
1021     srcLen *= BYTES_PER_CHAR;
1022
1023     if ( dst )
1024     {
1025         if ( dstLen < srcLen )
1026             return wxCONV_FAILED;
1027
1028         memcpy(dst, src, srcLen);
1029     }
1030
1031     return srcLen;
1032 }
1033
1034 // ----------------------------------------------------------------------------
1035 // endian-reversing conversions
1036 // ----------------------------------------------------------------------------
1037
1038 size_t
1039 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1040                            const char *src, size_t srcLen) const
1041 {
1042     srcLen = GetLength(src, srcLen);
1043     if ( srcLen == wxNO_LEN )
1044         return wxCONV_FAILED;
1045
1046     srcLen /= BYTES_PER_CHAR;
1047
1048     if ( dst )
1049     {
1050         if ( dstLen < srcLen )
1051             return wxCONV_FAILED;
1052
1053         const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1054         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1055         {
1056             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1057         }
1058     }
1059
1060     return srcLen;
1061 }
1062
1063 size_t
1064 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1065                              const wchar_t *src, size_t srcLen) const
1066 {
1067     if ( srcLen == wxNO_LEN )
1068         srcLen = wxWcslen(src) + 1;
1069
1070     srcLen *= BYTES_PER_CHAR;
1071
1072     if ( dst )
1073     {
1074         if ( dstLen < srcLen )
1075             return wxCONV_FAILED;
1076
1077         wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1078         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1079         {
1080             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1081         }
1082     }
1083
1084     return srcLen;
1085 }
1086
1087 #else // !WC_UTF16: wchar_t is UTF-32
1088
1089 // ----------------------------------------------------------------------------
1090 // conversions without endianness change
1091 // ----------------------------------------------------------------------------
1092
1093 size_t
1094 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1095                                const char *src, size_t srcLen) const
1096 {
1097     srcLen = GetLength(src, srcLen);
1098     if ( srcLen == wxNO_LEN )
1099         return wxCONV_FAILED;
1100
1101     const size_t inLen = srcLen / BYTES_PER_CHAR;
1102     if ( !dst )
1103     {
1104         // optimization: return maximal space which could be needed for this
1105         // string even if the real size could be smaller if the buffer contains
1106         // any surrogates
1107         return inLen;
1108     }
1109
1110     size_t outLen = 0;
1111     const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1112     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1113     {
1114         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1115         if ( !inBuff )
1116             return wxCONV_FAILED;
1117
1118         if ( ++outLen > dstLen )
1119             return wxCONV_FAILED;
1120
1121         *dst++ = ch;
1122     }
1123
1124
1125     return outLen;
1126 }
1127
1128 size_t
1129 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1130                                  const wchar_t *src, size_t srcLen) const
1131 {
1132     if ( srcLen == wxNO_LEN )
1133         srcLen = wxWcslen(src) + 1;
1134
1135     size_t outLen = 0;
1136     wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1137     for ( size_t n = 0; n < srcLen; n++ )
1138     {
1139         wxUint16 cc[2];
1140         const size_t numChars = encode_utf16(*src++, cc);
1141         if ( numChars == wxCONV_FAILED )
1142             return wxCONV_FAILED;
1143
1144         outLen += numChars * BYTES_PER_CHAR;
1145         if ( outBuff )
1146         {
1147             if ( outLen > dstLen )
1148                 return wxCONV_FAILED;
1149
1150             *outBuff++ = cc[0];
1151             if ( numChars == 2 )
1152             {
1153                 // second character of a surrogate
1154                 *outBuff++ = cc[1];
1155             }
1156         }
1157     }
1158
1159     return outLen;
1160 }
1161
1162 // ----------------------------------------------------------------------------
1163 // endian-reversing conversions
1164 // ----------------------------------------------------------------------------
1165
1166 size_t
1167 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1168                            const char *src, size_t srcLen) const
1169 {
1170     srcLen = GetLength(src, srcLen);
1171     if ( srcLen == wxNO_LEN )
1172         return wxCONV_FAILED;
1173
1174     const size_t inLen = srcLen / BYTES_PER_CHAR;
1175     if ( !dst )
1176     {
1177         // optimization: return maximal space which could be needed for this
1178         // string even if the real size could be smaller if the buffer contains
1179         // any surrogates
1180         return inLen;
1181     }
1182
1183     size_t outLen = 0;
1184     const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1185     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1186     {
1187         wxUint32 ch;
1188         wxUint16 tmp[2];
1189
1190         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1191         inBuff++;
1192         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1193
1194         const size_t numChars = decode_utf16(tmp, ch);
1195         if ( numChars == wxCONV_FAILED )
1196             return wxCONV_FAILED;
1197
1198         if ( numChars == 2 )
1199             inBuff++;
1200
1201         if ( ++outLen > dstLen )
1202             return wxCONV_FAILED;
1203
1204         *dst++ = ch;
1205     }
1206
1207
1208     return outLen;
1209 }
1210
1211 size_t
1212 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1213                              const wchar_t *src, size_t srcLen) const
1214 {
1215     if ( srcLen == wxNO_LEN )
1216         srcLen = wxWcslen(src) + 1;
1217
1218     size_t outLen = 0;
1219     wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1220     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1221     {
1222         wxUint16 cc[2];
1223         const size_t numChars = encode_utf16(*src, cc);
1224         if ( numChars == wxCONV_FAILED )
1225             return wxCONV_FAILED;
1226
1227         outLen += numChars * BYTES_PER_CHAR;
1228         if ( outBuff )
1229         {
1230             if ( outLen > dstLen )
1231                 return wxCONV_FAILED;
1232
1233             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1234             if ( numChars == 2 )
1235             {
1236                 // second character of a surrogate
1237                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1238             }
1239         }
1240     }
1241
1242     return outLen;
1243 }
1244
1245 #endif // WC_UTF16/!WC_UTF16
1246
1247
1248 // ============================================================================
1249 // UTF-32
1250 // ============================================================================
1251
1252 #ifdef WORDS_BIGENDIAN
1253     #define wxMBConvUTF32straight  wxMBConvUTF32BE
1254     #define wxMBConvUTF32swap      wxMBConvUTF32LE
1255 #else
1256     #define wxMBConvUTF32swap      wxMBConvUTF32BE
1257     #define wxMBConvUTF32straight  wxMBConvUTF32LE
1258 #endif
1259
1260
1261 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1262 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1263
1264 /* static */
1265 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1266 {
1267     if ( srcLen == wxNO_LEN )
1268     {
1269         // count the number of bytes in input, including the trailing NULs
1270         const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1271         for ( srcLen = 1; *inBuff++; srcLen++ )
1272             ;
1273
1274         srcLen *= BYTES_PER_CHAR;
1275     }
1276     else // we already have the length
1277     {
1278         // we can only convert an entire number of UTF-32 characters
1279         if ( srcLen % BYTES_PER_CHAR )
1280             return wxCONV_FAILED;
1281     }
1282
1283     return srcLen;
1284 }
1285
1286 // case when in-memory representation is UTF-16
1287 #ifdef WC_UTF16
1288
1289 // ----------------------------------------------------------------------------
1290 // conversions without endianness change
1291 // ----------------------------------------------------------------------------
1292
1293 size_t
1294 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1295                                const char *src, size_t srcLen) const
1296 {
1297     srcLen = GetLength(src, srcLen);
1298     if ( srcLen == wxNO_LEN )
1299         return wxCONV_FAILED;
1300
1301     const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1302     const size_t inLen = srcLen / BYTES_PER_CHAR;
1303     size_t outLen = 0;
1304     for ( size_t n = 0; n < inLen; n++ )
1305     {
1306         wxUint16 cc[2];
1307         const size_t numChars = encode_utf16(*inBuff++, cc);
1308         if ( numChars == wxCONV_FAILED )
1309             return wxCONV_FAILED;
1310
1311         outLen += numChars;
1312         if ( dst )
1313         {
1314             if ( outLen > dstLen )
1315                 return wxCONV_FAILED;
1316
1317             *dst++ = cc[0];
1318             if ( numChars == 2 )
1319             {
1320                 // second character of a surrogate
1321                 *dst++ = cc[1];
1322             }
1323         }
1324     }
1325
1326     return outLen;
1327 }
1328
1329 size_t
1330 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1331                                  const wchar_t *src, size_t srcLen) const
1332 {
1333     if ( srcLen == wxNO_LEN )
1334         srcLen = wxWcslen(src) + 1;
1335
1336     if ( !dst )
1337     {
1338         // optimization: return maximal space which could be needed for this
1339         // string instead of the exact amount which could be less if there are
1340         // any surrogates in the input
1341         //
1342         // we consider that surrogates are rare enough to make it worthwhile to
1343         // avoid running the loop below at the cost of slightly extra memory
1344         // consumption
1345         return srcLen * BYTES_PER_CHAR;
1346     }
1347
1348     wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1349     size_t outLen = 0;
1350     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1351     {
1352         const wxUint32 ch = wxDecodeSurrogate(&src);
1353         if ( !src )
1354             return wxCONV_FAILED;
1355
1356         outLen += BYTES_PER_CHAR;
1357
1358         if ( outLen > dstLen )
1359             return wxCONV_FAILED;
1360
1361         *outBuff++ = ch;
1362     }
1363
1364     return outLen;
1365 }
1366
1367 // ----------------------------------------------------------------------------
1368 // endian-reversing conversions
1369 // ----------------------------------------------------------------------------
1370
1371 size_t
1372 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1373                            const char *src, size_t srcLen) const
1374 {
1375     srcLen = GetLength(src, srcLen);
1376     if ( srcLen == wxNO_LEN )
1377         return wxCONV_FAILED;
1378
1379     const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1380     const size_t inLen = srcLen / BYTES_PER_CHAR;
1381     size_t outLen = 0;
1382     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1383     {
1384         wxUint16 cc[2];
1385         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1386         if ( numChars == wxCONV_FAILED )
1387             return wxCONV_FAILED;
1388
1389         outLen += numChars;
1390         if ( dst )
1391         {
1392             if ( outLen > dstLen )
1393                 return wxCONV_FAILED;
1394
1395             *dst++ = cc[0];
1396             if ( numChars == 2 )
1397             {
1398                 // second character of a surrogate
1399                 *dst++ = cc[1];
1400             }
1401         }
1402     }
1403
1404     return outLen;
1405 }
1406
1407 size_t
1408 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1409                              const wchar_t *src, size_t srcLen) const
1410 {
1411     if ( srcLen == wxNO_LEN )
1412         srcLen = wxWcslen(src) + 1;
1413
1414     if ( !dst )
1415     {
1416         // optimization: return maximal space which could be needed for this
1417         // string instead of the exact amount which could be less if there are
1418         // any surrogates in the input
1419         //
1420         // we consider that surrogates are rare enough to make it worthwhile to
1421         // avoid running the loop below at the cost of slightly extra memory
1422         // consumption
1423         return srcLen*BYTES_PER_CHAR;
1424     }
1425
1426     wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1427     size_t outLen = 0;
1428     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1429     {
1430         const wxUint32 ch = wxDecodeSurrogate(&src);
1431         if ( !src )
1432             return wxCONV_FAILED;
1433
1434         outLen += BYTES_PER_CHAR;
1435
1436         if ( outLen > dstLen )
1437             return wxCONV_FAILED;
1438
1439         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1440     }
1441
1442     return outLen;
1443 }
1444
1445 #else // !WC_UTF16: wchar_t is UTF-32
1446
1447 // ----------------------------------------------------------------------------
1448 // conversions without endianness change
1449 // ----------------------------------------------------------------------------
1450
1451 size_t
1452 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1453                                const char *src, size_t srcLen) const
1454 {
1455     // use memcpy() as it should be much faster than hand-written loop
1456     srcLen = GetLength(src, srcLen);
1457     if ( srcLen == wxNO_LEN )
1458         return wxCONV_FAILED;
1459
1460     const size_t inLen = srcLen/BYTES_PER_CHAR;
1461     if ( dst )
1462     {
1463         if ( dstLen < inLen )
1464             return wxCONV_FAILED;
1465
1466         memcpy(dst, src, srcLen);
1467     }
1468
1469     return inLen;
1470 }
1471
1472 size_t
1473 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1474                                  const wchar_t *src, size_t srcLen) const
1475 {
1476     if ( srcLen == wxNO_LEN )
1477         srcLen = wxWcslen(src) + 1;
1478
1479     srcLen *= BYTES_PER_CHAR;
1480
1481     if ( dst )
1482     {
1483         if ( dstLen < srcLen )
1484             return wxCONV_FAILED;
1485
1486         memcpy(dst, src, srcLen);
1487     }
1488
1489     return srcLen;
1490 }
1491
1492 // ----------------------------------------------------------------------------
1493 // endian-reversing conversions
1494 // ----------------------------------------------------------------------------
1495
1496 size_t
1497 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1498                            const char *src, size_t srcLen) const
1499 {
1500     srcLen = GetLength(src, srcLen);
1501     if ( srcLen == wxNO_LEN )
1502         return wxCONV_FAILED;
1503
1504     srcLen /= BYTES_PER_CHAR;
1505
1506     if ( dst )
1507     {
1508         if ( dstLen < srcLen )
1509             return wxCONV_FAILED;
1510
1511         const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1512         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1513         {
1514             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1515         }
1516     }
1517
1518     return srcLen;
1519 }
1520
1521 size_t
1522 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1523                              const wchar_t *src, size_t srcLen) const
1524 {
1525     if ( srcLen == wxNO_LEN )
1526         srcLen = wxWcslen(src) + 1;
1527
1528     srcLen *= BYTES_PER_CHAR;
1529
1530     if ( dst )
1531     {
1532         if ( dstLen < srcLen )
1533             return wxCONV_FAILED;
1534
1535         wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1536         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1537         {
1538             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1539         }
1540     }
1541
1542     return srcLen;
1543 }
1544
1545 #endif // WC_UTF16/!WC_UTF16
1546
1547
1548 // ============================================================================
1549 // The classes doing conversion using the iconv_xxx() functions
1550 // ============================================================================
1551
1552 #ifdef HAVE_ICONV
1553
1554 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1555 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1556 //     (unless there's yet another bug in glibc) the only case when iconv()
1557 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1558 //     left in the input buffer -- when _real_ error occurs,
1559 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1560 //     iconv() failure.
1561 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned cres
// and left bufLeft bytes in its input buffer should be treated as failed.
//
// The macro parameters are parenthesized so that passing an expression (and
// not just a plain variable) can't change the meaning of the test.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type taken by iconv()
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value compared against the result of iconv_open() to detect failure
#define ICONV_T_INVALID ((iconv_t)-1)
1572
1573 #if SIZEOF_WCHAR_T == 4
1574     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1575     #define WC_ENC      wxFONTENCODING_UTF32
1576 #elif SIZEOF_WCHAR_T == 2
1577     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1578     #define WC_ENC      wxFONTENCODING_UTF16
1579 #else // sizeof(wchar_t) != 2 nor 4
1580     // does this ever happen?
1581     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1582 #endif
1583
1584 // ----------------------------------------------------------------------------
1585 // wxMBConv_iconv: encapsulates an iconv character set
1586 // ----------------------------------------------------------------------------
1587
1588 class wxMBConv_iconv : public wxMBConv
1589 {
1590 public:
1591     wxMBConv_iconv(const char *name);
1592     virtual ~wxMBConv_iconv();
1593
1594     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1595     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1596
1597     // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1598     virtual size_t GetMBNulLen() const;
1599
1600 #if wxUSE_UNICODE_UTF8
1601     virtual bool IsUTF8() const;
1602 #endif
1603
1604     virtual wxMBConv *Clone() const
1605     {
1606         wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
1607         p->m_minMBCharWidth = m_minMBCharWidth;
1608         return p;
1609     }
1610
1611     bool IsOk() const
1612         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1613
1614 protected:
1615     // the iconv handlers used to translate from multibyte
1616     // to wide char and in the other direction
1617     iconv_t m2w,
1618             w2m;
1619
1620 #if wxUSE_THREADS
1621     // guards access to m2w and w2m objects
1622     wxMutex m_iconvMutex;
1623 #endif
1624
1625 private:
1626     // the name (for iconv_open()) of a wide char charset -- if none is
1627     // available on this machine, it will remain NULL
1628     static wxString ms_wcCharsetName;
1629
1630     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1631     // different endian-ness than the native one
1632     static bool ms_wcNeedsSwap;
1633
1634
1635     // name of the encoding handled by this conversion
1636     wxString m_name;
1637
1638     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1639     // initially
1640     size_t m_minMBCharWidth;
1641 };
1642
1643 // make the constructor available for unit testing
1644 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
1645 {
1646     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1647     if ( !result->IsOk() )
1648     {
1649         delete result;
1650         return 0;
1651     }
1652
1653     return result;
1654 }
1655
1656 wxString wxMBConv_iconv::ms_wcCharsetName;
1657 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1658
1659 wxMBConv_iconv::wxMBConv_iconv(const char *name)
1660               : m_name(name)
1661 {
1662     m_minMBCharWidth = 0;
1663
1664     // check for charset that represents wchar_t:
1665     if ( ms_wcCharsetName.empty() )
1666     {
1667         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1668
1669 #if wxUSE_FONTMAP
1670         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1671 #else // !wxUSE_FONTMAP
1672         static const wxChar *names_static[] =
1673         {
1674 #if SIZEOF_WCHAR_T == 4
1675             _T("UCS-4"),
1676 #elif SIZEOF_WCHAR_T = 2
1677             _T("UCS-2"),
1678 #endif
1679             NULL
1680         };
1681         const wxChar **names = names_static;
1682 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1683
1684         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1685         {
1686             const wxString nameCS(*names);
1687
1688             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1689             wxString nameXE(nameCS);
1690
1691 #ifdef WORDS_BIGENDIAN
1692                 nameXE += _T("BE");
1693 #else // little endian
1694                 nameXE += _T("LE");
1695 #endif
1696
1697             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1698                        nameXE.c_str());
1699
1700             m2w = iconv_open(nameXE.ToAscii(), name);
1701             if ( m2w == ICONV_T_INVALID )
1702             {
1703                 // try charset w/o bytesex info (e.g. "UCS4")
1704                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1705                            nameCS.c_str());
1706                 m2w = iconv_open(nameCS.ToAscii(), name);
1707
1708                 // and check for bytesex ourselves:
1709                 if ( m2w != ICONV_T_INVALID )
1710                 {
1711                     char    buf[2], *bufPtr;
1712                     wchar_t wbuf[2], *wbufPtr;
1713                     size_t  insz, outsz;
1714                     size_t  res;
1715
1716                     buf[0] = 'A';
1717                     buf[1] = 0;
1718                     wbuf[0] = 0;
1719                     insz = 2;
1720                     outsz = SIZEOF_WCHAR_T * 2;
1721                     wbufPtr = wbuf;
1722                     bufPtr = buf;
1723
1724                     res = iconv(
1725                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1726                         (char**)&wbufPtr, &outsz);
1727
1728                     if (ICONV_FAILED(res, insz))
1729                     {
1730                         wxLogLastError(wxT("iconv"));
1731                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1732                                    nameCS.c_str());
1733                     }
1734                     else // ok, can convert to this encoding, remember it
1735                     {
1736                         ms_wcCharsetName = nameCS;
1737                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1738                     }
1739                 }
1740             }
1741             else // use charset not requiring byte swapping
1742             {
1743                 ms_wcCharsetName = nameXE;
1744             }
1745         }
1746
1747         wxLogTrace(TRACE_STRCONV,
1748                    wxT("iconv wchar_t charset is \"%s\"%s"),
1749                    ms_wcCharsetName.empty() ? wxString("<none>")
1750                                             : ms_wcCharsetName,
1751                    ms_wcNeedsSwap ? _T(" (needs swap)")
1752                                   : _T(""));
1753     }
1754     else // we already have ms_wcCharsetName
1755     {
1756         m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
1757     }
1758
1759     if ( ms_wcCharsetName.empty() )
1760     {
1761         w2m = ICONV_T_INVALID;
1762     }
1763     else
1764     {
1765         w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
1766         if ( w2m == ICONV_T_INVALID )
1767         {
1768             wxLogTrace(TRACE_STRCONV,
1769                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1770                        ms_wcCharsetName.c_str(), name);
1771         }
1772     }
1773 }
1774
1775 wxMBConv_iconv::~wxMBConv_iconv()
1776 {
1777     if ( m2w != ICONV_T_INVALID )
1778         iconv_close(m2w);
1779     if ( w2m != ICONV_T_INVALID )
1780         iconv_close(w2m);
1781 }
1782
1783 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1784 {
1785     // find the string length: notice that must be done differently for
1786     // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1787     size_t inbuf;
1788     const size_t nulLen = GetMBNulLen();
1789     switch ( nulLen )
1790     {
1791         default:
1792             return wxCONV_FAILED;
1793
1794         case 1:
1795             inbuf = strlen(psz); // arguably more optimized than our version
1796             break;
1797
1798         case 2:
1799         case 4:
1800             // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1801             // they also have to start at character boundary and not span two
1802             // adjacent characters
1803             const char *p;
1804             for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1805                 ;
1806             inbuf = p - psz;
1807             break;
1808     }
1809
1810 #if wxUSE_THREADS
1811     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1812     //     Unfortunately there are a couple of global wxCSConv objects such as
1813     //     wxConvLocal that are used all over wx code, so we have to make sure
1814     //     the handle is used by at most one thread at the time. Otherwise
1815     //     only a few wx classes would be safe to use from non-main threads
1816     //     as MB<->WC conversion would fail "randomly".
1817     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1818 #endif // wxUSE_THREADS
1819
1820     size_t outbuf = n * SIZEOF_WCHAR_T;
1821     size_t res, cres;
1822     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1823     wchar_t *bufPtr = buf;
1824     const char *pszPtr = psz;
1825
1826     if (buf)
1827     {
1828         // have destination buffer, convert there
1829         cres = iconv(m2w,
1830                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1831                      (char**)&bufPtr, &outbuf);
1832         res = n - (outbuf / SIZEOF_WCHAR_T);
1833
1834         if (ms_wcNeedsSwap)
1835         {
1836             // convert to native endianness
1837             for ( unsigned i = 0; i < res; i++ )
1838                 buf[n] = WC_BSWAP(buf[i]);
1839         }
1840
1841         // NUL-terminate the string if there is any space left
1842         if (res < n)
1843             buf[res] = 0;
1844     }
1845     else
1846     {
1847         // no destination buffer... convert using temp buffer
1848         // to calculate destination buffer requirement
1849         wchar_t tbuf[8];
1850         res = 0;
1851
1852         do
1853         {
1854             bufPtr = tbuf;
1855             outbuf = 8 * SIZEOF_WCHAR_T;
1856
1857             cres = iconv(m2w,
1858                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1859                          (char**)&bufPtr, &outbuf );
1860
1861             res += 8 - (outbuf / SIZEOF_WCHAR_T);
1862         }
1863         while ((cres == (size_t)-1) && (errno == E2BIG));
1864     }
1865
1866     if (ICONV_FAILED(cres, inbuf))
1867     {
1868         //VS: it is ok if iconv fails, hence trace only
1869         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1870         return wxCONV_FAILED;
1871     }
1872
1873     return res;
1874 }
1875
1876 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1877 {
1878 #if wxUSE_THREADS
1879     // NB: explained in MB2WC
1880     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1881 #endif
1882
1883     size_t inlen = wxWcslen(psz);
1884     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1885     size_t outbuf = n;
1886     size_t res, cres;
1887
1888     wchar_t *tmpbuf = 0;
1889
1890     if (ms_wcNeedsSwap)
1891     {
1892         // need to copy to temp buffer to switch endianness
1893         // (doing WC_BSWAP twice on the original buffer won't help, as it
1894         //  could be in read-only memory, or be accessed in some other thread)
1895         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1896         for ( size_t i = 0; i < inlen; i++ )
1897             tmpbuf[n] = WC_BSWAP(psz[i]);
1898
1899         tmpbuf[inlen] = L'\0';
1900         psz = tmpbuf;
1901     }
1902
1903     if (buf)
1904     {
1905         // have destination buffer, convert there
1906         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1907
1908         res = n - outbuf;
1909
1910         // NB: iconv was given only wcslen(psz) characters on input, and so
1911         //     it couldn't convert the trailing zero. Let's do it ourselves
1912         //     if there's some room left for it in the output buffer.
1913         if (res < n)
1914             buf[0] = 0;
1915     }
1916     else
1917     {
1918         // no destination buffer: convert using temp buffer
1919         // to calculate destination buffer requirement
1920         char tbuf[16];
1921         res = 0;
1922         do
1923         {
1924             buf = tbuf;
1925             outbuf = 16;
1926
1927             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1928
1929             res += 16 - outbuf;
1930         }
1931         while ((cres == (size_t)-1) && (errno == E2BIG));
1932     }
1933
1934     if (ms_wcNeedsSwap)
1935     {
1936         free(tmpbuf);
1937     }
1938
1939     if (ICONV_FAILED(cres, inbuf))
1940     {
1941         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1942         return wxCONV_FAILED;
1943     }
1944
1945     return res;
1946 }
1947
1948 size_t wxMBConv_iconv::GetMBNulLen() const
1949 {
1950     if ( m_minMBCharWidth == 0 )
1951     {
1952         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1953
1954 #if wxUSE_THREADS
1955         // NB: explained in MB2WC
1956         wxMutexLocker lock(self->m_iconvMutex);
1957 #endif
1958
1959         const wchar_t *wnul = L"";
1960         char buf[8]; // should be enough for NUL in any encoding
1961         size_t inLen = sizeof(wchar_t),
1962                outLen = WXSIZEOF(buf);
1963         char *inBuff = (char *)wnul;
1964         char *outBuff = buf;
1965         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1966         {
1967             self->m_minMBCharWidth = (size_t)-1;
1968         }
1969         else // ok
1970         {
1971             self->m_minMBCharWidth = outBuff - buf;
1972         }
1973     }
1974
1975     return m_minMBCharWidth;
1976 }
1977
1978 #if wxUSE_UNICODE_UTF8
1979 bool wxMBConv_iconv::IsUTF8() const
1980 {
1981     return wxStricmp(m_name, "UTF-8") == 0 ||
1982            wxStricmp(m_name, "UTF8") == 0;
1983 }
1984 #endif
1985
1986 #endif // HAVE_ICONV
1987
1988
1989 // ============================================================================
1990 // Win32 conversion classes
1991 // ============================================================================
1992
1993 #ifdef wxHAVE_WIN32_MB2WC
1994
1995 // from utils.cpp
1996 #if wxUSE_FONTMAP
1997 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
1998 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1999 #endif
2000
2001 class wxMBConv_win32 : public wxMBConv
2002 {
2003 public:
2004     wxMBConv_win32()
2005     {
2006         m_CodePage = CP_ACP;
2007         m_minMBCharWidth = 0;
2008     }
2009
2010     wxMBConv_win32(const wxMBConv_win32& conv)
2011         : wxMBConv()
2012     {
2013         m_CodePage = conv.m_CodePage;
2014         m_minMBCharWidth = conv.m_minMBCharWidth;
2015     }
2016
2017 #if wxUSE_FONTMAP
2018     wxMBConv_win32(const char* name)
2019     {
2020         m_CodePage = wxCharsetToCodepage(name);
2021         m_minMBCharWidth = 0;
2022     }
2023
2024     wxMBConv_win32(wxFontEncoding encoding)
2025     {
2026         m_CodePage = wxEncodingToCodepage(encoding);
2027         m_minMBCharWidth = 0;
2028     }
2029 #endif // wxUSE_FONTMAP
2030
2031     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2032     {
2033         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2034         // the behaviour is not compatible with the Unix version (using iconv)
2035         // and break the library itself, e.g. wxTextInputStream::NextChar()
2036         // wouldn't work if reading an incomplete MB char didn't result in an
2037         // error
2038         //
2039         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2040         // Win XP or newer and it is not supported for UTF-[78] so we always
2041         // use our own conversions in this case. See
2042         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2043         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2044         if ( m_CodePage == CP_UTF8 )
2045         {
2046             return wxMBConvUTF8().MB2WC(buf, psz, n);
2047         }
2048
2049         if ( m_CodePage == CP_UTF7 )
2050         {
2051             return wxMBConvUTF7().MB2WC(buf, psz, n);
2052         }
2053
2054         int flags = 0;
2055         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2056                 IsAtLeastWin2kSP4() )
2057         {
2058             flags = MB_ERR_INVALID_CHARS;
2059         }
2060
2061         const size_t len = ::MultiByteToWideChar
2062                              (
2063                                 m_CodePage,     // code page
2064                                 flags,          // flags: fall on error
2065                                 psz,            // input string
2066                                 -1,             // its length (NUL-terminated)
2067                                 buf,            // output string
2068                                 buf ? n : 0     // size of output buffer
2069                              );
2070         if ( !len )
2071         {
2072             // function totally failed
2073             return wxCONV_FAILED;
2074         }
2075
2076         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2077         // check if we succeeded, by doing a double trip:
2078         if ( !flags && buf )
2079         {
2080             const size_t mbLen = strlen(psz);
2081             wxCharBuffer mbBuf(mbLen);
2082             if ( ::WideCharToMultiByte
2083                    (
2084                       m_CodePage,
2085                       0,
2086                       buf,
2087                       -1,
2088                       mbBuf.data(),
2089                       mbLen + 1,        // size in bytes, not length
2090                       NULL,
2091                       NULL
2092                    ) == 0 ||
2093                   strcmp(mbBuf, psz) != 0 )
2094             {
2095                 // we didn't obtain the same thing we started from, hence
2096                 // the conversion was lossy and we consider that it failed
2097                 return wxCONV_FAILED;
2098             }
2099         }
2100
2101         // note that it returns count of written chars for buf != NULL and size
2102         // of the needed buffer for buf == NULL so in either case the length of
2103         // the string (which never includes the terminating NUL) is one less
2104         return len - 1;
2105     }
2106
2107     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2108     {
2109         /*
2110             we have a problem here: by default, WideCharToMultiByte() may
2111             replace characters unrepresentable in the target code page with bad
2112             quality approximations such as turning "1/2" symbol (U+00BD) into
2113             "1" for the code pages which don't have it and we, obviously, want
2114             to avoid this at any price
2115
2116             the trouble is that this function does it _silently_, i.e. it won't
2117             even tell us whether it did or not... Win98/2000 and higher provide
2118             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2119             we have to resort to a round trip, i.e. check that converting back
2120             results in the same string -- this is, of course, expensive but
2121             otherwise we simply can't be sure to not garble the data.
2122          */
2123
2124         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2125         // it doesn't work with CJK encodings (which we test for rather roughly
2126         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2127         // supporting it
2128         BOOL usedDef wxDUMMY_INITIALIZE(false);
2129         BOOL *pUsedDef;
2130         int flags;
2131         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2132         {
2133             // it's our lucky day
2134             flags = WC_NO_BEST_FIT_CHARS;
2135             pUsedDef = &usedDef;
2136         }
2137         else // old system or unsupported encoding
2138         {
2139             flags = 0;
2140             pUsedDef = NULL;
2141         }
2142
2143         const size_t len = ::WideCharToMultiByte
2144                              (
2145                                 m_CodePage,     // code page
2146                                 flags,          // either none or no best fit
2147                                 pwz,            // input string
2148                                 -1,             // it is (wide) NUL-terminated
2149                                 buf,            // output buffer
2150                                 buf ? n : 0,    // and its size
2151                                 NULL,           // default "replacement" char
2152                                 pUsedDef        // [out] was it used?
2153                              );
2154
2155         if ( !len )
2156         {
2157             // function totally failed
2158             return wxCONV_FAILED;
2159         }
2160
2161         // if we were really converting, check if we succeeded
2162         if ( buf )
2163         {
2164             if ( flags )
2165             {
2166                 // check if the conversion failed, i.e. if any replacements
2167                 // were done
2168                 if ( usedDef )
2169                     return wxCONV_FAILED;
2170             }
2171             else // we must resort to double tripping...
2172             {
2173                 wxWCharBuffer wcBuf(n);
2174                 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2175                         wcscmp(wcBuf, pwz) != 0 )
2176                 {
2177                     // we didn't obtain the same thing we started from, hence
2178                     // the conversion was lossy and we consider that it failed
2179                     return wxCONV_FAILED;
2180                 }
2181             }
2182         }
2183
2184         // see the comment above for the reason of "len - 1"
2185         return len - 1;
2186     }
2187
2188     virtual size_t GetMBNulLen() const
2189     {
2190         if ( m_minMBCharWidth == 0 )
2191         {
2192             int len = ::WideCharToMultiByte
2193                         (
2194                             m_CodePage,     // code page
2195                             0,              // no flags
2196                             L"",            // input string
2197                             1,              // translate just the NUL
2198                             NULL,           // output buffer
2199                             0,              // and its size
2200                             NULL,           // no replacement char
2201                             NULL            // [out] don't care if it was used
2202                         );
2203
2204             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2205             switch ( len )
2206             {
2207                 default:
2208                     wxLogDebug(_T("Unexpected NUL length %d"), len);
2209                     self->m_minMBCharWidth = (size_t)-1;
2210                     break;
2211
2212                 case 0:
2213                     self->m_minMBCharWidth = (size_t)-1;
2214                     break;
2215
2216                 case 1:
2217                 case 2:
2218                 case 4:
2219                     self->m_minMBCharWidth = len;
2220                     break;
2221             }
2222         }
2223
2224         return m_minMBCharWidth;
2225     }
2226
2227     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2228
2229     bool IsOk() const { return m_CodePage != -1; }
2230
2231 private:
2232     static bool CanUseNoBestFit()
2233     {
2234         static int s_isWin98Or2k = -1;
2235
2236         if ( s_isWin98Or2k == -1 )
2237         {
2238             int verMaj, verMin;
2239             switch ( wxGetOsVersion(&verMaj, &verMin) )
2240             {
2241                 case wxOS_WINDOWS_9X:
2242                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2243                     break;
2244
2245                 case wxOS_WINDOWS_NT:
2246                     s_isWin98Or2k = verMaj >= 5;
2247                     break;
2248
2249                 default:
2250                     // unknown: be conservative by default
2251                     s_isWin98Or2k = 0;
2252                     break;
2253             }
2254
2255             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2256         }
2257
2258         return s_isWin98Or2k == 1;
2259     }
2260
2261     static bool IsAtLeastWin2kSP4()
2262     {
2263 #ifdef __WXWINCE__
2264         return false;
2265 #else
2266         static int s_isAtLeastWin2kSP4 = -1;
2267
2268         if ( s_isAtLeastWin2kSP4 == -1 )
2269         {
2270             OSVERSIONINFOEX ver;
2271
2272             memset(&ver, 0, sizeof(ver));
2273             ver.dwOSVersionInfoSize = sizeof(ver);
2274             GetVersionEx((OSVERSIONINFO*)&ver);
2275
2276             s_isAtLeastWin2kSP4 =
2277               ((ver.dwMajorVersion > 5) || // Vista+
2278                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2279                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2280                ver.wServicePackMajor >= 4)) // 2000 SP4+
2281               ? 1 : 0;
2282         }
2283
2284         return s_isAtLeastWin2kSP4 == 1;
2285 #endif
2286     }
2287
2288
2289     // the code page we're working with
2290     long m_CodePage;
2291
2292     // cached result of GetMBNulLen(), set to 0 initially meaning
2293     // "unknown"
2294     size_t m_minMBCharWidth;
2295 };
2296
2297 #endif // wxHAVE_WIN32_MB2WC
2298
2299 // ============================================================================
2300 // Cocoa conversion classes
2301 // ============================================================================
2302
2303 // DE: Does anyone know the purpose of this code?
2304 // This file is compiled in the base library, so __WXCOCOA__ check is totally wrong
2305 // in the first place.
2306 #if 0 // defined(__WXCOCOA__)
2307
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a bit -
// it's just not public (yet).
2311
2312 #include <CoreFoundation/CFString.h>
2313 #include <CoreFoundation/CFStringEncodingExt.h>
2314
2315 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2316 {
2317     CFStringEncoding enc = kCFStringEncodingInvalidId ;
2318
2319     switch (encoding)
2320     {
2321         case wxFONTENCODING_DEFAULT :
2322             enc = CFStringGetSystemEncoding();
2323             break ;
2324
2325         case wxFONTENCODING_ISO8859_1 :
2326             enc = kCFStringEncodingISOLatin1 ;
2327             break ;
2328         case wxFONTENCODING_ISO8859_2 :
2329             enc = kCFStringEncodingISOLatin2;
2330             break ;
2331         case wxFONTENCODING_ISO8859_3 :
2332             enc = kCFStringEncodingISOLatin3 ;
2333             break ;
2334         case wxFONTENCODING_ISO8859_4 :
2335             enc = kCFStringEncodingISOLatin4;
2336             break ;
2337         case wxFONTENCODING_ISO8859_5 :
2338             enc = kCFStringEncodingISOLatinCyrillic;
2339             break ;
2340         case wxFONTENCODING_ISO8859_6 :
2341             enc = kCFStringEncodingISOLatinArabic;
2342             break ;
2343         case wxFONTENCODING_ISO8859_7 :
2344             enc = kCFStringEncodingISOLatinGreek;
2345             break ;
2346         case wxFONTENCODING_ISO8859_8 :
2347             enc = kCFStringEncodingISOLatinHebrew;
2348             break ;
2349         case wxFONTENCODING_ISO8859_9 :
2350             enc = kCFStringEncodingISOLatin5;
2351             break ;
2352         case wxFONTENCODING_ISO8859_10 :
2353             enc = kCFStringEncodingISOLatin6;
2354             break ;
2355         case wxFONTENCODING_ISO8859_11 :
2356             enc = kCFStringEncodingISOLatinThai;
2357             break ;
2358         case wxFONTENCODING_ISO8859_13 :
2359             enc = kCFStringEncodingISOLatin7;
2360             break ;
2361         case wxFONTENCODING_ISO8859_14 :
2362             enc = kCFStringEncodingISOLatin8;
2363             break ;
2364         case wxFONTENCODING_ISO8859_15 :
2365             enc = kCFStringEncodingISOLatin9;
2366             break ;
2367
2368         case wxFONTENCODING_KOI8 :
2369             enc = kCFStringEncodingKOI8_R;
2370             break ;
2371         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2372             enc = kCFStringEncodingDOSRussian;
2373             break ;
2374
2375 //      case wxFONTENCODING_BULGARIAN :
2376 //          enc = ;
2377 //          break ;
2378
2379         case wxFONTENCODING_CP437 :
2380             enc = kCFStringEncodingDOSLatinUS ;
2381             break ;
2382         case wxFONTENCODING_CP850 :
2383             enc = kCFStringEncodingDOSLatin1;
2384             break ;
2385         case wxFONTENCODING_CP852 :
2386             enc = kCFStringEncodingDOSLatin2;
2387             break ;
2388         case wxFONTENCODING_CP855 :
2389             enc = kCFStringEncodingDOSCyrillic;
2390             break ;
2391         case wxFONTENCODING_CP866 :
2392             enc = kCFStringEncodingDOSRussian ;
2393             break ;
2394         case wxFONTENCODING_CP874 :
2395             enc = kCFStringEncodingDOSThai;
2396             break ;
2397         case wxFONTENCODING_CP932 :
2398             enc = kCFStringEncodingDOSJapanese;
2399             break ;
2400         case wxFONTENCODING_CP936 :
2401             enc = kCFStringEncodingDOSChineseSimplif ;
2402             break ;
2403         case wxFONTENCODING_CP949 :
2404             enc = kCFStringEncodingDOSKorean;
2405             break ;
2406         case wxFONTENCODING_CP950 :
2407             enc = kCFStringEncodingDOSChineseTrad;
2408             break ;
2409         case wxFONTENCODING_CP1250 :
2410             enc = kCFStringEncodingWindowsLatin2;
2411             break ;
2412         case wxFONTENCODING_CP1251 :
2413             enc = kCFStringEncodingWindowsCyrillic ;
2414             break ;
2415         case wxFONTENCODING_CP1252 :
2416             enc = kCFStringEncodingWindowsLatin1 ;
2417             break ;
2418         case wxFONTENCODING_CP1253 :
2419             enc = kCFStringEncodingWindowsGreek;
2420             break ;
2421         case wxFONTENCODING_CP1254 :
2422             enc = kCFStringEncodingWindowsLatin5;
2423             break ;
2424         case wxFONTENCODING_CP1255 :
2425             enc = kCFStringEncodingWindowsHebrew ;
2426             break ;
2427         case wxFONTENCODING_CP1256 :
2428             enc = kCFStringEncodingWindowsArabic ;
2429             break ;
2430         case wxFONTENCODING_CP1257 :
2431             enc = kCFStringEncodingWindowsBalticRim;
2432             break ;
2433 //   This only really encodes to UTF7 (if that) evidently
2434 //        case wxFONTENCODING_UTF7 :
2435 //            enc = kCFStringEncodingNonLossyASCII ;
2436 //            break ;
2437         case wxFONTENCODING_UTF8 :
2438             enc = kCFStringEncodingUTF8 ;
2439             break ;
2440         case wxFONTENCODING_EUC_JP :
2441             enc = kCFStringEncodingEUC_JP;
2442             break ;
2443         case wxFONTENCODING_UTF16 :
2444             enc = kCFStringEncodingUnicode ;
2445             break ;
2446         case wxFONTENCODING_MACROMAN :
2447             enc = kCFStringEncodingMacRoman ;
2448             break ;
2449         case wxFONTENCODING_MACJAPANESE :
2450             enc = kCFStringEncodingMacJapanese ;
2451             break ;
2452         case wxFONTENCODING_MACCHINESETRAD :
2453             enc = kCFStringEncodingMacChineseTrad ;
2454             break ;
2455         case wxFONTENCODING_MACKOREAN :
2456             enc = kCFStringEncodingMacKorean ;
2457             break ;
2458         case wxFONTENCODING_MACARABIC :
2459             enc = kCFStringEncodingMacArabic ;
2460             break ;
2461         case wxFONTENCODING_MACHEBREW :
2462             enc = kCFStringEncodingMacHebrew ;
2463             break ;
2464         case wxFONTENCODING_MACGREEK :
2465             enc = kCFStringEncodingMacGreek ;
2466             break ;
2467         case wxFONTENCODING_MACCYRILLIC :
2468             enc = kCFStringEncodingMacCyrillic ;
2469             break ;
2470         case wxFONTENCODING_MACDEVANAGARI :
2471             enc = kCFStringEncodingMacDevanagari ;
2472             break ;
2473         case wxFONTENCODING_MACGURMUKHI :
2474             enc = kCFStringEncodingMacGurmukhi ;
2475             break ;
2476         case wxFONTENCODING_MACGUJARATI :
2477             enc = kCFStringEncodingMacGujarati ;
2478             break ;
2479         case wxFONTENCODING_MACORIYA :
2480             enc = kCFStringEncodingMacOriya ;
2481             break ;
2482         case wxFONTENCODING_MACBENGALI :
2483             enc = kCFStringEncodingMacBengali ;
2484             break ;
2485         case wxFONTENCODING_MACTAMIL :
2486             enc = kCFStringEncodingMacTamil ;
2487             break ;
2488         case wxFONTENCODING_MACTELUGU :
2489             enc = kCFStringEncodingMacTelugu ;
2490             break ;
2491         case wxFONTENCODING_MACKANNADA :
2492             enc = kCFStringEncodingMacKannada ;
2493             break ;
2494         case wxFONTENCODING_MACMALAJALAM :
2495             enc = kCFStringEncodingMacMalayalam ;
2496             break ;
2497         case wxFONTENCODING_MACSINHALESE :
2498             enc = kCFStringEncodingMacSinhalese ;
2499             break ;
2500         case wxFONTENCODING_MACBURMESE :
2501             enc = kCFStringEncodingMacBurmese ;
2502             break ;
2503         case wxFONTENCODING_MACKHMER :
2504             enc = kCFStringEncodingMacKhmer ;
2505             break ;
2506         case wxFONTENCODING_MACTHAI :
2507             enc = kCFStringEncodingMacThai ;
2508             break ;
2509         case wxFONTENCODING_MACLAOTIAN :
2510             enc = kCFStringEncodingMacLaotian ;
2511             break ;
2512         case wxFONTENCODING_MACGEORGIAN :
2513             enc = kCFStringEncodingMacGeorgian ;
2514             break ;
2515         case wxFONTENCODING_MACARMENIAN :
2516             enc = kCFStringEncodingMacArmenian ;
2517             break ;
2518         case wxFONTENCODING_MACCHINESESIMP :
2519             enc = kCFStringEncodingMacChineseSimp ;
2520             break ;
2521         case wxFONTENCODING_MACTIBETAN :
2522             enc = kCFStringEncodingMacTibetan ;
2523             break ;
2524         case wxFONTENCODING_MACMONGOLIAN :
2525             enc = kCFStringEncodingMacMongolian ;
2526             break ;
2527         case wxFONTENCODING_MACETHIOPIC :
2528             enc = kCFStringEncodingMacEthiopic ;
2529             break ;
2530         case wxFONTENCODING_MACCENTRALEUR :
2531             enc = kCFStringEncodingMacCentralEurRoman ;
2532             break ;
2533         case wxFONTENCODING_MACVIATNAMESE :
2534             enc = kCFStringEncodingMacVietnamese ;
2535             break ;
2536         case wxFONTENCODING_MACARABICEXT :
2537             enc = kCFStringEncodingMacExtArabic ;
2538             break ;
2539         case wxFONTENCODING_MACSYMBOL :
2540             enc = kCFStringEncodingMacSymbol ;
2541             break ;
2542         case wxFONTENCODING_MACDINGBATS :
2543             enc = kCFStringEncodingMacDingbats ;
2544             break ;
2545         case wxFONTENCODING_MACTURKISH :
2546             enc = kCFStringEncodingMacTurkish ;
2547             break ;
2548         case wxFONTENCODING_MACCROATIAN :
2549             enc = kCFStringEncodingMacCroatian ;
2550             break ;
2551         case wxFONTENCODING_MACICELANDIC :
2552             enc = kCFStringEncodingMacIcelandic ;
2553             break ;
2554         case wxFONTENCODING_MACROMANIAN :
2555             enc = kCFStringEncodingMacRomanian ;
2556             break ;
2557         case wxFONTENCODING_MACCELTIC :
2558             enc = kCFStringEncodingMacCeltic ;
2559             break ;
2560         case wxFONTENCODING_MACGAELIC :
2561             enc = kCFStringEncodingMacGaelic ;
2562             break ;
2563 //      case wxFONTENCODING_MACKEYBOARD :
2564 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2565 //          break ;
2566
2567         default :
2568             // because gcc is picky
2569             break ;
2570     }
2571
2572     return enc ;
2573 }
2574
2575 class wxMBConv_cocoa : public wxMBConv
2576 {
2577 public:
2578     wxMBConv_cocoa()
2579     {
2580         Init(CFStringGetSystemEncoding()) ;
2581     }
2582
2583     wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2584     {
2585         m_encoding = conv.m_encoding;
2586     }
2587
2588 #if wxUSE_FONTMAP
2589     wxMBConv_cocoa(const wxChar* name)
2590     {
2591         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2592     }
2593 #endif
2594
2595     wxMBConv_cocoa(wxFontEncoding encoding)
2596     {
2597         Init( wxCFStringEncFromFontEnc(encoding) );
2598     }
2599
2600     virtual ~wxMBConv_cocoa()
2601     {
2602     }
2603
2604     void Init( CFStringEncoding encoding)
2605     {
2606         m_encoding = encoding ;
2607     }
2608
2609     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2610     {
2611         wxASSERT(szUnConv);
2612
2613         CFStringRef theString = CFStringCreateWithBytes (
2614                                                 NULL, //the allocator
2615                                                 (const UInt8*)szUnConv,
2616                                                 strlen(szUnConv),
2617                                                 m_encoding,
2618                                                 false //no BOM/external representation
2619                                                 );
2620
2621         wxASSERT(theString);
2622
2623         size_t nOutLength = CFStringGetLength(theString);
2624
2625         if (szOut == NULL)
2626         {
2627             CFRelease(theString);
2628             return nOutLength;
2629         }
2630
2631         CFRange theRange = { 0, nOutSize };
2632
2633 #if SIZEOF_WCHAR_T == 4
2634         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2635 #endif
2636
2637         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2638
2639         CFRelease(theString);
2640
2641         szUniCharBuffer[nOutLength] = '\0';
2642
2643 #if SIZEOF_WCHAR_T == 4
2644         wxMBConvUTF16 converter;
2645         converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2646         delete [] szUniCharBuffer;
2647 #endif
2648
2649         return nOutLength;
2650     }
2651
2652     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2653     {
2654         wxASSERT(szUnConv);
2655
2656         size_t nRealOutSize;
2657         size_t nBufSize = wxWcslen(szUnConv);
2658         UniChar* szUniBuffer = (UniChar*) szUnConv;
2659
2660 #if SIZEOF_WCHAR_T == 4
2661         wxMBConvUTF16 converter ;
2662         nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2663         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2664         converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
2665         nBufSize /= sizeof(UniChar);
2666 #endif
2667
2668         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2669                                 NULL, //allocator
2670                                 szUniBuffer,
2671                                 nBufSize,
2672                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2673                             );
2674
2675         wxASSERT(theString);
2676
2677         //Note that CER puts a BOM when converting to unicode
2678         //so we  check and use getchars instead in that case
2679         if (m_encoding == kCFStringEncodingUnicode)
2680         {
2681             if (szOut != NULL)
2682                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2683
2684             nRealOutSize = CFStringGetLength(theString) + 1;
2685         }
2686         else
2687         {
2688             CFStringGetBytes(
2689                 theString,
2690                 CFRangeMake(0, CFStringGetLength(theString)),
2691                 m_encoding,
2692                 0, //what to put in characters that can't be converted -
2693                     //0 tells CFString to return NULL if it meets such a character
2694                 false, //not an external representation
2695                 (UInt8*) szOut,
2696                 nOutSize,
2697                 (CFIndex*) &nRealOutSize
2698                         );
2699         }
2700
2701         CFRelease(theString);
2702
2703 #if SIZEOF_WCHAR_T == 4
2704         delete[] szUniBuffer;
2705 #endif
2706
2707         return  nRealOutSize - 1;
2708     }
2709
2710     virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2711
2712     bool IsOk() const
2713     {
2714         return m_encoding != kCFStringEncodingInvalidId &&
2715               CFStringIsEncodingAvailable(m_encoding);
2716     }
2717
2718 private:
2719     CFStringEncoding m_encoding ;
2720 };
2721
2722 #endif // defined(__WXCOCOA__)
2723
2724 // ============================================================================
2725 // Mac conversion classes
2726 // ============================================================================
2727
2728 // DE: Can someone explain to me why this is conditional upon __WXMAC__ instead
2729 // of being used for all Mac OS X systems?  This file is part of the base library
2730 // not the core library.
2731 // If we really need GUI-specific conversions then a better method might be to
2732 // provide something in wxAppTraits that could be implemented in the core library.
2733 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2734
2735 class wxMBConv_mac : public wxMBConv
2736 {
2737 public:
2738     wxMBConv_mac()
2739     {
2740         Init(CFStringGetSystemEncoding()) ;
2741     }
2742
2743     wxMBConv_mac(const wxMBConv_mac& conv)
2744     {
2745         Init(conv.m_char_encoding);
2746     }
2747
2748 #if wxUSE_FONTMAP
2749     wxMBConv_mac(const char* name)
2750     {
2751         Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
2752     }
2753 #endif
2754
2755     wxMBConv_mac(wxFontEncoding encoding)
2756     {
2757         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2758     }
2759
2760     virtual ~wxMBConv_mac()
2761     {
2762         OSStatus status = noErr ;
2763         if (m_MB2WC_converter)
2764             status = TECDisposeConverter(m_MB2WC_converter);
2765         if (m_WC2MB_converter)
2766             status = TECDisposeConverter(m_WC2MB_converter);
2767     }
2768
2769     void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2770             TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
2771     {
2772         m_MB2WC_converter = NULL ;
2773         m_WC2MB_converter = NULL ;
2774         m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
2775         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
2776     }
2777
2778     virtual void CreateIfNeeded() const
2779     {
2780         if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2781         {
2782             OSStatus status = noErr ;
2783             status = TECCreateConverter(&m_MB2WC_converter,
2784                                     m_char_encoding,
2785                                     m_unicode_encoding);
2786             wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2787             status = TECCreateConverter(&m_WC2MB_converter,
2788                                     m_unicode_encoding,
2789                                     m_char_encoding);
2790             wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2791         }
2792     }
2793
2794     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2795     {
2796         CreateIfNeeded() ;
2797         OSStatus status = noErr ;
2798         ByteCount byteOutLen ;
2799         ByteCount byteInLen = strlen(psz) + 1;
2800         wchar_t *tbuf = NULL ;
2801         UniChar* ubuf = NULL ;
2802         size_t res = 0 ;
2803
2804         if (buf == NULL)
2805         {
2806             // Apple specs say at least 32
2807             n = wxMax( 32, byteInLen ) ;
2808             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2809         }
2810
2811         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2812
2813 #if SIZEOF_WCHAR_T == 4
2814         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2815 #else
2816         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2817 #endif
2818
2819         status = TECConvertText(
2820             m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2821             (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2822
2823 #if SIZEOF_WCHAR_T == 4
2824         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2825         // is not properly terminated we get random characters at the end
2826         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2827         wxMBConvUTF16 converter ;
2828         res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
2829         free( ubuf ) ;
2830 #else
2831         res = byteOutLen / sizeof( UniChar ) ;
2832 #endif
2833
2834         if ( buf == NULL )
2835              free(tbuf) ;
2836
2837         if ( buf  && res < n)
2838             buf[res] = 0;
2839
2840         return res ;
2841     }
2842
2843     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2844     {
2845         CreateIfNeeded() ;
2846         OSStatus status = noErr ;
2847         ByteCount byteOutLen ;
2848         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2849
2850         char *tbuf = NULL ;
2851
2852         if (buf == NULL)
2853         {
2854             // Apple specs say at least 32
2855             n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2856             tbuf = (char*) malloc( n ) ;
2857         }
2858
2859         ByteCount byteBufferLen = n ;
2860         UniChar* ubuf = NULL ;
2861
2862 #if SIZEOF_WCHAR_T == 4
2863         wxMBConvUTF16 converter ;
2864         size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2865         byteInLen = unicharlen ;
2866         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2867         converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2868 #else
2869         ubuf = (UniChar*) psz ;
2870 #endif
2871
2872         status = TECConvertText(
2873             m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2874             (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2875
2876 #if SIZEOF_WCHAR_T == 4
2877         free( ubuf ) ;
2878 #endif
2879
2880         if ( buf == NULL )
2881             free(tbuf) ;
2882
2883         size_t res = byteOutLen ;
2884         if ( buf  && res < n)
2885         {
2886             buf[res] = 0;
2887
2888             //we need to double-trip to verify it didn't insert any ? in place
2889             //of bogus characters
2890             wxWCharBuffer wcBuf(n);
2891             size_t pszlen = wxWcslen(psz);
2892             if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2893                         wxWcslen(wcBuf) != pszlen ||
2894                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2895             {
2896                 // we didn't obtain the same thing we started from, hence
2897                 // the conversion was lossy and we consider that it failed
2898                 return wxCONV_FAILED;
2899             }
2900         }
2901
2902         return res ;
2903     }
2904
2905     virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
2906
2907     bool IsOk() const
2908     {
2909         CreateIfNeeded() ;
2910         return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2911     }
2912
2913 protected :
2914     mutable TECObjectRef m_MB2WC_converter;
2915     mutable TECObjectRef m_WC2MB_converter;
2916
2917     TextEncodingBase m_char_encoding;
2918     TextEncodingBase m_unicode_encoding;
2919 };
2920
2921 // MB is decomposed (D) normalized UTF8
2922
2923 class wxMBConv_macUTF8D : public wxMBConv_mac
2924 {
2925 public :
2926     wxMBConv_macUTF8D()
2927     {
2928         Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2929         m_uni = NULL;
2930         m_uniBack = NULL ;
2931     }
2932
2933     virtual ~wxMBConv_macUTF8D()
2934     {
2935         if (m_uni!=NULL)
2936             DisposeUnicodeToTextInfo(&m_uni);
2937         if (m_uniBack!=NULL)
2938             DisposeUnicodeToTextInfo(&m_uniBack);
2939     }
2940
2941     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2942     {
2943         CreateIfNeeded() ;
2944         OSStatus status = noErr ;
2945         ByteCount byteOutLen ;
2946         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2947
2948         char *tbuf = NULL ;
2949
2950         if (buf == NULL)
2951         {
2952             // Apple specs say at least 32
2953             n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2954             tbuf = (char*) malloc( n ) ;
2955         }
2956
2957         ByteCount byteBufferLen = n ;
2958         UniChar* ubuf = NULL ;
2959
2960 #if SIZEOF_WCHAR_T == 4
2961         wxMBConvUTF16 converter ;
2962         size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2963         byteInLen = unicharlen ;
2964         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2965         converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2966 #else
2967         ubuf = (UniChar*) psz ;
2968 #endif
2969
2970         // ubuf is a non-decomposed UniChar buffer
2971
2972         ByteCount dcubuflen = byteInLen * 2 + 2 ;
2973         ByteCount dcubufread , dcubufwritten ;
2974         UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2975
2976         ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2977             kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , dcubuf ) ;
2978
2979         // we now convert that decomposed buffer into UTF8
2980
2981         status = TECConvertText(
2982             m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2983             (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2984
2985         free( dcubuf );
2986
2987 #if SIZEOF_WCHAR_T == 4
2988         free( ubuf ) ;
2989 #endif
2990
2991         if ( buf == NULL )
2992             free(tbuf) ;
2993
2994         size_t res = byteOutLen ;
2995         if ( buf  && res < n)
2996         {
2997             buf[res] = 0;
2998             // don't test for round-trip fidelity yet, we cannot guarantee it yet
2999         }
3000
3001         return res ;
3002     }
3003
3004     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
3005     {
3006         CreateIfNeeded() ;
3007         OSStatus status = noErr ;
3008         ByteCount byteOutLen ;
3009         ByteCount byteInLen = strlen(psz) + 1;
3010         wchar_t *tbuf = NULL ;
3011         UniChar* ubuf = NULL ;
3012         size_t res = 0 ;
3013
3014         if (buf == NULL)
3015         {
3016             // Apple specs say at least 32
3017             n = wxMax( 32, byteInLen ) ;
3018             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3019         }
3020
3021         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3022
3023 #if SIZEOF_WCHAR_T == 4
3024         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3025 #else
3026         ubuf = (UniChar*) (buf ? buf : tbuf) ;
3027 #endif
3028
3029         ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3030         ByteCount dcubufread , dcubufwritten ;
3031         UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3032
3033         status = TECConvertText(
3034                                 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3035                                 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3036         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3037         // is not properly terminated we get random characters at the end
3038         dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3039
3040         // now from the decomposed UniChar to properly composed uniChar
3041         ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3042                                   kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , ubuf ) ;
3043
3044         free( dcubuf );
3045         byteOutLen = dcubufwritten ;
3046         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3047
3048
3049 #if SIZEOF_WCHAR_T == 4
3050         wxMBConvUTF16 converter ;
3051         res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3052         free( ubuf ) ;
3053 #else
3054         res = byteOutLen / sizeof( UniChar ) ;
3055 #endif
3056
3057         if ( buf == NULL )
3058             free(tbuf) ;
3059
3060         if ( buf  && res < n)
3061             buf[res] = 0;
3062
3063         return res ;
3064     }
3065
3066     virtual void CreateIfNeeded() const
3067     {
3068         wxMBConv_mac::CreateIfNeeded() ;
3069         if ( m_uni == NULL )
3070         {
3071             m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3072                 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3073             m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3074                 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3075             m_map.mappingVersion = kUnicodeUseLatestMapping;
3076
3077             OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3078             wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3079
3080             m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3081                                                        kUnicodeNoSubset, kTextEncodingDefaultFormat);
3082             m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3083                                                      kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3084             m_map.mappingVersion = kUnicodeUseLatestMapping;
3085             err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3086             wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3087         }
3088     }
3089 protected :
3090     mutable UnicodeToTextInfo   m_uni;
3091     mutable UnicodeToTextInfo   m_uniBack;
3092     mutable UnicodeMapping      m_map;
3093 };
3094 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3095
3096 // ============================================================================
3097 // wxEncodingConverter based conversion classes
3098 // ============================================================================
3099
3100 #if wxUSE_FONTMAP
3101
3102 class wxMBConv_wxwin : public wxMBConv
3103 {
3104 private:
3105     void Init()
3106     {
3107         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3108                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3109     }
3110
3111 public:
3112     // temporarily just use wxEncodingConverter stuff,
3113     // so that it works while a better implementation is built
3114     wxMBConv_wxwin(const char* name)
3115     {
3116         if (name)
3117             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3118         else
3119             m_enc = wxFONTENCODING_SYSTEM;
3120
3121         Init();
3122     }
3123
3124     wxMBConv_wxwin(wxFontEncoding enc)
3125     {
3126         m_enc = enc;
3127
3128         Init();
3129     }
3130
3131     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
3132     {
3133         size_t inbuf = strlen(psz);
3134         if (buf)
3135         {
3136             if (!m2w.Convert(psz, buf))
3137                 return wxCONV_FAILED;
3138         }
3139         return inbuf;
3140     }
3141
3142     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
3143     {
3144         const size_t inbuf = wxWcslen(psz);
3145         if (buf)
3146         {
3147             if (!w2m.Convert(psz, buf))
3148                 return wxCONV_FAILED;
3149         }
3150
3151         return inbuf;
3152     }
3153
3154     virtual size_t GetMBNulLen() const
3155     {
3156         switch ( m_enc )
3157         {
3158             case wxFONTENCODING_UTF16BE:
3159             case wxFONTENCODING_UTF16LE:
3160                 return 2;
3161
3162             case wxFONTENCODING_UTF32BE:
3163             case wxFONTENCODING_UTF32LE:
3164                 return 4;
3165
3166             default:
3167                 return 1;
3168         }
3169     }
3170
3171     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3172
3173     bool IsOk() const { return m_ok; }
3174
3175 public:
3176     wxFontEncoding m_enc;
3177     wxEncodingConverter m2w, w2m;
3178
3179 private:
3180     // were we initialized successfully?
3181     bool m_ok;
3182
3183     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
3184 };
3185
3186 // make the constructors available for unit testing
3187 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
3188 {
3189     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3190     if ( !result->IsOk() )
3191     {
3192         delete result;
3193         return 0;
3194     }
3195
3196     return result;
3197 }
3198
3199 #endif // wxUSE_FONTMAP
3200
3201 // ============================================================================
3202 // wxCSConv implementation
3203 // ============================================================================
3204
3205 void wxCSConv::Init()
3206 {
3207     m_name = NULL;
3208     m_convReal =  NULL;
3209     m_deferred = true;
3210 }
3211
3212 wxCSConv::wxCSConv(const wxString& charset)
3213 {
3214     Init();
3215
3216     if ( !charset.empty() )
3217     {
3218         SetName(charset.ToAscii());
3219     }
3220
3221 #if wxUSE_FONTMAP
3222     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3223 #else
3224     m_encoding = wxFONTENCODING_SYSTEM;
3225 #endif
3226 }
3227
3228 wxCSConv::wxCSConv(wxFontEncoding encoding)
3229 {
3230     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3231     {
3232         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3233
3234         encoding = wxFONTENCODING_SYSTEM;
3235     }
3236
3237     Init();
3238
3239     m_encoding = encoding;
3240 }
3241
3242 wxCSConv::~wxCSConv()
3243 {
3244     Clear();
3245 }
3246
3247 wxCSConv::wxCSConv(const wxCSConv& conv)
3248         : wxMBConv()
3249 {
3250     Init();
3251
3252     SetName(conv.m_name);
3253     m_encoding = conv.m_encoding;
3254 }
3255
3256 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3257 {
3258     Clear();
3259
3260     SetName(conv.m_name);
3261     m_encoding = conv.m_encoding;
3262
3263     return *this;
3264 }
3265
3266 void wxCSConv::Clear()
3267 {
3268     free(m_name);
3269     delete m_convReal;
3270
3271     m_name = NULL;
3272     m_convReal = NULL;
3273 }
3274
3275 void wxCSConv::SetName(const char *charset)
3276 {
3277     if (charset)
3278     {
3279         m_name = strdup(charset);
3280         m_deferred = true;
3281     }
3282 }
3283
#if wxUSE_FONTMAP

// map from font encoding to charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString,
                     wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// used by wxCSConv::DoCreate() to remember, for each encoding, the name which
// iconv accepted for it or an empty string if none of the names worked
static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
3291
3292 wxMBConv *wxCSConv::DoCreate() const
3293 {
3294 #if wxUSE_FONTMAP
3295     wxLogTrace(TRACE_STRCONV,
3296                wxT("creating conversion for %s"),
3297                (m_name ? m_name
3298                        : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
3299 #endif // wxUSE_FONTMAP
3300
3301     // check for the special case of ASCII or ISO8859-1 charset: as we have
3302     // special knowledge of it anyhow, we don't need to create a special
3303     // conversion object
3304     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3305             m_encoding == wxFONTENCODING_DEFAULT )
3306     {
3307         // don't convert at all
3308         return NULL;
3309     }
3310
3311     // we trust OS to do conversion better than we can so try external
3312     // conversion methods first
3313     //
3314     // the full order is:
3315     //      1. OS conversion (iconv() under Unix or Win32 API)
3316     //      2. hard coded conversions for UTF
3317     //      3. wxEncodingConverter as fall back
3318
3319     // step (1)
3320 #ifdef HAVE_ICONV
3321 #if !wxUSE_FONTMAP
3322     if ( m_name )
3323 #endif // !wxUSE_FONTMAP
3324     {
3325 #if wxUSE_FONTMAP
3326         wxFontEncoding encoding(m_encoding);
3327 #endif
3328
3329         if ( m_name )
3330         {
3331             wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
3332             if ( conv->IsOk() )
3333                 return conv;
3334
3335             delete conv;
3336
3337 #if wxUSE_FONTMAP
3338             encoding =
3339                 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3340 #endif // wxUSE_FONTMAP
3341         }
3342 #if wxUSE_FONTMAP
3343         {
3344             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3345             if ( it != gs_nameCache.end() )
3346             {
3347                 if ( it->second.empty() )
3348                     return NULL;
3349
3350                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
3351                 if ( conv->IsOk() )
3352                     return conv;
3353
3354                 delete conv;
3355             }
3356
3357             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3358             // CS : in case this does not return valid names (eg for MacRoman)
3359             // encoding got a 'failure' entry in the cache all the same,
3360             // although it just has to be created using a different method, so
3361             // only store failed iconv creation attempts (or perhaps we
3362             // shoulnd't do this at all ?)
3363             if ( names[0] != NULL )
3364             {
3365                 for ( ; *names; ++names )
3366                 {
3367                     // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3368                     //             will need changes that will obsolete this
3369                     wxString name(*names);
3370                     wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3371                     if ( conv->IsOk() )
3372                     {
3373                         gs_nameCache[encoding] = *names;
3374                         return conv;
3375                     }
3376
3377                     delete conv;
3378                 }
3379
3380                 gs_nameCache[encoding] = _T(""); // cache the failure
3381             }
3382         }
3383 #endif // wxUSE_FONTMAP
3384     }
3385 #endif // HAVE_ICONV
3386
3387 #ifdef wxHAVE_WIN32_MB2WC
3388     {
3389 #if wxUSE_FONTMAP
3390         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3391                                       : new wxMBConv_win32(m_encoding);
3392         if ( conv->IsOk() )
3393             return conv;
3394
3395         delete conv;
3396 #else
3397         return NULL;
3398 #endif
3399     }
3400 #endif // wxHAVE_WIN32_MB2WC
3401
3402 #if defined(__WXMAC__)
3403     {
3404         // leave UTF16 and UTF32 to the built-ins of wx
3405         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3406             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3407         {
3408 #if wxUSE_FONTMAP
3409             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3410                                         : new wxMBConv_mac(m_encoding);
3411 #else
3412             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3413 #endif
3414             if ( conv->IsOk() )
3415                  return conv;
3416
3417             delete conv;
3418         }
3419     }
3420 #endif
3421
3422 #if 0 //defined(__WXCOCOA__)
3423     {
3424         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3425         {
3426 #if wxUSE_FONTMAP
3427             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3428                                           : new wxMBConv_cocoa(m_encoding);
3429 #else
3430             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3431 #endif
3432
3433             if ( conv->IsOk() )
3434                  return conv;
3435
3436             delete conv;
3437         }
3438     }
3439 #endif
3440     // step (2)
3441     wxFontEncoding enc = m_encoding;
3442 #if wxUSE_FONTMAP
3443     if ( enc == wxFONTENCODING_SYSTEM && m_name )
3444     {
3445         // use "false" to suppress interactive dialogs -- we can be called from
3446         // anywhere and popping up a dialog from here is the last thing we want to
3447         // do
3448         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3449     }
3450 #endif // wxUSE_FONTMAP
3451
3452     switch ( enc )
3453     {
3454         case wxFONTENCODING_UTF7:
3455              return new wxMBConvUTF7;
3456
3457         case wxFONTENCODING_UTF8:
3458              return new wxMBConvUTF8;
3459
3460         case wxFONTENCODING_UTF16BE:
3461              return new wxMBConvUTF16BE;
3462
3463         case wxFONTENCODING_UTF16LE:
3464              return new wxMBConvUTF16LE;
3465
3466         case wxFONTENCODING_UTF32BE:
3467              return new wxMBConvUTF32BE;
3468
3469         case wxFONTENCODING_UTF32LE:
3470              return new wxMBConvUTF32LE;
3471
3472         default:
3473              // nothing to do but put here to suppress gcc warnings
3474              break;
3475     }
3476
3477     // step (3)
3478 #if wxUSE_FONTMAP
3479     {
3480         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3481                                       : new wxMBConv_wxwin(m_encoding);
3482         if ( conv->IsOk() )
3483             return conv;
3484
3485         delete conv;
3486     }
3487 #endif // wxUSE_FONTMAP
3488
3489     // NB: This is a hack to prevent deadlock. What could otherwise happen
3490     //     in Unicode build: wxConvLocal creation ends up being here
3491     //     because of some failure and logs the error. But wxLog will try to
3492     //     attach a timestamp, for which it will need wxConvLocal (to convert
3493     //     time to char* and then wchar_t*), but that fails, tries to log the
3494     //     error, but wxLog has an (already locked) critical section that
3495     //     guards the static buffer.
3496     static bool alreadyLoggingError = false;
3497     if (!alreadyLoggingError)
3498     {
3499         alreadyLoggingError = true;
3500         wxLogError(_("Cannot convert from the charset '%s'!"),
3501                    m_name ? m_name
3502                       :
3503 #if wxUSE_FONTMAP
3504                          (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding).ToAscii()
3505 #else // !wxUSE_FONTMAP
3506                          (const char*)wxString::Format(_("encoding %i"), m_encoding).ToAscii()
3507 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3508               );
3509
3510         alreadyLoggingError = false;
3511     }
3512
3513     return NULL;
3514 }
3515
3516 void wxCSConv::CreateConvIfNeeded() const
3517 {
3518     if ( m_deferred )
3519     {
3520         wxCSConv *self = (wxCSConv *)this; // const_cast
3521
3522         // if we don't have neither the name nor the encoding, use the default
3523         // encoding for this system
3524         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3525         {
3526 #if wxUSE_INTL
3527             self->m_encoding = wxLocale::GetSystemEncoding();
3528 #else
3529             // fallback to some reasonable default:
3530             self->m_encoding = wxFONTENCODING_ISO8859_1;
3531 #endif // wxUSE_INTL
3532         }
3533
3534         self->m_convReal = DoCreate();
3535         self->m_deferred = false;
3536     }
3537 }
3538
3539 bool wxCSConv::IsOk() const
3540 {
3541     CreateConvIfNeeded();
3542
3543     // special case: no convReal created for wxFONTENCODING_ISO8859_1
3544     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3545         return true; // always ok as we do it ourselves
3546
3547     // m_convReal->IsOk() is called at its own creation, so we know it must
3548     // be ok if m_convReal is non-NULL
3549     return m_convReal != NULL;
3550 }
3551
3552 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3553                          const char *src, size_t srcLen) const
3554 {
3555     CreateConvIfNeeded();
3556
3557     if (m_convReal)
3558         return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3559
3560     // latin-1 (direct)
3561     return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
3562 }
3563
3564 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3565                            const wchar_t *src, size_t srcLen) const
3566 {
3567     CreateConvIfNeeded();
3568
3569     if (m_convReal)
3570         return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3571
3572     // latin-1 (direct)
3573     return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
3574 }
3575
3576 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3577 {
3578     CreateConvIfNeeded();
3579
3580     if (m_convReal)
3581         return m_convReal->MB2WC(buf, psz, n);
3582
3583     // latin-1 (direct)
3584     size_t len = strlen(psz);
3585
3586     if (buf)
3587     {
3588         for (size_t c = 0; c <= len; c++)
3589             buf[c] = (unsigned char)(psz[c]);
3590     }
3591
3592     return len;
3593 }
3594
3595 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3596 {
3597     CreateConvIfNeeded();
3598
3599     if (m_convReal)
3600         return m_convReal->WC2MB(buf, psz, n);
3601
3602     // latin-1 (direct)
3603     const size_t len = wxWcslen(psz);
3604     if (buf)
3605     {
3606         for (size_t c = 0; c <= len; c++)
3607         {
3608             if (psz[c] > 0xFF)
3609                 return wxCONV_FAILED;
3610
3611             buf[c] = (char)psz[c];
3612         }
3613     }
3614     else
3615     {
3616         for (size_t c = 0; c <= len; c++)
3617         {
3618             if (psz[c] > 0xFF)
3619                 return wxCONV_FAILED;
3620         }
3621     }
3622
3623     return len;
3624 }
3625
3626 size_t wxCSConv::GetMBNulLen() const
3627 {
3628     CreateConvIfNeeded();
3629
3630     if ( m_convReal )
3631     {
3632         return m_convReal->GetMBNulLen();
3633     }
3634
3635     // otherwise, we are ISO-8859-1
3636     return 1;
3637 }
3638
3639 #if wxUSE_UNICODE_UTF8
3640 bool wxCSConv::IsUTF8() const
3641 {
3642     CreateConvIfNeeded();
3643
3644     if ( m_convReal )
3645     {
3646         return m_convReal->IsUTF8();
3647     }
3648
3649     // otherwise, we are ISO-8859-1
3650     return false;
3651 }
3652 #endif
3653
3654
3655 #if wxUSE_UNICODE
3656
3657 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3658 {
3659     if ( !s )
3660         return wxWCharBuffer();
3661
3662     wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3663     if ( !wbuf )
3664         wbuf = wxMBConvUTF8().cMB2WX(s);
3665     if ( !wbuf )
3666         wbuf = wxConvISO8859_1.cMB2WX(s);
3667
3668     return wbuf;
3669 }
3670
3671 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3672 {
3673     if ( !ws )
3674         return wxCharBuffer();
3675
3676     wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3677     if ( !buf )
3678         buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3679
3680     return buf;
3681 }
3682
3683 #endif // wxUSE_UNICODE
3684
3685 // ----------------------------------------------------------------------------
3686 // globals
3687 // ----------------------------------------------------------------------------
3688
// NB: The reason why we create the converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object). In other
//     words, possibly _before_ the global converter object would have been
//     initialized.
3695
// make sure these identifiers are not macros any more so that they can be
// used as parts of the names defined below
// NOTE(review): presumably they are defined as macros in wx/strconv.h; confirm
#undef wxConvLibc
#undef wxConvUTF8
#undef wxConvUTF7
#undef wxConvLocal
#undef wxConvISO8859_1

// Define the global pointer name##Ptr (initially NULL) and the accessor
// wxGet_##name##Ptr() returning the address of a function-local static
// object of type impl_klass constructed with ctor_args.
//
// The statement is left unterminated: the semicolon is supplied where the
// macro is used.
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* calling the accessor from a static initializer ensures that */   \
    /* all global converter objects are created by the time static */   \
    /* initialization is done, i.e. before any thread is launched: */   \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// same as above for the common case when the object has the same type as
// the pointer
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3716
3717 #ifdef __WINDOWS__
3718     WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3719 #elif defined(__WXMAC__) && !defined(__MACH__)
3720     WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_mac, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3721 #else
3722     WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
3723 #endif
3724
3725 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
3726 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);
3727
3728 WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
3729 WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
3730
3731 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
3732 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
3733
3734 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3735 static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3736 #endif
3737 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
3738 #ifdef __WXOSX__
3739 #if defined(__WXMAC__) && defined(TARGET_CARBON)
3740                                     &wxConvMacUTF8DObj;
3741 #else
3742                                     wxGet_wxConvUTF8Ptr();
3743 #endif
3744 #else // !__WXOSX__
3745                                     wxGet_wxConvLibcPtr();
3746 #endif // __WXOSX__/!__WXOSX__
3747
3748 #else // !wxUSE_WCHAR_T
3749
3750 // FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
3751 // stand-ins in absence of wchar_t
3752 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3753                                 wxConvISO8859_1,
3754                                 wxConvLocal,
3755                                 wxConvUTF8;
3756
3757 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T