src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifndef WX_PRECOMP
  19     #include "wx/intl.h"
  20     #include "wx/log.h"
  21     #include "wx/utils.h"
  22     #include "wx/hashmap.h"
  23 #endif
  24
  25 #include "wx/strconv.h"
  26
  27 #if wxUSE_WCHAR_T
  28
  29 #ifdef __WINDOWS__
  30     #include "wx/msw/private.h"
  31     #include "wx/msw/missing.h"
  32 #endif
  33
  34 #ifndef __WXWINCE__
  35 #include <errno.h>
  36 #endif
  37
  38 #include <ctype.h>
  39 #include <string.h>
  40 #include <stdlib.h>
  41
  42 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  43     #define wxHAVE_WIN32_MB2WC
  44 #endif
  45
  46 #ifdef __SALFORDC__
  47     #include <clib.h>
  48 #endif
  49
  50 #ifdef HAVE_ICONV
  51     #include <iconv.h>
  52     #include "wx/thread.h"
  53 #endif
  54
  55 #include "wx/encconv.h"
  56 #include "wx/fontmap.h"
  57
  58 #ifdef __WXMAC__
  59 #ifndef __DARWIN__
  60 #include <ATSUnicode.h>
  61 #include <TextCommon.h>
  62 #include <TextEncodingConverter.h>
  63 #endif
  64
  65 // includes Mac headers
  66 #include "wx/mac/private.h"
  67 #endif
  68
  69
  70 #define TRACE_STRCONV _T("strconv")
  71
  72 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
  73 // be 4 bytes
  74 #if SIZEOF_WCHAR_T == 2
  75     #define WC_UTF16
  76 #endif
  77
  78
  79 // ============================================================================
  80 // implementation
  81 // ============================================================================
  82
  83 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  84 static bool NotAllNULs(const char *p, size_t n)
  85 {
  86     while ( n && *p++ == '\0' )
  87         n--;
  88
  89     return n != 0;
  90 }
  91
  92 // ----------------------------------------------------------------------------
  93 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
  94 // ----------------------------------------------------------------------------
  95
  96 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  97 {
  98     if (input <= 0xffff)
  99     {
 100         if (output)
 101             *output = (wxUint16) input;
 102
 103         return 1;
 104     }
 105     else if (input >= 0x110000)
 106     {
 107         return wxCONV_FAILED;
 108     }
 109     else
 110     {
 111         if (output)
 112         {
 113             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 114             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 115         }
 116
 117         return 2;
 118     }
 119 }
 120
 121 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 122 {
 123     if ((*input < 0xd800) || (*input > 0xdfff))
 124     {
 125         output = *input;
 126         return 1;
 127     }
 128     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 129     {
 130         output = *input;
 131         return wxCONV_FAILED;
 132     }
 133     else
 134     {
 135         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 136         return 2;
 137     }
 138 }
 139
 140 #ifdef WC_UTF16
 141     typedef wchar_t wxDecodeSurrogate_t;
 142 #else // !WC_UTF16
 143     typedef wxUint16 wxDecodeSurrogate_t;
 144 #endif // WC_UTF16/!WC_UTF16
 145
 146 // returns the next UTF-32 character from the wchar_t buffer and advances the
 147 // pointer to the character after this one
 148 //
 149 // if an invalid character is found, *pSrc is set to NULL, the caller must
 150 // check for this
 151 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 152 {
 153     wxUint32 out;
 154     const size_t
 155         n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
 156     if ( n == wxCONV_FAILED )
 157         *pSrc = NULL;
 158     else
 159         *pSrc += n;
 160
 161     return out;
 162 }
 163
 164 // ----------------------------------------------------------------------------
 165 // wxMBConv
 166 // ----------------------------------------------------------------------------
 167
 168 size_t
 169 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
 170                   const char *src, size_t srcLen) const
 171 {
 172     // although new conversion classes are supposed to implement this function
 173     // directly, the existins ones only implement the old MB2WC() and so, to
 174     // avoid to have to rewrite all conversion classes at once, we provide a
 175     // default (but not efficient) implementation of this one in terms of the
 176     // old function by copying the input to ensure that it's NUL-terminated and
 177     // then using MB2WC() to convert it
 178
 179     // the number of chars [which would be] written to dst [if it were not NULL]
 180     size_t dstWritten = 0;
 181
 182     // the number of NULs terminating this string
 183     size_t nulLen = 0;  // not really needed, but just to avoid warnings
 184
 185     // if we were not given the input size we just have to assume that the
 186     // string is properly terminated as we have no way of knowing how long it
 187     // is anyhow, but if we do have the size check whether there are enough
 188     // NULs at the end
 189     wxCharBuffer bufTmp;
 190     const char *srcEnd;
 191     if ( srcLen != wxNO_LEN )
 192     {
 193         // we need to know how to find the end of this string
 194         nulLen = GetMBNulLen();
 195         if ( nulLen == wxCONV_FAILED )
 196             return wxCONV_FAILED;
 197
 198         // if there are enough NULs we can avoid the copy
 199         if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
 200         {
 201             // make a copy in order to properly NUL-terminate the string
 202             bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
 203             char * const p = bufTmp.data();
 204             memcpy(p, src, srcLen);
 205             for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
 206                 *s = '\0';
 207
 208             src = bufTmp;
 209         }
 210
 211         srcEnd = src + srcLen;
 212     }
 213     else // quit after the first loop iteration
 214     {
 215         srcEnd = NULL;
 216     }
 217
 218     for ( ;; )
 219     {
 220         // try to convert the current chunk
 221         size_t lenChunk = MB2WC(NULL, src, 0);
 222         if ( lenChunk == wxCONV_FAILED )
 223             return wxCONV_FAILED;
 224
 225         lenChunk++; // for the L'\0' at the end of this chunk
 226
 227         dstWritten += lenChunk;
 228
 229         if ( lenChunk == 1 )
 230         {
 231             // nothing left in the input string, conversion succeeded
 232             break;
 233         }
 234
 235         if ( dst )
 236         {
 237             if ( dstWritten > dstLen )
 238                 return wxCONV_FAILED;
 239
 240             if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
 241                 return wxCONV_FAILED;
 242
 243             dst += lenChunk;
 244         }
 245
 246         if ( !srcEnd )
 247         {
 248             // we convert just one chunk in this case as this is the entire
 249             // string anyhow
 250             break;
 251         }
 252
 253         // advance the input pointer past the end of this chunk
 254         while ( NotAllNULs(src, nulLen) )
 255         {
 256             // notice that we must skip over multiple bytes here as we suppose
 257             // that if NUL takes 2 or 4 bytes, then all the other characters do
 258             // too and so if advanced by a single byte we might erroneously
 259             // detect sequences of NUL bytes in the middle of the input
 260             src += nulLen;
 261         }
 262
 263         src += nulLen; // skipping over its terminator as well
 264
 265         // note that ">=" (and not just "==") is needed here as the terminator
 266         // we skipped just above could be inside or just after the buffer
 267         // delimited by inEnd
 268         if ( src >= srcEnd )
 269             break;
 270     }
 271
 272     return dstWritten;
 273 }
 274
 275 size_t
 276 wxMBConv::FromWChar(char *dst, size_t dstLen,
 277                     const wchar_t *src, size_t srcLen) const
 278 {
 279     // the number of chars [which would be] written to dst [if it were not NULL]
 280     size_t dstWritten = 0;
 281
 282     // make a copy of the input string unless it is already properly
 283     // NUL-terminated
 284     //
 285     // if we don't know its length we have no choice but to assume that it is,
 286     // indeed, properly terminated
 287     wxWCharBuffer bufTmp;
 288     if ( srcLen == wxNO_LEN )
 289     {
 290         srcLen = wxWcslen(src) + 1;
 291     }
 292     else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
 293     {
 294         // make a copy in order to properly NUL-terminate the string
 295         bufTmp = wxWCharBuffer(srcLen);
 296         memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
 297         src = bufTmp;
 298     }
 299
 300     const size_t lenNul = GetMBNulLen();
 301     for ( const wchar_t * const srcEnd = src + srcLen;
 302           src < srcEnd;
 303           src += wxWcslen(src) + 1 /* skip L'\0' too */ )
 304     {
 305         // try to convert the current chunk
 306         size_t lenChunk = WC2MB(NULL, src, 0);
 307
 308         if ( lenChunk == wxCONV_FAILED )
 309             return wxCONV_FAILED;
 310
 311         lenChunk += lenNul;
 312         dstWritten += lenChunk;
 313
 314         if ( dst )
 315         {
 316             if ( dstWritten > dstLen )
 317                 return wxCONV_FAILED;
 318
 319             if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
 320                 return wxCONV_FAILED;
 321
 322             dst += lenChunk;
 323         }
 324     }
 325
 326     return dstWritten;
 327 }
 328
 329 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 330 {
 331     size_t rc = ToWChar(outBuff, outLen, inBuff);
 332     if ( rc != wxCONV_FAILED )
 333     {
 334         // ToWChar() returns the buffer length, i.e. including the trailing
 335         // NUL, while this method doesn't take it into account
 336         rc--;
 337     }
 338
 339     return rc;
 340 }
 341
 342 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 343 {
 344     size_t rc = FromWChar(outBuff, outLen, inBuff);
 345     if ( rc != wxCONV_FAILED )
 346     {
 347         rc -= GetMBNulLen();
 348     }
 349
 350     return rc;
 351 }
 352
wxMBConv::~wxMBConv()
{
    // nothing to do here: the destructor is intentionally empty and only
    // exists as an out-of-line definition (necessary for Darwin linking
    // probably)
}
 357
 358 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 359 {
 360     if ( psz )
 361     {
 362         // calculate the length of the buffer needed first
 363         const size_t nLen = MB2WC(NULL, psz, 0);
 364         if ( nLen != wxCONV_FAILED )
 365         {
 366             // now do the actual conversion
 367             wxWCharBuffer buf(nLen /* +1 added implicitly */);
 368
 369             // +1 for the trailing NULL
 370             if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
 371                 return buf;
 372         }
 373     }
 374
 375     return wxWCharBuffer();
 376 }
 377
 378 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 379 {
 380     if ( pwz )
 381     {
 382         const size_t nLen = WC2MB(NULL, pwz, 0);
 383         if ( nLen != wxCONV_FAILED )
 384         {
 385             // extra space for trailing NUL(s)
 386             static const size_t extraLen = GetMaxMBNulLen();
 387
 388             wxCharBuffer buf(nLen + extraLen - 1);
 389             if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
 390                 return buf;
 391         }
 392     }
 393
 394     return wxCharBuffer();
 395 }
 396
 397 const wxWCharBuffer
 398 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 399 {
 400     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 401     if ( dstLen != wxCONV_FAILED )
 402     {
 403         wxWCharBuffer wbuf(dstLen - 1);
 404         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 405         {
 406             if ( outLen )
 407             {
 408                 *outLen = dstLen;
 409                 if ( wbuf[dstLen - 1] == L'\0' )
 410                     (*outLen)--;
 411             }
 412
 413             return wbuf;
 414         }
 415     }
 416
 417     if ( outLen )
 418         *outLen = 0;
 419
 420     return wxWCharBuffer();
 421 }
 422
 423 const wxCharBuffer
 424 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 425 {
 426     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 427     if ( dstLen != wxCONV_FAILED )
 428     {
 429         // special case of empty input: can't allocate 0 size buffer below as
 430         // wxCharBuffer insists on NUL-terminating it
 431         wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
 432         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 433         {
 434             if ( outLen )
 435             {
 436                 *outLen = dstLen;
 437
 438                 const size_t nulLen = GetMBNulLen();
 439                 if ( dstLen >= nulLen &&
 440                         !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
 441                 {
 442                     // in this case the output is NUL-terminated and we're not
 443                     // supposed to count NUL
 444                     *outLen -= nulLen;
 445                 }
 446             }
 447
 448             return buf;
 449         }
 450     }
 451
 452     if ( outLen )
 453         *outLen = 0;
 454
 455     return wxCharBuffer();
 456 }
 457
 458 // ----------------------------------------------------------------------------
 459 // wxMBConvLibc
 460 // ----------------------------------------------------------------------------
 461
// Multibyte to wide conversion: simply forwards all its arguments to
// wxMB2WC() and returns its result unchanged.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 466
// Wide to multibyte conversion: simply forwards all its arguments to
// wxWC2MB() and returns its result unchanged.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 471
 472 // ----------------------------------------------------------------------------
 473 // wxConvBrokenFileNames
 474 // ----------------------------------------------------------------------------
 475
 476 #ifdef __UNIX__
 477
 478 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 479 {
 480     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 481                   || wxStricmp(charset, _T("UTF8")) == 0  )
 482         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 483     else
 484         m_conv = new wxCSConv(charset);
 485 }
 486
 487 #endif // __UNIX__
 488
 489 // ----------------------------------------------------------------------------
 490 // UTF-7
 491 // ----------------------------------------------------------------------------
 492
 493 // Implementation (C) 2004 Fredrik Roubert
 494
//
// BASE64 decoding table
//
// Indexed by the byte value (0..255): gives the 6 bit value of the BASE64
// digit ('A'-'Z' -> 0x00-0x19, 'a'-'z' -> 0x1a-0x33, '0'-'9' -> 0x34-0x3d,
// '+' -> 0x3e, '/' -> 0x3f) or 0xff for all the bytes which are not BASE64
// digits. wxMBConvUTF7::MB2WC() uses 0xff to detect the end of an encoded run.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 533
 534 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 535 {
 536     size_t len = 0;
 537
 538     while ( *psz && (!buf || (len < n)) )
 539     {
 540         unsigned char cc = *psz++;
 541         if (cc != '+')
 542         {
 543             // plain ASCII char
 544             if (buf)
 545                 *buf++ = cc;
 546             len++;
 547         }
 548         else if (*psz == '-')
 549         {
 550             // encoded plus sign
 551             if (buf)
 552                 *buf++ = cc;
 553             len++;
 554             psz++;
 555         }
 556         else // start of BASE64 encoded string
 557         {
 558             bool lsb, ok;
 559             unsigned int d, l;
 560             for ( ok = lsb = false, d = 0, l = 0;
 561                   (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
 562                   psz++ )
 563             {
 564                 d <<= 6;
 565                 d += cc;
 566                 for (l += 6; l >= 8; lsb = !lsb)
 567                 {
 568                     unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
 569                     if (lsb)
 570                     {
 571                         if (buf)
 572                             *buf++ |= c;
 573                         len ++;
 574                     }
 575                     else
 576                     {
 577                         if (buf)
 578                             *buf = (wchar_t)(c << 8);
 579                     }
 580
 581                     ok = true;
 582                 }
 583             }
 584
 585             if ( !ok )
 586             {
 587                 // in valid UTF7 we should have valid characters after '+'
 588                 return wxCONV_FAILED;
 589             }
 590
 591             if (*psz == '-')
 592                 psz++;
 593         }
 594     }
 595
 596     if ( buf && (len < n) )
 597         *buf = '\0';
 598
 599     return len;
 600 }
 601
//
// BASE64 encoding table
//
// Indexed by a 6 bit value (0..63), gives the corresponding BASE64 digit;
// used by wxMBConvUTF7::WC2MB().
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 616
//
// UTF-7 encoding table
//
// Indexed by the character code (0..127), gives the class of the character:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
// wxMBConvUTF7::WC2MB() only tests for "< 1", i.e. only the characters of
// class 0 are output directly, all the others are BASE64-encoded.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 636
 637 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 638 {
 639     size_t len = 0;
 640
 641     while (*psz && ((!buf) || (len < n)))
 642     {
 643         wchar_t cc = *psz++;
 644         if (cc < 0x80 && utf7encode[cc] < 1)
 645         {
 646             // plain ASCII char
 647             if (buf)
 648                 *buf++ = (char)cc;
 649
 650             len++;
 651         }
 652 #ifndef WC_UTF16
 653         else if (((wxUint32)cc) > 0xffff)
 654         {
 655             // no surrogate pair generation (yet?)
 656             return wxCONV_FAILED;
 657         }
 658 #endif
 659         else
 660         {
 661             if (buf)
 662                 *buf++ = '+';
 663
 664             len++;
 665             if (cc != '+')
 666             {
 667                 // BASE64 encode string
 668                 unsigned int lsb, d, l;
 669                 for (d = 0, l = 0; /*nothing*/; psz++)
 670                 {
 671                     for (lsb = 0; lsb < 2; lsb ++)
 672                     {
 673                         d <<= 8;
 674                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 675
 676                         for (l += 8; l >= 6; )
 677                         {
 678                             l -= 6;
 679                             if (buf)
 680                                 *buf++ = utf7enb64[(d >> l) % 64];
 681                             len++;
 682                         }
 683                     }
 684
 685                     cc = *psz;
 686                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 687                         break;
 688                 }
 689
 690                 if (l != 0)
 691                 {
 692                     if (buf)
 693                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 694
 695                     len++;
 696                 }
 697             }
 698
 699             if (buf)
 700                 *buf++ = '-';
 701             len++;
 702         }
 703     }
 704
 705     if (buf && (len < n))
 706         *buf = 0;
 707
 708     return len;
 709 }
 710
 711 // ----------------------------------------------------------------------------
 712 // UTF-8
 713 // ----------------------------------------------------------------------------
 714
// utf8_max[i] is the greatest value which can be encoded in an (i + 1) bytes
// long UTF-8 sequence; the last element is a sentinel ensuring that the
// lookup loop in wxMBConvUTF8::WC2MB() always terminates
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string: the byte b is mapped to
// wxUnicodePUA + b, so the range is [wxUnicodePUA, wxUnicodePUAEnd)
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 722
// Decodes the NUL-terminated UTF-8 string psz.
//
// If buf is not NULL the output is stored in it, followed by a terminating
// NUL if there is still room for it (len < n); if buf is NULL nothing is
// stored and n is ignored.
//
// Invalid input is handled according to m_options: each offending byte is
// either mapped to wxUnicodePUA + byte (MAP_INVALID_UTF8_TO_PUA), replaced by
// a backslash followed by 3 octal digits (MAP_INVALID_UTF8_TO_OCTAL, in which
// case the backslashes in the input are doubled as well) or, if neither
// option is set, makes the function fail.
//
// Returns the number of wide characters produced (not counting the NUL) or
// wxCONV_FAILED.
//
// NOTE(review): the available space is only checked as "len < n" before each
// character, but in WC_UTF16 build encode_utf16() may store 2 units at once
// -- confirm that the callers always provide a big enough buffer.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember the start of this sequence: if it turns out to be invalid
        // all the bytes in [opsz, psz) are escaped individually
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;

        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // from now on cnt is the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence: a continuation byte (10xxxxxx)
                // can't start a sequence
                invalid = true;
            }
            else
            {
                unsigned ocnt = cnt - 1;

                // the payload bits of the first byte
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence: not a continuation byte,
                        // it is not consumed and will be examined again
                        invalid = true;
                        break;
                    }

                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }

                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding: either truncated or overlong,
                    // i.e. the value would have fit in a shorter sequence
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUint16 if WC_UTF16
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == wxCONV_FAILED)
                    {
                        // value not representable in UTF-16
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }

            if (invalid)
            {
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    // map each byte consumed so far to its own PUA character
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != wxCONV_FAILED);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    // replace each byte consumed so far with "\ooo"
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only stored if all 4 characters of
                        // it fit, but is always counted in len
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }

                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return wxCONV_FAILED;
                }
            }
        }
    }

    if (buf && (len < n))
        *buf = 0;

    return len;
}
 866
 867 static inline bool isoctal(wchar_t wch)
 868 {
 869     return L'0' <= wch && wch <= L'7';
 870 }
 871
 872 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 873 {
 874     size_t len = 0;
 875
 876     while (*psz && ((!buf) || (len < n)))
 877     {
 878         wxUint32 cc;
 879
 880 #ifdef WC_UTF16
 881         // cast is ok for WC_UTF16
 882         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 883         psz += (pa == wxCONV_FAILED) ? 1 : pa;
 884 #else
 885         cc = (*psz++) & 0x7fffffff;
 886 #endif
 887
 888         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 889                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 890         {
 891             if (buf)
 892                 *buf++ = (char)(cc - wxUnicodePUA);
 893             len++;
 894         }
 895         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 896                     && cc == L'\\' && psz[0] == L'\\' )
 897         {
 898             if (buf)
 899                 *buf++ = (char)cc;
 900             psz++;
 901             len++;
 902         }
 903         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 904                     cc == L'\\' &&
 905                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 906         {
 907             if (buf)
 908             {
 909                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
 910                                  (psz[1] - L'0') * 010 +
 911                                  (psz[2] - L'0'));
 912             }
 913
 914             psz += 3;
 915             len++;
 916         }
 917         else
 918         {
 919             unsigned cnt;
 920             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
 921             {
 922             }
 923
 924             if (!cnt)
 925             {
 926                 // plain ASCII char
 927                 if (buf)
 928                     *buf++ = (char) cc;
 929                 len++;
 930             }
 931             else
 932             {
 933                 len += cnt + 1;
 934                 if (buf)
 935                 {
 936                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 937                     while (cnt--)
 938                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 939                 }
 940             }
 941         }
 942     }
 943
 944     if (buf && (len < n))
 945         *buf = 0;
 946
 947     return len;
 948 }
 949
 950 // ============================================================================
 951 // UTF-16
 952 // ============================================================================
 953
// wxMBConvUTF16straight is the name under which the methods of the UTF-16
// converter using the same byte order as this machine are implemented below
// (so no byte swapping is needed) and wxMBConvUTF16swap is the one for the
// converter using the opposite byte order
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 961
 962 /* static */
 963 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
 964 {
 965     if ( srcLen == wxNO_LEN )
 966     {
 967         // count the number of bytes in input, including the trailing NULs
 968         const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
 969         for ( srcLen = 1; *inBuff++; srcLen++ )
 970             ;
 971
 972         srcLen *= BYTES_PER_CHAR;
 973     }
 974     else // we already have the length
 975     {
 976         // we can only convert an entire number of UTF-16 characters
 977         if ( srcLen % BYTES_PER_CHAR )
 978             return wxCONV_FAILED;
 979     }
 980
 981     return srcLen;
 982 }
 983
 984 // case when in-memory representation is UTF-16 too
 985 #ifdef WC_UTF16
 986
 987 // ----------------------------------------------------------------------------
 988 // conversions without endianness change
 989 // ----------------------------------------------------------------------------
 990
 991 size_t
 992 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
 993                                const char *src, size_t srcLen) const
 994 {
 995     // set up the scene for using memcpy() (which is presumably more efficient
 996     // than copying the bytes one by one)
 997     srcLen = GetLength(src, srcLen);
 998     if ( srcLen == wxNO_LEN )
 999         return wxCONV_FAILED;
1000
1001     const size_t inLen = srcLen / BYTES_PER_CHAR;
1002     if ( dst )
1003     {
1004         if ( dstLen < inLen )
1005             return wxCONV_FAILED;
1006
1007         memcpy(dst, src, srcLen);
1008     }
1009
1010     return inLen;
1011 }
1012
1013 size_t
1014 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1015                                  const wchar_t *src, size_t srcLen) const
1016 {
1017     if ( srcLen == wxNO_LEN )
1018         srcLen = wxWcslen(src) + 1;
1019
1020     srcLen *= BYTES_PER_CHAR;
1021
1022     if ( dst )
1023     {
1024         if ( dstLen < srcLen )
1025             return wxCONV_FAILED;
1026
1027         memcpy(dst, src, srcLen);
1028     }
1029
1030     return srcLen;
1031 }
1032
1033 // ----------------------------------------------------------------------------
1034 // endian-reversing conversions
1035 // ----------------------------------------------------------------------------
1036
1037 size_t
1038 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1039                            const char *src, size_t srcLen) const
1040 {
1041     srcLen = GetLength(src, srcLen);
1042     if ( srcLen == wxNO_LEN )
1043         return wxCONV_FAILED;
1044
1045     srcLen /= BYTES_PER_CHAR;
1046
1047     if ( dst )
1048     {
1049         if ( dstLen < srcLen )
1050             return wxCONV_FAILED;
1051
1052         const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1053         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1054         {
1055             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1056         }
1057     }
1058
1059     return srcLen;
1060 }
1061
1062 size_t
1063 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1064                              const wchar_t *src, size_t srcLen) const
1065 {
1066     if ( srcLen == wxNO_LEN )
1067         srcLen = wxWcslen(src) + 1;
1068
1069     srcLen *= BYTES_PER_CHAR;
1070
1071     if ( dst )
1072     {
1073         if ( dstLen < srcLen )
1074             return wxCONV_FAILED;
1075
1076         wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1077         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1078         {
1079             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1080         }
1081     }
1082
1083     return srcLen;
1084 }
1085
1086 #else // !WC_UTF16: wchar_t is UTF-32
1087
1088 // ----------------------------------------------------------------------------
1089 // conversions without endianness change
1090 // ----------------------------------------------------------------------------
1091
1092 size_t
1093 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1094                                const char *src, size_t srcLen) const
1095 {
1096     srcLen = GetLength(src, srcLen);
1097     if ( srcLen == wxNO_LEN )
1098         return wxCONV_FAILED;
1099
1100     const size_t inLen = srcLen / BYTES_PER_CHAR;
1101     if ( !dst )
1102     {
1103         // optimization: return maximal space which could be needed for this
1104         // string even if the real size could be smaller if the buffer contains
1105         // any surrogates
1106         return inLen;
1107     }
1108
1109     size_t outLen = 0;
1110     const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1111     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1112     {
1113         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1114         if ( !inBuff )
1115             return wxCONV_FAILED;
1116
1117         if ( ++outLen > dstLen )
1118             return wxCONV_FAILED;
1119
1120         *dst++ = ch;
1121     }
1122
1123
1124     return outLen;
1125 }
1126
1127 size_t
1128 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1129                                  const wchar_t *src, size_t srcLen) const
1130 {
1131     if ( srcLen == wxNO_LEN )
1132         srcLen = wxWcslen(src) + 1;
1133
1134     size_t outLen = 0;
1135     wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1136     for ( size_t n = 0; n < srcLen; n++ )
1137     {
1138         wxUint16 cc[2];
1139         const size_t numChars = encode_utf16(*src++, cc);
1140         if ( numChars == wxCONV_FAILED )
1141             return wxCONV_FAILED;
1142
1143         outLen += numChars * BYTES_PER_CHAR;
1144         if ( outBuff )
1145         {
1146             if ( outLen > dstLen )
1147                 return wxCONV_FAILED;
1148
1149             *outBuff++ = cc[0];
1150             if ( numChars == 2 )
1151             {
1152                 // second character of a surrogate
1153                 *outBuff++ = cc[1];
1154             }
1155         }
1156     }
1157
1158     return outLen;
1159 }
1160
1161 // ----------------------------------------------------------------------------
1162 // endian-reversing conversions
1163 // ----------------------------------------------------------------------------
1164
1165 size_t
1166 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1167                            const char *src, size_t srcLen) const
1168 {
1169     srcLen = GetLength(src, srcLen);
1170     if ( srcLen == wxNO_LEN )
1171         return wxCONV_FAILED;
1172
1173     const size_t inLen = srcLen / BYTES_PER_CHAR;
1174     if ( !dst )
1175     {
1176         // optimization: return maximal space which could be needed for this
1177         // string even if the real size could be smaller if the buffer contains
1178         // any surrogates
1179         return inLen;
1180     }
1181
1182     size_t outLen = 0;
1183     const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1184     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1185     {
1186         wxUint32 ch;
1187         wxUint16 tmp[2];
1188
1189         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1190         inBuff++;
1191         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1192
1193         const size_t numChars = decode_utf16(tmp, ch);
1194         if ( numChars == wxCONV_FAILED )
1195             return wxCONV_FAILED;
1196
1197         if ( numChars == 2 )
1198             inBuff++;
1199
1200         if ( ++outLen > dstLen )
1201             return wxCONV_FAILED;
1202
1203         *dst++ = ch;
1204     }
1205
1206
1207     return outLen;
1208 }
1209
1210 size_t
1211 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1212                              const wchar_t *src, size_t srcLen) const
1213 {
1214     if ( srcLen == wxNO_LEN )
1215         srcLen = wxWcslen(src) + 1;
1216
1217     size_t outLen = 0;
1218     wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1219     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1220     {
1221         wxUint16 cc[2];
1222         const size_t numChars = encode_utf16(*src, cc);
1223         if ( numChars == wxCONV_FAILED )
1224             return wxCONV_FAILED;
1225
1226         outLen += numChars * BYTES_PER_CHAR;
1227         if ( outBuff )
1228         {
1229             if ( outLen > dstLen )
1230                 return wxCONV_FAILED;
1231
1232             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1233             if ( numChars == 2 )
1234             {
1235                 // second character of a surrogate
1236                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1237             }
1238         }
1239     }
1240
1241     return outLen;
1242 }
1243
1244 #endif // WC_UTF16/!WC_UTF16
1245
1246
1247 // ============================================================================
1248 // UTF-32
1249 // ============================================================================
1250
1251 #ifdef WORDS_BIGENDIAN
1252     #define wxMBConvUTF32straight  wxMBConvUTF32BE
1253     #define wxMBConvUTF32swap      wxMBConvUTF32LE
1254 #else
1255     #define wxMBConvUTF32swap      wxMBConvUTF32BE
1256     #define wxMBConvUTF32straight  wxMBConvUTF32LE
1257 #endif
1258
1259
1260 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1261 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1262
1263 /* static */
1264 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1265 {
1266     if ( srcLen == wxNO_LEN )
1267     {
1268         // count the number of bytes in input, including the trailing NULs
1269         const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1270         for ( srcLen = 1; *inBuff++; srcLen++ )
1271             ;
1272
1273         srcLen *= BYTES_PER_CHAR;
1274     }
1275     else // we already have the length
1276     {
1277         // we can only convert an entire number of UTF-32 characters
1278         if ( srcLen % BYTES_PER_CHAR )
1279             return wxCONV_FAILED;
1280     }
1281
1282     return srcLen;
1283 }
1284
1285 // case when in-memory representation is UTF-16
1286 #ifdef WC_UTF16
1287
1288 // ----------------------------------------------------------------------------
1289 // conversions without endianness change
1290 // ----------------------------------------------------------------------------
1291
1292 size_t
1293 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1294                                const char *src, size_t srcLen) const
1295 {
1296     srcLen = GetLength(src, srcLen);
1297     if ( srcLen == wxNO_LEN )
1298         return wxCONV_FAILED;
1299
1300     const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1301     const size_t inLen = srcLen / BYTES_PER_CHAR;
1302     size_t outLen = 0;
1303     for ( size_t n = 0; n < inLen; n++ )
1304     {
1305         wxUint16 cc[2];
1306         const size_t numChars = encode_utf16(*inBuff++, cc);
1307         if ( numChars == wxCONV_FAILED )
1308             return wxCONV_FAILED;
1309
1310         outLen += numChars;
1311         if ( dst )
1312         {
1313             if ( outLen > dstLen )
1314                 return wxCONV_FAILED;
1315
1316             *dst++ = cc[0];
1317             if ( numChars == 2 )
1318             {
1319                 // second character of a surrogate
1320                 *dst++ = cc[1];
1321             }
1322         }
1323     }
1324
1325     return outLen;
1326 }
1327
1328 size_t
1329 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1330                                  const wchar_t *src, size_t srcLen) const
1331 {
1332     if ( srcLen == wxNO_LEN )
1333         srcLen = wxWcslen(src) + 1;
1334
1335     if ( !dst )
1336     {
1337         // optimization: return maximal space which could be needed for this
1338         // string instead of the exact amount which could be less if there are
1339         // any surrogates in the input
1340         //
1341         // we consider that surrogates are rare enough to make it worthwhile to
1342         // avoid running the loop below at the cost of slightly extra memory
1343         // consumption
1344         return srcLen * BYTES_PER_CHAR;
1345     }
1346
1347     wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1348     size_t outLen = 0;
1349     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1350     {
1351         const wxUint32 ch = wxDecodeSurrogate(&src);
1352         if ( !src )
1353             return wxCONV_FAILED;
1354
1355         outLen += BYTES_PER_CHAR;
1356
1357         if ( outLen > dstLen )
1358             return wxCONV_FAILED;
1359
1360         *outBuff++ = ch;
1361     }
1362
1363     return outLen;
1364 }
1365
1366 // ----------------------------------------------------------------------------
1367 // endian-reversing conversions
1368 // ----------------------------------------------------------------------------
1369
1370 size_t
1371 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1372                            const char *src, size_t srcLen) const
1373 {
1374     srcLen = GetLength(src, srcLen);
1375     if ( srcLen == wxNO_LEN )
1376         return wxCONV_FAILED;
1377
1378     const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1379     const size_t inLen = srcLen / BYTES_PER_CHAR;
1380     size_t outLen = 0;
1381     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1382     {
1383         wxUint16 cc[2];
1384         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1385         if ( numChars == wxCONV_FAILED )
1386             return wxCONV_FAILED;
1387
1388         outLen += numChars;
1389         if ( dst )
1390         {
1391             if ( outLen > dstLen )
1392                 return wxCONV_FAILED;
1393
1394             *dst++ = cc[0];
1395             if ( numChars == 2 )
1396             {
1397                 // second character of a surrogate
1398                 *dst++ = cc[1];
1399             }
1400         }
1401     }
1402
1403     return outLen;
1404 }
1405
1406 size_t
1407 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1408                              const wchar_t *src, size_t srcLen) const
1409 {
1410     if ( srcLen == wxNO_LEN )
1411         srcLen = wxWcslen(src) + 1;
1412
1413     if ( !dst )
1414     {
1415         // optimization: return maximal space which could be needed for this
1416         // string instead of the exact amount which could be less if there are
1417         // any surrogates in the input
1418         //
1419         // we consider that surrogates are rare enough to make it worthwhile to
1420         // avoid running the loop below at the cost of slightly extra memory
1421         // consumption
1422         return srcLen*BYTES_PER_CHAR;
1423     }
1424
1425     wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1426     size_t outLen = 0;
1427     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1428     {
1429         const wxUint32 ch = wxDecodeSurrogate(&src);
1430         if ( !src )
1431             return wxCONV_FAILED;
1432
1433         outLen += BYTES_PER_CHAR;
1434
1435         if ( outLen > dstLen )
1436             return wxCONV_FAILED;
1437
1438         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1439     }
1440
1441     return outLen;
1442 }
1443
1444 #else // !WC_UTF16: wchar_t is UTF-32
1445
1446 // ----------------------------------------------------------------------------
1447 // conversions without endianness change
1448 // ----------------------------------------------------------------------------
1449
1450 size_t
1451 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1452                                const char *src, size_t srcLen) const
1453 {
1454     // use memcpy() as it should be much faster than hand-written loop
1455     srcLen = GetLength(src, srcLen);
1456     if ( srcLen == wxNO_LEN )
1457         return wxCONV_FAILED;
1458
1459     const size_t inLen = srcLen/BYTES_PER_CHAR;
1460     if ( dst )
1461     {
1462         if ( dstLen < inLen )
1463             return wxCONV_FAILED;
1464
1465         memcpy(dst, src, srcLen);
1466     }
1467
1468     return inLen;
1469 }
1470
1471 size_t
1472 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1473                                  const wchar_t *src, size_t srcLen) const
1474 {
1475     if ( srcLen == wxNO_LEN )
1476         srcLen = wxWcslen(src) + 1;
1477
1478     srcLen *= BYTES_PER_CHAR;
1479
1480     if ( dst )
1481     {
1482         if ( dstLen < srcLen )
1483             return wxCONV_FAILED;
1484
1485         memcpy(dst, src, srcLen);
1486     }
1487
1488     return srcLen;
1489 }
1490
1491 // ----------------------------------------------------------------------------
1492 // endian-reversing conversions
1493 // ----------------------------------------------------------------------------
1494
1495 size_t
1496 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1497                            const char *src, size_t srcLen) const
1498 {
1499     srcLen = GetLength(src, srcLen);
1500     if ( srcLen == wxNO_LEN )
1501         return wxCONV_FAILED;
1502
1503     srcLen /= BYTES_PER_CHAR;
1504
1505     if ( dst )
1506     {
1507         if ( dstLen < srcLen )
1508             return wxCONV_FAILED;
1509
1510         const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1511         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1512         {
1513             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1514         }
1515     }
1516
1517     return srcLen;
1518 }
1519
1520 size_t
1521 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1522                              const wchar_t *src, size_t srcLen) const
1523 {
1524     if ( srcLen == wxNO_LEN )
1525         srcLen = wxWcslen(src) + 1;
1526
1527     srcLen *= BYTES_PER_CHAR;
1528
1529     if ( dst )
1530     {
1531         if ( dstLen < srcLen )
1532             return wxCONV_FAILED;
1533
1534         wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1535         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1536         {
1537             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1538         }
1539     }
1540
1541     return srcLen;
1542 }
1543
1544 #endif // WC_UTF16/!WC_UTF16
1545
1546
1547 // ============================================================================
1548 // The classes doing conversion using the iconv_xxx() functions
1549 // ============================================================================
1550
1551 #ifdef HAVE_ICONV
1552
1553 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1554 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1555 //     (unless there's yet another bug in glibc) the only case when iconv()
1556 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1557 //     left in the input buffer -- when _real_ error occurs,
1558 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1559 //     iconv() failure.
1560 //     [This bug does not appear in glibc 2.2.]
1561 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1562 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1563                                      (errno != E2BIG || bufLeft != 0))
1564 #else
1565 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
1566 #endif
1567
1568 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1569
1570 #define ICONV_T_INVALID ((iconv_t)-1)
1571
1572 #if SIZEOF_WCHAR_T == 4
1573     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1574     #define WC_ENC      wxFONTENCODING_UTF32
1575 #elif SIZEOF_WCHAR_T == 2
1576     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1577     #define WC_ENC      wxFONTENCODING_UTF16
1578 #else // sizeof(wchar_t) != 2 nor 4
1579     // does this ever happen?
1580     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1581 #endif
1582
1583 // ----------------------------------------------------------------------------
1584 // wxMBConv_iconv: encapsulates an iconv character set
1585 // ----------------------------------------------------------------------------
1586
1587 class wxMBConv_iconv : public wxMBConv
1588 {
1589 public:
1590     wxMBConv_iconv(const wxChar *name);
1591     virtual ~wxMBConv_iconv();
1592
1593     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1594     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1595
1596     // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1597     virtual size_t GetMBNulLen() const;
1598
1599     virtual wxMBConv *Clone() const
1600     {
1601         wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1602         p->m_minMBCharWidth = m_minMBCharWidth;
1603         return p;
1604     }
1605
1606     bool IsOk() const
1607         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1608
1609 protected:
1610     // the iconv handlers used to translate from multibyte
1611     // to wide char and in the other direction
1612     iconv_t m2w,
1613             w2m;
1614
1615 #if wxUSE_THREADS
1616     // guards access to m2w and w2m objects
1617     wxMutex m_iconvMutex;
1618 #endif
1619
1620 private:
1621     // the name (for iconv_open()) of a wide char charset -- if none is
1622     // available on this machine, it will remain NULL
1623     static wxString ms_wcCharsetName;
1624
1625     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1626     // different endian-ness than the native one
1627     static bool ms_wcNeedsSwap;
1628
1629
1630     // name of the encoding handled by this conversion
1631     wxString m_name;
1632
1633     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1634     // initially
1635     size_t m_minMBCharWidth;
1636 };
1637
1638 // make the constructor available for unit testing
1639 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1640 {
1641     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1642     if ( !result->IsOk() )
1643     {
1644         delete result;
1645         return 0;
1646     }
1647
1648     return result;
1649 }
1650
1651 wxString wxMBConv_iconv::ms_wcCharsetName;
1652 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1653
1654 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1655               : m_name(name)
1656 {
1657     m_minMBCharWidth = 0;
1658
1659     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1660     // names for the charsets
1661     const wxCharBuffer cname(wxString(name).ToAscii());
1662
1663     // check for charset that represents wchar_t:
1664     if ( ms_wcCharsetName.empty() )
1665     {
1666         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1667
1668 #if wxUSE_FONTMAP
1669         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1670 #else // !wxUSE_FONTMAP
1671         static const wxChar *names_static[] =
1672         {
1673 #if SIZEOF_WCHAR_T == 4
1674             _T("UCS-4"),
1675 #elif SIZEOF_WCHAR_T = 2
1676             _T("UCS-2"),
1677 #endif
1678             NULL
1679         };
1680         const wxChar **names = names_static;
1681 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1682
1683         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1684         {
1685             const wxString nameCS(*names);
1686
1687             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1688             wxString nameXE(nameCS);
1689
1690 #ifdef WORDS_BIGENDIAN
1691                 nameXE += _T("BE");
1692 #else // little endian
1693                 nameXE += _T("LE");
1694 #endif
1695
1696             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1697                        nameXE.c_str());
1698
1699             m2w = iconv_open(nameXE.ToAscii(), cname);
1700             if ( m2w == ICONV_T_INVALID )
1701             {
1702                 // try charset w/o bytesex info (e.g. "UCS4")
1703                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1704                            nameCS.c_str());
1705                 m2w = iconv_open(nameCS.ToAscii(), cname);
1706
1707                 // and check for bytesex ourselves:
1708                 if ( m2w != ICONV_T_INVALID )
1709                 {
1710                     char    buf[2], *bufPtr;
1711                     wchar_t wbuf[2], *wbufPtr;
1712                     size_t  insz, outsz;
1713                     size_t  res;
1714
1715                     buf[0] = 'A';
1716                     buf[1] = 0;
1717                     wbuf[0] = 0;
1718                     insz = 2;
1719                     outsz = SIZEOF_WCHAR_T * 2;
1720                     wbufPtr = wbuf;
1721                     bufPtr = buf;
1722
1723                     res = iconv(
1724                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1725                         (char**)&wbufPtr, &outsz);
1726
1727                     if (ICONV_FAILED(res, insz))
1728                     {
1729                         wxLogLastError(wxT("iconv"));
1730                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1731                                    nameCS.c_str());
1732                     }
1733                     else // ok, can convert to this encoding, remember it
1734                     {
1735                         ms_wcCharsetName = nameCS;
1736                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1737                     }
1738                 }
1739             }
1740             else // use charset not requiring byte swapping
1741             {
1742                 ms_wcCharsetName = nameXE;
1743             }
1744         }
1745
1746         wxLogTrace(TRACE_STRCONV,
1747                    wxT("iconv wchar_t charset is \"%s\"%s"),
1748                    ms_wcCharsetName.empty() ? _T("<none>")
1749                                             : ms_wcCharsetName.c_str(),
1750                    ms_wcNeedsSwap ? _T(" (needs swap)")
1751                                   : _T(""));
1752     }
1753     else // we already have ms_wcCharsetName
1754     {
1755         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1756     }
1757
1758     if ( ms_wcCharsetName.empty() )
1759     {
1760         w2m = ICONV_T_INVALID;
1761     }
1762     else
1763     {
1764         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1765         if ( w2m == ICONV_T_INVALID )
1766         {
1767             wxLogTrace(TRACE_STRCONV,
1768                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1769                        ms_wcCharsetName.c_str(), cname.data());
1770         }
1771     }
1772 }
1773
1774 wxMBConv_iconv::~wxMBConv_iconv()
1775 {
1776     if ( m2w != ICONV_T_INVALID )
1777         iconv_close(m2w);
1778     if ( w2m != ICONV_T_INVALID )
1779         iconv_close(w2m);
1780 }
1781
1782 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1783 {
1784     // find the string length: notice that must be done differently for
1785     // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1786     size_t inbuf;
1787     const size_t nulLen = GetMBNulLen();
1788     switch ( nulLen )
1789     {
1790         default:
1791             return wxCONV_FAILED;
1792
1793         case 1:
1794             inbuf = strlen(psz); // arguably more optimized than our version
1795             break;
1796
1797         case 2:
1798         case 4:
1799             // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1800             // they also have to start at character boundary and not span two
1801             // adjacent characters
1802             const char *p;
1803             for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1804                 ;
1805             inbuf = p - psz;
1806             break;
1807     }
1808
1809 #if wxUSE_THREADS
1810     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1811     //     Unfortunately there are a couple of global wxCSConv objects such as
1812     //     wxConvLocal that are used all over wx code, so we have to make sure
1813     //     the handle is used by at most one thread at the time. Otherwise
1814     //     only a few wx classes would be safe to use from non-main threads
1815     //     as MB<->WC conversion would fail "randomly".
1816     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1817 #endif // wxUSE_THREADS
1818
1819     size_t outbuf = n * SIZEOF_WCHAR_T;
1820     size_t res, cres;
1821     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1822     wchar_t *bufPtr = buf;
1823     const char *pszPtr = psz;
1824
1825     if (buf)
1826     {
1827         // have destination buffer, convert there
1828         cres = iconv(m2w,
1829                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1830                      (char**)&bufPtr, &outbuf);
1831         res = n - (outbuf / SIZEOF_WCHAR_T);
1832
1833         if (ms_wcNeedsSwap)
1834         {
1835             // convert to native endianness
1836             for ( unsigned i = 0; i < res; i++ )
1837                 buf[n] = WC_BSWAP(buf[i]);
1838         }
1839
1840         // NUL-terminate the string if there is any space left
1841         if (res < n)
1842             buf[res] = 0;
1843     }
1844     else
1845     {
1846         // no destination buffer... convert using temp buffer
1847         // to calculate destination buffer requirement
1848         wchar_t tbuf[8];
1849         res = 0;
1850
1851         do
1852         {
1853             bufPtr = tbuf;
1854             outbuf = 8 * SIZEOF_WCHAR_T;
1855
1856             cres = iconv(m2w,
1857                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1858                          (char**)&bufPtr, &outbuf );
1859
1860             res += 8 - (outbuf / SIZEOF_WCHAR_T);
1861         }
1862         while ((cres == (size_t)-1) && (errno == E2BIG));
1863     }
1864
1865     if (ICONV_FAILED(cres, inbuf))
1866     {
1867         //VS: it is ok if iconv fails, hence trace only
1868         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1869         return wxCONV_FAILED;
1870     }
1871
1872     return res;
1873 }
1874
1875 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1876 {
1877 #if wxUSE_THREADS
1878     // NB: explained in MB2WC
1879     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1880 #endif
1881
1882     size_t inlen = wxWcslen(psz);
1883     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1884     size_t outbuf = n;
1885     size_t res, cres;
1886
1887     wchar_t *tmpbuf = 0;
1888
1889     if (ms_wcNeedsSwap)
1890     {
1891         // need to copy to temp buffer to switch endianness
1892         // (doing WC_BSWAP twice on the original buffer won't help, as it
1893         //  could be in read-only memory, or be accessed in some other thread)
1894         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1895         for ( size_t i = 0; i < inlen; i++ )
1896             tmpbuf[n] = WC_BSWAP(psz[i]);
1897
1898         tmpbuf[inlen] = L'\0';
1899         psz = tmpbuf;
1900     }
1901
1902     if (buf)
1903     {
1904         // have destination buffer, convert there
1905         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1906
1907         res = n - outbuf;
1908
1909         // NB: iconv was given only wcslen(psz) characters on input, and so
1910         //     it couldn't convert the trailing zero. Let's do it ourselves
1911         //     if there's some room left for it in the output buffer.
1912         if (res < n)
1913             buf[0] = 0;
1914     }
1915     else
1916     {
1917         // no destination buffer: convert using temp buffer
1918         // to calculate destination buffer requirement
1919         char tbuf[16];
1920         res = 0;
1921         do
1922         {
1923             buf = tbuf;
1924             outbuf = 16;
1925
1926             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1927
1928             res += 16 - outbuf;
1929         }
1930         while ((cres == (size_t)-1) && (errno == E2BIG));
1931     }
1932
1933     if (ms_wcNeedsSwap)
1934     {
1935         free(tmpbuf);
1936     }
1937
1938     if (ICONV_FAILED(cres, inbuf))
1939     {
1940         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1941         return wxCONV_FAILED;
1942     }
1943
1944     return res;
1945 }
1946
1947 size_t wxMBConv_iconv::GetMBNulLen() const
1948 {
1949     if ( m_minMBCharWidth == 0 )
1950     {
1951         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1952
1953 #if wxUSE_THREADS
1954         // NB: explained in MB2WC
1955         wxMutexLocker lock(self->m_iconvMutex);
1956 #endif
1957
1958         wchar_t *wnul = L"";
1959         char buf[8]; // should be enough for NUL in any encoding
1960         size_t inLen = sizeof(wchar_t),
1961                outLen = WXSIZEOF(buf);
1962         char *inBuff = (char *)wnul;
1963         char *outBuff = buf;
1964         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1965         {
1966             self->m_minMBCharWidth = (size_t)-1;
1967         }
1968         else // ok
1969         {
1970             self->m_minMBCharWidth = outBuff - buf;
1971         }
1972     }
1973
1974     return m_minMBCharWidth;
1975 }
1976
1977 #endif // HAVE_ICONV
1978
1979
1980 // ============================================================================
1981 // Win32 conversion classes
1982 // ============================================================================
1983
1984 #ifdef wxHAVE_WIN32_MB2WC
1985
1986 // from utils.cpp
1987 #if wxUSE_FONTMAP
1988 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1989 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1990 #endif
1991
1992 class wxMBConv_win32 : public wxMBConv
1993 {
1994 public:
1995     wxMBConv_win32()
1996     {
1997         m_CodePage = CP_ACP;
1998         m_minMBCharWidth = 0;
1999     }
2000
2001     wxMBConv_win32(const wxMBConv_win32& conv)
2002         : wxMBConv()
2003     {
2004         m_CodePage = conv.m_CodePage;
2005         m_minMBCharWidth = conv.m_minMBCharWidth;
2006     }
2007
2008 #if wxUSE_FONTMAP
2009     wxMBConv_win32(const wxChar* name)
2010     {
2011         m_CodePage = wxCharsetToCodepage(name);
2012         m_minMBCharWidth = 0;
2013     }
2014
2015     wxMBConv_win32(wxFontEncoding encoding)
2016     {
2017         m_CodePage = wxEncodingToCodepage(encoding);
2018         m_minMBCharWidth = 0;
2019     }
2020 #endif // wxUSE_FONTMAP
2021
2022     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2023     {
2024         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2025         // the behaviour is not compatible with the Unix version (using iconv)
2026         // and break the library itself, e.g. wxTextInputStream::NextChar()
2027         // wouldn't work if reading an incomplete MB char didn't result in an
2028         // error
2029         //
2030         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2031         // Win XP or newer and it is not supported for UTF-[78] so we always
2032         // use our own conversions in this case. See
2033         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2034         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2035         if ( m_CodePage == CP_UTF8 )
2036         {
2037             return wxConvUTF8.MB2WC(buf, psz, n);
2038         }
2039
2040         if ( m_CodePage == CP_UTF7 )
2041         {
2042             return wxConvUTF7.MB2WC(buf, psz, n);
2043         }
2044
2045         int flags = 0;
2046         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2047                 IsAtLeastWin2kSP4() )
2048         {
2049             flags = MB_ERR_INVALID_CHARS;
2050         }
2051
2052         const size_t len = ::MultiByteToWideChar
2053                              (
2054                                 m_CodePage,     // code page
2055                                 flags,          // flags: fall on error
2056                                 psz,            // input string
2057                                 -1,             // its length (NUL-terminated)
2058                                 buf,            // output string
2059                                 buf ? n : 0     // size of output buffer
2060                              );
2061         if ( !len )
2062         {
2063             // function totally failed
2064             return wxCONV_FAILED;
2065         }
2066
2067         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2068         // check if we succeeded, by doing a double trip:
2069         if ( !flags && buf )
2070         {
2071             const size_t mbLen = strlen(psz);
2072             wxCharBuffer mbBuf(mbLen);
2073             if ( ::WideCharToMultiByte
2074                    (
2075                       m_CodePage,
2076                       0,
2077                       buf,
2078                       -1,
2079                       mbBuf.data(),
2080                       mbLen + 1,        // size in bytes, not length
2081                       NULL,
2082                       NULL
2083                    ) == 0 ||
2084                   strcmp(mbBuf, psz) != 0 )
2085             {
2086                 // we didn't obtain the same thing we started from, hence
2087                 // the conversion was lossy and we consider that it failed
2088                 return wxCONV_FAILED;
2089             }
2090         }
2091
2092         // note that it returns count of written chars for buf != NULL and size
2093         // of the needed buffer for buf == NULL so in either case the length of
2094         // the string (which never includes the terminating NUL) is one less
2095         return len - 1;
2096     }
2097
2098     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2099     {
2100         /*
2101             we have a problem here: by default, WideCharToMultiByte() may
2102             replace characters unrepresentable in the target code page with bad
2103             quality approximations such as turning "1/2" symbol (U+00BD) into
2104             "1" for the code pages which don't have it and we, obviously, want
2105             to avoid this at any price
2106
2107             the trouble is that this function does it _silently_, i.e. it won't
2108             even tell us whether it did or not... Win98/2000 and higher provide
2109             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2110             we have to resort to a round trip, i.e. check that converting back
2111             results in the same string -- this is, of course, expensive but
2112             otherwise we simply can't be sure to not garble the data.
2113          */
2114
2115         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2116         // it doesn't work with CJK encodings (which we test for rather roughly
2117         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2118         // supporting it
2119         BOOL usedDef wxDUMMY_INITIALIZE(false);
2120         BOOL *pUsedDef;
2121         int flags;
2122         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2123         {
2124             // it's our lucky day
2125             flags = WC_NO_BEST_FIT_CHARS;
2126             pUsedDef = &usedDef;
2127         }
2128         else // old system or unsupported encoding
2129         {
2130             flags = 0;
2131             pUsedDef = NULL;
2132         }
2133
2134         const size_t len = ::WideCharToMultiByte
2135                              (
2136                                 m_CodePage,     // code page
2137                                 flags,          // either none or no best fit
2138                                 pwz,            // input string
2139                                 -1,             // it is (wide) NUL-terminated
2140                                 buf,            // output buffer
2141                                 buf ? n : 0,    // and its size
2142                                 NULL,           // default "replacement" char
2143                                 pUsedDef        // [out] was it used?
2144                              );
2145
2146         if ( !len )
2147         {
2148             // function totally failed
2149             return wxCONV_FAILED;
2150         }
2151
2152         // if we were really converting, check if we succeeded
2153         if ( buf )
2154         {
2155             if ( flags )
2156             {
2157                 // check if the conversion failed, i.e. if any replacements
2158                 // were done
2159                 if ( usedDef )
2160                     return wxCONV_FAILED;
2161             }
2162             else // we must resort to double tripping...
2163             {
2164                 wxWCharBuffer wcBuf(n);
2165                 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2166                         wcscmp(wcBuf, pwz) != 0 )
2167                 {
2168                     // we didn't obtain the same thing we started from, hence
2169                     // the conversion was lossy and we consider that it failed
2170                     return wxCONV_FAILED;
2171                 }
2172             }
2173         }
2174
2175         // see the comment above for the reason of "len - 1"
2176         return len - 1;
2177     }
2178
2179     virtual size_t GetMBNulLen() const
2180     {
2181         if ( m_minMBCharWidth == 0 )
2182         {
2183             int len = ::WideCharToMultiByte
2184                         (
2185                             m_CodePage,     // code page
2186                             0,              // no flags
2187                             L"",            // input string
2188                             1,              // translate just the NUL
2189                             NULL,           // output buffer
2190                             0,              // and its size
2191                             NULL,           // no replacement char
2192                             NULL            // [out] don't care if it was used
2193                         );
2194
2195             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2196             switch ( len )
2197             {
2198                 default:
2199                     wxLogDebug(_T("Unexpected NUL length %d"), len);
2200                     self->m_minMBCharWidth = (size_t)-1;
2201                     break;
2202
2203                 case 0:
2204                     self->m_minMBCharWidth = (size_t)-1;
2205                     break;
2206
2207                 case 1:
2208                 case 2:
2209                 case 4:
2210                     self->m_minMBCharWidth = len;
2211                     break;
2212             }
2213         }
2214
2215         return m_minMBCharWidth;
2216     }
2217
2218     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2219
2220     bool IsOk() const { return m_CodePage != -1; }
2221
2222 private:
2223     static bool CanUseNoBestFit()
2224     {
2225         static int s_isWin98Or2k = -1;
2226
2227         if ( s_isWin98Or2k == -1 )
2228         {
2229             int verMaj, verMin;
2230             switch ( wxGetOsVersion(&verMaj, &verMin) )
2231             {
2232                 case wxOS_WINDOWS_9X:
2233                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2234                     break;
2235
2236                 case wxOS_WINDOWS_NT:
2237                     s_isWin98Or2k = verMaj >= 5;
2238                     break;
2239
2240                 default:
2241                     // unknown: be conservative by default
2242                     s_isWin98Or2k = 0;
2243                     break;
2244             }
2245
2246             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2247         }
2248
2249         return s_isWin98Or2k == 1;
2250     }
2251
2252     static bool IsAtLeastWin2kSP4()
2253     {
2254 #ifdef __WXWINCE__
2255         return false;
2256 #else
2257         static int s_isAtLeastWin2kSP4 = -1;
2258
2259         if ( s_isAtLeastWin2kSP4 == -1 )
2260         {
2261             OSVERSIONINFOEX ver;
2262
2263             memset(&ver, 0, sizeof(ver));
2264             ver.dwOSVersionInfoSize = sizeof(ver);
2265             GetVersionEx((OSVERSIONINFO*)&ver);
2266
2267             s_isAtLeastWin2kSP4 =
2268               ((ver.dwMajorVersion > 5) || // Vista+
2269                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2270                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2271                ver.wServicePackMajor >= 4)) // 2000 SP4+
2272               ? 1 : 0;
2273         }
2274
2275         return s_isAtLeastWin2kSP4 == 1;
2276 #endif
2277     }
2278
2279
2280     // the code page we're working with
2281     long m_CodePage;
2282
2283     // cached result of GetMBNulLen(), set to 0 initially meaning
2284     // "unknown"
2285     size_t m_minMBCharWidth;
2286 };
2287
2288 #endif // wxHAVE_WIN32_MB2WC
2289
2290 // ============================================================================
2291 // Cocoa conversion classes
2292 // ============================================================================
2293
2294 #if defined(__WXCOCOA__)
2295
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a bit -
// it's just not public (yet).
2299
2300 #include <CoreFoundation/CFString.h>
2301 #include <CoreFoundation/CFStringEncodingExt.h>
2302
2303 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2304 {
2305     CFStringEncoding enc = kCFStringEncodingInvalidId ;
2306
2307     switch (encoding)
2308     {
2309         case wxFONTENCODING_DEFAULT :
2310             enc = CFStringGetSystemEncoding();
2311             break ;
2312
2313         case wxFONTENCODING_ISO8859_1 :
2314             enc = kCFStringEncodingISOLatin1 ;
2315             break ;
2316         case wxFONTENCODING_ISO8859_2 :
2317             enc = kCFStringEncodingISOLatin2;
2318             break ;
2319         case wxFONTENCODING_ISO8859_3 :
2320             enc = kCFStringEncodingISOLatin3 ;
2321             break ;
2322         case wxFONTENCODING_ISO8859_4 :
2323             enc = kCFStringEncodingISOLatin4;
2324             break ;
2325         case wxFONTENCODING_ISO8859_5 :
2326             enc = kCFStringEncodingISOLatinCyrillic;
2327             break ;
2328         case wxFONTENCODING_ISO8859_6 :
2329             enc = kCFStringEncodingISOLatinArabic;
2330             break ;
2331         case wxFONTENCODING_ISO8859_7 :
2332             enc = kCFStringEncodingISOLatinGreek;
2333             break ;
2334         case wxFONTENCODING_ISO8859_8 :
2335             enc = kCFStringEncodingISOLatinHebrew;
2336             break ;
2337         case wxFONTENCODING_ISO8859_9 :
2338             enc = kCFStringEncodingISOLatin5;
2339             break ;
2340         case wxFONTENCODING_ISO8859_10 :
2341             enc = kCFStringEncodingISOLatin6;
2342             break ;
2343         case wxFONTENCODING_ISO8859_11 :
2344             enc = kCFStringEncodingISOLatinThai;
2345             break ;
2346         case wxFONTENCODING_ISO8859_13 :
2347             enc = kCFStringEncodingISOLatin7;
2348             break ;
2349         case wxFONTENCODING_ISO8859_14 :
2350             enc = kCFStringEncodingISOLatin8;
2351             break ;
2352         case wxFONTENCODING_ISO8859_15 :
2353             enc = kCFStringEncodingISOLatin9;
2354             break ;
2355
2356         case wxFONTENCODING_KOI8 :
2357             enc = kCFStringEncodingKOI8_R;
2358             break ;
2359         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2360             enc = kCFStringEncodingDOSRussian;
2361             break ;
2362
2363 //      case wxFONTENCODING_BULGARIAN :
2364 //          enc = ;
2365 //          break ;
2366
2367         case wxFONTENCODING_CP437 :
2368             enc = kCFStringEncodingDOSLatinUS ;
2369             break ;
2370         case wxFONTENCODING_CP850 :
2371             enc = kCFStringEncodingDOSLatin1;
2372             break ;
2373         case wxFONTENCODING_CP852 :
2374             enc = kCFStringEncodingDOSLatin2;
2375             break ;
2376         case wxFONTENCODING_CP855 :
2377             enc = kCFStringEncodingDOSCyrillic;
2378             break ;
2379         case wxFONTENCODING_CP866 :
2380             enc = kCFStringEncodingDOSRussian ;
2381             break ;
2382         case wxFONTENCODING_CP874 :
2383             enc = kCFStringEncodingDOSThai;
2384             break ;
2385         case wxFONTENCODING_CP932 :
2386             enc = kCFStringEncodingDOSJapanese;
2387             break ;
2388         case wxFONTENCODING_CP936 :
2389             enc = kCFStringEncodingDOSChineseSimplif ;
2390             break ;
2391         case wxFONTENCODING_CP949 :
2392             enc = kCFStringEncodingDOSKorean;
2393             break ;
2394         case wxFONTENCODING_CP950 :
2395             enc = kCFStringEncodingDOSChineseTrad;
2396             break ;
2397         case wxFONTENCODING_CP1250 :
2398             enc = kCFStringEncodingWindowsLatin2;
2399             break ;
2400         case wxFONTENCODING_CP1251 :
2401             enc = kCFStringEncodingWindowsCyrillic ;
2402             break ;
2403         case wxFONTENCODING_CP1252 :
2404             enc = kCFStringEncodingWindowsLatin1 ;
2405             break ;
2406         case wxFONTENCODING_CP1253 :
2407             enc = kCFStringEncodingWindowsGreek;
2408             break ;
2409         case wxFONTENCODING_CP1254 :
2410             enc = kCFStringEncodingWindowsLatin5;
2411             break ;
2412         case wxFONTENCODING_CP1255 :
2413             enc = kCFStringEncodingWindowsHebrew ;
2414             break ;
2415         case wxFONTENCODING_CP1256 :
2416             enc = kCFStringEncodingWindowsArabic ;
2417             break ;
2418         case wxFONTENCODING_CP1257 :
2419             enc = kCFStringEncodingWindowsBalticRim;
2420             break ;
2421 //   This only really encodes to UTF7 (if that) evidently
2422 //        case wxFONTENCODING_UTF7 :
2423 //            enc = kCFStringEncodingNonLossyASCII ;
2424 //            break ;
2425         case wxFONTENCODING_UTF8 :
2426             enc = kCFStringEncodingUTF8 ;
2427             break ;
2428         case wxFONTENCODING_EUC_JP :
2429             enc = kCFStringEncodingEUC_JP;
2430             break ;
2431         case wxFONTENCODING_UTF16 :
2432             enc = kCFStringEncodingUnicode ;
2433             break ;
2434         case wxFONTENCODING_MACROMAN :
2435             enc = kCFStringEncodingMacRoman ;
2436             break ;
2437         case wxFONTENCODING_MACJAPANESE :
2438             enc = kCFStringEncodingMacJapanese ;
2439             break ;
2440         case wxFONTENCODING_MACCHINESETRAD :
2441             enc = kCFStringEncodingMacChineseTrad ;
2442             break ;
2443         case wxFONTENCODING_MACKOREAN :
2444             enc = kCFStringEncodingMacKorean ;
2445             break ;
2446         case wxFONTENCODING_MACARABIC :
2447             enc = kCFStringEncodingMacArabic ;
2448             break ;
2449         case wxFONTENCODING_MACHEBREW :
2450             enc = kCFStringEncodingMacHebrew ;
2451             break ;
2452         case wxFONTENCODING_MACGREEK :
2453             enc = kCFStringEncodingMacGreek ;
2454             break ;
2455         case wxFONTENCODING_MACCYRILLIC :
2456             enc = kCFStringEncodingMacCyrillic ;
2457             break ;
2458         case wxFONTENCODING_MACDEVANAGARI :
2459             enc = kCFStringEncodingMacDevanagari ;
2460             break ;
2461         case wxFONTENCODING_MACGURMUKHI :
2462             enc = kCFStringEncodingMacGurmukhi ;
2463             break ;
2464         case wxFONTENCODING_MACGUJARATI :
2465             enc = kCFStringEncodingMacGujarati ;
2466             break ;
2467         case wxFONTENCODING_MACORIYA :
2468             enc = kCFStringEncodingMacOriya ;
2469             break ;
2470         case wxFONTENCODING_MACBENGALI :
2471             enc = kCFStringEncodingMacBengali ;
2472             break ;
2473         case wxFONTENCODING_MACTAMIL :
2474             enc = kCFStringEncodingMacTamil ;
2475             break ;
2476         case wxFONTENCODING_MACTELUGU :
2477             enc = kCFStringEncodingMacTelugu ;
2478             break ;
2479         case wxFONTENCODING_MACKANNADA :
2480             enc = kCFStringEncodingMacKannada ;
2481             break ;
2482         case wxFONTENCODING_MACMALAJALAM :
2483             enc = kCFStringEncodingMacMalayalam ;
2484             break ;
2485         case wxFONTENCODING_MACSINHALESE :
2486             enc = kCFStringEncodingMacSinhalese ;
2487             break ;
2488         case wxFONTENCODING_MACBURMESE :
2489             enc = kCFStringEncodingMacBurmese ;
2490             break ;
2491         case wxFONTENCODING_MACKHMER :
2492             enc = kCFStringEncodingMacKhmer ;
2493             break ;
2494         case wxFONTENCODING_MACTHAI :
2495             enc = kCFStringEncodingMacThai ;
2496             break ;
2497         case wxFONTENCODING_MACLAOTIAN :
2498             enc = kCFStringEncodingMacLaotian ;
2499             break ;
2500         case wxFONTENCODING_MACGEORGIAN :
2501             enc = kCFStringEncodingMacGeorgian ;
2502             break ;
2503         case wxFONTENCODING_MACARMENIAN :
2504             enc = kCFStringEncodingMacArmenian ;
2505             break ;
2506         case wxFONTENCODING_MACCHINESESIMP :
2507             enc = kCFStringEncodingMacChineseSimp ;
2508             break ;
2509         case wxFONTENCODING_MACTIBETAN :
2510             enc = kCFStringEncodingMacTibetan ;
2511             break ;
2512         case wxFONTENCODING_MACMONGOLIAN :
2513             enc = kCFStringEncodingMacMongolian ;
2514             break ;
2515         case wxFONTENCODING_MACETHIOPIC :
2516             enc = kCFStringEncodingMacEthiopic ;
2517             break ;
2518         case wxFONTENCODING_MACCENTRALEUR :
2519             enc = kCFStringEncodingMacCentralEurRoman ;
2520             break ;
2521         case wxFONTENCODING_MACVIATNAMESE :
2522             enc = kCFStringEncodingMacVietnamese ;
2523             break ;
2524         case wxFONTENCODING_MACARABICEXT :
2525             enc = kCFStringEncodingMacExtArabic ;
2526             break ;
2527         case wxFONTENCODING_MACSYMBOL :
2528             enc = kCFStringEncodingMacSymbol ;
2529             break ;
2530         case wxFONTENCODING_MACDINGBATS :
2531             enc = kCFStringEncodingMacDingbats ;
2532             break ;
2533         case wxFONTENCODING_MACTURKISH :
2534             enc = kCFStringEncodingMacTurkish ;
2535             break ;
2536         case wxFONTENCODING_MACCROATIAN :
2537             enc = kCFStringEncodingMacCroatian ;
2538             break ;
2539         case wxFONTENCODING_MACICELANDIC :
2540             enc = kCFStringEncodingMacIcelandic ;
2541             break ;
2542         case wxFONTENCODING_MACROMANIAN :
2543             enc = kCFStringEncodingMacRomanian ;
2544             break ;
2545         case wxFONTENCODING_MACCELTIC :
2546             enc = kCFStringEncodingMacCeltic ;
2547             break ;
2548         case wxFONTENCODING_MACGAELIC :
2549             enc = kCFStringEncodingMacGaelic ;
2550             break ;
2551 //      case wxFONTENCODING_MACKEYBOARD :
2552 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2553 //          break ;
2554
2555         default :
2556             // because gcc is picky
2557             break ;
2558     }
2559
2560     return enc ;
2561 }
2562
2563 class wxMBConv_cocoa : public wxMBConv
2564 {
2565 public:
2566     wxMBConv_cocoa()
2567     {
2568         Init(CFStringGetSystemEncoding()) ;
2569     }
2570
2571     wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2572     {
2573         m_encoding = conv.m_encoding;
2574     }
2575
2576 #if wxUSE_FONTMAP
2577     wxMBConv_cocoa(const wxChar* name)
2578     {
2579         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2580     }
2581 #endif
2582
2583     wxMBConv_cocoa(wxFontEncoding encoding)
2584     {
2585         Init( wxCFStringEncFromFontEnc(encoding) );
2586     }
2587
2588     ~wxMBConv_cocoa()
2589     {
2590     }
2591
2592     void Init( CFStringEncoding encoding)
2593     {
2594         m_encoding = encoding ;
2595     }
2596
2597     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2598     {
2599         wxASSERT(szUnConv);
2600
2601         CFStringRef theString = CFStringCreateWithBytes (
2602                                                 NULL, //the allocator
2603                                                 (const UInt8*)szUnConv,
2604                                                 strlen(szUnConv),
2605                                                 m_encoding,
2606                                                 false //no BOM/external representation
2607                                                 );
2608
2609         wxASSERT(theString);
2610
2611         size_t nOutLength = CFStringGetLength(theString);
2612
2613         if (szOut == NULL)
2614         {
2615             CFRelease(theString);
2616             return nOutLength;
2617         }
2618
2619         CFRange theRange = { 0, nOutSize };
2620
2621 #if SIZEOF_WCHAR_T == 4
2622         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2623 #endif
2624
2625         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2626
2627         CFRelease(theString);
2628
2629         szUniCharBuffer[nOutLength] = '\0';
2630
2631 #if SIZEOF_WCHAR_T == 4
2632         wxMBConvUTF16 converter;
2633         converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2634         delete [] szUniCharBuffer;
2635 #endif
2636
2637         return nOutLength;
2638     }
2639
2640     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2641     {
2642         wxASSERT(szUnConv);
2643
2644         size_t nRealOutSize;
2645         size_t nBufSize = wxWcslen(szUnConv);
2646         UniChar* szUniBuffer = (UniChar*) szUnConv;
2647
2648 #if SIZEOF_WCHAR_T == 4
2649         wxMBConvUTF16 converter ;
2650         nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2651         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2652         converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
2653         nBufSize /= sizeof(UniChar);
2654 #endif
2655
2656         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2657                                 NULL, //allocator
2658                                 szUniBuffer,
2659                                 nBufSize,
2660                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2661                             );
2662
2663         wxASSERT(theString);
2664
2665         //Note that CER puts a BOM when converting to unicode
2666         //so we  check and use getchars instead in that case
2667         if (m_encoding == kCFStringEncodingUnicode)
2668         {
2669             if (szOut != NULL)
2670                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2671
2672             nRealOutSize = CFStringGetLength(theString) + 1;
2673         }
2674         else
2675         {
2676             CFStringGetBytes(
2677                 theString,
2678                 CFRangeMake(0, CFStringGetLength(theString)),
2679                 m_encoding,
2680                 0, //what to put in characters that can't be converted -
2681                     //0 tells CFString to return NULL if it meets such a character
2682                 false, //not an external representation
2683                 (UInt8*) szOut,
2684                 nOutSize,
2685                 (CFIndex*) &nRealOutSize
2686                         );
2687         }
2688
2689         CFRelease(theString);
2690
2691 #if SIZEOF_WCHAR_T == 4
2692         delete[] szUniBuffer;
2693 #endif
2694
2695         return  nRealOutSize - 1;
2696     }
2697
2698     virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2699
2700     bool IsOk() const
2701     {
2702         return m_encoding != kCFStringEncodingInvalidId &&
2703               CFStringIsEncodingAvailable(m_encoding);
2704     }
2705
2706 private:
2707     CFStringEncoding m_encoding ;
2708 };
2709
2710 #endif // defined(__WXCOCOA__)
2711
2712 // ============================================================================
2713 // Mac conversion classes
2714 // ============================================================================
2715
2716 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2717
2718 class wxMBConv_mac : public wxMBConv
2719 {
2720 public:
2721     wxMBConv_mac()
2722     {
2723         Init(CFStringGetSystemEncoding()) ;
2724     }
2725
2726     wxMBConv_mac(const wxMBConv_mac& conv)
2727     {
2728         Init(conv.m_char_encoding);
2729     }
2730
2731 #if wxUSE_FONTMAP
2732     wxMBConv_mac(const wxChar* name)
2733     {
2734         Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
2735     }
2736 #endif
2737
2738     wxMBConv_mac(wxFontEncoding encoding)
2739     {
2740         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2741     }
2742
2743     ~wxMBConv_mac()
2744     {
2745         OSStatus status = noErr ;
2746         if (m_MB2WC_converter)
2747             status = TECDisposeConverter(m_MB2WC_converter);
2748         if (m_WC2MB_converter)
2749             status = TECDisposeConverter(m_WC2MB_converter);
2750     }
2751
2752     void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2753             TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
2754     {
2755         m_MB2WC_converter = NULL ;
2756         m_WC2MB_converter = NULL ;
2757         m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
2758         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
2759     }
2760
2761     virtual void CreateIfNeeded() const
2762     {
2763         if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2764         {
2765             OSStatus status = noErr ;
2766             status = TECCreateConverter(&m_MB2WC_converter,
2767                                     m_char_encoding,
2768                                     m_unicode_encoding);
2769             wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2770             status = TECCreateConverter(&m_WC2MB_converter,
2771                                     m_unicode_encoding,
2772                                     m_char_encoding);
2773             wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2774         }
2775     }
2776
2777     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2778     {
2779         CreateIfNeeded() ;
2780         OSStatus status = noErr ;
2781         ByteCount byteOutLen ;
2782         ByteCount byteInLen = strlen(psz) + 1;
2783         wchar_t *tbuf = NULL ;
2784         UniChar* ubuf = NULL ;
2785         size_t res = 0 ;
2786
2787         if (buf == NULL)
2788         {
2789             // Apple specs say at least 32
2790             n = wxMax( 32, byteInLen ) ;
2791             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2792         }
2793
2794         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2795
2796 #if SIZEOF_WCHAR_T == 4
2797         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2798 #else
2799         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2800 #endif
2801
2802         status = TECConvertText(
2803             m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2804             (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2805
2806 #if SIZEOF_WCHAR_T == 4
2807         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2808         // is not properly terminated we get random characters at the end
2809         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2810         wxMBConvUTF16 converter ;
2811         res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
2812         free( ubuf ) ;
2813 #else
2814         res = byteOutLen / sizeof( UniChar ) ;
2815 #endif
2816
2817         if ( buf == NULL )
2818              free(tbuf) ;
2819
2820         if ( buf  && res < n)
2821             buf[res] = 0;
2822
2823         return res ;
2824     }
2825
2826     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2827     {
2828         CreateIfNeeded() ;
2829         OSStatus status = noErr ;
2830         ByteCount byteOutLen ;
2831         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2832
2833         char *tbuf = NULL ;
2834
2835         if (buf == NULL)
2836         {
2837             // Apple specs say at least 32
2838             n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2839             tbuf = (char*) malloc( n ) ;
2840         }
2841
2842         ByteCount byteBufferLen = n ;
2843         UniChar* ubuf = NULL ;
2844
2845 #if SIZEOF_WCHAR_T == 4
2846         wxMBConvUTF16 converter ;
2847         size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2848         byteInLen = unicharlen ;
2849         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2850         converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2851 #else
2852         ubuf = (UniChar*) psz ;
2853 #endif
2854
2855         status = TECConvertText(
2856             m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2857             (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2858
2859 #if SIZEOF_WCHAR_T == 4
2860         free( ubuf ) ;
2861 #endif
2862
2863         if ( buf == NULL )
2864             free(tbuf) ;
2865
2866         size_t res = byteOutLen ;
2867         if ( buf  && res < n)
2868         {
2869             buf[res] = 0;
2870
2871             //we need to double-trip to verify it didn't insert any ? in place
2872             //of bogus characters
2873             wxWCharBuffer wcBuf(n);
2874             size_t pszlen = wxWcslen(psz);
2875             if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2876                         wxWcslen(wcBuf) != pszlen ||
2877                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2878             {
2879                 // we didn't obtain the same thing we started from, hence
2880                 // the conversion was lossy and we consider that it failed
2881                 return wxCONV_FAILED;
2882             }
2883         }
2884
2885         return res ;
2886     }
2887
2888     virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
2889
2890     bool IsOk() const
2891     {
2892         CreateIfNeeded() ;
2893         return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2894     }
2895
protected :
    // TEC converter handles: set to NULL by Init() and created on demand by
    // CreateIfNeeded(); mutable because that happens in const methods
    mutable TECObjectRef m_MB2WC_converter;
    mutable TECObjectRef m_WC2MB_converter;

    // results of the CreateTextEncoding() calls made in Init(): the
    // multibyte encoding and the 16 bit Unicode encoding of the conversion
    TextEncodingBase m_char_encoding;
    TextEncodingBase m_unicode_encoding;
2902 };
2903
2904 // MB is decomposed (D) normalized UTF8
2905
2906 class wxMBConv_macUTF8D : public wxMBConv_mac
2907 {
2908 public :
2909     wxMBConv_macUTF8D()
2910     {
2911         Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2912         m_uni = NULL;
2913         m_uniBack = NULL ;
2914     }
2915
2916     ~wxMBConv_macUTF8D()
2917     {
2918         if (m_uni!=NULL)
2919             DisposeUnicodeToTextInfo(&m_uni);
2920         if (m_uniBack!=NULL)
2921             DisposeUnicodeToTextInfo(&m_uniBack);
2922     }
2923
2924     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2925     {
2926         CreateIfNeeded() ;
2927         OSStatus status = noErr ;
2928         ByteCount byteOutLen ;
2929         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2930
2931         char *tbuf = NULL ;
2932
2933         if (buf == NULL)
2934         {
2935             // Apple specs say at least 32
2936             n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2937             tbuf = (char*) malloc( n ) ;
2938         }
2939
2940         ByteCount byteBufferLen = n ;
2941         UniChar* ubuf = NULL ;
2942
2943 #if SIZEOF_WCHAR_T == 4
2944         wxMBConvUTF16 converter ;
2945         size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2946         byteInLen = unicharlen ;
2947         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2948         converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2949 #else
2950         ubuf = (UniChar*) psz ;
2951 #endif
2952
2953         // ubuf is a non-decomposed UniChar buffer
2954
2955         ByteCount dcubuflen = byteInLen * 2 + 2 ;
2956         ByteCount dcubufread , dcubufwritten ;
2957         UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2958
2959         ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2960             kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , dcubuf ) ;
2961
2962         // we now convert that decomposed buffer into UTF8
2963
2964         status = TECConvertText(
2965             m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2966             (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2967
2968         free( dcubuf );
2969
2970 #if SIZEOF_WCHAR_T == 4
2971         free( ubuf ) ;
2972 #endif
2973
2974         if ( buf == NULL )
2975             free(tbuf) ;
2976
2977         size_t res = byteOutLen ;
2978         if ( buf  && res < n)
2979         {
2980             buf[res] = 0;
2981             // don't test for round-trip fidelity yet, we cannot guarantee it yet
2982         }
2983
2984         return res ;
2985     }
2986
2987     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2988     {
2989         CreateIfNeeded() ;
2990         OSStatus status = noErr ;
2991         ByteCount byteOutLen ;
2992         ByteCount byteInLen = strlen(psz) + 1;
2993         wchar_t *tbuf = NULL ;
2994         UniChar* ubuf = NULL ;
2995         size_t res = 0 ;
2996
2997         if (buf == NULL)
2998         {
2999             // Apple specs say at least 32
3000             n = wxMax( 32, byteInLen ) ;
3001             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3002         }
3003
3004         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3005
3006 #if SIZEOF_WCHAR_T == 4
3007         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3008 #else
3009         ubuf = (UniChar*) (buf ? buf : tbuf) ;
3010 #endif
3011
3012         ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3013         ByteCount dcubufread , dcubufwritten ;
3014         UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3015
3016         status = TECConvertText(
3017                                 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3018                                 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3019         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3020         // is not properly terminated we get random characters at the end
3021         dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3022
3023         // now from the decomposed UniChar to properly composed uniChar
3024         ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3025                                   kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , ubuf ) ;
3026
3027         free( dcubuf );
3028         byteOutLen = dcubufwritten ;
3029         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3030
3031
3032 #if SIZEOF_WCHAR_T == 4
3033         wxMBConvUTF16 converter ;
3034         res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3035         free( ubuf ) ;
3036 #else
3037         res = byteOutLen / sizeof( UniChar ) ;
3038 #endif
3039
3040         if ( buf == NULL )
3041             free(tbuf) ;
3042
3043         if ( buf  && res < n)
3044             buf[res] = 0;
3045
3046         return res ;
3047     }
3048
3049     virtual void CreateIfNeeded() const
3050     {
3051         wxMBConv_mac::CreateIfNeeded() ;
3052         if ( m_uni == NULL )
3053         {
3054             m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3055                 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3056             m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3057                 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3058             m_map.mappingVersion = kUnicodeUseLatestMapping;
3059
3060             OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3061             wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3062
3063             m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3064                                                        kUnicodeNoSubset, kTextEncodingDefaultFormat);
3065             m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3066                                                      kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3067             m_map.mappingVersion = kUnicodeUseLatestMapping;
3068             err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3069             wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3070         }
3071     }
3072 protected :
3073     mutable UnicodeToTextInfo   m_uni;
3074     mutable UnicodeToTextInfo   m_uniBack;
3075     mutable UnicodeMapping      m_map;
3076 };
3077 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3078
3079 // ============================================================================
3080 // wxEncodingConverter based conversion classes
3081 // ============================================================================
3082
3083 #if wxUSE_FONTMAP
3084
3085 class wxMBConv_wxwin : public wxMBConv
3086 {
3087 private:
3088     void Init()
3089     {
3090         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3091                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3092     }
3093
3094 public:
3095     // temporarily just use wxEncodingConverter stuff,
3096     // so that it works while a better implementation is built
3097     wxMBConv_wxwin(const wxChar* name)
3098     {
3099         if (name)
3100             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3101         else
3102             m_enc = wxFONTENCODING_SYSTEM;
3103
3104         Init();
3105     }
3106
3107     wxMBConv_wxwin(wxFontEncoding enc)
3108     {
3109         m_enc = enc;
3110
3111         Init();
3112     }
3113
3114     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
3115     {
3116         size_t inbuf = strlen(psz);
3117         if (buf)
3118         {
3119             if (!m2w.Convert(psz, buf))
3120                 return wxCONV_FAILED;
3121         }
3122         return inbuf;
3123     }
3124
3125     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
3126     {
3127         const size_t inbuf = wxWcslen(psz);
3128         if (buf)
3129         {
3130             if (!w2m.Convert(psz, buf))
3131                 return wxCONV_FAILED;
3132         }
3133
3134         return inbuf;
3135     }
3136
3137     virtual size_t GetMBNulLen() const
3138     {
3139         switch ( m_enc )
3140         {
3141             case wxFONTENCODING_UTF16BE:
3142             case wxFONTENCODING_UTF16LE:
3143                 return 2;
3144
3145             case wxFONTENCODING_UTF32BE:
3146             case wxFONTENCODING_UTF32LE:
3147                 return 4;
3148
3149             default:
3150                 return 1;
3151         }
3152     }
3153
3154     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3155
3156     bool IsOk() const { return m_ok; }
3157
3158 public:
3159     wxFontEncoding m_enc;
3160     wxEncodingConverter m2w, w2m;
3161
3162 private:
3163     // were we initialized successfully?
3164     bool m_ok;
3165
3166     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
3167 };
3168
3169 // make the constructors available for unit testing
3170 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3171 {
3172     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3173     if ( !result->IsOk() )
3174     {
3175         delete result;
3176         return 0;
3177     }
3178
3179     return result;
3180 }
3181
3182 #endif // wxUSE_FONTMAP
3183
3184 // ============================================================================
3185 // wxCSConv implementation
3186 // ============================================================================
3187
3188 void wxCSConv::Init()
3189 {
3190     m_name = NULL;
3191     m_convReal =  NULL;
3192     m_deferred = true;
3193 }
3194
3195 wxCSConv::wxCSConv(const wxChar *charset)
3196 {
3197     Init();
3198
3199     if ( charset )
3200     {
3201         SetName(charset);
3202     }
3203
3204 #if wxUSE_FONTMAP
3205     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3206 #else
3207     m_encoding = wxFONTENCODING_SYSTEM;
3208 #endif
3209 }
3210
3211 wxCSConv::wxCSConv(wxFontEncoding encoding)
3212 {
3213     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3214     {
3215         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3216
3217         encoding = wxFONTENCODING_SYSTEM;
3218     }
3219
3220     Init();
3221
3222     m_encoding = encoding;
3223 }
3224
3225 wxCSConv::~wxCSConv()
3226 {
3227     Clear();
3228 }
3229
3230 wxCSConv::wxCSConv(const wxCSConv& conv)
3231         : wxMBConv()
3232 {
3233     Init();
3234
3235     SetName(conv.m_name);
3236     m_encoding = conv.m_encoding;
3237 }
3238
3239 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3240 {
3241     Clear();
3242
3243     SetName(conv.m_name);
3244     m_encoding = conv.m_encoding;
3245
3246     return *this;
3247 }
3248
3249 void wxCSConv::Clear()
3250 {
3251     free(m_name);
3252     delete m_convReal;
3253
3254     m_name = NULL;
3255     m_convReal = NULL;
3256 }
3257
3258 void wxCSConv::SetName(const wxChar *charset)
3259 {
3260     if (charset)
3261     {
3262         m_name = wxStrdup(charset);
3263         m_deferred = true;
3264     }
3265 }
3266
#if wxUSE_FONTMAP

// maps a font encoding to a charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// wxCSConv::DoCreate() records here, for each encoding it tried with iconv,
// the name that worked or an empty string if none of them did
static wxEncodingNameCache gs_nameCache;
#endif
3274
// Create the converter doing the real work for m_name / m_encoding.
//
// Returns a new converter object, or NULL. NULL is returned both for
// encodings which need no converter object and when nothing could be created
// (an error is logged in the latter case).
wxMBConv *wxCSConv::DoCreate() const
{
#if wxUSE_FONTMAP
    wxLogTrace(TRACE_STRCONV,
               wxT("creating conversion for %s"),
               (m_name ? m_name
                       : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
#endif // wxUSE_FONTMAP

    // check for the special case of ASCII or ISO8859-1 charset: as we have
    // special knowledge of it anyhow, we don't need to create a special
    // conversion object
    if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
            m_encoding == wxFONTENCODING_DEFAULT )
    {
        // don't convert at all
        return NULL;
    }

    // we trust OS to do conversion better than we can so try external
    // conversion methods first
    //
    // the full order is:
    //      1. OS conversion (iconv() under Unix or Win32 API)
    //      2. hard coded conversions for UTF
    //      3. wxEncodingConverter as fall back

    // step (1)
#ifdef HAVE_ICONV
    // without the font mapper only a name can be given to iconv, so the whole
    // block is skipped if there is none
#if !wxUSE_FONTMAP
    if ( m_name )
#endif // !wxUSE_FONTMAP
    {
        wxString name(m_name);
#if wxUSE_FONTMAP
        wxFontEncoding encoding(m_encoding);
#endif

        if ( !name.empty() )
        {
            // first try the name exactly as it was given
            wxMBConv_iconv *conv = new wxMBConv_iconv(name);
            if ( conv->IsOk() )
                return conv;

            delete conv;

#if wxUSE_FONTMAP
            // that didn't work: map the name to an encoding so that the
            // other names of this encoding can be tried below
            encoding =
                wxFontMapperBase::Get()->CharsetToEncoding(name, false);
#endif // wxUSE_FONTMAP
        }
#if wxUSE_FONTMAP
        {
            const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
            if ( it != gs_nameCache.end() )
            {
                // an empty cached name records an earlier failure
                //
                // NOTE(review): this returns without trying the remaining
                // steps, although the call which cached the failure did go
                // on to them -- confirm that this is intended
                if ( it->second.empty() )
                    return NULL;

                wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
                if ( conv->IsOk() )
                    return conv;

                delete conv;
            }

            const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
            // CS : in case this does not return valid names (eg for MacRoman) encoding
            // got a 'failure' entry in the cache all the same, although it just has to
            // be created using a different method, so only store failed iconv creation
            // attempts (or perhaps we shouldn't do this at all ?)
            if ( names[0] != NULL )
            {
                for ( ; *names; ++names )
                {
                    wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
                    if ( conv->IsOk() )
                    {
                        // remember the name which worked for the next time
                        gs_nameCache[encoding] = *names;
                        return conv;
                    }

                    delete conv;
                }

                gs_nameCache[encoding] = _T(""); // cache the failure
            }
        }
#endif // wxUSE_FONTMAP
    }
#endif // HAVE_ICONV

#ifdef wxHAVE_WIN32_MB2WC
    {
#if wxUSE_FONTMAP
        wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
                                      : new wxMBConv_win32(m_encoding);
        if ( conv->IsOk() )
            return conv;

        delete conv;
#else
        // nothing is tried in this configuration, not even the steps below
        return NULL;
#endif
    }
#endif // wxHAVE_WIN32_MB2WC

#if defined(__WXMAC__)
    {
        // leave UTF16 and UTF32 to the built-ins of wx
        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
            ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
        {
#if wxUSE_FONTMAP
            wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
                                        : new wxMBConv_mac(m_encoding);
#else
            wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
#endif
            if ( conv->IsOk() )
                 return conv;

            delete conv;
        }
    }
#endif

#if defined(__WXCOCOA__)
    {
        if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
        {
#if wxUSE_FONTMAP
            wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
                                          : new wxMBConv_cocoa(m_encoding);
#else
            wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
#endif

            if ( conv->IsOk() )
                 return conv;

            delete conv;
        }
    }
#endif
    // step (2)
    wxFontEncoding enc = m_encoding;
#if wxUSE_FONTMAP
    if ( enc == wxFONTENCODING_SYSTEM && m_name )
    {
        // use "false" to suppress interactive dialogs -- we can be called from
        // anywhere and popping up a dialog from here is the last thing we want to
        // do
        enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
    }
#endif // wxUSE_FONTMAP

    switch ( enc )
    {
        case wxFONTENCODING_UTF7:
             return new wxMBConvUTF7;

        case wxFONTENCODING_UTF8:
             return new wxMBConvUTF8;

        case wxFONTENCODING_UTF16BE:
             return new wxMBConvUTF16BE;

        case wxFONTENCODING_UTF16LE:
             return new wxMBConvUTF16LE;

        case wxFONTENCODING_UTF32BE:
             return new wxMBConvUTF32BE;

        case wxFONTENCODING_UTF32LE:
             return new wxMBConvUTF32LE;

        default:
             // nothing to do but put here to suppress gcc warnings
             break;
    }

    // step (3)
#if wxUSE_FONTMAP
    {
        wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
                                      : new wxMBConv_wxwin(m_encoding);
        if ( conv->IsOk() )
            return conv;

        delete conv;
    }
#endif // wxUSE_FONTMAP

    // NB: This is a hack to prevent deadlock. What could otherwise happen
    //     in Unicode build: wxConvLocal creation ends up being here
    //     because of some failure and logs the error. But wxLog will try to
    //     attach a timestamp, for which it will need wxConvLocal (to convert
    //     time to char* and then wchar_t*), but that fails, tries to log the
    //     error, but wxLog has an (already locked) critical section that
    //     guards the static buffer.
    static bool alreadyLoggingError = false;
    if (!alreadyLoggingError)
    {
        alreadyLoggingError = true;
        wxLogError(_("Cannot convert from the charset '%s'!"),
                   m_name ? m_name
                      :
#if wxUSE_FONTMAP
                         wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
#else // !wxUSE_FONTMAP
                         wxString::Format(_("encoding %i"), m_encoding).c_str()
#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
              );

        alreadyLoggingError = false;
    }

    return NULL;
}
3495
3496 void wxCSConv::CreateConvIfNeeded() const
3497 {
3498     if ( m_deferred )
3499     {
3500         wxCSConv *self = (wxCSConv *)this; // const_cast
3501
3502 #if wxUSE_INTL
3503         // if we don't have neither the name nor the encoding, use the default
3504         // encoding for this system
3505         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3506         {
3507             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
3508         }
3509 #endif // wxUSE_INTL
3510
3511         self->m_convReal = DoCreate();
3512         self->m_deferred = false;
3513     }
3514 }
3515
3516 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3517 {
3518     CreateConvIfNeeded();
3519
3520     if (m_convReal)
3521         return m_convReal->MB2WC(buf, psz, n);
3522
3523     // latin-1 (direct)
3524     size_t len = strlen(psz);
3525
3526     if (buf)
3527     {
3528         for (size_t c = 0; c <= len; c++)
3529             buf[c] = (unsigned char)(psz[c]);
3530     }
3531
3532     return len;
3533 }
3534
3535 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3536 {
3537     CreateConvIfNeeded();
3538
3539     if (m_convReal)
3540         return m_convReal->WC2MB(buf, psz, n);
3541
3542     // latin-1 (direct)
3543     const size_t len = wxWcslen(psz);
3544     if (buf)
3545     {
3546         for (size_t c = 0; c <= len; c++)
3547         {
3548             if (psz[c] > 0xFF)
3549                 return wxCONV_FAILED;
3550
3551             buf[c] = (char)psz[c];
3552         }
3553     }
3554     else
3555     {
3556         for (size_t c = 0; c <= len; c++)
3557         {
3558             if (psz[c] > 0xFF)
3559                 return wxCONV_FAILED;
3560         }
3561     }
3562
3563     return len;
3564 }
3565
3566 size_t wxCSConv::GetMBNulLen() const
3567 {
3568     CreateConvIfNeeded();
3569
3570     if ( m_convReal )
3571     {
3572         return m_convReal->GetMBNulLen();
3573     }
3574
3575     return 1;
3576 }
3577
3578 // ----------------------------------------------------------------------------
3579 // globals
3580 // ----------------------------------------------------------------------------
3581
// the object behind wxConvLibc: its class depends on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other exported converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;
#if defined(__WXMAC__) && defined(TARGET_CARBON)
static wxMBConv_macUTF8D wxConvMacUTF8DObj;
#endif

// exported references to the static objects above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;

// exported converter pointers and their initial targets
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;

// wxConvFileName initially points to the decomposed UTF-8 converter under
// Carbon, to plain UTF-8 in the other __WXOSX__ builds and to the libc
// converter everywhere else
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
#if defined(__WXMAC__) && defined(TARGET_CARBON)
                                    wxConvMacUTF8DObj;
#else
                                    wxConvUTF8Obj;
#endif
#else
                                    wxConvLibcObj;
#endif
3614
3615 #else // !wxUSE_WCHAR_T
3616
// stand-ins in absence of wchar_t: the same global names are defined as
// plain wxMBConv objects in builds without wxUSE_WCHAR_T
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
3622
3623 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T