src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // For compilers that support precompilation, includes "wx.h".
  16 #include "wx/wxprec.h"
  17
  18 #ifdef __BORLANDC__
  19     #pragma hdrstop
  20 #endif  //__BORLANDC__
  21
  22 #ifndef WX_PRECOMP
  23     #include "wx/intl.h"
  24     #include "wx/log.h"
  25     #include "wx/utils.h"
  26     #include "wx/hashmap.h"
  27 #endif
  28
  29 #include "wx/strconv.h"
  30
  31 #if wxUSE_WCHAR_T
  32
  33 #ifndef __WXWINCE__
  34 #include <errno.h>
  35 #endif
  36
  37 #include <ctype.h>
  38 #include <string.h>
  39 #include <stdlib.h>
  40
  41 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  42     #include "wx/msw/private.h"
  43     #include "wx/msw/missing.h"
  44     #define wxHAVE_WIN32_MB2WC
  45 #endif
  46
  47 #ifdef __SALFORDC__
  48     #include <clib.h>
  49 #endif
  50
  51 #ifdef HAVE_ICONV
  52     #include <iconv.h>
  53     #include "wx/thread.h"
  54 #endif
  55
  56 #include "wx/encconv.h"
  57 #include "wx/fontmap.h"
  58
  59 #ifdef __DARWIN__
  60 #include <CoreFoundation/CFString.h>
  61 #include <CoreFoundation/CFStringEncodingExt.h>
  62 #endif //def __DARWIN__
  63
  64 #ifdef __WXMAC__
  65 #ifndef __DARWIN__
  66 #include <ATSUnicode.h>
  67 #include <TextCommon.h>
  68 #include <TextEncodingConverter.h>
  69 #endif
  70
  71 // includes Mac headers
  72 #include "wx/mac/private.h"
  73 #endif
  74
  75
  76 #define TRACE_STRCONV _T("strconv")
  77
  78 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
  79 // be 4 bytes
  80 #if SIZEOF_WCHAR_T == 2
  81     #define WC_UTF16
  82 #endif
  83
  84
  85 // ============================================================================
  86 // implementation
  87 // ============================================================================
  88
  89 // helper function of cMB2WC(): check if n bytes at this location are all NUL
  90 static bool NotAllNULs(const char *p, size_t n)
  91 {
  92     while ( n && *p++ == '\0' )
  93         n--;
  94
  95     return n != 0;
  96 }
  97
  98 // ----------------------------------------------------------------------------
  99 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
 100 // ----------------------------------------------------------------------------
 101
 102 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 103 {
 104     if (input <= 0xffff)
 105     {
 106         if (output)
 107             *output = (wxUint16) input;
 108
 109         return 1;
 110     }
 111     else if (input >= 0x110000)
 112     {
 113         return wxCONV_FAILED;
 114     }
 115     else
 116     {
 117         if (output)
 118         {
 119             *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
 120             *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
 121         }
 122
 123         return 2;
 124     }
 125 }
 126
 127 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 128 {
 129     if ((*input < 0xd800) || (*input > 0xdfff))
 130     {
 131         output = *input;
 132         return 1;
 133     }
 134     else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
 135     {
 136         output = *input;
 137         return wxCONV_FAILED;
 138     }
 139     else
 140     {
 141         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 142         return 2;
 143     }
 144 }
 145
 146 #ifdef WC_UTF16
 147     typedef wchar_t wxDecodeSurrogate_t;
 148 #else // !WC_UTF16
 149     typedef wxUint16 wxDecodeSurrogate_t;
 150 #endif // WC_UTF16/!WC_UTF16
 151
 152 // returns the next UTF-32 character from the wchar_t buffer and advances the
 153 // pointer to the character after this one
 154 //
 155 // if an invalid character is found, *pSrc is set to NULL, the caller must
 156 // check for this
 157 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
 158 {
 159     wxUint32 out;
 160     const size_t
 161         n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
 162     if ( n == wxCONV_FAILED )
 163         *pSrc = NULL;
 164     else
 165         *pSrc += n;
 166
 167     return out;
 168 }
 169
 170 // ----------------------------------------------------------------------------
 171 // wxMBConv
 172 // ----------------------------------------------------------------------------
 173
 174 size_t
 175 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
 176                   const char *src, size_t srcLen) const
 177 {
 178     // although new conversion classes are supposed to implement this function
 179     // directly, the existins ones only implement the old MB2WC() and so, to
 180     // avoid to have to rewrite all conversion classes at once, we provide a
 181     // default (but not efficient) implementation of this one in terms of the
 182     // old function by copying the input to ensure that it's NUL-terminated and
 183     // then using MB2WC() to convert it
 184
 185     // the number of chars [which would be] written to dst [if it were not NULL]
 186     size_t dstWritten = 0;
 187
 188     // the number of NULs terminating this string
 189     size_t nulLen = 0;  // not really needed, but just to avoid warnings
 190
 191     // if we were not given the input size we just have to assume that the
 192     // string is properly terminated as we have no way of knowing how long it
 193     // is anyhow, but if we do have the size check whether there are enough
 194     // NULs at the end
 195     wxCharBuffer bufTmp;
 196     const char *srcEnd;
 197     if ( srcLen != wxNO_LEN )
 198     {
 199         // we need to know how to find the end of this string
 200         nulLen = GetMBNulLen();
 201         if ( nulLen == wxCONV_FAILED )
 202             return wxCONV_FAILED;
 203
 204         // if there are enough NULs we can avoid the copy
 205         if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
 206         {
 207             // make a copy in order to properly NUL-terminate the string
 208             bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
 209             char * const p = bufTmp.data();
 210             memcpy(p, src, srcLen);
 211             for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
 212                 *s = '\0';
 213
 214             src = bufTmp;
 215         }
 216
 217         srcEnd = src + srcLen;
 218     }
 219     else // quit after the first loop iteration
 220     {
 221         srcEnd = NULL;
 222     }
 223
 224     for ( ;; )
 225     {
 226         // try to convert the current chunk
 227         size_t lenChunk = MB2WC(NULL, src, 0);
 228         if ( lenChunk == wxCONV_FAILED )
 229             return wxCONV_FAILED;
 230
 231         lenChunk++; // for the L'\0' at the end of this chunk
 232
 233         dstWritten += lenChunk;
 234
 235         if ( lenChunk == 1 )
 236         {
 237             // nothing left in the input string, conversion succeeded
 238             break;
 239         }
 240
 241         if ( dst )
 242         {
 243             if ( dstWritten > dstLen )
 244                 return wxCONV_FAILED;
 245
 246             if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
 247                 return wxCONV_FAILED;
 248
 249             dst += lenChunk;
 250         }
 251
 252         if ( !srcEnd )
 253         {
 254             // we convert just one chunk in this case as this is the entire
 255             // string anyhow
 256             break;
 257         }
 258
 259         // advance the input pointer past the end of this chunk
 260         while ( NotAllNULs(src, nulLen) )
 261         {
 262             // notice that we must skip over multiple bytes here as we suppose
 263             // that if NUL takes 2 or 4 bytes, then all the other characters do
 264             // too and so if advanced by a single byte we might erroneously
 265             // detect sequences of NUL bytes in the middle of the input
 266             src += nulLen;
 267         }
 268
 269         src += nulLen; // skipping over its terminator as well
 270
 271         // note that ">=" (and not just "==") is needed here as the terminator
 272         // we skipped just above could be inside or just after the buffer
 273         // delimited by inEnd
 274         if ( src >= srcEnd )
 275             break;
 276     }
 277
 278     return dstWritten;
 279 }
 280
 281 size_t
 282 wxMBConv::FromWChar(char *dst, size_t dstLen,
 283                     const wchar_t *src, size_t srcLen) const
 284 {
 285     // the number of chars [which would be] written to dst [if it were not NULL]
 286     size_t dstWritten = 0;
 287
 288     // make a copy of the input string unless it is already properly
 289     // NUL-terminated
 290     //
 291     // if we don't know its length we have no choice but to assume that it is,
 292     // indeed, properly terminated
 293     wxWCharBuffer bufTmp;
 294     if ( srcLen == wxNO_LEN )
 295     {
 296         srcLen = wxWcslen(src) + 1;
 297     }
 298     else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
 299     {
 300         // make a copy in order to properly NUL-terminate the string
 301         bufTmp = wxWCharBuffer(srcLen);
 302         memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
 303         src = bufTmp;
 304     }
 305
 306     const size_t lenNul = GetMBNulLen();
 307     for ( const wchar_t * const srcEnd = src + srcLen;
 308           src < srcEnd;
 309           src += wxWcslen(src) + 1 /* skip L'\0' too */ )
 310     {
 311         // try to convert the current chunk
 312         size_t lenChunk = WC2MB(NULL, src, 0);
 313
 314         if ( lenChunk == wxCONV_FAILED )
 315             return wxCONV_FAILED;
 316
 317         lenChunk += lenNul;
 318         dstWritten += lenChunk;
 319
 320         if ( dst )
 321         {
 322             if ( dstWritten > dstLen )
 323                 return wxCONV_FAILED;
 324
 325             if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
 326                 return wxCONV_FAILED;
 327
 328             dst += lenChunk;
 329         }
 330     }
 331
 332     return dstWritten;
 333 }
 334
 335 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
 336 {
 337     size_t rc = ToWChar(outBuff, outLen, inBuff);
 338     if ( rc != wxCONV_FAILED )
 339     {
 340         // ToWChar() returns the buffer length, i.e. including the trailing
 341         // NUL, while this method doesn't take it into account
 342         rc--;
 343     }
 344
 345     return rc;
 346 }
 347
 348 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
 349 {
 350     size_t rc = FromWChar(outBuff, outLen, inBuff);
 351     if ( rc != wxCONV_FAILED )
 352     {
 353         rc -= GetMBNulLen();
 354     }
 355
 356     return rc;
 357 }
 358
 359 wxMBConv::~wxMBConv()
 360 {
 361     // nothing to do here (necessary for Darwin linking probably)
 362 }
 363
 364 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 365 {
 366     if ( psz )
 367     {
 368         // calculate the length of the buffer needed first
 369         const size_t nLen = MB2WC(NULL, psz, 0);
 370         if ( nLen != wxCONV_FAILED )
 371         {
 372             // now do the actual conversion
 373             wxWCharBuffer buf(nLen /* +1 added implicitly */);
 374
 375             // +1 for the trailing NULL
 376             if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
 377                 return buf;
 378         }
 379     }
 380
 381     return wxWCharBuffer();
 382 }
 383
 384 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 385 {
 386     if ( pwz )
 387     {
 388         const size_t nLen = WC2MB(NULL, pwz, 0);
 389         if ( nLen != wxCONV_FAILED )
 390         {
 391             // extra space for trailing NUL(s)
 392             static const size_t extraLen = GetMaxMBNulLen();
 393
 394             wxCharBuffer buf(nLen + extraLen - 1);
 395             if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
 396                 return buf;
 397         }
 398     }
 399
 400     return wxCharBuffer();
 401 }
 402
 403 const wxWCharBuffer
 404 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
 405 {
 406     const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
 407     if ( dstLen != wxCONV_FAILED )
 408     {
 409         wxWCharBuffer wbuf(dstLen - 1);
 410         if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 411         {
 412             if ( outLen )
 413             {
 414                 *outLen = dstLen;
 415                 if ( wbuf[dstLen - 1] == L'\0' )
 416                     (*outLen)--;
 417             }
 418
 419             return wbuf;
 420         }
 421     }
 422
 423     if ( outLen )
 424         *outLen = 0;
 425
 426     return wxWCharBuffer();
 427 }
 428
 429 const wxCharBuffer
 430 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
 431 {
 432     size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
 433     if ( dstLen != wxCONV_FAILED )
 434     {
 435         // special case of empty input: can't allocate 0 size buffer below as
 436         // wxCharBuffer insists on NUL-terminating it
 437         wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
 438         if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
 439         {
 440             if ( outLen )
 441             {
 442                 *outLen = dstLen;
 443
 444                 const size_t nulLen = GetMBNulLen();
 445                 if ( dstLen >= nulLen &&
 446                         !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
 447                 {
 448                     // in this case the output is NUL-terminated and we're not
 449                     // supposed to count NUL
 450                     *outLen -= nulLen;
 451                 }
 452             }
 453
 454             return buf;
 455         }
 456     }
 457
 458     if ( outLen )
 459         *outLen = 0;
 460
 461     return wxCharBuffer();
 462 }
 463
 464 // ----------------------------------------------------------------------------
 465 // wxMBConvLibc
 466 // ----------------------------------------------------------------------------
 467
 468 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 469 {
 470     return wxMB2WC(buf, psz, n);
 471 }
 472
 473 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 474 {
 475     return wxWC2MB(buf, psz, n);
 476 }
 477
 478 // ----------------------------------------------------------------------------
 479 // wxConvBrokenFileNames
 480 // ----------------------------------------------------------------------------
 481
 482 #ifdef __UNIX__
 483
 484 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
 485 {
 486     if ( wxStricmp(charset, _T("UTF-8")) == 0 ||
 487          wxStricmp(charset, _T("UTF8")) == 0  )
 488         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
 489     else
 490         m_conv = new wxCSConv(charset);
 491 }
 492
 493 #endif // __UNIX__
 494
 495 // ----------------------------------------------------------------------------
 496 // UTF-7
 497 // ----------------------------------------------------------------------------
 498
 499 // Implementation (C) 2004 Fredrik Roubert
 500
 501 //
 502 // BASE64 decoding table
 503 //
 504 static const unsigned char utf7unb64[] =
 505 {
 506     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 507     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 508     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 509     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 510     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 511     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 512     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 513     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 514     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 515     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 516     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 517     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 518     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 519     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 520     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 521     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 522     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 523     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 524     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 525     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 526     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 527     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 528     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 529     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 530     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 531     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 532     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 533     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 534     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 535     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 536     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 537     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 538 };
 539
 540 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 541 {
 542     size_t len = 0;
 543
 544     while ( *psz && (!buf || (len < n)) )
 545     {
 546         unsigned char cc = *psz++;
 547         if (cc != '+')
 548         {
 549             // plain ASCII char
 550             if (buf)
 551                 *buf++ = cc;
 552             len++;
 553         }
 554         else if (*psz == '-')
 555         {
 556             // encoded plus sign
 557             if (buf)
 558                 *buf++ = cc;
 559             len++;
 560             psz++;
 561         }
 562         else // start of BASE64 encoded string
 563         {
 564             bool lsb, ok;
 565             unsigned int d, l;
 566             for ( ok = lsb = false, d = 0, l = 0;
 567                   (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
 568                   psz++ )
 569             {
 570                 d <<= 6;
 571                 d += cc;
 572                 for (l += 6; l >= 8; lsb = !lsb)
 573                 {
 574                     unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
 575                     if (lsb)
 576                     {
 577                         if (buf)
 578                             *buf++ |= c;
 579                         len ++;
 580                     }
 581                     else
 582                     {
 583                         if (buf)
 584                             *buf = (wchar_t)(c << 8);
 585                     }
 586
 587                     ok = true;
 588                 }
 589             }
 590
 591             if ( !ok )
 592             {
 593                 // in valid UTF7 we should have valid characters after '+'
 594                 return wxCONV_FAILED;
 595             }
 596
 597             if (*psz == '-')
 598                 psz++;
 599         }
 600     }
 601
 602     if ( buf && (len < n) )
 603         *buf = '\0';
 604
 605     return len;
 606 }
 607
 608 //
 609 // BASE64 encoding table
 610 //
 611 static const unsigned char utf7enb64[] =
 612 {
 613     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 614     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 615     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 616     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 617     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 618     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 619     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 620     '4', '5', '6', '7', '8', '9', '+', '/'
 621 };
 622
 623 //
 624 // UTF-7 encoding table
 625 //
 626 // 0 - Set D (directly encoded characters)
 627 // 1 - Set O (optional direct characters)
 628 // 2 - whitespace characters (optional)
 629 // 3 - special characters
 630 //
 631 static const unsigned char utf7encode[128] =
 632 {
 633     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 634     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 635     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 636     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 637     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 638     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 639     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 640     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 641 };
 642
 643 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 644 {
 645     size_t len = 0;
 646
 647     while (*psz && ((!buf) || (len < n)))
 648     {
 649         wchar_t cc = *psz++;
 650         if (cc < 0x80 && utf7encode[cc] < 1)
 651         {
 652             // plain ASCII char
 653             if (buf)
 654                 *buf++ = (char)cc;
 655
 656             len++;
 657         }
 658 #ifndef WC_UTF16
 659         else if (((wxUint32)cc) > 0xffff)
 660         {
 661             // no surrogate pair generation (yet?)
 662             return wxCONV_FAILED;
 663         }
 664 #endif
 665         else
 666         {
 667             if (buf)
 668                 *buf++ = '+';
 669
 670             len++;
 671             if (cc != '+')
 672             {
 673                 // BASE64 encode string
 674                 unsigned int lsb, d, l;
 675                 for (d = 0, l = 0; /*nothing*/; psz++)
 676                 {
 677                     for (lsb = 0; lsb < 2; lsb ++)
 678                     {
 679                         d <<= 8;
 680                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 681
 682                         for (l += 8; l >= 6; )
 683                         {
 684                             l -= 6;
 685                             if (buf)
 686                                 *buf++ = utf7enb64[(d >> l) % 64];
 687                             len++;
 688                         }
 689                     }
 690
 691                     cc = *psz;
 692                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 693                         break;
 694                 }
 695
 696                 if (l != 0)
 697                 {
 698                     if (buf)
 699                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 700
 701                     len++;
 702                 }
 703             }
 704
 705             if (buf)
 706                 *buf++ = '-';
 707             len++;
 708         }
 709     }
 710
 711     if (buf && (len < n))
 712         *buf = 0;
 713
 714     return len;
 715 }
 716
 717 // ----------------------------------------------------------------------------
 718 // UTF-8
 719 // ----------------------------------------------------------------------------
 720
 721 static wxUint32 utf8_max[]=
 722     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 723
 724 // boundaries of the private use area we use to (temporarily) remap invalid
 725 // characters invalid in a UTF-8 encoded string
 726 const wxUint32 wxUnicodePUA = 0x100000;
 727 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 728
 729 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 730 {
 731     size_t len = 0;
 732
 733     while (*psz && ((!buf) || (len < n)))
 734     {
 735         const char *opsz = psz;
 736         bool invalid = false;
 737         unsigned char cc = *psz++, fc = cc;
 738         unsigned cnt;
 739         for (cnt = 0; fc & 0x80; cnt++)
 740             fc <<= 1;
 741
 742         if (!cnt)
 743         {
 744             // plain ASCII char
 745             if (buf)
 746                 *buf++ = cc;
 747             len++;
 748
 749             // escape the escape character for octal escapes
 750             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 751                     && cc == '\\' && (!buf || len < n))
 752             {
 753                 if (buf)
 754                     *buf++ = cc;
 755                 len++;
 756             }
 757         }
 758         else
 759         {
 760             cnt--;
 761             if (!cnt)
 762             {
 763                 // invalid UTF-8 sequence
 764                 invalid = true;
 765             }
 766             else
 767             {
 768                 unsigned ocnt = cnt - 1;
 769                 wxUint32 res = cc & (0x3f >> cnt);
 770                 while (cnt--)
 771                 {
 772                     cc = *psz;
 773                     if ((cc & 0xC0) != 0x80)
 774                     {
 775                         // invalid UTF-8 sequence
 776                         invalid = true;
 777                         break;
 778                     }
 779
 780                     psz++;
 781                     res = (res << 6) | (cc & 0x3f);
 782                 }
 783
 784                 if (invalid || res <= utf8_max[ocnt])
 785                 {
 786                     // illegal UTF-8 encoding
 787                     invalid = true;
 788                 }
 789                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 790                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 791                 {
 792                     // if one of our PUA characters turns up externally
 793                     // it must also be treated as an illegal sequence
 794                     // (a bit like you have to escape an escape character)
 795                     invalid = true;
 796                 }
 797                 else
 798                 {
 799 #ifdef WC_UTF16
 800                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 801                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 802                     if (pa == wxCONV_FAILED)
 803                     {
 804                         invalid = true;
 805                     }
 806                     else
 807                     {
 808                         if (buf)
 809                             buf += pa;
 810                         len += pa;
 811                     }
 812 #else // !WC_UTF16
 813                     if (buf)
 814                         *buf++ = (wchar_t)res;
 815                     len++;
 816 #endif // WC_UTF16/!WC_UTF16
 817                 }
 818             }
 819
 820             if (invalid)
 821             {
 822                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 823                 {
 824                     while (opsz < psz && (!buf || len < n))
 825                     {
 826 #ifdef WC_UTF16
 827                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 828                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 829                         wxASSERT(pa != wxCONV_FAILED);
 830                         if (buf)
 831                             buf += pa;
 832                         opsz++;
 833                         len += pa;
 834 #else
 835                         if (buf)
 836                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
 837                         opsz++;
 838                         len++;
 839 #endif
 840                     }
 841                 }
 842                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 843                 {
 844                     while (opsz < psz && (!buf || len < n))
 845                     {
 846                         if ( buf && len + 3 < n )
 847                         {
 848                             unsigned char on = *opsz;
 849                             *buf++ = L'\\';
 850                             *buf++ = (wchar_t)( L'0' + on / 0100 );
 851                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
 852                             *buf++ = (wchar_t)( L'0' + on % 010 );
 853                         }
 854
 855                         opsz++;
 856                         len += 4;
 857                     }
 858                 }
 859                 else // MAP_INVALID_UTF8_NOT
 860                 {
 861                     return wxCONV_FAILED;
 862                 }
 863             }
 864         }
 865     }
 866
 867     if (buf && (len < n))
 868         *buf = 0;
 869
 870     return len;
 871 }
 872
 873 static inline bool isoctal(wchar_t wch)
 874 {
 875     return L'0' <= wch && wch <= L'7';
 876 }
 877
 878 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 879 {
 880     size_t len = 0;
 881
 882     while (*psz && ((!buf) || (len < n)))
 883     {
 884         wxUint32 cc;
 885
 886 #ifdef WC_UTF16
 887         // cast is ok for WC_UTF16
 888         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 889         psz += (pa == wxCONV_FAILED) ? 1 : pa;
 890 #else
 891         cc = (*psz++) & 0x7fffffff;
 892 #endif
 893
 894         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 895                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 896         {
 897             if (buf)
 898                 *buf++ = (char)(cc - wxUnicodePUA);
 899             len++;
 900         }
 901         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 902                     && cc == L'\\' && psz[0] == L'\\' )
 903         {
 904             if (buf)
 905                 *buf++ = (char)cc;
 906             psz++;
 907             len++;
 908         }
 909         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 910                     cc == L'\\' &&
 911                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 912         {
 913             if (buf)
 914             {
 915                 *buf++ = (char) ((psz[0] - L'0') * 0100 +
 916                                  (psz[1] - L'0') * 010 +
 917                                  (psz[2] - L'0'));
 918             }
 919
 920             psz += 3;
 921             len++;
 922         }
 923         else
 924         {
 925             unsigned cnt;
 926             for (cnt = 0; cc > utf8_max[cnt]; cnt++)
 927             {
 928             }
 929
 930             if (!cnt)
 931             {
 932                 // plain ASCII char
 933                 if (buf)
 934                     *buf++ = (char) cc;
 935                 len++;
 936             }
 937             else
 938             {
 939                 len += cnt + 1;
 940                 if (buf)
 941                 {
 942                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 943                     while (cnt--)
 944                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 945                 }
 946             }
 947         }
 948     }
 949
 950     if (buf && (len < n))
 951         *buf = 0;
 952
 953     return len;
 954 }
 955
 956 // ============================================================================
 957 // UTF-16
 958 // ============================================================================
 959
 960 #ifdef WORDS_BIGENDIAN
 961     #define wxMBConvUTF16straight wxMBConvUTF16BE
 962     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 963 #else
 964     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 965     #define wxMBConvUTF16straight wxMBConvUTF16LE
 966 #endif
 967
 968 /* static */
 969 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
 970 {
 971     if ( srcLen == wxNO_LEN )
 972     {
 973         // count the number of bytes in input, including the trailing NULs
 974         const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
 975         for ( srcLen = 1; *inBuff++; srcLen++ )
 976             ;
 977
 978         srcLen *= BYTES_PER_CHAR;
 979     }
 980     else // we already have the length
 981     {
 982         // we can only convert an entire number of UTF-16 characters
 983         if ( srcLen % BYTES_PER_CHAR )
 984             return wxCONV_FAILED;
 985     }
 986
 987     return srcLen;
 988 }
 989
 990 // case when in-memory representation is UTF-16 too
 991 #ifdef WC_UTF16
 992
 993 // ----------------------------------------------------------------------------
 994 // conversions without endianness change
 995 // ----------------------------------------------------------------------------
 996
 997 size_t
 998 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
 999                                const char *src, size_t srcLen) const
1000 {
1001     // set up the scene for using memcpy() (which is presumably more efficient
1002     // than copying the bytes one by one)
1003     srcLen = GetLength(src, srcLen);
1004     if ( srcLen == wxNO_LEN )
1005         return wxCONV_FAILED;
1006
1007     const size_t inLen = srcLen / BYTES_PER_CHAR;
1008     if ( dst )
1009     {
1010         if ( dstLen < inLen )
1011             return wxCONV_FAILED;
1012
1013         memcpy(dst, src, srcLen);
1014     }
1015
1016     return inLen;
1017 }
1018
1019 size_t
1020 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1021                                  const wchar_t *src, size_t srcLen) const
1022 {
1023     if ( srcLen == wxNO_LEN )
1024         srcLen = wxWcslen(src) + 1;
1025
1026     srcLen *= BYTES_PER_CHAR;
1027
1028     if ( dst )
1029     {
1030         if ( dstLen < srcLen )
1031             return wxCONV_FAILED;
1032
1033         memcpy(dst, src, srcLen);
1034     }
1035
1036     return srcLen;
1037 }
1038
1039 // ----------------------------------------------------------------------------
1040 // endian-reversing conversions
1041 // ----------------------------------------------------------------------------
1042
1043 size_t
1044 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1045                            const char *src, size_t srcLen) const
1046 {
1047     srcLen = GetLength(src, srcLen);
1048     if ( srcLen == wxNO_LEN )
1049         return wxCONV_FAILED;
1050
1051     srcLen /= BYTES_PER_CHAR;
1052
1053     if ( dst )
1054     {
1055         if ( dstLen < srcLen )
1056             return wxCONV_FAILED;
1057
1058         const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1059         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1060         {
1061             *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1062         }
1063     }
1064
1065     return srcLen;
1066 }
1067
1068 size_t
1069 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1070                              const wchar_t *src, size_t srcLen) const
1071 {
1072     if ( srcLen == wxNO_LEN )
1073         srcLen = wxWcslen(src) + 1;
1074
1075     srcLen *= BYTES_PER_CHAR;
1076
1077     if ( dst )
1078     {
1079         if ( dstLen < srcLen )
1080             return wxCONV_FAILED;
1081
1082         wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1083         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1084         {
1085             *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1086         }
1087     }
1088
1089     return srcLen;
1090 }
1091
1092 #else // !WC_UTF16: wchar_t is UTF-32
1093
1094 // ----------------------------------------------------------------------------
1095 // conversions without endianness change
1096 // ----------------------------------------------------------------------------
1097
1098 size_t
1099 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1100                                const char *src, size_t srcLen) const
1101 {
1102     srcLen = GetLength(src, srcLen);
1103     if ( srcLen == wxNO_LEN )
1104         return wxCONV_FAILED;
1105
1106     const size_t inLen = srcLen / BYTES_PER_CHAR;
1107     if ( !dst )
1108     {
1109         // optimization: return maximal space which could be needed for this
1110         // string even if the real size could be smaller if the buffer contains
1111         // any surrogates
1112         return inLen;
1113     }
1114
1115     size_t outLen = 0;
1116     const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1117     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1118     {
1119         const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1120         if ( !inBuff )
1121             return wxCONV_FAILED;
1122
1123         if ( ++outLen > dstLen )
1124             return wxCONV_FAILED;
1125
1126         *dst++ = ch;
1127     }
1128
1129
1130     return outLen;
1131 }
1132
1133 size_t
1134 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1135                                  const wchar_t *src, size_t srcLen) const
1136 {
1137     if ( srcLen == wxNO_LEN )
1138         srcLen = wxWcslen(src) + 1;
1139
1140     size_t outLen = 0;
1141     wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1142     for ( size_t n = 0; n < srcLen; n++ )
1143     {
1144         wxUint16 cc[2];
1145         const size_t numChars = encode_utf16(*src++, cc);
1146         if ( numChars == wxCONV_FAILED )
1147             return wxCONV_FAILED;
1148
1149         outLen += numChars * BYTES_PER_CHAR;
1150         if ( outBuff )
1151         {
1152             if ( outLen > dstLen )
1153                 return wxCONV_FAILED;
1154
1155             *outBuff++ = cc[0];
1156             if ( numChars == 2 )
1157             {
1158                 // second character of a surrogate
1159                 *outBuff++ = cc[1];
1160             }
1161         }
1162     }
1163
1164     return outLen;
1165 }
1166
1167 // ----------------------------------------------------------------------------
1168 // endian-reversing conversions
1169 // ----------------------------------------------------------------------------
1170
1171 size_t
1172 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1173                            const char *src, size_t srcLen) const
1174 {
1175     srcLen = GetLength(src, srcLen);
1176     if ( srcLen == wxNO_LEN )
1177         return wxCONV_FAILED;
1178
1179     const size_t inLen = srcLen / BYTES_PER_CHAR;
1180     if ( !dst )
1181     {
1182         // optimization: return maximal space which could be needed for this
1183         // string even if the real size could be smaller if the buffer contains
1184         // any surrogates
1185         return inLen;
1186     }
1187
1188     size_t outLen = 0;
1189     const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1190     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1191     {
1192         wxUint32 ch;
1193         wxUint16 tmp[2];
1194
1195         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1196         inBuff++;
1197         tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1198
1199         const size_t numChars = decode_utf16(tmp, ch);
1200         if ( numChars == wxCONV_FAILED )
1201             return wxCONV_FAILED;
1202
1203         if ( numChars == 2 )
1204             inBuff++;
1205
1206         if ( ++outLen > dstLen )
1207             return wxCONV_FAILED;
1208
1209         *dst++ = ch;
1210     }
1211
1212
1213     return outLen;
1214 }
1215
1216 size_t
1217 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1218                              const wchar_t *src, size_t srcLen) const
1219 {
1220     if ( srcLen == wxNO_LEN )
1221         srcLen = wxWcslen(src) + 1;
1222
1223     size_t outLen = 0;
1224     wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1225     for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1226     {
1227         wxUint16 cc[2];
1228         const size_t numChars = encode_utf16(*src, cc);
1229         if ( numChars == wxCONV_FAILED )
1230             return wxCONV_FAILED;
1231
1232         outLen += numChars * BYTES_PER_CHAR;
1233         if ( outBuff )
1234         {
1235             if ( outLen > dstLen )
1236                 return wxCONV_FAILED;
1237
1238             *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1239             if ( numChars == 2 )
1240             {
1241                 // second character of a surrogate
1242                 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1243             }
1244         }
1245     }
1246
1247     return outLen;
1248 }
1249
1250 #endif // WC_UTF16/!WC_UTF16
1251
1252
1253 // ============================================================================
1254 // UTF-32
1255 // ============================================================================
1256
1257 #ifdef WORDS_BIGENDIAN
1258     #define wxMBConvUTF32straight  wxMBConvUTF32BE
1259     #define wxMBConvUTF32swap      wxMBConvUTF32LE
1260 #else
1261     #define wxMBConvUTF32swap      wxMBConvUTF32BE
1262     #define wxMBConvUTF32straight  wxMBConvUTF32LE
1263 #endif
1264
1265
1266 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1267 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1268
1269 /* static */
1270 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1271 {
1272     if ( srcLen == wxNO_LEN )
1273     {
1274         // count the number of bytes in input, including the trailing NULs
1275         const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1276         for ( srcLen = 1; *inBuff++; srcLen++ )
1277             ;
1278
1279         srcLen *= BYTES_PER_CHAR;
1280     }
1281     else // we already have the length
1282     {
1283         // we can only convert an entire number of UTF-32 characters
1284         if ( srcLen % BYTES_PER_CHAR )
1285             return wxCONV_FAILED;
1286     }
1287
1288     return srcLen;
1289 }
1290
1291 // case when in-memory representation is UTF-16
1292 #ifdef WC_UTF16
1293
1294 // ----------------------------------------------------------------------------
1295 // conversions without endianness change
1296 // ----------------------------------------------------------------------------
1297
1298 size_t
1299 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1300                                const char *src, size_t srcLen) const
1301 {
1302     srcLen = GetLength(src, srcLen);
1303     if ( srcLen == wxNO_LEN )
1304         return wxCONV_FAILED;
1305
1306     const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1307     const size_t inLen = srcLen / BYTES_PER_CHAR;
1308     size_t outLen = 0;
1309     for ( size_t n = 0; n < inLen; n++ )
1310     {
1311         wxUint16 cc[2];
1312         const size_t numChars = encode_utf16(*inBuff++, cc);
1313         if ( numChars == wxCONV_FAILED )
1314             return wxCONV_FAILED;
1315
1316         outLen += numChars;
1317         if ( dst )
1318         {
1319             if ( outLen > dstLen )
1320                 return wxCONV_FAILED;
1321
1322             *dst++ = cc[0];
1323             if ( numChars == 2 )
1324             {
1325                 // second character of a surrogate
1326                 *dst++ = cc[1];
1327             }
1328         }
1329     }
1330
1331     return outLen;
1332 }
1333
1334 size_t
1335 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1336                                  const wchar_t *src, size_t srcLen) const
1337 {
1338     if ( srcLen == wxNO_LEN )
1339         srcLen = wxWcslen(src) + 1;
1340
1341     if ( !dst )
1342     {
1343         // optimization: return maximal space which could be needed for this
1344         // string instead of the exact amount which could be less if there are
1345         // any surrogates in the input
1346         //
1347         // we consider that surrogates are rare enough to make it worthwhile to
1348         // avoid running the loop below at the cost of slightly extra memory
1349         // consumption
1350         return srcLen * BYTES_PER_CHAR;
1351     }
1352
1353     wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1354     size_t outLen = 0;
1355     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1356     {
1357         const wxUint32 ch = wxDecodeSurrogate(&src);
1358         if ( !src )
1359             return wxCONV_FAILED;
1360
1361         outLen += BYTES_PER_CHAR;
1362
1363         if ( outLen > dstLen )
1364             return wxCONV_FAILED;
1365
1366         *outBuff++ = ch;
1367     }
1368
1369     return outLen;
1370 }
1371
1372 // ----------------------------------------------------------------------------
1373 // endian-reversing conversions
1374 // ----------------------------------------------------------------------------
1375
1376 size_t
1377 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1378                            const char *src, size_t srcLen) const
1379 {
1380     srcLen = GetLength(src, srcLen);
1381     if ( srcLen == wxNO_LEN )
1382         return wxCONV_FAILED;
1383
1384     const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1385     const size_t inLen = srcLen / BYTES_PER_CHAR;
1386     size_t outLen = 0;
1387     for ( size_t n = 0; n < inLen; n++, inBuff++ )
1388     {
1389         wxUint16 cc[2];
1390         const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1391         if ( numChars == wxCONV_FAILED )
1392             return wxCONV_FAILED;
1393
1394         outLen += numChars;
1395         if ( dst )
1396         {
1397             if ( outLen > dstLen )
1398                 return wxCONV_FAILED;
1399
1400             *dst++ = cc[0];
1401             if ( numChars == 2 )
1402             {
1403                 // second character of a surrogate
1404                 *dst++ = cc[1];
1405             }
1406         }
1407     }
1408
1409     return outLen;
1410 }
1411
1412 size_t
1413 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1414                              const wchar_t *src, size_t srcLen) const
1415 {
1416     if ( srcLen == wxNO_LEN )
1417         srcLen = wxWcslen(src) + 1;
1418
1419     if ( !dst )
1420     {
1421         // optimization: return maximal space which could be needed for this
1422         // string instead of the exact amount which could be less if there are
1423         // any surrogates in the input
1424         //
1425         // we consider that surrogates are rare enough to make it worthwhile to
1426         // avoid running the loop below at the cost of slightly extra memory
1427         // consumption
1428         return srcLen*BYTES_PER_CHAR;
1429     }
1430
1431     wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1432     size_t outLen = 0;
1433     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1434     {
1435         const wxUint32 ch = wxDecodeSurrogate(&src);
1436         if ( !src )
1437             return wxCONV_FAILED;
1438
1439         outLen += BYTES_PER_CHAR;
1440
1441         if ( outLen > dstLen )
1442             return wxCONV_FAILED;
1443
1444         *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1445     }
1446
1447     return outLen;
1448 }
1449
1450 #else // !WC_UTF16: wchar_t is UTF-32
1451
1452 // ----------------------------------------------------------------------------
1453 // conversions without endianness change
1454 // ----------------------------------------------------------------------------
1455
1456 size_t
1457 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1458                                const char *src, size_t srcLen) const
1459 {
1460     // use memcpy() as it should be much faster than hand-written loop
1461     srcLen = GetLength(src, srcLen);
1462     if ( srcLen == wxNO_LEN )
1463         return wxCONV_FAILED;
1464
1465     const size_t inLen = srcLen/BYTES_PER_CHAR;
1466     if ( dst )
1467     {
1468         if ( dstLen < inLen )
1469             return wxCONV_FAILED;
1470
1471         memcpy(dst, src, srcLen);
1472     }
1473
1474     return inLen;
1475 }
1476
1477 size_t
1478 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1479                                  const wchar_t *src, size_t srcLen) const
1480 {
1481     if ( srcLen == wxNO_LEN )
1482         srcLen = wxWcslen(src) + 1;
1483
1484     srcLen *= BYTES_PER_CHAR;
1485
1486     if ( dst )
1487     {
1488         if ( dstLen < srcLen )
1489             return wxCONV_FAILED;
1490
1491         memcpy(dst, src, srcLen);
1492     }
1493
1494     return srcLen;
1495 }
1496
1497 // ----------------------------------------------------------------------------
1498 // endian-reversing conversions
1499 // ----------------------------------------------------------------------------
1500
1501 size_t
1502 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1503                            const char *src, size_t srcLen) const
1504 {
1505     srcLen = GetLength(src, srcLen);
1506     if ( srcLen == wxNO_LEN )
1507         return wxCONV_FAILED;
1508
1509     srcLen /= BYTES_PER_CHAR;
1510
1511     if ( dst )
1512     {
1513         if ( dstLen < srcLen )
1514             return wxCONV_FAILED;
1515
1516         const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1517         for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1518         {
1519             *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1520         }
1521     }
1522
1523     return srcLen;
1524 }
1525
1526 size_t
1527 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1528                              const wchar_t *src, size_t srcLen) const
1529 {
1530     if ( srcLen == wxNO_LEN )
1531         srcLen = wxWcslen(src) + 1;
1532
1533     srcLen *= BYTES_PER_CHAR;
1534
1535     if ( dst )
1536     {
1537         if ( dstLen < srcLen )
1538             return wxCONV_FAILED;
1539
1540         wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1541         for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1542         {
1543             *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1544         }
1545     }
1546
1547     return srcLen;
1548 }
1549
1550 #endif // WC_UTF16/!WC_UTF16
1551
1552
1553 // ============================================================================
1554 // The classes doing conversion using the iconv_xxx() functions
1555 // ============================================================================
1556
1557 #ifdef HAVE_ICONV
1558
1559 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1560 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1561 //     (unless there's yet another bug in glibc) the only case when iconv()
1562 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1563 //     left in the input buffer -- when _real_ error occurs,
1564 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1565 //     iconv() failure.
1566 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft) tests whether an iconv() call returning cres
// and leaving bufLeft bytes in its input buffer has really failed.
//
// NB: the macro arguments are parenthesized so that arbitrary expressions can
//     be safely passed to it
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type which this platform's
// iconv() expects for its second argument
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value of iconv_t denoting an invalid conversion descriptor
#define ICONV_T_INVALID ((iconv_t)-1)
1577
1578 #if SIZEOF_WCHAR_T == 4
1579     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1580     #define WC_ENC      wxFONTENCODING_UTF32
1581 #elif SIZEOF_WCHAR_T == 2
1582     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1583     #define WC_ENC      wxFONTENCODING_UTF16
1584 #else // sizeof(wchar_t) != 2 nor 4
1585     // does this ever happen?
1586     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1587 #endif
1588
1589 // ----------------------------------------------------------------------------
1590 // wxMBConv_iconv: encapsulates an iconv character set
1591 // ----------------------------------------------------------------------------
1592
1593 class wxMBConv_iconv : public wxMBConv
1594 {
1595 public:
1596     wxMBConv_iconv(const char *name);
1597     virtual ~wxMBConv_iconv();
1598
1599     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1600     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1601
1602     // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1603     virtual size_t GetMBNulLen() const;
1604
1605 #if wxUSE_UNICODE_UTF8
1606     virtual bool IsUTF8() const;
1607 #endif
1608
1609     virtual wxMBConv *Clone() const
1610     {
1611         wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
1612         p->m_minMBCharWidth = m_minMBCharWidth;
1613         return p;
1614     }
1615
1616     bool IsOk() const
1617         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1618
1619 protected:
1620     // the iconv handlers used to translate from multibyte
1621     // to wide char and in the other direction
1622     iconv_t m2w,
1623             w2m;
1624
1625 #if wxUSE_THREADS
1626     // guards access to m2w and w2m objects
1627     wxMutex m_iconvMutex;
1628 #endif
1629
1630 private:
1631     // the name (for iconv_open()) of a wide char charset -- if none is
1632     // available on this machine, it will remain NULL
1633     static wxString ms_wcCharsetName;
1634
1635     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1636     // different endian-ness than the native one
1637     static bool ms_wcNeedsSwap;
1638
1639
1640     // name of the encoding handled by this conversion
1641     wxString m_name;
1642
1643     // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1644     // initially
1645     size_t m_minMBCharWidth;
1646 };
1647
1648 // make the constructor available for unit testing
1649 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
1650 {
1651     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1652     if ( !result->IsOk() )
1653     {
1654         delete result;
1655         return 0;
1656     }
1657
1658     return result;
1659 }
1660
1661 wxString wxMBConv_iconv::ms_wcCharsetName;
1662 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1663
1664 wxMBConv_iconv::wxMBConv_iconv(const char *name)
1665               : m_name(name)
1666 {
1667     m_minMBCharWidth = 0;
1668
1669     // check for charset that represents wchar_t:
1670     if ( ms_wcCharsetName.empty() )
1671     {
1672         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1673
1674 #if wxUSE_FONTMAP
1675         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1676 #else // !wxUSE_FONTMAP
1677         static const wxChar *names_static[] =
1678         {
1679 #if SIZEOF_WCHAR_T == 4
1680             _T("UCS-4"),
1681 #elif SIZEOF_WCHAR_T = 2
1682             _T("UCS-2"),
1683 #endif
1684             NULL
1685         };
1686         const wxChar **names = names_static;
1687 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1688
1689         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1690         {
1691             const wxString nameCS(*names);
1692
1693             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1694             wxString nameXE(nameCS);
1695
1696 #ifdef WORDS_BIGENDIAN
1697                 nameXE += _T("BE");
1698 #else // little endian
1699                 nameXE += _T("LE");
1700 #endif
1701
1702             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1703                        nameXE.c_str());
1704
1705             m2w = iconv_open(nameXE.ToAscii(), name);
1706             if ( m2w == ICONV_T_INVALID )
1707             {
1708                 // try charset w/o bytesex info (e.g. "UCS4")
1709                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1710                            nameCS.c_str());
1711                 m2w = iconv_open(nameCS.ToAscii(), name);
1712
1713                 // and check for bytesex ourselves:
1714                 if ( m2w != ICONV_T_INVALID )
1715                 {
1716                     char    buf[2], *bufPtr;
1717                     wchar_t wbuf[2], *wbufPtr;
1718                     size_t  insz, outsz;
1719                     size_t  res;
1720
1721                     buf[0] = 'A';
1722                     buf[1] = 0;
1723                     wbuf[0] = 0;
1724                     insz = 2;
1725                     outsz = SIZEOF_WCHAR_T * 2;
1726                     wbufPtr = wbuf;
1727                     bufPtr = buf;
1728
1729                     res = iconv(
1730                         m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1731                         (char**)&wbufPtr, &outsz);
1732
1733                     if (ICONV_FAILED(res, insz))
1734                     {
1735                         wxLogLastError(wxT("iconv"));
1736                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1737                                    nameCS.c_str());
1738                     }
1739                     else // ok, can convert to this encoding, remember it
1740                     {
1741                         ms_wcCharsetName = nameCS;
1742                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1743                     }
1744                 }
1745             }
1746             else // use charset not requiring byte swapping
1747             {
1748                 ms_wcCharsetName = nameXE;
1749             }
1750         }
1751
1752         wxLogTrace(TRACE_STRCONV,
1753                    wxT("iconv wchar_t charset is \"%s\"%s"),
1754                    ms_wcCharsetName.empty() ? wxString("<none>")
1755                                             : ms_wcCharsetName,
1756                    ms_wcNeedsSwap ? _T(" (needs swap)")
1757                                   : _T(""));
1758     }
1759     else // we already have ms_wcCharsetName
1760     {
1761         m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
1762     }
1763
1764     if ( ms_wcCharsetName.empty() )
1765     {
1766         w2m = ICONV_T_INVALID;
1767     }
1768     else
1769     {
1770         w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
1771         if ( w2m == ICONV_T_INVALID )
1772         {
1773             wxLogTrace(TRACE_STRCONV,
1774                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1775                        ms_wcCharsetName.c_str(), name);
1776         }
1777     }
1778 }
1779
1780 wxMBConv_iconv::~wxMBConv_iconv()
1781 {
1782     if ( m2w != ICONV_T_INVALID )
1783         iconv_close(m2w);
1784     if ( w2m != ICONV_T_INVALID )
1785         iconv_close(w2m);
1786 }
1787
1788 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1789 {
1790     // find the string length: notice that must be done differently for
1791     // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1792     size_t inbuf;
1793     const size_t nulLen = GetMBNulLen();
1794     switch ( nulLen )
1795     {
1796         default:
1797             return wxCONV_FAILED;
1798
1799         case 1:
1800             inbuf = strlen(psz); // arguably more optimized than our version
1801             break;
1802
1803         case 2:
1804         case 4:
1805             // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1806             // they also have to start at character boundary and not span two
1807             // adjacent characters
1808             const char *p;
1809             for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1810                 ;
1811             inbuf = p - psz;
1812             break;
1813     }
1814
1815 #if wxUSE_THREADS
1816     // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1817     //     Unfortunately there are a couple of global wxCSConv objects such as
1818     //     wxConvLocal that are used all over wx code, so we have to make sure
1819     //     the handle is used by at most one thread at the time. Otherwise
1820     //     only a few wx classes would be safe to use from non-main threads
1821     //     as MB<->WC conversion would fail "randomly".
1822     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1823 #endif // wxUSE_THREADS
1824
1825     size_t outbuf = n * SIZEOF_WCHAR_T;
1826     size_t res, cres;
1827     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1828     wchar_t *bufPtr = buf;
1829     const char *pszPtr = psz;
1830
1831     if (buf)
1832     {
1833         // have destination buffer, convert there
1834         cres = iconv(m2w,
1835                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1836                      (char**)&bufPtr, &outbuf);
1837         res = n - (outbuf / SIZEOF_WCHAR_T);
1838
1839         if (ms_wcNeedsSwap)
1840         {
1841             // convert to native endianness
1842             for ( unsigned i = 0; i < res; i++ )
1843                 buf[n] = WC_BSWAP(buf[i]);
1844         }
1845
1846         // NUL-terminate the string if there is any space left
1847         if (res < n)
1848             buf[res] = 0;
1849     }
1850     else
1851     {
1852         // no destination buffer... convert using temp buffer
1853         // to calculate destination buffer requirement
1854         wchar_t tbuf[8];
1855         res = 0;
1856
1857         do
1858         {
1859             bufPtr = tbuf;
1860             outbuf = 8 * SIZEOF_WCHAR_T;
1861
1862             cres = iconv(m2w,
1863                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1864                          (char**)&bufPtr, &outbuf );
1865
1866             res += 8 - (outbuf / SIZEOF_WCHAR_T);
1867         }
1868         while ((cres == (size_t)-1) && (errno == E2BIG));
1869     }
1870
1871     if (ICONV_FAILED(cres, inbuf))
1872     {
1873         //VS: it is ok if iconv fails, hence trace only
1874         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1875         return wxCONV_FAILED;
1876     }
1877
1878     return res;
1879 }
1880
1881 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1882 {
1883 #if wxUSE_THREADS
1884     // NB: explained in MB2WC
1885     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1886 #endif
1887
1888     size_t inlen = wxWcslen(psz);
1889     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1890     size_t outbuf = n;
1891     size_t res, cres;
1892
1893     wchar_t *tmpbuf = 0;
1894
1895     if (ms_wcNeedsSwap)
1896     {
1897         // need to copy to temp buffer to switch endianness
1898         // (doing WC_BSWAP twice on the original buffer won't help, as it
1899         //  could be in read-only memory, or be accessed in some other thread)
1900         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1901         for ( size_t i = 0; i < inlen; i++ )
1902             tmpbuf[n] = WC_BSWAP(psz[i]);
1903
1904         tmpbuf[inlen] = L'\0';
1905         psz = tmpbuf;
1906     }
1907
1908     if (buf)
1909     {
1910         // have destination buffer, convert there
1911         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1912
1913         res = n - outbuf;
1914
1915         // NB: iconv was given only wcslen(psz) characters on input, and so
1916         //     it couldn't convert the trailing zero. Let's do it ourselves
1917         //     if there's some room left for it in the output buffer.
1918         if (res < n)
1919             buf[0] = 0;
1920     }
1921     else
1922     {
1923         // no destination buffer: convert using temp buffer
1924         // to calculate destination buffer requirement
1925         char tbuf[16];
1926         res = 0;
1927         do
1928         {
1929             buf = tbuf;
1930             outbuf = 16;
1931
1932             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1933
1934             res += 16 - outbuf;
1935         }
1936         while ((cres == (size_t)-1) && (errno == E2BIG));
1937     }
1938
1939     if (ms_wcNeedsSwap)
1940     {
1941         free(tmpbuf);
1942     }
1943
1944     if (ICONV_FAILED(cres, inbuf))
1945     {
1946         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1947         return wxCONV_FAILED;
1948     }
1949
1950     return res;
1951 }
1952
1953 size_t wxMBConv_iconv::GetMBNulLen() const
1954 {
1955     if ( m_minMBCharWidth == 0 )
1956     {
1957         wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1958
1959 #if wxUSE_THREADS
1960         // NB: explained in MB2WC
1961         wxMutexLocker lock(self->m_iconvMutex);
1962 #endif
1963
1964         const wchar_t *wnul = L"";
1965         char buf[8]; // should be enough for NUL in any encoding
1966         size_t inLen = sizeof(wchar_t),
1967                outLen = WXSIZEOF(buf);
1968         char *inBuff = (char *)wnul;
1969         char *outBuff = buf;
1970         if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1971         {
1972             self->m_minMBCharWidth = (size_t)-1;
1973         }
1974         else // ok
1975         {
1976             self->m_minMBCharWidth = outBuff - buf;
1977         }
1978     }
1979
1980     return m_minMBCharWidth;
1981 }
1982
1983 #if wxUSE_UNICODE_UTF8
1984 bool wxMBConv_iconv::IsUTF8() const
1985 {
1986     return wxStricmp(m_name, "UTF-8") == 0 ||
1987            wxStricmp(m_name, "UTF8") == 0;
1988 }
1989 #endif
1990
1991 #endif // HAVE_ICONV
1992
1993
1994 // ============================================================================
1995 // Win32 conversion classes
1996 // ============================================================================
1997
1998 #ifdef wxHAVE_WIN32_MB2WC
1999
2000 // from utils.cpp
2001 #if wxUSE_FONTMAP
2002 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
2003 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
2004 #endif
2005
2006 class wxMBConv_win32 : public wxMBConv
2007 {
2008 public:
2009     wxMBConv_win32()
2010     {
2011         m_CodePage = CP_ACP;
2012         m_minMBCharWidth = 0;
2013     }
2014
2015     wxMBConv_win32(const wxMBConv_win32& conv)
2016         : wxMBConv()
2017     {
2018         m_CodePage = conv.m_CodePage;
2019         m_minMBCharWidth = conv.m_minMBCharWidth;
2020     }
2021
2022 #if wxUSE_FONTMAP
2023     wxMBConv_win32(const char* name)
2024     {
2025         m_CodePage = wxCharsetToCodepage(name);
2026         m_minMBCharWidth = 0;
2027     }
2028
2029     wxMBConv_win32(wxFontEncoding encoding)
2030     {
2031         m_CodePage = wxEncodingToCodepage(encoding);
2032         m_minMBCharWidth = 0;
2033     }
2034 #endif // wxUSE_FONTMAP
2035
2036     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2037     {
2038         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2039         // the behaviour is not compatible with the Unix version (using iconv)
2040         // and break the library itself, e.g. wxTextInputStream::NextChar()
2041         // wouldn't work if reading an incomplete MB char didn't result in an
2042         // error
2043         //
2044         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2045         // Win XP or newer and it is not supported for UTF-[78] so we always
2046         // use our own conversions in this case. See
2047         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2048         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2049         if ( m_CodePage == CP_UTF8 )
2050         {
2051             return wxMBConvUTF8().MB2WC(buf, psz, n);
2052         }
2053
2054         if ( m_CodePage == CP_UTF7 )
2055         {
2056             return wxMBConvUTF7().MB2WC(buf, psz, n);
2057         }
2058
2059         int flags = 0;
2060         if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2061                 IsAtLeastWin2kSP4() )
2062         {
2063             flags = MB_ERR_INVALID_CHARS;
2064         }
2065
2066         const size_t len = ::MultiByteToWideChar
2067                              (
2068                                 m_CodePage,     // code page
2069                                 flags,          // flags: fall on error
2070                                 psz,            // input string
2071                                 -1,             // its length (NUL-terminated)
2072                                 buf,            // output string
2073                                 buf ? n : 0     // size of output buffer
2074                              );
2075         if ( !len )
2076         {
2077             // function totally failed
2078             return wxCONV_FAILED;
2079         }
2080
2081         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2082         // check if we succeeded, by doing a double trip:
2083         if ( !flags && buf )
2084         {
2085             const size_t mbLen = strlen(psz);
2086             wxCharBuffer mbBuf(mbLen);
2087             if ( ::WideCharToMultiByte
2088                    (
2089                       m_CodePage,
2090                       0,
2091                       buf,
2092                       -1,
2093                       mbBuf.data(),
2094                       mbLen + 1,        // size in bytes, not length
2095                       NULL,
2096                       NULL
2097                    ) == 0 ||
2098                   strcmp(mbBuf, psz) != 0 )
2099             {
2100                 // we didn't obtain the same thing we started from, hence
2101                 // the conversion was lossy and we consider that it failed
2102                 return wxCONV_FAILED;
2103             }
2104         }
2105
2106         // note that it returns count of written chars for buf != NULL and size
2107         // of the needed buffer for buf == NULL so in either case the length of
2108         // the string (which never includes the terminating NUL) is one less
2109         return len - 1;
2110     }
2111
2112     virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2113     {
2114         /*
2115             we have a problem here: by default, WideCharToMultiByte() may
2116             replace characters unrepresentable in the target code page with bad
2117             quality approximations such as turning "1/2" symbol (U+00BD) into
2118             "1" for the code pages which don't have it and we, obviously, want
2119             to avoid this at any price
2120
2121             the trouble is that this function does it _silently_, i.e. it won't
2122             even tell us whether it did or not... Win98/2000 and higher provide
2123             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2124             we have to resort to a round trip, i.e. check that converting back
2125             results in the same string -- this is, of course, expensive but
2126             otherwise we simply can't be sure to not garble the data.
2127          */
2128
2129         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2130         // it doesn't work with CJK encodings (which we test for rather roughly
2131         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2132         // supporting it
2133         BOOL usedDef wxDUMMY_INITIALIZE(false);
2134         BOOL *pUsedDef;
2135         int flags;
2136         if ( CanUseNoBestFit() && m_CodePage < 50000 )
2137         {
2138             // it's our lucky day
2139             flags = WC_NO_BEST_FIT_CHARS;
2140             pUsedDef = &usedDef;
2141         }
2142         else // old system or unsupported encoding
2143         {
2144             flags = 0;
2145             pUsedDef = NULL;
2146         }
2147
2148         const size_t len = ::WideCharToMultiByte
2149                              (
2150                                 m_CodePage,     // code page
2151                                 flags,          // either none or no best fit
2152                                 pwz,            // input string
2153                                 -1,             // it is (wide) NUL-terminated
2154                                 buf,            // output buffer
2155                                 buf ? n : 0,    // and its size
2156                                 NULL,           // default "replacement" char
2157                                 pUsedDef        // [out] was it used?
2158                              );
2159
2160         if ( !len )
2161         {
2162             // function totally failed
2163             return wxCONV_FAILED;
2164         }
2165
2166         // if we were really converting, check if we succeeded
2167         if ( buf )
2168         {
2169             if ( flags )
2170             {
2171                 // check if the conversion failed, i.e. if any replacements
2172                 // were done
2173                 if ( usedDef )
2174                     return wxCONV_FAILED;
2175             }
2176             else // we must resort to double tripping...
2177             {
2178                 wxWCharBuffer wcBuf(n);
2179                 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2180                         wcscmp(wcBuf, pwz) != 0 )
2181                 {
2182                     // we didn't obtain the same thing we started from, hence
2183                     // the conversion was lossy and we consider that it failed
2184                     return wxCONV_FAILED;
2185                 }
2186             }
2187         }
2188
2189         // see the comment above for the reason of "len - 1"
2190         return len - 1;
2191     }
2192
2193     virtual size_t GetMBNulLen() const
2194     {
2195         if ( m_minMBCharWidth == 0 )
2196         {
2197             int len = ::WideCharToMultiByte
2198                         (
2199                             m_CodePage,     // code page
2200                             0,              // no flags
2201                             L"",            // input string
2202                             1,              // translate just the NUL
2203                             NULL,           // output buffer
2204                             0,              // and its size
2205                             NULL,           // no replacement char
2206                             NULL            // [out] don't care if it was used
2207                         );
2208
2209             wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2210             switch ( len )
2211             {
2212                 default:
2213                     wxLogDebug(_T("Unexpected NUL length %d"), len);
2214                     self->m_minMBCharWidth = (size_t)-1;
2215                     break;
2216
2217                 case 0:
2218                     self->m_minMBCharWidth = (size_t)-1;
2219                     break;
2220
2221                 case 1:
2222                 case 2:
2223                 case 4:
2224                     self->m_minMBCharWidth = len;
2225                     break;
2226             }
2227         }
2228
2229         return m_minMBCharWidth;
2230     }
2231
2232     virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2233
2234     bool IsOk() const { return m_CodePage != -1; }
2235
2236 private:
2237     static bool CanUseNoBestFit()
2238     {
2239         static int s_isWin98Or2k = -1;
2240
2241         if ( s_isWin98Or2k == -1 )
2242         {
2243             int verMaj, verMin;
2244             switch ( wxGetOsVersion(&verMaj, &verMin) )
2245             {
2246                 case wxOS_WINDOWS_9X:
2247                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2248                     break;
2249
2250                 case wxOS_WINDOWS_NT:
2251                     s_isWin98Or2k = verMaj >= 5;
2252                     break;
2253
2254                 default:
2255                     // unknown: be conservative by default
2256                     s_isWin98Or2k = 0;
2257                     break;
2258             }
2259
2260             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2261         }
2262
2263         return s_isWin98Or2k == 1;
2264     }
2265
2266     static bool IsAtLeastWin2kSP4()
2267     {
2268 #ifdef __WXWINCE__
2269         return false;
2270 #else
2271         static int s_isAtLeastWin2kSP4 = -1;
2272
2273         if ( s_isAtLeastWin2kSP4 == -1 )
2274         {
2275             OSVERSIONINFOEX ver;
2276
2277             memset(&ver, 0, sizeof(ver));
2278             ver.dwOSVersionInfoSize = sizeof(ver);
2279             GetVersionEx((OSVERSIONINFO*)&ver);
2280
2281             s_isAtLeastWin2kSP4 =
2282               ((ver.dwMajorVersion > 5) || // Vista+
2283                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2284                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2285                ver.wServicePackMajor >= 4)) // 2000 SP4+
2286               ? 1 : 0;
2287         }
2288
2289         return s_isAtLeastWin2kSP4 == 1;
2290 #endif
2291     }
2292
2293
2294     // the code page we're working with
2295     long m_CodePage;
2296
2297     // cached result of GetMBNulLen(), set to 0 initially meaning
2298     // "unknown"
2299     size_t m_minMBCharWidth;
2300 };
2301
2302 #endif // wxHAVE_WIN32_MB2WC
2303
2304 // ============================================================================
2305 // CoreFoundation conversion classes
2306 // ============================================================================
2307
2308 #ifdef __DARWIN__
2309
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a
// bit - it's just not public (yet).
2313
2314 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2315 {
2316     CFStringEncoding enc = kCFStringEncodingInvalidId ;
2317
2318     switch (encoding)
2319     {
2320         case wxFONTENCODING_DEFAULT :
2321             enc = CFStringGetSystemEncoding();
2322             break ;
2323
2324         case wxFONTENCODING_ISO8859_1 :
2325             enc = kCFStringEncodingISOLatin1 ;
2326             break ;
2327         case wxFONTENCODING_ISO8859_2 :
2328             enc = kCFStringEncodingISOLatin2;
2329             break ;
2330         case wxFONTENCODING_ISO8859_3 :
2331             enc = kCFStringEncodingISOLatin3 ;
2332             break ;
2333         case wxFONTENCODING_ISO8859_4 :
2334             enc = kCFStringEncodingISOLatin4;
2335             break ;
2336         case wxFONTENCODING_ISO8859_5 :
2337             enc = kCFStringEncodingISOLatinCyrillic;
2338             break ;
2339         case wxFONTENCODING_ISO8859_6 :
2340             enc = kCFStringEncodingISOLatinArabic;
2341             break ;
2342         case wxFONTENCODING_ISO8859_7 :
2343             enc = kCFStringEncodingISOLatinGreek;
2344             break ;
2345         case wxFONTENCODING_ISO8859_8 :
2346             enc = kCFStringEncodingISOLatinHebrew;
2347             break ;
2348         case wxFONTENCODING_ISO8859_9 :
2349             enc = kCFStringEncodingISOLatin5;
2350             break ;
2351         case wxFONTENCODING_ISO8859_10 :
2352             enc = kCFStringEncodingISOLatin6;
2353             break ;
2354         case wxFONTENCODING_ISO8859_11 :
2355             enc = kCFStringEncodingISOLatinThai;
2356             break ;
2357         case wxFONTENCODING_ISO8859_13 :
2358             enc = kCFStringEncodingISOLatin7;
2359             break ;
2360         case wxFONTENCODING_ISO8859_14 :
2361             enc = kCFStringEncodingISOLatin8;
2362             break ;
2363         case wxFONTENCODING_ISO8859_15 :
2364             enc = kCFStringEncodingISOLatin9;
2365             break ;
2366
2367         case wxFONTENCODING_KOI8 :
2368             enc = kCFStringEncodingKOI8_R;
2369             break ;
2370         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2371             enc = kCFStringEncodingDOSRussian;
2372             break ;
2373
2374 //      case wxFONTENCODING_BULGARIAN :
2375 //          enc = ;
2376 //          break ;
2377
2378         case wxFONTENCODING_CP437 :
2379             enc = kCFStringEncodingDOSLatinUS ;
2380             break ;
2381         case wxFONTENCODING_CP850 :
2382             enc = kCFStringEncodingDOSLatin1;
2383             break ;
2384         case wxFONTENCODING_CP852 :
2385             enc = kCFStringEncodingDOSLatin2;
2386             break ;
2387         case wxFONTENCODING_CP855 :
2388             enc = kCFStringEncodingDOSCyrillic;
2389             break ;
2390         case wxFONTENCODING_CP866 :
2391             enc = kCFStringEncodingDOSRussian ;
2392             break ;
2393         case wxFONTENCODING_CP874 :
2394             enc = kCFStringEncodingDOSThai;
2395             break ;
2396         case wxFONTENCODING_CP932 :
2397             enc = kCFStringEncodingDOSJapanese;
2398             break ;
2399         case wxFONTENCODING_CP936 :
2400             enc = kCFStringEncodingDOSChineseSimplif ;
2401             break ;
2402         case wxFONTENCODING_CP949 :
2403             enc = kCFStringEncodingDOSKorean;
2404             break ;
2405         case wxFONTENCODING_CP950 :
2406             enc = kCFStringEncodingDOSChineseTrad;
2407             break ;
2408         case wxFONTENCODING_CP1250 :
2409             enc = kCFStringEncodingWindowsLatin2;
2410             break ;
2411         case wxFONTENCODING_CP1251 :
2412             enc = kCFStringEncodingWindowsCyrillic ;
2413             break ;
2414         case wxFONTENCODING_CP1252 :
2415             enc = kCFStringEncodingWindowsLatin1 ;
2416             break ;
2417         case wxFONTENCODING_CP1253 :
2418             enc = kCFStringEncodingWindowsGreek;
2419             break ;
2420         case wxFONTENCODING_CP1254 :
2421             enc = kCFStringEncodingWindowsLatin5;
2422             break ;
2423         case wxFONTENCODING_CP1255 :
2424             enc = kCFStringEncodingWindowsHebrew ;
2425             break ;
2426         case wxFONTENCODING_CP1256 :
2427             enc = kCFStringEncodingWindowsArabic ;
2428             break ;
2429         case wxFONTENCODING_CP1257 :
2430             enc = kCFStringEncodingWindowsBalticRim;
2431             break ;
2432 //   This only really encodes to UTF7 (if that) evidently
2433 //        case wxFONTENCODING_UTF7 :
2434 //            enc = kCFStringEncodingNonLossyASCII ;
2435 //            break ;
2436         case wxFONTENCODING_UTF8 :
2437             enc = kCFStringEncodingUTF8 ;
2438             break ;
2439         case wxFONTENCODING_EUC_JP :
2440             enc = kCFStringEncodingEUC_JP;
2441             break ;
2442         case wxFONTENCODING_UTF16 :
2443             enc = kCFStringEncodingUnicode ;
2444             break ;
2445         case wxFONTENCODING_MACROMAN :
2446             enc = kCFStringEncodingMacRoman ;
2447             break ;
2448         case wxFONTENCODING_MACJAPANESE :
2449             enc = kCFStringEncodingMacJapanese ;
2450             break ;
2451         case wxFONTENCODING_MACCHINESETRAD :
2452             enc = kCFStringEncodingMacChineseTrad ;
2453             break ;
2454         case wxFONTENCODING_MACKOREAN :
2455             enc = kCFStringEncodingMacKorean ;
2456             break ;
2457         case wxFONTENCODING_MACARABIC :
2458             enc = kCFStringEncodingMacArabic ;
2459             break ;
2460         case wxFONTENCODING_MACHEBREW :
2461             enc = kCFStringEncodingMacHebrew ;
2462             break ;
2463         case wxFONTENCODING_MACGREEK :
2464             enc = kCFStringEncodingMacGreek ;
2465             break ;
2466         case wxFONTENCODING_MACCYRILLIC :
2467             enc = kCFStringEncodingMacCyrillic ;
2468             break ;
2469         case wxFONTENCODING_MACDEVANAGARI :
2470             enc = kCFStringEncodingMacDevanagari ;
2471             break ;
2472         case wxFONTENCODING_MACGURMUKHI :
2473             enc = kCFStringEncodingMacGurmukhi ;
2474             break ;
2475         case wxFONTENCODING_MACGUJARATI :
2476             enc = kCFStringEncodingMacGujarati ;
2477             break ;
2478         case wxFONTENCODING_MACORIYA :
2479             enc = kCFStringEncodingMacOriya ;
2480             break ;
2481         case wxFONTENCODING_MACBENGALI :
2482             enc = kCFStringEncodingMacBengali ;
2483             break ;
2484         case wxFONTENCODING_MACTAMIL :
2485             enc = kCFStringEncodingMacTamil ;
2486             break ;
2487         case wxFONTENCODING_MACTELUGU :
2488             enc = kCFStringEncodingMacTelugu ;
2489             break ;
2490         case wxFONTENCODING_MACKANNADA :
2491             enc = kCFStringEncodingMacKannada ;
2492             break ;
2493         case wxFONTENCODING_MACMALAJALAM :
2494             enc = kCFStringEncodingMacMalayalam ;
2495             break ;
2496         case wxFONTENCODING_MACSINHALESE :
2497             enc = kCFStringEncodingMacSinhalese ;
2498             break ;
2499         case wxFONTENCODING_MACBURMESE :
2500             enc = kCFStringEncodingMacBurmese ;
2501             break ;
2502         case wxFONTENCODING_MACKHMER :
2503             enc = kCFStringEncodingMacKhmer ;
2504             break ;
2505         case wxFONTENCODING_MACTHAI :
2506             enc = kCFStringEncodingMacThai ;
2507             break ;
2508         case wxFONTENCODING_MACLAOTIAN :
2509             enc = kCFStringEncodingMacLaotian ;
2510             break ;
2511         case wxFONTENCODING_MACGEORGIAN :
2512             enc = kCFStringEncodingMacGeorgian ;
2513             break ;
2514         case wxFONTENCODING_MACARMENIAN :
2515             enc = kCFStringEncodingMacArmenian ;
2516             break ;
2517         case wxFONTENCODING_MACCHINESESIMP :
2518             enc = kCFStringEncodingMacChineseSimp ;
2519             break ;
2520         case wxFONTENCODING_MACTIBETAN :
2521             enc = kCFStringEncodingMacTibetan ;
2522             break ;
2523         case wxFONTENCODING_MACMONGOLIAN :
2524             enc = kCFStringEncodingMacMongolian ;
2525             break ;
2526         case wxFONTENCODING_MACETHIOPIC :
2527             enc = kCFStringEncodingMacEthiopic ;
2528             break ;
2529         case wxFONTENCODING_MACCENTRALEUR :
2530             enc = kCFStringEncodingMacCentralEurRoman ;
2531             break ;
2532         case wxFONTENCODING_MACVIATNAMESE :
2533             enc = kCFStringEncodingMacVietnamese ;
2534             break ;
2535         case wxFONTENCODING_MACARABICEXT :
2536             enc = kCFStringEncodingMacExtArabic ;
2537             break ;
2538         case wxFONTENCODING_MACSYMBOL :
2539             enc = kCFStringEncodingMacSymbol ;
2540             break ;
2541         case wxFONTENCODING_MACDINGBATS :
2542             enc = kCFStringEncodingMacDingbats ;
2543             break ;
2544         case wxFONTENCODING_MACTURKISH :
2545             enc = kCFStringEncodingMacTurkish ;
2546             break ;
2547         case wxFONTENCODING_MACCROATIAN :
2548             enc = kCFStringEncodingMacCroatian ;
2549             break ;
2550         case wxFONTENCODING_MACICELANDIC :
2551             enc = kCFStringEncodingMacIcelandic ;
2552             break ;
2553         case wxFONTENCODING_MACROMANIAN :
2554             enc = kCFStringEncodingMacRomanian ;
2555             break ;
2556         case wxFONTENCODING_MACCELTIC :
2557             enc = kCFStringEncodingMacCeltic ;
2558             break ;
2559         case wxFONTENCODING_MACGAELIC :
2560             enc = kCFStringEncodingMacGaelic ;
2561             break ;
2562 //      case wxFONTENCODING_MACKEYBOARD :
2563 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2564 //          break ;
2565
2566         default :
2567             // because gcc is picky
2568             break ;
2569     }
2570
2571     return enc ;
2572 }
2573
2574 class wxMBConv_cf : public wxMBConv
2575 {
2576 public:
2577     wxMBConv_cf()
2578     {
2579         Init(CFStringGetSystemEncoding()) ;
2580     }
2581
2582     wxMBConv_cf(const wxMBConv_cf& conv)
2583     {
2584         m_encoding = conv.m_encoding;
2585     }
2586
2587 #if wxUSE_FONTMAP
2588     wxMBConv_cf(const char* name)
2589     {
2590         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2591     }
2592 #endif
2593
2594     wxMBConv_cf(wxFontEncoding encoding)
2595     {
2596         Init( wxCFStringEncFromFontEnc(encoding) );
2597     }
2598
2599     virtual ~wxMBConv_cf()
2600     {
2601     }
2602
2603     void Init( CFStringEncoding encoding)
2604     {
2605         m_encoding = encoding ;
2606     }
2607
2608     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2609     {
2610         wxASSERT(szUnConv);
2611
2612         CFStringRef theString = CFStringCreateWithBytes (
2613                                                 NULL, //the allocator
2614                                                 (const UInt8*)szUnConv,
2615                                                 strlen(szUnConv),
2616                                                 m_encoding,
2617                                                 false //no BOM/external representation
2618                                                 );
2619
2620         wxASSERT(theString);
2621
2622         size_t nOutLength = CFStringGetLength(theString);
2623
2624         if (szOut == NULL)
2625         {
2626             CFRelease(theString);
2627             return nOutLength;
2628         }
2629
2630         CFRange theRange = { 0, nOutSize };
2631
2632 #if SIZEOF_WCHAR_T == 4
2633         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2634 #endif
2635
2636         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2637
2638         CFRelease(theString);
2639
2640         szUniCharBuffer[nOutLength] = '\0';
2641
2642 #if SIZEOF_WCHAR_T == 4
2643         wxMBConvUTF16 converter;
2644         converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2645         delete [] szUniCharBuffer;
2646 #endif
2647
2648         return nOutLength;
2649     }
2650
2651     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2652     {
2653         wxASSERT(szUnConv);
2654
2655         size_t nRealOutSize;
2656         size_t nBufSize = wxWcslen(szUnConv);
2657         UniChar* szUniBuffer = (UniChar*) szUnConv;
2658
2659 #if SIZEOF_WCHAR_T == 4
2660         wxMBConvUTF16 converter ;
2661         nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2662         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2663         converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
2664         nBufSize /= sizeof(UniChar);
2665 #endif
2666
2667         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2668                                 NULL, //allocator
2669                                 szUniBuffer,
2670                                 nBufSize,
2671                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2672                             );
2673
2674         wxASSERT(theString);
2675
2676         //Note that CER puts a BOM when converting to unicode
2677         //so we  check and use getchars instead in that case
2678         if (m_encoding == kCFStringEncodingUnicode)
2679         {
2680             if (szOut != NULL)
2681                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2682
2683             nRealOutSize = CFStringGetLength(theString) + 1;
2684         }
2685         else
2686         {
2687             CFStringGetBytes(
2688                 theString,
2689                 CFRangeMake(0, CFStringGetLength(theString)),
2690                 m_encoding,
2691                 0, //what to put in characters that can't be converted -
2692                     //0 tells CFString to return NULL if it meets such a character
2693                 false, //not an external representation
2694                 (UInt8*) szOut,
2695                 nOutSize,
2696                 (CFIndex*) &nRealOutSize
2697                         );
2698         }
2699
2700         CFRelease(theString);
2701
2702 #if SIZEOF_WCHAR_T == 4
2703         delete[] szUniBuffer;
2704 #endif
2705
2706         return  nRealOutSize - 1;
2707     }
2708
2709     virtual wxMBConv *Clone() const { return new wxMBConv_cf(*this); }
2710
2711     bool IsOk() const
2712     {
2713         return m_encoding != kCFStringEncodingInvalidId &&
2714               CFStringIsEncodingAvailable(m_encoding);
2715     }
2716
2717 private:
2718     CFStringEncoding m_encoding ;
2719 };
2720
2721 #endif // __DARWIN__
2722
2723 // ============================================================================
2724 // Mac conversion classes
2725 // ============================================================================
2726
2727 /* Although we are in the base library we currently have this wxMac
2728  * conditional.  This is not generally good but fortunately does not affect
2729  * the ABI of the base library, only what encodings might work.
2730  * It does mean that a wxBase built as part of wxMac has slightly more support
2731  * than one built for wxCocoa or even wxGtk.
2732  */
2733 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2734
2735 class wxMBConv_mac : public wxMBConv
2736 {
2737 public:
2738     wxMBConv_mac()
2739     {
2740         Init(CFStringGetSystemEncoding()) ;
2741     }
2742
2743     wxMBConv_mac(const wxMBConv_mac& conv)
2744     {
2745         Init(conv.m_char_encoding);
2746     }
2747
2748 #if wxUSE_FONTMAP
2749     wxMBConv_mac(const char* name)
2750     {
2751         Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
2752     }
2753 #endif
2754
2755     wxMBConv_mac(wxFontEncoding encoding)
2756     {
2757         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2758     }
2759
2760     virtual ~wxMBConv_mac()
2761     {
2762         OSStatus status = noErr ;
2763         if (m_MB2WC_converter)
2764             status = TECDisposeConverter(m_MB2WC_converter);
2765         if (m_WC2MB_converter)
2766             status = TECDisposeConverter(m_WC2MB_converter);
2767     }
2768
2769     void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2770             TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
2771     {
2772         m_MB2WC_converter = NULL ;
2773         m_WC2MB_converter = NULL ;
2774         m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
2775         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
2776     }
2777
2778     virtual void CreateIfNeeded() const
2779     {
2780         if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2781         {
2782             OSStatus status = noErr ;
2783             status = TECCreateConverter(&m_MB2WC_converter,
2784                                     m_char_encoding,
2785                                     m_unicode_encoding);
2786             wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2787             status = TECCreateConverter(&m_WC2MB_converter,
2788                                     m_unicode_encoding,
2789                                     m_char_encoding);
2790             wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2791         }
2792     }
2793
2794     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2795     {
2796         CreateIfNeeded() ;
2797         OSStatus status = noErr ;
2798         ByteCount byteOutLen ;
2799         ByteCount byteInLen = strlen(psz) + 1;
2800         wchar_t *tbuf = NULL ;
2801         UniChar* ubuf = NULL ;
2802         size_t res = 0 ;
2803
2804         if (buf == NULL)
2805         {
2806             // Apple specs say at least 32
2807             n = wxMax( 32, byteInLen ) ;
2808             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2809         }
2810
2811         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2812
2813 #if SIZEOF_WCHAR_T == 4
2814         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2815 #else
2816         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2817 #endif
2818
2819         status = TECConvertText(
2820             m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2821             (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2822
2823 #if SIZEOF_WCHAR_T == 4
2824         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2825         // is not properly terminated we get random characters at the end
2826         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2827         wxMBConvUTF16 converter ;
2828         res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
2829         free( ubuf ) ;
2830 #else
2831         res = byteOutLen / sizeof( UniChar ) ;
2832 #endif
2833
2834         if ( buf == NULL )
2835              free(tbuf) ;
2836
2837         if ( buf  && res < n)
2838             buf[res] = 0;
2839
2840         return res ;
2841     }
2842
2843     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2844     {
2845         CreateIfNeeded() ;
2846         OSStatus status = noErr ;
2847         ByteCount byteOutLen ;
2848         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2849
2850         char *tbuf = NULL ;
2851
2852         if (buf == NULL)
2853         {
2854             // Apple specs say at least 32
2855             n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2856             tbuf = (char*) malloc( n ) ;
2857         }
2858
2859         ByteCount byteBufferLen = n ;
2860         UniChar* ubuf = NULL ;
2861
2862 #if SIZEOF_WCHAR_T == 4
2863         wxMBConvUTF16 converter ;
2864         size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2865         byteInLen = unicharlen ;
2866         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2867         converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2868 #else
2869         ubuf = (UniChar*) psz ;
2870 #endif
2871
2872         status = TECConvertText(
2873             m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2874             (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2875
2876 #if SIZEOF_WCHAR_T == 4
2877         free( ubuf ) ;
2878 #endif
2879
2880         if ( buf == NULL )
2881             free(tbuf) ;
2882
2883         size_t res = byteOutLen ;
2884         if ( buf  && res < n)
2885         {
2886             buf[res] = 0;
2887
2888             //we need to double-trip to verify it didn't insert any ? in place
2889             //of bogus characters
2890             wxWCharBuffer wcBuf(n);
2891             size_t pszlen = wxWcslen(psz);
2892             if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2893                         wxWcslen(wcBuf) != pszlen ||
2894                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2895             {
2896                 // we didn't obtain the same thing we started from, hence
2897                 // the conversion was lossy and we consider that it failed
2898                 return wxCONV_FAILED;
2899             }
2900         }
2901
2902         return res ;
2903     }
2904
2905     virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
2906
2907     bool IsOk() const
2908     {
2909         CreateIfNeeded() ;
2910         return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2911     }
2912
2913 protected :
2914     mutable TECObjectRef m_MB2WC_converter;
2915     mutable TECObjectRef m_WC2MB_converter;
2916
2917     TextEncodingBase m_char_encoding;
2918     TextEncodingBase m_unicode_encoding;
2919 };
2920
2921 // MB is decomposed (D) normalized UTF8
2922
2923 class wxMBConv_macUTF8D : public wxMBConv_mac
2924 {
2925 public :
2926     wxMBConv_macUTF8D()
2927     {
2928         Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2929         m_uni = NULL;
2930         m_uniBack = NULL ;
2931     }
2932
2933     virtual ~wxMBConv_macUTF8D()
2934     {
2935         if (m_uni!=NULL)
2936             DisposeUnicodeToTextInfo(&m_uni);
2937         if (m_uniBack!=NULL)
2938             DisposeUnicodeToTextInfo(&m_uniBack);
2939     }
2940
2941     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2942     {
2943         CreateIfNeeded() ;
2944         OSStatus status = noErr ;
2945         ByteCount byteOutLen ;
2946         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2947
2948         char *tbuf = NULL ;
2949
2950         if (buf == NULL)
2951         {
2952             // Apple specs say at least 32
2953             n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2954             tbuf = (char*) malloc( n ) ;
2955         }
2956
2957         ByteCount byteBufferLen = n ;
2958         UniChar* ubuf = NULL ;
2959
2960 #if SIZEOF_WCHAR_T == 4
2961         wxMBConvUTF16 converter ;
2962         size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2963         byteInLen = unicharlen ;
2964         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2965         converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2966 #else
2967         ubuf = (UniChar*) psz ;
2968 #endif
2969
2970         // ubuf is a non-decomposed UniChar buffer
2971
2972         ByteCount dcubuflen = byteInLen * 2 + 2 ;
2973         ByteCount dcubufread , dcubufwritten ;
2974         UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2975
2976         ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2977             kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , dcubuf ) ;
2978
2979         // we now convert that decomposed buffer into UTF8
2980
2981         status = TECConvertText(
2982             m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2983             (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2984
2985         free( dcubuf );
2986
2987 #if SIZEOF_WCHAR_T == 4
2988         free( ubuf ) ;
2989 #endif
2990
2991         if ( buf == NULL )
2992             free(tbuf) ;
2993
2994         size_t res = byteOutLen ;
2995         if ( buf  && res < n)
2996         {
2997             buf[res] = 0;
2998             // don't test for round-trip fidelity yet, we cannot guarantee it yet
2999         }
3000
3001         return res ;
3002     }
3003
3004     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
3005     {
3006         CreateIfNeeded() ;
3007         OSStatus status = noErr ;
3008         ByteCount byteOutLen ;
3009         ByteCount byteInLen = strlen(psz) + 1;
3010         wchar_t *tbuf = NULL ;
3011         UniChar* ubuf = NULL ;
3012         size_t res = 0 ;
3013
3014         if (buf == NULL)
3015         {
3016             // Apple specs say at least 32
3017             n = wxMax( 32, byteInLen ) ;
3018             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3019         }
3020
3021         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3022
3023 #if SIZEOF_WCHAR_T == 4
3024         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3025 #else
3026         ubuf = (UniChar*) (buf ? buf : tbuf) ;
3027 #endif
3028
3029         ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3030         ByteCount dcubufread , dcubufwritten ;
3031         UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3032
3033         status = TECConvertText(
3034                                 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3035                                 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3036         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3037         // is not properly terminated we get random characters at the end
3038         dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3039
3040         // now from the decomposed UniChar to properly composed uniChar
3041         ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3042                                   kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , ubuf ) ;
3043
3044         free( dcubuf );
3045         byteOutLen = dcubufwritten ;
3046         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3047
3048
3049 #if SIZEOF_WCHAR_T == 4
3050         wxMBConvUTF16 converter ;
3051         res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3052         free( ubuf ) ;
3053 #else
3054         res = byteOutLen / sizeof( UniChar ) ;
3055 #endif
3056
3057         if ( buf == NULL )
3058             free(tbuf) ;
3059
3060         if ( buf  && res < n)
3061             buf[res] = 0;
3062
3063         return res ;
3064     }
3065
3066     virtual void CreateIfNeeded() const
3067     {
3068         wxMBConv_mac::CreateIfNeeded() ;
3069         if ( m_uni == NULL )
3070         {
3071             m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3072                 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3073             m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3074                 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3075             m_map.mappingVersion = kUnicodeUseLatestMapping;
3076
3077             OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3078             wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3079
3080             m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3081                                                        kUnicodeNoSubset, kTextEncodingDefaultFormat);
3082             m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3083                                                      kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3084             m_map.mappingVersion = kUnicodeUseLatestMapping;
3085             err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3086             wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3087         }
3088     }
3089 protected :
3090     mutable UnicodeToTextInfo   m_uni;
3091     mutable UnicodeToTextInfo   m_uniBack;
3092     mutable UnicodeMapping      m_map;
3093 };
3094 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3095
3096 // ============================================================================
3097 // wxEncodingConverter based conversion classes
3098 // ============================================================================
3099
3100 #if wxUSE_FONTMAP
3101
3102 class wxMBConv_wxwin : public wxMBConv
3103 {
3104 private:
3105     void Init()
3106     {
3107         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3108                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3109     }
3110
3111 public:
3112     // temporarily just use wxEncodingConverter stuff,
3113     // so that it works while a better implementation is built
3114     wxMBConv_wxwin(const char* name)
3115     {
3116         if (name)
3117             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3118         else
3119             m_enc = wxFONTENCODING_SYSTEM;
3120
3121         Init();
3122     }
3123
3124     wxMBConv_wxwin(wxFontEncoding enc)
3125     {
3126         m_enc = enc;
3127
3128         Init();
3129     }
3130
3131     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
3132     {
3133         size_t inbuf = strlen(psz);
3134         if (buf)
3135         {
3136             if (!m2w.Convert(psz, buf))
3137                 return wxCONV_FAILED;
3138         }
3139         return inbuf;
3140     }
3141
3142     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
3143     {
3144         const size_t inbuf = wxWcslen(psz);
3145         if (buf)
3146         {
3147             if (!w2m.Convert(psz, buf))
3148                 return wxCONV_FAILED;
3149         }
3150
3151         return inbuf;
3152     }
3153
3154     virtual size_t GetMBNulLen() const
3155     {
3156         switch ( m_enc )
3157         {
3158             case wxFONTENCODING_UTF16BE:
3159             case wxFONTENCODING_UTF16LE:
3160                 return 2;
3161
3162             case wxFONTENCODING_UTF32BE:
3163             case wxFONTENCODING_UTF32LE:
3164                 return 4;
3165
3166             default:
3167                 return 1;
3168         }
3169     }
3170
3171     virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3172
3173     bool IsOk() const { return m_ok; }
3174
3175 public:
3176     wxFontEncoding m_enc;
3177     wxEncodingConverter m2w, w2m;
3178
3179 private:
3180     // were we initialized successfully?
3181     bool m_ok;
3182
3183     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
3184 };
3185
3186 // make the constructors available for unit testing
3187 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
3188 {
3189     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3190     if ( !result->IsOk() )
3191     {
3192         delete result;
3193         return 0;
3194     }
3195
3196     return result;
3197 }
3198
3199 #endif // wxUSE_FONTMAP
3200
3201 // ============================================================================
3202 // wxCSConv implementation
3203 // ============================================================================
3204
3205 void wxCSConv::Init()
3206 {
3207     m_name = NULL;
3208     m_convReal =  NULL;
3209     m_deferred = true;
3210 }
3211
3212 wxCSConv::wxCSConv(const wxString& charset)
3213 {
3214     Init();
3215
3216     if ( !charset.empty() )
3217     {
3218         SetName(charset.ToAscii());
3219     }
3220
3221 #if wxUSE_FONTMAP
3222     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3223 #else
3224     m_encoding = wxFONTENCODING_SYSTEM;
3225 #endif
3226 }
3227
3228 wxCSConv::wxCSConv(wxFontEncoding encoding)
3229 {
3230     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3231     {
3232         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3233
3234         encoding = wxFONTENCODING_SYSTEM;
3235     }
3236
3237     Init();
3238
3239     m_encoding = encoding;
3240 }
3241
3242 wxCSConv::~wxCSConv()
3243 {
3244     Clear();
3245 }
3246
3247 wxCSConv::wxCSConv(const wxCSConv& conv)
3248         : wxMBConv()
3249 {
3250     Init();
3251
3252     SetName(conv.m_name);
3253     m_encoding = conv.m_encoding;
3254 }
3255
3256 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3257 {
3258     Clear();
3259
3260     SetName(conv.m_name);
3261     m_encoding = conv.m_encoding;
3262
3263     return *this;
3264 }
3265
3266 void wxCSConv::Clear()
3267 {
3268     free(m_name);
3269     delete m_convReal;
3270
3271     m_name = NULL;
3272     m_convReal = NULL;
3273 }
3274
3275 void wxCSConv::SetName(const char *charset)
3276 {
3277     if (charset)
3278     {
3279         m_name = strdup(charset);
3280         m_deferred = true;
3281     }
3282 }
3283
3284 #if wxUSE_FONTMAP
3285
// cache used by wxCSConv::DoCreate(): maps a font encoding to the charset
// name with which an iconv-based converter could be created for it or to an
// empty string if this failed for all names of the encoding
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

static wxEncodingNameCache gs_nameCache;
3290 #endif
3291
3292 wxMBConv *wxCSConv::DoCreate() const
3293 {
3294 #if wxUSE_FONTMAP
3295     wxLogTrace(TRACE_STRCONV,
3296                wxT("creating conversion for %s"),
3297                (m_name ? m_name
3298                        : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
3299 #endif // wxUSE_FONTMAP
3300
3301     // check for the special case of ASCII or ISO8859-1 charset: as we have
3302     // special knowledge of it anyhow, we don't need to create a special
3303     // conversion object
3304     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3305             m_encoding == wxFONTENCODING_DEFAULT )
3306     {
3307         // don't convert at all
3308         return NULL;
3309     }
3310
3311     // we trust OS to do conversion better than we can so try external
3312     // conversion methods first
3313     //
3314     // the full order is:
3315     //      1. OS conversion (iconv() under Unix or Win32 API)
3316     //      2. hard coded conversions for UTF
3317     //      3. wxEncodingConverter as fall back
3318
3319     // step (1)
3320 #ifdef HAVE_ICONV
3321 #if !wxUSE_FONTMAP
3322     if ( m_name )
3323 #endif // !wxUSE_FONTMAP
3324     {
3325 #if wxUSE_FONTMAP
3326         wxFontEncoding encoding(m_encoding);
3327 #endif
3328
3329         if ( m_name )
3330         {
3331             wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
3332             if ( conv->IsOk() )
3333                 return conv;
3334
3335             delete conv;
3336
3337 #if wxUSE_FONTMAP
3338             encoding =
3339                 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3340 #endif // wxUSE_FONTMAP
3341         }
3342 #if wxUSE_FONTMAP
3343         {
3344             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3345             if ( it != gs_nameCache.end() )
3346             {
3347                 if ( it->second.empty() )
3348                     return NULL;
3349
3350                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
3351                 if ( conv->IsOk() )
3352                     return conv;
3353
3354                 delete conv;
3355             }
3356
3357             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3358             // CS : in case this does not return valid names (eg for MacRoman)
3359             // encoding got a 'failure' entry in the cache all the same,
3360             // although it just has to be created using a different method, so
3361             // only store failed iconv creation attempts (or perhaps we
3362             // shoulnd't do this at all ?)
3363             if ( names[0] != NULL )
3364             {
3365                 for ( ; *names; ++names )
3366                 {
3367                     // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
3368                     //             will need changes that will obsolete this
3369                     wxString name(*names);
3370                     wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
3371                     if ( conv->IsOk() )
3372                     {
3373                         gs_nameCache[encoding] = *names;
3374                         return conv;
3375                     }
3376
3377                     delete conv;
3378                 }
3379
3380                 gs_nameCache[encoding] = _T(""); // cache the failure
3381             }
3382         }
3383 #endif // wxUSE_FONTMAP
3384     }
3385 #endif // HAVE_ICONV
3386
3387 #ifdef wxHAVE_WIN32_MB2WC
3388     {
3389 #if wxUSE_FONTMAP
3390         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3391                                       : new wxMBConv_win32(m_encoding);
3392         if ( conv->IsOk() )
3393             return conv;
3394
3395         delete conv;
3396 #else
3397         return NULL;
3398 #endif
3399     }
3400 #endif // wxHAVE_WIN32_MB2WC
3401
3402 #if defined(__WXMAC__)
3403     {
3404         // leave UTF16 and UTF32 to the built-ins of wx
3405         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3406             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3407         {
3408 #if wxUSE_FONTMAP
3409             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3410                                         : new wxMBConv_mac(m_encoding);
3411 #else
3412             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3413 #endif
3414             if ( conv->IsOk() )
3415                  return conv;
3416
3417             delete conv;
3418         }
3419     }
3420 #endif
3421
3422 #ifdef __DARWIN__
3423     {
3424         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3425         {
3426 #if wxUSE_FONTMAP
3427             wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
3428                                           : new wxMBConv_cf(m_encoding);
3429 #else
3430             wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
3431 #endif
3432
3433             if ( conv->IsOk() )
3434                  return conv;
3435
3436             delete conv;
3437         }
3438     }
3439 #endif // __DARWIN__
3440
3441     // step (2)
3442     wxFontEncoding enc = m_encoding;
3443 #if wxUSE_FONTMAP
3444     if ( enc == wxFONTENCODING_SYSTEM && m_name )
3445     {
3446         // use "false" to suppress interactive dialogs -- we can be called from
3447         // anywhere and popping up a dialog from here is the last thing we want to
3448         // do
3449         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3450     }
3451 #endif // wxUSE_FONTMAP
3452
3453     switch ( enc )
3454     {
3455         case wxFONTENCODING_UTF7:
3456              return new wxMBConvUTF7;
3457
3458         case wxFONTENCODING_UTF8:
3459              return new wxMBConvUTF8;
3460
3461         case wxFONTENCODING_UTF16BE:
3462              return new wxMBConvUTF16BE;
3463
3464         case wxFONTENCODING_UTF16LE:
3465              return new wxMBConvUTF16LE;
3466
3467         case wxFONTENCODING_UTF32BE:
3468              return new wxMBConvUTF32BE;
3469
3470         case wxFONTENCODING_UTF32LE:
3471              return new wxMBConvUTF32LE;
3472
3473         default:
3474              // nothing to do but put here to suppress gcc warnings
3475              break;
3476     }
3477
3478     // step (3)
3479 #if wxUSE_FONTMAP
3480     {
3481         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3482                                       : new wxMBConv_wxwin(m_encoding);
3483         if ( conv->IsOk() )
3484             return conv;
3485
3486         delete conv;
3487     }
3488 #endif // wxUSE_FONTMAP
3489
3490     // NB: This is a hack to prevent deadlock. What could otherwise happen
3491     //     in Unicode build: wxConvLocal creation ends up being here
3492     //     because of some failure and logs the error. But wxLog will try to
3493     //     attach a timestamp, for which it will need wxConvLocal (to convert
3494     //     time to char* and then wchar_t*), but that fails, tries to log the
3495     //     error, but wxLog has an (already locked) critical section that
3496     //     guards the static buffer.
3497     static bool alreadyLoggingError = false;
3498     if (!alreadyLoggingError)
3499     {
3500         alreadyLoggingError = true;
3501         wxLogError(_("Cannot convert from the charset '%s'!"),
3502                    m_name ? m_name
3503                       :
3504 #if wxUSE_FONTMAP
3505                          (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding).ToAscii()
3506 #else // !wxUSE_FONTMAP
3507                          (const char*)wxString::Format(_("encoding %i"), m_encoding).ToAscii()
3508 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3509               );
3510
3511         alreadyLoggingError = false;
3512     }
3513
3514     return NULL;
3515 }
3516
3517 void wxCSConv::CreateConvIfNeeded() const
3518 {
3519     if ( m_deferred )
3520     {
3521         wxCSConv *self = (wxCSConv *)this; // const_cast
3522
3523         // if we don't have neither the name nor the encoding, use the default
3524         // encoding for this system
3525         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3526         {
3527 #if wxUSE_INTL
3528             self->m_encoding = wxLocale::GetSystemEncoding();
3529 #else
3530             // fallback to some reasonable default:
3531             self->m_encoding = wxFONTENCODING_ISO8859_1;
3532 #endif // wxUSE_INTL
3533         }
3534
3535         self->m_convReal = DoCreate();
3536         self->m_deferred = false;
3537     }
3538 }
3539
3540 bool wxCSConv::IsOk() const
3541 {
3542     CreateConvIfNeeded();
3543
3544     // special case: no convReal created for wxFONTENCODING_ISO8859_1
3545     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3546         return true; // always ok as we do it ourselves
3547
3548     // m_convReal->IsOk() is called at its own creation, so we know it must
3549     // be ok if m_convReal is non-NULL
3550     return m_convReal != NULL;
3551 }
3552
3553 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3554                          const char *src, size_t srcLen) const
3555 {
3556     CreateConvIfNeeded();
3557
3558     if (m_convReal)
3559         return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3560
3561     // latin-1 (direct)
3562     return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
3563 }
3564
3565 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3566                            const wchar_t *src, size_t srcLen) const
3567 {
3568     CreateConvIfNeeded();
3569
3570     if (m_convReal)
3571         return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3572
3573     // latin-1 (direct)
3574     return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
3575 }
3576
3577 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3578 {
3579     CreateConvIfNeeded();
3580
3581     if (m_convReal)
3582         return m_convReal->MB2WC(buf, psz, n);
3583
3584     // latin-1 (direct)
3585     size_t len = strlen(psz);
3586
3587     if (buf)
3588     {
3589         for (size_t c = 0; c <= len; c++)
3590             buf[c] = (unsigned char)(psz[c]);
3591     }
3592
3593     return len;
3594 }
3595
3596 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3597 {
3598     CreateConvIfNeeded();
3599
3600     if (m_convReal)
3601         return m_convReal->WC2MB(buf, psz, n);
3602
3603     // latin-1 (direct)
3604     const size_t len = wxWcslen(psz);
3605     if (buf)
3606     {
3607         for (size_t c = 0; c <= len; c++)
3608         {
3609             if (psz[c] > 0xFF)
3610                 return wxCONV_FAILED;
3611
3612             buf[c] = (char)psz[c];
3613         }
3614     }
3615     else
3616     {
3617         for (size_t c = 0; c <= len; c++)
3618         {
3619             if (psz[c] > 0xFF)
3620                 return wxCONV_FAILED;
3621         }
3622     }
3623
3624     return len;
3625 }
3626
3627 size_t wxCSConv::GetMBNulLen() const
3628 {
3629     CreateConvIfNeeded();
3630
3631     if ( m_convReal )
3632     {
3633         return m_convReal->GetMBNulLen();
3634     }
3635
3636     // otherwise, we are ISO-8859-1
3637     return 1;
3638 }
3639
3640 #if wxUSE_UNICODE_UTF8
3641 bool wxCSConv::IsUTF8() const
3642 {
3643     CreateConvIfNeeded();
3644
3645     if ( m_convReal )
3646     {
3647         return m_convReal->IsUTF8();
3648     }
3649
3650     // otherwise, we are ISO-8859-1
3651     return false;
3652 }
3653 #endif
3654
3655
3656 #if wxUSE_UNICODE
3657
3658 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3659 {
3660     if ( !s )
3661         return wxWCharBuffer();
3662
3663     wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3664     if ( !wbuf )
3665         wbuf = wxMBConvUTF8().cMB2WX(s);
3666     if ( !wbuf )
3667         wbuf = wxConvISO8859_1.cMB2WX(s);
3668
3669     return wbuf;
3670 }
3671
3672 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3673 {
3674     if ( !ws )
3675         return wxCharBuffer();
3676
3677     wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3678     if ( !buf )
3679         buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3680
3681     return buf;
3682 }
3683
3684 #endif // wxUSE_UNICODE
3685
3686 // ----------------------------------------------------------------------------
3687 // globals
3688 // ----------------------------------------------------------------------------
3689
// NB: The reason why we create converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object). In other
//     words, possibly _before_ the converter global object would be
//     initialized.
3696
3697 #undef wxConvLibc
3698 #undef wxConvUTF8
3699 #undef wxConvUTF7
3700 #undef wxConvLocal
3701 #undef wxConvISO8859_1
3702
// Define everything needed for the global converter "name":
//  - the exported pointer name##Ptr, initially NULL
//  - the exported accessor wxGet_##name##Ptr() returning the address of a
//    function-local static object of class impl_klass constructed with
//    ctor_args
//  - a file-static pointer gs_##name##instance initialized by calling the
//    accessor
//
// klass is the type of the pointers, impl_klass the class of the object.
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// Same as above for the common case when the object is of class klass itself.
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
3717
// wxConvLibc: the class implementing it depends on the platform
#ifdef __WINDOWS__
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#elif defined(__WXMAC__) && !defined(__MACH__)
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_mac, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#else
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif

// the UTF-8 and UTF-7 converters
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);

// wxCSConv objects for the system encoding and for ISO8859-1
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));

// wxConvCurrent starts as the libc converter and wxConvUI as wxConvLocal
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();

// wxConvFileName: under __WXOSX__ this is the decomposing UTF-8 converter for
// Carbon builds and the plain UTF-8 one otherwise; on all the other platforms
// it is the libc converter
#if defined(__WXMAC__) && defined(TARGET_CARBON)
static wxMBConv_macUTF8D wxConvMacUTF8DObj;
#endif
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
#ifdef __WXOSX__
#if defined(__WXMAC__) && defined(TARGET_CARBON)
                                    &wxConvMacUTF8DObj;
#else
                                    wxGet_wxConvUTF8Ptr();
#endif
#else // !__WXOSX__
                                    wxGet_wxConvLibcPtr();
#endif // __WXOSX__/!__WXOSX__
3748
3749 #else // !wxUSE_WCHAR_T
3750
// FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
// stand-ins in absence of wchar_t: all the global converters are plain
// wxMBConv objects in this case
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
3757
3758 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T