src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
// NOTE(review): presumably the trace mask for wxLogTrace() calls made further
// down in this file -- it is not used in the part visible here.
#define TRACE_STRCONV _T("strconv")

// WC_UTF16 is defined when wchar_t is 2 bytes wide. The conversion code below
// then treats wchar_t strings as UTF-16 (using encode_utf16()/decode_utf16()
// for characters outside the BMP) instead of storing one full code point per
// wchar_t.
#if SIZEOF_WCHAR_T == 2
    #define WC_UTF16
#endif
  84
  85 // ============================================================================
  86 // implementation
  87 // ============================================================================
  88
  89 // ----------------------------------------------------------------------------
  90 // UTF-16 en/decoding to/from UCS-4
  91 // ----------------------------------------------------------------------------
  92
  93
  94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  95 {
  96     if (input<=0xffff)
  97     {
  98         if (output)
  99             *output = (wxUint16) input;
 100         return 1;
 101     }
 102     else if (input>=0x110000)
 103     {
 104         return (size_t)-1;
 105     }
 106     else
 107     {
 108         if (output)
 109         {
 110             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 111             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 112         }
 113         return 2;
 114     }
 115 }
 116
 117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 118 {
 119     if ((*input<0xd800) || (*input>0xdfff))
 120     {
 121         output = *input;
 122         return 1;
 123     }
 124     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 125     {
 126         output = *input;
 127         return (size_t)-1;
 128     }
 129     else
 130     {
 131         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 132         return 2;
 133     }
 134 }
 135
 136
 137 // ----------------------------------------------------------------------------
 138 // wxMBConv
 139 // ----------------------------------------------------------------------------
 140
 141 wxMBConv::~wxMBConv()
 142 {
 143     // nothing to do here (necessary for Darwin linking probably)
 144 }
 145
 146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 147 {
 148     if ( psz )
 149     {
 150         // calculate the length of the buffer needed first
 151         size_t nLen = MB2WC(NULL, psz, 0);
 152         if ( nLen != (size_t)-1 )
 153         {
 154             // now do the actual conversion
 155             wxWCharBuffer buf(nLen);
 156             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 157             if ( nLen != (size_t)-1 )
 158             {
 159                 return buf;
 160             }
 161         }
 162     }
 163
 164     wxWCharBuffer buf((wchar_t *)NULL);
 165
 166     return buf;
 167 }
 168
 169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 170 {
 171     if ( pwz )
 172     {
 173         size_t nLen = WC2MB(NULL, pwz, 0);
 174         if ( nLen != (size_t)-1 )
 175         {
 176             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 177             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 178             if ( nLen != (size_t)-1 )
 179             {
 180                 return buf;
 181             }
 182         }
 183     }
 184
 185     wxCharBuffer buf((char *)NULL);
 186
 187     return buf;
 188 }
 189
 190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 191 {
 192     wxASSERT(pOutSize != NULL);
 193
 194     const char* szEnd = szString + nStringLen + 1;
 195     const char* szPos = szString;
 196     const char* szStart = szPos;
 197
 198     size_t nActualLength = 0;
 199     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 200
 201     wxWCharBuffer theBuffer(nCurrentSize);
 202
 203     //Convert the string until the length() is reached, continuing the
 204     //loop every time a null character is reached
 205     while(szPos != szEnd)
 206     {
 207         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 208
 209         //Get the length of the current (sub)string
 210         size_t nLen = MB2WC(NULL, szPos, 0);
 211
 212         //Invalid conversion?
 213         if( nLen == (size_t)-1 )
 214         {
 215             *pOutSize = 0;
 216             theBuffer.data()[0u] = wxT('\0');
 217             return theBuffer;
 218         }
 219
 220
 221         //Increase the actual length (+1 for current null character)
 222         nActualLength += nLen + 1;
 223
 224         //if buffer too big, realloc the buffer
 225         if (nActualLength > (nCurrentSize+1))
 226         {
 227             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 228             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 229             theBuffer = theNewBuffer;
 230             nCurrentSize <<= 1;
 231         }
 232
 233         //Convert the current (sub)string
 234         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 235         {
 236             *pOutSize = 0;
 237             theBuffer.data()[0u] = wxT('\0');
 238             return theBuffer;
 239         }
 240
 241         //Increment to next (sub)string
 242         //Note that we have to use strlen instead of nLen here
 243         //because XX2XX gives us the size of the output buffer,
 244         //which is not necessarily the length of the string
 245         szPos += strlen(szPos) + 1;
 246     }
 247
 248     //success - return actual length and the buffer
 249     *pOutSize = nActualLength;
 250     return theBuffer;
 251 }
 252
 253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 254 {
 255     wxASSERT(pOutSize != NULL);
 256
 257     const wchar_t* szEnd = szString + nStringLen + 1;
 258     const wchar_t* szPos = szString;
 259     const wchar_t* szStart = szPos;
 260
 261     size_t nActualLength = 0;
 262     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 263
 264     wxCharBuffer theBuffer(nCurrentSize);
 265
 266     //Convert the string until the length() is reached, continuing the
 267     //loop every time a null character is reached
 268     while(szPos != szEnd)
 269     {
 270         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 271
 272         //Get the length of the current (sub)string
 273         size_t nLen = WC2MB(NULL, szPos, 0);
 274
 275         //Invalid conversion?
 276         if( nLen == (size_t)-1 )
 277         {
 278             *pOutSize = 0;
 279             theBuffer.data()[0u] = wxT('\0');
 280             return theBuffer;
 281         }
 282
 283         //Increase the actual length (+1 for current null character)
 284         nActualLength += nLen + 1;
 285
 286         //if buffer too big, realloc the buffer
 287         if (nActualLength > (nCurrentSize+1))
 288         {
 289             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 290             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 291             theBuffer = theNewBuffer;
 292             nCurrentSize <<= 1;
 293         }
 294
 295         //Convert the current (sub)string
 296         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 297         {
 298             *pOutSize = 0;
 299             theBuffer.data()[0u] = wxT('\0');
 300             return theBuffer;
 301         }
 302
 303         //Increment to next (sub)string
 304         //Note that we have to use wxWcslen instead of nLen here
 305         //because XX2XX gives us the size of the output buffer,
 306         //which is not necessarily the length of the string
 307         szPos += wxWcslen(szPos) + 1;
 308     }
 309
 310     //success - return actual length and the buffer
 311     *pOutSize = nActualLength;
 312     return theBuffer;
 313 }
 314
 315 // ----------------------------------------------------------------------------
 316 // wxMBConvLibc
 317 // ----------------------------------------------------------------------------
 318
 319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 320 {
 321     return wxMB2WC(buf, psz, n);
 322 }
 323
 324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 325 {
 326     return wxWC2MB(buf, psz, n);
 327 }
 328
 329 #ifdef __UNIX__
 330
 331 // ----------------------------------------------------------------------------
 332 // wxConvBrokenFileNames
 333 // ----------------------------------------------------------------------------
 334
 335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 336 {
 337     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 338                   || wxStricmp(charset, _T("UTF8")) == 0  )
 339         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 340     else
 341         m_conv = new wxCSConv(charset);
 342 }
 343
 344 size_t
 345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 346                              const char *psz,
 347                              size_t outputSize) const
 348 {
 349     return m_conv->MB2WC( outputBuf, psz, outputSize );
 350 }
 351
 352 size_t
 353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 354                              const wchar_t *psz,
 355                              size_t outputSize) const
 356 {
 357     return m_conv->WC2MB( outputBuf, psz, outputSize );
 358 }
 359
 360 #endif
 361
 362 // ----------------------------------------------------------------------------
 363 // UTF-7
 364 // ----------------------------------------------------------------------------
 365
 366 // Implementation (C) 2004 Fredrik Roubert
 367
//
// BASE64 decoding table
//
// Indexed by the value of an input byte (0..255): gives the 6 bit value the
// byte stands for in BASE64 ('A'-'Z' -> 0x00-0x19, 'a'-'z' -> 0x1a-0x33,
// '0'-'9' -> 0x34-0x3d, '+' -> 0x3e, '/' -> 0x3f) or 0xff if the byte is not
// a BASE64 character. wxMBConvUTF7::MB2WC() uses the 0xff entries to detect
// the end of an encoded run.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 406
// Decode the NUL-terminated UTF-7 string psz.
//
// If buf is NULL only the length of the result is computed, otherwise the
// decoded characters are stored in buf and decoding stops once n of them
// have been produced. Returns the number of wide characters, not counting
// the trailing NUL, which is only written if there is room left for it.
size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        unsigned char cc = *psz++;
        if (cc != '+')
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;
        }
        else if (*psz == '-')
        {
            // encoded plus sign
            if (buf)
                *buf++ = cc;
            len++;
            psz++;
        }
        else
        {
            // BASE64 encoded string
            //
            // d accumulates the decoded bits, l is the number of bits in it
            // not yet consumed and lsb tells which half of the current 16 bit
            // character the next complete byte is (high byte comes first)
            //
            // NOTE(review): len is not checked against n inside this loop,
            // so a long encoded run can write past n characters
            bool lsb;
            unsigned char c;
            unsigned int d, l;
            for (lsb = false, d = 0, l = 0;
                (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
            {
                d <<= 6;
                d += cc;
                for (l += 6; l >= 8; lsb = !lsb)
                {
                    // extract the topmost complete byte
                    c = (unsigned char)((d >> (l -= 8)) % 256);
                    if (lsb)
                    {
                        // low byte: completes the character started below
                        if (buf)
                            *buf++ |= c;
                        len ++;
                    }
                    else
                        // high byte: start a new character
                        if (buf)
                            *buf = (wchar_t)(c << 8);
                }
            }
            // skip the '-' which may explicitly end the encoded run
            if (*psz == '-')
                psz++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 462
//
// BASE64 encoding table
//
// Maps a 6 bit value (0..63) to the character representing it; this is the
// inverse of the utf7unb64 decoding table.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 477
//
// UTF-7 encoding table
//
// Indexed by the ASCII code (0..127) of a character, gives its class:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
// wxMBConvUTF7::WC2MB() only copies class 0 characters as is, characters of
// all the other classes are BASE64 encoded.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 497
// Encode the NUL-terminated wide string psz as UTF-7.
//
// If buf is NULL only the length of the result is computed, otherwise the
// output is stored in buf and the outer loop stops once n bytes have been
// produced. Returns the number of bytes, not counting the trailing NUL which
// is only written if there is room left for it, or (size_t)-1 if (when
// wchar_t is not UTF-16) a character above 0xffff is encountered.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{


    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;
        if (cc < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // everything else is written as "+<BASE64>-", with '+' itself
            // becoming just "+-"
            if (buf)
                *buf++ = '+';
            len++;
            if (cc != '+')
            {
                // BASE64 encode string
                //
                // d accumulates the bits to output and l is the number of
                // bits in it not yet written; the run goes on until the end
                // of the string or the next directly encodable character
                //
                // NOTE(review): len is not checked against n inside this
                // loop, so an encoded run can write past n bytes
                unsigned int lsb, d, l;
                for (d = 0, l = 0; /*nothing*/; psz++)
                {
                    // feed the high byte of the character first, then the
                    // low one
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        // output all the complete groups of 6 bits
                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            if (buf)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }
                    // peek at the next character: it is only consumed (by the
                    // psz++ of the loop) if it belongs to this run as well
                    cc = *psz;
                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
                        break;
                }
                if (l != 0)
                {
                    // output the remaining bits, zero padded on the right
                    if (buf)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }
            if (buf)
                *buf++ = '-';
            len++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 565
 566 // ----------------------------------------------------------------------------
 567 // UTF-8
 568 // ----------------------------------------------------------------------------
 569
// utf8_max[i] is the biggest value which can be encoded using a lead byte
// followed by i continuation bytes. The decoder uses it to reject overlong
// sequences and the encoder to find the number of bytes needed; the last
// entry (0xffffffff) ensures that the encoder's search always terminates.
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 572
// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string: byte b is mapped to
// wxUnicodePUA + b, so the range [wxUnicodePUA, wxUnicodePUAEnd) has one
// value for each of the 256 possible bytes
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 577
// Decode the NUL-terminated UTF-8 string psz.
//
// If buf is NULL only the length of the result is computed, otherwise the
// decoded characters are stored in buf and decoding stops once n of them
// have been produced. Returns the number of wide characters, not counting
// the trailing NUL which is only written if there is room left for it.
//
// What happens with invalid input depends on m_options:
//  - MAP_INVALID_UTF8_TO_PUA: each byte of the bad sequence is mapped to
//    wxUnicodePUA + byte
//  - MAP_INVALID_UTF8_TO_OCTAL: each byte of the bad sequence is replaced by
//    a backslash followed by 3 octal digits (and genuine backslashes in the
//    input are doubled)
//  - otherwise (size_t)-1 is returned
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember where the sequence starts: needed to remap its bytes if it
        // turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;
        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // cnt is now the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence
                // (a continuation byte where a lead byte was expected)
                invalid = true;
            }
            else
            {
                // ocnt indexes the biggest value encodable in a shorter
                // sequence, see the overlong check below
                unsigned ocnt = cnt - 1;
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence
                        invalid = true;
                        break;
                    }
                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }
                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding
                    // (truncated, or the value fits in a shorter sequence)
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == (size_t)-1)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }
            if (invalid)
            {
                // remap all the bytes consumed so far, i.e. those in
                // [opsz, psz), according to the options
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != (size_t)-1);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only written if all 4 of its
                        // characters fit, but len is increased in any case
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }
                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return (size_t)-1;
                }
            }
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 714
 715 static inline bool isoctal(wchar_t wch)
 716 {
 717     return L'0' <= wch && wch <= L'7';
 718 }
 719
// Encode the NUL-terminated wide string psz as UTF-8.
//
// If buf is NULL only the length of the result is computed, otherwise the
// output is stored in buf and the loop stops once n bytes have been produced.
// Returns the number of bytes, not counting the trailing NUL which is only
// written if there is room left for it.
//
// The mappings done by MB2WC() for invalid input are undone here, according
// to m_options: characters in [wxUnicodePUA, wxUnicodePUAEnd) are turned
// back into single bytes with MAP_INVALID_UTF8_TO_PUA and, with
// MAP_INVALID_UTF8_TO_OCTAL, a double backslash becomes a single one and a
// backslash followed by 3 octal digits becomes the byte with this value.
size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wxUint32 cc;
#ifdef WC_UTF16
        // cast is ok for WC_UTF16
        // (if the pair is invalid, cc is the unit itself and we skip just it)
        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
        psz += (pa == (size_t)-1) ? 1 : pa;
#else
        cc=(*psz++) & 0x7fffffff;
#endif

        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
        {
            // restore the original (invalid) byte
            if (buf)
                *buf++ = (char)(cc - wxUnicodePUA);
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == L'\\' && psz[0] == L'\\' )
        {
            // escaped backslash: output only one of them
            if (buf)
                *buf++ = (char)cc;
            psz++;
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
                    cc == L'\\' &&
                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
        {
            // octal escape: output the byte it stands for
            if (buf)
            {
                *buf++ = (char) ((psz[0] - L'0')*0100 +
                                 (psz[1] - L'0')*010 +
                                 (psz[2] - L'0'));
            }

            psz += 3;
            len++;
        }
        else
        {
            // find the number of continuation bytes needed for this value
            unsigned cnt;
            for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
            if (!cnt)
            {
                // plain ASCII char
                if (buf)
                    *buf++ = (char) cc;
                len++;
            }

            else
            {
                // NOTE(review): there is no check that all cnt + 1 bytes fit
                // in the n bytes available, only len < n is tested above
                len += cnt + 1;
                if (buf)
                {
                    // (-128 >> cnt) yields the lead byte marker, i.e. cnt + 1
                    // leading 1 bits, provided that >> is an arithmetic shift
                    // for negative values (this is implementation-defined)
                    *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
                    while (cnt--)
                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
                }
            }
        }
    }

    if (buf && (len<n))
        *buf = 0;

    return len;
}
 794
 795 // ----------------------------------------------------------------------------
 796 // UTF-16
 797 // ----------------------------------------------------------------------------
 798
// The code below is written in terms of a "straight" converter, for UTF-16
// stored in the byte order of the machine we run on, and a "swap" one, for
// UTF-16 stored in the opposite byte order: map these names to the real
// classes according to the endianness of the host.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 806
 807
 808 #ifdef WC_UTF16
 809
 810 // copy 16bit MB to 16bit String
 811 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 812 {
 813     size_t len=0;
 814
 815     while (*(wxUint16*)psz && (!buf || len < n))
 816     {
 817         if (buf)
 818             *buf++ = *(wxUint16*)psz;
 819         len++;
 820
 821         psz += sizeof(wxUint16);
 822     }
 823     if (buf && len<n)   *buf=0;
 824
 825     return len;
 826 }
 827
 828
 829 // copy 16bit String to 16bit MB
 830 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 831 {
 832     size_t len=0;
 833
 834     while (*psz && (!buf || len < n))
 835     {
 836         if (buf)
 837         {
 838             *(wxUint16*)buf = *psz;
 839             buf += sizeof(wxUint16);
 840         }
 841         len += sizeof(wxUint16);
 842         psz++;
 843     }
 844     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 845
 846     return len;
 847 }
 848
 849
 850 // swap 16bit MB to 16bit String
 851 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 852 {
 853     size_t len=0;
 854
 855     while (*(wxUint16*)psz && (!buf || len < n))
 856     {
 857         if (buf)
 858         {
 859             ((char *)buf)[0] = psz[1];
 860             ((char *)buf)[1] = psz[0];
 861             buf++;
 862         }
 863         len++;
 864         psz += sizeof(wxUint16);
 865     }
 866     if (buf && len<n)   *buf=0;
 867
 868     return len;
 869 }
 870
 871
 872 // swap 16bit MB to 16bit String
 873 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 874 {
 875     size_t len=0;
 876
 877     while (*psz && (!buf || len < n))
 878     {
 879         if (buf)
 880         {
 881             *buf++ = ((char*)psz)[1];
 882             *buf++ = ((char*)psz)[0];
 883         }
 884         len += sizeof(wxUint16);
 885         psz++;
 886     }
 887     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 888
 889     return len;
 890 }
 891
 892
 893 #else // WC_UTF16
 894
 895
 896 // copy 16bit MB to 32bit String
 897 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 898 {
 899     size_t len=0;
 900
 901     while (*(wxUint16*)psz && (!buf || len < n))
 902     {
 903         wxUint32 cc;
 904         size_t pa=decode_utf16((wxUint16*)psz, cc);
 905         if (pa == (size_t)-1)
 906             return pa;
 907
 908         if (buf)
 909             *buf++ = (wchar_t)cc;
 910         len++;
 911         psz += pa * sizeof(wxUint16);
 912     }
 913     if (buf && len<n)   *buf=0;
 914
 915     return len;
 916 }
 917
 918
 919 // copy 32bit String to 16bit MB
 920 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 921 {
 922     size_t len=0;
 923
 924     while (*psz && (!buf || len < n))
 925     {
 926         wxUint16 cc[2];
 927         size_t pa=encode_utf16(*psz, cc);
 928
 929         if (pa == (size_t)-1)
 930             return pa;
 931
 932         if (buf)
 933         {
 934             *(wxUint16*)buf = cc[0];
 935             buf += sizeof(wxUint16);
 936             if (pa > 1)
 937             {
 938                 *(wxUint16*)buf = cc[1];
 939                 buf += sizeof(wxUint16);
 940             }
 941         }
 942
 943         len += pa*sizeof(wxUint16);
 944         psz++;
 945     }
 946     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 947
 948     return len;
 949 }
 950
 951
 952 // swap 16bit MB to 32bit String
 953 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 954 {
 955     size_t len=0;
 956
 957     while (*(wxUint16*)psz && (!buf || len < n))
 958     {
 959         wxUint32 cc;
 960         char tmp[4];
 961         tmp[0]=psz[1];  tmp[1]=psz[0];
 962         tmp[2]=psz[3];  tmp[3]=psz[2];
 963
 964         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 965         if (pa == (size_t)-1)
 966             return pa;
 967
 968         if (buf)
 969             *buf++ = (wchar_t)cc;
 970
 971         len++;
 972         psz += pa * sizeof(wxUint16);
 973     }
 974     if (buf && len<n)   *buf=0;
 975
 976     return len;
 977 }
 978
 979
 980 // swap 32bit String to 16bit MB
 981 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 982 {
 983     size_t len=0;
 984
 985     while (*psz && (!buf || len < n))
 986     {
 987         wxUint16 cc[2];
 988         size_t pa=encode_utf16(*psz, cc);
 989
 990         if (pa == (size_t)-1)
 991             return pa;
 992
 993         if (buf)
 994         {
 995             *buf++ = ((char*)cc)[1];
 996             *buf++ = ((char*)cc)[0];
 997             if (pa > 1)
 998             {
 999                 *buf++ = ((char*)cc)[3];
1000                 *buf++ = ((char*)cc)[2];
1001             }
1002         }
1003
1004         len += pa*sizeof(wxUint16);
1005         psz++;
1006     }
1007     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1008
1009     return len;
1010 }
1011
1012 #endif // WC_UTF16
1013
1014
1015 // ----------------------------------------------------------------------------
1016 // UTF-32
1017 // ----------------------------------------------------------------------------
1018
1019 #ifdef WORDS_BIGENDIAN
1020 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1021 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1022 #else
1023 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1024 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1025 #endif
1026
1027
1028 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1029 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1030
1031
1032 #ifdef WC_UTF16
1033
1034 // copy 32bit MB to 16bit String
1035 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1036 {
1037     size_t len=0;
1038
1039     while (*(wxUint32*)psz && (!buf || len < n))
1040     {
1041         wxUint16 cc[2];
1042
1043         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1044         if (pa == (size_t)-1)
1045             return pa;
1046
1047         if (buf)
1048         {
1049             *buf++ = cc[0];
1050             if (pa > 1)
1051                 *buf++ = cc[1];
1052         }
1053         len += pa;
1054         psz += sizeof(wxUint32);
1055     }
1056     if (buf && len<n)   *buf=0;
1057
1058     return len;
1059 }
1060
1061
1062 // copy 16bit String to 32bit MB
1063 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1064 {
1065     size_t len=0;
1066
1067     while (*psz && (!buf || len < n))
1068     {
1069         wxUint32 cc;
1070
1071         // cast is ok for WC_UTF16
1072         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1073         if (pa == (size_t)-1)
1074             return pa;
1075
1076         if (buf)
1077         {
1078             *(wxUint32*)buf = cc;
1079             buf += sizeof(wxUint32);
1080         }
1081         len += sizeof(wxUint32);
1082         psz += pa;
1083     }
1084
1085     if (buf && len<=n-sizeof(wxUint32))
1086         *(wxUint32*)buf=0;
1087
1088     return len;
1089 }
1090
1091
1092
1093 // swap 32bit MB to 16bit String
1094 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1095 {
1096     size_t len=0;
1097
1098     while (*(wxUint32*)psz && (!buf || len < n))
1099     {
1100         char tmp[4];
1101         tmp[0] = psz[3];   tmp[1] = psz[2];
1102         tmp[2] = psz[1];   tmp[3] = psz[0];
1103
1104
1105         wxUint16 cc[2];
1106
1107         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1108         if (pa == (size_t)-1)
1109             return pa;
1110
1111         if (buf)
1112         {
1113             *buf++ = cc[0];
1114             if (pa > 1)
1115                 *buf++ = cc[1];
1116         }
1117         len += pa;
1118         psz += sizeof(wxUint32);
1119     }
1120
1121     if (buf && len<n)
1122         *buf=0;
1123
1124     return len;
1125 }
1126
1127
1128 // swap 16bit String to 32bit MB
1129 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1130 {
1131     size_t len=0;
1132
1133     while (*psz && (!buf || len < n))
1134     {
1135         char cc[4];
1136
1137         // cast is ok for WC_UTF16
1138         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1139         if (pa == (size_t)-1)
1140             return pa;
1141
1142         if (buf)
1143         {
1144             *buf++ = cc[3];
1145             *buf++ = cc[2];
1146             *buf++ = cc[1];
1147             *buf++ = cc[0];
1148         }
1149         len += sizeof(wxUint32);
1150         psz += pa;
1151     }
1152
1153     if (buf && len<=n-sizeof(wxUint32))
1154         *(wxUint32*)buf=0;
1155
1156     return len;
1157 }
1158
1159 #else // WC_UTF16
1160
1161
1162 // copy 32bit MB to 32bit String
1163 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1164 {
1165     size_t len=0;
1166
1167     while (*(wxUint32*)psz && (!buf || len < n))
1168     {
1169         if (buf)
1170             *buf++ = (wchar_t)(*(wxUint32*)psz);
1171         len++;
1172         psz += sizeof(wxUint32);
1173     }
1174
1175     if (buf && len<n)
1176         *buf=0;
1177
1178     return len;
1179 }
1180
1181
1182 // copy 32bit String to 32bit MB
1183 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1184 {
1185     size_t len=0;
1186
1187     while (*psz && (!buf || len < n))
1188     {
1189         if (buf)
1190         {
1191             *(wxUint32*)buf = *psz;
1192             buf += sizeof(wxUint32);
1193         }
1194
1195         len += sizeof(wxUint32);
1196         psz++;
1197     }
1198
1199     if (buf && len<=n-sizeof(wxUint32))
1200         *(wxUint32*)buf=0;
1201
1202     return len;
1203 }
1204
1205
1206 // swap 32bit MB to 32bit String
1207 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1208 {
1209     size_t len=0;
1210
1211     while (*(wxUint32*)psz && (!buf || len < n))
1212     {
1213         if (buf)
1214         {
1215             ((char *)buf)[0] = psz[3];
1216             ((char *)buf)[1] = psz[2];
1217             ((char *)buf)[2] = psz[1];
1218             ((char *)buf)[3] = psz[0];
1219             buf++;
1220         }
1221         len++;
1222         psz += sizeof(wxUint32);
1223     }
1224
1225     if (buf && len<n)
1226         *buf=0;
1227
1228     return len;
1229 }
1230
1231
1232 // swap 32bit String to 32bit MB
1233 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1234 {
1235     size_t len=0;
1236
1237     while (*psz && (!buf || len < n))
1238     {
1239         if (buf)
1240         {
1241             *buf++ = ((char *)psz)[3];
1242             *buf++ = ((char *)psz)[2];
1243             *buf++ = ((char *)psz)[1];
1244             *buf++ = ((char *)psz)[0];
1245         }
1246         len += sizeof(wxUint32);
1247         psz++;
1248     }
1249
1250     if (buf && len<=n-sizeof(wxUint32))
1251         *(wxUint32*)buf=0;
1252
1253     return len;
1254 }
1255
1256
1257 #endif // WC_UTF16
1258
1259
1260 // ============================================================================
1261 // The classes doing conversion using the iconv_xxx() functions
1262 // ============================================================================
1263
1264 #ifdef HAVE_ICONV
1265
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if the output buffer is _exactly_ as big as needed. Such a case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when a _real_ error occurs, the number of
//     bytes left in the input buffer is non-zero. Hence, this alternative
//     test for iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// NB: the macro arguments are parenthesised so that passing an expression
//     (and not just a plain variable) is evaluated as intended.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type iconv() declares for it
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value of an iconv_t which doesn't refer to an open conversion descriptor
#define ICONV_T_INVALID ((iconv_t)-1)
1284
1285 #if SIZEOF_WCHAR_T == 4
1286     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1287     #define WC_ENC      wxFONTENCODING_UTF32
1288 #elif SIZEOF_WCHAR_T == 2
1289     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1290     #define WC_ENC      wxFONTENCODING_UTF16
1291 #else // sizeof(wchar_t) != 2 nor 4
1292     // does this ever happen?
1293     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1294 #endif
1295
1296 // ----------------------------------------------------------------------------
1297 // wxMBConv_iconv: encapsulates an iconv character set
1298 // ----------------------------------------------------------------------------
1299
1300 class wxMBConv_iconv : public wxMBConv
1301 {
1302 public:
1303     wxMBConv_iconv(const wxChar *name);
1304     virtual ~wxMBConv_iconv();
1305
1306     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1307     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1308
1309     bool IsOk() const
1310         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1311
1312 protected:
1313     // the iconv handlers used to translate from multibyte to wide char and in
1314     // the other direction
1315     iconv_t m2w,
1316             w2m;
1317 #if wxUSE_THREADS
1318     // guards access to m2w and w2m objects
1319     wxMutex m_iconvMutex;
1320 #endif
1321
1322 private:
1323     // the name (for iconv_open()) of a wide char charset -- if none is
1324     // available on this machine, it will remain NULL
1325     static wxString ms_wcCharsetName;
1326
1327     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1328     // different endian-ness than the native one
1329     static bool ms_wcNeedsSwap;
1330 };
1331
1332 // make the constructor available for unit testing
1333 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1334 {
1335     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1336     if ( !result->IsOk() )
1337     {
1338         delete result;
1339         return 0;
1340     }
1341     return result;
1342 }
1343
1344 wxString wxMBConv_iconv::ms_wcCharsetName;
1345 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1346
1347 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1348 {
1349     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1350     // names for the charsets
1351     const wxCharBuffer cname(wxString(name).ToAscii());
1352
1353     // check for charset that represents wchar_t:
1354     if ( ms_wcCharsetName.empty() )
1355     {
1356         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1357
1358 #if wxUSE_FONTMAP
1359         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1360 #else // !wxUSE_FONTMAP
1361         static const wxChar *names[] =
1362         {
1363 #if SIZEOF_WCHAR_T == 4
1364             _T("UCS-4"),
1365 #elif SIZEOF_WCHAR_T = 2
1366             _T("UCS-2"),
1367 #endif
1368             NULL
1369         };
1370 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1371
1372         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1373         {
1374             const wxString nameCS(*names);
1375
1376             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1377             wxString nameXE(nameCS);
1378             #ifdef WORDS_BIGENDIAN
1379                 nameXE += _T("BE");
1380             #else // little endian
1381                 nameXE += _T("LE");
1382             #endif
1383
1384             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1385                        nameXE.c_str());
1386
1387             m2w = iconv_open(nameXE.ToAscii(), cname);
1388             if ( m2w == ICONV_T_INVALID )
1389             {
1390                 // try charset w/o bytesex info (e.g. "UCS4")
1391                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1392                            nameCS.c_str());
1393                 m2w = iconv_open(nameCS.ToAscii(), cname);
1394
1395                 // and check for bytesex ourselves:
1396                 if ( m2w != ICONV_T_INVALID )
1397                 {
1398                     char    buf[2], *bufPtr;
1399                     wchar_t wbuf[2], *wbufPtr;
1400                     size_t  insz, outsz;
1401                     size_t  res;
1402
1403                     buf[0] = 'A';
1404                     buf[1] = 0;
1405                     wbuf[0] = 0;
1406                     insz = 2;
1407                     outsz = SIZEOF_WCHAR_T * 2;
1408                     wbufPtr = wbuf;
1409                     bufPtr = buf;
1410
1411                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1412                                 (char**)&wbufPtr, &outsz);
1413
1414                     if (ICONV_FAILED(res, insz))
1415                     {
1416                         wxLogLastError(wxT("iconv"));
1417                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1418                                    nameCS.c_str());
1419                     }
1420                     else // ok, can convert to this encoding, remember it
1421                     {
1422                         ms_wcCharsetName = nameCS;
1423                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1424                     }
1425                 }
1426             }
1427             else // use charset not requiring byte swapping
1428             {
1429                 ms_wcCharsetName = nameXE;
1430             }
1431         }
1432
1433         wxLogTrace(TRACE_STRCONV,
1434                    wxT("iconv wchar_t charset is \"%s\"%s"),
1435                    ms_wcCharsetName.empty() ? _T("<none>")
1436                                             : ms_wcCharsetName.c_str(),
1437                    ms_wcNeedsSwap ? _T(" (needs swap)")
1438                                   : _T(""));
1439     }
1440     else // we already have ms_wcCharsetName
1441     {
1442         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1443     }
1444
1445     if ( ms_wcCharsetName.empty() )
1446     {
1447         w2m = ICONV_T_INVALID;
1448     }
1449     else
1450     {
1451         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1452         if ( w2m == ICONV_T_INVALID )
1453         {
1454             wxLogTrace(TRACE_STRCONV,
1455                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1456                        ms_wcCharsetName.c_str(), cname.data());
1457         }
1458     }
1459 }
1460
1461 wxMBConv_iconv::~wxMBConv_iconv()
1462 {
1463     if ( m2w != ICONV_T_INVALID )
1464         iconv_close(m2w);
1465     if ( w2m != ICONV_T_INVALID )
1466         iconv_close(w2m);
1467 }
1468
1469 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1470 {
1471 #if wxUSE_THREADS
1472     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1473     //     Unfortunately there is a couple of global wxCSConv objects such as
1474     //     wxConvLocal that are used all over wx code, so we have to make sure
1475     //     the handle is used by at most one thread at the time. Otherwise
1476     //     only a few wx classes would be safe to use from non-main threads
1477     //     as MB<->WC conversion would fail "randomly".
1478     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1479 #endif
1480
1481     size_t inbuf = strlen(psz);
1482     size_t outbuf = n * SIZEOF_WCHAR_T;
1483     size_t res, cres;
1484     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1485     wchar_t *bufPtr = buf;
1486     const char *pszPtr = psz;
1487
1488     if (buf)
1489     {
1490         // have destination buffer, convert there
1491         cres = iconv(m2w,
1492                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1493                      (char**)&bufPtr, &outbuf);
1494         res = n - (outbuf / SIZEOF_WCHAR_T);
1495
1496         if (ms_wcNeedsSwap)
1497         {
1498             // convert to native endianness
1499             for ( unsigned i = 0; i < res; i++ )
1500                 buf[n] = WC_BSWAP(buf[i]);
1501         }
1502
1503         // NB: iconv was given only strlen(psz) characters on input, and so
1504         //     it couldn't convert the trailing zero. Let's do it ourselves
1505         //     if there's some room left for it in the output buffer.
1506         if (res < n)
1507             buf[res] = 0;
1508     }
1509     else
1510     {
1511         // no destination buffer... convert using temp buffer
1512         // to calculate destination buffer requirement
1513         wchar_t tbuf[8];
1514         res = 0;
1515         do {
1516             bufPtr = tbuf;
1517             outbuf = 8*SIZEOF_WCHAR_T;
1518
1519             cres = iconv(m2w,
1520                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1521                          (char**)&bufPtr, &outbuf );
1522
1523             res += 8-(outbuf/SIZEOF_WCHAR_T);
1524         } while ((cres==(size_t)-1) && (errno==E2BIG));
1525     }
1526
1527     if (ICONV_FAILED(cres, inbuf))
1528     {
1529         //VS: it is ok if iconv fails, hence trace only
1530         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1531         return (size_t)-1;
1532     }
1533
1534     return res;
1535 }
1536
1537 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1538 {
1539 #if wxUSE_THREADS
1540     // NB: explained in MB2WC
1541     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1542 #endif
1543
1544     size_t inlen = wxWcslen(psz);
1545     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1546     size_t outbuf = n;
1547     size_t res, cres;
1548
1549     wchar_t *tmpbuf = 0;
1550
1551     if (ms_wcNeedsSwap)
1552     {
1553         // need to copy to temp buffer to switch endianness
1554         // (doing WC_BSWAP twice on the original buffer won't help, as it
1555         //  could be in read-only memory, or be accessed in some other thread)
1556         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1557         for ( size_t i = 0; i < inlen; i++ )
1558             tmpbuf[n] = WC_BSWAP(psz[i]);
1559         tmpbuf[inlen] = L'\0';
1560         psz = tmpbuf;
1561     }
1562
1563     if (buf)
1564     {
1565         // have destination buffer, convert there
1566         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1567
1568         res = n-outbuf;
1569
1570         // NB: iconv was given only wcslen(psz) characters on input, and so
1571         //     it couldn't convert the trailing zero. Let's do it ourselves
1572         //     if there's some room left for it in the output buffer.
1573         if (res < n)
1574             buf[0] = 0;
1575     }
1576     else
1577     {
1578         // no destination buffer... convert using temp buffer
1579         // to calculate destination buffer requirement
1580         char tbuf[16];
1581         res = 0;
1582         do {
1583             buf = tbuf; outbuf = 16;
1584
1585             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1586
1587             res += 16 - outbuf;
1588         } while ((cres==(size_t)-1) && (errno==E2BIG));
1589     }
1590
1591     if (ms_wcNeedsSwap)
1592     {
1593         free(tmpbuf);
1594     }
1595
1596     if (ICONV_FAILED(cres, inbuf))
1597     {
1598         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1599         return (size_t)-1;
1600     }
1601
1602     return res;
1603 }
1604
1605 #endif // HAVE_ICONV
1606
1607
1608 // ============================================================================
1609 // Win32 conversion classes
1610 // ============================================================================
1611
1612 #ifdef wxHAVE_WIN32_MB2WC
1613
1614 // from utils.cpp
1615 #if wxUSE_FONTMAP
1616 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1617 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1618 #endif
1619
1620 class wxMBConv_win32 : public wxMBConv
1621 {
1622 public:
1623     wxMBConv_win32()
1624     {
1625         m_CodePage = CP_ACP;
1626     }
1627
1628 #if wxUSE_FONTMAP
1629     wxMBConv_win32(const wxChar* name)
1630     {
1631         m_CodePage = wxCharsetToCodepage(name);
1632     }
1633
1634     wxMBConv_win32(wxFontEncoding encoding)
1635     {
1636         m_CodePage = wxEncodingToCodepage(encoding);
1637     }
1638 #endif
1639
1640     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1641     {
1642         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1643         // the behaviour is not compatible with the Unix version (using iconv)
1644         // and break the library itself, e.g. wxTextInputStream::NextChar()
1645         // wouldn't work if reading an incomplete MB char didn't result in an
1646         // error
1647         //
1648         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1649         // an error (tested under Windows Server 2003) and apparently it is
1650         // done on purpose, i.e. the function accepts any input in this case
1651         // and although I'd prefer to return error on ill-formed output, our
1652         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1653         // explicitly ill-formed according to RFC 2152) neither so we don't
1654         // even have any fallback here...
1655         //
1656         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1657         // Win XP or newer and if it is specified on older versions, conversion
1658         // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1659         // fails. So we can only use the flag on newer Windows versions.
1660         // Additionally, the flag is not supported by UTF7, symbol and CJK
1661         // encodings. See here:
1662         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1663         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1664         int flags = 0;
1665         if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1666              m_CodePage < 50000 &&
1667              IsAtLeastWin2kSP4() )
1668         {
1669             flags = MB_ERR_INVALID_CHARS;
1670         }
1671         else if ( m_CodePage == CP_UTF8 )
1672         {
1673             // Avoid round-trip in the special case of UTF-8 by using our
1674             // own UTF-8 conversion code:
1675             return wxMBConvUTF8().MB2WC(buf, psz, n);
1676         }
1677
1678         const size_t len = ::MultiByteToWideChar
1679                              (
1680                                 m_CodePage,     // code page
1681                                 flags,          // flags: fall on error
1682                                 psz,            // input string
1683                                 -1,             // its length (NUL-terminated)
1684                                 buf,            // output string
1685                                 buf ? n : 0     // size of output buffer
1686                              );
1687         if ( !len )
1688         {
1689             // function totally failed
1690             return (size_t)-1;
1691         }
1692
1693         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1694         // check if we succeeded, by doing a double trip:
1695         if ( !flags && buf )
1696         {
1697             const size_t mbLen = strlen(psz);
1698             wxCharBuffer mbBuf(mbLen);
1699             if ( ::WideCharToMultiByte
1700                    (
1701                       m_CodePage,
1702                       0,
1703                       buf,
1704                       -1,
1705                       mbBuf.data(),
1706                       mbLen + 1,        // size in bytes, not length
1707                       NULL,
1708                       NULL
1709                    ) == 0 ||
1710                   strcmp(mbBuf, psz) != 0 )
1711             {
1712                 // we didn't obtain the same thing we started from, hence
1713                 // the conversion was lossy and we consider that it failed
1714                 return (size_t)-1;
1715             }
1716         }
1717
1718         // note that it returns count of written chars for buf != NULL and size
1719         // of the needed buffer for buf == NULL so in either case the length of
1720         // the string (which never includes the terminating NUL) is one less
1721         return len - 1;
1722     }
1723
1724     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1725     {
1726         /*
1727             we have a problem here: by default, WideCharToMultiByte() may
1728             replace characters unrepresentable in the target code page with bad
1729             quality approximations such as turning "1/2" symbol (U+00BD) into
1730             "1" for the code pages which don't have it and we, obviously, want
1731             to avoid this at any price
1732
1733             the trouble is that this function does it _silently_, i.e. it won't
1734             even tell us whether it did or not... Win98/2000 and higher provide
1735             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1736             we have to resort to a round trip, i.e. check that converting back
1737             results in the same string -- this is, of course, expensive but
1738             otherwise we simply can't be sure to not garble the data.
1739          */
1740
1741         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1742         // it doesn't work with CJK encodings (which we test for rather roughly
1743         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1744         // supporting it
1745         BOOL usedDef wxDUMMY_INITIALIZE(false);
1746         BOOL *pUsedDef;
1747         int flags;
1748         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1749         {
1750             // it's our lucky day
1751             flags = WC_NO_BEST_FIT_CHARS;
1752             pUsedDef = &usedDef;
1753         }
1754         else // old system or unsupported encoding
1755         {
1756             flags = 0;
1757             pUsedDef = NULL;
1758         }
1759
1760         const size_t len = ::WideCharToMultiByte
1761                              (
1762                                 m_CodePage,     // code page
1763                                 flags,          // either none or no best fit
1764                                 pwz,            // input string
1765                                 -1,             // it is (wide) NUL-terminated
1766                                 buf,            // output buffer
1767                                 buf ? n : 0,    // and its size
1768                                 NULL,           // default "replacement" char
1769                                 pUsedDef        // [out] was it used?
1770                              );
1771
1772         if ( !len )
1773         {
1774             // function totally failed
1775             return (size_t)-1;
1776         }
1777
1778         // if we were really converting, check if we succeeded
1779         if ( buf )
1780         {
1781             if ( flags )
1782             {
1783                 // check if the conversion failed, i.e. if any replacements
1784                 // were done
1785                 if ( usedDef )
1786                     return (size_t)-1;
1787             }
1788             else // we must resort to double tripping...
1789             {
1790                 wxWCharBuffer wcBuf(n);
1791                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1792                         wcscmp(wcBuf, pwz) != 0 )
1793                 {
1794                     // we didn't obtain the same thing we started from, hence
1795                     // the conversion was lossy and we consider that it failed
1796                     return (size_t)-1;
1797                 }
1798             }
1799         }
1800
1801         // see the comment above for the reason of "len - 1"
1802         return len - 1;
1803     }
1804
1805     bool IsOk() const { return m_CodePage != -1; }
1806
1807 private:
1808     static bool CanUseNoBestFit()
1809     {
1810         static int s_isWin98Or2k = -1;
1811
1812         if ( s_isWin98Or2k == -1 )
1813         {
1814             int verMaj, verMin;
1815             switch ( wxGetOsVersion(&verMaj, &verMin) )
1816             {
1817                 case wxWIN95:
1818                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1819                     break;
1820
1821                 case wxWINDOWS_NT:
1822                     s_isWin98Or2k = verMaj >= 5;
1823                     break;
1824
1825                 default:
1826                     // unknown, be conseravtive by default
1827                     s_isWin98Or2k = 0;
1828             }
1829
1830             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1831         }
1832
1833         return s_isWin98Or2k == 1;
1834     }
1835
1836     static bool IsAtLeastWin2kSP4()
1837     {
1838 #ifdef __WXWINCE__
1839         return false;
1840 #else
1841         static int s_isAtLeastWin2kSP4 = -1;
1842
1843         if ( s_isAtLeastWin2kSP4 == -1 )
1844         {
1845             OSVERSIONINFOEX ver;
1846
1847             memset(&ver, 0, sizeof(ver));
1848             ver.dwOSVersionInfoSize = sizeof(ver);
1849             GetVersionEx((OSVERSIONINFO*)&ver);
1850
1851             s_isAtLeastWin2kSP4 =
1852               ((ver.dwMajorVersion > 5) || // Vista+
1853                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1854                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1855                ver.wServicePackMajor >= 4)) // 2000 SP4+
1856               ? 1 : 0;
1857         }
1858
1859         return s_isAtLeastWin2kSP4 == 1;
1860 #endif
1861     }
1862
1863     long m_CodePage;
1864 };
1865
1866 #endif // wxHAVE_WIN32_MB2WC
1867
1868 // ============================================================================
1869 // Cocoa conversion classes
1870 // ============================================================================
1871
1872 #if defined(__WXCOCOA__)
1873
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1877
1878 #include <CoreFoundation/CFString.h>
1879 #include <CoreFoundation/CFStringEncodingExt.h>
1880
1881 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1882 {
1883     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1884     if ( encoding == wxFONTENCODING_DEFAULT )
1885     {
1886         enc = CFStringGetSystemEncoding();
1887     }
1888     else switch( encoding)
1889     {
1890         case wxFONTENCODING_ISO8859_1 :
1891             enc = kCFStringEncodingISOLatin1 ;
1892             break ;
1893         case wxFONTENCODING_ISO8859_2 :
1894             enc = kCFStringEncodingISOLatin2;
1895             break ;
1896         case wxFONTENCODING_ISO8859_3 :
1897             enc = kCFStringEncodingISOLatin3 ;
1898             break ;
1899         case wxFONTENCODING_ISO8859_4 :
1900             enc = kCFStringEncodingISOLatin4;
1901             break ;
1902         case wxFONTENCODING_ISO8859_5 :
1903             enc = kCFStringEncodingISOLatinCyrillic;
1904             break ;
1905         case wxFONTENCODING_ISO8859_6 :
1906             enc = kCFStringEncodingISOLatinArabic;
1907             break ;
1908         case wxFONTENCODING_ISO8859_7 :
1909             enc = kCFStringEncodingISOLatinGreek;
1910             break ;
1911         case wxFONTENCODING_ISO8859_8 :
1912             enc = kCFStringEncodingISOLatinHebrew;
1913             break ;
1914         case wxFONTENCODING_ISO8859_9 :
1915             enc = kCFStringEncodingISOLatin5;
1916             break ;
1917         case wxFONTENCODING_ISO8859_10 :
1918             enc = kCFStringEncodingISOLatin6;
1919             break ;
1920         case wxFONTENCODING_ISO8859_11 :
1921             enc = kCFStringEncodingISOLatinThai;
1922             break ;
1923         case wxFONTENCODING_ISO8859_13 :
1924             enc = kCFStringEncodingISOLatin7;
1925             break ;
1926         case wxFONTENCODING_ISO8859_14 :
1927             enc = kCFStringEncodingISOLatin8;
1928             break ;
1929         case wxFONTENCODING_ISO8859_15 :
1930             enc = kCFStringEncodingISOLatin9;
1931             break ;
1932
1933         case wxFONTENCODING_KOI8 :
1934             enc = kCFStringEncodingKOI8_R;
1935             break ;
1936         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1937             enc = kCFStringEncodingDOSRussian;
1938             break ;
1939
1940 //      case wxFONTENCODING_BULGARIAN :
1941 //          enc = ;
1942 //          break ;
1943
1944         case wxFONTENCODING_CP437 :
1945             enc =kCFStringEncodingDOSLatinUS ;
1946             break ;
1947         case wxFONTENCODING_CP850 :
1948             enc = kCFStringEncodingDOSLatin1;
1949             break ;
1950         case wxFONTENCODING_CP852 :
1951             enc = kCFStringEncodingDOSLatin2;
1952             break ;
1953         case wxFONTENCODING_CP855 :
1954             enc = kCFStringEncodingDOSCyrillic;
1955             break ;
1956         case wxFONTENCODING_CP866 :
1957             enc =kCFStringEncodingDOSRussian ;
1958             break ;
1959         case wxFONTENCODING_CP874 :
1960             enc = kCFStringEncodingDOSThai;
1961             break ;
1962         case wxFONTENCODING_CP932 :
1963             enc = kCFStringEncodingDOSJapanese;
1964             break ;
1965         case wxFONTENCODING_CP936 :
1966             enc =kCFStringEncodingDOSChineseSimplif ;
1967             break ;
1968         case wxFONTENCODING_CP949 :
1969             enc = kCFStringEncodingDOSKorean;
1970             break ;
1971         case wxFONTENCODING_CP950 :
1972             enc = kCFStringEncodingDOSChineseTrad;
1973             break ;
1974         case wxFONTENCODING_CP1250 :
1975             enc = kCFStringEncodingWindowsLatin2;
1976             break ;
1977         case wxFONTENCODING_CP1251 :
1978             enc =kCFStringEncodingWindowsCyrillic ;
1979             break ;
1980         case wxFONTENCODING_CP1252 :
1981             enc =kCFStringEncodingWindowsLatin1 ;
1982             break ;
1983         case wxFONTENCODING_CP1253 :
1984             enc = kCFStringEncodingWindowsGreek;
1985             break ;
1986         case wxFONTENCODING_CP1254 :
1987             enc = kCFStringEncodingWindowsLatin5;
1988             break ;
1989         case wxFONTENCODING_CP1255 :
1990             enc =kCFStringEncodingWindowsHebrew ;
1991             break ;
1992         case wxFONTENCODING_CP1256 :
1993             enc =kCFStringEncodingWindowsArabic ;
1994             break ;
1995         case wxFONTENCODING_CP1257 :
1996             enc = kCFStringEncodingWindowsBalticRim;
1997             break ;
1998 //   This only really encodes to UTF7 (if that) evidently
1999 //        case wxFONTENCODING_UTF7 :
2000 //            enc = kCFStringEncodingNonLossyASCII ;
2001 //            break ;
2002         case wxFONTENCODING_UTF8 :
2003             enc = kCFStringEncodingUTF8 ;
2004             break ;
2005         case wxFONTENCODING_EUC_JP :
2006             enc = kCFStringEncodingEUC_JP;
2007             break ;
2008         case wxFONTENCODING_UTF16 :
2009             enc = kCFStringEncodingUnicode ;
2010             break ;
2011         case wxFONTENCODING_MACROMAN :
2012             enc = kCFStringEncodingMacRoman ;
2013             break ;
2014         case wxFONTENCODING_MACJAPANESE :
2015             enc = kCFStringEncodingMacJapanese ;
2016             break ;
2017         case wxFONTENCODING_MACCHINESETRAD :
2018             enc = kCFStringEncodingMacChineseTrad ;
2019             break ;
2020         case wxFONTENCODING_MACKOREAN :
2021             enc = kCFStringEncodingMacKorean ;
2022             break ;
2023         case wxFONTENCODING_MACARABIC :
2024             enc = kCFStringEncodingMacArabic ;
2025             break ;
2026         case wxFONTENCODING_MACHEBREW :
2027             enc = kCFStringEncodingMacHebrew ;
2028             break ;
2029         case wxFONTENCODING_MACGREEK :
2030             enc = kCFStringEncodingMacGreek ;
2031             break ;
2032         case wxFONTENCODING_MACCYRILLIC :
2033             enc = kCFStringEncodingMacCyrillic ;
2034             break ;
2035         case wxFONTENCODING_MACDEVANAGARI :
2036             enc = kCFStringEncodingMacDevanagari ;
2037             break ;
2038         case wxFONTENCODING_MACGURMUKHI :
2039             enc = kCFStringEncodingMacGurmukhi ;
2040             break ;
2041         case wxFONTENCODING_MACGUJARATI :
2042             enc = kCFStringEncodingMacGujarati ;
2043             break ;
2044         case wxFONTENCODING_MACORIYA :
2045             enc = kCFStringEncodingMacOriya ;
2046             break ;
2047         case wxFONTENCODING_MACBENGALI :
2048             enc = kCFStringEncodingMacBengali ;
2049             break ;
2050         case wxFONTENCODING_MACTAMIL :
2051             enc = kCFStringEncodingMacTamil ;
2052             break ;
2053         case wxFONTENCODING_MACTELUGU :
2054             enc = kCFStringEncodingMacTelugu ;
2055             break ;
2056         case wxFONTENCODING_MACKANNADA :
2057             enc = kCFStringEncodingMacKannada ;
2058             break ;
2059         case wxFONTENCODING_MACMALAJALAM :
2060             enc = kCFStringEncodingMacMalayalam ;
2061             break ;
2062         case wxFONTENCODING_MACSINHALESE :
2063             enc = kCFStringEncodingMacSinhalese ;
2064             break ;
2065         case wxFONTENCODING_MACBURMESE :
2066             enc = kCFStringEncodingMacBurmese ;
2067             break ;
2068         case wxFONTENCODING_MACKHMER :
2069             enc = kCFStringEncodingMacKhmer ;
2070             break ;
2071         case wxFONTENCODING_MACTHAI :
2072             enc = kCFStringEncodingMacThai ;
2073             break ;
2074         case wxFONTENCODING_MACLAOTIAN :
2075             enc = kCFStringEncodingMacLaotian ;
2076             break ;
2077         case wxFONTENCODING_MACGEORGIAN :
2078             enc = kCFStringEncodingMacGeorgian ;
2079             break ;
2080         case wxFONTENCODING_MACARMENIAN :
2081             enc = kCFStringEncodingMacArmenian ;
2082             break ;
2083         case wxFONTENCODING_MACCHINESESIMP :
2084             enc = kCFStringEncodingMacChineseSimp ;
2085             break ;
2086         case wxFONTENCODING_MACTIBETAN :
2087             enc = kCFStringEncodingMacTibetan ;
2088             break ;
2089         case wxFONTENCODING_MACMONGOLIAN :
2090             enc = kCFStringEncodingMacMongolian ;
2091             break ;
2092         case wxFONTENCODING_MACETHIOPIC :
2093             enc = kCFStringEncodingMacEthiopic ;
2094             break ;
2095         case wxFONTENCODING_MACCENTRALEUR :
2096             enc = kCFStringEncodingMacCentralEurRoman ;
2097             break ;
2098         case wxFONTENCODING_MACVIATNAMESE :
2099             enc = kCFStringEncodingMacVietnamese ;
2100             break ;
2101         case wxFONTENCODING_MACARABICEXT :
2102             enc = kCFStringEncodingMacExtArabic ;
2103             break ;
2104         case wxFONTENCODING_MACSYMBOL :
2105             enc = kCFStringEncodingMacSymbol ;
2106             break ;
2107         case wxFONTENCODING_MACDINGBATS :
2108             enc = kCFStringEncodingMacDingbats ;
2109             break ;
2110         case wxFONTENCODING_MACTURKISH :
2111             enc = kCFStringEncodingMacTurkish ;
2112             break ;
2113         case wxFONTENCODING_MACCROATIAN :
2114             enc = kCFStringEncodingMacCroatian ;
2115             break ;
2116         case wxFONTENCODING_MACICELANDIC :
2117             enc = kCFStringEncodingMacIcelandic ;
2118             break ;
2119         case wxFONTENCODING_MACROMANIAN :
2120             enc = kCFStringEncodingMacRomanian ;
2121             break ;
2122         case wxFONTENCODING_MACCELTIC :
2123             enc = kCFStringEncodingMacCeltic ;
2124             break ;
2125         case wxFONTENCODING_MACGAELIC :
2126             enc = kCFStringEncodingMacGaelic ;
2127             break ;
2128 //      case wxFONTENCODING_MACKEYBOARD :
2129 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2130 //          break ;
2131         default :
2132             // because gcc is picky
2133             break ;
2134     } ;
2135     return enc ;
2136 }
2137
2138 class wxMBConv_cocoa : public wxMBConv
2139 {
2140 public:
2141     wxMBConv_cocoa()
2142     {
2143         Init(CFStringGetSystemEncoding()) ;
2144     }
2145
2146 #if wxUSE_FONTMAP
2147     wxMBConv_cocoa(const wxChar* name)
2148     {
2149         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2150     }
2151 #endif
2152
2153     wxMBConv_cocoa(wxFontEncoding encoding)
2154     {
2155         Init( wxCFStringEncFromFontEnc(encoding) );
2156     }
2157
2158     ~wxMBConv_cocoa()
2159     {
2160     }
2161
2162     void Init( CFStringEncoding encoding)
2163     {
2164         m_encoding = encoding ;
2165     }
2166
2167     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2168     {
2169         wxASSERT(szUnConv);
2170
2171         CFStringRef theString = CFStringCreateWithBytes (
2172                                                 NULL, //the allocator
2173                                                 (const UInt8*)szUnConv,
2174                                                 strlen(szUnConv),
2175                                                 m_encoding,
2176                                                 false //no BOM/external representation
2177                                                 );
2178
2179         wxASSERT(theString);
2180
2181         size_t nOutLength = CFStringGetLength(theString);
2182
2183         if (szOut == NULL)
2184         {
2185             CFRelease(theString);
2186             return nOutLength;
2187         }
2188
2189         CFRange theRange = { 0, nOutSize };
2190
2191 #if SIZEOF_WCHAR_T == 4
2192         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2193 #endif
2194
2195         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2196
2197         CFRelease(theString);
2198
2199         szUniCharBuffer[nOutLength] = '\0' ;
2200
2201 #if SIZEOF_WCHAR_T == 4
2202         wxMBConvUTF16 converter ;
2203         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2204         delete[] szUniCharBuffer;
2205 #endif
2206
2207         return nOutLength;
2208     }
2209
2210     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2211     {
2212         wxASSERT(szUnConv);
2213
2214         size_t nRealOutSize;
2215         size_t nBufSize = wxWcslen(szUnConv);
2216         UniChar* szUniBuffer = (UniChar*) szUnConv;
2217
2218 #if SIZEOF_WCHAR_T == 4
2219         wxMBConvUTF16 converter ;
2220         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2221         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2222         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2223         nBufSize /= sizeof(UniChar);
2224 #endif
2225
2226         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2227                                 NULL, //allocator
2228                                 szUniBuffer,
2229                                 nBufSize,
2230                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2231                             );
2232
2233         wxASSERT(theString);
2234
2235         //Note that CER puts a BOM when converting to unicode
2236         //so we  check and use getchars instead in that case
2237         if (m_encoding == kCFStringEncodingUnicode)
2238         {
2239             if (szOut != NULL)
2240                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2241
2242             nRealOutSize = CFStringGetLength(theString) + 1;
2243         }
2244         else
2245         {
2246             CFStringGetBytes(
2247                 theString,
2248                 CFRangeMake(0, CFStringGetLength(theString)),
2249                 m_encoding,
2250                 0, //what to put in characters that can't be converted -
2251                     //0 tells CFString to return NULL if it meets such a character
2252                 false, //not an external representation
2253                 (UInt8*) szOut,
2254                 nOutSize,
2255                 (CFIndex*) &nRealOutSize
2256                         );
2257         }
2258
2259         CFRelease(theString);
2260
2261 #if SIZEOF_WCHAR_T == 4
2262         delete[] szUniBuffer;
2263 #endif
2264
2265         return  nRealOutSize - 1;
2266     }
2267
2268     bool IsOk() const
2269     {
2270         return m_encoding != kCFStringEncodingInvalidId &&
2271               CFStringIsEncodingAvailable(m_encoding);
2272     }
2273
2274 private:
2275     CFStringEncoding m_encoding ;
2276 };
2277
2278 #endif // defined(__WXCOCOA__)
2279
2280 // ============================================================================
2281 // Mac conversion classes
2282 // ============================================================================
2283
2284 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2285
2286 class wxMBConv_mac : public wxMBConv
2287 {
2288 public:
2289     wxMBConv_mac()
2290     {
2291         Init(CFStringGetSystemEncoding()) ;
2292     }
2293
2294 #if wxUSE_FONTMAP
2295     wxMBConv_mac(const wxChar* name)
2296     {
2297         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2298     }
2299 #endif
2300
2301     wxMBConv_mac(wxFontEncoding encoding)
2302     {
2303         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2304     }
2305
2306     ~wxMBConv_mac()
2307     {
2308         OSStatus status = noErr ;
2309         status = TECDisposeConverter(m_MB2WC_converter);
2310         status = TECDisposeConverter(m_WC2MB_converter);
2311     }
2312
2313
2314     void Init( TextEncodingBase encoding)
2315     {
2316         OSStatus status = noErr ;
2317         m_char_encoding = encoding ;
2318         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2319
2320         status = TECCreateConverter(&m_MB2WC_converter,
2321                                     m_char_encoding,
2322                                     m_unicode_encoding);
2323         status = TECCreateConverter(&m_WC2MB_converter,
2324                                     m_unicode_encoding,
2325                                     m_char_encoding);
2326     }
2327
2328     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2329     {
2330         OSStatus status = noErr ;
2331         ByteCount byteOutLen ;
2332         ByteCount byteInLen = strlen(psz) ;
2333         wchar_t *tbuf = NULL ;
2334         UniChar* ubuf = NULL ;
2335         size_t res = 0 ;
2336
2337         if (buf == NULL)
2338         {
2339             //apple specs say at least 32
2340             n = wxMax( 32 , byteInLen ) ;
2341             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2342         }
2343         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2344 #if SIZEOF_WCHAR_T == 4
2345         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2346 #else
2347         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2348 #endif
2349         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2350           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2351 #if SIZEOF_WCHAR_T == 4
2352         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2353         // is not properly terminated we get random characters at the end
2354         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2355         wxMBConvUTF16 converter ;
2356         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2357         free( ubuf ) ;
2358 #else
2359         res = byteOutLen / sizeof( UniChar ) ;
2360 #endif
2361         if ( buf == NULL )
2362              free(tbuf) ;
2363
2364         if ( buf  && res < n)
2365             buf[res] = 0;
2366
2367         return res ;
2368     }
2369
2370     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2371     {
2372         OSStatus status = noErr ;
2373         ByteCount byteOutLen ;
2374         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2375
2376         char *tbuf = NULL ;
2377
2378         if (buf == NULL)
2379         {
2380             //apple specs say at least 32
2381             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2382             tbuf = (char*) malloc( n ) ;
2383         }
2384
2385         ByteCount byteBufferLen = n ;
2386         UniChar* ubuf = NULL ;
2387 #if SIZEOF_WCHAR_T == 4
2388         wxMBConvUTF16 converter ;
2389         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2390         byteInLen = unicharlen ;
2391         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2392         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2393 #else
2394         ubuf = (UniChar*) psz ;
2395 #endif
2396         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2397             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2398 #if SIZEOF_WCHAR_T == 4
2399         free( ubuf ) ;
2400 #endif
2401         if ( buf == NULL )
2402             free(tbuf) ;
2403
2404         size_t res = byteOutLen ;
2405         if ( buf  && res < n)
2406         {
2407             buf[res] = 0;
2408
2409             //we need to double-trip to verify it didn't insert any ? in place
2410             //of bogus characters
2411             wxWCharBuffer wcBuf(n);
2412             size_t pszlen = wxWcslen(psz);
2413             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2414                         wxWcslen(wcBuf) != pszlen ||
2415                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2416             {
2417                 // we didn't obtain the same thing we started from, hence
2418                 // the conversion was lossy and we consider that it failed
2419                 return (size_t)-1;
2420             }
2421         }
2422
2423         return res ;
2424     }
2425
2426     bool IsOk() const
2427         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2428
2429 private:
2430     TECObjectRef m_MB2WC_converter ;
2431     TECObjectRef m_WC2MB_converter ;
2432
2433     TextEncodingBase m_char_encoding ;
2434     TextEncodingBase m_unicode_encoding ;
2435 };
2436
2437 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2438
2439 // ============================================================================
2440 // wxEncodingConverter based conversion classes
2441 // ============================================================================
2442
2443 #if wxUSE_FONTMAP
2444
2445 class wxMBConv_wxwin : public wxMBConv
2446 {
2447 private:
2448     void Init()
2449     {
2450         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2451                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2452     }
2453
2454 public:
2455     // temporarily just use wxEncodingConverter stuff,
2456     // so that it works while a better implementation is built
2457     wxMBConv_wxwin(const wxChar* name)
2458     {
2459         if (name)
2460             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2461         else
2462             m_enc = wxFONTENCODING_SYSTEM;
2463
2464         Init();
2465     }
2466
2467     wxMBConv_wxwin(wxFontEncoding enc)
2468     {
2469         m_enc = enc;
2470
2471         Init();
2472     }
2473
2474     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2475     {
2476         size_t inbuf = strlen(psz);
2477         if (buf)
2478         {
2479             if (!m2w.Convert(psz,buf))
2480                 return (size_t)-1;
2481         }
2482         return inbuf;
2483     }
2484
2485     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2486     {
2487         const size_t inbuf = wxWcslen(psz);
2488         if (buf)
2489         {
2490             if (!w2m.Convert(psz,buf))
2491                 return (size_t)-1;
2492         }
2493
2494         return inbuf;
2495     }
2496
2497     bool IsOk() const { return m_ok; }
2498
2499 public:
2500     wxFontEncoding m_enc;
2501     wxEncodingConverter m2w, w2m;
2502
2503     // were we initialized successfully?
2504     bool m_ok;
2505
2506     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2507 };
2508
2509 // make the constructors available for unit testing
2510 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2511 {
2512     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2513     if ( !result->IsOk() )
2514     {
2515         delete result;
2516         return 0;
2517     }
2518     return result;
2519 }
2520
2521 #endif // wxUSE_FONTMAP
2522
2523 // ============================================================================
2524 // wxCSConv implementation
2525 // ============================================================================
2526
2527 void wxCSConv::Init()
2528 {
2529     m_name = NULL;
2530     m_convReal =  NULL;
2531     m_deferred = true;
2532 }
2533
2534 wxCSConv::wxCSConv(const wxChar *charset)
2535 {
2536     Init();
2537
2538     if ( charset )
2539     {
2540         SetName(charset);
2541     }
2542
2543 #if wxUSE_FONTMAP
2544     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2545 #else
2546     m_encoding = wxFONTENCODING_SYSTEM;
2547 #endif
2548 }
2549
2550 wxCSConv::wxCSConv(wxFontEncoding encoding)
2551 {
2552     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2553     {
2554         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2555
2556         encoding = wxFONTENCODING_SYSTEM;
2557     }
2558
2559     Init();
2560
2561     m_encoding = encoding;
2562 }
2563
2564 wxCSConv::~wxCSConv()
2565 {
2566     Clear();
2567 }
2568
2569 wxCSConv::wxCSConv(const wxCSConv& conv)
2570         : wxMBConv()
2571 {
2572     Init();
2573
2574     SetName(conv.m_name);
2575     m_encoding = conv.m_encoding;
2576 }
2577
2578 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2579 {
2580     Clear();
2581
2582     SetName(conv.m_name);
2583     m_encoding = conv.m_encoding;
2584
2585     return *this;
2586 }
2587
2588 void wxCSConv::Clear()
2589 {
2590     free(m_name);
2591     delete m_convReal;
2592
2593     m_name = NULL;
2594     m_convReal = NULL;
2595 }
2596
2597 void wxCSConv::SetName(const wxChar *charset)
2598 {
2599     if (charset)
2600     {
2601         m_name = wxStrdup(charset);
2602         m_deferred = true;
2603     }
2604 }
2605
2606 #if wxUSE_FONTMAP
2607 #include "wx/hashmap.h"
2608
// Hash map from a font encoding to a charset name.
//
// wxCSConv::DoCreate() stores here the name for which an iconv-based
// converter could be created for the encoding, or an empty string if none
// of the names of this encoding worked.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// the cache itself, shared by all wxCSConv objects
static wxEncodingNameCache gs_nameCache;
2613 #endif
2614
2615 wxMBConv *wxCSConv::DoCreate() const
2616 {
2617 #if wxUSE_FONTMAP
2618     wxLogTrace(TRACE_STRCONV,
2619                wxT("creating conversion for %s"),
2620                (m_name ? m_name
2621                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2622 #endif // wxUSE_FONTMAP
2623
2624     // check for the special case of ASCII or ISO8859-1 charset: as we have
2625     // special knowledge of it anyhow, we don't need to create a special
2626     // conversion object
2627     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2628             m_encoding == wxFONTENCODING_DEFAULT )
2629     {
2630         // don't convert at all
2631         return NULL;
2632     }
2633
2634     // we trust OS to do conversion better than we can so try external
2635     // conversion methods first
2636     //
2637     // the full order is:
2638     //      1. OS conversion (iconv() under Unix or Win32 API)
2639     //      2. hard coded conversions for UTF
2640     //      3. wxEncodingConverter as fall back
2641
2642     // step (1)
2643 #ifdef HAVE_ICONV
2644 #if !wxUSE_FONTMAP
2645     if ( m_name )
2646 #endif // !wxUSE_FONTMAP
2647     {
2648         wxString name(m_name);
2649         wxFontEncoding encoding(m_encoding);
2650
2651         if ( !name.empty() )
2652         {
2653             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2654             if ( conv->IsOk() )
2655                 return conv;
2656
2657             delete conv;
2658
2659 #if wxUSE_FONTMAP
2660             encoding =
2661                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2662 #endif // wxUSE_FONTMAP
2663         }
2664 #if wxUSE_FONTMAP
2665         {
2666             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2667             if ( it != gs_nameCache.end() )
2668             {
2669                 if ( it->second.empty() )
2670                     return NULL;
2671
2672                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2673                 if ( conv->IsOk() )
2674                     return conv;
2675
2676                 delete conv;
2677             }
2678
2679             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2680
2681             for ( ; *names; ++names )
2682             {
2683                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2684                 if ( conv->IsOk() )
2685                 {
2686                     gs_nameCache[encoding] = *names;
2687                     return conv;
2688                 }
2689
2690                 delete conv;
2691             }
2692
2693             gs_nameCache[encoding] = _T(""); // cache the failure
2694         }
2695 #endif // wxUSE_FONTMAP
2696     }
2697 #endif // HAVE_ICONV
2698
2699 #ifdef wxHAVE_WIN32_MB2WC
2700     {
2701 #if wxUSE_FONTMAP
2702         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2703                                       : new wxMBConv_win32(m_encoding);
2704         if ( conv->IsOk() )
2705             return conv;
2706
2707         delete conv;
2708 #else
2709         return NULL;
2710 #endif
2711     }
2712 #endif // wxHAVE_WIN32_MB2WC
2713 #if defined(__WXMAC__)
2714     {
2715         // leave UTF16 and UTF32 to the built-ins of wx
2716         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2717             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2718         {
2719
2720 #if wxUSE_FONTMAP
2721             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2722                                         : new wxMBConv_mac(m_encoding);
2723 #else
2724             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2725 #endif
2726             if ( conv->IsOk() )
2727                  return conv;
2728
2729             delete conv;
2730         }
2731     }
2732 #endif
2733 #if defined(__WXCOCOA__)
2734     {
2735         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2736         {
2737
2738 #if wxUSE_FONTMAP
2739             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2740                                           : new wxMBConv_cocoa(m_encoding);
2741 #else
2742             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2743 #endif
2744             if ( conv->IsOk() )
2745                  return conv;
2746
2747             delete conv;
2748         }
2749     }
2750 #endif
2751     // step (2)
2752     wxFontEncoding enc = m_encoding;
2753 #if wxUSE_FONTMAP
2754     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2755     {
2756         // use "false" to suppress interactive dialogs -- we can be called from
2757         // anywhere and popping up a dialog from here is the last thing we want to
2758         // do
2759         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2760     }
2761 #endif // wxUSE_FONTMAP
2762
2763     switch ( enc )
2764     {
2765         case wxFONTENCODING_UTF7:
2766              return new wxMBConvUTF7;
2767
2768         case wxFONTENCODING_UTF8:
2769              return new wxMBConvUTF8;
2770
2771         case wxFONTENCODING_UTF16BE:
2772              return new wxMBConvUTF16BE;
2773
2774         case wxFONTENCODING_UTF16LE:
2775              return new wxMBConvUTF16LE;
2776
2777         case wxFONTENCODING_UTF32BE:
2778              return new wxMBConvUTF32BE;
2779
2780         case wxFONTENCODING_UTF32LE:
2781              return new wxMBConvUTF32LE;
2782
2783         default:
2784              // nothing to do but put here to suppress gcc warnings
2785              ;
2786     }
2787
2788     // step (3)
2789 #if wxUSE_FONTMAP
2790     {
2791         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2792                                       : new wxMBConv_wxwin(m_encoding);
2793         if ( conv->IsOk() )
2794             return conv;
2795
2796         delete conv;
2797     }
2798 #endif // wxUSE_FONTMAP
2799
2800     // NB: This is a hack to prevent deadlock. What could otherwise happen
2801     //     in Unicode build: wxConvLocal creation ends up being here
2802     //     because of some failure and logs the error. But wxLog will try to
2803     //     attach timestamp, for which it will need wxConvLocal (to convert
2804     //     time to char* and then wchar_t*), but that fails, tries to log
2805     //     error, but wxLog has a (already locked) critical section that
2806     //     guards static buffer.
2807     static bool alreadyLoggingError = false;
2808     if (!alreadyLoggingError)
2809     {
2810         alreadyLoggingError = true;
2811         wxLogError(_("Cannot convert from the charset '%s'!"),
2812                    m_name ? m_name
2813                       :
2814 #if wxUSE_FONTMAP
2815                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2816 #else // !wxUSE_FONTMAP
2817                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2818 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2819               );
2820         alreadyLoggingError = false;
2821     }
2822
2823     return NULL;
2824 }
2825
2826 void wxCSConv::CreateConvIfNeeded() const
2827 {
2828     if ( m_deferred )
2829     {
2830         wxCSConv *self = (wxCSConv *)this; // const_cast
2831
2832 #if wxUSE_INTL
2833         // if we don't have neither the name nor the encoding, use the default
2834         // encoding for this system
2835         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2836         {
2837             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2838         }
2839 #endif // wxUSE_INTL
2840
2841         self->m_convReal = DoCreate();
2842         self->m_deferred = false;
2843     }
2844 }
2845
2846 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2847 {
2848     CreateConvIfNeeded();
2849
2850     if (m_convReal)
2851         return m_convReal->MB2WC(buf, psz, n);
2852
2853     // latin-1 (direct)
2854     size_t len = strlen(psz);
2855
2856     if (buf)
2857     {
2858         for (size_t c = 0; c <= len; c++)
2859             buf[c] = (unsigned char)(psz[c]);
2860     }
2861
2862     return len;
2863 }
2864
2865 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2866 {
2867     // Delegates to the real converter if there is one, otherwise converts
2868     // directly assuming latin-1: fails with (size_t)-1 on any character
2869     // above 0xFF, else returns the number of chars produced without the NUL.
2870     // A NULL buf only validates psz and computes the length; a non-NULL one
2871     // receives at most n chars.
2872     CreateConvIfNeeded();
2873     if (m_convReal)
2874         return m_convReal->WC2MB(buf, psz, n);
2875
2876     // latin-1 (direct): without a buffer the whole string is checked, with
2877     // one only the part that is going to be stored in it
2878     const size_t len = wxWcslen(psz);
2879     const size_t count = (!buf || len < n) ? len : n;
2880     for (size_t c = 0; c < count; c++)
2881     {
2882         if (psz[c] > 0xFF)
2883             return (size_t)-1;
2884         if (buf)
2885             buf[c] = (char)psz[c];
2886     }
2887
2888     // terminate only if there is room left for the NUL
2889     if (buf && count < n)
2890         buf[count] = 0;
2891
2892     return count;
2893 }
2894
2895 // ----------------------------------------------------------------------------
2896 // globals: predefined converter objects and the public names bound to them
2897 // ----------------------------------------------------------------------------
2898
2899 #ifdef __WINDOWS__ // the type of the "libc" converter depends on the platform
2900     static wxMBConv_win32 wxConvLibcObj;
2901 #elif defined(__WXMAC__) && !defined(__MACH__)
2902     static wxMBConv_mac wxConvLibcObj ;
2903 #else
2904     static wxMBConvLibc wxConvLibcObj;
2905 #endif
2906 // the remaining file-local objects have the same type on all platforms
2907 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2908 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2909 static wxMBConvUTF7 wxConvUTF7Obj;
2910 static wxMBConvUTF8 wxConvUTF8Obj;
2911 // public references and pointers, all bound to the static objects above
2912 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2913 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2914 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2915 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2916 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2917 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2918 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2919 #ifdef __WXOSX__ // UTF-8 converter on this platform, the libc one elsewhere
2920                                     wxConvUTF8Obj;
2921 #else
2922                                     wxConvLibcObj;
2923 #endif
2924
2925
2926 #else // !wxUSE_WCHAR_T
2927
2928 // stand-ins in absence of wchar_t: plain wxMBConv objects, not references
2929 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2930                                 wxConvISO8859_1,
2931                                 wxConvLocal,
2932                                 wxConvUTF8;
2933
2934 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T