src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
  79 #define TRACE_STRCONV _T("strconv")
  80
  81 #if SIZEOF_WCHAR_T == 2
  82     #define WC_UTF16
  83 #endif
  84
  85 // ============================================================================
  86 // implementation
  87 // ============================================================================
  88
  89 // ----------------------------------------------------------------------------
  90 // UTF-16 en/decoding to/from UCS-4
  91 // ----------------------------------------------------------------------------
  92
  93
  94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  95 {
  96     if (input<=0xffff)
  97     {
  98         if (output)
  99             *output = (wxUint16) input;
 100         return 1;
 101     }
 102     else if (input>=0x110000)
 103     {
 104         return (size_t)-1;
 105     }
 106     else
 107     {
 108         if (output)
 109         {
 110             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 111             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 112         }
 113         return 2;
 114     }
 115 }
 116
 117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 118 {
 119     if ((*input<0xd800) || (*input>0xdfff))
 120     {
 121         output = *input;
 122         return 1;
 123     }
 124     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 125     {
 126         output = *input;
 127         return (size_t)-1;
 128     }
 129     else
 130     {
 131         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 132         return 2;
 133     }
 134 }
 135
 136
 137 // ----------------------------------------------------------------------------
 138 // wxMBConv
 139 // ----------------------------------------------------------------------------
 140
// Destructor: the body is intentionally empty, it is only defined here so
// that it exists out of line.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 145
 146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 147 {
 148     if ( psz )
 149     {
 150         // calculate the length of the buffer needed first
 151         size_t nLen = MB2WC(NULL, psz, 0);
 152         if ( nLen != (size_t)-1 )
 153         {
 154             // now do the actual conversion
 155             wxWCharBuffer buf(nLen);
 156             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 157             if ( nLen != (size_t)-1 )
 158             {
 159                 return buf;
 160             }
 161         }
 162     }
 163
 164     wxWCharBuffer buf((wchar_t *)NULL);
 165
 166     return buf;
 167 }
 168
 169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 170 {
 171     if ( pwz )
 172     {
 173         size_t nLen = WC2MB(NULL, pwz, 0);
 174         if ( nLen != (size_t)-1 )
 175         {
 176             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 177             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 178             if ( nLen != (size_t)-1 )
 179             {
 180                 return buf;
 181             }
 182         }
 183     }
 184
 185     wxCharBuffer buf((char *)NULL);
 186
 187     return buf;
 188 }
 189
 190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 191 {
 192     wxASSERT(pOutSize != NULL);
 193
 194     const char* szEnd = szString + nStringLen + 1;
 195     const char* szPos = szString;
 196     const char* szStart = szPos;
 197
 198     size_t nActualLength = 0;
 199     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 200
 201     wxWCharBuffer theBuffer(nCurrentSize);
 202
 203     //Convert the string until the length() is reached, continuing the
 204     //loop every time a null character is reached
 205     while(szPos != szEnd)
 206     {
 207         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 208
 209         //Get the length of the current (sub)string
 210         size_t nLen = MB2WC(NULL, szPos, 0);
 211
 212         //Invalid conversion?
 213         if( nLen == (size_t)-1 )
 214         {
 215             *pOutSize = 0;
 216             theBuffer.data()[0u] = wxT('\0');
 217             return theBuffer;
 218         }
 219
 220
 221         //Increase the actual length (+1 for current null character)
 222         nActualLength += nLen + 1;
 223
 224         //if buffer too big, realloc the buffer
 225         if (nActualLength > (nCurrentSize+1))
 226         {
 227             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 228             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 229             theBuffer = theNewBuffer;
 230             nCurrentSize <<= 1;
 231         }
 232
 233         //Convert the current (sub)string
 234         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 235         {
 236             *pOutSize = 0;
 237             theBuffer.data()[0u] = wxT('\0');
 238             return theBuffer;
 239         }
 240
 241         //Increment to next (sub)string
 242         //Note that we have to use strlen instead of nLen here
 243         //because XX2XX gives us the size of the output buffer,
 244         //which is not necessarily the length of the string
 245         szPos += strlen(szPos) + 1;
 246     }
 247
 248     //success - return actual length and the buffer
 249     *pOutSize = nActualLength;
 250     return theBuffer;
 251 }
 252
 253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 254 {
 255     wxASSERT(pOutSize != NULL);
 256
 257     const wchar_t* szEnd = szString + nStringLen + 1;
 258     const wchar_t* szPos = szString;
 259     const wchar_t* szStart = szPos;
 260
 261     size_t nActualLength = 0;
 262     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 263
 264     wxCharBuffer theBuffer(nCurrentSize);
 265
 266     //Convert the string until the length() is reached, continuing the
 267     //loop every time a null character is reached
 268     while(szPos != szEnd)
 269     {
 270         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 271
 272         //Get the length of the current (sub)string
 273         size_t nLen = WC2MB(NULL, szPos, 0);
 274
 275         //Invalid conversion?
 276         if( nLen == (size_t)-1 )
 277         {
 278             *pOutSize = 0;
 279             theBuffer.data()[0u] = wxT('\0');
 280             return theBuffer;
 281         }
 282
 283         //Increase the actual length (+1 for current null character)
 284         nActualLength += nLen + 1;
 285
 286         //if buffer too big, realloc the buffer
 287         if (nActualLength > (nCurrentSize+1))
 288         {
 289             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 290             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 291             theBuffer = theNewBuffer;
 292             nCurrentSize <<= 1;
 293         }
 294
 295         //Convert the current (sub)string
 296         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 297         {
 298             *pOutSize = 0;
 299             theBuffer.data()[0u] = wxT('\0');
 300             return theBuffer;
 301         }
 302
 303         //Increment to next (sub)string
 304         //Note that we have to use wxWcslen instead of nLen here
 305         //because XX2XX gives us the size of the output buffer,
 306         //which is not necessarily the length of the string
 307         szPos += wxWcslen(szPos) + 1;
 308     }
 309
 310     //success - return actual length and the buffer
 311     *pOutSize = nActualLength;
 312     return theBuffer;
 313 }
 314
 315 // ----------------------------------------------------------------------------
 316 // wxMBConvLibc
 317 // ----------------------------------------------------------------------------
 318
// Multibyte to wide char conversion: simply forwards all its arguments to
// wxMB2WC() and returns its result unchanged.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 323
// Wide char to multibyte conversion: simply forwards all its arguments to
// wxWC2MB() and returns its result unchanged.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 328
 329 #ifdef __UNIX__
 330
 331 // ----------------------------------------------------------------------------
 332 // wxConvBrokenFileNames
 333 // ----------------------------------------------------------------------------
 334
 335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 336 {
 337     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 338                   || wxStricmp(charset, _T("UTF8")) == 0  )
 339         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 340     else
 341         m_conv = new wxCSConv(charset);
 342 }
 343
// Multibyte to wide char conversion: forwards the call to the m_conv object
// and returns its result unchanged.
size_t
wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
                             const char *psz,
                             size_t outputSize) const
{
    return m_conv->MB2WC( outputBuf, psz, outputSize );
}
 351
// Wide char to multibyte conversion: forwards the call to the m_conv object
// and returns its result unchanged.
size_t
wxConvBrokenFileNames::WC2MB(char *outputBuf,
                             const wchar_t *psz,
                             size_t outputSize) const
{
    return m_conv->WC2MB( outputBuf, psz, outputSize );
}
 359
 360 #endif
 361
 362 // ----------------------------------------------------------------------------
 363 // UTF-7
 364 // ----------------------------------------------------------------------------
 365
 366 // Implementation (C) 2004 Fredrik Roubert
 367
//
// BASE64 decoding table
//
// Indexed by the (unsigned) byte value, each row below covers 8 consecutive
// values. The entry is the 6 bit value of the BASE64 digit or 0xff if the
// byte is not a BASE64 digit at all.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // '+' and '/'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // '0'..'7'
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '8' and '9'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A'..'G'
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // 'X'..'Z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 'a'..'g'
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // 'x'..'z'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x80 and above: never
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 406
 407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 408 {
 409     size_t len = 0;
 410
 411     while ( *psz && (!buf || (len < n)) )
 412     {
 413         unsigned char cc = *psz++;
 414         if (cc != '+')
 415         {
 416             // plain ASCII char
 417             if (buf)
 418                 *buf++ = cc;
 419             len++;
 420         }
 421         else if (*psz == '-')
 422         {
 423             // encoded plus sign
 424             if (buf)
 425                 *buf++ = cc;
 426             len++;
 427             psz++;
 428         }
 429         else // start of BASE64 encoded string
 430         {
 431             bool lsb, ok;
 432             unsigned int d, l;
 433             for ( ok = lsb = false, d = 0, l = 0;
 434                   (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
 435                   psz++ )
 436             {
 437                 d <<= 6;
 438                 d += cc;
 439                 for (l += 6; l >= 8; lsb = !lsb)
 440                 {
 441                     unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
 442                     if (lsb)
 443                     {
 444                         if (buf)
 445                             *buf++ |= c;
 446                         len ++;
 447                     }
 448                     else
 449                     {
 450                         if (buf)
 451                             *buf = (wchar_t)(c << 8);
 452                     }
 453
 454                     ok = true;
 455                 }
 456             }
 457
 458             if ( !ok )
 459             {
 460                 // in valid UTF7 we should have valid characters after '+'
 461                 return (size_t)-1;
 462             }
 463
 464             if (*psz == '-')
 465                 psz++;
 466         }
 467     }
 468
 469     if ( buf && (len < n) )
 470         *buf = '\0';
 471
 472     return len;
 473 }
 474
//
// BASE64 encoding table
//
// Indexed by a 6 bit value (0..63), gives the corresponding BASE64 digit.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 489
//
// UTF-7 encoding table
//
// Indexed by the character code (0..127), each row below covers 16
// consecutive codes. The entry is the class of the character:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 509
// Convert the NUL-terminated wide string psz to UTF-7.
//
// buf - output buffer; may be NULL in which case nothing is stored and only
//       the number of bytes needed is computed
// psz - the wide character input
// n   - size of buf in bytes, only tested before starting each directly
//       encoded character or each BASE64 run; the output is NUL-terminated
//       only if there is room left
//
// Returns the number of bytes produced, not counting the trailing NUL, or
// (size_t)-1 if (when WC_UTF16 is not defined) a character above 0xffff is
// found where a directly encoded character or the start of a BASE64 run is
// expected.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{


    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;
        // NOTE(review): if wchar_t is a signed type a negative cc passes the
        // "< 0x80" test and is then used as table index -- confirm that this
        // can't happen
        if (cc < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char of class 0 (Set D): copied as is
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // everything else is written as '+', BASE64 digits, '-' except
            // for the plus sign itself which becomes just "+-"
            if (buf)
                *buf++ = '+';
            len++;
            if (cc != '+')
            {
                // BASE64 encode string: d accumulates the bits and l counts
                // how many of them have not been output yet
                //
                // NB: nothing in this loop compares len with n, so buf must
                //     be big enough for the entire run
                unsigned int lsb, d, l;
                for (d = 0, l = 0; /*nothing*/; psz++)
                {
                    // feed the high byte (lsb == 0) and then the low byte
                    // (lsb == 1) of the low 16 bits of cc
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        // output a digit for each complete group of 6 bits
                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            if (buf)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }
                    // peek at the next character: the run ends at the end of
                    // the string or at the first class 0 character, which is
                    // left unconsumed for the outer loop
                    cc = *psz;
                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
                        break;
                }
                if (l != 0)
                {
                    // output the remaining bits padded with zeroes on the
                    // right, the final "% 64" discards anything above 6 bits
                    if (buf)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }
            if (buf)
                *buf++ = '-';
            len++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 577
 578 // ----------------------------------------------------------------------------
 579 // UTF-8
 580 // ----------------------------------------------------------------------------
 581
// utf8_max[i] is the greatest value considered to fit in a sequence of i + 1
// bytes; the last element makes the search loop in WC2MB() stop for any
// 32 bit value
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area we use to (temporarily) remap bytes
// which are invalid in a UTF-8 encoded string: the byte b is mapped to
// wxUnicodePUA + b, wxUnicodePUAEnd itself is not included in the range
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 589
// Convert the NUL-terminated UTF-8 string psz to wide characters.
//
// buf - output buffer; may be NULL in which case nothing is stored and only
//       the number of wide characters needed is computed
// psz - the UTF-8 input
// n   - maximal number of wide characters to store in buf (ignored if buf is
//       NULL); the output is NUL-terminated only if there is room left
//
// Invalid input is handled according to m_options: each offending byte is
// either mapped to wxUnicodePUA + byte (MAP_INVALID_UTF8_TO_PUA) or written
// as a backslash followed by 3 octal digits (MAP_INVALID_UTF8_TO_OCTAL, in
// which case valid backslashes are doubled), otherwise the conversion fails.
//
// Returns the number of wide characters produced, not counting the trailing
// NUL, or (size_t)-1 on failure.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember where the sequence starts to be able to reprocess its
        // bytes one by one if it turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;
        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // cnt is now the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence (a continuation byte in the lead
                // position)
                invalid = true;
            }
            else
            {
                // ocnt indexes the greatest value fitting in a sequence one
                // byte shorter than this one
                unsigned ocnt = cnt - 1;
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence (the offending byte is not
                        // consumed)
                        invalid = true;
                        break;
                    }
                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }
                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding (the value would have fit in a
                    // shorter sequence)
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUint16 if WC_UTF16
                    //
                    // NB: this may store 2 units without comparing len + 1
                    //     with n
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == (size_t)-1)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }
            if (invalid)
            {
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    // map each byte consumed so far to its own PUA character
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != (size_t)-1);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    // write each byte consumed so far as "\ooo"
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only stored if all its 4 characters
                        // fit but is counted in len in any case
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }
                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return (size_t)-1;
                }
            }
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 726
 727 static inline bool isoctal(wchar_t wch)
 728 {
 729     return L'0' <= wch && wch <= L'7';
 730 }
 731
// Convert the NUL-terminated wide string psz to UTF-8.
//
// buf - output buffer; may be NULL in which case nothing is stored and only
//       the number of bytes needed is computed
// psz - the wide character input
// n   - size of buf in bytes, only tested before starting each character;
//       the output is NUL-terminated only if there is room left
//
// Depending on m_options, the characters in the wxUnicodePUA range or the
// backslash escapes are converted back to single raw bytes.
//
// Returns the number of bytes produced, not counting the trailing NUL.
size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wxUint32 cc;
#ifdef WC_UTF16
        // cast is ok for WC_UTF16
        //
        // if decoding fails cc is the unit itself and only this single unit
        // is skipped
        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
        psz += (pa == (size_t)-1) ? 1 : pa;
#else
        // the top bit is dropped so that cc never exceeds utf8_max[5]
        cc=(*psz++) & 0x7fffffff;
#endif

        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
        {
            // a remapped invalid byte: restore the original byte
            if (buf)
                *buf++ = (char)(cc - wxUnicodePUA);
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == L'\\' && psz[0] == L'\\' )
        {
            // a doubled (escaped) backslash: output a single one
            if (buf)
                *buf++ = (char)cc;
            psz++;
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
                    cc == L'\\' &&
                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
        {
            // a backslash followed by 3 octal digits: output the byte with
            // this value (the cast keeps only its low bits)
            if (buf)
            {
                *buf++ = (char) ((psz[0] - L'0')*0100 +
                                 (psz[1] - L'0')*010 +
                                 (psz[2] - L'0'));
            }

            psz += 3;
            len++;
        }
        else
        {
            // find the number of continuation bytes needed for this value
            unsigned cnt;
            for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
            if (!cnt)
            {
                // plain ASCII char
                if (buf)
                    *buf++ = (char) cc;
                len++;
            }

            else
            {
                // NB: the cnt + 1 bytes are stored without comparing
                //     len + cnt + 1 with n
                len += cnt + 1;
                if (buf)
                {
                    // lead byte: cnt + 1 high bits set followed by the top
                    // bits of the value (this relies on the right shift of
                    // -128 replicating the sign bit)
                    *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));

                    // continuation bytes: 10xxxxxx, most significant first
                    while (cnt--)
                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
                }
            }
        }
    }

    if (buf && (len<n))
        *buf = 0;

    return len;
}
 806
 807 // ----------------------------------------------------------------------------
 808 // UTF-16
 809 // ----------------------------------------------------------------------------
 810
// The UTF-16 conversion functions below are written in terms of "straight"
// and "swap" classes: map these names to the BE and LE classes depending on
// whether WORDS_BIGENDIAN is defined.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 818
 819
 820 #ifdef WC_UTF16
 821
 822 // copy 16bit MB to 16bit String
 823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 824 {
 825     size_t len=0;
 826
 827     while (*(wxUint16*)psz && (!buf || len < n))
 828     {
 829         if (buf)
 830             *buf++ = *(wxUint16*)psz;
 831         len++;
 832
 833         psz += sizeof(wxUint16);
 834     }
 835     if (buf && len<n)   *buf=0;
 836
 837     return len;
 838 }
 839
 840
 841 // copy 16bit String to 16bit MB
 842 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 843 {
 844     size_t len=0;
 845
 846     while (*psz && (!buf || len < n))
 847     {
 848         if (buf)
 849         {
 850             *(wxUint16*)buf = *psz;
 851             buf += sizeof(wxUint16);
 852         }
 853         len += sizeof(wxUint16);
 854         psz++;
 855     }
 856     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 857
 858     return len;
 859 }
 860
 861
 862 // swap 16bit MB to 16bit String
 863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 864 {
 865     size_t len=0;
 866
 867     while (*(wxUint16*)psz && (!buf || len < n))
 868     {
 869         if (buf)
 870         {
 871             ((char *)buf)[0] = psz[1];
 872             ((char *)buf)[1] = psz[0];
 873             buf++;
 874         }
 875         len++;
 876         psz += sizeof(wxUint16);
 877     }
 878     if (buf && len<n)   *buf=0;
 879
 880     return len;
 881 }
 882
 883
 884 // swap 16bit MB to 16bit String
 885 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 886 {
 887     size_t len=0;
 888
 889     while (*psz && (!buf || len < n))
 890     {
 891         if (buf)
 892         {
 893             *buf++ = ((char*)psz)[1];
 894             *buf++ = ((char*)psz)[0];
 895         }
 896         len += sizeof(wxUint16);
 897         psz++;
 898     }
 899     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 900
 901     return len;
 902 }
 903
 904
 905 #else // WC_UTF16
 906
 907
 908 // copy 16bit MB to 32bit String
 909 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 910 {
 911     size_t len=0;
 912
 913     while (*(wxUint16*)psz && (!buf || len < n))
 914     {
 915         wxUint32 cc;
 916         size_t pa=decode_utf16((wxUint16*)psz, cc);
 917         if (pa == (size_t)-1)
 918             return pa;
 919
 920         if (buf)
 921             *buf++ = (wchar_t)cc;
 922         len++;
 923         psz += pa * sizeof(wxUint16);
 924     }
 925     if (buf && len<n)   *buf=0;
 926
 927     return len;
 928 }
 929
 930
 931 // copy 32bit String to 16bit MB
 932 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 933 {
 934     size_t len=0;
 935
 936     while (*psz && (!buf || len < n))
 937     {
 938         wxUint16 cc[2];
 939         size_t pa=encode_utf16(*psz, cc);
 940
 941         if (pa == (size_t)-1)
 942             return pa;
 943
 944         if (buf)
 945         {
 946             *(wxUint16*)buf = cc[0];
 947             buf += sizeof(wxUint16);
 948             if (pa > 1)
 949             {
 950                 *(wxUint16*)buf = cc[1];
 951                 buf += sizeof(wxUint16);
 952             }
 953         }
 954
 955         len += pa*sizeof(wxUint16);
 956         psz++;
 957     }
 958     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 959
 960     return len;
 961 }
 962
 963
 964 // swap 16bit MB to 32bit String
 965 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 966 {
 967     size_t len=0;
 968
 969     while (*(wxUint16*)psz && (!buf || len < n))
 970     {
 971         wxUint32 cc;
 972         char tmp[4];
 973         tmp[0]=psz[1];  tmp[1]=psz[0];
 974         tmp[2]=psz[3];  tmp[3]=psz[2];
 975
 976         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 977         if (pa == (size_t)-1)
 978             return pa;
 979
 980         if (buf)
 981             *buf++ = (wchar_t)cc;
 982
 983         len++;
 984         psz += pa * sizeof(wxUint16);
 985     }
 986     if (buf && len<n)   *buf=0;
 987
 988     return len;
 989 }
 990
 991
 992 // swap 32bit String to 16bit MB
 993 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 994 {
 995     size_t len=0;
 996
 997     while (*psz && (!buf || len < n))
 998     {
 999         wxUint16 cc[2];
1000         size_t pa=encode_utf16(*psz, cc);
1001
1002         if (pa == (size_t)-1)
1003             return pa;
1004
1005         if (buf)
1006         {
1007             *buf++ = ((char*)cc)[1];
1008             *buf++ = ((char*)cc)[0];
1009             if (pa > 1)
1010             {
1011                 *buf++ = ((char*)cc)[3];
1012                 *buf++ = ((char*)cc)[2];
1013             }
1014         }
1015
1016         len += pa*sizeof(wxUint16);
1017         psz++;
1018     }
1019     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1020
1021     return len;
1022 }
1023
1024 #endif // WC_UTF16
1025
1026
1027 // ----------------------------------------------------------------------------
1028 // UTF-32
1029 // ----------------------------------------------------------------------------
1030
// The conversion functions below are written once for the "straight" byte
// order (BE when WORDS_BIGENDIAN is defined, LE otherwise) and once for the
// "swap" one: these macros map the two names to the LE/BE classes.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global UTF-32 converter objects, one for each byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1042
1043
1044 #ifdef WC_UTF16
1045
1046 // copy 32bit MB to 16bit String
1047 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1048 {
1049     size_t len=0;
1050
1051     while (*(wxUint32*)psz && (!buf || len < n))
1052     {
1053         wxUint16 cc[2];
1054
1055         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1056         if (pa == (size_t)-1)
1057             return pa;
1058
1059         if (buf)
1060         {
1061             *buf++ = cc[0];
1062             if (pa > 1)
1063                 *buf++ = cc[1];
1064         }
1065         len += pa;
1066         psz += sizeof(wxUint32);
1067     }
1068     if (buf && len<n)   *buf=0;
1069
1070     return len;
1071 }
1072
1073
1074 // copy 16bit String to 32bit MB
1075 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1076 {
1077     size_t len=0;
1078
1079     while (*psz && (!buf || len < n))
1080     {
1081         wxUint32 cc;
1082
1083         // cast is ok for WC_UTF16
1084         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1085         if (pa == (size_t)-1)
1086             return pa;
1087
1088         if (buf)
1089         {
1090             *(wxUint32*)buf = cc;
1091             buf += sizeof(wxUint32);
1092         }
1093         len += sizeof(wxUint32);
1094         psz += pa;
1095     }
1096
1097     if (buf && len<=n-sizeof(wxUint32))
1098         *(wxUint32*)buf=0;
1099
1100     return len;
1101 }
1102
1103
1104
1105 // swap 32bit MB to 16bit String
1106 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1107 {
1108     size_t len=0;
1109
1110     while (*(wxUint32*)psz && (!buf || len < n))
1111     {
1112         char tmp[4];
1113         tmp[0] = psz[3];   tmp[1] = psz[2];
1114         tmp[2] = psz[1];   tmp[3] = psz[0];
1115
1116
1117         wxUint16 cc[2];
1118
1119         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1120         if (pa == (size_t)-1)
1121             return pa;
1122
1123         if (buf)
1124         {
1125             *buf++ = cc[0];
1126             if (pa > 1)
1127                 *buf++ = cc[1];
1128         }
1129         len += pa;
1130         psz += sizeof(wxUint32);
1131     }
1132
1133     if (buf && len<n)
1134         *buf=0;
1135
1136     return len;
1137 }
1138
1139
1140 // swap 16bit String to 32bit MB
1141 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1142 {
1143     size_t len=0;
1144
1145     while (*psz && (!buf || len < n))
1146     {
1147         char cc[4];
1148
1149         // cast is ok for WC_UTF16
1150         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1151         if (pa == (size_t)-1)
1152             return pa;
1153
1154         if (buf)
1155         {
1156             *buf++ = cc[3];
1157             *buf++ = cc[2];
1158             *buf++ = cc[1];
1159             *buf++ = cc[0];
1160         }
1161         len += sizeof(wxUint32);
1162         psz += pa;
1163     }
1164
1165     if (buf && len<=n-sizeof(wxUint32))
1166         *(wxUint32*)buf=0;
1167
1168     return len;
1169 }
1170
1171 #else // WC_UTF16
1172
1173
1174 // copy 32bit MB to 32bit String
1175 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1176 {
1177     size_t len=0;
1178
1179     while (*(wxUint32*)psz && (!buf || len < n))
1180     {
1181         if (buf)
1182             *buf++ = (wchar_t)(*(wxUint32*)psz);
1183         len++;
1184         psz += sizeof(wxUint32);
1185     }
1186
1187     if (buf && len<n)
1188         *buf=0;
1189
1190     return len;
1191 }
1192
1193
1194 // copy 32bit String to 32bit MB
1195 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1196 {
1197     size_t len=0;
1198
1199     while (*psz && (!buf || len < n))
1200     {
1201         if (buf)
1202         {
1203             *(wxUint32*)buf = *psz;
1204             buf += sizeof(wxUint32);
1205         }
1206
1207         len += sizeof(wxUint32);
1208         psz++;
1209     }
1210
1211     if (buf && len<=n-sizeof(wxUint32))
1212         *(wxUint32*)buf=0;
1213
1214     return len;
1215 }
1216
1217
1218 // swap 32bit MB to 32bit String
1219 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1220 {
1221     size_t len=0;
1222
1223     while (*(wxUint32*)psz && (!buf || len < n))
1224     {
1225         if (buf)
1226         {
1227             ((char *)buf)[0] = psz[3];
1228             ((char *)buf)[1] = psz[2];
1229             ((char *)buf)[2] = psz[1];
1230             ((char *)buf)[3] = psz[0];
1231             buf++;
1232         }
1233         len++;
1234         psz += sizeof(wxUint32);
1235     }
1236
1237     if (buf && len<n)
1238         *buf=0;
1239
1240     return len;
1241 }
1242
1243
1244 // swap 32bit String to 32bit MB
1245 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1246 {
1247     size_t len=0;
1248
1249     while (*psz && (!buf || len < n))
1250     {
1251         if (buf)
1252         {
1253             *buf++ = ((char *)psz)[3];
1254             *buf++ = ((char *)psz)[2];
1255             *buf++ = ((char *)psz)[1];
1256             *buf++ = ((char *)psz)[0];
1257         }
1258         len += sizeof(wxUint32);
1259         psz++;
1260     }
1261
1262     if (buf && len<=n-sizeof(wxUint32))
1263         *(wxUint32*)buf=0;
1264
1265     return len;
1266 }
1267
1268
1269 #endif // WC_UTF16
1270
1271
1272 // ============================================================================
1273 // The classes doing conversion using the iconv_xxx() functions
1274 // ============================================================================
1275
1276 #ifdef HAVE_ICONV
1277
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// cast applied to the address of the input pointer in the iconv() calls
// below (ICONV_CONST itself is not defined in this part of the file)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value used throughout this file to mark an unusable iconv_t handle
#define ICONV_T_INVALID ((iconv_t)-1)

// WC_BSWAP is the byte swapping macro and WC_ENC the font encoding constant
// corresponding to the size of wchar_t
#if SIZEOF_WCHAR_T == 4
    #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF32
#elif SIZEOF_WCHAR_T == 2
    #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF16
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
1307
1308 // ----------------------------------------------------------------------------
1309 // wxMBConv_iconv: encapsulates an iconv character set
1310 // ----------------------------------------------------------------------------
1311
// Converter between a multibyte charset and wchar_t implemented using
// iconv(), with one iconv handle for each direction of the conversion.
class wxMBConv_iconv : public wxMBConv
{
public:
    // name is the name of the multibyte charset, it is passed to iconv_open()
    wxMBConv_iconv(const wxChar *name);
    virtual ~wxMBConv_iconv();

    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // returns true only if the handles for both directions are valid
    bool IsOk() const
        { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction
    iconv_t m2w,
            w2m;
#if wxUSE_THREADS
    // guards access to m2w and w2m objects
    wxMutex m_iconvMutex;
#endif

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain empty
    static wxString ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;
};
1343
1344 // make the constructor available for unit testing
1345 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1346 {
1347     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1348     if ( !result->IsOk() )
1349     {
1350         delete result;
1351         return 0;
1352     }
1353     return result;
1354 }
1355
// definitions of the static members: the wide char charset name starts out
// empty and is filled in by the constructor, together with the swap flag
wxString wxMBConv_iconv::ms_wcCharsetName;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1358
1359 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1360 {
1361     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1362     // names for the charsets
1363     const wxCharBuffer cname(wxString(name).ToAscii());
1364
1365     // check for charset that represents wchar_t:
1366     if ( ms_wcCharsetName.empty() )
1367     {
1368         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1369
1370 #if wxUSE_FONTMAP
1371         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1372 #else // !wxUSE_FONTMAP
1373         static const wxChar *names[] =
1374         {
1375 #if SIZEOF_WCHAR_T == 4
1376             _T("UCS-4"),
1377 #elif SIZEOF_WCHAR_T = 2
1378             _T("UCS-2"),
1379 #endif
1380             NULL
1381         };
1382 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1383
1384         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1385         {
1386             const wxString nameCS(*names);
1387
1388             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1389             wxString nameXE(nameCS);
1390             #ifdef WORDS_BIGENDIAN
1391                 nameXE += _T("BE");
1392             #else // little endian
1393                 nameXE += _T("LE");
1394             #endif
1395
1396             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1397                        nameXE.c_str());
1398
1399             m2w = iconv_open(nameXE.ToAscii(), cname);
1400             if ( m2w == ICONV_T_INVALID )
1401             {
1402                 // try charset w/o bytesex info (e.g. "UCS4")
1403                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1404                            nameCS.c_str());
1405                 m2w = iconv_open(nameCS.ToAscii(), cname);
1406
1407                 // and check for bytesex ourselves:
1408                 if ( m2w != ICONV_T_INVALID )
1409                 {
1410                     char    buf[2], *bufPtr;
1411                     wchar_t wbuf[2], *wbufPtr;
1412                     size_t  insz, outsz;
1413                     size_t  res;
1414
1415                     buf[0] = 'A';
1416                     buf[1] = 0;
1417                     wbuf[0] = 0;
1418                     insz = 2;
1419                     outsz = SIZEOF_WCHAR_T * 2;
1420                     wbufPtr = wbuf;
1421                     bufPtr = buf;
1422
1423                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1424                                 (char**)&wbufPtr, &outsz);
1425
1426                     if (ICONV_FAILED(res, insz))
1427                     {
1428                         wxLogLastError(wxT("iconv"));
1429                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1430                                    nameCS.c_str());
1431                     }
1432                     else // ok, can convert to this encoding, remember it
1433                     {
1434                         ms_wcCharsetName = nameCS;
1435                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1436                     }
1437                 }
1438             }
1439             else // use charset not requiring byte swapping
1440             {
1441                 ms_wcCharsetName = nameXE;
1442             }
1443         }
1444
1445         wxLogTrace(TRACE_STRCONV,
1446                    wxT("iconv wchar_t charset is \"%s\"%s"),
1447                    ms_wcCharsetName.empty() ? _T("<none>")
1448                                             : ms_wcCharsetName.c_str(),
1449                    ms_wcNeedsSwap ? _T(" (needs swap)")
1450                                   : _T(""));
1451     }
1452     else // we already have ms_wcCharsetName
1453     {
1454         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1455     }
1456
1457     if ( ms_wcCharsetName.empty() )
1458     {
1459         w2m = ICONV_T_INVALID;
1460     }
1461     else
1462     {
1463         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1464         if ( w2m == ICONV_T_INVALID )
1465         {
1466             wxLogTrace(TRACE_STRCONV,
1467                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1468                        ms_wcCharsetName.c_str(), cname.data());
1469         }
1470     }
1471 }
1472
1473 wxMBConv_iconv::~wxMBConv_iconv()
1474 {
1475     if ( m2w != ICONV_T_INVALID )
1476         iconv_close(m2w);
1477     if ( w2m != ICONV_T_INVALID )
1478         iconv_close(w2m);
1479 }
1480
1481 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1482 {
1483 #if wxUSE_THREADS
1484     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1485     //     Unfortunately there is a couple of global wxCSConv objects such as
1486     //     wxConvLocal that are used all over wx code, so we have to make sure
1487     //     the handle is used by at most one thread at the time. Otherwise
1488     //     only a few wx classes would be safe to use from non-main threads
1489     //     as MB<->WC conversion would fail "randomly".
1490     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1491 #endif
1492
1493     size_t inbuf = strlen(psz);
1494     size_t outbuf = n * SIZEOF_WCHAR_T;
1495     size_t res, cres;
1496     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1497     wchar_t *bufPtr = buf;
1498     const char *pszPtr = psz;
1499
1500     if (buf)
1501     {
1502         // have destination buffer, convert there
1503         cres = iconv(m2w,
1504                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1505                      (char**)&bufPtr, &outbuf);
1506         res = n - (outbuf / SIZEOF_WCHAR_T);
1507
1508         if (ms_wcNeedsSwap)
1509         {
1510             // convert to native endianness
1511             for ( unsigned i = 0; i < res; i++ )
1512                 buf[n] = WC_BSWAP(buf[i]);
1513         }
1514
1515         // NB: iconv was given only strlen(psz) characters on input, and so
1516         //     it couldn't convert the trailing zero. Let's do it ourselves
1517         //     if there's some room left for it in the output buffer.
1518         if (res < n)
1519             buf[res] = 0;
1520     }
1521     else
1522     {
1523         // no destination buffer... convert using temp buffer
1524         // to calculate destination buffer requirement
1525         wchar_t tbuf[8];
1526         res = 0;
1527         do {
1528             bufPtr = tbuf;
1529             outbuf = 8*SIZEOF_WCHAR_T;
1530
1531             cres = iconv(m2w,
1532                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1533                          (char**)&bufPtr, &outbuf );
1534
1535             res += 8-(outbuf/SIZEOF_WCHAR_T);
1536         } while ((cres==(size_t)-1) && (errno==E2BIG));
1537     }
1538
1539     if (ICONV_FAILED(cres, inbuf))
1540     {
1541         //VS: it is ok if iconv fails, hence trace only
1542         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1543         return (size_t)-1;
1544     }
1545
1546     return res;
1547 }
1548
1549 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1550 {
1551 #if wxUSE_THREADS
1552     // NB: explained in MB2WC
1553     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1554 #endif
1555
1556     size_t inlen = wxWcslen(psz);
1557     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1558     size_t outbuf = n;
1559     size_t res, cres;
1560
1561     wchar_t *tmpbuf = 0;
1562
1563     if (ms_wcNeedsSwap)
1564     {
1565         // need to copy to temp buffer to switch endianness
1566         // (doing WC_BSWAP twice on the original buffer won't help, as it
1567         //  could be in read-only memory, or be accessed in some other thread)
1568         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1569         for ( size_t i = 0; i < inlen; i++ )
1570             tmpbuf[n] = WC_BSWAP(psz[i]);
1571         tmpbuf[inlen] = L'\0';
1572         psz = tmpbuf;
1573     }
1574
1575     if (buf)
1576     {
1577         // have destination buffer, convert there
1578         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1579
1580         res = n-outbuf;
1581
1582         // NB: iconv was given only wcslen(psz) characters on input, and so
1583         //     it couldn't convert the trailing zero. Let's do it ourselves
1584         //     if there's some room left for it in the output buffer.
1585         if (res < n)
1586             buf[0] = 0;
1587     }
1588     else
1589     {
1590         // no destination buffer... convert using temp buffer
1591         // to calculate destination buffer requirement
1592         char tbuf[16];
1593         res = 0;
1594         do {
1595             buf = tbuf; outbuf = 16;
1596
1597             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1598
1599             res += 16 - outbuf;
1600         } while ((cres==(size_t)-1) && (errno==E2BIG));
1601     }
1602
1603     if (ms_wcNeedsSwap)
1604     {
1605         free(tmpbuf);
1606     }
1607
1608     if (ICONV_FAILED(cres, inbuf))
1609     {
1610         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1611         return (size_t)-1;
1612     }
1613
1614     return res;
1615 }
1616
1617 #endif // HAVE_ICONV
1618
1619
1620 // ============================================================================
1621 // Win32 conversion classes
1622 // ============================================================================
1623
1624 #ifdef wxHAVE_WIN32_MB2WC
1625
1626 // from utils.cpp
1627 #if wxUSE_FONTMAP
1628 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1629 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1630 #endif
1631
1632 class wxMBConv_win32 : public wxMBConv
1633 {
1634 public:
1635     wxMBConv_win32()
1636     {
1637         m_CodePage = CP_ACP;
1638     }
1639
1640 #if wxUSE_FONTMAP
1641     wxMBConv_win32(const wxChar* name)
1642     {
1643         m_CodePage = wxCharsetToCodepage(name);
1644     }
1645
1646     wxMBConv_win32(wxFontEncoding encoding)
1647     {
1648         m_CodePage = wxEncodingToCodepage(encoding);
1649     }
1650 #endif
1651
1652     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1653     {
1654         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1655         // the behaviour is not compatible with the Unix version (using iconv)
1656         // and break the library itself, e.g. wxTextInputStream::NextChar()
1657         // wouldn't work if reading an incomplete MB char didn't result in an
1658         // error
1659         //
1660         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1661         // an error (tested under Windows Server 2003) and apparently it is
1662         // done on purpose, i.e. the function accepts any input in this case
1663         // and although I'd prefer to return error on ill-formed output, our
1664         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1665         // explicitly ill-formed according to RFC 2152) neither so we don't
1666         // even have any fallback here...
1667         //
1668         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1669         // Win XP or newer and if it is specified on older versions, conversion
1670         // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1671         // fails. So we can only use the flag on newer Windows versions.
1672         // Additionally, the flag is not supported by UTF7, symbol and CJK
1673         // encodings. See here:
1674         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1675         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1676         int flags = 0;
1677         if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1678              m_CodePage < 50000 &&
1679              IsAtLeastWin2kSP4() )
1680         {
1681             flags = MB_ERR_INVALID_CHARS;
1682         }
1683         else if ( m_CodePage == CP_UTF8 )
1684         {
1685             // Avoid round-trip in the special case of UTF-8 by using our
1686             // own UTF-8 conversion code:
1687             return wxMBConvUTF8().MB2WC(buf, psz, n);
1688         }
1689
1690         const size_t len = ::MultiByteToWideChar
1691                              (
1692                                 m_CodePage,     // code page
1693                                 flags,          // flags: fall on error
1694                                 psz,            // input string
1695                                 -1,             // its length (NUL-terminated)
1696                                 buf,            // output string
1697                                 buf ? n : 0     // size of output buffer
1698                              );
1699         if ( !len )
1700         {
1701             // function totally failed
1702             return (size_t)-1;
1703         }
1704
1705         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1706         // check if we succeeded, by doing a double trip:
1707         if ( !flags && buf )
1708         {
1709             const size_t mbLen = strlen(psz);
1710             wxCharBuffer mbBuf(mbLen);
1711             if ( ::WideCharToMultiByte
1712                    (
1713                       m_CodePage,
1714                       0,
1715                       buf,
1716                       -1,
1717                       mbBuf.data(),
1718                       mbLen + 1,        // size in bytes, not length
1719                       NULL,
1720                       NULL
1721                    ) == 0 ||
1722                   strcmp(mbBuf, psz) != 0 )
1723             {
1724                 // we didn't obtain the same thing we started from, hence
1725                 // the conversion was lossy and we consider that it failed
1726                 return (size_t)-1;
1727             }
1728         }
1729
1730         // note that it returns count of written chars for buf != NULL and size
1731         // of the needed buffer for buf == NULL so in either case the length of
1732         // the string (which never includes the terminating NUL) is one less
1733         return len - 1;
1734     }
1735
1736     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1737     {
1738         /*
1739             we have a problem here: by default, WideCharToMultiByte() may
1740             replace characters unrepresentable in the target code page with bad
1741             quality approximations such as turning "1/2" symbol (U+00BD) into
1742             "1" for the code pages which don't have it and we, obviously, want
1743             to avoid this at any price
1744
1745             the trouble is that this function does it _silently_, i.e. it won't
1746             even tell us whether it did or not... Win98/2000 and higher provide
1747             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1748             we have to resort to a round trip, i.e. check that converting back
1749             results in the same string -- this is, of course, expensive but
1750             otherwise we simply can't be sure to not garble the data.
1751          */
1752
1753         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1754         // it doesn't work with CJK encodings (which we test for rather roughly
1755         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1756         // supporting it
1757         BOOL usedDef wxDUMMY_INITIALIZE(false);
1758         BOOL *pUsedDef;
1759         int flags;
1760         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1761         {
1762             // it's our lucky day
1763             flags = WC_NO_BEST_FIT_CHARS;
1764             pUsedDef = &usedDef;
1765         }
1766         else // old system or unsupported encoding
1767         {
1768             flags = 0;
1769             pUsedDef = NULL;
1770         }
1771
1772         const size_t len = ::WideCharToMultiByte
1773                              (
1774                                 m_CodePage,     // code page
1775                                 flags,          // either none or no best fit
1776                                 pwz,            // input string
1777                                 -1,             // it is (wide) NUL-terminated
1778                                 buf,            // output buffer
1779                                 buf ? n : 0,    // and its size
1780                                 NULL,           // default "replacement" char
1781                                 pUsedDef        // [out] was it used?
1782                              );
1783
1784         if ( !len )
1785         {
1786             // function totally failed
1787             return (size_t)-1;
1788         }
1789
1790         // if we were really converting, check if we succeeded
1791         if ( buf )
1792         {
1793             if ( flags )
1794             {
1795                 // check if the conversion failed, i.e. if any replacements
1796                 // were done
1797                 if ( usedDef )
1798                     return (size_t)-1;
1799             }
1800             else // we must resort to double tripping...
1801             {
1802                 wxWCharBuffer wcBuf(n);
1803                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1804                         wcscmp(wcBuf, pwz) != 0 )
1805                 {
1806                     // we didn't obtain the same thing we started from, hence
1807                     // the conversion was lossy and we consider that it failed
1808                     return (size_t)-1;
1809                 }
1810             }
1811         }
1812
1813         // see the comment above for the reason of "len - 1"
1814         return len - 1;
1815     }
1816
1817     bool IsOk() const { return m_CodePage != -1; }
1818
1819 private:
1820     static bool CanUseNoBestFit()
1821     {
1822         static int s_isWin98Or2k = -1;
1823
1824         if ( s_isWin98Or2k == -1 )
1825         {
1826             int verMaj, verMin;
1827             switch ( wxGetOsVersion(&verMaj, &verMin) )
1828             {
1829                 case wxWIN95:
1830                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1831                     break;
1832
1833                 case wxWINDOWS_NT:
1834                     s_isWin98Or2k = verMaj >= 5;
1835                     break;
1836
1837                 default:
1838                     // unknown, be conseravtive by default
1839                     s_isWin98Or2k = 0;
1840             }
1841
1842             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1843         }
1844
1845         return s_isWin98Or2k == 1;
1846     }
1847
1848     static bool IsAtLeastWin2kSP4()
1849     {
1850 #ifdef __WXWINCE__
1851         return false;
1852 #else
1853         static int s_isAtLeastWin2kSP4 = -1;
1854
1855         if ( s_isAtLeastWin2kSP4 == -1 )
1856         {
1857             OSVERSIONINFOEX ver;
1858
1859             memset(&ver, 0, sizeof(ver));
1860             ver.dwOSVersionInfoSize = sizeof(ver);
1861             GetVersionEx((OSVERSIONINFO*)&ver);
1862
1863             s_isAtLeastWin2kSP4 =
1864               ((ver.dwMajorVersion > 5) || // Vista+
1865                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1866                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1867                ver.wServicePackMajor >= 4)) // 2000 SP4+
1868               ? 1 : 0;
1869         }
1870
1871         return s_isAtLeastWin2kSP4 == 1;
1872 #endif
1873     }
1874
1875     long m_CodePage;
1876 };
1877
1878 #endif // wxHAVE_WIN32_MB2WC
1879
1880 // ============================================================================
1881 // Cocoa conversion classes
1882 // ============================================================================
1883
1884 #if defined(__WXCOCOA__)
1885
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1889
1890 #include <CoreFoundation/CFString.h>
1891 #include <CoreFoundation/CFStringEncodingExt.h>
1892
1893 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1894 {
1895     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1896     if ( encoding == wxFONTENCODING_DEFAULT )
1897     {
1898         enc = CFStringGetSystemEncoding();
1899     }
1900     else switch( encoding)
1901     {
1902         case wxFONTENCODING_ISO8859_1 :
1903             enc = kCFStringEncodingISOLatin1 ;
1904             break ;
1905         case wxFONTENCODING_ISO8859_2 :
1906             enc = kCFStringEncodingISOLatin2;
1907             break ;
1908         case wxFONTENCODING_ISO8859_3 :
1909             enc = kCFStringEncodingISOLatin3 ;
1910             break ;
1911         case wxFONTENCODING_ISO8859_4 :
1912             enc = kCFStringEncodingISOLatin4;
1913             break ;
1914         case wxFONTENCODING_ISO8859_5 :
1915             enc = kCFStringEncodingISOLatinCyrillic;
1916             break ;
1917         case wxFONTENCODING_ISO8859_6 :
1918             enc = kCFStringEncodingISOLatinArabic;
1919             break ;
1920         case wxFONTENCODING_ISO8859_7 :
1921             enc = kCFStringEncodingISOLatinGreek;
1922             break ;
1923         case wxFONTENCODING_ISO8859_8 :
1924             enc = kCFStringEncodingISOLatinHebrew;
1925             break ;
1926         case wxFONTENCODING_ISO8859_9 :
1927             enc = kCFStringEncodingISOLatin5;
1928             break ;
1929         case wxFONTENCODING_ISO8859_10 :
1930             enc = kCFStringEncodingISOLatin6;
1931             break ;
1932         case wxFONTENCODING_ISO8859_11 :
1933             enc = kCFStringEncodingISOLatinThai;
1934             break ;
1935         case wxFONTENCODING_ISO8859_13 :
1936             enc = kCFStringEncodingISOLatin7;
1937             break ;
1938         case wxFONTENCODING_ISO8859_14 :
1939             enc = kCFStringEncodingISOLatin8;
1940             break ;
1941         case wxFONTENCODING_ISO8859_15 :
1942             enc = kCFStringEncodingISOLatin9;
1943             break ;
1944
1945         case wxFONTENCODING_KOI8 :
1946             enc = kCFStringEncodingKOI8_R;
1947             break ;
1948         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1949             enc = kCFStringEncodingDOSRussian;
1950             break ;
1951
1952 //      case wxFONTENCODING_BULGARIAN :
1953 //          enc = ;
1954 //          break ;
1955
1956         case wxFONTENCODING_CP437 :
1957             enc =kCFStringEncodingDOSLatinUS ;
1958             break ;
1959         case wxFONTENCODING_CP850 :
1960             enc = kCFStringEncodingDOSLatin1;
1961             break ;
1962         case wxFONTENCODING_CP852 :
1963             enc = kCFStringEncodingDOSLatin2;
1964             break ;
1965         case wxFONTENCODING_CP855 :
1966             enc = kCFStringEncodingDOSCyrillic;
1967             break ;
1968         case wxFONTENCODING_CP866 :
1969             enc =kCFStringEncodingDOSRussian ;
1970             break ;
1971         case wxFONTENCODING_CP874 :
1972             enc = kCFStringEncodingDOSThai;
1973             break ;
1974         case wxFONTENCODING_CP932 :
1975             enc = kCFStringEncodingDOSJapanese;
1976             break ;
1977         case wxFONTENCODING_CP936 :
1978             enc =kCFStringEncodingDOSChineseSimplif ;
1979             break ;
1980         case wxFONTENCODING_CP949 :
1981             enc = kCFStringEncodingDOSKorean;
1982             break ;
1983         case wxFONTENCODING_CP950 :
1984             enc = kCFStringEncodingDOSChineseTrad;
1985             break ;
1986         case wxFONTENCODING_CP1250 :
1987             enc = kCFStringEncodingWindowsLatin2;
1988             break ;
1989         case wxFONTENCODING_CP1251 :
1990             enc =kCFStringEncodingWindowsCyrillic ;
1991             break ;
1992         case wxFONTENCODING_CP1252 :
1993             enc =kCFStringEncodingWindowsLatin1 ;
1994             break ;
1995         case wxFONTENCODING_CP1253 :
1996             enc = kCFStringEncodingWindowsGreek;
1997             break ;
1998         case wxFONTENCODING_CP1254 :
1999             enc = kCFStringEncodingWindowsLatin5;
2000             break ;
2001         case wxFONTENCODING_CP1255 :
2002             enc =kCFStringEncodingWindowsHebrew ;
2003             break ;
2004         case wxFONTENCODING_CP1256 :
2005             enc =kCFStringEncodingWindowsArabic ;
2006             break ;
2007         case wxFONTENCODING_CP1257 :
2008             enc = kCFStringEncodingWindowsBalticRim;
2009             break ;
2010 //   This only really encodes to UTF7 (if that) evidently
2011 //        case wxFONTENCODING_UTF7 :
2012 //            enc = kCFStringEncodingNonLossyASCII ;
2013 //            break ;
2014         case wxFONTENCODING_UTF8 :
2015             enc = kCFStringEncodingUTF8 ;
2016             break ;
2017         case wxFONTENCODING_EUC_JP :
2018             enc = kCFStringEncodingEUC_JP;
2019             break ;
2020         case wxFONTENCODING_UTF16 :
2021             enc = kCFStringEncodingUnicode ;
2022             break ;
2023         case wxFONTENCODING_MACROMAN :
2024             enc = kCFStringEncodingMacRoman ;
2025             break ;
2026         case wxFONTENCODING_MACJAPANESE :
2027             enc = kCFStringEncodingMacJapanese ;
2028             break ;
2029         case wxFONTENCODING_MACCHINESETRAD :
2030             enc = kCFStringEncodingMacChineseTrad ;
2031             break ;
2032         case wxFONTENCODING_MACKOREAN :
2033             enc = kCFStringEncodingMacKorean ;
2034             break ;
2035         case wxFONTENCODING_MACARABIC :
2036             enc = kCFStringEncodingMacArabic ;
2037             break ;
2038         case wxFONTENCODING_MACHEBREW :
2039             enc = kCFStringEncodingMacHebrew ;
2040             break ;
2041         case wxFONTENCODING_MACGREEK :
2042             enc = kCFStringEncodingMacGreek ;
2043             break ;
2044         case wxFONTENCODING_MACCYRILLIC :
2045             enc = kCFStringEncodingMacCyrillic ;
2046             break ;
2047         case wxFONTENCODING_MACDEVANAGARI :
2048             enc = kCFStringEncodingMacDevanagari ;
2049             break ;
2050         case wxFONTENCODING_MACGURMUKHI :
2051             enc = kCFStringEncodingMacGurmukhi ;
2052             break ;
2053         case wxFONTENCODING_MACGUJARATI :
2054             enc = kCFStringEncodingMacGujarati ;
2055             break ;
2056         case wxFONTENCODING_MACORIYA :
2057             enc = kCFStringEncodingMacOriya ;
2058             break ;
2059         case wxFONTENCODING_MACBENGALI :
2060             enc = kCFStringEncodingMacBengali ;
2061             break ;
2062         case wxFONTENCODING_MACTAMIL :
2063             enc = kCFStringEncodingMacTamil ;
2064             break ;
2065         case wxFONTENCODING_MACTELUGU :
2066             enc = kCFStringEncodingMacTelugu ;
2067             break ;
2068         case wxFONTENCODING_MACKANNADA :
2069             enc = kCFStringEncodingMacKannada ;
2070             break ;
2071         case wxFONTENCODING_MACMALAJALAM :
2072             enc = kCFStringEncodingMacMalayalam ;
2073             break ;
2074         case wxFONTENCODING_MACSINHALESE :
2075             enc = kCFStringEncodingMacSinhalese ;
2076             break ;
2077         case wxFONTENCODING_MACBURMESE :
2078             enc = kCFStringEncodingMacBurmese ;
2079             break ;
2080         case wxFONTENCODING_MACKHMER :
2081             enc = kCFStringEncodingMacKhmer ;
2082             break ;
2083         case wxFONTENCODING_MACTHAI :
2084             enc = kCFStringEncodingMacThai ;
2085             break ;
2086         case wxFONTENCODING_MACLAOTIAN :
2087             enc = kCFStringEncodingMacLaotian ;
2088             break ;
2089         case wxFONTENCODING_MACGEORGIAN :
2090             enc = kCFStringEncodingMacGeorgian ;
2091             break ;
2092         case wxFONTENCODING_MACARMENIAN :
2093             enc = kCFStringEncodingMacArmenian ;
2094             break ;
2095         case wxFONTENCODING_MACCHINESESIMP :
2096             enc = kCFStringEncodingMacChineseSimp ;
2097             break ;
2098         case wxFONTENCODING_MACTIBETAN :
2099             enc = kCFStringEncodingMacTibetan ;
2100             break ;
2101         case wxFONTENCODING_MACMONGOLIAN :
2102             enc = kCFStringEncodingMacMongolian ;
2103             break ;
2104         case wxFONTENCODING_MACETHIOPIC :
2105             enc = kCFStringEncodingMacEthiopic ;
2106             break ;
2107         case wxFONTENCODING_MACCENTRALEUR :
2108             enc = kCFStringEncodingMacCentralEurRoman ;
2109             break ;
2110         case wxFONTENCODING_MACVIATNAMESE :
2111             enc = kCFStringEncodingMacVietnamese ;
2112             break ;
2113         case wxFONTENCODING_MACARABICEXT :
2114             enc = kCFStringEncodingMacExtArabic ;
2115             break ;
2116         case wxFONTENCODING_MACSYMBOL :
2117             enc = kCFStringEncodingMacSymbol ;
2118             break ;
2119         case wxFONTENCODING_MACDINGBATS :
2120             enc = kCFStringEncodingMacDingbats ;
2121             break ;
2122         case wxFONTENCODING_MACTURKISH :
2123             enc = kCFStringEncodingMacTurkish ;
2124             break ;
2125         case wxFONTENCODING_MACCROATIAN :
2126             enc = kCFStringEncodingMacCroatian ;
2127             break ;
2128         case wxFONTENCODING_MACICELANDIC :
2129             enc = kCFStringEncodingMacIcelandic ;
2130             break ;
2131         case wxFONTENCODING_MACROMANIAN :
2132             enc = kCFStringEncodingMacRomanian ;
2133             break ;
2134         case wxFONTENCODING_MACCELTIC :
2135             enc = kCFStringEncodingMacCeltic ;
2136             break ;
2137         case wxFONTENCODING_MACGAELIC :
2138             enc = kCFStringEncodingMacGaelic ;
2139             break ;
2140 //      case wxFONTENCODING_MACKEYBOARD :
2141 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2142 //          break ;
2143         default :
2144             // because gcc is picky
2145             break ;
2146     } ;
2147     return enc ;
2148 }
2149
2150 class wxMBConv_cocoa : public wxMBConv
2151 {
2152 public:
2153     wxMBConv_cocoa()
2154     {
2155         Init(CFStringGetSystemEncoding()) ;
2156     }
2157
2158 #if wxUSE_FONTMAP
2159     wxMBConv_cocoa(const wxChar* name)
2160     {
2161         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2162     }
2163 #endif
2164
2165     wxMBConv_cocoa(wxFontEncoding encoding)
2166     {
2167         Init( wxCFStringEncFromFontEnc(encoding) );
2168     }
2169
2170     ~wxMBConv_cocoa()
2171     {
2172     }
2173
2174     void Init( CFStringEncoding encoding)
2175     {
2176         m_encoding = encoding ;
2177     }
2178
2179     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2180     {
2181         wxASSERT(szUnConv);
2182
2183         CFStringRef theString = CFStringCreateWithBytes (
2184                                                 NULL, //the allocator
2185                                                 (const UInt8*)szUnConv,
2186                                                 strlen(szUnConv),
2187                                                 m_encoding,
2188                                                 false //no BOM/external representation
2189                                                 );
2190
2191         wxASSERT(theString);
2192
2193         size_t nOutLength = CFStringGetLength(theString);
2194
2195         if (szOut == NULL)
2196         {
2197             CFRelease(theString);
2198             return nOutLength;
2199         }
2200
2201         CFRange theRange = { 0, nOutSize };
2202
2203 #if SIZEOF_WCHAR_T == 4
2204         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2205 #endif
2206
2207         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2208
2209         CFRelease(theString);
2210
2211         szUniCharBuffer[nOutLength] = '\0' ;
2212
2213 #if SIZEOF_WCHAR_T == 4
2214         wxMBConvUTF16 converter ;
2215         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2216         delete[] szUniCharBuffer;
2217 #endif
2218
2219         return nOutLength;
2220     }
2221
2222     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2223     {
2224         wxASSERT(szUnConv);
2225
2226         size_t nRealOutSize;
2227         size_t nBufSize = wxWcslen(szUnConv);
2228         UniChar* szUniBuffer = (UniChar*) szUnConv;
2229
2230 #if SIZEOF_WCHAR_T == 4
2231         wxMBConvUTF16 converter ;
2232         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2233         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2234         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2235         nBufSize /= sizeof(UniChar);
2236 #endif
2237
2238         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2239                                 NULL, //allocator
2240                                 szUniBuffer,
2241                                 nBufSize,
2242                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2243                             );
2244
2245         wxASSERT(theString);
2246
2247         //Note that CER puts a BOM when converting to unicode
2248         //so we  check and use getchars instead in that case
2249         if (m_encoding == kCFStringEncodingUnicode)
2250         {
2251             if (szOut != NULL)
2252                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2253
2254             nRealOutSize = CFStringGetLength(theString) + 1;
2255         }
2256         else
2257         {
2258             CFStringGetBytes(
2259                 theString,
2260                 CFRangeMake(0, CFStringGetLength(theString)),
2261                 m_encoding,
2262                 0, //what to put in characters that can't be converted -
2263                     //0 tells CFString to return NULL if it meets such a character
2264                 false, //not an external representation
2265                 (UInt8*) szOut,
2266                 nOutSize,
2267                 (CFIndex*) &nRealOutSize
2268                         );
2269         }
2270
2271         CFRelease(theString);
2272
2273 #if SIZEOF_WCHAR_T == 4
2274         delete[] szUniBuffer;
2275 #endif
2276
2277         return  nRealOutSize - 1;
2278     }
2279
2280     bool IsOk() const
2281     {
2282         return m_encoding != kCFStringEncodingInvalidId &&
2283               CFStringIsEncodingAvailable(m_encoding);
2284     }
2285
2286 private:
2287     CFStringEncoding m_encoding ;
2288 };
2289
2290 #endif // defined(__WXCOCOA__)
2291
2292 // ============================================================================
2293 // Mac conversion classes
2294 // ============================================================================
2295
2296 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2297
2298 class wxMBConv_mac : public wxMBConv
2299 {
2300 public:
2301     wxMBConv_mac()
2302     {
2303         Init(CFStringGetSystemEncoding()) ;
2304     }
2305
2306 #if wxUSE_FONTMAP
2307     wxMBConv_mac(const wxChar* name)
2308     {
2309         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2310     }
2311 #endif
2312
2313     wxMBConv_mac(wxFontEncoding encoding)
2314     {
2315         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2316     }
2317
2318     ~wxMBConv_mac()
2319     {
2320         OSStatus status = noErr ;
2321         status = TECDisposeConverter(m_MB2WC_converter);
2322         status = TECDisposeConverter(m_WC2MB_converter);
2323     }
2324
2325
2326     void Init( TextEncodingBase encoding)
2327     {
2328         OSStatus status = noErr ;
2329         m_char_encoding = encoding ;
2330         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2331
2332         status = TECCreateConverter(&m_MB2WC_converter,
2333                                     m_char_encoding,
2334                                     m_unicode_encoding);
2335         status = TECCreateConverter(&m_WC2MB_converter,
2336                                     m_unicode_encoding,
2337                                     m_char_encoding);
2338     }
2339
2340     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2341     {
2342         OSStatus status = noErr ;
2343         ByteCount byteOutLen ;
2344         ByteCount byteInLen = strlen(psz) ;
2345         wchar_t *tbuf = NULL ;
2346         UniChar* ubuf = NULL ;
2347         size_t res = 0 ;
2348
2349         if (buf == NULL)
2350         {
2351             //apple specs say at least 32
2352             n = wxMax( 32 , byteInLen ) ;
2353             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2354         }
2355         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2356 #if SIZEOF_WCHAR_T == 4
2357         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2358 #else
2359         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2360 #endif
2361         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2362           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2363 #if SIZEOF_WCHAR_T == 4
2364         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2365         // is not properly terminated we get random characters at the end
2366         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2367         wxMBConvUTF16 converter ;
2368         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2369         free( ubuf ) ;
2370 #else
2371         res = byteOutLen / sizeof( UniChar ) ;
2372 #endif
2373         if ( buf == NULL )
2374              free(tbuf) ;
2375
2376         if ( buf  && res < n)
2377             buf[res] = 0;
2378
2379         return res ;
2380     }
2381
2382     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2383     {
2384         OSStatus status = noErr ;
2385         ByteCount byteOutLen ;
2386         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2387
2388         char *tbuf = NULL ;
2389
2390         if (buf == NULL)
2391         {
2392             //apple specs say at least 32
2393             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2394             tbuf = (char*) malloc( n ) ;
2395         }
2396
2397         ByteCount byteBufferLen = n ;
2398         UniChar* ubuf = NULL ;
2399 #if SIZEOF_WCHAR_T == 4
2400         wxMBConvUTF16 converter ;
2401         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2402         byteInLen = unicharlen ;
2403         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2404         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2405 #else
2406         ubuf = (UniChar*) psz ;
2407 #endif
2408         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2409             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2410 #if SIZEOF_WCHAR_T == 4
2411         free( ubuf ) ;
2412 #endif
2413         if ( buf == NULL )
2414             free(tbuf) ;
2415
2416         size_t res = byteOutLen ;
2417         if ( buf  && res < n)
2418         {
2419             buf[res] = 0;
2420
2421             //we need to double-trip to verify it didn't insert any ? in place
2422             //of bogus characters
2423             wxWCharBuffer wcBuf(n);
2424             size_t pszlen = wxWcslen(psz);
2425             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2426                         wxWcslen(wcBuf) != pszlen ||
2427                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2428             {
2429                 // we didn't obtain the same thing we started from, hence
2430                 // the conversion was lossy and we consider that it failed
2431                 return (size_t)-1;
2432             }
2433         }
2434
2435         return res ;
2436     }
2437
2438     bool IsOk() const
2439         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2440
2441 private:
2442     TECObjectRef m_MB2WC_converter ;
2443     TECObjectRef m_WC2MB_converter ;
2444
2445     TextEncodingBase m_char_encoding ;
2446     TextEncodingBase m_unicode_encoding ;
2447 };
2448
2449 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2450
2451 // ============================================================================
2452 // wxEncodingConverter based conversion classes
2453 // ============================================================================
2454
2455 #if wxUSE_FONTMAP
2456
2457 class wxMBConv_wxwin : public wxMBConv
2458 {
2459 private:
2460     void Init()
2461     {
2462         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2463                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2464     }
2465
2466 public:
2467     // temporarily just use wxEncodingConverter stuff,
2468     // so that it works while a better implementation is built
2469     wxMBConv_wxwin(const wxChar* name)
2470     {
2471         if (name)
2472             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2473         else
2474             m_enc = wxFONTENCODING_SYSTEM;
2475
2476         Init();
2477     }
2478
2479     wxMBConv_wxwin(wxFontEncoding enc)
2480     {
2481         m_enc = enc;
2482
2483         Init();
2484     }
2485
2486     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2487     {
2488         size_t inbuf = strlen(psz);
2489         if (buf)
2490         {
2491             if (!m2w.Convert(psz,buf))
2492                 return (size_t)-1;
2493         }
2494         return inbuf;
2495     }
2496
2497     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2498     {
2499         const size_t inbuf = wxWcslen(psz);
2500         if (buf)
2501         {
2502             if (!w2m.Convert(psz,buf))
2503                 return (size_t)-1;
2504         }
2505
2506         return inbuf;
2507     }
2508
2509     bool IsOk() const { return m_ok; }
2510
2511 public:
2512     wxFontEncoding m_enc;
2513     wxEncodingConverter m2w, w2m;
2514
2515     // were we initialized successfully?
2516     bool m_ok;
2517
2518     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2519 };
2520
2521 // make the constructors available for unit testing
2522 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2523 {
2524     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2525     if ( !result->IsOk() )
2526     {
2527         delete result;
2528         return 0;
2529     }
2530     return result;
2531 }
2532
2533 #endif // wxUSE_FONTMAP
2534
2535 // ============================================================================
2536 // wxCSConv implementation
2537 // ============================================================================
2538
2539 void wxCSConv::Init()
2540 {
2541     m_name = NULL;
2542     m_convReal =  NULL;
2543     m_deferred = true;
2544 }
2545
2546 wxCSConv::wxCSConv(const wxChar *charset)
2547 {
2548     Init();
2549
2550     if ( charset )
2551     {
2552         SetName(charset);
2553     }
2554
2555 #if wxUSE_FONTMAP
2556     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2557 #else
2558     m_encoding = wxFONTENCODING_SYSTEM;
2559 #endif
2560 }
2561
2562 wxCSConv::wxCSConv(wxFontEncoding encoding)
2563 {
2564     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2565     {
2566         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2567
2568         encoding = wxFONTENCODING_SYSTEM;
2569     }
2570
2571     Init();
2572
2573     m_encoding = encoding;
2574 }
2575
2576 wxCSConv::~wxCSConv()
2577 {
2578     Clear();
2579 }
2580
2581 wxCSConv::wxCSConv(const wxCSConv& conv)
2582         : wxMBConv()
2583 {
2584     Init();
2585
2586     SetName(conv.m_name);
2587     m_encoding = conv.m_encoding;
2588 }
2589
2590 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2591 {
2592     Clear();
2593
2594     SetName(conv.m_name);
2595     m_encoding = conv.m_encoding;
2596
2597     return *this;
2598 }
2599
2600 void wxCSConv::Clear()
2601 {
2602     free(m_name);
2603     delete m_convReal;
2604
2605     m_name = NULL;
2606     m_convReal = NULL;
2607 }
2608
2609 void wxCSConv::SetName(const wxChar *charset)
2610 {
2611     if (charset)
2612     {
2613         m_name = wxStrdup(charset);
2614         m_deferred = true;
2615     }
2616 }
2617
2618 #if wxUSE_FONTMAP
2619 #include "wx/hashmap.h"
2620
2621 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
2622                      wxEncodingNameCache );
2623
2624 static wxEncodingNameCache gs_nameCache;
2625 #endif
2626
2627 wxMBConv *wxCSConv::DoCreate() const
2628 {
2629 #if wxUSE_FONTMAP
2630     wxLogTrace(TRACE_STRCONV,
2631                wxT("creating conversion for %s"),
2632                (m_name ? m_name
2633                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2634 #endif // wxUSE_FONTMAP
2635
2636     // check for the special case of ASCII or ISO8859-1 charset: as we have
2637     // special knowledge of it anyhow, we don't need to create a special
2638     // conversion object
2639     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2640             m_encoding == wxFONTENCODING_DEFAULT )
2641     {
2642         // don't convert at all
2643         return NULL;
2644     }
2645
2646     // we trust OS to do conversion better than we can so try external
2647     // conversion methods first
2648     //
2649     // the full order is:
2650     //      1. OS conversion (iconv() under Unix or Win32 API)
2651     //      2. hard coded conversions for UTF
2652     //      3. wxEncodingConverter as fall back
2653
2654     // step (1)
2655 #ifdef HAVE_ICONV
2656 #if !wxUSE_FONTMAP
2657     if ( m_name )
2658 #endif // !wxUSE_FONTMAP
2659     {
2660         wxString name(m_name);
2661         wxFontEncoding encoding(m_encoding);
2662
2663         if ( !name.empty() )
2664         {
2665             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2666             if ( conv->IsOk() )
2667                 return conv;
2668
2669             delete conv;
2670
2671 #if wxUSE_FONTMAP
2672             encoding =
2673                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2674 #endif // wxUSE_FONTMAP
2675         }
2676 #if wxUSE_FONTMAP
2677         {
2678             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2679             if ( it != gs_nameCache.end() )
2680             {
2681                 if ( it->second.empty() )
2682                     return NULL;
2683
2684                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2685                 if ( conv->IsOk() )
2686                     return conv;
2687
2688                 delete conv;
2689             }
2690
2691             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2692
2693             for ( ; *names; ++names )
2694             {
2695                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2696                 if ( conv->IsOk() )
2697                 {
2698                     gs_nameCache[encoding] = *names;
2699                     return conv;
2700                 }
2701
2702                 delete conv;
2703             }
2704
2705             gs_nameCache[encoding] = _T(""); // cache the failure
2706         }
2707 #endif // wxUSE_FONTMAP
2708     }
2709 #endif // HAVE_ICONV
2710
2711 #ifdef wxHAVE_WIN32_MB2WC
2712     {
2713 #if wxUSE_FONTMAP
2714         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2715                                       : new wxMBConv_win32(m_encoding);
2716         if ( conv->IsOk() )
2717             return conv;
2718
2719         delete conv;
2720 #else
2721         return NULL;
2722 #endif
2723     }
2724 #endif // wxHAVE_WIN32_MB2WC
2725 #if defined(__WXMAC__)
2726     {
2727         // leave UTF16 and UTF32 to the built-ins of wx
2728         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2729             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2730         {
2731
2732 #if wxUSE_FONTMAP
2733             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2734                                         : new wxMBConv_mac(m_encoding);
2735 #else
2736             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2737 #endif
2738             if ( conv->IsOk() )
2739                  return conv;
2740
2741             delete conv;
2742         }
2743     }
2744 #endif
2745 #if defined(__WXCOCOA__)
2746     {
2747         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2748         {
2749
2750 #if wxUSE_FONTMAP
2751             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2752                                           : new wxMBConv_cocoa(m_encoding);
2753 #else
2754             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2755 #endif
2756             if ( conv->IsOk() )
2757                  return conv;
2758
2759             delete conv;
2760         }
2761     }
2762 #endif
2763     // step (2)
2764     wxFontEncoding enc = m_encoding;
2765 #if wxUSE_FONTMAP
2766     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2767     {
2768         // use "false" to suppress interactive dialogs -- we can be called from
2769         // anywhere and popping up a dialog from here is the last thing we want to
2770         // do
2771         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2772     }
2773 #endif // wxUSE_FONTMAP
2774
2775     switch ( enc )
2776     {
2777         case wxFONTENCODING_UTF7:
2778              return new wxMBConvUTF7;
2779
2780         case wxFONTENCODING_UTF8:
2781              return new wxMBConvUTF8;
2782
2783         case wxFONTENCODING_UTF16BE:
2784              return new wxMBConvUTF16BE;
2785
2786         case wxFONTENCODING_UTF16LE:
2787              return new wxMBConvUTF16LE;
2788
2789         case wxFONTENCODING_UTF32BE:
2790              return new wxMBConvUTF32BE;
2791
2792         case wxFONTENCODING_UTF32LE:
2793              return new wxMBConvUTF32LE;
2794
2795         default:
2796              // nothing to do but put here to suppress gcc warnings
2797              ;
2798     }
2799
2800     // step (3)
2801 #if wxUSE_FONTMAP
2802     {
2803         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2804                                       : new wxMBConv_wxwin(m_encoding);
2805         if ( conv->IsOk() )
2806             return conv;
2807
2808         delete conv;
2809     }
2810 #endif // wxUSE_FONTMAP
2811
2812     // NB: This is a hack to prevent deadlock. What could otherwise happen
2813     //     in Unicode build: wxConvLocal creation ends up being here
2814     //     because of some failure and logs the error. But wxLog will try to
2815     //     attach timestamp, for which it will need wxConvLocal (to convert
2816     //     time to char* and then wchar_t*), but that fails, tries to log
2817     //     error, but wxLog has a (already locked) critical section that
2818     //     guards static buffer.
2819     static bool alreadyLoggingError = false;
2820     if (!alreadyLoggingError)
2821     {
2822         alreadyLoggingError = true;
2823         wxLogError(_("Cannot convert from the charset '%s'!"),
2824                    m_name ? m_name
2825                       :
2826 #if wxUSE_FONTMAP
2827                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2828 #else // !wxUSE_FONTMAP
2829                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2830 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2831               );
2832         alreadyLoggingError = false;
2833     }
2834
2835     return NULL;
2836 }
2837
2838 void wxCSConv::CreateConvIfNeeded() const
2839 {
2840     if ( m_deferred )
2841     {
2842         wxCSConv *self = (wxCSConv *)this; // const_cast
2843
2844 #if wxUSE_INTL
2845         // if we don't have neither the name nor the encoding, use the default
2846         // encoding for this system
2847         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2848         {
2849             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2850         }
2851 #endif // wxUSE_INTL
2852
2853         self->m_convReal = DoCreate();
2854         self->m_deferred = false;
2855     }
2856 }
2857
2858 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2859 {
2860     CreateConvIfNeeded();
2861
2862     if (m_convReal)
2863         return m_convReal->MB2WC(buf, psz, n);
2864
2865     // latin-1 (direct)
2866     size_t len = strlen(psz);
2867
2868     if (buf)
2869     {
2870         for (size_t c = 0; c <= len; c++)
2871             buf[c] = (unsigned char)(psz[c]);
2872     }
2873
2874     return len;
2875 }
2876
2877 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2878 {
2879     CreateConvIfNeeded();
2880
2881     if (m_convReal)
2882         return m_convReal->WC2MB(buf, psz, n);
2883
2884     // latin-1 (direct)
2885     const size_t len = wxWcslen(psz);
2886     if (buf)
2887     {
2888         for (size_t c = 0; c <= len; c++)
2889         {
2890             if (psz[c] > 0xFF)
2891                 return (size_t)-1;
2892             buf[c] = (char)psz[c];
2893         }
2894     }
2895     else
2896     {
2897         for (size_t c = 0; c <= len; c++)
2898         {
2899             if (psz[c] > 0xFF)
2900                 return (size_t)-1;
2901         }
2902     }
2903
2904     return len;
2905 }
2906
2907 // ----------------------------------------------------------------------------
2908 // globals
2909 // ----------------------------------------------------------------------------
2910
2911 #ifdef __WINDOWS__
2912     static wxMBConv_win32 wxConvLibcObj;
2913 #elif defined(__WXMAC__) && !defined(__MACH__)
2914     static wxMBConv_mac wxConvLibcObj ;
2915 #else
2916     static wxMBConvLibc wxConvLibcObj;
2917 #endif
2918
2919 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2920 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2921 static wxMBConvUTF7 wxConvUTF7Obj;
2922 static wxMBConvUTF8 wxConvUTF8Obj;
2923
2924 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2925 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2926 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2927 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2928 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2929 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2930 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2931 #ifdef __WXOSX__
2932                                     wxConvUTF8Obj;
2933 #else
2934                                     wxConvLibcObj;
2935 #endif
2936
2937
2938 #else // !wxUSE_WCHAR_T
2939
2940 // stand-ins in absence of wchar_t
2941 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2942                                 wxConvISO8859_1,
2943                                 wxConvLocal,
2944                                 wxConvUTF8;
2945
2946 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T