src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
// name of the trace mask of this module (presumably meant to be passed to
// wxLogTrace(); it is not used in this part of the file)
#define TRACE_STRCONV _T("strconv")

// WC_UTF16 is defined when wchar_t is only 2 bytes wide: the conversion code
// below then goes through encode_utf16()/decode_utf16() to represent
// characters above 0xffff as surrogate pairs in wide strings
#if SIZEOF_WCHAR_T == 2
    #define WC_UTF16
#endif
  84
  85 // ============================================================================
  86 // implementation
  87 // ============================================================================
  88
  89 // ----------------------------------------------------------------------------
  90 // UTF-16 en/decoding to/from UCS-4
  91 // ----------------------------------------------------------------------------
  92
  93
  94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  95 {
  96     if (input<=0xffff)
  97     {
  98         if (output)
  99             *output = (wxUint16) input;
 100         return 1;
 101     }
 102     else if (input>=0x110000)
 103     {
 104         return (size_t)-1;
 105     }
 106     else
 107     {
 108         if (output)
 109         {
 110             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 111             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 112         }
 113         return 2;
 114     }
 115 }
 116
 117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 118 {
 119     if ((*input<0xd800) || (*input>0xdfff))
 120     {
 121         output = *input;
 122         return 1;
 123     }
 124     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 125     {
 126         output = *input;
 127         return (size_t)-1;
 128     }
 129     else
 130     {
 131         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 132         return 2;
 133     }
 134 }
 135
 136
 137 // ----------------------------------------------------------------------------
 138 // wxMBConv
 139 // ----------------------------------------------------------------------------
 140
// The destructor is intentionally empty, it is only defined out of line here.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 145
 146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 147 {
 148     if ( psz )
 149     {
 150         // calculate the length of the buffer needed first
 151         size_t nLen = MB2WC(NULL, psz, 0);
 152         if ( nLen != (size_t)-1 )
 153         {
 154             // now do the actual conversion
 155             wxWCharBuffer buf(nLen);
 156             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 157             if ( nLen != (size_t)-1 )
 158             {
 159                 return buf;
 160             }
 161         }
 162     }
 163
 164     wxWCharBuffer buf((wchar_t *)NULL);
 165
 166     return buf;
 167 }
 168
 169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 170 {
 171     if ( pwz )
 172     {
 173         size_t nLen = WC2MB(NULL, pwz, 0);
 174         if ( nLen != (size_t)-1 )
 175         {
 176             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 177             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 178             if ( nLen != (size_t)-1 )
 179             {
 180                 return buf;
 181             }
 182         }
 183     }
 184
 185     wxCharBuffer buf((char *)NULL);
 186
 187     return buf;
 188 }
 189
 190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 191 {
 192     wxASSERT(pOutSize != NULL);
 193
 194     const char* szEnd = szString + nStringLen + 1;
 195     const char* szPos = szString;
 196     const char* szStart = szPos;
 197
 198     size_t nActualLength = 0;
 199     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 200
 201     wxWCharBuffer theBuffer(nCurrentSize);
 202
 203     //Convert the string until the length() is reached, continuing the
 204     //loop every time a null character is reached
 205     while(szPos != szEnd)
 206     {
 207         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 208
 209         //Get the length of the current (sub)string
 210         size_t nLen = MB2WC(NULL, szPos, 0);
 211
 212         //Invalid conversion?
 213         if( nLen == (size_t)-1 )
 214         {
 215             *pOutSize = 0;
 216             theBuffer.data()[0u] = wxT('\0');
 217             return theBuffer;
 218         }
 219
 220
 221         //Increase the actual length (+1 for current null character)
 222         nActualLength += nLen + 1;
 223
 224         //if buffer too big, realloc the buffer
 225         if (nActualLength > (nCurrentSize+1))
 226         {
 227             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 228             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 229             theBuffer = theNewBuffer;
 230             nCurrentSize <<= 1;
 231         }
 232
 233         //Convert the current (sub)string
 234         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 235         {
 236             *pOutSize = 0;
 237             theBuffer.data()[0u] = wxT('\0');
 238             return theBuffer;
 239         }
 240
 241         //Increment to next (sub)string
 242         //Note that we have to use strlen instead of nLen here
 243         //because XX2XX gives us the size of the output buffer,
 244         //which is not necessarily the length of the string
 245         szPos += strlen(szPos) + 1;
 246     }
 247
 248     //success - return actual length and the buffer
 249     *pOutSize = nActualLength;
 250     return theBuffer;
 251 }
 252
 253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 254 {
 255     wxASSERT(pOutSize != NULL);
 256
 257     const wchar_t* szEnd = szString + nStringLen + 1;
 258     const wchar_t* szPos = szString;
 259     const wchar_t* szStart = szPos;
 260
 261     size_t nActualLength = 0;
 262     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 263
 264     wxCharBuffer theBuffer(nCurrentSize);
 265
 266     //Convert the string until the length() is reached, continuing the
 267     //loop every time a null character is reached
 268     while(szPos != szEnd)
 269     {
 270         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 271
 272         //Get the length of the current (sub)string
 273         size_t nLen = WC2MB(NULL, szPos, 0);
 274
 275         //Invalid conversion?
 276         if( nLen == (size_t)-1 )
 277         {
 278             *pOutSize = 0;
 279             theBuffer.data()[0u] = wxT('\0');
 280             return theBuffer;
 281         }
 282
 283         //Increase the actual length (+1 for current null character)
 284         nActualLength += nLen + 1;
 285
 286         //if buffer too big, realloc the buffer
 287         if (nActualLength > (nCurrentSize+1))
 288         {
 289             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 290             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 291             theBuffer = theNewBuffer;
 292             nCurrentSize <<= 1;
 293         }
 294
 295         //Convert the current (sub)string
 296         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 297         {
 298             *pOutSize = 0;
 299             theBuffer.data()[0u] = wxT('\0');
 300             return theBuffer;
 301         }
 302
 303         //Increment to next (sub)string
 304         //Note that we have to use wxWcslen instead of nLen here
 305         //because XX2XX gives us the size of the output buffer,
 306         //which is not necessarily the length of the string
 307         szPos += wxWcslen(szPos) + 1;
 308     }
 309
 310     //success - return actual length and the buffer
 311     *pOutSize = nActualLength;
 312     return theBuffer;
 313 }
 314
 315 // ----------------------------------------------------------------------------
 316 // wxMBConvLibc
 317 // ----------------------------------------------------------------------------
 318
 319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 320 {
 321     return wxMB2WC(buf, psz, n);
 322 }
 323
 324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 325 {
 326     return wxWC2MB(buf, psz, n);
 327 }
 328
 329 #ifdef __UNIX__
 330
 331 // ----------------------------------------------------------------------------
 332 // wxConvBrokenFileNames
 333 // ----------------------------------------------------------------------------
 334
 335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 336 {
 337     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 338                   || wxStricmp(charset, _T("UTF8")) == 0  )
 339         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 340     else
 341         m_conv = new wxCSConv(charset);
 342 }
 343
 344 size_t
 345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 346                              const char *psz,
 347                              size_t outputSize) const
 348 {
 349     return m_conv->MB2WC( outputBuf, psz, outputSize );
 350 }
 351
 352 size_t
 353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 354                              const wchar_t *psz,
 355                              size_t outputSize) const
 356 {
 357     return m_conv->WC2MB( outputBuf, psz, outputSize );
 358 }
 359
 360 #endif
 361
 362 // ----------------------------------------------------------------------------
 363 // UTF-7
 364 // ----------------------------------------------------------------------------
 365
 366 // Implementation (C) 2004 Fredrik Roubert
 367
//
// BASE64 decoding table
//
// The table is indexed by the value of the input byte (0..255): the entry is
// the 6 bit value the character stands for in BASE64 or 0xff if the character
// doesn't belong to the BASE64 alphabet at all.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // '+' and '/'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // '0'..'7'
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '8' and '9'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A'..'G'
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // 'X'..'Z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 'a'..'g'
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // 'x'..'z'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x80 and up: all invalid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 406
// Decode the NUL-terminated UTF-7 string psz to wide characters.
//
// If buf is NULL nothing is stored and only the length of the result is
// computed. Otherwise decoding stops once n characters have been produced and
// the terminating NUL is only appended if fewer than n characters were stored.
//
// Returns the number of wide characters (not counting the NUL) or (size_t)-1
// if a '+' starting a BASE64 run is not followed by enough BASE64 characters
// to decode even a single byte.
//
// NOTE(review): inside a BASE64 run the output position is not compared with
// n (only the outer loop does it), so a long run looks able to store more than
// n characters -- confirm that callers always size buf using a previous call
// with buf == NULL.
size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while ( *psz && (!buf || (len < n)) )
    {
        unsigned char cc = *psz++;
        if (cc != '+')
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;
        }
        else if (*psz == '-')
        {
            // encoded plus sign
            if (buf)
                *buf++ = cc;
            len++;
            psz++;
        }
        else // start of BASE64 encoded string
        {
            // d accumulates the decoded bits, l is the number of its low bits
            // which were not output yet, lsb tells whether the next byte is
            // the low (true) or the high (false) half of a 16 bit unit and ok
            // becomes true as soon as at least one byte has been decoded
            bool lsb, ok;
            unsigned int d, l;
            for ( ok = lsb = false, d = 0, l = 0;
                  (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
                  psz++ )
            {
                // each BASE64 character contributes 6 more bits
                d <<= 6;
                d += cc;
                for (l += 6; l >= 8; lsb = !lsb)
                {
                    // extract the 8 oldest bits still pending
                    unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
                    if (lsb)
                    {
                        // low byte: completes the current character
                        if (buf)
                            *buf++ |= c;
                        len ++;
                    }
                    else
                    {
                        // high byte: starts a new character
                        if (buf)
                            *buf = (wchar_t)(c << 8);
                    }

                    ok = true;
                }
            }

            if ( !ok )
            {
                // in valid UTF7 we should have valid characters after '+'
                return (size_t)-1;
            }

            // the '-' ending the BASE64 run is not part of the text
            if (*psz == '-')
                psz++;
        }
    }

    if ( buf && (len < n) )
        *buf = '\0';

    return len;
}
 474
//
// BASE64 encoding table
//
// Maps a 6 bit value (0..63) to the character representing it in BASE64.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 489
//
// UTF-7 encoding table
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
// The table is indexed by the character code (0..127), 16 entries per row.
// wxMBConvUTF7::WC2MB() below only writes the characters of class 0 directly,
// all the others are BASE64 encoded.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 509
// Encode the NUL-terminated wide string psz as UTF-7.
//
// If buf is NULL nothing is stored and only the length of the result is
// computed. Otherwise encoding stops once n bytes have been produced and the
// terminating NUL is only appended if fewer than n bytes were stored.
//
// Returns the number of bytes (not counting the NUL) or, if wchar_t is wider
// than 16 bits, (size_t)-1 if a character above 0xffff starts a BASE64 run.
//
// NOTE(review): the output position is only compared with n at the start of
// each iteration of the outer loop, a BASE64 run is always written entirely --
// confirm that callers always size buf using a previous call with
// buf == NULL.
//
// NOTE(review): the check for characters above 0xffff is not repeated for the
// subsequent characters of a BASE64 run, only their low 16 bits are encoded.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{


    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;
        if (cc < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // every encoded run starts with '+' and ends with '-', a literal
            // '+' is written as just "+-"
            if (buf)
                *buf++ = '+';
            len++;
            if (cc != '+')
            {
                // BASE64 encode string
                //
                // d accumulates the bits to encode, l is the number of its
                // low bits which were not output yet and lsb selects the high
                // (0) or the low (1) byte of the current character
                unsigned int lsb, d, l;
                for (d = 0, l = 0; /*nothing*/; psz++)
                {
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        // output as many groups of 6 bits as are available
                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            if (buf)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }

                    // the run ends at the end of the string or at the next
                    // character which can be written directly; this character
                    // is not consumed here but left for the outer loop
                    cc = *psz;
                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
                        break;
                }

                // flush the remaining (2 or 4) bits, padded with zeroes on
                // the right to make a full group of 6
                if (l != 0)
                {
                    if (buf)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }
            if (buf)
                *buf++ = '-';
            len++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 577
 578 // ----------------------------------------------------------------------------
 579 // UTF-8
 580 // ----------------------------------------------------------------------------
 581
// utf8_max[i] is the greatest value which can be encoded using i + 1 bytes in
// UTF-8; the last element is never exceeded by a wxUint32 and so stops the
// search loop in wxMBConvUTF8::WC2MB()
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area to which the bytes which are invalid in
// a UTF-8 encoded string are (temporarily) remapped when the
// MAP_INVALID_UTF8_TO_PUA option is used: the byte b becomes the character
// wxUnicodePUA + b, wxUnicodePUAEnd itself is not part of the range
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 589
// Decode the NUL-terminated UTF-8 string psz to wide characters.
//
// If buf is NULL nothing is stored and only the length of the result is
// computed. Otherwise decoding stops once n characters have been produced and
// the terminating NUL is only appended if fewer than n characters were stored.
//
// What happens with the invalid sequences depends on m_options:
//  - with MAP_INVALID_UTF8_TO_PUA each byte of the sequence is mapped to the
//    character wxUnicodePUA + byte (and the characters of this range found in
//    the input are treated as invalid too to keep the mapping reversible)
//  - with MAP_INVALID_UTF8_TO_OCTAL each byte is replaced by a backslash
//    followed by 3 octal digits (and the backslashes of the input are
//    doubled)
//  - otherwise the conversion fails
//
// Returns the number of wide characters (not counting the NUL) or (size_t)-1
// on failure.
//
// NOTE(review): the output position is not compared with n before storing the
// second half of a surrogate pair (WC_UTF16 case) -- confirm that callers
// always size buf using a previous call with buf == NULL.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember the start of the sequence to be able to remap all of its
        // bytes if it turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;
        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // cnt is now the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence
                invalid = true;
            }
            else
            {
                // index in utf8_max of the greatest value which could have
                // been encoded using one byte less
                unsigned ocnt = cnt - 1;
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence
                        invalid = true;
                        break;
                    }
                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }

                // a value which would have fit in a shorter sequence means
                // that a longer than necessary encoding was used
                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == (size_t)-1)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }
            if (invalid)
            {
                // all the bytes from opsz up to (but not including) psz
                // belong to the invalid sequence
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != (size_t)-1);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only stored if all 4 of its
                        // characters fit but it is always counted in len
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }
                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return (size_t)-1;
                }
            }
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 726
 727 static inline bool isoctal(wchar_t wch)
 728 {
 729     return L'0' <= wch && wch <= L'7';
 730 }
 731
 732 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 733 {
 734     size_t len = 0;
 735
 736     while (*psz && ((!buf) || (len < n)))
 737     {
 738         wxUint32 cc;
 739 #ifdef WC_UTF16
 740         // cast is ok for WC_UTF16
 741         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 742         psz += (pa == (size_t)-1) ? 1 : pa;
 743 #else
 744         cc=(*psz++) & 0x7fffffff;
 745 #endif
 746
 747         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 748                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 749         {
 750             if (buf)
 751                 *buf++ = (char)(cc - wxUnicodePUA);
 752             len++;
 753         }
 754         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 755                     && cc == L'\\' && psz[0] == L'\\' )
 756         {
 757             if (buf)
 758                 *buf++ = (char)cc;
 759             psz++;
 760             len++;
 761         }
 762         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 763                     cc == L'\\' &&
 764                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 765         {
 766             if (buf)
 767             {
 768                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 769                                  (psz[1] - L'0')*010 +
 770                                  (psz[2] - L'0'));
 771             }
 772
 773             psz += 3;
 774             len++;
 775         }
 776         else
 777         {
 778             unsigned cnt;
 779             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 780             if (!cnt)
 781             {
 782                 // plain ASCII char
 783                 if (buf)
 784                     *buf++ = (char) cc;
 785                 len++;
 786             }
 787
 788             else
 789             {
 790                 len += cnt + 1;
 791                 if (buf)
 792                 {
 793                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 794                     while (cnt--)
 795                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 796                 }
 797             }
 798         }
 799     }
 800
 801     if (buf && (len<n))
 802         *buf = 0;
 803
 804     return len;
 805 }
 806
 807 // ----------------------------------------------------------------------------
 808 // UTF-16
 809 // ----------------------------------------------------------------------------
 810
// The functions below are written for the "straight" and "swap" converters:
// which of the BE and LE classes each of these names stands for depends on
// whether WORDS_BIGENDIAN is defined. "straight" is the class copying the 16
// bit units unchanged and "swap" the one exchanging the 2 bytes of each unit.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 818
 819
 820 #ifdef WC_UTF16
 821
 822 // copy 16bit MB to 16bit String
 823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 824 {
 825     size_t len=0;
 826
 827     while (*(wxUint16*)psz && (!buf || len < n))
 828     {
 829         if (buf)
 830             *buf++ = *(wxUint16*)psz;
 831         len++;
 832
 833         psz += sizeof(wxUint16);
 834     }
 835     if (buf && len<n)   *buf=0;
 836
 837     return len;
 838 }
 839
 840
 841 // copy 16bit String to 16bit MB
 842 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 843 {
 844     size_t len=0;
 845
 846     while (*psz && (!buf || len < n))
 847     {
 848         if (buf)
 849         {
 850             *(wxUint16*)buf = *psz;
 851             buf += sizeof(wxUint16);
 852         }
 853         len += sizeof(wxUint16);
 854         psz++;
 855     }
 856     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 857
 858     return len;
 859 }
 860
 861
 862 // swap 16bit MB to 16bit String
 863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 864 {
 865     size_t len = 0;
 866
 867     while ( *psz && (!buf || len < n) )
 868     {
 869         if ( buf )
 870         {
 871             ((char *)buf)[0] = psz[1];
 872             ((char *)buf)[1] = psz[0];
 873             buf++;
 874         }
 875         len++;
 876         psz += 2;
 877     }
 878
 879     if ( buf && len < n )
 880         *buf = L'\0';
 881
 882     return len;
 883 }
 884
 885
 886 // swap 16bit MB to 16bit String
 887 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 888 {
 889     size_t len=0;
 890
 891     while (*psz && (!buf || len < n))
 892     {
 893         if (buf)
 894         {
 895             *buf++ = ((char*)psz)[1];
 896             *buf++ = ((char*)psz)[0];
 897         }
 898         len += sizeof(wxUint16);
 899         psz++;
 900     }
 901     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 902
 903     return len;
 904 }
 905
 906
 907 #else // WC_UTF16
 908
 909
 910 // copy 16bit MB to 32bit String
 911 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 912 {
 913     size_t len=0;
 914
 915     while (*(wxUint16*)psz && (!buf || len < n))
 916     {
 917         wxUint32 cc;
 918         size_t pa=decode_utf16((wxUint16*)psz, cc);
 919         if (pa == (size_t)-1)
 920             return pa;
 921
 922         if (buf)
 923             *buf++ = (wchar_t)cc;
 924         len++;
 925         psz += pa * sizeof(wxUint16);
 926     }
 927     if (buf && len<n)   *buf=0;
 928
 929     return len;
 930 }
 931
 932
 933 // copy 32bit String to 16bit MB
 934 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 935 {
 936     size_t len=0;
 937
 938     while (*psz && (!buf || len < n))
 939     {
 940         wxUint16 cc[2];
 941         size_t pa=encode_utf16(*psz, cc);
 942
 943         if (pa == (size_t)-1)
 944             return pa;
 945
 946         if (buf)
 947         {
 948             *(wxUint16*)buf = cc[0];
 949             buf += sizeof(wxUint16);
 950             if (pa > 1)
 951             {
 952                 *(wxUint16*)buf = cc[1];
 953                 buf += sizeof(wxUint16);
 954             }
 955         }
 956
 957         len += pa*sizeof(wxUint16);
 958         psz++;
 959     }
 960     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 961
 962     return len;
 963 }
 964
 965
 966 // swap 16bit MB to 32bit String
 967 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 968 {
 969     size_t len=0;
 970
 971     while (*(wxUint16*)psz && (!buf || len < n))
 972     {
 973         wxUint32 cc;
 974         char tmp[4];
 975         tmp[0]=psz[1];  tmp[1]=psz[0];
 976         tmp[2]=psz[3];  tmp[3]=psz[2];
 977
 978         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 979         if (pa == (size_t)-1)
 980             return pa;
 981
 982         if (buf)
 983             *buf++ = (wchar_t)cc;
 984
 985         len++;
 986         psz += pa * sizeof(wxUint16);
 987     }
 988     if (buf && len<n)   *buf=0;
 989
 990     return len;
 991 }
 992
 993
 994 // swap 32bit String to 16bit MB
 995 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 996 {
 997     size_t len=0;
 998
 999     while (*psz && (!buf || len < n))
1000     {
1001         wxUint16 cc[2];
1002         size_t pa=encode_utf16(*psz, cc);
1003
1004         if (pa == (size_t)-1)
1005             return pa;
1006
1007         if (buf)
1008         {
1009             *buf++ = ((char*)cc)[1];
1010             *buf++ = ((char*)cc)[0];
1011             if (pa > 1)
1012             {
1013                 *buf++ = ((char*)cc)[3];
1014                 *buf++ = ((char*)cc)[2];
1015             }
1016         }
1017
1018         len += pa*sizeof(wxUint16);
1019         psz++;
1020     }
1021     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1022
1023     return len;
1024 }
1025
1026 #endif // WC_UTF16
1027
1028
1029 // ----------------------------------------------------------------------------
1030 // UTF-32
1031 // ----------------------------------------------------------------------------
1032
1033 #ifdef WORDS_BIGENDIAN
1034 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1035 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1036 #else
1037 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1038 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1039 #endif
1040
1041
1042 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1043 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1044
1045
1046 #ifdef WC_UTF16
1047
1048 // copy 32bit MB to 16bit String
1049 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1050 {
1051     size_t len=0;
1052
1053     while (*(wxUint32*)psz && (!buf || len < n))
1054     {
1055         wxUint16 cc[2];
1056
1057         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1058         if (pa == (size_t)-1)
1059             return pa;
1060
1061         if (buf)
1062         {
1063             *buf++ = cc[0];
1064             if (pa > 1)
1065                 *buf++ = cc[1];
1066         }
1067         len += pa;
1068         psz += sizeof(wxUint32);
1069     }
1070     if (buf && len<n)   *buf=0;
1071
1072     return len;
1073 }
1074
1075
1076 // copy 16bit String to 32bit MB
1077 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1078 {
1079     size_t len=0;
1080
1081     while (*psz && (!buf || len < n))
1082     {
1083         wxUint32 cc;
1084
1085         // cast is ok for WC_UTF16
1086         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1087         if (pa == (size_t)-1)
1088             return pa;
1089
1090         if (buf)
1091         {
1092             *(wxUint32*)buf = cc;
1093             buf += sizeof(wxUint32);
1094         }
1095         len += sizeof(wxUint32);
1096         psz += pa;
1097     }
1098
1099     if (buf && len<=n-sizeof(wxUint32))
1100         *(wxUint32*)buf=0;
1101
1102     return len;
1103 }
1104
1105
1106
1107 // swap 32bit MB to 16bit String
1108 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1109 {
1110     size_t len=0;
1111
1112     while (*(wxUint32*)psz && (!buf || len < n))
1113     {
1114         char tmp[4];
1115         tmp[0] = psz[3];   tmp[1] = psz[2];
1116         tmp[2] = psz[1];   tmp[3] = psz[0];
1117
1118
1119         wxUint16 cc[2];
1120
1121         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1122         if (pa == (size_t)-1)
1123             return pa;
1124
1125         if (buf)
1126         {
1127             *buf++ = cc[0];
1128             if (pa > 1)
1129                 *buf++ = cc[1];
1130         }
1131         len += pa;
1132         psz += sizeof(wxUint32);
1133     }
1134
1135     if (buf && len<n)
1136         *buf=0;
1137
1138     return len;
1139 }
1140
1141
1142 // swap 16bit String to 32bit MB
1143 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1144 {
1145     size_t len=0;
1146
1147     while (*psz && (!buf || len < n))
1148     {
1149         char cc[4];
1150
1151         // cast is ok for WC_UTF16
1152         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1153         if (pa == (size_t)-1)
1154             return pa;
1155
1156         if (buf)
1157         {
1158             *buf++ = cc[3];
1159             *buf++ = cc[2];
1160             *buf++ = cc[1];
1161             *buf++ = cc[0];
1162         }
1163         len += sizeof(wxUint32);
1164         psz += pa;
1165     }
1166
1167     if (buf && len<=n-sizeof(wxUint32))
1168         *(wxUint32*)buf=0;
1169
1170     return len;
1171 }
1172
1173 #else // WC_UTF16
1174
1175
1176 // copy 32bit MB to 32bit String
1177 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1178 {
1179     size_t len=0;
1180
1181     while (*(wxUint32*)psz && (!buf || len < n))
1182     {
1183         if (buf)
1184             *buf++ = (wchar_t)(*(wxUint32*)psz);
1185         len++;
1186         psz += sizeof(wxUint32);
1187     }
1188
1189     if (buf && len<n)
1190         *buf=0;
1191
1192     return len;
1193 }
1194
1195
1196 // copy 32bit String to 32bit MB
1197 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1198 {
1199     size_t len=0;
1200
1201     while (*psz && (!buf || len < n))
1202     {
1203         if (buf)
1204         {
1205             *(wxUint32*)buf = *psz;
1206             buf += sizeof(wxUint32);
1207         }
1208
1209         len += sizeof(wxUint32);
1210         psz++;
1211     }
1212
1213     if (buf && len<=n-sizeof(wxUint32))
1214         *(wxUint32*)buf=0;
1215
1216     return len;
1217 }
1218
1219
1220 // swap 32bit MB to 32bit String
1221 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1222 {
1223     size_t len=0;
1224
1225     while (*(wxUint32*)psz && (!buf || len < n))
1226     {
1227         if (buf)
1228         {
1229             ((char *)buf)[0] = psz[3];
1230             ((char *)buf)[1] = psz[2];
1231             ((char *)buf)[2] = psz[1];
1232             ((char *)buf)[3] = psz[0];
1233             buf++;
1234         }
1235         len++;
1236         psz += sizeof(wxUint32);
1237     }
1238
1239     if (buf && len<n)
1240         *buf=0;
1241
1242     return len;
1243 }
1244
1245
1246 // swap 32bit String to 32bit MB
1247 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1248 {
1249     size_t len=0;
1250
1251     while (*psz && (!buf || len < n))
1252     {
1253         if (buf)
1254         {
1255             *buf++ = ((char *)psz)[3];
1256             *buf++ = ((char *)psz)[2];
1257             *buf++ = ((char *)psz)[1];
1258             *buf++ = ((char *)psz)[0];
1259         }
1260         len += sizeof(wxUint32);
1261         psz++;
1262     }
1263
1264     if (buf && len<=n-sizeof(wxUint32))
1265         *(wxUint32*)buf=0;
1266
1267     return len;
1268 }
1269
1270
1271 #endif // WC_UTF16
1272
1273
1274 // ============================================================================
1275 // The classes doing conversion using the iconv_xxx() functions
1276 // ============================================================================
1277
1278 #ifdef HAVE_ICONV
1279
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) is true if the iconv() call which returned
// cres and left bufLeft bytes of input unconverted must be treated as failed.
// Both arguments are parenthesized so that arbitrary expressions can be
// passed without being torn apart by operator precedence.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type expected by iconv()
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// the value used for an iconv_t which isn't (successfully) opened
#define ICONV_T_INVALID ((iconv_t)-1)
1298
1299 #if SIZEOF_WCHAR_T == 4
1300     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1301     #define WC_ENC      wxFONTENCODING_UTF32
1302 #elif SIZEOF_WCHAR_T == 2
1303     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1304     #define WC_ENC      wxFONTENCODING_UTF16
1305 #else // sizeof(wchar_t) != 2 nor 4
1306     // does this ever happen?
1307     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1308 #endif
1309
1310 // ----------------------------------------------------------------------------
1311 // wxMBConv_iconv: encapsulates an iconv character set
1312 // ----------------------------------------------------------------------------
1313
1314 class wxMBConv_iconv : public wxMBConv
1315 {
1316 public:
1317     wxMBConv_iconv(const wxChar *name);
1318     virtual ~wxMBConv_iconv();
1319
1320     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1321     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1322
1323     bool IsOk() const
1324         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1325
1326 protected:
1327     // the iconv handlers used to translate from multibyte to wide char and in
1328     // the other direction
1329     iconv_t m2w,
1330             w2m;
1331 #if wxUSE_THREADS
1332     // guards access to m2w and w2m objects
1333     wxMutex m_iconvMutex;
1334 #endif
1335
1336 private:
1337     // the name (for iconv_open()) of a wide char charset -- if none is
1338     // available on this machine, it will remain NULL
1339     static wxString ms_wcCharsetName;
1340
1341     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1342     // different endian-ness than the native one
1343     static bool ms_wcNeedsSwap;
1344 };
1345
1346 // make the constructor available for unit testing
1347 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1348 {
1349     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1350     if ( !result->IsOk() )
1351     {
1352         delete result;
1353         return 0;
1354     }
1355     return result;
1356 }
1357
1358 wxString wxMBConv_iconv::ms_wcCharsetName;
1359 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1360
1361 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1362 {
1363     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1364     // names for the charsets
1365     const wxCharBuffer cname(wxString(name).ToAscii());
1366
1367     // check for charset that represents wchar_t:
1368     if ( ms_wcCharsetName.empty() )
1369     {
1370         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1371
1372 #if wxUSE_FONTMAP
1373         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1374 #else // !wxUSE_FONTMAP
1375         static const wxChar *names[] =
1376         {
1377 #if SIZEOF_WCHAR_T == 4
1378             _T("UCS-4"),
1379 #elif SIZEOF_WCHAR_T = 2
1380             _T("UCS-2"),
1381 #endif
1382             NULL
1383         };
1384 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1385
1386         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1387         {
1388             const wxString nameCS(*names);
1389
1390             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1391             wxString nameXE(nameCS);
1392             #ifdef WORDS_BIGENDIAN
1393                 nameXE += _T("BE");
1394             #else // little endian
1395                 nameXE += _T("LE");
1396             #endif
1397
1398             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1399                        nameXE.c_str());
1400
1401             m2w = iconv_open(nameXE.ToAscii(), cname);
1402             if ( m2w == ICONV_T_INVALID )
1403             {
1404                 // try charset w/o bytesex info (e.g. "UCS4")
1405                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1406                            nameCS.c_str());
1407                 m2w = iconv_open(nameCS.ToAscii(), cname);
1408
1409                 // and check for bytesex ourselves:
1410                 if ( m2w != ICONV_T_INVALID )
1411                 {
1412                     char    buf[2], *bufPtr;
1413                     wchar_t wbuf[2], *wbufPtr;
1414                     size_t  insz, outsz;
1415                     size_t  res;
1416
1417                     buf[0] = 'A';
1418                     buf[1] = 0;
1419                     wbuf[0] = 0;
1420                     insz = 2;
1421                     outsz = SIZEOF_WCHAR_T * 2;
1422                     wbufPtr = wbuf;
1423                     bufPtr = buf;
1424
1425                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1426                                 (char**)&wbufPtr, &outsz);
1427
1428                     if (ICONV_FAILED(res, insz))
1429                     {
1430                         wxLogLastError(wxT("iconv"));
1431                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1432                                    nameCS.c_str());
1433                     }
1434                     else // ok, can convert to this encoding, remember it
1435                     {
1436                         ms_wcCharsetName = nameCS;
1437                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1438                     }
1439                 }
1440             }
1441             else // use charset not requiring byte swapping
1442             {
1443                 ms_wcCharsetName = nameXE;
1444             }
1445         }
1446
1447         wxLogTrace(TRACE_STRCONV,
1448                    wxT("iconv wchar_t charset is \"%s\"%s"),
1449                    ms_wcCharsetName.empty() ? _T("<none>")
1450                                             : ms_wcCharsetName.c_str(),
1451                    ms_wcNeedsSwap ? _T(" (needs swap)")
1452                                   : _T(""));
1453     }
1454     else // we already have ms_wcCharsetName
1455     {
1456         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1457     }
1458
1459     if ( ms_wcCharsetName.empty() )
1460     {
1461         w2m = ICONV_T_INVALID;
1462     }
1463     else
1464     {
1465         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1466         if ( w2m == ICONV_T_INVALID )
1467         {
1468             wxLogTrace(TRACE_STRCONV,
1469                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1470                        ms_wcCharsetName.c_str(), cname.data());
1471         }
1472     }
1473 }
1474
1475 wxMBConv_iconv::~wxMBConv_iconv()
1476 {
1477     if ( m2w != ICONV_T_INVALID )
1478         iconv_close(m2w);
1479     if ( w2m != ICONV_T_INVALID )
1480         iconv_close(w2m);
1481 }
1482
1483 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1484 {
1485 #if wxUSE_THREADS
1486     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1487     //     Unfortunately there is a couple of global wxCSConv objects such as
1488     //     wxConvLocal that are used all over wx code, so we have to make sure
1489     //     the handle is used by at most one thread at the time. Otherwise
1490     //     only a few wx classes would be safe to use from non-main threads
1491     //     as MB<->WC conversion would fail "randomly".
1492     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1493 #endif
1494
1495     size_t inbuf = strlen(psz);
1496     size_t outbuf = n * SIZEOF_WCHAR_T;
1497     size_t res, cres;
1498     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1499     wchar_t *bufPtr = buf;
1500     const char *pszPtr = psz;
1501
1502     if (buf)
1503     {
1504         // have destination buffer, convert there
1505         cres = iconv(m2w,
1506                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1507                      (char**)&bufPtr, &outbuf);
1508         res = n - (outbuf / SIZEOF_WCHAR_T);
1509
1510         if (ms_wcNeedsSwap)
1511         {
1512             // convert to native endianness
1513             for ( unsigned i = 0; i < res; i++ )
1514                 buf[n] = WC_BSWAP(buf[i]);
1515         }
1516
1517         // NB: iconv was given only strlen(psz) characters on input, and so
1518         //     it couldn't convert the trailing zero. Let's do it ourselves
1519         //     if there's some room left for it in the output buffer.
1520         if (res < n)
1521             buf[res] = 0;
1522     }
1523     else
1524     {
1525         // no destination buffer... convert using temp buffer
1526         // to calculate destination buffer requirement
1527         wchar_t tbuf[8];
1528         res = 0;
1529         do {
1530             bufPtr = tbuf;
1531             outbuf = 8*SIZEOF_WCHAR_T;
1532
1533             cres = iconv(m2w,
1534                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1535                          (char**)&bufPtr, &outbuf );
1536
1537             res += 8-(outbuf/SIZEOF_WCHAR_T);
1538         } while ((cres==(size_t)-1) && (errno==E2BIG));
1539     }
1540
1541     if (ICONV_FAILED(cres, inbuf))
1542     {
1543         //VS: it is ok if iconv fails, hence trace only
1544         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1545         return (size_t)-1;
1546     }
1547
1548     return res;
1549 }
1550
1551 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1552 {
1553 #if wxUSE_THREADS
1554     // NB: explained in MB2WC
1555     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1556 #endif
1557
1558     size_t inlen = wxWcslen(psz);
1559     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1560     size_t outbuf = n;
1561     size_t res, cres;
1562
1563     wchar_t *tmpbuf = 0;
1564
1565     if (ms_wcNeedsSwap)
1566     {
1567         // need to copy to temp buffer to switch endianness
1568         // (doing WC_BSWAP twice on the original buffer won't help, as it
1569         //  could be in read-only memory, or be accessed in some other thread)
1570         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1571         for ( size_t i = 0; i < inlen; i++ )
1572             tmpbuf[n] = WC_BSWAP(psz[i]);
1573         tmpbuf[inlen] = L'\0';
1574         psz = tmpbuf;
1575     }
1576
1577     if (buf)
1578     {
1579         // have destination buffer, convert there
1580         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1581
1582         res = n-outbuf;
1583
1584         // NB: iconv was given only wcslen(psz) characters on input, and so
1585         //     it couldn't convert the trailing zero. Let's do it ourselves
1586         //     if there's some room left for it in the output buffer.
1587         if (res < n)
1588             buf[0] = 0;
1589     }
1590     else
1591     {
1592         // no destination buffer... convert using temp buffer
1593         // to calculate destination buffer requirement
1594         char tbuf[16];
1595         res = 0;
1596         do {
1597             buf = tbuf; outbuf = 16;
1598
1599             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1600
1601             res += 16 - outbuf;
1602         } while ((cres==(size_t)-1) && (errno==E2BIG));
1603     }
1604
1605     if (ms_wcNeedsSwap)
1606     {
1607         free(tmpbuf);
1608     }
1609
1610     if (ICONV_FAILED(cres, inbuf))
1611     {
1612         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1613         return (size_t)-1;
1614     }
1615
1616     return res;
1617 }
1618
1619 #endif // HAVE_ICONV
1620
1621
1622 // ============================================================================
1623 // Win32 conversion classes
1624 // ============================================================================
1625
1626 #ifdef wxHAVE_WIN32_MB2WC
1627
1628 // from utils.cpp
1629 #if wxUSE_FONTMAP
1630 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1631 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1632 #endif
1633
1634 class wxMBConv_win32 : public wxMBConv
1635 {
1636 public:
1637     wxMBConv_win32()
1638     {
1639         m_CodePage = CP_ACP;
1640     }
1641
1642 #if wxUSE_FONTMAP
1643     wxMBConv_win32(const wxChar* name)
1644     {
1645         m_CodePage = wxCharsetToCodepage(name);
1646     }
1647
1648     wxMBConv_win32(wxFontEncoding encoding)
1649     {
1650         m_CodePage = wxEncodingToCodepage(encoding);
1651     }
1652 #endif
1653
1654     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1655     {
1656         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1657         // the behaviour is not compatible with the Unix version (using iconv)
1658         // and break the library itself, e.g. wxTextInputStream::NextChar()
1659         // wouldn't work if reading an incomplete MB char didn't result in an
1660         // error
1661         //
1662         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1663         // an error (tested under Windows Server 2003) and apparently it is
1664         // done on purpose, i.e. the function accepts any input in this case
1665         // and although I'd prefer to return error on ill-formed output, our
1666         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1667         // explicitly ill-formed according to RFC 2152) neither so we don't
1668         // even have any fallback here...
1669         //
1670         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1671         // Win XP or newer and if it is specified on older versions, conversion
1672         // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1673         // fails. So we can only use the flag on newer Windows versions.
1674         // Additionally, the flag is not supported by UTF7, symbol and CJK
1675         // encodings. See here:
1676         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1677         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1678         int flags = 0;
1679         if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1680              m_CodePage < 50000 &&
1681              IsAtLeastWin2kSP4() )
1682         {
1683             flags = MB_ERR_INVALID_CHARS;
1684         }
1685         else if ( m_CodePage == CP_UTF8 )
1686         {
1687             // Avoid round-trip in the special case of UTF-8 by using our
1688             // own UTF-8 conversion code:
1689             return wxMBConvUTF8().MB2WC(buf, psz, n);
1690         }
1691
1692         const size_t len = ::MultiByteToWideChar
1693                              (
1694                                 m_CodePage,     // code page
1695                                 flags,          // flags: fall on error
1696                                 psz,            // input string
1697                                 -1,             // its length (NUL-terminated)
1698                                 buf,            // output string
1699                                 buf ? n : 0     // size of output buffer
1700                              );
1701         if ( !len )
1702         {
1703             // function totally failed
1704             return (size_t)-1;
1705         }
1706
1707         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1708         // check if we succeeded, by doing a double trip:
1709         if ( !flags && buf )
1710         {
1711             const size_t mbLen = strlen(psz);
1712             wxCharBuffer mbBuf(mbLen);
1713             if ( ::WideCharToMultiByte
1714                    (
1715                       m_CodePage,
1716                       0,
1717                       buf,
1718                       -1,
1719                       mbBuf.data(),
1720                       mbLen + 1,        // size in bytes, not length
1721                       NULL,
1722                       NULL
1723                    ) == 0 ||
1724                   strcmp(mbBuf, psz) != 0 )
1725             {
1726                 // we didn't obtain the same thing we started from, hence
1727                 // the conversion was lossy and we consider that it failed
1728                 return (size_t)-1;
1729             }
1730         }
1731
1732         // note that it returns count of written chars for buf != NULL and size
1733         // of the needed buffer for buf == NULL so in either case the length of
1734         // the string (which never includes the terminating NUL) is one less
1735         return len - 1;
1736     }
1737
1738     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1739     {
1740         /*
1741             we have a problem here: by default, WideCharToMultiByte() may
1742             replace characters unrepresentable in the target code page with bad
1743             quality approximations such as turning "1/2" symbol (U+00BD) into
1744             "1" for the code pages which don't have it and we, obviously, want
1745             to avoid this at any price
1746
1747             the trouble is that this function does it _silently_, i.e. it won't
1748             even tell us whether it did or not... Win98/2000 and higher provide
1749             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1750             we have to resort to a round trip, i.e. check that converting back
1751             results in the same string -- this is, of course, expensive but
1752             otherwise we simply can't be sure to not garble the data.
1753          */
1754
1755         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1756         // it doesn't work with CJK encodings (which we test for rather roughly
1757         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1758         // supporting it
1759         BOOL usedDef wxDUMMY_INITIALIZE(false);
1760         BOOL *pUsedDef;
1761         int flags;
1762         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1763         {
1764             // it's our lucky day
1765             flags = WC_NO_BEST_FIT_CHARS;
1766             pUsedDef = &usedDef;
1767         }
1768         else // old system or unsupported encoding
1769         {
1770             flags = 0;
1771             pUsedDef = NULL;
1772         }
1773
1774         const size_t len = ::WideCharToMultiByte
1775                              (
1776                                 m_CodePage,     // code page
1777                                 flags,          // either none or no best fit
1778                                 pwz,            // input string
1779                                 -1,             // it is (wide) NUL-terminated
1780                                 buf,            // output buffer
1781                                 buf ? n : 0,    // and its size
1782                                 NULL,           // default "replacement" char
1783                                 pUsedDef        // [out] was it used?
1784                              );
1785
1786         if ( !len )
1787         {
1788             // function totally failed
1789             return (size_t)-1;
1790         }
1791
1792         // if we were really converting, check if we succeeded
1793         if ( buf )
1794         {
1795             if ( flags )
1796             {
1797                 // check if the conversion failed, i.e. if any replacements
1798                 // were done
1799                 if ( usedDef )
1800                     return (size_t)-1;
1801             }
1802             else // we must resort to double tripping...
1803             {
1804                 wxWCharBuffer wcBuf(n);
1805                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1806                         wcscmp(wcBuf, pwz) != 0 )
1807                 {
1808                     // we didn't obtain the same thing we started from, hence
1809                     // the conversion was lossy and we consider that it failed
1810                     return (size_t)-1;
1811                 }
1812             }
1813         }
1814
1815         // see the comment above for the reason of "len - 1"
1816         return len - 1;
1817     }
1818
1819     bool IsOk() const { return m_CodePage != -1; }
1820
1821 private:
1822     static bool CanUseNoBestFit()
1823     {
1824         static int s_isWin98Or2k = -1;
1825
1826         if ( s_isWin98Or2k == -1 )
1827         {
1828             int verMaj, verMin;
1829             switch ( wxGetOsVersion(&verMaj, &verMin) )
1830             {
1831                 case wxWIN95:
1832                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1833                     break;
1834
1835                 case wxWINDOWS_NT:
1836                     s_isWin98Or2k = verMaj >= 5;
1837                     break;
1838
1839                 default:
1840                     // unknown, be conseravtive by default
1841                     s_isWin98Or2k = 0;
1842             }
1843
1844             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1845         }
1846
1847         return s_isWin98Or2k == 1;
1848     }
1849
1850     static bool IsAtLeastWin2kSP4()
1851     {
1852 #ifdef __WXWINCE__
1853         return false;
1854 #else
1855         static int s_isAtLeastWin2kSP4 = -1;
1856
1857         if ( s_isAtLeastWin2kSP4 == -1 )
1858         {
1859             OSVERSIONINFOEX ver;
1860
1861             memset(&ver, 0, sizeof(ver));
1862             ver.dwOSVersionInfoSize = sizeof(ver);
1863             GetVersionEx((OSVERSIONINFO*)&ver);
1864
1865             s_isAtLeastWin2kSP4 =
1866               ((ver.dwMajorVersion > 5) || // Vista+
1867                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1868                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1869                ver.wServicePackMajor >= 4)) // 2000 SP4+
1870               ? 1 : 0;
1871         }
1872
1873         return s_isAtLeastWin2kSP4 == 1;
1874 #endif
1875     }
1876
1877     long m_CodePage;
1878 };
1879
1880 #endif // wxHAVE_WIN32_MB2WC
1881
1882 // ============================================================================
1883 // Cocoa conversion classes
1884 // ============================================================================
1885
1886 #if defined(__WXCOCOA__)
1887
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1891
1892 #include <CoreFoundation/CFString.h>
1893 #include <CoreFoundation/CFStringEncodingExt.h>
1894
1895 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1896 {
1897     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1898     if ( encoding == wxFONTENCODING_DEFAULT )
1899     {
1900         enc = CFStringGetSystemEncoding();
1901     }
1902     else switch( encoding)
1903     {
1904         case wxFONTENCODING_ISO8859_1 :
1905             enc = kCFStringEncodingISOLatin1 ;
1906             break ;
1907         case wxFONTENCODING_ISO8859_2 :
1908             enc = kCFStringEncodingISOLatin2;
1909             break ;
1910         case wxFONTENCODING_ISO8859_3 :
1911             enc = kCFStringEncodingISOLatin3 ;
1912             break ;
1913         case wxFONTENCODING_ISO8859_4 :
1914             enc = kCFStringEncodingISOLatin4;
1915             break ;
1916         case wxFONTENCODING_ISO8859_5 :
1917             enc = kCFStringEncodingISOLatinCyrillic;
1918             break ;
1919         case wxFONTENCODING_ISO8859_6 :
1920             enc = kCFStringEncodingISOLatinArabic;
1921             break ;
1922         case wxFONTENCODING_ISO8859_7 :
1923             enc = kCFStringEncodingISOLatinGreek;
1924             break ;
1925         case wxFONTENCODING_ISO8859_8 :
1926             enc = kCFStringEncodingISOLatinHebrew;
1927             break ;
1928         case wxFONTENCODING_ISO8859_9 :
1929             enc = kCFStringEncodingISOLatin5;
1930             break ;
1931         case wxFONTENCODING_ISO8859_10 :
1932             enc = kCFStringEncodingISOLatin6;
1933             break ;
1934         case wxFONTENCODING_ISO8859_11 :
1935             enc = kCFStringEncodingISOLatinThai;
1936             break ;
1937         case wxFONTENCODING_ISO8859_13 :
1938             enc = kCFStringEncodingISOLatin7;
1939             break ;
1940         case wxFONTENCODING_ISO8859_14 :
1941             enc = kCFStringEncodingISOLatin8;
1942             break ;
1943         case wxFONTENCODING_ISO8859_15 :
1944             enc = kCFStringEncodingISOLatin9;
1945             break ;
1946
1947         case wxFONTENCODING_KOI8 :
1948             enc = kCFStringEncodingKOI8_R;
1949             break ;
1950         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1951             enc = kCFStringEncodingDOSRussian;
1952             break ;
1953
1954 //      case wxFONTENCODING_BULGARIAN :
1955 //          enc = ;
1956 //          break ;
1957
1958         case wxFONTENCODING_CP437 :
1959             enc =kCFStringEncodingDOSLatinUS ;
1960             break ;
1961         case wxFONTENCODING_CP850 :
1962             enc = kCFStringEncodingDOSLatin1;
1963             break ;
1964         case wxFONTENCODING_CP852 :
1965             enc = kCFStringEncodingDOSLatin2;
1966             break ;
1967         case wxFONTENCODING_CP855 :
1968             enc = kCFStringEncodingDOSCyrillic;
1969             break ;
1970         case wxFONTENCODING_CP866 :
1971             enc =kCFStringEncodingDOSRussian ;
1972             break ;
1973         case wxFONTENCODING_CP874 :
1974             enc = kCFStringEncodingDOSThai;
1975             break ;
1976         case wxFONTENCODING_CP932 :
1977             enc = kCFStringEncodingDOSJapanese;
1978             break ;
1979         case wxFONTENCODING_CP936 :
1980             enc =kCFStringEncodingDOSChineseSimplif ;
1981             break ;
1982         case wxFONTENCODING_CP949 :
1983             enc = kCFStringEncodingDOSKorean;
1984             break ;
1985         case wxFONTENCODING_CP950 :
1986             enc = kCFStringEncodingDOSChineseTrad;
1987             break ;
1988         case wxFONTENCODING_CP1250 :
1989             enc = kCFStringEncodingWindowsLatin2;
1990             break ;
1991         case wxFONTENCODING_CP1251 :
1992             enc =kCFStringEncodingWindowsCyrillic ;
1993             break ;
1994         case wxFONTENCODING_CP1252 :
1995             enc =kCFStringEncodingWindowsLatin1 ;
1996             break ;
1997         case wxFONTENCODING_CP1253 :
1998             enc = kCFStringEncodingWindowsGreek;
1999             break ;
2000         case wxFONTENCODING_CP1254 :
2001             enc = kCFStringEncodingWindowsLatin5;
2002             break ;
2003         case wxFONTENCODING_CP1255 :
2004             enc =kCFStringEncodingWindowsHebrew ;
2005             break ;
2006         case wxFONTENCODING_CP1256 :
2007             enc =kCFStringEncodingWindowsArabic ;
2008             break ;
2009         case wxFONTENCODING_CP1257 :
2010             enc = kCFStringEncodingWindowsBalticRim;
2011             break ;
2012 //   This only really encodes to UTF7 (if that) evidently
2013 //        case wxFONTENCODING_UTF7 :
2014 //            enc = kCFStringEncodingNonLossyASCII ;
2015 //            break ;
2016         case wxFONTENCODING_UTF8 :
2017             enc = kCFStringEncodingUTF8 ;
2018             break ;
2019         case wxFONTENCODING_EUC_JP :
2020             enc = kCFStringEncodingEUC_JP;
2021             break ;
2022         case wxFONTENCODING_UTF16 :
2023             enc = kCFStringEncodingUnicode ;
2024             break ;
2025         case wxFONTENCODING_MACROMAN :
2026             enc = kCFStringEncodingMacRoman ;
2027             break ;
2028         case wxFONTENCODING_MACJAPANESE :
2029             enc = kCFStringEncodingMacJapanese ;
2030             break ;
2031         case wxFONTENCODING_MACCHINESETRAD :
2032             enc = kCFStringEncodingMacChineseTrad ;
2033             break ;
2034         case wxFONTENCODING_MACKOREAN :
2035             enc = kCFStringEncodingMacKorean ;
2036             break ;
2037         case wxFONTENCODING_MACARABIC :
2038             enc = kCFStringEncodingMacArabic ;
2039             break ;
2040         case wxFONTENCODING_MACHEBREW :
2041             enc = kCFStringEncodingMacHebrew ;
2042             break ;
2043         case wxFONTENCODING_MACGREEK :
2044             enc = kCFStringEncodingMacGreek ;
2045             break ;
2046         case wxFONTENCODING_MACCYRILLIC :
2047             enc = kCFStringEncodingMacCyrillic ;
2048             break ;
2049         case wxFONTENCODING_MACDEVANAGARI :
2050             enc = kCFStringEncodingMacDevanagari ;
2051             break ;
2052         case wxFONTENCODING_MACGURMUKHI :
2053             enc = kCFStringEncodingMacGurmukhi ;
2054             break ;
2055         case wxFONTENCODING_MACGUJARATI :
2056             enc = kCFStringEncodingMacGujarati ;
2057             break ;
2058         case wxFONTENCODING_MACORIYA :
2059             enc = kCFStringEncodingMacOriya ;
2060             break ;
2061         case wxFONTENCODING_MACBENGALI :
2062             enc = kCFStringEncodingMacBengali ;
2063             break ;
2064         case wxFONTENCODING_MACTAMIL :
2065             enc = kCFStringEncodingMacTamil ;
2066             break ;
2067         case wxFONTENCODING_MACTELUGU :
2068             enc = kCFStringEncodingMacTelugu ;
2069             break ;
2070         case wxFONTENCODING_MACKANNADA :
2071             enc = kCFStringEncodingMacKannada ;
2072             break ;
2073         case wxFONTENCODING_MACMALAJALAM :
2074             enc = kCFStringEncodingMacMalayalam ;
2075             break ;
2076         case wxFONTENCODING_MACSINHALESE :
2077             enc = kCFStringEncodingMacSinhalese ;
2078             break ;
2079         case wxFONTENCODING_MACBURMESE :
2080             enc = kCFStringEncodingMacBurmese ;
2081             break ;
2082         case wxFONTENCODING_MACKHMER :
2083             enc = kCFStringEncodingMacKhmer ;
2084             break ;
2085         case wxFONTENCODING_MACTHAI :
2086             enc = kCFStringEncodingMacThai ;
2087             break ;
2088         case wxFONTENCODING_MACLAOTIAN :
2089             enc = kCFStringEncodingMacLaotian ;
2090             break ;
2091         case wxFONTENCODING_MACGEORGIAN :
2092             enc = kCFStringEncodingMacGeorgian ;
2093             break ;
2094         case wxFONTENCODING_MACARMENIAN :
2095             enc = kCFStringEncodingMacArmenian ;
2096             break ;
2097         case wxFONTENCODING_MACCHINESESIMP :
2098             enc = kCFStringEncodingMacChineseSimp ;
2099             break ;
2100         case wxFONTENCODING_MACTIBETAN :
2101             enc = kCFStringEncodingMacTibetan ;
2102             break ;
2103         case wxFONTENCODING_MACMONGOLIAN :
2104             enc = kCFStringEncodingMacMongolian ;
2105             break ;
2106         case wxFONTENCODING_MACETHIOPIC :
2107             enc = kCFStringEncodingMacEthiopic ;
2108             break ;
2109         case wxFONTENCODING_MACCENTRALEUR :
2110             enc = kCFStringEncodingMacCentralEurRoman ;
2111             break ;
2112         case wxFONTENCODING_MACVIATNAMESE :
2113             enc = kCFStringEncodingMacVietnamese ;
2114             break ;
2115         case wxFONTENCODING_MACARABICEXT :
2116             enc = kCFStringEncodingMacExtArabic ;
2117             break ;
2118         case wxFONTENCODING_MACSYMBOL :
2119             enc = kCFStringEncodingMacSymbol ;
2120             break ;
2121         case wxFONTENCODING_MACDINGBATS :
2122             enc = kCFStringEncodingMacDingbats ;
2123             break ;
2124         case wxFONTENCODING_MACTURKISH :
2125             enc = kCFStringEncodingMacTurkish ;
2126             break ;
2127         case wxFONTENCODING_MACCROATIAN :
2128             enc = kCFStringEncodingMacCroatian ;
2129             break ;
2130         case wxFONTENCODING_MACICELANDIC :
2131             enc = kCFStringEncodingMacIcelandic ;
2132             break ;
2133         case wxFONTENCODING_MACROMANIAN :
2134             enc = kCFStringEncodingMacRomanian ;
2135             break ;
2136         case wxFONTENCODING_MACCELTIC :
2137             enc = kCFStringEncodingMacCeltic ;
2138             break ;
2139         case wxFONTENCODING_MACGAELIC :
2140             enc = kCFStringEncodingMacGaelic ;
2141             break ;
2142 //      case wxFONTENCODING_MACKEYBOARD :
2143 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2144 //          break ;
2145         default :
2146             // because gcc is picky
2147             break ;
2148     } ;
2149     return enc ;
2150 }
2151
2152 class wxMBConv_cocoa : public wxMBConv
2153 {
2154 public:
2155     wxMBConv_cocoa()
2156     {
2157         Init(CFStringGetSystemEncoding()) ;
2158     }
2159
2160 #if wxUSE_FONTMAP
2161     wxMBConv_cocoa(const wxChar* name)
2162     {
2163         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2164     }
2165 #endif
2166
2167     wxMBConv_cocoa(wxFontEncoding encoding)
2168     {
2169         Init( wxCFStringEncFromFontEnc(encoding) );
2170     }
2171
2172     ~wxMBConv_cocoa()
2173     {
2174     }
2175
2176     void Init( CFStringEncoding encoding)
2177     {
2178         m_encoding = encoding ;
2179     }
2180
2181     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2182     {
2183         wxASSERT(szUnConv);
2184
2185         CFStringRef theString = CFStringCreateWithBytes (
2186                                                 NULL, //the allocator
2187                                                 (const UInt8*)szUnConv,
2188                                                 strlen(szUnConv),
2189                                                 m_encoding,
2190                                                 false //no BOM/external representation
2191                                                 );
2192
2193         wxASSERT(theString);
2194
2195         size_t nOutLength = CFStringGetLength(theString);
2196
2197         if (szOut == NULL)
2198         {
2199             CFRelease(theString);
2200             return nOutLength;
2201         }
2202
2203         CFRange theRange = { 0, nOutSize };
2204
2205 #if SIZEOF_WCHAR_T == 4
2206         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2207 #endif
2208
2209         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2210
2211         CFRelease(theString);
2212
2213         szUniCharBuffer[nOutLength] = '\0' ;
2214
2215 #if SIZEOF_WCHAR_T == 4
2216         wxMBConvUTF16 converter ;
2217         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2218         delete[] szUniCharBuffer;
2219 #endif
2220
2221         return nOutLength;
2222     }
2223
2224     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2225     {
2226         wxASSERT(szUnConv);
2227
2228         size_t nRealOutSize;
2229         size_t nBufSize = wxWcslen(szUnConv);
2230         UniChar* szUniBuffer = (UniChar*) szUnConv;
2231
2232 #if SIZEOF_WCHAR_T == 4
2233         wxMBConvUTF16 converter ;
2234         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2235         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2236         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2237         nBufSize /= sizeof(UniChar);
2238 #endif
2239
2240         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2241                                 NULL, //allocator
2242                                 szUniBuffer,
2243                                 nBufSize,
2244                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2245                             );
2246
2247         wxASSERT(theString);
2248
2249         //Note that CER puts a BOM when converting to unicode
2250         //so we  check and use getchars instead in that case
2251         if (m_encoding == kCFStringEncodingUnicode)
2252         {
2253             if (szOut != NULL)
2254                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2255
2256             nRealOutSize = CFStringGetLength(theString) + 1;
2257         }
2258         else
2259         {
2260             CFStringGetBytes(
2261                 theString,
2262                 CFRangeMake(0, CFStringGetLength(theString)),
2263                 m_encoding,
2264                 0, //what to put in characters that can't be converted -
2265                     //0 tells CFString to return NULL if it meets such a character
2266                 false, //not an external representation
2267                 (UInt8*) szOut,
2268                 nOutSize,
2269                 (CFIndex*) &nRealOutSize
2270                         );
2271         }
2272
2273         CFRelease(theString);
2274
2275 #if SIZEOF_WCHAR_T == 4
2276         delete[] szUniBuffer;
2277 #endif
2278
2279         return  nRealOutSize - 1;
2280     }
2281
2282     bool IsOk() const
2283     {
2284         return m_encoding != kCFStringEncodingInvalidId &&
2285               CFStringIsEncodingAvailable(m_encoding);
2286     }
2287
2288 private:
2289     CFStringEncoding m_encoding ;
2290 };
2291
2292 #endif // defined(__WXCOCOA__)
2293
2294 // ============================================================================
2295 // Mac conversion classes
2296 // ============================================================================
2297
2298 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2299
2300 class wxMBConv_mac : public wxMBConv
2301 {
2302 public:
2303     wxMBConv_mac()
2304     {
2305         Init(CFStringGetSystemEncoding()) ;
2306     }
2307
2308 #if wxUSE_FONTMAP
2309     wxMBConv_mac(const wxChar* name)
2310     {
2311         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2312     }
2313 #endif
2314
2315     wxMBConv_mac(wxFontEncoding encoding)
2316     {
2317         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2318     }
2319
2320     ~wxMBConv_mac()
2321     {
2322         OSStatus status = noErr ;
2323         status = TECDisposeConverter(m_MB2WC_converter);
2324         status = TECDisposeConverter(m_WC2MB_converter);
2325     }
2326
2327
2328     void Init( TextEncodingBase encoding)
2329     {
2330         OSStatus status = noErr ;
2331         m_char_encoding = encoding ;
2332         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2333
2334         status = TECCreateConverter(&m_MB2WC_converter,
2335                                     m_char_encoding,
2336                                     m_unicode_encoding);
2337         status = TECCreateConverter(&m_WC2MB_converter,
2338                                     m_unicode_encoding,
2339                                     m_char_encoding);
2340     }
2341
2342     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2343     {
2344         OSStatus status = noErr ;
2345         ByteCount byteOutLen ;
2346         ByteCount byteInLen = strlen(psz) ;
2347         wchar_t *tbuf = NULL ;
2348         UniChar* ubuf = NULL ;
2349         size_t res = 0 ;
2350
2351         if (buf == NULL)
2352         {
2353             //apple specs say at least 32
2354             n = wxMax( 32 , byteInLen ) ;
2355             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2356         }
2357         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2358 #if SIZEOF_WCHAR_T == 4
2359         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2360 #else
2361         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2362 #endif
2363         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2364           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2365 #if SIZEOF_WCHAR_T == 4
2366         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2367         // is not properly terminated we get random characters at the end
2368         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2369         wxMBConvUTF16 converter ;
2370         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2371         free( ubuf ) ;
2372 #else
2373         res = byteOutLen / sizeof( UniChar ) ;
2374 #endif
2375         if ( buf == NULL )
2376              free(tbuf) ;
2377
2378         if ( buf  && res < n)
2379             buf[res] = 0;
2380
2381         return res ;
2382     }
2383
2384     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2385     {
2386         OSStatus status = noErr ;
2387         ByteCount byteOutLen ;
2388         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2389
2390         char *tbuf = NULL ;
2391
2392         if (buf == NULL)
2393         {
2394             //apple specs say at least 32
2395             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2396             tbuf = (char*) malloc( n ) ;
2397         }
2398
2399         ByteCount byteBufferLen = n ;
2400         UniChar* ubuf = NULL ;
2401 #if SIZEOF_WCHAR_T == 4
2402         wxMBConvUTF16 converter ;
2403         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2404         byteInLen = unicharlen ;
2405         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2406         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2407 #else
2408         ubuf = (UniChar*) psz ;
2409 #endif
2410         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2411             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2412 #if SIZEOF_WCHAR_T == 4
2413         free( ubuf ) ;
2414 #endif
2415         if ( buf == NULL )
2416             free(tbuf) ;
2417
2418         size_t res = byteOutLen ;
2419         if ( buf  && res < n)
2420         {
2421             buf[res] = 0;
2422
2423             //we need to double-trip to verify it didn't insert any ? in place
2424             //of bogus characters
2425             wxWCharBuffer wcBuf(n);
2426             size_t pszlen = wxWcslen(psz);
2427             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2428                         wxWcslen(wcBuf) != pszlen ||
2429                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2430             {
2431                 // we didn't obtain the same thing we started from, hence
2432                 // the conversion was lossy and we consider that it failed
2433                 return (size_t)-1;
2434             }
2435         }
2436
2437         return res ;
2438     }
2439
2440     bool IsOk() const
2441         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2442
2443 private:
2444     TECObjectRef m_MB2WC_converter ;
2445     TECObjectRef m_WC2MB_converter ;
2446
2447     TextEncodingBase m_char_encoding ;
2448     TextEncodingBase m_unicode_encoding ;
2449 };
2450
2451 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2452
2453 // ============================================================================
2454 // wxEncodingConverter based conversion classes
2455 // ============================================================================
2456
2457 #if wxUSE_FONTMAP
2458
2459 class wxMBConv_wxwin : public wxMBConv
2460 {
2461 private:
2462     void Init()
2463     {
2464         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2465                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2466     }
2467
2468 public:
2469     // temporarily just use wxEncodingConverter stuff,
2470     // so that it works while a better implementation is built
2471     wxMBConv_wxwin(const wxChar* name)
2472     {
2473         if (name)
2474             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2475         else
2476             m_enc = wxFONTENCODING_SYSTEM;
2477
2478         Init();
2479     }
2480
2481     wxMBConv_wxwin(wxFontEncoding enc)
2482     {
2483         m_enc = enc;
2484
2485         Init();
2486     }
2487
2488     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2489     {
2490         size_t inbuf = strlen(psz);
2491         if (buf)
2492         {
2493             if (!m2w.Convert(psz,buf))
2494                 return (size_t)-1;
2495         }
2496         return inbuf;
2497     }
2498
2499     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2500     {
2501         const size_t inbuf = wxWcslen(psz);
2502         if (buf)
2503         {
2504             if (!w2m.Convert(psz,buf))
2505                 return (size_t)-1;
2506         }
2507
2508         return inbuf;
2509     }
2510
2511     bool IsOk() const { return m_ok; }
2512
2513 public:
2514     wxFontEncoding m_enc;
2515     wxEncodingConverter m2w, w2m;
2516
2517     // were we initialized successfully?
2518     bool m_ok;
2519
2520     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2521 };
2522
2523 // make the constructors available for unit testing
2524 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2525 {
2526     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2527     if ( !result->IsOk() )
2528     {
2529         delete result;
2530         return 0;
2531     }
2532     return result;
2533 }
2534
2535 #endif // wxUSE_FONTMAP
2536
2537 // ============================================================================
2538 // wxCSConv implementation
2539 // ============================================================================
2540
2541 void wxCSConv::Init()
2542 {
2543     m_name = NULL;
2544     m_convReal =  NULL;
2545     m_deferred = true;
2546 }
2547
2548 wxCSConv::wxCSConv(const wxChar *charset)
2549 {
2550     Init();
2551
2552     if ( charset )
2553     {
2554         SetName(charset);
2555     }
2556
2557 #if wxUSE_FONTMAP
2558     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2559 #else
2560     m_encoding = wxFONTENCODING_SYSTEM;
2561 #endif
2562 }
2563
2564 wxCSConv::wxCSConv(wxFontEncoding encoding)
2565 {
2566     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2567     {
2568         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2569
2570         encoding = wxFONTENCODING_SYSTEM;
2571     }
2572
2573     Init();
2574
2575     m_encoding = encoding;
2576 }
2577
2578 wxCSConv::~wxCSConv()
2579 {
2580     Clear();
2581 }
2582
2583 wxCSConv::wxCSConv(const wxCSConv& conv)
2584         : wxMBConv()
2585 {
2586     Init();
2587
2588     SetName(conv.m_name);
2589     m_encoding = conv.m_encoding;
2590 }
2591
2592 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2593 {
2594     Clear();
2595
2596     SetName(conv.m_name);
2597     m_encoding = conv.m_encoding;
2598
2599     return *this;
2600 }
2601
2602 void wxCSConv::Clear()
2603 {
2604     free(m_name);
2605     delete m_convReal;
2606
2607     m_name = NULL;
2608     m_convReal = NULL;
2609 }
2610
2611 void wxCSConv::SetName(const wxChar *charset)
2612 {
2613     if (charset)
2614     {
2615         m_name = wxStrdup(charset);
2616         m_deferred = true;
2617     }
2618 }
2619
2620 #if wxUSE_FONTMAP
2621 #include "wx/hashmap.h"
2622
// Hash map from a font encoding to a charset name.
//
// wxCSConv::DoCreate() uses it (when iconv is available) to remember which
// of the names of an encoding iconv accepted, so that the other names don't
// have to be tried again; an empty string is stored when none of them worked.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// the cache itself, shared by all wxCSConv objects
static wxEncodingNameCache gs_nameCache;
2627 #endif
2628
2629 wxMBConv *wxCSConv::DoCreate() const
2630 {
2631 #if wxUSE_FONTMAP
2632     wxLogTrace(TRACE_STRCONV,
2633                wxT("creating conversion for %s"),
2634                (m_name ? m_name
2635                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2636 #endif // wxUSE_FONTMAP
2637
2638     // check for the special case of ASCII or ISO8859-1 charset: as we have
2639     // special knowledge of it anyhow, we don't need to create a special
2640     // conversion object
2641     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2642             m_encoding == wxFONTENCODING_DEFAULT )
2643     {
2644         // don't convert at all
2645         return NULL;
2646     }
2647
2648     // we trust OS to do conversion better than we can so try external
2649     // conversion methods first
2650     //
2651     // the full order is:
2652     //      1. OS conversion (iconv() under Unix or Win32 API)
2653     //      2. hard coded conversions for UTF
2654     //      3. wxEncodingConverter as fall back
2655
2656     // step (1)
2657 #ifdef HAVE_ICONV
2658 #if !wxUSE_FONTMAP
2659     if ( m_name )
2660 #endif // !wxUSE_FONTMAP
2661     {
2662         wxString name(m_name);
2663         wxFontEncoding encoding(m_encoding);
2664
2665         if ( !name.empty() )
2666         {
2667             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2668             if ( conv->IsOk() )
2669                 return conv;
2670
2671             delete conv;
2672
2673 #if wxUSE_FONTMAP
2674             encoding =
2675                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2676 #endif // wxUSE_FONTMAP
2677         }
2678 #if wxUSE_FONTMAP
2679         {
2680             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2681             if ( it != gs_nameCache.end() )
2682             {
2683                 if ( it->second.empty() )
2684                     return NULL;
2685
2686                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2687                 if ( conv->IsOk() )
2688                     return conv;
2689
2690                 delete conv;
2691             }
2692
2693             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2694
2695             for ( ; *names; ++names )
2696             {
2697                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2698                 if ( conv->IsOk() )
2699                 {
2700                     gs_nameCache[encoding] = *names;
2701                     return conv;
2702                 }
2703
2704                 delete conv;
2705             }
2706
2707             gs_nameCache[encoding] = _T(""); // cache the failure
2708         }
2709 #endif // wxUSE_FONTMAP
2710     }
2711 #endif // HAVE_ICONV
2712
2713 #ifdef wxHAVE_WIN32_MB2WC
2714     {
2715 #if wxUSE_FONTMAP
2716         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2717                                       : new wxMBConv_win32(m_encoding);
2718         if ( conv->IsOk() )
2719             return conv;
2720
2721         delete conv;
2722 #else
2723         return NULL;
2724 #endif
2725     }
2726 #endif // wxHAVE_WIN32_MB2WC
2727 #if defined(__WXMAC__)
2728     {
2729         // leave UTF16 and UTF32 to the built-ins of wx
2730         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2731             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2732         {
2733
2734 #if wxUSE_FONTMAP
2735             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2736                                         : new wxMBConv_mac(m_encoding);
2737 #else
2738             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2739 #endif
2740             if ( conv->IsOk() )
2741                  return conv;
2742
2743             delete conv;
2744         }
2745     }
2746 #endif
2747 #if defined(__WXCOCOA__)
2748     {
2749         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2750         {
2751
2752 #if wxUSE_FONTMAP
2753             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2754                                           : new wxMBConv_cocoa(m_encoding);
2755 #else
2756             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2757 #endif
2758             if ( conv->IsOk() )
2759                  return conv;
2760
2761             delete conv;
2762         }
2763     }
2764 #endif
2765     // step (2)
2766     wxFontEncoding enc = m_encoding;
2767 #if wxUSE_FONTMAP
2768     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2769     {
2770         // use "false" to suppress interactive dialogs -- we can be called from
2771         // anywhere and popping up a dialog from here is the last thing we want to
2772         // do
2773         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2774     }
2775 #endif // wxUSE_FONTMAP
2776
2777     switch ( enc )
2778     {
2779         case wxFONTENCODING_UTF7:
2780              return new wxMBConvUTF7;
2781
2782         case wxFONTENCODING_UTF8:
2783              return new wxMBConvUTF8;
2784
2785         case wxFONTENCODING_UTF16BE:
2786              return new wxMBConvUTF16BE;
2787
2788         case wxFONTENCODING_UTF16LE:
2789              return new wxMBConvUTF16LE;
2790
2791         case wxFONTENCODING_UTF32BE:
2792              return new wxMBConvUTF32BE;
2793
2794         case wxFONTENCODING_UTF32LE:
2795              return new wxMBConvUTF32LE;
2796
2797         default:
2798              // nothing to do but put here to suppress gcc warnings
2799              ;
2800     }
2801
2802     // step (3)
2803 #if wxUSE_FONTMAP
2804     {
2805         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2806                                       : new wxMBConv_wxwin(m_encoding);
2807         if ( conv->IsOk() )
2808             return conv;
2809
2810         delete conv;
2811     }
2812 #endif // wxUSE_FONTMAP
2813
2814     // NB: This is a hack to prevent deadlock. What could otherwise happen
2815     //     in Unicode build: wxConvLocal creation ends up being here
2816     //     because of some failure and logs the error. But wxLog will try to
2817     //     attach timestamp, for which it will need wxConvLocal (to convert
2818     //     time to char* and then wchar_t*), but that fails, tries to log
2819     //     error, but wxLog has a (already locked) critical section that
2820     //     guards static buffer.
2821     static bool alreadyLoggingError = false;
2822     if (!alreadyLoggingError)
2823     {
2824         alreadyLoggingError = true;
2825         wxLogError(_("Cannot convert from the charset '%s'!"),
2826                    m_name ? m_name
2827                       :
2828 #if wxUSE_FONTMAP
2829                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2830 #else // !wxUSE_FONTMAP
2831                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2832 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2833               );
2834         alreadyLoggingError = false;
2835     }
2836
2837     return NULL;
2838 }
2839
2840 void wxCSConv::CreateConvIfNeeded() const
2841 {
2842     if ( m_deferred )
2843     {
2844         wxCSConv *self = (wxCSConv *)this; // const_cast
2845
2846 #if wxUSE_INTL
2847         // if we don't have neither the name nor the encoding, use the default
2848         // encoding for this system
2849         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2850         {
2851             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2852         }
2853 #endif // wxUSE_INTL
2854
2855         self->m_convReal = DoCreate();
2856         self->m_deferred = false;
2857     }
2858 }
2859
2860 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2861 {
2862     CreateConvIfNeeded();
2863
2864     if (m_convReal)
2865         return m_convReal->MB2WC(buf, psz, n);
2866
2867     // latin-1 (direct)
2868     size_t len = strlen(psz);
2869
2870     if (buf)
2871     {
2872         for (size_t c = 0; c <= len; c++)
2873             buf[c] = (unsigned char)(psz[c]);
2874     }
2875
2876     return len;
2877 }
2878
2879 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2880 {
2881     CreateConvIfNeeded();
2882
2883     if (m_convReal)
2884         return m_convReal->WC2MB(buf, psz, n);
2885
2886     // latin-1 (direct)
2887     const size_t len = wxWcslen(psz);
2888     if (buf)
2889     {
2890         for (size_t c = 0; c <= len; c++)
2891         {
2892             if (psz[c] > 0xFF)
2893                 return (size_t)-1;
2894             buf[c] = (char)psz[c];
2895         }
2896     }
2897     else
2898     {
2899         for (size_t c = 0; c <= len; c++)
2900         {
2901             if (psz[c] > 0xFF)
2902                 return (size_t)-1;
2903         }
2904     }
2905
2906     return len;
2907 }
2908
2909 // ----------------------------------------------------------------------------
2910 // globals
2911 // ----------------------------------------------------------------------------
2912
2913 #ifdef __WINDOWS__
2914     static wxMBConv_win32 wxConvLibcObj;
2915 #elif defined(__WXMAC__) && !defined(__MACH__)
2916     static wxMBConv_mac wxConvLibcObj ;
2917 #else
2918     static wxMBConvLibc wxConvLibcObj;
2919 #endif
2920
2921 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2922 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2923 static wxMBConvUTF7 wxConvUTF7Obj;
2924 static wxMBConvUTF8 wxConvUTF8Obj;
2925
2926 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2927 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2928 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2929 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2930 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2931 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2932 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2933 #ifdef __WXOSX__
2934                                     wxConvUTF8Obj;
2935 #else
2936                                     wxConvLibcObj;
2937 #endif
2938
2939
2940 #else // !wxUSE_WCHAR_T
2941
2942 // stand-ins in absence of wchar_t
2943 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2944                                 wxConvISO8859_1,
2945                                 wxConvLocal,
2946                                 wxConvUTF8;
2947
2948 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T