src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
// NOTE(review): presumably the trace mask for log messages from this file;
// its uses are not visible in this part of the source -- confirm
#define TRACE_STRCONV _T("strconv")

// WC_UTF16 is defined when wchar_t is 2 bytes wide; the code below tests it
// to choose between the UTF-16 and the 32 bit wchar_t code paths
#if SIZEOF_WCHAR_T == 2
    #define WC_UTF16
#endif
  84
  85 // ============================================================================
  86 // implementation
  87 // ============================================================================
  88
  89 // ----------------------------------------------------------------------------
  90 // UTF-16 en/decoding to/from UCS-4
  91 // ----------------------------------------------------------------------------
  92
  93
  94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  95 {
  96     if (input<=0xffff)
  97     {
  98         if (output)
  99             *output = (wxUint16) input;
 100         return 1;
 101     }
 102     else if (input>=0x110000)
 103     {
 104         return (size_t)-1;
 105     }
 106     else
 107     {
 108         if (output)
 109         {
 110             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 111             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 112         }
 113         return 2;
 114     }
 115 }
 116
 117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 118 {
 119     if ((*input<0xd800) || (*input>0xdfff))
 120     {
 121         output = *input;
 122         return 1;
 123     }
 124     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 125     {
 126         output = *input;
 127         return (size_t)-1;
 128     }
 129     else
 130     {
 131         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 132         return 2;
 133     }
 134 }
 135
 136
 137 // ----------------------------------------------------------------------------
 138 // wxMBConv
 139 // ----------------------------------------------------------------------------
 140
wxMBConv::~wxMBConv()
{
    // nothing to do here; the destructor is probably only defined out of
    // line because this is needed for linking under Darwin
}
 145
 146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 147 {
 148     if ( psz )
 149     {
 150         // calculate the length of the buffer needed first
 151         size_t nLen = MB2WC(NULL, psz, 0);
 152         if ( nLen != (size_t)-1 )
 153         {
 154             // now do the actual conversion
 155             wxWCharBuffer buf(nLen);
 156             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 157             if ( nLen != (size_t)-1 )
 158             {
 159                 return buf;
 160             }
 161         }
 162     }
 163
 164     wxWCharBuffer buf((wchar_t *)NULL);
 165
 166     return buf;
 167 }
 168
 169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 170 {
 171     if ( pwz )
 172     {
 173         size_t nLen = WC2MB(NULL, pwz, 0);
 174         if ( nLen != (size_t)-1 )
 175         {
 176             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 177             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 178             if ( nLen != (size_t)-1 )
 179             {
 180                 return buf;
 181             }
 182         }
 183     }
 184
 185     wxCharBuffer buf((char *)NULL);
 186
 187     return buf;
 188 }
 189
 190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 191 {
 192     wxASSERT(pOutSize != NULL);
 193
 194     const char* szEnd = szString + nStringLen + 1;
 195     const char* szPos = szString;
 196     const char* szStart = szPos;
 197
 198     size_t nActualLength = 0;
 199     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 200
 201     wxWCharBuffer theBuffer(nCurrentSize);
 202
 203     //Convert the string until the length() is reached, continuing the
 204     //loop every time a null character is reached
 205     while(szPos != szEnd)
 206     {
 207         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 208
 209         //Get the length of the current (sub)string
 210         size_t nLen = MB2WC(NULL, szPos, 0);
 211
 212         //Invalid conversion?
 213         if( nLen == (size_t)-1 )
 214         {
 215             *pOutSize = 0;
 216             theBuffer.data()[0u] = wxT('\0');
 217             return theBuffer;
 218         }
 219
 220
 221         //Increase the actual length (+1 for current null character)
 222         nActualLength += nLen + 1;
 223
 224         //if buffer too big, realloc the buffer
 225         if (nActualLength > (nCurrentSize+1))
 226         {
 227             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 228             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 229             theBuffer = theNewBuffer;
 230             nCurrentSize <<= 1;
 231         }
 232
 233         //Convert the current (sub)string
 234         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 235         {
 236             *pOutSize = 0;
 237             theBuffer.data()[0u] = wxT('\0');
 238             return theBuffer;
 239         }
 240
 241         //Increment to next (sub)string
 242         //Note that we have to use strlen instead of nLen here
 243         //because XX2XX gives us the size of the output buffer,
 244         //which is not necessarily the length of the string
 245         szPos += strlen(szPos) + 1;
 246     }
 247
 248     //success - return actual length and the buffer
 249     *pOutSize = nActualLength;
 250     return theBuffer;
 251 }
 252
 253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 254 {
 255     wxASSERT(pOutSize != NULL);
 256
 257     const wchar_t* szEnd = szString + nStringLen + 1;
 258     const wchar_t* szPos = szString;
 259     const wchar_t* szStart = szPos;
 260
 261     size_t nActualLength = 0;
 262     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 263
 264     wxCharBuffer theBuffer(nCurrentSize);
 265
 266     //Convert the string until the length() is reached, continuing the
 267     //loop every time a null character is reached
 268     while(szPos != szEnd)
 269     {
 270         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 271
 272         //Get the length of the current (sub)string
 273         size_t nLen = WC2MB(NULL, szPos, 0);
 274
 275         //Invalid conversion?
 276         if( nLen == (size_t)-1 )
 277         {
 278             *pOutSize = 0;
 279             theBuffer.data()[0u] = wxT('\0');
 280             return theBuffer;
 281         }
 282
 283         //Increase the actual length (+1 for current null character)
 284         nActualLength += nLen + 1;
 285
 286         //if buffer too big, realloc the buffer
 287         if (nActualLength > (nCurrentSize+1))
 288         {
 289             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 290             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 291             theBuffer = theNewBuffer;
 292             nCurrentSize <<= 1;
 293         }
 294
 295         //Convert the current (sub)string
 296         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 297         {
 298             *pOutSize = 0;
 299             theBuffer.data()[0u] = wxT('\0');
 300             return theBuffer;
 301         }
 302
 303         //Increment to next (sub)string
 304         //Note that we have to use wxWcslen instead of nLen here
 305         //because XX2XX gives us the size of the output buffer,
 306         //which is not necessarily the length of the string
 307         szPos += wxWcslen(szPos) + 1;
 308     }
 309
 310     //success - return actual length and the buffer
 311     *pOutSize = nActualLength;
 312     return theBuffer;
 313 }
 314
 315 // ----------------------------------------------------------------------------
 316 // wxMBConvLibc
 317 // ----------------------------------------------------------------------------
 318
 319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 320 {
 321     return wxMB2WC(buf, psz, n);
 322 }
 323
 324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 325 {
 326     return wxWC2MB(buf, psz, n);
 327 }
 328
 329 #ifdef __UNIX__
 330
 331 // ----------------------------------------------------------------------------
 332 // wxConvBrokenFileNames
 333 // ----------------------------------------------------------------------------
 334
 335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 336 {
 337     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 338                   || wxStricmp(charset, _T("UTF8")) == 0  )
 339         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 340     else
 341         m_conv = new wxCSConv(charset);
 342 }
 343
 344 size_t
 345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 346                              const char *psz,
 347                              size_t outputSize) const
 348 {
 349     return m_conv->MB2WC( outputBuf, psz, outputSize );
 350 }
 351
 352 size_t
 353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 354                              const wchar_t *psz,
 355                              size_t outputSize) const
 356 {
 357     return m_conv->WC2MB( outputBuf, psz, outputSize );
 358 }
 359
 360 #endif
 361
 362 // ----------------------------------------------------------------------------
 363 // UTF-7
 364 // ----------------------------------------------------------------------------
 365
 366 // Implementation (C) 2004 Fredrik Roubert
 367
 368 //
 369 // BASE64 decoding table
 370 //
 371 static const unsigned char utf7unb64[] =
 372 {
 373     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 374     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 375     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 376     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 377     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 378     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 379     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 380     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 381     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 382     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 383     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 384     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 385     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 386     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 387     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 388     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 389     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 390     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 391     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 392     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 393     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 394     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 395     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 396     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 397     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 398     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 399     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 400     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 401     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 402     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 403     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 404     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 405 };
 406
 407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 408 {
 409     size_t len = 0;
 410
 411     while ( *psz && (!buf || (len < n)) )
 412     {
 413         unsigned char cc = *psz++;
 414         if (cc != '+')
 415         {
 416             // plain ASCII char
 417             if (buf)
 418                 *buf++ = cc;
 419             len++;
 420         }
 421         else if (*psz == '-')
 422         {
 423             // encoded plus sign
 424             if (buf)
 425                 *buf++ = cc;
 426             len++;
 427             psz++;
 428         }
 429         else // start of BASE64 encoded string
 430         {
 431             bool lsb, ok;
 432             unsigned int d, l;
 433             for ( ok = lsb = false, d = 0, l = 0;
 434                   (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
 435                   psz++ )
 436             {
 437                 d <<= 6;
 438                 d += cc;
 439                 for (l += 6; l >= 8; lsb = !lsb)
 440                 {
 441                     unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
 442                     if (lsb)
 443                     {
 444                         if (buf)
 445                             *buf++ |= c;
 446                         len ++;
 447                     }
 448                     else
 449                     {
 450                         if (buf)
 451                             *buf = (wchar_t)(c << 8);
 452                     }
 453
 454                     ok = true;
 455                 }
 456             }
 457
 458             if ( !ok )
 459             {
 460                 // in valid UTF7 we should have valid characters after '+'
 461                 return (size_t)-1;
 462             }
 463
 464             if (*psz == '-')
 465                 psz++;
 466         }
 467     }
 468
 469     if ( buf && (len < n) )
 470         *buf = '\0';
 471
 472     return len;
 473 }
 474
 475 //
 476 // BASE64 encoding table
 477 //
 478 static const unsigned char utf7enb64[] =
 479 {
 480     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 481     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 482     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 483     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 484     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 485     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 486     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 487     '4', '5', '6', '7', '8', '9', '+', '/'
 488 };
 489
 490 //
 491 // UTF-7 encoding table
 492 //
 493 // 0 - Set D (directly encoded characters)
 494 // 1 - Set O (optional direct characters)
 495 // 2 - whitespace characters (optional)
 496 // 3 - special characters
 497 //
 498 static const unsigned char utf7encode[128] =
 499 {
 500     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 501     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 502     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 503     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 504     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 505     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 506     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 507     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 508 };
 509
 510 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 511 {
 512
 513
 514     size_t len = 0;
 515
 516     while (*psz && ((!buf) || (len < n)))
 517     {
 518         wchar_t cc = *psz++;
 519         if (cc < 0x80 && utf7encode[cc] < 1)
 520         {
 521             // plain ASCII char
 522             if (buf)
 523                 *buf++ = (char)cc;
 524             len++;
 525         }
 526 #ifndef WC_UTF16
 527         else if (((wxUint32)cc) > 0xffff)
 528         {
 529             // no surrogate pair generation (yet?)
 530             return (size_t)-1;
 531         }
 532 #endif
 533         else
 534         {
 535             if (buf)
 536                 *buf++ = '+';
 537             len++;
 538             if (cc != '+')
 539             {
 540                 // BASE64 encode string
 541                 unsigned int lsb, d, l;
 542                 for (d = 0, l = 0; /*nothing*/; psz++)
 543                 {
 544                     for (lsb = 0; lsb < 2; lsb ++)
 545                     {
 546                         d <<= 8;
 547                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 548
 549                         for (l += 8; l >= 6; )
 550                         {
 551                             l -= 6;
 552                             if (buf)
 553                                 *buf++ = utf7enb64[(d >> l) % 64];
 554                             len++;
 555                         }
 556                     }
 557                     cc = *psz;
 558                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 559                         break;
 560                 }
 561                 if (l != 0)
 562                 {
 563                     if (buf)
 564                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 565                     len++;
 566                 }
 567             }
 568             if (buf)
 569                 *buf++ = '-';
 570             len++;
 571         }
 572     }
 573     if (buf && (len < n))
 574         *buf = 0;
 575     return len;
 576 }
 577
 578 // ----------------------------------------------------------------------------
 579 // UTF-8
 580 // ----------------------------------------------------------------------------
 581
 582 static wxUint32 utf8_max[]=
 583     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 584
 585 // boundaries of the private use area we use to (temporarily) remap invalid
 586 // characters invalid in a UTF-8 encoded string
 587 const wxUint32 wxUnicodePUA = 0x100000;
 588 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 589
// Convert the NUL-terminated UTF-8 string psz to wide characters.
//
// If buf is NULL only the length of the result is computed, otherwise the
// result is stored in buf, the loop stops once n characters were produced
// and a terminating NUL is appended if fewer than n were.
//
// Returns the number of wide characters produced (not counting the NUL).
// What happens with invalid input depends on m_options:
//  - with MAP_INVALID_UTF8_TO_PUA each byte of the invalid sequence is
//    mapped to the character wxUnicodePUA + byte
//  - with MAP_INVALID_UTF8_TO_OCTAL each such byte becomes a backslash
//    followed by 3 octal digits (and valid backslashes are doubled)
//  - otherwise (size_t)-1 is returned
//
// NOTE(review): in the WC_UTF16 case encode_utf16() may store 2 units while
// only "len < n" was checked, so the last character could end up one element
// beyond n -- confirm that callers always size the buffer from a counting
// pass first.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember the start of the sequence to be able to remap its bytes
        // if it turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;
        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // cnt is now the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence
                // (a continuation byte where a lead byte was expected)
                invalid = true;
            }
            else
            {
                // index into utf8_max of the largest value which could have
                // been encoded using one byte less
                unsigned ocnt = cnt - 1;
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence
                        invalid = true;
                        break;
                    }
                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }
                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding
                    // (either broken above or using more bytes than needed)
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUint16 if WC_UTF16
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == (size_t)-1)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }
            if (invalid)
            {
                // remap all the bytes consumed so far, i.e. those between
                // opsz and psz
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != (size_t)-1);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only stored if all 4 of its
                        // characters fit but is always counted in len
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }
                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return (size_t)-1;
                }
            }
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 726
 727 static inline bool isoctal(wchar_t wch)
 728 {
 729     return L'0' <= wch && wch <= L'7';
 730 }
 731
 732 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 733 {
 734     size_t len = 0;
 735
 736     while (*psz && ((!buf) || (len < n)))
 737     {
 738         wxUint32 cc;
 739 #ifdef WC_UTF16
 740         // cast is ok for WC_UTF16
 741         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 742         psz += (pa == (size_t)-1) ? 1 : pa;
 743 #else
 744         cc=(*psz++) & 0x7fffffff;
 745 #endif
 746
 747         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 748                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 749         {
 750             if (buf)
 751                 *buf++ = (char)(cc - wxUnicodePUA);
 752             len++;
 753         }
 754         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 755                     && cc == L'\\' && psz[0] == L'\\' )
 756         {
 757             if (buf)
 758                 *buf++ = (char)cc;
 759             psz++;
 760             len++;
 761         }
 762         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 763                     cc == L'\\' &&
 764                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 765         {
 766             if (buf)
 767             {
 768                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 769                                  (psz[1] - L'0')*010 +
 770                                  (psz[2] - L'0'));
 771             }
 772
 773             psz += 3;
 774             len++;
 775         }
 776         else
 777         {
 778             unsigned cnt;
 779             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 780             if (!cnt)
 781             {
 782                 // plain ASCII char
 783                 if (buf)
 784                     *buf++ = (char) cc;
 785                 len++;
 786             }
 787
 788             else
 789             {
 790                 len += cnt + 1;
 791                 if (buf)
 792                 {
 793                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 794                     while (cnt--)
 795                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 796                 }
 797             }
 798         }
 799     }
 800
 801     if (buf && (len<n))
 802         *buf = 0;
 803
 804     return len;
 805 }
 806
 807 // ----------------------------------------------------------------------------
 808 // UTF-16
 809 // ----------------------------------------------------------------------------
 810
// The methods below are implemented under the names "straight" and "swap":
// on a big endian machine (WORDS_BIGENDIAN defined) "straight" stands for
// the BE class and "swap" for the LE one, otherwise it is the other way
// round.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 818
 819
 820 #ifdef WC_UTF16
 821
 822 // copy 16bit MB to 16bit String
 823 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 824 {
 825     size_t len=0;
 826
 827     while (*(wxUint16*)psz && (!buf || len < n))
 828     {
 829         if (buf)
 830             *buf++ = *(wxUint16*)psz;
 831         len++;
 832
 833         psz += sizeof(wxUint16);
 834     }
 835     if (buf && len<n)   *buf=0;
 836
 837     return len;
 838 }
 839
 840
 841 // copy 16bit String to 16bit MB
 842 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 843 {
 844     size_t len=0;
 845
 846     while (*psz && (!buf || len < n))
 847     {
 848         if (buf)
 849         {
 850             *(wxUint16*)buf = *psz;
 851             buf += sizeof(wxUint16);
 852         }
 853         len += sizeof(wxUint16);
 854         psz++;
 855     }
 856     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 857
 858     return len;
 859 }
 860
 861
 862 // swap 16bit MB to 16bit String
 863 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 864 {
 865     size_t len = 0;
 866
 867     // UTF16 string must be terminated by 2 NULs as single NULs may occur
 868     // inside the string
 869     while ( (psz[0] || psz[1]) && (!buf || len < n) )
 870     {
 871         if ( buf )
 872         {
 873             ((char *)buf)[0] = psz[1];
 874             ((char *)buf)[1] = psz[0];
 875             buf++;
 876         }
 877         len++;
 878         psz += 2;
 879     }
 880
 881     if ( buf && len < n )
 882         *buf = L'\0';
 883
 884     return len;
 885 }
 886
 887
 888 // swap 16bit MB to 16bit String
 889 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 890 {
 891     size_t len=0;
 892
 893     while (*psz && (!buf || len < n))
 894     {
 895         if (buf)
 896         {
 897             *buf++ = ((char*)psz)[1];
 898             *buf++ = ((char*)psz)[0];
 899         }
 900         len += sizeof(wxUint16);
 901         psz++;
 902     }
 903     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 904
 905     return len;
 906 }
 907
 908
 909 #else // WC_UTF16
 910
 911
 912 // copy 16bit MB to 32bit String
 913 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 914 {
 915     size_t len=0;
 916
 917     while (*(wxUint16*)psz && (!buf || len < n))
 918     {
 919         wxUint32 cc;
 920         size_t pa=decode_utf16((wxUint16*)psz, cc);
 921         if (pa == (size_t)-1)
 922             return pa;
 923
 924         if (buf)
 925             *buf++ = (wchar_t)cc;
 926         len++;
 927         psz += pa * sizeof(wxUint16);
 928     }
 929     if (buf && len<n)   *buf=0;
 930
 931     return len;
 932 }
 933
 934
 935 // copy 32bit String to 16bit MB
 936 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 937 {
 938     size_t len=0;
 939
 940     while (*psz && (!buf || len < n))
 941     {
 942         wxUint16 cc[2];
 943         size_t pa=encode_utf16(*psz, cc);
 944
 945         if (pa == (size_t)-1)
 946             return pa;
 947
 948         if (buf)
 949         {
 950             *(wxUint16*)buf = cc[0];
 951             buf += sizeof(wxUint16);
 952             if (pa > 1)
 953             {
 954                 *(wxUint16*)buf = cc[1];
 955                 buf += sizeof(wxUint16);
 956             }
 957         }
 958
 959         len += pa*sizeof(wxUint16);
 960         psz++;
 961     }
 962     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 963
 964     return len;
 965 }
 966
 967
 968 // swap 16bit MB to 32bit String
 969 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 970 {
 971     size_t len=0;
 972
 973     while (*(wxUint16*)psz && (!buf || len < n))
 974     {
 975         wxUint32 cc;
 976         char tmp[4];
 977         tmp[0]=psz[1];  tmp[1]=psz[0];
 978         tmp[2]=psz[3];  tmp[3]=psz[2];
 979
 980         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 981         if (pa == (size_t)-1)
 982             return pa;
 983
 984         if (buf)
 985             *buf++ = (wchar_t)cc;
 986
 987         len++;
 988         psz += pa * sizeof(wxUint16);
 989     }
 990     if (buf && len<n)   *buf=0;
 991
 992     return len;
 993 }
 994
 995
 996 // swap 32bit String to 16bit MB
 997 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 998 {
 999     size_t len=0;
1000
1001     while (*psz && (!buf || len < n))
1002     {
1003         wxUint16 cc[2];
1004         size_t pa=encode_utf16(*psz, cc);
1005
1006         if (pa == (size_t)-1)
1007             return pa;
1008
1009         if (buf)
1010         {
1011             *buf++ = ((char*)cc)[1];
1012             *buf++ = ((char*)cc)[0];
1013             if (pa > 1)
1014             {
1015                 *buf++ = ((char*)cc)[3];
1016                 *buf++ = ((char*)cc)[2];
1017             }
1018         }
1019
1020         len += pa*sizeof(wxUint16);
1021         psz++;
1022     }
1023     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1024
1025     return len;
1026 }
1027
1028 #endif // WC_UTF16
1029
1030
1031 // ----------------------------------------------------------------------------
1032 // UTF-32
1033 // ----------------------------------------------------------------------------
1034
1035 #ifdef WORDS_BIGENDIAN
1036 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1037 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1038 #else
1039 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1040 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1041 #endif
1042
1043
1044 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1045 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1046
1047
1048 #ifdef WC_UTF16
1049
1050 // copy 32bit MB to 16bit String
1051 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1052 {
1053     size_t len=0;
1054
1055     while (*(wxUint32*)psz && (!buf || len < n))
1056     {
1057         wxUint16 cc[2];
1058
1059         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1060         if (pa == (size_t)-1)
1061             return pa;
1062
1063         if (buf)
1064         {
1065             *buf++ = cc[0];
1066             if (pa > 1)
1067                 *buf++ = cc[1];
1068         }
1069         len += pa;
1070         psz += sizeof(wxUint32);
1071     }
1072     if (buf && len<n)   *buf=0;
1073
1074     return len;
1075 }
1076
1077
1078 // copy 16bit String to 32bit MB
1079 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1080 {
1081     size_t len=0;
1082
1083     while (*psz && (!buf || len < n))
1084     {
1085         wxUint32 cc;
1086
1087         // cast is ok for WC_UTF16
1088         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1089         if (pa == (size_t)-1)
1090             return pa;
1091
1092         if (buf)
1093         {
1094             *(wxUint32*)buf = cc;
1095             buf += sizeof(wxUint32);
1096         }
1097         len += sizeof(wxUint32);
1098         psz += pa;
1099     }
1100
1101     if (buf && len<=n-sizeof(wxUint32))
1102         *(wxUint32*)buf=0;
1103
1104     return len;
1105 }
1106
1107
1108
1109 // swap 32bit MB to 16bit String
1110 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1111 {
1112     size_t len=0;
1113
1114     while (*(wxUint32*)psz && (!buf || len < n))
1115     {
1116         char tmp[4];
1117         tmp[0] = psz[3];   tmp[1] = psz[2];
1118         tmp[2] = psz[1];   tmp[3] = psz[0];
1119
1120
1121         wxUint16 cc[2];
1122
1123         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1124         if (pa == (size_t)-1)
1125             return pa;
1126
1127         if (buf)
1128         {
1129             *buf++ = cc[0];
1130             if (pa > 1)
1131                 *buf++ = cc[1];
1132         }
1133         len += pa;
1134         psz += sizeof(wxUint32);
1135     }
1136
1137     if (buf && len<n)
1138         *buf=0;
1139
1140     return len;
1141 }
1142
1143
1144 // swap 16bit String to 32bit MB
1145 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1146 {
1147     size_t len=0;
1148
1149     while (*psz && (!buf || len < n))
1150     {
1151         char cc[4];
1152
1153         // cast is ok for WC_UTF16
1154         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1155         if (pa == (size_t)-1)
1156             return pa;
1157
1158         if (buf)
1159         {
1160             *buf++ = cc[3];
1161             *buf++ = cc[2];
1162             *buf++ = cc[1];
1163             *buf++ = cc[0];
1164         }
1165         len += sizeof(wxUint32);
1166         psz += pa;
1167     }
1168
1169     if (buf && len<=n-sizeof(wxUint32))
1170         *(wxUint32*)buf=0;
1171
1172     return len;
1173 }
1174
1175 #else // WC_UTF16
1176
1177
1178 // copy 32bit MB to 32bit String
1179 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1180 {
1181     size_t len=0;
1182
1183     while (*(wxUint32*)psz && (!buf || len < n))
1184     {
1185         if (buf)
1186             *buf++ = (wchar_t)(*(wxUint32*)psz);
1187         len++;
1188         psz += sizeof(wxUint32);
1189     }
1190
1191     if (buf && len<n)
1192         *buf=0;
1193
1194     return len;
1195 }
1196
1197
1198 // copy 32bit String to 32bit MB
1199 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1200 {
1201     size_t len=0;
1202
1203     while (*psz && (!buf || len < n))
1204     {
1205         if (buf)
1206         {
1207             *(wxUint32*)buf = *psz;
1208             buf += sizeof(wxUint32);
1209         }
1210
1211         len += sizeof(wxUint32);
1212         psz++;
1213     }
1214
1215     if (buf && len<=n-sizeof(wxUint32))
1216         *(wxUint32*)buf=0;
1217
1218     return len;
1219 }
1220
1221
1222 // swap 32bit MB to 32bit String
1223 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1224 {
1225     size_t len=0;
1226
1227     while (*(wxUint32*)psz && (!buf || len < n))
1228     {
1229         if (buf)
1230         {
1231             ((char *)buf)[0] = psz[3];
1232             ((char *)buf)[1] = psz[2];
1233             ((char *)buf)[2] = psz[1];
1234             ((char *)buf)[3] = psz[0];
1235             buf++;
1236         }
1237         len++;
1238         psz += sizeof(wxUint32);
1239     }
1240
1241     if (buf && len<n)
1242         *buf=0;
1243
1244     return len;
1245 }
1246
1247
1248 // swap 32bit String to 32bit MB
1249 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1250 {
1251     size_t len=0;
1252
1253     while (*psz && (!buf || len < n))
1254     {
1255         if (buf)
1256         {
1257             *buf++ = ((char *)psz)[3];
1258             *buf++ = ((char *)psz)[2];
1259             *buf++ = ((char *)psz)[1];
1260             *buf++ = ((char *)psz)[0];
1261         }
1262         len += sizeof(wxUint32);
1263         psz++;
1264     }
1265
1266     if (buf && len<=n-sizeof(wxUint32))
1267         *(wxUint32*)buf=0;
1268
1269     return len;
1270 }
1271
1272
1273 #endif // WC_UTF16
1274
1275
1276 // ============================================================================
1277 // The classes doing conversion using the iconv_xxx() functions
1278 // ============================================================================
1279
1280 #ifdef HAVE_ICONV
1281
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if the output buffer is _exactly_ as big as needed. Such a case is
//     (unless there's yet another bug in glibc) the only one in which iconv()
//     returns (size_t)-1 (which means error) while reporting 0 bytes left in
//     the input buffer -- when a _real_ error occurs, the number of bytes
//     left in the input buffer is non-zero. Hence this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// NB: the macro arguments are parenthesized so that expressions containing
//     operators of lower precedence than "==" / "!=" can be passed safely.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type of the second argument
// of iconv(); ICONV_CONST is defined outside of this file section
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value used in this file for a conversion descriptor which isn't open
#define ICONV_T_INVALID ((iconv_t)-1)
1300
1301 #if SIZEOF_WCHAR_T == 4
1302     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1303     #define WC_ENC      wxFONTENCODING_UTF32
1304 #elif SIZEOF_WCHAR_T == 2
1305     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1306     #define WC_ENC      wxFONTENCODING_UTF16
1307 #else // sizeof(wchar_t) != 2 nor 4
1308     // does this ever happen?
1309     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1310 #endif
1311
1312 // ----------------------------------------------------------------------------
1313 // wxMBConv_iconv: encapsulates an iconv character set
1314 // ----------------------------------------------------------------------------
1315
1316 class wxMBConv_iconv : public wxMBConv
1317 {
1318 public:
1319     wxMBConv_iconv(const wxChar *name);
1320     virtual ~wxMBConv_iconv();
1321
1322     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1323     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1324
1325     bool IsOk() const
1326         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1327
1328 protected:
1329     // the iconv handlers used to translate from multibyte to wide char and in
1330     // the other direction
1331     iconv_t m2w,
1332             w2m;
1333 #if wxUSE_THREADS
1334     // guards access to m2w and w2m objects
1335     wxMutex m_iconvMutex;
1336 #endif
1337
1338 private:
1339     // the name (for iconv_open()) of a wide char charset -- if none is
1340     // available on this machine, it will remain NULL
1341     static wxString ms_wcCharsetName;
1342
1343     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1344     // different endian-ness than the native one
1345     static bool ms_wcNeedsSwap;
1346 };
1347
1348 // make the constructor available for unit testing
1349 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1350 {
1351     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1352     if ( !result->IsOk() )
1353     {
1354         delete result;
1355         return 0;
1356     }
1357     return result;
1358 }
1359
1360 wxString wxMBConv_iconv::ms_wcCharsetName;
1361 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1362
1363 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1364 {
1365     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1366     // names for the charsets
1367     const wxCharBuffer cname(wxString(name).ToAscii());
1368
1369     // check for charset that represents wchar_t:
1370     if ( ms_wcCharsetName.empty() )
1371     {
1372         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1373
1374 #if wxUSE_FONTMAP
1375         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1376 #else // !wxUSE_FONTMAP
1377         static const wxChar *names[] =
1378         {
1379 #if SIZEOF_WCHAR_T == 4
1380             _T("UCS-4"),
1381 #elif SIZEOF_WCHAR_T = 2
1382             _T("UCS-2"),
1383 #endif
1384             NULL
1385         };
1386 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1387
1388         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1389         {
1390             const wxString nameCS(*names);
1391
1392             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1393             wxString nameXE(nameCS);
1394             #ifdef WORDS_BIGENDIAN
1395                 nameXE += _T("BE");
1396             #else // little endian
1397                 nameXE += _T("LE");
1398             #endif
1399
1400             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1401                        nameXE.c_str());
1402
1403             m2w = iconv_open(nameXE.ToAscii(), cname);
1404             if ( m2w == ICONV_T_INVALID )
1405             {
1406                 // try charset w/o bytesex info (e.g. "UCS4")
1407                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1408                            nameCS.c_str());
1409                 m2w = iconv_open(nameCS.ToAscii(), cname);
1410
1411                 // and check for bytesex ourselves:
1412                 if ( m2w != ICONV_T_INVALID )
1413                 {
1414                     char    buf[2], *bufPtr;
1415                     wchar_t wbuf[2], *wbufPtr;
1416                     size_t  insz, outsz;
1417                     size_t  res;
1418
1419                     buf[0] = 'A';
1420                     buf[1] = 0;
1421                     wbuf[0] = 0;
1422                     insz = 2;
1423                     outsz = SIZEOF_WCHAR_T * 2;
1424                     wbufPtr = wbuf;
1425                     bufPtr = buf;
1426
1427                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1428                                 (char**)&wbufPtr, &outsz);
1429
1430                     if (ICONV_FAILED(res, insz))
1431                     {
1432                         wxLogLastError(wxT("iconv"));
1433                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1434                                    nameCS.c_str());
1435                     }
1436                     else // ok, can convert to this encoding, remember it
1437                     {
1438                         ms_wcCharsetName = nameCS;
1439                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1440                     }
1441                 }
1442             }
1443             else // use charset not requiring byte swapping
1444             {
1445                 ms_wcCharsetName = nameXE;
1446             }
1447         }
1448
1449         wxLogTrace(TRACE_STRCONV,
1450                    wxT("iconv wchar_t charset is \"%s\"%s"),
1451                    ms_wcCharsetName.empty() ? _T("<none>")
1452                                             : ms_wcCharsetName.c_str(),
1453                    ms_wcNeedsSwap ? _T(" (needs swap)")
1454                                   : _T(""));
1455     }
1456     else // we already have ms_wcCharsetName
1457     {
1458         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1459     }
1460
1461     if ( ms_wcCharsetName.empty() )
1462     {
1463         w2m = ICONV_T_INVALID;
1464     }
1465     else
1466     {
1467         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1468         if ( w2m == ICONV_T_INVALID )
1469         {
1470             wxLogTrace(TRACE_STRCONV,
1471                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1472                        ms_wcCharsetName.c_str(), cname.data());
1473         }
1474     }
1475 }
1476
1477 wxMBConv_iconv::~wxMBConv_iconv()
1478 {
1479     if ( m2w != ICONV_T_INVALID )
1480         iconv_close(m2w);
1481     if ( w2m != ICONV_T_INVALID )
1482         iconv_close(w2m);
1483 }
1484
1485 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1486 {
1487 #if wxUSE_THREADS
1488     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1489     //     Unfortunately there is a couple of global wxCSConv objects such as
1490     //     wxConvLocal that are used all over wx code, so we have to make sure
1491     //     the handle is used by at most one thread at the time. Otherwise
1492     //     only a few wx classes would be safe to use from non-main threads
1493     //     as MB<->WC conversion would fail "randomly".
1494     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1495 #endif
1496
1497     size_t inbuf = strlen(psz);
1498     size_t outbuf = n * SIZEOF_WCHAR_T;
1499     size_t res, cres;
1500     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1501     wchar_t *bufPtr = buf;
1502     const char *pszPtr = psz;
1503
1504     if (buf)
1505     {
1506         // have destination buffer, convert there
1507         cres = iconv(m2w,
1508                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1509                      (char**)&bufPtr, &outbuf);
1510         res = n - (outbuf / SIZEOF_WCHAR_T);
1511
1512         if (ms_wcNeedsSwap)
1513         {
1514             // convert to native endianness
1515             for ( unsigned i = 0; i < res; i++ )
1516                 buf[n] = WC_BSWAP(buf[i]);
1517         }
1518
1519         // NB: iconv was given only strlen(psz) characters on input, and so
1520         //     it couldn't convert the trailing zero. Let's do it ourselves
1521         //     if there's some room left for it in the output buffer.
1522         if (res < n)
1523             buf[res] = 0;
1524     }
1525     else
1526     {
1527         // no destination buffer... convert using temp buffer
1528         // to calculate destination buffer requirement
1529         wchar_t tbuf[8];
1530         res = 0;
1531         do {
1532             bufPtr = tbuf;
1533             outbuf = 8*SIZEOF_WCHAR_T;
1534
1535             cres = iconv(m2w,
1536                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1537                          (char**)&bufPtr, &outbuf );
1538
1539             res += 8-(outbuf/SIZEOF_WCHAR_T);
1540         } while ((cres==(size_t)-1) && (errno==E2BIG));
1541     }
1542
1543     if (ICONV_FAILED(cres, inbuf))
1544     {
1545         //VS: it is ok if iconv fails, hence trace only
1546         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1547         return (size_t)-1;
1548     }
1549
1550     return res;
1551 }
1552
1553 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1554 {
1555 #if wxUSE_THREADS
1556     // NB: explained in MB2WC
1557     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1558 #endif
1559
1560     size_t inlen = wxWcslen(psz);
1561     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1562     size_t outbuf = n;
1563     size_t res, cres;
1564
1565     wchar_t *tmpbuf = 0;
1566
1567     if (ms_wcNeedsSwap)
1568     {
1569         // need to copy to temp buffer to switch endianness
1570         // (doing WC_BSWAP twice on the original buffer won't help, as it
1571         //  could be in read-only memory, or be accessed in some other thread)
1572         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1573         for ( size_t i = 0; i < inlen; i++ )
1574             tmpbuf[n] = WC_BSWAP(psz[i]);
1575         tmpbuf[inlen] = L'\0';
1576         psz = tmpbuf;
1577     }
1578
1579     if (buf)
1580     {
1581         // have destination buffer, convert there
1582         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1583
1584         res = n-outbuf;
1585
1586         // NB: iconv was given only wcslen(psz) characters on input, and so
1587         //     it couldn't convert the trailing zero. Let's do it ourselves
1588         //     if there's some room left for it in the output buffer.
1589         if (res < n)
1590             buf[0] = 0;
1591     }
1592     else
1593     {
1594         // no destination buffer... convert using temp buffer
1595         // to calculate destination buffer requirement
1596         char tbuf[16];
1597         res = 0;
1598         do {
1599             buf = tbuf; outbuf = 16;
1600
1601             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1602
1603             res += 16 - outbuf;
1604         } while ((cres==(size_t)-1) && (errno==E2BIG));
1605     }
1606
1607     if (ms_wcNeedsSwap)
1608     {
1609         free(tmpbuf);
1610     }
1611
1612     if (ICONV_FAILED(cres, inbuf))
1613     {
1614         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1615         return (size_t)-1;
1616     }
1617
1618     return res;
1619 }
1620
1621 #endif // HAVE_ICONV
1622
1623
1624 // ============================================================================
1625 // Win32 conversion classes
1626 // ============================================================================
1627
1628 #ifdef wxHAVE_WIN32_MB2WC
1629
1630 // from utils.cpp
1631 #if wxUSE_FONTMAP
1632 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1633 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1634 #endif
1635
1636 class wxMBConv_win32 : public wxMBConv
1637 {
1638 public:
1639     wxMBConv_win32()
1640     {
1641         m_CodePage = CP_ACP;
1642     }
1643
1644 #if wxUSE_FONTMAP
1645     wxMBConv_win32(const wxChar* name)
1646     {
1647         m_CodePage = wxCharsetToCodepage(name);
1648     }
1649
1650     wxMBConv_win32(wxFontEncoding encoding)
1651     {
1652         m_CodePage = wxEncodingToCodepage(encoding);
1653     }
1654 #endif
1655
1656     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1657     {
1658         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1659         // the behaviour is not compatible with the Unix version (using iconv)
1660         // and break the library itself, e.g. wxTextInputStream::NextChar()
1661         // wouldn't work if reading an incomplete MB char didn't result in an
1662         // error
1663         //
1664         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1665         // an error (tested under Windows Server 2003) and apparently it is
1666         // done on purpose, i.e. the function accepts any input in this case
1667         // and although I'd prefer to return error on ill-formed output, our
1668         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1669         // explicitly ill-formed according to RFC 2152) neither so we don't
1670         // even have any fallback here...
1671         //
1672         // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
1673         // Win XP or newer and if it is specified on older versions, conversion
1674         // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
1675         // fails. So we can only use the flag on newer Windows versions.
1676         // Additionally, the flag is not supported by UTF7, symbol and CJK
1677         // encodings. See here:
1678         //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
1679         //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
1680         int flags = 0;
1681         if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
1682              m_CodePage < 50000 &&
1683              IsAtLeastWin2kSP4() )
1684         {
1685             flags = MB_ERR_INVALID_CHARS;
1686         }
1687         else if ( m_CodePage == CP_UTF8 )
1688         {
1689             // Avoid round-trip in the special case of UTF-8 by using our
1690             // own UTF-8 conversion code:
1691             return wxMBConvUTF8().MB2WC(buf, psz, n);
1692         }
1693
1694         const size_t len = ::MultiByteToWideChar
1695                              (
1696                                 m_CodePage,     // code page
1697                                 flags,          // flags: fall on error
1698                                 psz,            // input string
1699                                 -1,             // its length (NUL-terminated)
1700                                 buf,            // output string
1701                                 buf ? n : 0     // size of output buffer
1702                              );
1703         if ( !len )
1704         {
1705             // function totally failed
1706             return (size_t)-1;
1707         }
1708
1709         // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
1710         // check if we succeeded, by doing a double trip:
1711         if ( !flags && buf )
1712         {
1713             const size_t mbLen = strlen(psz);
1714             wxCharBuffer mbBuf(mbLen);
1715             if ( ::WideCharToMultiByte
1716                    (
1717                       m_CodePage,
1718                       0,
1719                       buf,
1720                       -1,
1721                       mbBuf.data(),
1722                       mbLen + 1,        // size in bytes, not length
1723                       NULL,
1724                       NULL
1725                    ) == 0 ||
1726                   strcmp(mbBuf, psz) != 0 )
1727             {
1728                 // we didn't obtain the same thing we started from, hence
1729                 // the conversion was lossy and we consider that it failed
1730                 return (size_t)-1;
1731             }
1732         }
1733
1734         // note that it returns count of written chars for buf != NULL and size
1735         // of the needed buffer for buf == NULL so in either case the length of
1736         // the string (which never includes the terminating NUL) is one less
1737         return len - 1;
1738     }
1739
1740     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1741     {
1742         /*
1743             we have a problem here: by default, WideCharToMultiByte() may
1744             replace characters unrepresentable in the target code page with bad
1745             quality approximations such as turning "1/2" symbol (U+00BD) into
1746             "1" for the code pages which don't have it and we, obviously, want
1747             to avoid this at any price
1748
1749             the trouble is that this function does it _silently_, i.e. it won't
1750             even tell us whether it did or not... Win98/2000 and higher provide
1751             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1752             we have to resort to a round trip, i.e. check that converting back
1753             results in the same string -- this is, of course, expensive but
1754             otherwise we simply can't be sure to not garble the data.
1755          */
1756
1757         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1758         // it doesn't work with CJK encodings (which we test for rather roughly
1759         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1760         // supporting it
1761         BOOL usedDef wxDUMMY_INITIALIZE(false);
1762         BOOL *pUsedDef;
1763         int flags;
1764         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1765         {
1766             // it's our lucky day
1767             flags = WC_NO_BEST_FIT_CHARS;
1768             pUsedDef = &usedDef;
1769         }
1770         else // old system or unsupported encoding
1771         {
1772             flags = 0;
1773             pUsedDef = NULL;
1774         }
1775
1776         const size_t len = ::WideCharToMultiByte
1777                              (
1778                                 m_CodePage,     // code page
1779                                 flags,          // either none or no best fit
1780                                 pwz,            // input string
1781                                 -1,             // it is (wide) NUL-terminated
1782                                 buf,            // output buffer
1783                                 buf ? n : 0,    // and its size
1784                                 NULL,           // default "replacement" char
1785                                 pUsedDef        // [out] was it used?
1786                              );
1787
1788         if ( !len )
1789         {
1790             // function totally failed
1791             return (size_t)-1;
1792         }
1793
1794         // if we were really converting, check if we succeeded
1795         if ( buf )
1796         {
1797             if ( flags )
1798             {
1799                 // check if the conversion failed, i.e. if any replacements
1800                 // were done
1801                 if ( usedDef )
1802                     return (size_t)-1;
1803             }
1804             else // we must resort to double tripping...
1805             {
1806                 wxWCharBuffer wcBuf(n);
1807                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1808                         wcscmp(wcBuf, pwz) != 0 )
1809                 {
1810                     // we didn't obtain the same thing we started from, hence
1811                     // the conversion was lossy and we consider that it failed
1812                     return (size_t)-1;
1813                 }
1814             }
1815         }
1816
1817         // see the comment above for the reason of "len - 1"
1818         return len - 1;
1819     }
1820
1821     bool IsOk() const { return m_CodePage != -1; }
1822
1823 private:
1824     static bool CanUseNoBestFit()
1825     {
1826         static int s_isWin98Or2k = -1;
1827
1828         if ( s_isWin98Or2k == -1 )
1829         {
1830             int verMaj, verMin;
1831             switch ( wxGetOsVersion(&verMaj, &verMin) )
1832             {
1833                 case wxWIN95:
1834                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1835                     break;
1836
1837                 case wxWINDOWS_NT:
1838                     s_isWin98Or2k = verMaj >= 5;
1839                     break;
1840
1841                 default:
1842                     // unknown, be conseravtive by default
1843                     s_isWin98Or2k = 0;
1844             }
1845
1846             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1847         }
1848
1849         return s_isWin98Or2k == 1;
1850     }
1851
1852     static bool IsAtLeastWin2kSP4()
1853     {
1854 #ifdef __WXWINCE__
1855         return false;
1856 #else
1857         static int s_isAtLeastWin2kSP4 = -1;
1858
1859         if ( s_isAtLeastWin2kSP4 == -1 )
1860         {
1861             OSVERSIONINFOEX ver;
1862
1863             memset(&ver, 0, sizeof(ver));
1864             ver.dwOSVersionInfoSize = sizeof(ver);
1865             GetVersionEx((OSVERSIONINFO*)&ver);
1866
1867             s_isAtLeastWin2kSP4 =
1868               ((ver.dwMajorVersion > 5) || // Vista+
1869                (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
1870                (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
1871                ver.wServicePackMajor >= 4)) // 2000 SP4+
1872               ? 1 : 0;
1873         }
1874
1875         return s_isAtLeastWin2kSP4 == 1;
1876 #endif
1877     }
1878
1879     long m_CodePage;
1880 };
1881
1882 #endif // wxHAVE_WIN32_MB2WC
1883
1884 // ============================================================================
1885 // Cocoa conversion classes
1886 // ============================================================================
1887
1888 #if defined(__WXCOCOA__)
1889
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1893
1894 #include <CoreFoundation/CFString.h>
1895 #include <CoreFoundation/CFStringEncodingExt.h>
1896
1897 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1898 {
1899     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1900     if ( encoding == wxFONTENCODING_DEFAULT )
1901     {
1902         enc = CFStringGetSystemEncoding();
1903     }
1904     else switch( encoding)
1905     {
1906         case wxFONTENCODING_ISO8859_1 :
1907             enc = kCFStringEncodingISOLatin1 ;
1908             break ;
1909         case wxFONTENCODING_ISO8859_2 :
1910             enc = kCFStringEncodingISOLatin2;
1911             break ;
1912         case wxFONTENCODING_ISO8859_3 :
1913             enc = kCFStringEncodingISOLatin3 ;
1914             break ;
1915         case wxFONTENCODING_ISO8859_4 :
1916             enc = kCFStringEncodingISOLatin4;
1917             break ;
1918         case wxFONTENCODING_ISO8859_5 :
1919             enc = kCFStringEncodingISOLatinCyrillic;
1920             break ;
1921         case wxFONTENCODING_ISO8859_6 :
1922             enc = kCFStringEncodingISOLatinArabic;
1923             break ;
1924         case wxFONTENCODING_ISO8859_7 :
1925             enc = kCFStringEncodingISOLatinGreek;
1926             break ;
1927         case wxFONTENCODING_ISO8859_8 :
1928             enc = kCFStringEncodingISOLatinHebrew;
1929             break ;
1930         case wxFONTENCODING_ISO8859_9 :
1931             enc = kCFStringEncodingISOLatin5;
1932             break ;
1933         case wxFONTENCODING_ISO8859_10 :
1934             enc = kCFStringEncodingISOLatin6;
1935             break ;
1936         case wxFONTENCODING_ISO8859_11 :
1937             enc = kCFStringEncodingISOLatinThai;
1938             break ;
1939         case wxFONTENCODING_ISO8859_13 :
1940             enc = kCFStringEncodingISOLatin7;
1941             break ;
1942         case wxFONTENCODING_ISO8859_14 :
1943             enc = kCFStringEncodingISOLatin8;
1944             break ;
1945         case wxFONTENCODING_ISO8859_15 :
1946             enc = kCFStringEncodingISOLatin9;
1947             break ;
1948
1949         case wxFONTENCODING_KOI8 :
1950             enc = kCFStringEncodingKOI8_R;
1951             break ;
1952         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1953             enc = kCFStringEncodingDOSRussian;
1954             break ;
1955
1956 //      case wxFONTENCODING_BULGARIAN :
1957 //          enc = ;
1958 //          break ;
1959
1960         case wxFONTENCODING_CP437 :
1961             enc =kCFStringEncodingDOSLatinUS ;
1962             break ;
1963         case wxFONTENCODING_CP850 :
1964             enc = kCFStringEncodingDOSLatin1;
1965             break ;
1966         case wxFONTENCODING_CP852 :
1967             enc = kCFStringEncodingDOSLatin2;
1968             break ;
1969         case wxFONTENCODING_CP855 :
1970             enc = kCFStringEncodingDOSCyrillic;
1971             break ;
1972         case wxFONTENCODING_CP866 :
1973             enc =kCFStringEncodingDOSRussian ;
1974             break ;
1975         case wxFONTENCODING_CP874 :
1976             enc = kCFStringEncodingDOSThai;
1977             break ;
1978         case wxFONTENCODING_CP932 :
1979             enc = kCFStringEncodingDOSJapanese;
1980             break ;
1981         case wxFONTENCODING_CP936 :
1982             enc =kCFStringEncodingDOSChineseSimplif ;
1983             break ;
1984         case wxFONTENCODING_CP949 :
1985             enc = kCFStringEncodingDOSKorean;
1986             break ;
1987         case wxFONTENCODING_CP950 :
1988             enc = kCFStringEncodingDOSChineseTrad;
1989             break ;
1990         case wxFONTENCODING_CP1250 :
1991             enc = kCFStringEncodingWindowsLatin2;
1992             break ;
1993         case wxFONTENCODING_CP1251 :
1994             enc =kCFStringEncodingWindowsCyrillic ;
1995             break ;
1996         case wxFONTENCODING_CP1252 :
1997             enc =kCFStringEncodingWindowsLatin1 ;
1998             break ;
1999         case wxFONTENCODING_CP1253 :
2000             enc = kCFStringEncodingWindowsGreek;
2001             break ;
2002         case wxFONTENCODING_CP1254 :
2003             enc = kCFStringEncodingWindowsLatin5;
2004             break ;
2005         case wxFONTENCODING_CP1255 :
2006             enc =kCFStringEncodingWindowsHebrew ;
2007             break ;
2008         case wxFONTENCODING_CP1256 :
2009             enc =kCFStringEncodingWindowsArabic ;
2010             break ;
2011         case wxFONTENCODING_CP1257 :
2012             enc = kCFStringEncodingWindowsBalticRim;
2013             break ;
2014 //   This only really encodes to UTF7 (if that) evidently
2015 //        case wxFONTENCODING_UTF7 :
2016 //            enc = kCFStringEncodingNonLossyASCII ;
2017 //            break ;
2018         case wxFONTENCODING_UTF8 :
2019             enc = kCFStringEncodingUTF8 ;
2020             break ;
2021         case wxFONTENCODING_EUC_JP :
2022             enc = kCFStringEncodingEUC_JP;
2023             break ;
2024         case wxFONTENCODING_UTF16 :
2025             enc = kCFStringEncodingUnicode ;
2026             break ;
2027         case wxFONTENCODING_MACROMAN :
2028             enc = kCFStringEncodingMacRoman ;
2029             break ;
2030         case wxFONTENCODING_MACJAPANESE :
2031             enc = kCFStringEncodingMacJapanese ;
2032             break ;
2033         case wxFONTENCODING_MACCHINESETRAD :
2034             enc = kCFStringEncodingMacChineseTrad ;
2035             break ;
2036         case wxFONTENCODING_MACKOREAN :
2037             enc = kCFStringEncodingMacKorean ;
2038             break ;
2039         case wxFONTENCODING_MACARABIC :
2040             enc = kCFStringEncodingMacArabic ;
2041             break ;
2042         case wxFONTENCODING_MACHEBREW :
2043             enc = kCFStringEncodingMacHebrew ;
2044             break ;
2045         case wxFONTENCODING_MACGREEK :
2046             enc = kCFStringEncodingMacGreek ;
2047             break ;
2048         case wxFONTENCODING_MACCYRILLIC :
2049             enc = kCFStringEncodingMacCyrillic ;
2050             break ;
2051         case wxFONTENCODING_MACDEVANAGARI :
2052             enc = kCFStringEncodingMacDevanagari ;
2053             break ;
2054         case wxFONTENCODING_MACGURMUKHI :
2055             enc = kCFStringEncodingMacGurmukhi ;
2056             break ;
2057         case wxFONTENCODING_MACGUJARATI :
2058             enc = kCFStringEncodingMacGujarati ;
2059             break ;
2060         case wxFONTENCODING_MACORIYA :
2061             enc = kCFStringEncodingMacOriya ;
2062             break ;
2063         case wxFONTENCODING_MACBENGALI :
2064             enc = kCFStringEncodingMacBengali ;
2065             break ;
2066         case wxFONTENCODING_MACTAMIL :
2067             enc = kCFStringEncodingMacTamil ;
2068             break ;
2069         case wxFONTENCODING_MACTELUGU :
2070             enc = kCFStringEncodingMacTelugu ;
2071             break ;
2072         case wxFONTENCODING_MACKANNADA :
2073             enc = kCFStringEncodingMacKannada ;
2074             break ;
2075         case wxFONTENCODING_MACMALAJALAM :
2076             enc = kCFStringEncodingMacMalayalam ;
2077             break ;
2078         case wxFONTENCODING_MACSINHALESE :
2079             enc = kCFStringEncodingMacSinhalese ;
2080             break ;
2081         case wxFONTENCODING_MACBURMESE :
2082             enc = kCFStringEncodingMacBurmese ;
2083             break ;
2084         case wxFONTENCODING_MACKHMER :
2085             enc = kCFStringEncodingMacKhmer ;
2086             break ;
2087         case wxFONTENCODING_MACTHAI :
2088             enc = kCFStringEncodingMacThai ;
2089             break ;
2090         case wxFONTENCODING_MACLAOTIAN :
2091             enc = kCFStringEncodingMacLaotian ;
2092             break ;
2093         case wxFONTENCODING_MACGEORGIAN :
2094             enc = kCFStringEncodingMacGeorgian ;
2095             break ;
2096         case wxFONTENCODING_MACARMENIAN :
2097             enc = kCFStringEncodingMacArmenian ;
2098             break ;
2099         case wxFONTENCODING_MACCHINESESIMP :
2100             enc = kCFStringEncodingMacChineseSimp ;
2101             break ;
2102         case wxFONTENCODING_MACTIBETAN :
2103             enc = kCFStringEncodingMacTibetan ;
2104             break ;
2105         case wxFONTENCODING_MACMONGOLIAN :
2106             enc = kCFStringEncodingMacMongolian ;
2107             break ;
2108         case wxFONTENCODING_MACETHIOPIC :
2109             enc = kCFStringEncodingMacEthiopic ;
2110             break ;
2111         case wxFONTENCODING_MACCENTRALEUR :
2112             enc = kCFStringEncodingMacCentralEurRoman ;
2113             break ;
2114         case wxFONTENCODING_MACVIATNAMESE :
2115             enc = kCFStringEncodingMacVietnamese ;
2116             break ;
2117         case wxFONTENCODING_MACARABICEXT :
2118             enc = kCFStringEncodingMacExtArabic ;
2119             break ;
2120         case wxFONTENCODING_MACSYMBOL :
2121             enc = kCFStringEncodingMacSymbol ;
2122             break ;
2123         case wxFONTENCODING_MACDINGBATS :
2124             enc = kCFStringEncodingMacDingbats ;
2125             break ;
2126         case wxFONTENCODING_MACTURKISH :
2127             enc = kCFStringEncodingMacTurkish ;
2128             break ;
2129         case wxFONTENCODING_MACCROATIAN :
2130             enc = kCFStringEncodingMacCroatian ;
2131             break ;
2132         case wxFONTENCODING_MACICELANDIC :
2133             enc = kCFStringEncodingMacIcelandic ;
2134             break ;
2135         case wxFONTENCODING_MACROMANIAN :
2136             enc = kCFStringEncodingMacRomanian ;
2137             break ;
2138         case wxFONTENCODING_MACCELTIC :
2139             enc = kCFStringEncodingMacCeltic ;
2140             break ;
2141         case wxFONTENCODING_MACGAELIC :
2142             enc = kCFStringEncodingMacGaelic ;
2143             break ;
2144 //      case wxFONTENCODING_MACKEYBOARD :
2145 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2146 //          break ;
2147         default :
2148             // because gcc is picky
2149             break ;
2150     } ;
2151     return enc ;
2152 }
2153
2154 class wxMBConv_cocoa : public wxMBConv
2155 {
2156 public:
2157     wxMBConv_cocoa()
2158     {
2159         Init(CFStringGetSystemEncoding()) ;
2160     }
2161
2162 #if wxUSE_FONTMAP
2163     wxMBConv_cocoa(const wxChar* name)
2164     {
2165         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2166     }
2167 #endif
2168
2169     wxMBConv_cocoa(wxFontEncoding encoding)
2170     {
2171         Init( wxCFStringEncFromFontEnc(encoding) );
2172     }
2173
2174     ~wxMBConv_cocoa()
2175     {
2176     }
2177
2178     void Init( CFStringEncoding encoding)
2179     {
2180         m_encoding = encoding ;
2181     }
2182
2183     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2184     {
2185         wxASSERT(szUnConv);
2186
2187         CFStringRef theString = CFStringCreateWithBytes (
2188                                                 NULL, //the allocator
2189                                                 (const UInt8*)szUnConv,
2190                                                 strlen(szUnConv),
2191                                                 m_encoding,
2192                                                 false //no BOM/external representation
2193                                                 );
2194
2195         wxASSERT(theString);
2196
2197         size_t nOutLength = CFStringGetLength(theString);
2198
2199         if (szOut == NULL)
2200         {
2201             CFRelease(theString);
2202             return nOutLength;
2203         }
2204
2205         CFRange theRange = { 0, nOutSize };
2206
2207 #if SIZEOF_WCHAR_T == 4
2208         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2209 #endif
2210
2211         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2212
2213         CFRelease(theString);
2214
2215         szUniCharBuffer[nOutLength] = '\0' ;
2216
2217 #if SIZEOF_WCHAR_T == 4
2218         wxMBConvUTF16 converter ;
2219         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2220         delete[] szUniCharBuffer;
2221 #endif
2222
2223         return nOutLength;
2224     }
2225
2226     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2227     {
2228         wxASSERT(szUnConv);
2229
2230         size_t nRealOutSize;
2231         size_t nBufSize = wxWcslen(szUnConv);
2232         UniChar* szUniBuffer = (UniChar*) szUnConv;
2233
2234 #if SIZEOF_WCHAR_T == 4
2235         wxMBConvUTF16 converter ;
2236         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2237         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2238         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2239         nBufSize /= sizeof(UniChar);
2240 #endif
2241
2242         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2243                                 NULL, //allocator
2244                                 szUniBuffer,
2245                                 nBufSize,
2246                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2247                             );
2248
2249         wxASSERT(theString);
2250
2251         //Note that CER puts a BOM when converting to unicode
2252         //so we  check and use getchars instead in that case
2253         if (m_encoding == kCFStringEncodingUnicode)
2254         {
2255             if (szOut != NULL)
2256                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2257
2258             nRealOutSize = CFStringGetLength(theString) + 1;
2259         }
2260         else
2261         {
2262             CFStringGetBytes(
2263                 theString,
2264                 CFRangeMake(0, CFStringGetLength(theString)),
2265                 m_encoding,
2266                 0, //what to put in characters that can't be converted -
2267                     //0 tells CFString to return NULL if it meets such a character
2268                 false, //not an external representation
2269                 (UInt8*) szOut,
2270                 nOutSize,
2271                 (CFIndex*) &nRealOutSize
2272                         );
2273         }
2274
2275         CFRelease(theString);
2276
2277 #if SIZEOF_WCHAR_T == 4
2278         delete[] szUniBuffer;
2279 #endif
2280
2281         return  nRealOutSize - 1;
2282     }
2283
2284     bool IsOk() const
2285     {
2286         return m_encoding != kCFStringEncodingInvalidId &&
2287               CFStringIsEncodingAvailable(m_encoding);
2288     }
2289
2290 private:
2291     CFStringEncoding m_encoding ;
2292 };
2293
2294 #endif // defined(__WXCOCOA__)
2295
2296 // ============================================================================
2297 // Mac conversion classes
2298 // ============================================================================
2299
2300 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2301
2302 class wxMBConv_mac : public wxMBConv
2303 {
2304 public:
2305     wxMBConv_mac()
2306     {
2307         Init(CFStringGetSystemEncoding()) ;
2308     }
2309
2310 #if wxUSE_FONTMAP
2311     wxMBConv_mac(const wxChar* name)
2312     {
2313         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2314     }
2315 #endif
2316
2317     wxMBConv_mac(wxFontEncoding encoding)
2318     {
2319         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2320     }
2321
2322     ~wxMBConv_mac()
2323     {
2324         OSStatus status = noErr ;
2325         status = TECDisposeConverter(m_MB2WC_converter);
2326         status = TECDisposeConverter(m_WC2MB_converter);
2327     }
2328
2329
2330     void Init( TextEncodingBase encoding)
2331     {
2332         OSStatus status = noErr ;
2333         m_char_encoding = encoding ;
2334         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2335
2336         status = TECCreateConverter(&m_MB2WC_converter,
2337                                     m_char_encoding,
2338                                     m_unicode_encoding);
2339         status = TECCreateConverter(&m_WC2MB_converter,
2340                                     m_unicode_encoding,
2341                                     m_char_encoding);
2342     }
2343
2344     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2345     {
2346         OSStatus status = noErr ;
2347         ByteCount byteOutLen ;
2348         ByteCount byteInLen = strlen(psz) ;
2349         wchar_t *tbuf = NULL ;
2350         UniChar* ubuf = NULL ;
2351         size_t res = 0 ;
2352
2353         if (buf == NULL)
2354         {
2355             //apple specs say at least 32
2356             n = wxMax( 32 , byteInLen ) ;
2357             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2358         }
2359         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2360 #if SIZEOF_WCHAR_T == 4
2361         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2362 #else
2363         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2364 #endif
2365         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2366           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2367 #if SIZEOF_WCHAR_T == 4
2368         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2369         // is not properly terminated we get random characters at the end
2370         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2371         wxMBConvUTF16 converter ;
2372         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2373         free( ubuf ) ;
2374 #else
2375         res = byteOutLen / sizeof( UniChar ) ;
2376 #endif
2377         if ( buf == NULL )
2378              free(tbuf) ;
2379
2380         if ( buf  && res < n)
2381             buf[res] = 0;
2382
2383         return res ;
2384     }
2385
2386     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2387     {
2388         OSStatus status = noErr ;
2389         ByteCount byteOutLen ;
2390         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2391
2392         char *tbuf = NULL ;
2393
2394         if (buf == NULL)
2395         {
2396             //apple specs say at least 32
2397             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2398             tbuf = (char*) malloc( n ) ;
2399         }
2400
2401         ByteCount byteBufferLen = n ;
2402         UniChar* ubuf = NULL ;
2403 #if SIZEOF_WCHAR_T == 4
2404         wxMBConvUTF16 converter ;
2405         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2406         byteInLen = unicharlen ;
2407         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2408         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2409 #else
2410         ubuf = (UniChar*) psz ;
2411 #endif
2412         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2413             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2414 #if SIZEOF_WCHAR_T == 4
2415         free( ubuf ) ;
2416 #endif
2417         if ( buf == NULL )
2418             free(tbuf) ;
2419
2420         size_t res = byteOutLen ;
2421         if ( buf  && res < n)
2422         {
2423             buf[res] = 0;
2424
2425             //we need to double-trip to verify it didn't insert any ? in place
2426             //of bogus characters
2427             wxWCharBuffer wcBuf(n);
2428             size_t pszlen = wxWcslen(psz);
2429             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2430                         wxWcslen(wcBuf) != pszlen ||
2431                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2432             {
2433                 // we didn't obtain the same thing we started from, hence
2434                 // the conversion was lossy and we consider that it failed
2435                 return (size_t)-1;
2436             }
2437         }
2438
2439         return res ;
2440     }
2441
2442     bool IsOk() const
2443         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2444
2445 private:
2446     TECObjectRef m_MB2WC_converter ;
2447     TECObjectRef m_WC2MB_converter ;
2448
2449     TextEncodingBase m_char_encoding ;
2450     TextEncodingBase m_unicode_encoding ;
2451 };
2452
2453 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2454
2455 // ============================================================================
2456 // wxEncodingConverter based conversion classes
2457 // ============================================================================
2458
2459 #if wxUSE_FONTMAP
2460
2461 class wxMBConv_wxwin : public wxMBConv
2462 {
2463 private:
2464     void Init()
2465     {
2466         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2467                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2468     }
2469
2470 public:
2471     // temporarily just use wxEncodingConverter stuff,
2472     // so that it works while a better implementation is built
2473     wxMBConv_wxwin(const wxChar* name)
2474     {
2475         if (name)
2476             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2477         else
2478             m_enc = wxFONTENCODING_SYSTEM;
2479
2480         Init();
2481     }
2482
2483     wxMBConv_wxwin(wxFontEncoding enc)
2484     {
2485         m_enc = enc;
2486
2487         Init();
2488     }
2489
2490     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2491     {
2492         size_t inbuf = strlen(psz);
2493         if (buf)
2494         {
2495             if (!m2w.Convert(psz,buf))
2496                 return (size_t)-1;
2497         }
2498         return inbuf;
2499     }
2500
2501     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2502     {
2503         const size_t inbuf = wxWcslen(psz);
2504         if (buf)
2505         {
2506             if (!w2m.Convert(psz,buf))
2507                 return (size_t)-1;
2508         }
2509
2510         return inbuf;
2511     }
2512
2513     bool IsOk() const { return m_ok; }
2514
2515 public:
2516     wxFontEncoding m_enc;
2517     wxEncodingConverter m2w, w2m;
2518
2519     // were we initialized successfully?
2520     bool m_ok;
2521
2522     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2523 };
2524
2525 // make the constructors available for unit testing
2526 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2527 {
2528     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2529     if ( !result->IsOk() )
2530     {
2531         delete result;
2532         return 0;
2533     }
2534     return result;
2535 }
2536
2537 #endif // wxUSE_FONTMAP
2538
2539 // ============================================================================
2540 // wxCSConv implementation
2541 // ============================================================================
2542
2543 void wxCSConv::Init()
2544 {
2545     m_name = NULL;
2546     m_convReal =  NULL;
2547     m_deferred = true;
2548 }
2549
2550 wxCSConv::wxCSConv(const wxChar *charset)
2551 {
2552     Init();
2553
2554     if ( charset )
2555     {
2556         SetName(charset);
2557     }
2558
2559 #if wxUSE_FONTMAP
2560     m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2561 #else
2562     m_encoding = wxFONTENCODING_SYSTEM;
2563 #endif
2564 }
2565
2566 wxCSConv::wxCSConv(wxFontEncoding encoding)
2567 {
2568     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2569     {
2570         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2571
2572         encoding = wxFONTENCODING_SYSTEM;
2573     }
2574
2575     Init();
2576
2577     m_encoding = encoding;
2578 }
2579
2580 wxCSConv::~wxCSConv()
2581 {
2582     Clear();
2583 }
2584
2585 wxCSConv::wxCSConv(const wxCSConv& conv)
2586         : wxMBConv()
2587 {
2588     Init();
2589
2590     SetName(conv.m_name);
2591     m_encoding = conv.m_encoding;
2592 }
2593
2594 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2595 {
2596     Clear();
2597
2598     SetName(conv.m_name);
2599     m_encoding = conv.m_encoding;
2600
2601     return *this;
2602 }
2603
2604 void wxCSConv::Clear()
2605 {
2606     free(m_name);
2607     delete m_convReal;
2608
2609     m_name = NULL;
2610     m_convReal = NULL;
2611 }
2612
2613 void wxCSConv::SetName(const wxChar *charset)
2614 {
2615     if (charset)
2616     {
2617         m_name = wxStrdup(charset);
2618         m_deferred = true;
2619     }
2620 }
2621
2622 #if wxUSE_FONTMAP
2623 #include "wx/hashmap.h"
2624
2625 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
2626                      wxEncodingNameCache );
2627
2628 static wxEncodingNameCache gs_nameCache;
2629 #endif
2630
2631 wxMBConv *wxCSConv::DoCreate() const
2632 {
2633 #if wxUSE_FONTMAP
2634     wxLogTrace(TRACE_STRCONV,
2635                wxT("creating conversion for %s"),
2636                (m_name ? m_name
2637                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2638 #endif // wxUSE_FONTMAP
2639
2640     // check for the special case of ASCII or ISO8859-1 charset: as we have
2641     // special knowledge of it anyhow, we don't need to create a special
2642     // conversion object
2643     if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2644             m_encoding == wxFONTENCODING_DEFAULT )
2645     {
2646         // don't convert at all
2647         return NULL;
2648     }
2649
2650     // we trust OS to do conversion better than we can so try external
2651     // conversion methods first
2652     //
2653     // the full order is:
2654     //      1. OS conversion (iconv() under Unix or Win32 API)
2655     //      2. hard coded conversions for UTF
2656     //      3. wxEncodingConverter as fall back
2657
2658     // step (1)
2659 #ifdef HAVE_ICONV
2660 #if !wxUSE_FONTMAP
2661     if ( m_name )
2662 #endif // !wxUSE_FONTMAP
2663     {
2664         wxString name(m_name);
2665         wxFontEncoding encoding(m_encoding);
2666
2667         if ( !name.empty() )
2668         {
2669             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2670             if ( conv->IsOk() )
2671                 return conv;
2672
2673             delete conv;
2674
2675 #if wxUSE_FONTMAP
2676             encoding =
2677                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2678 #endif // wxUSE_FONTMAP
2679         }
2680 #if wxUSE_FONTMAP
2681         {
2682             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2683             if ( it != gs_nameCache.end() )
2684             {
2685                 if ( it->second.empty() )
2686                     return NULL;
2687
2688                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2689                 if ( conv->IsOk() )
2690                     return conv;
2691
2692                 delete conv;
2693             }
2694
2695             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2696
2697             for ( ; *names; ++names )
2698             {
2699                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2700                 if ( conv->IsOk() )
2701                 {
2702                     gs_nameCache[encoding] = *names;
2703                     return conv;
2704                 }
2705
2706                 delete conv;
2707             }
2708
2709             gs_nameCache[encoding] = _T(""); // cache the failure
2710         }
2711 #endif // wxUSE_FONTMAP
2712     }
2713 #endif // HAVE_ICONV
2714
2715 #ifdef wxHAVE_WIN32_MB2WC
2716     {
2717 #if wxUSE_FONTMAP
2718         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2719                                       : new wxMBConv_win32(m_encoding);
2720         if ( conv->IsOk() )
2721             return conv;
2722
2723         delete conv;
2724 #else
2725         return NULL;
2726 #endif
2727     }
2728 #endif // wxHAVE_WIN32_MB2WC
2729 #if defined(__WXMAC__)
2730     {
2731         // leave UTF16 and UTF32 to the built-ins of wx
2732         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2733             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2734         {
2735
2736 #if wxUSE_FONTMAP
2737             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2738                                         : new wxMBConv_mac(m_encoding);
2739 #else
2740             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2741 #endif
2742             if ( conv->IsOk() )
2743                  return conv;
2744
2745             delete conv;
2746         }
2747     }
2748 #endif
2749 #if defined(__WXCOCOA__)
2750     {
2751         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2752         {
2753
2754 #if wxUSE_FONTMAP
2755             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2756                                           : new wxMBConv_cocoa(m_encoding);
2757 #else
2758             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2759 #endif
2760             if ( conv->IsOk() )
2761                  return conv;
2762
2763             delete conv;
2764         }
2765     }
2766 #endif
2767     // step (2)
2768     wxFontEncoding enc = m_encoding;
2769 #if wxUSE_FONTMAP
2770     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2771     {
2772         // use "false" to suppress interactive dialogs -- we can be called from
2773         // anywhere and popping up a dialog from here is the last thing we want to
2774         // do
2775         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2776     }
2777 #endif // wxUSE_FONTMAP
2778
2779     switch ( enc )
2780     {
2781         case wxFONTENCODING_UTF7:
2782              return new wxMBConvUTF7;
2783
2784         case wxFONTENCODING_UTF8:
2785              return new wxMBConvUTF8;
2786
2787         case wxFONTENCODING_UTF16BE:
2788              return new wxMBConvUTF16BE;
2789
2790         case wxFONTENCODING_UTF16LE:
2791              return new wxMBConvUTF16LE;
2792
2793         case wxFONTENCODING_UTF32BE:
2794              return new wxMBConvUTF32BE;
2795
2796         case wxFONTENCODING_UTF32LE:
2797              return new wxMBConvUTF32LE;
2798
2799         default:
2800              // nothing to do but put here to suppress gcc warnings
2801              ;
2802     }
2803
2804     // step (3)
2805 #if wxUSE_FONTMAP
2806     {
2807         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2808                                       : new wxMBConv_wxwin(m_encoding);
2809         if ( conv->IsOk() )
2810             return conv;
2811
2812         delete conv;
2813     }
2814 #endif // wxUSE_FONTMAP
2815
2816     // NB: This is a hack to prevent deadlock. What could otherwise happen
2817     //     in Unicode build: wxConvLocal creation ends up being here
2818     //     because of some failure and logs the error. But wxLog will try to
2819     //     attach timestamp, for which it will need wxConvLocal (to convert
2820     //     time to char* and then wchar_t*), but that fails, tries to log
2821     //     error, but wxLog has a (already locked) critical section that
2822     //     guards static buffer.
2823     static bool alreadyLoggingError = false;
2824     if (!alreadyLoggingError)
2825     {
2826         alreadyLoggingError = true;
2827         wxLogError(_("Cannot convert from the charset '%s'!"),
2828                    m_name ? m_name
2829                       :
2830 #if wxUSE_FONTMAP
2831                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2832 #else // !wxUSE_FONTMAP
2833                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2834 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2835               );
2836         alreadyLoggingError = false;
2837     }
2838
2839     return NULL;
2840 }
2841
2842 void wxCSConv::CreateConvIfNeeded() const
2843 {
2844     if ( m_deferred )
2845     {
2846         wxCSConv *self = (wxCSConv *)this; // const_cast
2847
2848 #if wxUSE_INTL
2849         // if we don't have neither the name nor the encoding, use the default
2850         // encoding for this system
2851         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2852         {
2853             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2854         }
2855 #endif // wxUSE_INTL
2856
2857         self->m_convReal = DoCreate();
2858         self->m_deferred = false;
2859     }
2860 }
2861
2862 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2863 {
2864     CreateConvIfNeeded();
2865
2866     if (m_convReal)
2867         return m_convReal->MB2WC(buf, psz, n);
2868
2869     // latin-1 (direct)
2870     size_t len = strlen(psz);
2871
2872     if (buf)
2873     {
2874         for (size_t c = 0; c <= len; c++)
2875             buf[c] = (unsigned char)(psz[c]);
2876     }
2877
2878     return len;
2879 }
2880
2881 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2882 {
2883     CreateConvIfNeeded();
2884
2885     if (m_convReal)
2886         return m_convReal->WC2MB(buf, psz, n);
2887
2888     // latin-1 (direct)
2889     const size_t len = wxWcslen(psz);
2890     if (buf)
2891     {
2892         for (size_t c = 0; c <= len; c++)
2893         {
2894             if (psz[c] > 0xFF)
2895                 return (size_t)-1;
2896             buf[c] = (char)psz[c];
2897         }
2898     }
2899     else
2900     {
2901         for (size_t c = 0; c <= len; c++)
2902         {
2903             if (psz[c] > 0xFF)
2904                 return (size_t)-1;
2905         }
2906     }
2907
2908     return len;
2909 }
2910
2911 // ----------------------------------------------------------------------------
2912 // globals
2913 // ----------------------------------------------------------------------------
2914
     // The converter objects themselves are file-static; other code reaches
     // them only through the exported references and pointers defined below.

     // wxConvLibcObj has a platform-dependent type: wxMBConv_win32 when
     // __WINDOWS__ is defined, wxMBConv_mac when __WXMAC__ is defined but
     // __MACH__ is not, and wxMBConvLibc in all other cases.
2915 #ifdef __WINDOWS__
2916     static wxMBConv_win32 wxConvLibcObj;
2917 #elif defined(__WXMAC__) && !defined(__MACH__)
2918     static wxMBConv_mac wxConvLibcObj ;
2919 #else
2920     static wxMBConvLibc wxConvLibcObj;
2921 #endif
2922
     // wxCSConv objects for the system encoding and for ISO 8859-1, followed
     // by the UTF-7 and UTF-8 converters
2923 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2924 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2925 static wxMBConvUTF7 wxConvUTF7Obj;
2926 static wxMBConvUTF8 wxConvUTF8Obj;
2927
     // exported references, each bound to the corresponding static object above
2928 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2929 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2930 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2931 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2932 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
     // exported pointers: wxConvCurrent initially points to the libc converter
     // object; wxConvFileName points to the UTF-8 converter object when
     // __WXOSX__ is defined and to the libc converter object otherwise
2933 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2934 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2935 #ifdef __WXOSX__
2936                                     wxConvUTF8Obj;
2937 #else
2938                                     wxConvLibcObj;
2939 #endif
2940
2941
2942 #else // !wxUSE_WCHAR_T
2943
2944 // stand-ins in absence of wchar_t
     // In this configuration the four names are defined in a single
     // declaration as plain wxMBConv objects rather than as references to
     // specialised converter objects.
2945 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2946                                 wxConvISO8859_1,
2947                                 wxConvLocal,
2948                                 wxConvUTF8;
2949
2950 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T