src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
  79 #define TRACE_STRCONV _T("strconv")
  80
  81 // ============================================================================
  82 // implementation
  83 // ============================================================================
  84
  85 // ----------------------------------------------------------------------------
  86 // UTF-16 en/decoding to/from UCS-4
  87 // ----------------------------------------------------------------------------
  88
  89
  90 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  91 {
  92     if (input<=0xffff)
  93     {
  94         if (output)
  95             *output = (wxUint16) input;
  96         return 1;
  97     }
  98     else if (input>=0x110000)
  99     {
 100         return (size_t)-1;
 101     }
 102     else
 103     {
 104         if (output)
 105         {
 106             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 107             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 108         }
 109         return 2;
 110     }
 111 }
 112
 113 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 114 {
 115     if ((*input<0xd800) || (*input>0xdfff))
 116     {
 117         output = *input;
 118         return 1;
 119     }
 120     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 121     {
 122         output = *input;
 123         return (size_t)-1;
 124     }
 125     else
 126     {
 127         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 128         return 2;
 129     }
 130 }
 131
 132
 133 // ----------------------------------------------------------------------------
 134 // wxMBConv
 135 // ----------------------------------------------------------------------------
 136
 137 wxMBConv::~wxMBConv()
 138 {
 139     // nothing to do here (necessary for Darwin linking probably)
 140 }
 141
 142 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 143 {
 144     if ( psz )
 145     {
 146         // calculate the length of the buffer needed first
 147         size_t nLen = MB2WC(NULL, psz, 0);
 148         if ( nLen != (size_t)-1 )
 149         {
 150             // now do the actual conversion
 151             wxWCharBuffer buf(nLen);
 152             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 153             if ( nLen != (size_t)-1 )
 154             {
 155                 return buf;
 156             }
 157         }
 158     }
 159
 160     wxWCharBuffer buf((wchar_t *)NULL);
 161
 162     return buf;
 163 }
 164
 165 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 166 {
 167     if ( pwz )
 168     {
 169         size_t nLen = WC2MB(NULL, pwz, 0);
 170         if ( nLen != (size_t)-1 )
 171         {
 172             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 173             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 174             if ( nLen != (size_t)-1 )
 175             {
 176                 return buf;
 177             }
 178         }
 179     }
 180
 181     wxCharBuffer buf((char *)NULL);
 182
 183     return buf;
 184 }
 185
 186 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 187 {
 188     wxASSERT(pOutSize != NULL);
 189
 190     const char* szEnd = szString + nStringLen + 1;
 191     const char* szPos = szString;
 192     const char* szStart = szPos;
 193
 194     size_t nActualLength = 0;
 195     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 196
 197     wxWCharBuffer theBuffer(nCurrentSize);
 198
 199     //Convert the string until the length() is reached, continuing the
 200     //loop every time a null character is reached
 201     while(szPos != szEnd)
 202     {
 203         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 204
 205         //Get the length of the current (sub)string
 206         size_t nLen = MB2WC(NULL, szPos, 0);
 207
 208         //Invalid conversion?
 209         if( nLen == (size_t)-1 )
 210         {
 211             *pOutSize = 0;
 212             theBuffer.data()[0u] = wxT('\0');
 213             return theBuffer;
 214         }
 215
 216
 217         //Increase the actual length (+1 for current null character)
 218         nActualLength += nLen + 1;
 219
 220         //if buffer too big, realloc the buffer
 221         if (nActualLength > (nCurrentSize+1))
 222         {
 223             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 224             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 225             theBuffer = theNewBuffer;
 226             nCurrentSize <<= 1;
 227         }
 228
 229         //Convert the current (sub)string
 230         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 231         {
 232             *pOutSize = 0;
 233             theBuffer.data()[0u] = wxT('\0');
 234             return theBuffer;
 235         }
 236
 237         //Increment to next (sub)string
 238         //Note that we have to use strlen instead of nLen here
 239         //because XX2XX gives us the size of the output buffer,
 240         //which is not necessarily the length of the string
 241         szPos += strlen(szPos) + 1;
 242     }
 243
 244     //success - return actual length and the buffer
 245     *pOutSize = nActualLength;
 246     return theBuffer;
 247 }
 248
 249 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 250 {
 251     wxASSERT(pOutSize != NULL);
 252
 253     const wchar_t* szEnd = szString + nStringLen + 1;
 254     const wchar_t* szPos = szString;
 255     const wchar_t* szStart = szPos;
 256
 257     size_t nActualLength = 0;
 258     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 259
 260     wxCharBuffer theBuffer(nCurrentSize);
 261
 262     //Convert the string until the length() is reached, continuing the
 263     //loop every time a null character is reached
 264     while(szPos != szEnd)
 265     {
 266         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 267
 268         //Get the length of the current (sub)string
 269         size_t nLen = WC2MB(NULL, szPos, 0);
 270
 271         //Invalid conversion?
 272         if( nLen == (size_t)-1 )
 273         {
 274             *pOutSize = 0;
 275             theBuffer.data()[0u] = wxT('\0');
 276             return theBuffer;
 277         }
 278
 279         //Increase the actual length (+1 for current null character)
 280         nActualLength += nLen + 1;
 281
 282         //if buffer too big, realloc the buffer
 283         if (nActualLength > (nCurrentSize+1))
 284         {
 285             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 286             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 287             theBuffer = theNewBuffer;
 288             nCurrentSize <<= 1;
 289         }
 290
 291         //Convert the current (sub)string
 292         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 293         {
 294             *pOutSize = 0;
 295             theBuffer.data()[0u] = wxT('\0');
 296             return theBuffer;
 297         }
 298
 299         //Increment to next (sub)string
 300         //Note that we have to use wxWcslen instead of nLen here
 301         //because XX2XX gives us the size of the output buffer,
 302         //which is not necessarily the length of the string
 303         szPos += wxWcslen(szPos) + 1;
 304     }
 305
 306     //success - return actual length and the buffer
 307     *pOutSize = nActualLength;
 308     return theBuffer;
 309 }
 310
 311 // ----------------------------------------------------------------------------
 312 // wxMBConvLibc
 313 // ----------------------------------------------------------------------------
 314
 315 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 316 {
 317     return wxMB2WC(buf, psz, n);
 318 }
 319
 320 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 321 {
 322     return wxWC2MB(buf, psz, n);
 323 }
 324
 325 #ifdef __UNIX__
 326
 327 // ----------------------------------------------------------------------------
 328 // wxConvBrokenFileNames
 329 // ----------------------------------------------------------------------------
 330
 331 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 332 {
 333     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 334                   || wxStricmp(charset, _T("UTF8")) == 0  )
 335         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 336     else
 337         m_conv = new wxCSConv(charset);
 338 }
 339
 340 size_t
 341 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 342                              const char *psz,
 343                              size_t outputSize) const
 344 {
 345     return m_conv->MB2WC( outputBuf, psz, outputSize );
 346 }
 347
 348 size_t
 349 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 350                              const wchar_t *psz,
 351                              size_t outputSize) const
 352 {
 353     return m_conv->WC2MB( outputBuf, psz, outputSize );
 354 }
 355
 356 #endif
 357
 358 // ----------------------------------------------------------------------------
 359 // UTF-7
 360 // ----------------------------------------------------------------------------
 361
 362 // Implementation (C) 2004 Fredrik Roubert
 363
 364 //
 365 // BASE64 decoding table
 366 //
 367 static const unsigned char utf7unb64[] =
 368 {
 369     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 370     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 371     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 372     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 373     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 374     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 375     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 376     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 377     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 378     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 379     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 380     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 381     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 382     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 383     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 384     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 385     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 386     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 387     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 388     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 389     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 390     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 391     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 392     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 393     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 394     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 395     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 396     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 397     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 398     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 399     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 400     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 401 };
 402
 403 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 404 {
 405     size_t len = 0;
 406
 407     while (*psz && ((!buf) || (len < n)))
 408     {
 409         unsigned char cc = *psz++;
 410         if (cc != '+')
 411         {
 412             // plain ASCII char
 413             if (buf)
 414                 *buf++ = cc;
 415             len++;
 416         }
 417         else if (*psz == '-')
 418         {
 419             // encoded plus sign
 420             if (buf)
 421                 *buf++ = cc;
 422             len++;
 423             psz++;
 424         }
 425         else
 426         {
 427             // BASE64 encoded string
 428             bool lsb;
 429             unsigned char c;
 430             unsigned int d, l;
 431             for (lsb = false, d = 0, l = 0;
 432                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 433             {
 434                 d <<= 6;
 435                 d += cc;
 436                 for (l += 6; l >= 8; lsb = !lsb)
 437                 {
 438                     c = (unsigned char)((d >> (l -= 8)) % 256);
 439                     if (lsb)
 440                     {
 441                         if (buf)
 442                             *buf++ |= c;
 443                         len ++;
 444                     }
 445                     else
 446                         if (buf)
 447                             *buf = (wchar_t)(c << 8);
 448                 }
 449             }
 450             if (*psz == '-')
 451                 psz++;
 452         }
 453     }
 454     if (buf && (len < n))
 455         *buf = 0;
 456     return len;
 457 }
 458
 459 //
 460 // BASE64 encoding table
 461 //
 462 static const unsigned char utf7enb64[] =
 463 {
 464     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 465     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 466     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 467     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 468     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 469     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 470     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 471     '4', '5', '6', '7', '8', '9', '+', '/'
 472 };
 473
 474 //
 475 // UTF-7 encoding table
 476 //
 477 // 0 - Set D (directly encoded characters)
 478 // 1 - Set O (optional direct characters)
 479 // 2 - whitespace characters (optional)
 480 // 3 - special characters
 481 //
 482 static const unsigned char utf7encode[128] =
 483 {
 484     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 485     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 486     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 487     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 488     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 489     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 490     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 491     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 492 };
 493
 494 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 495 {
 496
 497
 498     size_t len = 0;
 499
 500     while (*psz && ((!buf) || (len < n)))
 501     {
 502         wchar_t cc = *psz++;
 503         if (cc < 0x80 && utf7encode[cc] < 1)
 504         {
 505             // plain ASCII char
 506             if (buf)
 507                 *buf++ = (char)cc;
 508             len++;
 509         }
 510 #ifndef WC_UTF16
 511         else if (((wxUint32)cc) > 0xffff)
 512         {
 513             // no surrogate pair generation (yet?)
 514             return (size_t)-1;
 515         }
 516 #endif
 517         else
 518         {
 519             if (buf)
 520                 *buf++ = '+';
 521             len++;
 522             if (cc != '+')
 523             {
 524                 // BASE64 encode string
 525                 unsigned int lsb, d, l;
 526                 for (d = 0, l = 0;; psz++)
 527                 {
 528                     for (lsb = 0; lsb < 2; lsb ++)
 529                     {
 530                         d <<= 8;
 531                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 532
 533                         for (l += 8; l >= 6; )
 534                         {
 535                             l -= 6;
 536                             if (buf)
 537                                 *buf++ = utf7enb64[(d >> l) % 64];
 538                             len++;
 539                         }
 540                     }
 541                     cc = *psz;
 542                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 543                         break;
 544                 }
 545                 if (l != 0)
 546                 {
 547                     if (buf)
 548                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 549                     len++;
 550                 }
 551             }
 552             if (buf)
 553                 *buf++ = '-';
 554             len++;
 555         }
 556     }
 557     if (buf && (len < n))
 558         *buf = 0;
 559     return len;
 560 }
 561
 562 // ----------------------------------------------------------------------------
 563 // UTF-8
 564 // ----------------------------------------------------------------------------
 565
 566 static wxUint32 utf8_max[]=
 567     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 568
 569 // boundaries of the private use area we use to (temporarily) remap invalid
 570 // characters invalid in a UTF-8 encoded string
 571 const wxUint32 wxUnicodePUA = 0x100000;
 572 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 573
 574 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 575 {
 576     size_t len = 0;
 577
 578     while (*psz && ((!buf) || (len < n)))
 579     {
 580         const char *opsz = psz;
 581         bool invalid = false;
 582         unsigned char cc = *psz++, fc = cc;
 583         unsigned cnt;
 584         for (cnt = 0; fc & 0x80; cnt++)
 585             fc <<= 1;
 586         if (!cnt)
 587         {
 588             // plain ASCII char
 589             if (buf)
 590                 *buf++ = cc;
 591             len++;
 592
 593             // escape the escape character for octal escapes
 594             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 595                     && cc == '\\' && (!buf || len < n))
 596             {
 597                 if (buf)
 598                     *buf++ = cc;
 599                 len++;
 600             }
 601         }
 602         else
 603         {
 604             cnt--;
 605             if (!cnt)
 606             {
 607                 // invalid UTF-8 sequence
 608                 invalid = true;
 609             }
 610             else
 611             {
 612                 unsigned ocnt = cnt - 1;
 613                 wxUint32 res = cc & (0x3f >> cnt);
 614                 while (cnt--)
 615                 {
 616                     cc = *psz;
 617                     if ((cc & 0xC0) != 0x80)
 618                     {
 619                         // invalid UTF-8 sequence
 620                         invalid = true;
 621                         break;
 622                     }
 623                     psz++;
 624                     res = (res << 6) | (cc & 0x3f);
 625                 }
 626                 if (invalid || res <= utf8_max[ocnt])
 627                 {
 628                     // illegal UTF-8 encoding
 629                     invalid = true;
 630                 }
 631                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 632                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 633                 {
 634                     // if one of our PUA characters turns up externally
 635                     // it must also be treated as an illegal sequence
 636                     // (a bit like you have to escape an escape character)
 637                     invalid = true;
 638                 }
 639                 else
 640                 {
 641 #ifdef WC_UTF16
 642                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 643                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 644                     if (pa == (size_t)-1)
 645                     {
 646                         invalid = true;
 647                     }
 648                     else
 649                     {
 650                         if (buf)
 651                             buf += pa;
 652                         len += pa;
 653                     }
 654 #else // !WC_UTF16
 655                     if (buf)
 656                         *buf++ = res;
 657                     len++;
 658 #endif // WC_UTF16/!WC_UTF16
 659                 }
 660             }
 661             if (invalid)
 662             {
 663                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 664                 {
 665                     while (opsz < psz && (!buf || len < n))
 666                     {
 667 #ifdef WC_UTF16
 668                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 669                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 670                         wxASSERT(pa != (size_t)-1);
 671                         if (buf)
 672                             buf += pa;
 673                         opsz++;
 674                         len += pa;
 675 #else
 676                         if (buf)
 677                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 678                         opsz++;
 679                         len++;
 680 #endif
 681                     }
 682                 }
 683                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 684                 {
 685                     while (opsz < psz && (!buf || len < n))
 686                     {
 687                         if ( buf && len + 3 < n )
 688                         {
 689                             unsigned char n = *opsz;
 690                             *buf++ = L'\\';
 691                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 692                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 693                             *buf++ = (wchar_t)( L'0' + n % 010 );
 694                         }
 695                         opsz++;
 696                         len += 4;
 697                     }
 698                 }
 699                 else // MAP_INVALID_UTF8_NOT
 700                 {
 701                     return (size_t)-1;
 702                 }
 703             }
 704         }
 705     }
 706     if (buf && (len < n))
 707         *buf = 0;
 708     return len;
 709 }
 710
 711 static inline bool isoctal(wchar_t wch)
 712 {
 713     return L'0' <= wch && wch <= L'7';
 714 }
 715
 716 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 717 {
 718     size_t len = 0;
 719
 720     while (*psz && ((!buf) || (len < n)))
 721     {
 722         wxUint32 cc;
 723 #ifdef WC_UTF16
 724         // cast is ok for WC_UTF16
 725         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 726         psz += (pa == (size_t)-1) ? 1 : pa;
 727 #else
 728         cc=(*psz++) & 0x7fffffff;
 729 #endif
 730
 731         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 732                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 733         {
 734             if (buf)
 735                 *buf++ = (char)(cc - wxUnicodePUA);
 736             len++;
 737         }
 738         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 739                     && cc == L'\\' && psz[0] == L'\\' )
 740         {
 741             if (buf)
 742                 *buf++ = (char)cc;
 743             psz++;
 744             len++;
 745         }
 746         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 747                     cc == L'\\' &&
 748                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 749         {
 750             if (buf)
 751             {
 752                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 753                                  (psz[1] - L'0')*010 +
 754                                  (psz[2] - L'0'));
 755             }
 756
 757             psz += 3;
 758             len++;
 759         }
 760         else
 761         {
 762             unsigned cnt;
 763             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 764             if (!cnt)
 765             {
 766                 // plain ASCII char
 767                 if (buf)
 768                     *buf++ = (char) cc;
 769                 len++;
 770             }
 771
 772             else
 773             {
 774                 len += cnt + 1;
 775                 if (buf)
 776                 {
 777                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 778                     while (cnt--)
 779                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 780                 }
 781             }
 782         }
 783     }
 784
 785     if (buf && (len<n))
 786         *buf = 0;
 787
 788     return len;
 789 }
 790
 791 // ----------------------------------------------------------------------------
 792 // UTF-16
 793 // ----------------------------------------------------------------------------
 794
 795 #ifdef WORDS_BIGENDIAN
 796     #define wxMBConvUTF16straight wxMBConvUTF16BE
 797     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 798 #else
 799     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 800     #define wxMBConvUTF16straight wxMBConvUTF16LE
 801 #endif
 802
 803
 804 #ifdef WC_UTF16
 805
 806 // copy 16bit MB to 16bit String
 807 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 808 {
 809     size_t len=0;
 810
 811     while (*(wxUint16*)psz && (!buf || len < n))
 812     {
 813         if (buf)
 814             *buf++ = *(wxUint16*)psz;
 815         len++;
 816
 817         psz += sizeof(wxUint16);
 818     }
 819     if (buf && len<n)   *buf=0;
 820
 821     return len;
 822 }
 823
 824
 825 // copy 16bit String to 16bit MB
 826 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 827 {
 828     size_t len=0;
 829
 830     while (*psz && (!buf || len < n))
 831     {
 832         if (buf)
 833         {
 834             *(wxUint16*)buf = *psz;
 835             buf += sizeof(wxUint16);
 836         }
 837         len += sizeof(wxUint16);
 838         psz++;
 839     }
 840     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 841
 842     return len;
 843 }
 844
 845
 846 // swap 16bit MB to 16bit String
 847 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 848 {
 849     size_t len=0;
 850
 851     while (*(wxUint16*)psz && (!buf || len < n))
 852     {
 853         if (buf)
 854         {
 855             ((char *)buf)[0] = psz[1];
 856             ((char *)buf)[1] = psz[0];
 857             buf++;
 858         }
 859         len++;
 860         psz += sizeof(wxUint16);
 861     }
 862     if (buf && len<n)   *buf=0;
 863
 864     return len;
 865 }
 866
 867
 868 // swap 16bit MB to 16bit String
 869 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 870 {
 871     size_t len=0;
 872
 873     while (*psz && (!buf || len < n))
 874     {
 875         if (buf)
 876         {
 877             *buf++ = ((char*)psz)[1];
 878             *buf++ = ((char*)psz)[0];
 879         }
 880         len += sizeof(wxUint16);
 881         psz++;
 882     }
 883     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 884
 885     return len;
 886 }
 887
 888
 889 #else // WC_UTF16
 890
 891
 892 // copy 16bit MB to 32bit String
 893 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 894 {
 895     size_t len=0;
 896
 897     while (*(wxUint16*)psz && (!buf || len < n))
 898     {
 899         wxUint32 cc;
 900         size_t pa=decode_utf16((wxUint16*)psz, cc);
 901         if (pa == (size_t)-1)
 902             return pa;
 903
 904         if (buf)
 905             *buf++ = cc;
 906         len++;
 907         psz += pa * sizeof(wxUint16);
 908     }
 909     if (buf && len<n)   *buf=0;
 910
 911     return len;
 912 }
 913
 914
 915 // copy 32bit String to 16bit MB
 916 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 917 {
 918     size_t len=0;
 919
 920     while (*psz && (!buf || len < n))
 921     {
 922         wxUint16 cc[2];
 923         size_t pa=encode_utf16(*psz, cc);
 924
 925         if (pa == (size_t)-1)
 926             return pa;
 927
 928         if (buf)
 929         {
 930             *(wxUint16*)buf = cc[0];
 931             buf += sizeof(wxUint16);
 932             if (pa > 1)
 933             {
 934                 *(wxUint16*)buf = cc[1];
 935                 buf += sizeof(wxUint16);
 936             }
 937         }
 938
 939         len += pa*sizeof(wxUint16);
 940         psz++;
 941     }
 942     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 943
 944     return len;
 945 }
 946
 947
 948 // swap 16bit MB to 32bit String
 949 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 950 {
 951     size_t len=0;
 952
 953     while (*(wxUint16*)psz && (!buf || len < n))
 954     {
 955         wxUint32 cc;
 956         char tmp[4];
 957         tmp[0]=psz[1];  tmp[1]=psz[0];
 958         tmp[2]=psz[3];  tmp[3]=psz[2];
 959
 960         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 961         if (pa == (size_t)-1)
 962             return pa;
 963
 964         if (buf)
 965             *buf++ = cc;
 966
 967         len++;
 968         psz += pa * sizeof(wxUint16);
 969     }
 970     if (buf && len<n)   *buf=0;
 971
 972     return len;
 973 }
 974
 975
 976 // swap 32bit String to 16bit MB
 977 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 978 {
 979     size_t len=0;
 980
 981     while (*psz && (!buf || len < n))
 982     {
 983         wxUint16 cc[2];
 984         size_t pa=encode_utf16(*psz, cc);
 985
 986         if (pa == (size_t)-1)
 987             return pa;
 988
 989         if (buf)
 990         {
 991             *buf++ = ((char*)cc)[1];
 992             *buf++ = ((char*)cc)[0];
 993             if (pa > 1)
 994             {
 995                 *buf++ = ((char*)cc)[3];
 996                 *buf++ = ((char*)cc)[2];
 997             }
 998         }
 999
1000         len += pa*sizeof(wxUint16);
1001         psz++;
1002     }
1003     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1004
1005     return len;
1006 }
1007
1008 #endif // WC_UTF16
1009
1010
1011 // ----------------------------------------------------------------------------
1012 // UTF-32
1013 // ----------------------------------------------------------------------------
1014
// The method definitions that follow are written in terms of two aliases:
// "straight" names the UTF-32 converter class selected for the byte order
// indicated by WORDS_BIGENDIAN (BE if it is defined, LE otherwise) and "swap"
// names the other one.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// global instances of the little and big endian UTF-32 converters
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1026
1027
1028 #ifdef WC_UTF16
1029
1030 // copy 32bit MB to 16bit String
1031 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1032 {
1033     size_t len=0;
1034
1035     while (*(wxUint32*)psz && (!buf || len < n))
1036     {
1037         wxUint16 cc[2];
1038
1039         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1040         if (pa == (size_t)-1)
1041             return pa;
1042
1043         if (buf)
1044         {
1045             *buf++ = cc[0];
1046             if (pa > 1)
1047                 *buf++ = cc[1];
1048         }
1049         len += pa;
1050         psz += sizeof(wxUint32);
1051     }
1052     if (buf && len<n)   *buf=0;
1053
1054     return len;
1055 }
1056
1057
1058 // copy 16bit String to 32bit MB
1059 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1060 {
1061     size_t len=0;
1062
1063     while (*psz && (!buf || len < n))
1064     {
1065         wxUint32 cc;
1066
1067         // cast is ok for WC_UTF16
1068         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1069         if (pa == (size_t)-1)
1070             return pa;
1071
1072         if (buf)
1073         {
1074             *(wxUint32*)buf = cc;
1075             buf += sizeof(wxUint32);
1076         }
1077         len += sizeof(wxUint32);
1078         psz += pa;
1079     }
1080
1081     if (buf && len<=n-sizeof(wxUint32))
1082         *(wxUint32*)buf=0;
1083
1084     return len;
1085 }
1086
1087
1088
1089 // swap 32bit MB to 16bit String
1090 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1091 {
1092     size_t len=0;
1093
1094     while (*(wxUint32*)psz && (!buf || len < n))
1095     {
1096         char tmp[4];
1097         tmp[0] = psz[3];   tmp[1] = psz[2];
1098         tmp[2] = psz[1];   tmp[3] = psz[0];
1099
1100
1101         wxUint16 cc[2];
1102
1103         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1104         if (pa == (size_t)-1)
1105             return pa;
1106
1107         if (buf)
1108         {
1109             *buf++ = cc[0];
1110             if (pa > 1)
1111                 *buf++ = cc[1];
1112         }
1113         len += pa;
1114         psz += sizeof(wxUint32);
1115     }
1116
1117     if (buf && len<n)
1118         *buf=0;
1119
1120     return len;
1121 }
1122
1123
1124 // swap 16bit String to 32bit MB
1125 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126 {
1127     size_t len=0;
1128
1129     while (*psz && (!buf || len < n))
1130     {
1131         char cc[4];
1132
1133         // cast is ok for WC_UTF16
1134         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1135         if (pa == (size_t)-1)
1136             return pa;
1137
1138         if (buf)
1139         {
1140             *buf++ = cc[3];
1141             *buf++ = cc[2];
1142             *buf++ = cc[1];
1143             *buf++ = cc[0];
1144         }
1145         len += sizeof(wxUint32);
1146         psz += pa;
1147     }
1148
1149     if (buf && len<=n-sizeof(wxUint32))
1150         *(wxUint32*)buf=0;
1151
1152     return len;
1153 }
1154
1155 #else // WC_UTF16
1156
1157
1158 // copy 32bit MB to 32bit String
1159 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1160 {
1161     size_t len=0;
1162
1163     while (*(wxUint32*)psz && (!buf || len < n))
1164     {
1165         if (buf)
1166             *buf++ = *(wxUint32*)psz;
1167         len++;
1168         psz += sizeof(wxUint32);
1169     }
1170
1171     if (buf && len<n)
1172         *buf=0;
1173
1174     return len;
1175 }
1176
1177
1178 // copy 32bit String to 32bit MB
1179 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1180 {
1181     size_t len=0;
1182
1183     while (*psz && (!buf || len < n))
1184     {
1185         if (buf)
1186         {
1187             *(wxUint32*)buf = *psz;
1188             buf += sizeof(wxUint32);
1189         }
1190
1191         len += sizeof(wxUint32);
1192         psz++;
1193     }
1194
1195     if (buf && len<=n-sizeof(wxUint32))
1196         *(wxUint32*)buf=0;
1197
1198     return len;
1199 }
1200
1201
1202 // swap 32bit MB to 32bit String
1203 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1204 {
1205     size_t len=0;
1206
1207     while (*(wxUint32*)psz && (!buf || len < n))
1208     {
1209         if (buf)
1210         {
1211             ((char *)buf)[0] = psz[3];
1212             ((char *)buf)[1] = psz[2];
1213             ((char *)buf)[2] = psz[1];
1214             ((char *)buf)[3] = psz[0];
1215             buf++;
1216         }
1217         len++;
1218         psz += sizeof(wxUint32);
1219     }
1220
1221     if (buf && len<n)
1222         *buf=0;
1223
1224     return len;
1225 }
1226
1227
1228 // swap 32bit String to 32bit MB
1229 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1230 {
1231     size_t len=0;
1232
1233     while (*psz && (!buf || len < n))
1234     {
1235         if (buf)
1236         {
1237             *buf++ = ((char *)psz)[3];
1238             *buf++ = ((char *)psz)[2];
1239             *buf++ = ((char *)psz)[1];
1240             *buf++ = ((char *)psz)[0];
1241         }
1242         len += sizeof(wxUint32);
1243         psz++;
1244     }
1245
1246     if (buf && len<=n-sizeof(wxUint32))
1247         *(wxUint32*)buf=0;
1248
1249     return len;
1250 }
1251
1252
1253 #endif // WC_UTF16
1254
1255
1256 // ============================================================================
1257 // The classes doing conversion using the iconv_xxx() functions
1258 // ============================================================================
1259
1260 #ifdef HAVE_ICONV
1261
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if the output buffer is _exactly_ as big as needed. This is
//     (unless there's yet another bug in glibc) the only case in which iconv()
//     returns (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when a _real_ error occurs, the number of
//     bytes left in the input buffer is non-zero. Hence this alternative test
//     for iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft): cres is the value returned by iconv() and
// bufLeft the number of input bytes it left unconverted.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// cast the address of the input pointer to the type written with ICONV_CONST,
// used for the input buffer argument of the iconv() calls in this file
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value the results of iconv_open() are compared with to detect failure
#define ICONV_T_INVALID ((iconv_t)-1)

// WC_BSWAP: byte swapping macro matching the size of wchar_t
// WC_ENC:   the wxFontEncoding value used to look up the iconv names of the
//           charset corresponding to wchar_t
#if SIZEOF_WCHAR_T == 4
    #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF32
#elif SIZEOF_WCHAR_T == 2
    #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF16
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
1291
1292 // ----------------------------------------------------------------------------
1293 // wxMBConv_iconv: encapsulates an iconv character set
1294 // ----------------------------------------------------------------------------
1295
1296 class wxMBConv_iconv : public wxMBConv
1297 {
1298 public:
1299     wxMBConv_iconv(const wxChar *name);
1300     virtual ~wxMBConv_iconv();
1301
1302     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1303     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1304
1305     bool IsOk() const
1306         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1307
1308 protected:
1309     // the iconv handlers used to translate from multibyte to wide char and in
1310     // the other direction
1311     iconv_t m2w,
1312             w2m;
1313 #if wxUSE_THREADS
1314     // guards access to m2w and w2m objects
1315     wxMutex m_iconvMutex;
1316 #endif
1317
1318 private:
1319     // the name (for iconv_open()) of a wide char charset -- if none is
1320     // available on this machine, it will remain NULL
1321     static wxString ms_wcCharsetName;
1322
1323     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1324     // different endian-ness than the native one
1325     static bool ms_wcNeedsSwap;
1326 };
1327
1328 // make the constructor available for unit testing
1329 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1330 {
1331     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1332     if ( !result->IsOk() )
1333     {
1334         delete result;
1335         return 0;
1336     }
1337     return result;
1338 }
1339
// definitions of the static members: the wxMBConv_iconv constructor fills
// them in as long as ms_wcCharsetName is still empty
wxString wxMBConv_iconv::ms_wcCharsetName;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1342
1343 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1344 {
1345     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1346     // names for the charsets
1347     const wxCharBuffer cname(wxString(name).ToAscii());
1348
1349     // check for charset that represents wchar_t:
1350     if ( ms_wcCharsetName.empty() )
1351     {
1352 #if wxUSE_FONTMAP
1353         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1354 #else // !wxUSE_FONTMAP
1355         static const wxChar *names[] =
1356         {
1357 #if SIZEOF_WCHAR_T == 4
1358             _T("UCS-4"),
1359 #elif SIZEOF_WCHAR_T = 2
1360             _T("UCS-2"),
1361 #endif
1362             NULL
1363         };
1364 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1365
1366         for ( ; *names; ++names )
1367         {
1368             const wxString name(*names);
1369
1370             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1371             wxString nameXE(name);
1372             #ifdef WORDS_BIGENDIAN
1373                 nameXE += _T("BE");
1374             #else // little endian
1375                 nameXE += _T("LE");
1376             #endif
1377
1378             m2w = iconv_open(nameXE.ToAscii(), cname);
1379             if ( m2w == ICONV_T_INVALID )
1380             {
1381                 // try charset w/o bytesex info (e.g. "UCS4")
1382                 m2w = iconv_open(name.ToAscii(), cname);
1383
1384                 // and check for bytesex ourselves:
1385                 if ( m2w != ICONV_T_INVALID )
1386                 {
1387                     char    buf[2], *bufPtr;
1388                     wchar_t wbuf[2], *wbufPtr;
1389                     size_t  insz, outsz;
1390                     size_t  res;
1391
1392                     buf[0] = 'A';
1393                     buf[1] = 0;
1394                     wbuf[0] = 0;
1395                     insz = 2;
1396                     outsz = SIZEOF_WCHAR_T * 2;
1397                     wbufPtr = wbuf;
1398                     bufPtr = buf;
1399
1400                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1401                                 (char**)&wbufPtr, &outsz);
1402
1403                     if (ICONV_FAILED(res, insz))
1404                     {
1405                         wxLogLastError(wxT("iconv"));
1406                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1407                                    name.c_str());
1408                     }
1409                     else // ok, can convert to this encoding, remember it
1410                     {
1411                         ms_wcCharsetName = name;
1412                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1413                     }
1414                 }
1415             }
1416             else // use charset not requiring byte swapping
1417             {
1418                 ms_wcCharsetName = nameXE;
1419             }
1420         }
1421
1422         wxLogTrace(TRACE_STRCONV,
1423                    wxT("iconv wchar_t charset is \"%s\"%s"),
1424                    ms_wcCharsetName.empty() ? _T("<none>")
1425                                             : ms_wcCharsetName.c_str(),
1426                    ms_wcNeedsSwap ? _T(" (needs swap)")
1427                                   : _T(""));
1428     }
1429     else // we already have ms_wcCharsetName
1430     {
1431         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1432     }
1433
1434     if ( ms_wcCharsetName.empty() )
1435     {
1436         w2m = ICONV_T_INVALID;
1437     }
1438     else
1439     {
1440         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1441         if ( w2m == ICONV_T_INVALID )
1442         {
1443             wxLogTrace(TRACE_STRCONV,
1444                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1445                        ms_wcCharsetName.c_str(), cname.data());
1446         }
1447     }
1448 }
1449
1450 wxMBConv_iconv::~wxMBConv_iconv()
1451 {
1452     if ( m2w != ICONV_T_INVALID )
1453         iconv_close(m2w);
1454     if ( w2m != ICONV_T_INVALID )
1455         iconv_close(w2m);
1456 }
1457
1458 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1459 {
1460 #if wxUSE_THREADS
1461     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1462     //     Unfortunately there is a couple of global wxCSConv objects such as
1463     //     wxConvLocal that are used all over wx code, so we have to make sure
1464     //     the handle is used by at most one thread at the time. Otherwise
1465     //     only a few wx classes would be safe to use from non-main threads
1466     //     as MB<->WC conversion would fail "randomly".
1467     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1468 #endif
1469
1470     size_t inbuf = strlen(psz);
1471     size_t outbuf = n * SIZEOF_WCHAR_T;
1472     size_t res, cres;
1473     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1474     wchar_t *bufPtr = buf;
1475     const char *pszPtr = psz;
1476
1477     if (buf)
1478     {
1479         // have destination buffer, convert there
1480         cres = iconv(m2w,
1481                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1482                      (char**)&bufPtr, &outbuf);
1483         res = n - (outbuf / SIZEOF_WCHAR_T);
1484
1485         if (ms_wcNeedsSwap)
1486         {
1487             // convert to native endianness
1488             for ( unsigned n = 0; n < res; n++ )
1489                 buf[n] = WC_BSWAP(buf[n]);
1490         }
1491
1492         // NB: iconv was given only strlen(psz) characters on input, and so
1493         //     it couldn't convert the trailing zero. Let's do it ourselves
1494         //     if there's some room left for it in the output buffer.
1495         if (res < n)
1496             buf[res] = 0;
1497     }
1498     else
1499     {
1500         // no destination buffer... convert using temp buffer
1501         // to calculate destination buffer requirement
1502         wchar_t tbuf[8];
1503         res = 0;
1504         do {
1505             bufPtr = tbuf;
1506             outbuf = 8*SIZEOF_WCHAR_T;
1507
1508             cres = iconv(m2w,
1509                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1510                          (char**)&bufPtr, &outbuf );
1511
1512             res += 8-(outbuf/SIZEOF_WCHAR_T);
1513         } while ((cres==(size_t)-1) && (errno==E2BIG));
1514     }
1515
1516     if (ICONV_FAILED(cres, inbuf))
1517     {
1518         //VS: it is ok if iconv fails, hence trace only
1519         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1520         return (size_t)-1;
1521     }
1522
1523     return res;
1524 }
1525
1526 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1527 {
1528 #if wxUSE_THREADS
1529     // NB: explained in MB2WC
1530     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1531 #endif
1532
1533     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1534     size_t outbuf = n;
1535     size_t res, cres;
1536
1537     wchar_t *tmpbuf = 0;
1538
1539     if (ms_wcNeedsSwap)
1540     {
1541         // need to copy to temp buffer to switch endianness
1542         // (doing WC_BSWAP twice on the original buffer won't help, as it
1543         //  could be in read-only memory, or be accessed in some other thread)
1544         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1545         for ( size_t n = 0; n < inbuf; n++ )
1546             tmpbuf[n] = WC_BSWAP(psz[n]);
1547         tmpbuf[inbuf] = L'\0';
1548         psz = tmpbuf;
1549     }
1550
1551     if (buf)
1552     {
1553         // have destination buffer, convert there
1554         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1555
1556         res = n-outbuf;
1557
1558         // NB: iconv was given only wcslen(psz) characters on input, and so
1559         //     it couldn't convert the trailing zero. Let's do it ourselves
1560         //     if there's some room left for it in the output buffer.
1561         if (res < n)
1562             buf[0] = 0;
1563     }
1564     else
1565     {
1566         // no destination buffer... convert using temp buffer
1567         // to calculate destination buffer requirement
1568         char tbuf[16];
1569         res = 0;
1570         do {
1571             buf = tbuf; outbuf = 16;
1572
1573             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1574
1575             res += 16 - outbuf;
1576         } while ((cres==(size_t)-1) && (errno==E2BIG));
1577     }
1578
1579     if (ms_wcNeedsSwap)
1580     {
1581         free(tmpbuf);
1582     }
1583
1584     if (ICONV_FAILED(cres, inbuf))
1585     {
1586         //VS: it is ok if iconv fails, hence trace only
1587         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1588         return (size_t)-1;
1589     }
1590
1591     return res;
1592 }
1593
1594 #endif // HAVE_ICONV
1595
1596
1597 // ============================================================================
1598 // Win32 conversion classes
1599 // ============================================================================
1600
1601 #ifdef wxHAVE_WIN32_MB2WC
1602
// from utils.cpp
//
// wxMBConv_win32 below stores the values returned by these functions as its
// code page and considers a value of -1 to be invalid (see its IsOk())
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1608
1609 class wxMBConv_win32 : public wxMBConv
1610 {
1611 public:
1612     wxMBConv_win32()
1613     {
1614         m_CodePage = CP_ACP;
1615     }
1616
1617 #if wxUSE_FONTMAP
1618     wxMBConv_win32(const wxChar* name)
1619     {
1620         m_CodePage = wxCharsetToCodepage(name);
1621     }
1622
1623     wxMBConv_win32(wxFontEncoding encoding)
1624     {
1625         m_CodePage = wxEncodingToCodepage(encoding);
1626     }
1627 #endif
1628
1629     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1630     {
1631         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1632         // the behaviour is not compatible with the Unix version (using iconv)
1633         // and break the library itself, e.g. wxTextInputStream::NextChar()
1634         // wouldn't work if reading an incomplete MB char didn't result in an
1635         // error
1636         //
1637         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1638         // an error (tested under Windows Server 2003) and apparently it is
1639         // done on purpose, i.e. the function accepts any input in this case
1640         // and although I'd prefer to return error on ill-formed output, our
1641         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1642         // explicitly ill-formed according to RFC 2152) neither so we don't
1643         // even have any fallback here...
1644         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1645
1646         const size_t len = ::MultiByteToWideChar
1647                              (
1648                                 m_CodePage,     // code page
1649                                 flags,          // flags: fall on error
1650                                 psz,            // input string
1651                                 -1,             // its length (NUL-terminated)
1652                                 buf,            // output string
1653                                 buf ? n : 0     // size of output buffer
1654                              );
1655
1656         // note that it returns count of written chars for buf != NULL and size
1657         // of the needed buffer for buf == NULL so in either case the length of
1658         // the string (which never includes the terminating NUL) is one less
1659         return len ? len - 1 : (size_t)-1;
1660     }
1661
1662     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1663     {
1664         /*
1665             we have a problem here: by default, WideCharToMultiByte() may
1666             replace characters unrepresentable in the target code page with bad
1667             quality approximations such as turning "1/2" symbol (U+00BD) into
1668             "1" for the code pages which don't have it and we, obviously, want
1669             to avoid this at any price
1670
1671             the trouble is that this function does it _silently_, i.e. it won't
1672             even tell us whether it did or not... Win98/2000 and higher provide
1673             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1674             we have to resort to a round trip, i.e. check that converting back
1675             results in the same string -- this is, of course, expensive but
1676             otherwise we simply can't be sure to not garble the data.
1677          */
1678
1679         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1680         // it doesn't work with CJK encodings (which we test for rather roughly
1681         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1682         // supporting it
1683         BOOL usedDef wxDUMMY_INITIALIZE(false);
1684         BOOL *pUsedDef;
1685         int flags;
1686         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1687         {
1688             // it's our lucky day
1689             flags = WC_NO_BEST_FIT_CHARS;
1690             pUsedDef = &usedDef;
1691         }
1692         else // old system or unsupported encoding
1693         {
1694             flags = 0;
1695             pUsedDef = NULL;
1696         }
1697
1698         const size_t len = ::WideCharToMultiByte
1699                              (
1700                                 m_CodePage,     // code page
1701                                 flags,          // either none or no best fit
1702                                 pwz,            // input string
1703                                 -1,             // it is (wide) NUL-terminated
1704                                 buf,            // output buffer
1705                                 buf ? n : 0,    // and its size
1706                                 NULL,           // default "replacement" char
1707                                 pUsedDef        // [out] was it used?
1708                              );
1709
1710         if ( !len )
1711         {
1712             // function totally failed
1713             return (size_t)-1;
1714         }
1715
1716         // if we were really converting, check if we succeeded
1717         if ( buf )
1718         {
1719             if ( flags )
1720             {
1721                 // check if the conversion failed, i.e. if any replacements
1722                 // were done
1723                 if ( usedDef )
1724                     return (size_t)-1;
1725             }
1726             else // we must resort to double tripping...
1727             {
1728                 wxWCharBuffer wcBuf(n);
1729                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1730                         wcscmp(wcBuf, pwz) != 0 )
1731                 {
1732                     // we didn't obtain the same thing we started from, hence
1733                     // the conversion was lossy and we consider that it failed
1734                     return (size_t)-1;
1735                 }
1736             }
1737         }
1738
1739         // see the comment above for the reason of "len - 1"
1740         return len - 1;
1741     }
1742
1743     bool IsOk() const { return m_CodePage != -1; }
1744
1745 private:
1746     static bool CanUseNoBestFit()
1747     {
1748         static int s_isWin98Or2k = -1;
1749
1750         if ( s_isWin98Or2k == -1 )
1751         {
1752             int verMaj, verMin;
1753             switch ( wxGetOsVersion(&verMaj, &verMin) )
1754             {
1755                 case wxWIN95:
1756                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1757                     break;
1758
1759                 case wxWINDOWS_NT:
1760                     s_isWin98Or2k = verMaj >= 5;
1761                     break;
1762
1763                 default:
1764                     // unknown, be conseravtive by default
1765                     s_isWin98Or2k = 0;
1766             }
1767
1768             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1769         }
1770
1771         return s_isWin98Or2k == 1;
1772     }
1773
1774     long m_CodePage;
1775 };
1776
1777 #endif // wxHAVE_WIN32_MB2WC
1778
1779 // ============================================================================
1780 // Cocoa conversion classes
1781 // ============================================================================
1782
1783 #if defined(__WXCOCOA__)
1784
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1788
1789 #include <CoreFoundation/CFString.h>
1790 #include <CoreFoundation/CFStringEncodingExt.h>
1791
1792 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1793 {
1794     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1795     if ( encoding == wxFONTENCODING_DEFAULT )
1796     {
1797         enc = CFStringGetSystemEncoding();
1798     }
1799     else switch( encoding)
1800     {
1801         case wxFONTENCODING_ISO8859_1 :
1802             enc = kCFStringEncodingISOLatin1 ;
1803             break ;
1804         case wxFONTENCODING_ISO8859_2 :
1805             enc = kCFStringEncodingISOLatin2;
1806             break ;
1807         case wxFONTENCODING_ISO8859_3 :
1808             enc = kCFStringEncodingISOLatin3 ;
1809             break ;
1810         case wxFONTENCODING_ISO8859_4 :
1811             enc = kCFStringEncodingISOLatin4;
1812             break ;
1813         case wxFONTENCODING_ISO8859_5 :
1814             enc = kCFStringEncodingISOLatinCyrillic;
1815             break ;
1816         case wxFONTENCODING_ISO8859_6 :
1817             enc = kCFStringEncodingISOLatinArabic;
1818             break ;
1819         case wxFONTENCODING_ISO8859_7 :
1820             enc = kCFStringEncodingISOLatinGreek;
1821             break ;
1822         case wxFONTENCODING_ISO8859_8 :
1823             enc = kCFStringEncodingISOLatinHebrew;
1824             break ;
1825         case wxFONTENCODING_ISO8859_9 :
1826             enc = kCFStringEncodingISOLatin5;
1827             break ;
1828         case wxFONTENCODING_ISO8859_10 :
1829             enc = kCFStringEncodingISOLatin6;
1830             break ;
1831         case wxFONTENCODING_ISO8859_11 :
1832             enc = kCFStringEncodingISOLatinThai;
1833             break ;
1834         case wxFONTENCODING_ISO8859_13 :
1835             enc = kCFStringEncodingISOLatin7;
1836             break ;
1837         case wxFONTENCODING_ISO8859_14 :
1838             enc = kCFStringEncodingISOLatin8;
1839             break ;
1840         case wxFONTENCODING_ISO8859_15 :
1841             enc = kCFStringEncodingISOLatin9;
1842             break ;
1843
1844         case wxFONTENCODING_KOI8 :
1845             enc = kCFStringEncodingKOI8_R;
1846             break ;
1847         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1848             enc = kCFStringEncodingDOSRussian;
1849             break ;
1850
1851 //      case wxFONTENCODING_BULGARIAN :
1852 //          enc = ;
1853 //          break ;
1854
1855         case wxFONTENCODING_CP437 :
1856             enc =kCFStringEncodingDOSLatinUS ;
1857             break ;
1858         case wxFONTENCODING_CP850 :
1859             enc = kCFStringEncodingDOSLatin1;
1860             break ;
1861         case wxFONTENCODING_CP852 :
1862             enc = kCFStringEncodingDOSLatin2;
1863             break ;
1864         case wxFONTENCODING_CP855 :
1865             enc = kCFStringEncodingDOSCyrillic;
1866             break ;
1867         case wxFONTENCODING_CP866 :
1868             enc =kCFStringEncodingDOSRussian ;
1869             break ;
1870         case wxFONTENCODING_CP874 :
1871             enc = kCFStringEncodingDOSThai;
1872             break ;
1873         case wxFONTENCODING_CP932 :
1874             enc = kCFStringEncodingDOSJapanese;
1875             break ;
1876         case wxFONTENCODING_CP936 :
1877             enc =kCFStringEncodingDOSChineseSimplif ;
1878             break ;
1879         case wxFONTENCODING_CP949 :
1880             enc = kCFStringEncodingDOSKorean;
1881             break ;
1882         case wxFONTENCODING_CP950 :
1883             enc = kCFStringEncodingDOSChineseTrad;
1884             break ;
1885         case wxFONTENCODING_CP1250 :
1886             enc = kCFStringEncodingWindowsLatin2;
1887             break ;
1888         case wxFONTENCODING_CP1251 :
1889             enc =kCFStringEncodingWindowsCyrillic ;
1890             break ;
1891         case wxFONTENCODING_CP1252 :
1892             enc =kCFStringEncodingWindowsLatin1 ;
1893             break ;
1894         case wxFONTENCODING_CP1253 :
1895             enc = kCFStringEncodingWindowsGreek;
1896             break ;
1897         case wxFONTENCODING_CP1254 :
1898             enc = kCFStringEncodingWindowsLatin5;
1899             break ;
1900         case wxFONTENCODING_CP1255 :
1901             enc =kCFStringEncodingWindowsHebrew ;
1902             break ;
1903         case wxFONTENCODING_CP1256 :
1904             enc =kCFStringEncodingWindowsArabic ;
1905             break ;
1906         case wxFONTENCODING_CP1257 :
1907             enc = kCFStringEncodingWindowsBalticRim;
1908             break ;
1909 //   This only really encodes to UTF7 (if that) evidently
1910 //        case wxFONTENCODING_UTF7 :
1911 //            enc = kCFStringEncodingNonLossyASCII ;
1912 //            break ;
1913         case wxFONTENCODING_UTF8 :
1914             enc = kCFStringEncodingUTF8 ;
1915             break ;
1916         case wxFONTENCODING_EUC_JP :
1917             enc = kCFStringEncodingEUC_JP;
1918             break ;
1919         case wxFONTENCODING_UTF16 :
1920             enc = kCFStringEncodingUnicode ;
1921             break ;
1922         case wxFONTENCODING_MACROMAN :
1923             enc = kCFStringEncodingMacRoman ;
1924             break ;
1925         case wxFONTENCODING_MACJAPANESE :
1926             enc = kCFStringEncodingMacJapanese ;
1927             break ;
1928         case wxFONTENCODING_MACCHINESETRAD :
1929             enc = kCFStringEncodingMacChineseTrad ;
1930             break ;
1931         case wxFONTENCODING_MACKOREAN :
1932             enc = kCFStringEncodingMacKorean ;
1933             break ;
1934         case wxFONTENCODING_MACARABIC :
1935             enc = kCFStringEncodingMacArabic ;
1936             break ;
1937         case wxFONTENCODING_MACHEBREW :
1938             enc = kCFStringEncodingMacHebrew ;
1939             break ;
1940         case wxFONTENCODING_MACGREEK :
1941             enc = kCFStringEncodingMacGreek ;
1942             break ;
1943         case wxFONTENCODING_MACCYRILLIC :
1944             enc = kCFStringEncodingMacCyrillic ;
1945             break ;
1946         case wxFONTENCODING_MACDEVANAGARI :
1947             enc = kCFStringEncodingMacDevanagari ;
1948             break ;
1949         case wxFONTENCODING_MACGURMUKHI :
1950             enc = kCFStringEncodingMacGurmukhi ;
1951             break ;
1952         case wxFONTENCODING_MACGUJARATI :
1953             enc = kCFStringEncodingMacGujarati ;
1954             break ;
1955         case wxFONTENCODING_MACORIYA :
1956             enc = kCFStringEncodingMacOriya ;
1957             break ;
1958         case wxFONTENCODING_MACBENGALI :
1959             enc = kCFStringEncodingMacBengali ;
1960             break ;
1961         case wxFONTENCODING_MACTAMIL :
1962             enc = kCFStringEncodingMacTamil ;
1963             break ;
1964         case wxFONTENCODING_MACTELUGU :
1965             enc = kCFStringEncodingMacTelugu ;
1966             break ;
1967         case wxFONTENCODING_MACKANNADA :
1968             enc = kCFStringEncodingMacKannada ;
1969             break ;
1970         case wxFONTENCODING_MACMALAJALAM :
1971             enc = kCFStringEncodingMacMalayalam ;
1972             break ;
1973         case wxFONTENCODING_MACSINHALESE :
1974             enc = kCFStringEncodingMacSinhalese ;
1975             break ;
1976         case wxFONTENCODING_MACBURMESE :
1977             enc = kCFStringEncodingMacBurmese ;
1978             break ;
1979         case wxFONTENCODING_MACKHMER :
1980             enc = kCFStringEncodingMacKhmer ;
1981             break ;
1982         case wxFONTENCODING_MACTHAI :
1983             enc = kCFStringEncodingMacThai ;
1984             break ;
1985         case wxFONTENCODING_MACLAOTIAN :
1986             enc = kCFStringEncodingMacLaotian ;
1987             break ;
1988         case wxFONTENCODING_MACGEORGIAN :
1989             enc = kCFStringEncodingMacGeorgian ;
1990             break ;
1991         case wxFONTENCODING_MACARMENIAN :
1992             enc = kCFStringEncodingMacArmenian ;
1993             break ;
1994         case wxFONTENCODING_MACCHINESESIMP :
1995             enc = kCFStringEncodingMacChineseSimp ;
1996             break ;
1997         case wxFONTENCODING_MACTIBETAN :
1998             enc = kCFStringEncodingMacTibetan ;
1999             break ;
2000         case wxFONTENCODING_MACMONGOLIAN :
2001             enc = kCFStringEncodingMacMongolian ;
2002             break ;
2003         case wxFONTENCODING_MACETHIOPIC :
2004             enc = kCFStringEncodingMacEthiopic ;
2005             break ;
2006         case wxFONTENCODING_MACCENTRALEUR :
2007             enc = kCFStringEncodingMacCentralEurRoman ;
2008             break ;
2009         case wxFONTENCODING_MACVIATNAMESE :
2010             enc = kCFStringEncodingMacVietnamese ;
2011             break ;
2012         case wxFONTENCODING_MACARABICEXT :
2013             enc = kCFStringEncodingMacExtArabic ;
2014             break ;
2015         case wxFONTENCODING_MACSYMBOL :
2016             enc = kCFStringEncodingMacSymbol ;
2017             break ;
2018         case wxFONTENCODING_MACDINGBATS :
2019             enc = kCFStringEncodingMacDingbats ;
2020             break ;
2021         case wxFONTENCODING_MACTURKISH :
2022             enc = kCFStringEncodingMacTurkish ;
2023             break ;
2024         case wxFONTENCODING_MACCROATIAN :
2025             enc = kCFStringEncodingMacCroatian ;
2026             break ;
2027         case wxFONTENCODING_MACICELANDIC :
2028             enc = kCFStringEncodingMacIcelandic ;
2029             break ;
2030         case wxFONTENCODING_MACROMANIAN :
2031             enc = kCFStringEncodingMacRomanian ;
2032             break ;
2033         case wxFONTENCODING_MACCELTIC :
2034             enc = kCFStringEncodingMacCeltic ;
2035             break ;
2036         case wxFONTENCODING_MACGAELIC :
2037             enc = kCFStringEncodingMacGaelic ;
2038             break ;
2039 //      case wxFONTENCODING_MACKEYBOARD :
2040 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2041 //          break ;
2042         default :
2043             // because gcc is picky
2044             break ;
2045     } ;
2046     return enc ;
2047 }
2048
2049 class wxMBConv_cocoa : public wxMBConv
2050 {
2051 public:
2052     wxMBConv_cocoa()
2053     {
2054         Init(CFStringGetSystemEncoding()) ;
2055     }
2056
2057 #if wxUSE_FONTMAP
2058     wxMBConv_cocoa(const wxChar* name)
2059     {
2060         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2061     }
2062 #endif
2063
2064     wxMBConv_cocoa(wxFontEncoding encoding)
2065     {
2066         Init( wxCFStringEncFromFontEnc(encoding) );
2067     }
2068
2069     ~wxMBConv_cocoa()
2070     {
2071     }
2072
2073     void Init( CFStringEncoding encoding)
2074     {
2075         m_encoding = encoding ;
2076     }
2077
2078     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2079     {
2080         wxASSERT(szUnConv);
2081
2082         CFStringRef theString = CFStringCreateWithBytes (
2083                                                 NULL, //the allocator
2084                                                 (const UInt8*)szUnConv,
2085                                                 strlen(szUnConv),
2086                                                 m_encoding,
2087                                                 false //no BOM/external representation
2088                                                 );
2089
2090         wxASSERT(theString);
2091
2092         size_t nOutLength = CFStringGetLength(theString);
2093
2094         if (szOut == NULL)
2095         {
2096             CFRelease(theString);
2097             return nOutLength;
2098         }
2099
2100         CFRange theRange = { 0, nOutSize };
2101
2102 #if SIZEOF_WCHAR_T == 4
2103         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2104 #endif
2105
2106         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2107
2108         CFRelease(theString);
2109
2110         szUniCharBuffer[nOutLength] = '\0' ;
2111
2112 #if SIZEOF_WCHAR_T == 4
2113         wxMBConvUTF16 converter ;
2114         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2115         delete[] szUniCharBuffer;
2116 #endif
2117
2118         return nOutLength;
2119     }
2120
2121     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2122     {
2123         wxASSERT(szUnConv);
2124
2125         size_t nRealOutSize;
2126         size_t nBufSize = wxWcslen(szUnConv);
2127         UniChar* szUniBuffer = (UniChar*) szUnConv;
2128
2129 #if SIZEOF_WCHAR_T == 4
2130         wxMBConvUTF16 converter ;
2131         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2132         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2133         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2134         nBufSize /= sizeof(UniChar);
2135 #endif
2136
2137         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2138                                 NULL, //allocator
2139                                 szUniBuffer,
2140                                 nBufSize,
2141                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2142                             );
2143
2144         wxASSERT(theString);
2145
2146         //Note that CER puts a BOM when converting to unicode
2147         //so we  check and use getchars instead in that case
2148         if (m_encoding == kCFStringEncodingUnicode)
2149         {
2150             if (szOut != NULL)
2151                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2152
2153             nRealOutSize = CFStringGetLength(theString) + 1;
2154         }
2155         else
2156         {
2157             CFStringGetBytes(
2158                 theString,
2159                 CFRangeMake(0, CFStringGetLength(theString)),
2160                 m_encoding,
2161                 0, //what to put in characters that can't be converted -
2162                     //0 tells CFString to return NULL if it meets such a character
2163                 false, //not an external representation
2164                 (UInt8*) szOut,
2165                 nOutSize,
2166                 (CFIndex*) &nRealOutSize
2167                         );
2168         }
2169
2170         CFRelease(theString);
2171
2172 #if SIZEOF_WCHAR_T == 4
2173         delete[] szUniBuffer;
2174 #endif
2175
2176         return  nRealOutSize - 1;
2177     }
2178
2179     bool IsOk() const
2180     {
2181         return m_encoding != kCFStringEncodingInvalidId &&
2182               CFStringIsEncodingAvailable(m_encoding);
2183     }
2184
2185 private:
2186     CFStringEncoding m_encoding ;
2187 };
2188
2189 #endif // defined(__WXCOCOA__)
2190
2191 // ============================================================================
2192 // Mac conversion classes
2193 // ============================================================================
2194
2195 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2196
2197 class wxMBConv_mac : public wxMBConv
2198 {
2199 public:
2200     wxMBConv_mac()
2201     {
2202         Init(CFStringGetSystemEncoding()) ;
2203     }
2204
2205 #if wxUSE_FONTMAP
2206     wxMBConv_mac(const wxChar* name)
2207     {
2208         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2209     }
2210 #endif
2211
2212     wxMBConv_mac(wxFontEncoding encoding)
2213     {
2214         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2215     }
2216
2217     ~wxMBConv_mac()
2218     {
2219         OSStatus status = noErr ;
2220         status = TECDisposeConverter(m_MB2WC_converter);
2221         status = TECDisposeConverter(m_WC2MB_converter);
2222     }
2223
2224
2225     void Init( TextEncodingBase encoding)
2226     {
2227         OSStatus status = noErr ;
2228         m_char_encoding = encoding ;
2229         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2230
2231         status = TECCreateConverter(&m_MB2WC_converter,
2232                                     m_char_encoding,
2233                                     m_unicode_encoding);
2234         status = TECCreateConverter(&m_WC2MB_converter,
2235                                     m_unicode_encoding,
2236                                     m_char_encoding);
2237     }
2238
2239     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2240     {
2241         OSStatus status = noErr ;
2242         ByteCount byteOutLen ;
2243         ByteCount byteInLen = strlen(psz) ;
2244         wchar_t *tbuf = NULL ;
2245         UniChar* ubuf = NULL ;
2246         size_t res = 0 ;
2247
2248         if (buf == NULL)
2249         {
2250             //apple specs say at least 32
2251             n = wxMax( 32 , byteInLen ) ;
2252             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2253         }
2254         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2255 #if SIZEOF_WCHAR_T == 4
2256         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2257 #else
2258         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2259 #endif
2260         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2261           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2262 #if SIZEOF_WCHAR_T == 4
2263         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2264         // is not properly terminated we get random characters at the end
2265         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2266         wxMBConvUTF16 converter ;
2267         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2268         free( ubuf ) ;
2269 #else
2270         res = byteOutLen / sizeof( UniChar ) ;
2271 #endif
2272         if ( buf == NULL )
2273              free(tbuf) ;
2274
2275         if ( buf  && res < n)
2276             buf[res] = 0;
2277
2278         return res ;
2279     }
2280
2281     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2282     {
2283         OSStatus status = noErr ;
2284         ByteCount byteOutLen ;
2285         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2286
2287         char *tbuf = NULL ;
2288
2289         if (buf == NULL)
2290         {
2291             //apple specs say at least 32
2292             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2293             tbuf = (char*) malloc( n ) ;
2294         }
2295
2296         ByteCount byteBufferLen = n ;
2297         UniChar* ubuf = NULL ;
2298 #if SIZEOF_WCHAR_T == 4
2299         wxMBConvUTF16 converter ;
2300         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2301         byteInLen = unicharlen ;
2302         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2303         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2304 #else
2305         ubuf = (UniChar*) psz ;
2306 #endif
2307         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2308             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2309 #if SIZEOF_WCHAR_T == 4
2310         free( ubuf ) ;
2311 #endif
2312         if ( buf == NULL )
2313             free(tbuf) ;
2314
2315         size_t res = byteOutLen ;
2316         if ( buf  && res < n)
2317         {
2318             buf[res] = 0;
2319
2320             //we need to double-trip to verify it didn't insert any ? in place
2321             //of bogus characters
2322             wxWCharBuffer wcBuf(n);
2323             size_t pszlen = wxWcslen(psz);
2324             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2325                         wxWcslen(wcBuf) != pszlen ||
2326                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2327             {
2328                 // we didn't obtain the same thing we started from, hence
2329                 // the conversion was lossy and we consider that it failed
2330                 return (size_t)-1;
2331             }
2332         }
2333
2334         return res ;
2335     }
2336
2337     bool IsOk() const
2338         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2339
2340 private:
2341     TECObjectRef m_MB2WC_converter ;
2342     TECObjectRef m_WC2MB_converter ;
2343
2344     TextEncodingBase m_char_encoding ;
2345     TextEncodingBase m_unicode_encoding ;
2346 };
2347
2348 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2349
2350 // ============================================================================
2351 // wxEncodingConverter based conversion classes
2352 // ============================================================================
2353
2354 #if wxUSE_FONTMAP
2355
2356 class wxMBConv_wxwin : public wxMBConv
2357 {
2358 private:
2359     void Init()
2360     {
2361         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2362                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2363     }
2364
2365 public:
2366     // temporarily just use wxEncodingConverter stuff,
2367     // so that it works while a better implementation is built
2368     wxMBConv_wxwin(const wxChar* name)
2369     {
2370         if (name)
2371             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2372         else
2373             m_enc = wxFONTENCODING_SYSTEM;
2374
2375         Init();
2376     }
2377
2378     wxMBConv_wxwin(wxFontEncoding enc)
2379     {
2380         m_enc = enc;
2381
2382         Init();
2383     }
2384
2385     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2386     {
2387         size_t inbuf = strlen(psz);
2388         if (buf)
2389         {
2390             if (!m2w.Convert(psz,buf))
2391                 return (size_t)-1;
2392         }
2393         return inbuf;
2394     }
2395
2396     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2397     {
2398         const size_t inbuf = wxWcslen(psz);
2399         if (buf)
2400         {
2401             if (!w2m.Convert(psz,buf))
2402                 return (size_t)-1;
2403         }
2404
2405         return inbuf;
2406     }
2407
2408     bool IsOk() const { return m_ok; }
2409
2410 public:
2411     wxFontEncoding m_enc;
2412     wxEncodingConverter m2w, w2m;
2413
2414     // were we initialized successfully?
2415     bool m_ok;
2416
2417     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2418 };
2419
2420 // make the constructors available for unit testing
2421 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2422 {
2423     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2424     if ( !result->IsOk() )
2425     {
2426         delete result;
2427         return 0;
2428     }
2429     return result;
2430 }
2431
2432 #endif // wxUSE_FONTMAP
2433
2434 // ============================================================================
2435 // wxCSConv implementation
2436 // ============================================================================
2437
2438 void wxCSConv::Init()
2439 {
2440     m_name = NULL;
2441     m_convReal =  NULL;
2442     m_deferred = true;
2443 }
2444
2445 wxCSConv::wxCSConv(const wxChar *charset)
2446 {
2447     Init();
2448
2449     if ( charset )
2450     {
2451         SetName(charset);
2452     }
2453
2454     m_encoding = wxFONTENCODING_SYSTEM;
2455 }
2456
2457 wxCSConv::wxCSConv(wxFontEncoding encoding)
2458 {
2459     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2460     {
2461         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2462
2463         encoding = wxFONTENCODING_SYSTEM;
2464     }
2465
2466     Init();
2467
2468     m_encoding = encoding;
2469 }
2470
// free the charset name and the conversion object, if any
wxCSConv::~wxCSConv()
{
    Clear();
}
2475
2476 wxCSConv::wxCSConv(const wxCSConv& conv)
2477         : wxMBConv()
2478 {
2479     Init();
2480
2481     SetName(conv.m_name);
2482     m_encoding = conv.m_encoding;
2483 }
2484
2485 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2486 {
2487     Clear();
2488
2489     SetName(conv.m_name);
2490     m_encoding = conv.m_encoding;
2491
2492     return *this;
2493 }
2494
2495 void wxCSConv::Clear()
2496 {
2497     free(m_name);
2498     delete m_convReal;
2499
2500     m_name = NULL;
2501     m_convReal = NULL;
2502 }
2503
2504 void wxCSConv::SetName(const wxChar *charset)
2505 {
2506     if (charset)
2507     {
2508         m_name = wxStrdup(charset);
2509         m_deferred = true;
2510     }
2511 }
2512
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// map from a font encoding to a charset name
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache used by wxCSConv::DoCreate(): for each encoding already tried it
// holds the name iconv accepted for it, or an empty string if none worked
static wxEncodingNameCache gs_nameCache;
#endif
2521
2522 wxMBConv *wxCSConv::DoCreate() const
2523 {
2524 #if wxUSE_FONTMAP
2525     wxLogTrace(TRACE_STRCONV,
2526                wxT("creating conversion for %s"),
2527                (m_name ? m_name
2528                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2529 #endif // wxUSE_FONTMAP
2530
2531     // check for the special case of ASCII or ISO8859-1 charset: as we have
2532     // special knowledge of it anyhow, we don't need to create a special
2533     // conversion object
2534     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2535     {
2536         // don't convert at all
2537         return NULL;
2538     }
2539
2540     // we trust OS to do conversion better than we can so try external
2541     // conversion methods first
2542     //
2543     // the full order is:
2544     //      1. OS conversion (iconv() under Unix or Win32 API)
2545     //      2. hard coded conversions for UTF
2546     //      3. wxEncodingConverter as fall back
2547
2548     // step (1)
2549 #ifdef HAVE_ICONV
2550 #if !wxUSE_FONTMAP
2551     if ( m_name )
2552 #endif // !wxUSE_FONTMAP
2553     {
2554         wxString name(m_name);
2555         wxFontEncoding encoding(m_encoding);
2556
2557         if ( !name.empty() )
2558         {
2559             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2560             if ( conv->IsOk() )
2561                 return conv;
2562
2563             delete conv;
2564
2565 #if wxUSE_FONTMAP
2566             encoding =
2567                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2568 #endif // wxUSE_FONTMAP
2569         }
2570 #if wxUSE_FONTMAP
2571         {
2572             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2573             if ( it != gs_nameCache.end() )
2574             {
2575                 if ( it->second.empty() )
2576                     return NULL;
2577
2578                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2579                 if ( conv->IsOk() )
2580                     return conv;
2581
2582                 delete conv;
2583             }
2584
2585             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2586
2587             for ( ; *names; ++names )
2588             {
2589                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2590                 if ( conv->IsOk() )
2591                 {
2592                     gs_nameCache[encoding] = *names;
2593                     return conv;
2594                 }
2595
2596                 delete conv;
2597             }
2598
2599             gs_nameCache[encoding] = _T(""); // cache the failure
2600         }
2601 #endif // wxUSE_FONTMAP
2602     }
2603 #endif // HAVE_ICONV
2604
2605 #ifdef wxHAVE_WIN32_MB2WC
2606     {
2607 #if wxUSE_FONTMAP
2608         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2609                                       : new wxMBConv_win32(m_encoding);
2610         if ( conv->IsOk() )
2611             return conv;
2612
2613         delete conv;
2614 #else
2615         return NULL;
2616 #endif
2617     }
2618 #endif // wxHAVE_WIN32_MB2WC
2619 #if defined(__WXMAC__)
2620     {
2621         // leave UTF16 and UTF32 to the built-ins of wx
2622         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2623             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2624         {
2625
2626 #if wxUSE_FONTMAP
2627             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2628                                         : new wxMBConv_mac(m_encoding);
2629 #else
2630             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2631 #endif
2632             if ( conv->IsOk() )
2633                  return conv;
2634
2635             delete conv;
2636         }
2637     }
2638 #endif
2639 #if defined(__WXCOCOA__)
2640     {
2641         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2642         {
2643
2644 #if wxUSE_FONTMAP
2645             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2646                                           : new wxMBConv_cocoa(m_encoding);
2647 #else
2648             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2649 #endif
2650             if ( conv->IsOk() )
2651                  return conv;
2652
2653             delete conv;
2654         }
2655     }
2656 #endif
2657     // step (2)
2658     wxFontEncoding enc = m_encoding;
2659 #if wxUSE_FONTMAP
2660     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2661     {
2662         // use "false" to suppress interactive dialogs -- we can be called from
2663         // anywhere and popping up a dialog from here is the last thing we want to
2664         // do
2665         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2666     }
2667 #endif // wxUSE_FONTMAP
2668
2669     switch ( enc )
2670     {
2671         case wxFONTENCODING_UTF7:
2672              return new wxMBConvUTF7;
2673
2674         case wxFONTENCODING_UTF8:
2675              return new wxMBConvUTF8;
2676
2677         case wxFONTENCODING_UTF16BE:
2678              return new wxMBConvUTF16BE;
2679
2680         case wxFONTENCODING_UTF16LE:
2681              return new wxMBConvUTF16LE;
2682
2683         case wxFONTENCODING_UTF32BE:
2684              return new wxMBConvUTF32BE;
2685
2686         case wxFONTENCODING_UTF32LE:
2687              return new wxMBConvUTF32LE;
2688
2689         default:
2690              // nothing to do but put here to suppress gcc warnings
2691              ;
2692     }
2693
2694     // step (3)
2695 #if wxUSE_FONTMAP
2696     {
2697         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2698                                       : new wxMBConv_wxwin(m_encoding);
2699         if ( conv->IsOk() )
2700             return conv;
2701
2702         delete conv;
2703     }
2704 #endif // wxUSE_FONTMAP
2705
2706     // NB: This is a hack to prevent deadlock. What could otherwise happen
2707     //     in Unicode build: wxConvLocal creation ends up being here
2708     //     because of some failure and logs the error. But wxLog will try to
2709     //     attach timestamp, for which it will need wxConvLocal (to convert
2710     //     time to char* and then wchar_t*), but that fails, tries to log
2711     //     error, but wxLog has a (already locked) critical section that
2712     //     guards static buffer.
2713     static bool alreadyLoggingError = false;
2714     if (!alreadyLoggingError)
2715     {
2716         alreadyLoggingError = true;
2717         wxLogError(_("Cannot convert from the charset '%s'!"),
2718                    m_name ? m_name
2719                       :
2720 #if wxUSE_FONTMAP
2721                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2722 #else // !wxUSE_FONTMAP
2723                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2724 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2725               );
2726         alreadyLoggingError = false;
2727     }
2728
2729     return NULL;
2730 }
2731
2732 void wxCSConv::CreateConvIfNeeded() const
2733 {
2734     if ( m_deferred )
2735     {
2736         wxCSConv *self = (wxCSConv *)this; // const_cast
2737
2738 #if wxUSE_INTL
2739         // if we don't have neither the name nor the encoding, use the default
2740         // encoding for this system
2741         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2742         {
2743             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2744         }
2745 #endif // wxUSE_INTL
2746
2747         self->m_convReal = DoCreate();
2748         self->m_deferred = false;
2749     }
2750 }
2751
2752 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2753 {
2754     CreateConvIfNeeded();
2755
2756     if (m_convReal)
2757         return m_convReal->MB2WC(buf, psz, n);
2758
2759     // latin-1 (direct)
2760     size_t len = strlen(psz);
2761
2762     if (buf)
2763     {
2764         for (size_t c = 0; c <= len; c++)
2765             buf[c] = (unsigned char)(psz[c]);
2766     }
2767
2768     return len;
2769 }
2770
2771 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2772 {
2773     CreateConvIfNeeded();
2774
2775     if (m_convReal)
2776         return m_convReal->WC2MB(buf, psz, n);
2777
2778     // latin-1 (direct)
2779     const size_t len = wxWcslen(psz);
2780     if (buf)
2781     {
2782         for (size_t c = 0; c <= len; c++)
2783         {
2784             if (psz[c] > 0xFF)
2785                 return (size_t)-1;
2786             buf[c] = (char)psz[c];
2787         }
2788     }
2789     else
2790     {
2791         for (size_t c = 0; c <= len; c++)
2792         {
2793             if (psz[c] > 0xFF)
2794                 return (size_t)-1;
2795         }
2796     }
2797
2798     return len;
2799 }
2800
2801 // ----------------------------------------------------------------------------
2802 // globals
2803 // ----------------------------------------------------------------------------
2804
// the object behind wxConvLibc: its class depends on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined conversions
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;

// the exported globals are references (or pointers) to the file-static
// objects defined above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
// wxConvFileName initially points to the UTF-8 conversion under __WXOSX__
// and to the libc one everywhere else
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
                                    wxConvUTF8Obj;
#else
                                    wxConvLibcObj;
#endif
2830
2831
2832 #else // !wxUSE_WCHAR_T
2833
// stand-ins in absence of wchar_t: the predefined conversions are plain
// wxMBConv objects in this case
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2839
2840 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2841
2842