src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/private.h"
  45     #include "wx/msw/missing.h"
  46 #endif
  47
  48 #ifndef __WXWINCE__
  49 #include <errno.h>
  50 #endif
  51
  52 #include <ctype.h>
  53 #include <string.h>
  54 #include <stdlib.h>
  55
  56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  57     #define wxHAVE_WIN32_MB2WC
  58 #endif // __WIN32__ but !__WXMICROWIN__
  59
  60 #ifdef __SALFORDC__
  61     #include <clib.h>
  62 #endif
  63
  64 #ifdef HAVE_ICONV
  65     #include <iconv.h>
  66     #include "wx/thread.h"
  67 #endif
  68
  69 #include "wx/encconv.h"
  70 #include "wx/fontmap.h"
  71 #include "wx/utils.h"
  72
  73 #ifdef __WXMAC__
  74 #ifndef __DARWIN__
  75 #include <ATSUnicode.h>
  76 #include <TextCommon.h>
  77 #include <TextEncodingConverter.h>
  78 #endif
  79
  80 #include  "wx/mac/private.h"  // includes mac headers
  81 #endif
  82
  83 #define TRACE_STRCONV _T("strconv")
  84
  85 // ============================================================================
  86 // implementation
  87 // ============================================================================
  88
  89 // ----------------------------------------------------------------------------
  90 // UTF-16 en/decoding to/from UCS-4
  91 // ----------------------------------------------------------------------------
  92
  93
  94 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  95 {
  96     if (input<=0xffff)
  97     {
  98         if (output)
  99             *output = (wxUint16) input;
 100         return 1;
 101     }
 102     else if (input>=0x110000)
 103     {
 104         return (size_t)-1;
 105     }
 106     else
 107     {
 108         if (output)
 109         {
 110             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 111             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 112         }
 113         return 2;
 114     }
 115 }
 116
 117 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 118 {
 119     if ((*input<0xd800) || (*input>0xdfff))
 120     {
 121         output = *input;
 122         return 1;
 123     }
 124     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 125     {
 126         output = *input;
 127         return (size_t)-1;
 128     }
 129     else
 130     {
 131         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 132         return 2;
 133     }
 134 }
 135
 136
 137 // ----------------------------------------------------------------------------
 138 // wxMBConv
 139 // ----------------------------------------------------------------------------
 140
 141 wxMBConv::~wxMBConv()
 142 {
 143     // nothing to do here (necessary for Darwin linking probably)
 144 }
 145
 146 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 147 {
 148     if ( psz )
 149     {
 150         // calculate the length of the buffer needed first
 151         size_t nLen = MB2WC(NULL, psz, 0);
 152         if ( nLen != (size_t)-1 )
 153         {
 154             // now do the actual conversion
 155             wxWCharBuffer buf(nLen);
 156             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 157             if ( nLen != (size_t)-1 )
 158             {
 159                 return buf;
 160             }
 161         }
 162     }
 163
 164     wxWCharBuffer buf((wchar_t *)NULL);
 165
 166     return buf;
 167 }
 168
 169 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 170 {
 171     if ( pwz )
 172     {
 173         size_t nLen = WC2MB(NULL, pwz, 0);
 174         if ( nLen != (size_t)-1 )
 175         {
 176             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 177             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 178             if ( nLen != (size_t)-1 )
 179             {
 180                 return buf;
 181             }
 182         }
 183     }
 184
 185     wxCharBuffer buf((char *)NULL);
 186
 187     return buf;
 188 }
 189
 190 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 191 {
 192     wxASSERT(pOutSize != NULL);
 193
 194     const char* szEnd = szString + nStringLen + 1;
 195     const char* szPos = szString;
 196     const char* szStart = szPos;
 197
 198     size_t nActualLength = 0;
 199     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 200
 201     wxWCharBuffer theBuffer(nCurrentSize);
 202
 203     //Convert the string until the length() is reached, continuing the
 204     //loop every time a null character is reached
 205     while(szPos != szEnd)
 206     {
 207         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 208
 209         //Get the length of the current (sub)string
 210         size_t nLen = MB2WC(NULL, szPos, 0);
 211
 212         //Invalid conversion?
 213         if( nLen == (size_t)-1 )
 214         {
 215             *pOutSize = 0;
 216             theBuffer.data()[0u] = wxT('\0');
 217             return theBuffer;
 218         }
 219
 220
 221         //Increase the actual length (+1 for current null character)
 222         nActualLength += nLen + 1;
 223
 224         //if buffer too big, realloc the buffer
 225         if (nActualLength > (nCurrentSize+1))
 226         {
 227             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 228             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 229             theBuffer = theNewBuffer;
 230             nCurrentSize <<= 1;
 231         }
 232
 233         //Convert the current (sub)string
 234         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 235         {
 236             *pOutSize = 0;
 237             theBuffer.data()[0u] = wxT('\0');
 238             return theBuffer;
 239         }
 240
 241         //Increment to next (sub)string
 242         //Note that we have to use strlen instead of nLen here
 243         //because XX2XX gives us the size of the output buffer,
 244         //which is not necessarily the length of the string
 245         szPos += strlen(szPos) + 1;
 246     }
 247
 248     //success - return actual length and the buffer
 249     *pOutSize = nActualLength;
 250     return theBuffer;
 251 }
 252
 253 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 254 {
 255     wxASSERT(pOutSize != NULL);
 256
 257     const wchar_t* szEnd = szString + nStringLen + 1;
 258     const wchar_t* szPos = szString;
 259     const wchar_t* szStart = szPos;
 260
 261     size_t nActualLength = 0;
 262     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 263
 264     wxCharBuffer theBuffer(nCurrentSize);
 265
 266     //Convert the string until the length() is reached, continuing the
 267     //loop every time a null character is reached
 268     while(szPos != szEnd)
 269     {
 270         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 271
 272         //Get the length of the current (sub)string
 273         size_t nLen = WC2MB(NULL, szPos, 0);
 274
 275         //Invalid conversion?
 276         if( nLen == (size_t)-1 )
 277         {
 278             *pOutSize = 0;
 279             theBuffer.data()[0u] = wxT('\0');
 280             return theBuffer;
 281         }
 282
 283         //Increase the actual length (+1 for current null character)
 284         nActualLength += nLen + 1;
 285
 286         //if buffer too big, realloc the buffer
 287         if (nActualLength > (nCurrentSize+1))
 288         {
 289             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 290             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 291             theBuffer = theNewBuffer;
 292             nCurrentSize <<= 1;
 293         }
 294
 295         //Convert the current (sub)string
 296         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 297         {
 298             *pOutSize = 0;
 299             theBuffer.data()[0u] = wxT('\0');
 300             return theBuffer;
 301         }
 302
 303         //Increment to next (sub)string
 304         //Note that we have to use wxWcslen instead of nLen here
 305         //because XX2XX gives us the size of the output buffer,
 306         //which is not necessarily the length of the string
 307         szPos += wxWcslen(szPos) + 1;
 308     }
 309
 310     //success - return actual length and the buffer
 311     *pOutSize = nActualLength;
 312     return theBuffer;
 313 }
 314
 315 // ----------------------------------------------------------------------------
 316 // wxMBConvLibc
 317 // ----------------------------------------------------------------------------
 318
 319 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 320 {
 321     return wxMB2WC(buf, psz, n);
 322 }
 323
 324 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 325 {
 326     return wxWC2MB(buf, psz, n);
 327 }
 328
 329 #ifdef __UNIX__
 330
 331 // ----------------------------------------------------------------------------
 332 // wxConvBrokenFileNames
 333 // ----------------------------------------------------------------------------
 334
 335 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 336 {
 337     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 338                   || wxStricmp(charset, _T("UTF8")) == 0  )
 339         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 340     else
 341         m_conv = new wxCSConv(charset);
 342 }
 343
 344 size_t
 345 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 346                              const char *psz,
 347                              size_t outputSize) const
 348 {
 349     return m_conv->MB2WC( outputBuf, psz, outputSize );
 350 }
 351
 352 size_t
 353 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 354                              const wchar_t *psz,
 355                              size_t outputSize) const
 356 {
 357     return m_conv->WC2MB( outputBuf, psz, outputSize );
 358 }
 359
 360 #endif
 361
 362 // ----------------------------------------------------------------------------
 363 // UTF-7
 364 // ----------------------------------------------------------------------------
 365
 366 // Implementation (C) 2004 Fredrik Roubert
 367
 368 //
 369 // BASE64 decoding table
 370 //
 371 static const unsigned char utf7unb64[] =
 372 {
 373     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 374     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 375     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 376     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 377     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 378     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 379     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 380     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 381     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 382     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 383     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 384     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 385     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 386     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 387     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 388     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 389     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 390     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 391     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 392     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 393     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 394     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 395     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 396     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 397     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 398     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 399     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 400     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 401     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 402     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 403     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 404     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 405 };
 406
 407 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 408 {
 409     size_t len = 0;
 410
 411     while (*psz && ((!buf) || (len < n)))
 412     {
 413         unsigned char cc = *psz++;
 414         if (cc != '+')
 415         {
 416             // plain ASCII char
 417             if (buf)
 418                 *buf++ = cc;
 419             len++;
 420         }
 421         else if (*psz == '-')
 422         {
 423             // encoded plus sign
 424             if (buf)
 425                 *buf++ = cc;
 426             len++;
 427             psz++;
 428         }
 429         else
 430         {
 431             // BASE64 encoded string
 432             bool lsb;
 433             unsigned char c;
 434             unsigned int d, l;
 435             for (lsb = false, d = 0, l = 0;
 436                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 437             {
 438                 d <<= 6;
 439                 d += cc;
 440                 for (l += 6; l >= 8; lsb = !lsb)
 441                 {
 442                     c = (unsigned char)((d >> (l -= 8)) % 256);
 443                     if (lsb)
 444                     {
 445                         if (buf)
 446                             *buf++ |= c;
 447                         len ++;
 448                     }
 449                     else
 450                         if (buf)
 451                             *buf = (wchar_t)(c << 8);
 452                 }
 453             }
 454             if (*psz == '-')
 455                 psz++;
 456         }
 457     }
 458     if (buf && (len < n))
 459         *buf = 0;
 460     return len;
 461 }
 462
 463 //
 464 // BASE64 encoding table
 465 //
 466 static const unsigned char utf7enb64[] =
 467 {
 468     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 469     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 470     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 471     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 472     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 473     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 474     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 475     '4', '5', '6', '7', '8', '9', '+', '/'
 476 };
 477
 478 //
 479 // UTF-7 encoding table
 480 //
 481 // 0 - Set D (directly encoded characters)
 482 // 1 - Set O (optional direct characters)
 483 // 2 - whitespace characters (optional)
 484 // 3 - special characters
 485 //
// Indexed by the ASCII code (0..127) of the character, each row below covers
// 16 consecutive codes; the values are the classes described above.
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x0f: control characters (TAB, LF and CR are whitespace)
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    // 0x10 - 0x1f: control characters
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x2f: space and punctuation ('+' and '/' are special)
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    // 0x30 - 0x3f: digits and punctuation
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x4f: '@' and upper case letters
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50 - 0x5f: upper case letters and punctuation ('\\' is special)
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x6f: '`' and lower case letters
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70 - 0x7f: lower case letters and punctuation ('~' and DEL are special)
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 497
// Convert the NUL-terminated wide character string psz to UTF-7.
//
// If buf is NULL, nothing is written and only the length of the result is
// computed. Otherwise the outer loop stops as soon as len reaches n and the
// trailing NUL is only added if there is room left for it.
//
// Only the characters of class 0 in utf7encode (set D) are output directly,
// all the others are BASE64-encoded between '+' and '-'.
//
// Returns the number of chars produced, not counting the trailing NUL, or
// (size_t)-1 if, when WC_UTF16 is not defined, the outer loop encounters a
// character above 0xffff.
//
// NOTE(review): the writes done while encoding a BASE64 run are not checked
// against n, and the characters consumed inside the run are not checked for
// being above 0xffff -- confirm that the callers always pass a buffer of the
// size computed by a previous call with NULL buf.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{


    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;
        if (cc < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // start of an encoded run
            if (buf)
                *buf++ = '+';
            len++;

            // the plus sign itself is encoded as "+-", i.e. with an empty run
            if (cc != '+')
            {
                // BASE64 encode string: d accumulates the bits to output and
                // l is the number of bits in it which were not output yet
                unsigned int lsb, d, l;
                for (d = 0, l = 0;; psz++)
                {
                    // feed the high byte of the character first, then the
                    // low one
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        // output all complete groups of 6 bits
                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            if (buf)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }

                    // peek at the next character: the run ends at the end of
                    // the string or at a character which is output directly
                    // (psz is only advanced by the loop if it's consumed)
                    cc = *psz;
                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
                        break;
                }

                // output the remaining bits, if any, padded with zeroes on
                // the right to a full group of 6
                if (l != 0)
                {
                    if (buf)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }

            // end of the encoded run
            if (buf)
                *buf++ = '-';
            len++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 565
 566 // ----------------------------------------------------------------------------
 567 // UTF-8
 568 // ----------------------------------------------------------------------------
 569
 570 static wxUint32 utf8_max[]=
 571     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 572
// boundaries of the private use area range to which the bytes which are
// invalid in a UTF-8 encoded string are (temporarily) remapped: the range
// starts at 0x100000 and has one code point for each of the 256 byte values
// (wxUnicodePUAEnd itself is not part of the range)
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 577
// Convert the NUL-terminated UTF-8 string psz to wide characters.
//
// If buf is NULL, nothing is written and only the length of the result is
// computed. Otherwise the outer loop stops as soon as len reaches n and the
// trailing NUL is only added if there is room left for it.
//
// The handling of the invalid input depends on m_options:
//  - with MAP_INVALID_UTF8_TO_PUA each byte of the invalid sequence is mapped
//    to wxUnicodePUA + byte value
//  - with MAP_INVALID_UTF8_TO_OCTAL each byte of the invalid sequence is
//    output as a backslash followed by 3 octal digits (and the backslashes in
//    the input are doubled)
//  - otherwise the conversion fails
//
// Returns the number of wide characters produced, not counting the trailing
// NUL, or (size_t)-1 on failure.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember the start of the sequence to be able to output its bytes
        // one by one if it turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;
        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // cnt is now the number of the continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence: a continuation byte can't start it
                invalid = true;
            }
            else
            {
                // ocnt indexes the biggest value encodable using one byte
                // less than this sequence has
                unsigned ocnt = cnt - 1;
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence
                        invalid = true;
                        break;
                    }
                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }
                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding: either a continuation byte is
                    // missing or a shorter sequence would have been enough
                    // for this value
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                    // NOTE(review): this may write 2 units without checking
                    // that len + 2 <= n -- confirm the callers' buffer sizes
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == (size_t)-1)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }
            if (invalid)
            {
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    // output each byte consumed so far as a PUA character
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != (size_t)-1);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = wxUnicodePUA + (unsigned char)*opsz;
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    // output each byte consumed so far as an octal escape
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only written if all 4 characters fit
                        // but len is increased by 4 in any case
                        if ( buf && len + 3 < n )
                        {
                            // NB: this local variable hides the parameter n
                            //     until the end of this block
                            unsigned char n = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + n / 0100 );
                            *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + n % 010 );
                        }
                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return (size_t)-1;
                }
            }
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 714
 715 static inline bool isoctal(wchar_t wch)
 716 {
 717     return L'0' <= wch && wch <= L'7';
 718 }
 719
// Convert the NUL-terminated wide character string psz to UTF-8.
//
// If buf is NULL, nothing is written and only the length of the result is
// computed. Otherwise the loop stops as soon as len reaches n and the
// trailing NUL is only added if there is room left for it.
//
// Depending on m_options, the mappings of the invalid UTF-8 bytes are
// reverted: with MAP_INVALID_UTF8_TO_PUA the characters in the range
// [wxUnicodePUA, wxUnicodePUAEnd) become single bytes again and with
// MAP_INVALID_UTF8_TO_OCTAL a double backslash becomes a single one and a
// backslash followed by 3 octal digits becomes the corresponding byte.
//
// Returns the number of bytes produced, not counting the trailing NUL.
//
// NOTE(review): a multibyte sequence is written without checking that it
// fits entirely into the n bytes available -- confirm that the callers
// always pass a buffer of the size computed by a previous call with NULL buf.
size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wxUint32 cc;
#ifdef WC_UTF16
        // cast is ok for WC_UTF16
        size_t pa = decode_utf16((const wxUint16 *)psz, cc);

        // if decoding failed, skip just the offending unit: cc was set to
        // its value by decode_utf16() and is encoded as is below
        psz += (pa == (size_t)-1) ? 1 : pa;
#else
        cc=(*psz++) & 0x7fffffff;
#endif

        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
        {
            // map the PUA character back to the original byte
            if (buf)
                *buf++ = (char)(cc - wxUnicodePUA);
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == L'\\' && psz[0] == L'\\' )
        {
            // escaped backslash: output one and skip the other
            if (buf)
                *buf++ = (char)cc;
            psz++;
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
                    cc == L'\\' &&
                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
        {
            // octal escape: output the byte with this value
            if (buf)
            {
                *buf++ = (char) ((psz[0] - L'0')*0100 +
                                 (psz[1] - L'0')*010 +
                                 (psz[2] - L'0'));
            }

            psz += 3;
            len++;
        }
        else
        {
            // find the number of the continuation bytes needed for cc
            unsigned cnt;
            for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
            if (!cnt)
            {
                // plain ASCII char
                if (buf)
                    *buf++ = (char) cc;
                len++;
            }

            else
            {
                len += cnt + 1;
                if (buf)
                {
                    // lead byte: cnt + 1 high bits set followed by the top
                    // bits of cc (this relies on the right shift of the
                    // negative value propagating the sign bit)
                    *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));

                    // continuation bytes: 6 bits of cc in each, most
                    // significant ones first
                    while (cnt--)
                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
                }
            }
        }
    }

    if (buf && (len<n))
        *buf = 0;

    return len;
}
 794
 795 // ----------------------------------------------------------------------------
 796 // UTF-16
 797 // ----------------------------------------------------------------------------
 798
 799 #ifdef WORDS_BIGENDIAN
 800     #define wxMBConvUTF16straight wxMBConvUTF16BE
 801     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 802 #else
 803     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 804     #define wxMBConvUTF16straight wxMBConvUTF16LE
 805 #endif
 806
 807
 808 #ifdef WC_UTF16
 809
 810 // copy 16bit MB to 16bit String
 811 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 812 {
 813     size_t len=0;
 814
 815     while (*(wxUint16*)psz && (!buf || len < n))
 816     {
 817         if (buf)
 818             *buf++ = *(wxUint16*)psz;
 819         len++;
 820
 821         psz += sizeof(wxUint16);
 822     }
 823     if (buf && len<n)   *buf=0;
 824
 825     return len;
 826 }
 827
 828
 829 // copy 16bit String to 16bit MB
 830 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 831 {
 832     size_t len=0;
 833
 834     while (*psz && (!buf || len < n))
 835     {
 836         if (buf)
 837         {
 838             *(wxUint16*)buf = *psz;
 839             buf += sizeof(wxUint16);
 840         }
 841         len += sizeof(wxUint16);
 842         psz++;
 843     }
 844     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 845
 846     return len;
 847 }
 848
 849
 850 // swap 16bit MB to 16bit String
 851 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 852 {
 853     size_t len=0;
 854
 855     while (*(wxUint16*)psz && (!buf || len < n))
 856     {
 857         if (buf)
 858         {
 859             ((char *)buf)[0] = psz[1];
 860             ((char *)buf)[1] = psz[0];
 861             buf++;
 862         }
 863         len++;
 864         psz += sizeof(wxUint16);
 865     }
 866     if (buf && len<n)   *buf=0;
 867
 868     return len;
 869 }
 870
 871
 872 // swap 16bit MB to 16bit String
 873 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 874 {
 875     size_t len=0;
 876
 877     while (*psz && (!buf || len < n))
 878     {
 879         if (buf)
 880         {
 881             *buf++ = ((char*)psz)[1];
 882             *buf++ = ((char*)psz)[0];
 883         }
 884         len += sizeof(wxUint16);
 885         psz++;
 886     }
 887     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 888
 889     return len;
 890 }
 891
 892
 893 #else // WC_UTF16
 894
 895
 896 // copy 16bit MB to 32bit String
 897 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 898 {
 899     size_t len=0;
 900
 901     while (*(wxUint16*)psz && (!buf || len < n))
 902     {
 903         wxUint32 cc;
 904         size_t pa=decode_utf16((wxUint16*)psz, cc);
 905         if (pa == (size_t)-1)
 906             return pa;
 907
 908         if (buf)
 909             *buf++ = cc;
 910         len++;
 911         psz += pa * sizeof(wxUint16);
 912     }
 913     if (buf && len<n)   *buf=0;
 914
 915     return len;
 916 }
 917
 918
 919 // copy 32bit String to 16bit MB
 920 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 921 {
 922     size_t len=0;
 923
 924     while (*psz && (!buf || len < n))
 925     {
 926         wxUint16 cc[2];
 927         size_t pa=encode_utf16(*psz, cc);
 928
 929         if (pa == (size_t)-1)
 930             return pa;
 931
 932         if (buf)
 933         {
 934             *(wxUint16*)buf = cc[0];
 935             buf += sizeof(wxUint16);
 936             if (pa > 1)
 937             {
 938                 *(wxUint16*)buf = cc[1];
 939                 buf += sizeof(wxUint16);
 940             }
 941         }
 942
 943         len += pa*sizeof(wxUint16);
 944         psz++;
 945     }
 946     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 947
 948     return len;
 949 }
 950
 951
 952 // swap 16bit MB to 32bit String
 953 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 954 {
 955     size_t len=0;
 956
 957     while (*(wxUint16*)psz && (!buf || len < n))
 958     {
 959         wxUint32 cc;
 960         char tmp[4];
 961         tmp[0]=psz[1];  tmp[1]=psz[0];
 962         tmp[2]=psz[3];  tmp[3]=psz[2];
 963
 964         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 965         if (pa == (size_t)-1)
 966             return pa;
 967
 968         if (buf)
 969             *buf++ = cc;
 970
 971         len++;
 972         psz += pa * sizeof(wxUint16);
 973     }
 974     if (buf && len<n)   *buf=0;
 975
 976     return len;
 977 }
 978
 979
 980 // swap 32bit String to 16bit MB
 981 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 982 {
 983     size_t len=0;
 984
 985     while (*psz && (!buf || len < n))
 986     {
 987         wxUint16 cc[2];
 988         size_t pa=encode_utf16(*psz, cc);
 989
 990         if (pa == (size_t)-1)
 991             return pa;
 992
 993         if (buf)
 994         {
 995             *buf++ = ((char*)cc)[1];
 996             *buf++ = ((char*)cc)[0];
 997             if (pa > 1)
 998             {
 999                 *buf++ = ((char*)cc)[3];
1000                 *buf++ = ((char*)cc)[2];
1001             }
1002         }
1003
1004         len += pa*sizeof(wxUint16);
1005         psz++;
1006     }
1007     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1008
1009     return len;
1010 }
1011
1012 #endif // WC_UTF16
1013
1014
1015 // ----------------------------------------------------------------------------
1016 // UTF-32
1017 // ----------------------------------------------------------------------------
1018
// The functions below are written in terms of a "straight" converter, whose
// byte order is the same as this machine's, and a "swap" one which has to
// exchange the bytes: map these names to the LE/BE classes according to the
// endianness detected at configure time.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global objects of the two UTF-32 converter classes
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1030
1031
1032 #ifdef WC_UTF16
1033
1034 // copy 32bit MB to 16bit String
1035 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1036 {
1037     size_t len=0;
1038
1039     while (*(wxUint32*)psz && (!buf || len < n))
1040     {
1041         wxUint16 cc[2];
1042
1043         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1044         if (pa == (size_t)-1)
1045             return pa;
1046
1047         if (buf)
1048         {
1049             *buf++ = cc[0];
1050             if (pa > 1)
1051                 *buf++ = cc[1];
1052         }
1053         len += pa;
1054         psz += sizeof(wxUint32);
1055     }
1056     if (buf && len<n)   *buf=0;
1057
1058     return len;
1059 }
1060
1061
1062 // copy 16bit String to 32bit MB
1063 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1064 {
1065     size_t len=0;
1066
1067     while (*psz && (!buf || len < n))
1068     {
1069         wxUint32 cc;
1070
1071         // cast is ok for WC_UTF16
1072         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1073         if (pa == (size_t)-1)
1074             return pa;
1075
1076         if (buf)
1077         {
1078             *(wxUint32*)buf = cc;
1079             buf += sizeof(wxUint32);
1080         }
1081         len += sizeof(wxUint32);
1082         psz += pa;
1083     }
1084
1085     if (buf && len<=n-sizeof(wxUint32))
1086         *(wxUint32*)buf=0;
1087
1088     return len;
1089 }
1090
1091
1092
1093 // swap 32bit MB to 16bit String
1094 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1095 {
1096     size_t len=0;
1097
1098     while (*(wxUint32*)psz && (!buf || len < n))
1099     {
1100         char tmp[4];
1101         tmp[0] = psz[3];   tmp[1] = psz[2];
1102         tmp[2] = psz[1];   tmp[3] = psz[0];
1103
1104
1105         wxUint16 cc[2];
1106
1107         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1108         if (pa == (size_t)-1)
1109             return pa;
1110
1111         if (buf)
1112         {
1113             *buf++ = cc[0];
1114             if (pa > 1)
1115                 *buf++ = cc[1];
1116         }
1117         len += pa;
1118         psz += sizeof(wxUint32);
1119     }
1120
1121     if (buf && len<n)
1122         *buf=0;
1123
1124     return len;
1125 }
1126
1127
1128 // swap 16bit String to 32bit MB
1129 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1130 {
1131     size_t len=0;
1132
1133     while (*psz && (!buf || len < n))
1134     {
1135         char cc[4];
1136
1137         // cast is ok for WC_UTF16
1138         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1139         if (pa == (size_t)-1)
1140             return pa;
1141
1142         if (buf)
1143         {
1144             *buf++ = cc[3];
1145             *buf++ = cc[2];
1146             *buf++ = cc[1];
1147             *buf++ = cc[0];
1148         }
1149         len += sizeof(wxUint32);
1150         psz += pa;
1151     }
1152
1153     if (buf && len<=n-sizeof(wxUint32))
1154         *(wxUint32*)buf=0;
1155
1156     return len;
1157 }
1158
1159 #else // WC_UTF16
1160
1161
1162 // copy 32bit MB to 32bit String
1163 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1164 {
1165     size_t len=0;
1166
1167     while (*(wxUint32*)psz && (!buf || len < n))
1168     {
1169         if (buf)
1170             *buf++ = *(wxUint32*)psz;
1171         len++;
1172         psz += sizeof(wxUint32);
1173     }
1174
1175     if (buf && len<n)
1176         *buf=0;
1177
1178     return len;
1179 }
1180
1181
1182 // copy 32bit String to 32bit MB
1183 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1184 {
1185     size_t len=0;
1186
1187     while (*psz && (!buf || len < n))
1188     {
1189         if (buf)
1190         {
1191             *(wxUint32*)buf = *psz;
1192             buf += sizeof(wxUint32);
1193         }
1194
1195         len += sizeof(wxUint32);
1196         psz++;
1197     }
1198
1199     if (buf && len<=n-sizeof(wxUint32))
1200         *(wxUint32*)buf=0;
1201
1202     return len;
1203 }
1204
1205
1206 // swap 32bit MB to 32bit String
1207 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1208 {
1209     size_t len=0;
1210
1211     while (*(wxUint32*)psz && (!buf || len < n))
1212     {
1213         if (buf)
1214         {
1215             ((char *)buf)[0] = psz[3];
1216             ((char *)buf)[1] = psz[2];
1217             ((char *)buf)[2] = psz[1];
1218             ((char *)buf)[3] = psz[0];
1219             buf++;
1220         }
1221         len++;
1222         psz += sizeof(wxUint32);
1223     }
1224
1225     if (buf && len<n)
1226         *buf=0;
1227
1228     return len;
1229 }
1230
1231
1232 // swap 32bit String to 32bit MB
1233 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1234 {
1235     size_t len=0;
1236
1237     while (*psz && (!buf || len < n))
1238     {
1239         if (buf)
1240         {
1241             *buf++ = ((char *)psz)[3];
1242             *buf++ = ((char *)psz)[2];
1243             *buf++ = ((char *)psz)[1];
1244             *buf++ = ((char *)psz)[0];
1245         }
1246         len += sizeof(wxUint32);
1247         psz++;
1248     }
1249
1250     if (buf && len<=n-sizeof(wxUint32))
1251         *(wxUint32*)buf=0;
1252
1253     return len;
1254 }
1255
1256
1257 #endif // WC_UTF16
1258
1259
1260 // ============================================================================
1261 // The classes doing conversion using the iconv_xxx() functions
1262 // ============================================================================
1263
1264 #ifdef HAVE_ICONV
1265
1266 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1267 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1268 //     (unless there's yet another bug in glibc) the only case when iconv()
1269 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1270 //     left in the input buffer -- when _real_ error occurs,
1271 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1272 //     iconv() failure.
1273 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned cres
// and left bufLeft bytes of input unconverted should be treated as failed
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// cast the address of the input pointer to the type used for the second
// argument of iconv(); ICONV_CONST is defined elsewhere
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// the value against which the results of iconv_open() are compared to detect
// that a conversion descriptor couldn't be created
#define ICONV_T_INVALID ((iconv_t)-1)

// WC_BSWAP swaps the bytes of a single wchar_t and WC_ENC is the
// wxFontEncoding value of the same width as wchar_t
#if SIZEOF_WCHAR_T == 4
    #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF32
#elif SIZEOF_WCHAR_T == 2
    #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF16
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
1295
1296 // ----------------------------------------------------------------------------
1297 // wxMBConv_iconv: encapsulates an iconv character set
1298 // ----------------------------------------------------------------------------
1299
// Converter between the charset given to the constructor and wchar_t,
// implemented using a pair of iconv conversion descriptors (one for each
// direction).
class wxMBConv_iconv : public wxMBConv
{
public:
    // opens the conversion descriptors for the charset with the given name;
    // use IsOk() to check if this succeeded
    wxMBConv_iconv(const wxChar *name);

    // closes the descriptors which were opened
    virtual ~wxMBConv_iconv();

    // convert from the charset to wchar_t and back: if buf is NULL, only the
    // length of the result is computed, otherwise n is the size of buf; both
    // return (size_t)-1 if iconv() fails
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // returns true only if the descriptors for both directions are valid
    bool IsOk() const
        { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction
    iconv_t m2w,
            w2m;
#if wxUSE_THREADS
    // guards access to m2w and w2m objects
    wxMutex m_iconvMutex;
#endif

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain empty
    static wxString ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;
};
1331
1332 // make the constructor available for unit testing
1333 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1334 {
1335     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1336     if ( !result->IsOk() )
1337     {
1338         delete result;
1339         return 0;
1340     }
1341     return result;
1342 }
1343
// definitions of the static members: the charset name starts out empty and
// is filled in by the constructor of the first wxMBConv_iconv which finds a
// charset that can be used for wchar_t
wxString wxMBConv_iconv::ms_wcCharsetName;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1346
1347 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1348 {
1349     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1350     // names for the charsets
1351     const wxCharBuffer cname(wxString(name).ToAscii());
1352
1353     // check for charset that represents wchar_t:
1354     if ( ms_wcCharsetName.empty() )
1355     {
1356 #if wxUSE_FONTMAP
1357         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1358 #else // !wxUSE_FONTMAP
1359         static const wxChar *names[] =
1360         {
1361 #if SIZEOF_WCHAR_T == 4
1362             _T("UCS-4"),
1363 #elif SIZEOF_WCHAR_T = 2
1364             _T("UCS-2"),
1365 #endif
1366             NULL
1367         };
1368 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1369
1370         for ( ; *names; ++names )
1371         {
1372             const wxString name(*names);
1373
1374             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1375             wxString nameXE(name);
1376             #ifdef WORDS_BIGENDIAN
1377                 nameXE += _T("BE");
1378             #else // little endian
1379                 nameXE += _T("LE");
1380             #endif
1381
1382             m2w = iconv_open(nameXE.ToAscii(), cname);
1383             if ( m2w == ICONV_T_INVALID )
1384             {
1385                 // try charset w/o bytesex info (e.g. "UCS4")
1386                 m2w = iconv_open(name.ToAscii(), cname);
1387
1388                 // and check for bytesex ourselves:
1389                 if ( m2w != ICONV_T_INVALID )
1390                 {
1391                     char    buf[2], *bufPtr;
1392                     wchar_t wbuf[2], *wbufPtr;
1393                     size_t  insz, outsz;
1394                     size_t  res;
1395
1396                     buf[0] = 'A';
1397                     buf[1] = 0;
1398                     wbuf[0] = 0;
1399                     insz = 2;
1400                     outsz = SIZEOF_WCHAR_T * 2;
1401                     wbufPtr = wbuf;
1402                     bufPtr = buf;
1403
1404                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1405                                 (char**)&wbufPtr, &outsz);
1406
1407                     if (ICONV_FAILED(res, insz))
1408                     {
1409                         wxLogLastError(wxT("iconv"));
1410                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1411                                    name.c_str());
1412                     }
1413                     else // ok, can convert to this encoding, remember it
1414                     {
1415                         ms_wcCharsetName = name;
1416                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1417                     }
1418                 }
1419             }
1420             else // use charset not requiring byte swapping
1421             {
1422                 ms_wcCharsetName = nameXE;
1423             }
1424         }
1425
1426         wxLogTrace(TRACE_STRCONV,
1427                    wxT("iconv wchar_t charset is \"%s\"%s"),
1428                    ms_wcCharsetName.empty() ? _T("<none>")
1429                                             : ms_wcCharsetName.c_str(),
1430                    ms_wcNeedsSwap ? _T(" (needs swap)")
1431                                   : _T(""));
1432     }
1433     else // we already have ms_wcCharsetName
1434     {
1435         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1436     }
1437
1438     if ( ms_wcCharsetName.empty() )
1439     {
1440         w2m = ICONV_T_INVALID;
1441     }
1442     else
1443     {
1444         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1445         if ( w2m == ICONV_T_INVALID )
1446         {
1447             wxLogTrace(TRACE_STRCONV,
1448                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1449                        ms_wcCharsetName.c_str(), cname.data());
1450         }
1451     }
1452 }
1453
1454 wxMBConv_iconv::~wxMBConv_iconv()
1455 {
1456     if ( m2w != ICONV_T_INVALID )
1457         iconv_close(m2w);
1458     if ( w2m != ICONV_T_INVALID )
1459         iconv_close(w2m);
1460 }
1461
1462 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1463 {
1464 #if wxUSE_THREADS
1465     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1466     //     Unfortunately there is a couple of global wxCSConv objects such as
1467     //     wxConvLocal that are used all over wx code, so we have to make sure
1468     //     the handle is used by at most one thread at the time. Otherwise
1469     //     only a few wx classes would be safe to use from non-main threads
1470     //     as MB<->WC conversion would fail "randomly".
1471     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1472 #endif
1473
1474     size_t inbuf = strlen(psz);
1475     size_t outbuf = n * SIZEOF_WCHAR_T;
1476     size_t res, cres;
1477     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1478     wchar_t *bufPtr = buf;
1479     const char *pszPtr = psz;
1480
1481     if (buf)
1482     {
1483         // have destination buffer, convert there
1484         cres = iconv(m2w,
1485                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1486                      (char**)&bufPtr, &outbuf);
1487         res = n - (outbuf / SIZEOF_WCHAR_T);
1488
1489         if (ms_wcNeedsSwap)
1490         {
1491             // convert to native endianness
1492             for ( unsigned n = 0; n < res; n++ )
1493                 buf[n] = WC_BSWAP(buf[n]);
1494         }
1495
1496         // NB: iconv was given only strlen(psz) characters on input, and so
1497         //     it couldn't convert the trailing zero. Let's do it ourselves
1498         //     if there's some room left for it in the output buffer.
1499         if (res < n)
1500             buf[res] = 0;
1501     }
1502     else
1503     {
1504         // no destination buffer... convert using temp buffer
1505         // to calculate destination buffer requirement
1506         wchar_t tbuf[8];
1507         res = 0;
1508         do {
1509             bufPtr = tbuf;
1510             outbuf = 8*SIZEOF_WCHAR_T;
1511
1512             cres = iconv(m2w,
1513                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1514                          (char**)&bufPtr, &outbuf );
1515
1516             res += 8-(outbuf/SIZEOF_WCHAR_T);
1517         } while ((cres==(size_t)-1) && (errno==E2BIG));
1518     }
1519
1520     if (ICONV_FAILED(cres, inbuf))
1521     {
1522         //VS: it is ok if iconv fails, hence trace only
1523         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1524         return (size_t)-1;
1525     }
1526
1527     return res;
1528 }
1529
1530 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1531 {
1532 #if wxUSE_THREADS
1533     // NB: explained in MB2WC
1534     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1535 #endif
1536
1537     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1538     size_t outbuf = n;
1539     size_t res, cres;
1540
1541     wchar_t *tmpbuf = 0;
1542
1543     if (ms_wcNeedsSwap)
1544     {
1545         // need to copy to temp buffer to switch endianness
1546         // (doing WC_BSWAP twice on the original buffer won't help, as it
1547         //  could be in read-only memory, or be accessed in some other thread)
1548         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1549         for ( size_t n = 0; n < inbuf; n++ )
1550             tmpbuf[n] = WC_BSWAP(psz[n]);
1551         tmpbuf[inbuf] = L'\0';
1552         psz = tmpbuf;
1553     }
1554
1555     if (buf)
1556     {
1557         // have destination buffer, convert there
1558         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1559
1560         res = n-outbuf;
1561
1562         // NB: iconv was given only wcslen(psz) characters on input, and so
1563         //     it couldn't convert the trailing zero. Let's do it ourselves
1564         //     if there's some room left for it in the output buffer.
1565         if (res < n)
1566             buf[0] = 0;
1567     }
1568     else
1569     {
1570         // no destination buffer... convert using temp buffer
1571         // to calculate destination buffer requirement
1572         char tbuf[16];
1573         res = 0;
1574         do {
1575             buf = tbuf; outbuf = 16;
1576
1577             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1578
1579             res += 16 - outbuf;
1580         } while ((cres==(size_t)-1) && (errno==E2BIG));
1581     }
1582
1583     if (ms_wcNeedsSwap)
1584     {
1585         free(tmpbuf);
1586     }
1587
1588     if (ICONV_FAILED(cres, inbuf))
1589     {
1590         //VS: it is ok if iconv fails, hence trace only
1591         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1592         return (size_t)-1;
1593     }
1594
1595     return res;
1596 }
1597
1598 #endif // HAVE_ICONV
1599
1600
1601 // ============================================================================
1602 // Win32 conversion classes
1603 // ============================================================================
1604
1605 #ifdef wxHAVE_WIN32_MB2WC
1606
// from utils.cpp: these functions are used by wxMBConv_win32 constructors
// below to obtain the code page corresponding to a charset name or a font
// encoding
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1612
// Converter between a Windows code page and wchar_t implemented using
// ::MultiByteToWideChar() and ::WideCharToMultiByte().
class wxMBConv_win32 : public wxMBConv
{
public:
    // default ctor uses the CP_ACP code page
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // use the code page corresponding to the given charset name; check
    // IsOk() to find out whether one was found
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    // use the code page corresponding to the given encoding; check IsOk()
    // to find out whether one was found
    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // Converts the NUL-terminated multibyte string psz to wide characters.
    //
    // If buf is NULL only the length of the result is computed, otherwise n
    // is the size of buf. Returns the length of the result not counting the
    // terminating NUL or (size_t)-1 if ::MultiByteToWideChar() returned 0.
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and breaks the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
        // an error (tested under Windows Server 2003) and apparently it is
        // done on purpose, i.e. the function accepts any input in this case
        // and although I'd prefer to return error on ill-formed output, our
        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) either so we don't
        // even have any fallback here...
        int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;

        const size_t len = ::MultiByteToWideChar
                             (
                                m_CodePage,     // code page
                                flags,          // flags: fail on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                             );

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        return len ? len - 1 : (size_t)-1;
    }

    // Converts the NUL-terminated wide string pwz to the multibyte encoding.
    //
    // If buf is NULL only the length of the result is computed, otherwise n
    // is the size of buf. Returns the length of the result not counting the
    // terminating NUL or (size_t)-1 if the conversion failed or, when really
    // converting, if it was detected to be lossy.
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        BOOL usedDef wxDUMMY_INITIALIZE(false);
        BOOL *pUsedDef;
        int flags;
        if ( CanUseNoBestFit() && m_CodePage < 50000 )
        {
            // it's our lucky day
            flags = WC_NO_BEST_FIT_CHARS;
            pUsedDef = &usedDef;
        }
        else // old system or unsupported encoding
        {
            flags = 0;
            pUsedDef = NULL;
        }

        const size_t len = ::WideCharToMultiByte
                             (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                             );

        if ( !len )
        {
            // function totally failed
            return (size_t)-1;
        }

        // if we were really converting, check if we succeeded
        if ( buf )
        {
            if ( flags )
            {
                // check if the conversion failed, i.e. if any replacements
                // were done
                if ( usedDef )
                    return (size_t)-1;
            }
            else // we must resort to double tripping...
            {
                // convert the result back and compare it with the input
                wxWCharBuffer wcBuf(n);
                if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                        wcscmp(wcBuf, pwz) != 0 )
                {
                    // we didn't obtain the same thing we started from, hence
                    // the conversion was lossy and we consider that it failed
                    return (size_t)-1;
                }
            }
        }

        // see the comment above for the reason of "len - 1"
        return len - 1;
    }

    // the object is usable unless the code page is -1
    bool IsOk() const { return m_CodePage != -1; }

private:
    // Returns true if the system version allows using WC_NO_BEST_FIT_CHARS;
    // the version is queried only once and the answer is cached in a static
    // variable.
    static bool CanUseNoBestFit()
    {
        // -1 means "not determined yet", otherwise it is 0 or 1
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            switch ( wxGetOsVersion(&verMaj, &verMin) )
            {
                case wxWIN95:
                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                    break;

                case wxWINDOWS_NT:
                    s_isWin98Or2k = verMaj >= 5;
                    break;

                default:
                    // unknown, be conservative by default
                    s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    // the code page used for the conversions, -1 if it is invalid
    long m_CodePage;
};
1780
1781 #endif // wxHAVE_WIN32_MB2WC
1782
1783 // ============================================================================
1784 // Cocoa conversion classes
1785 // ============================================================================
1786
1787 #if defined(__WXCOCOA__)
1788
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1792
1793 #include <CoreFoundation/CFString.h>
1794 #include <CoreFoundation/CFStringEncodingExt.h>
1795
1796 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1797 {
1798     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1799     if ( encoding == wxFONTENCODING_DEFAULT )
1800     {
1801         enc = CFStringGetSystemEncoding();
1802     }
1803     else switch( encoding)
1804     {
1805         case wxFONTENCODING_ISO8859_1 :
1806             enc = kCFStringEncodingISOLatin1 ;
1807             break ;
1808         case wxFONTENCODING_ISO8859_2 :
1809             enc = kCFStringEncodingISOLatin2;
1810             break ;
1811         case wxFONTENCODING_ISO8859_3 :
1812             enc = kCFStringEncodingISOLatin3 ;
1813             break ;
1814         case wxFONTENCODING_ISO8859_4 :
1815             enc = kCFStringEncodingISOLatin4;
1816             break ;
1817         case wxFONTENCODING_ISO8859_5 :
1818             enc = kCFStringEncodingISOLatinCyrillic;
1819             break ;
1820         case wxFONTENCODING_ISO8859_6 :
1821             enc = kCFStringEncodingISOLatinArabic;
1822             break ;
1823         case wxFONTENCODING_ISO8859_7 :
1824             enc = kCFStringEncodingISOLatinGreek;
1825             break ;
1826         case wxFONTENCODING_ISO8859_8 :
1827             enc = kCFStringEncodingISOLatinHebrew;
1828             break ;
1829         case wxFONTENCODING_ISO8859_9 :
1830             enc = kCFStringEncodingISOLatin5;
1831             break ;
1832         case wxFONTENCODING_ISO8859_10 :
1833             enc = kCFStringEncodingISOLatin6;
1834             break ;
1835         case wxFONTENCODING_ISO8859_11 :
1836             enc = kCFStringEncodingISOLatinThai;
1837             break ;
1838         case wxFONTENCODING_ISO8859_13 :
1839             enc = kCFStringEncodingISOLatin7;
1840             break ;
1841         case wxFONTENCODING_ISO8859_14 :
1842             enc = kCFStringEncodingISOLatin8;
1843             break ;
1844         case wxFONTENCODING_ISO8859_15 :
1845             enc = kCFStringEncodingISOLatin9;
1846             break ;
1847
1848         case wxFONTENCODING_KOI8 :
1849             enc = kCFStringEncodingKOI8_R;
1850             break ;
1851         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1852             enc = kCFStringEncodingDOSRussian;
1853             break ;
1854
1855 //      case wxFONTENCODING_BULGARIAN :
1856 //          enc = ;
1857 //          break ;
1858
1859         case wxFONTENCODING_CP437 :
1860             enc =kCFStringEncodingDOSLatinUS ;
1861             break ;
1862         case wxFONTENCODING_CP850 :
1863             enc = kCFStringEncodingDOSLatin1;
1864             break ;
1865         case wxFONTENCODING_CP852 :
1866             enc = kCFStringEncodingDOSLatin2;
1867             break ;
1868         case wxFONTENCODING_CP855 :
1869             enc = kCFStringEncodingDOSCyrillic;
1870             break ;
1871         case wxFONTENCODING_CP866 :
1872             enc =kCFStringEncodingDOSRussian ;
1873             break ;
1874         case wxFONTENCODING_CP874 :
1875             enc = kCFStringEncodingDOSThai;
1876             break ;
1877         case wxFONTENCODING_CP932 :
1878             enc = kCFStringEncodingDOSJapanese;
1879             break ;
1880         case wxFONTENCODING_CP936 :
1881             enc =kCFStringEncodingDOSChineseSimplif ;
1882             break ;
1883         case wxFONTENCODING_CP949 :
1884             enc = kCFStringEncodingDOSKorean;
1885             break ;
1886         case wxFONTENCODING_CP950 :
1887             enc = kCFStringEncodingDOSChineseTrad;
1888             break ;
1889         case wxFONTENCODING_CP1250 :
1890             enc = kCFStringEncodingWindowsLatin2;
1891             break ;
1892         case wxFONTENCODING_CP1251 :
1893             enc =kCFStringEncodingWindowsCyrillic ;
1894             break ;
1895         case wxFONTENCODING_CP1252 :
1896             enc =kCFStringEncodingWindowsLatin1 ;
1897             break ;
1898         case wxFONTENCODING_CP1253 :
1899             enc = kCFStringEncodingWindowsGreek;
1900             break ;
1901         case wxFONTENCODING_CP1254 :
1902             enc = kCFStringEncodingWindowsLatin5;
1903             break ;
1904         case wxFONTENCODING_CP1255 :
1905             enc =kCFStringEncodingWindowsHebrew ;
1906             break ;
1907         case wxFONTENCODING_CP1256 :
1908             enc =kCFStringEncodingWindowsArabic ;
1909             break ;
1910         case wxFONTENCODING_CP1257 :
1911             enc = kCFStringEncodingWindowsBalticRim;
1912             break ;
1913 //   This only really encodes to UTF7 (if that) evidently
1914 //        case wxFONTENCODING_UTF7 :
1915 //            enc = kCFStringEncodingNonLossyASCII ;
1916 //            break ;
1917         case wxFONTENCODING_UTF8 :
1918             enc = kCFStringEncodingUTF8 ;
1919             break ;
1920         case wxFONTENCODING_EUC_JP :
1921             enc = kCFStringEncodingEUC_JP;
1922             break ;
1923         case wxFONTENCODING_UTF16 :
1924             enc = kCFStringEncodingUnicode ;
1925             break ;
1926         case wxFONTENCODING_MACROMAN :
1927             enc = kCFStringEncodingMacRoman ;
1928             break ;
1929         case wxFONTENCODING_MACJAPANESE :
1930             enc = kCFStringEncodingMacJapanese ;
1931             break ;
1932         case wxFONTENCODING_MACCHINESETRAD :
1933             enc = kCFStringEncodingMacChineseTrad ;
1934             break ;
1935         case wxFONTENCODING_MACKOREAN :
1936             enc = kCFStringEncodingMacKorean ;
1937             break ;
1938         case wxFONTENCODING_MACARABIC :
1939             enc = kCFStringEncodingMacArabic ;
1940             break ;
1941         case wxFONTENCODING_MACHEBREW :
1942             enc = kCFStringEncodingMacHebrew ;
1943             break ;
1944         case wxFONTENCODING_MACGREEK :
1945             enc = kCFStringEncodingMacGreek ;
1946             break ;
1947         case wxFONTENCODING_MACCYRILLIC :
1948             enc = kCFStringEncodingMacCyrillic ;
1949             break ;
1950         case wxFONTENCODING_MACDEVANAGARI :
1951             enc = kCFStringEncodingMacDevanagari ;
1952             break ;
1953         case wxFONTENCODING_MACGURMUKHI :
1954             enc = kCFStringEncodingMacGurmukhi ;
1955             break ;
1956         case wxFONTENCODING_MACGUJARATI :
1957             enc = kCFStringEncodingMacGujarati ;
1958             break ;
1959         case wxFONTENCODING_MACORIYA :
1960             enc = kCFStringEncodingMacOriya ;
1961             break ;
1962         case wxFONTENCODING_MACBENGALI :
1963             enc = kCFStringEncodingMacBengali ;
1964             break ;
1965         case wxFONTENCODING_MACTAMIL :
1966             enc = kCFStringEncodingMacTamil ;
1967             break ;
1968         case wxFONTENCODING_MACTELUGU :
1969             enc = kCFStringEncodingMacTelugu ;
1970             break ;
1971         case wxFONTENCODING_MACKANNADA :
1972             enc = kCFStringEncodingMacKannada ;
1973             break ;
1974         case wxFONTENCODING_MACMALAJALAM :
1975             enc = kCFStringEncodingMacMalayalam ;
1976             break ;
1977         case wxFONTENCODING_MACSINHALESE :
1978             enc = kCFStringEncodingMacSinhalese ;
1979             break ;
1980         case wxFONTENCODING_MACBURMESE :
1981             enc = kCFStringEncodingMacBurmese ;
1982             break ;
1983         case wxFONTENCODING_MACKHMER :
1984             enc = kCFStringEncodingMacKhmer ;
1985             break ;
1986         case wxFONTENCODING_MACTHAI :
1987             enc = kCFStringEncodingMacThai ;
1988             break ;
1989         case wxFONTENCODING_MACLAOTIAN :
1990             enc = kCFStringEncodingMacLaotian ;
1991             break ;
1992         case wxFONTENCODING_MACGEORGIAN :
1993             enc = kCFStringEncodingMacGeorgian ;
1994             break ;
1995         case wxFONTENCODING_MACARMENIAN :
1996             enc = kCFStringEncodingMacArmenian ;
1997             break ;
1998         case wxFONTENCODING_MACCHINESESIMP :
1999             enc = kCFStringEncodingMacChineseSimp ;
2000             break ;
2001         case wxFONTENCODING_MACTIBETAN :
2002             enc = kCFStringEncodingMacTibetan ;
2003             break ;
2004         case wxFONTENCODING_MACMONGOLIAN :
2005             enc = kCFStringEncodingMacMongolian ;
2006             break ;
2007         case wxFONTENCODING_MACETHIOPIC :
2008             enc = kCFStringEncodingMacEthiopic ;
2009             break ;
2010         case wxFONTENCODING_MACCENTRALEUR :
2011             enc = kCFStringEncodingMacCentralEurRoman ;
2012             break ;
2013         case wxFONTENCODING_MACVIATNAMESE :
2014             enc = kCFStringEncodingMacVietnamese ;
2015             break ;
2016         case wxFONTENCODING_MACARABICEXT :
2017             enc = kCFStringEncodingMacExtArabic ;
2018             break ;
2019         case wxFONTENCODING_MACSYMBOL :
2020             enc = kCFStringEncodingMacSymbol ;
2021             break ;
2022         case wxFONTENCODING_MACDINGBATS :
2023             enc = kCFStringEncodingMacDingbats ;
2024             break ;
2025         case wxFONTENCODING_MACTURKISH :
2026             enc = kCFStringEncodingMacTurkish ;
2027             break ;
2028         case wxFONTENCODING_MACCROATIAN :
2029             enc = kCFStringEncodingMacCroatian ;
2030             break ;
2031         case wxFONTENCODING_MACICELANDIC :
2032             enc = kCFStringEncodingMacIcelandic ;
2033             break ;
2034         case wxFONTENCODING_MACROMANIAN :
2035             enc = kCFStringEncodingMacRomanian ;
2036             break ;
2037         case wxFONTENCODING_MACCELTIC :
2038             enc = kCFStringEncodingMacCeltic ;
2039             break ;
2040         case wxFONTENCODING_MACGAELIC :
2041             enc = kCFStringEncodingMacGaelic ;
2042             break ;
2043 //      case wxFONTENCODING_MACKEYBOARD :
2044 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2045 //          break ;
2046         default :
2047             // because gcc is picky
2048             break ;
2049     } ;
2050     return enc ;
2051 }
2052
2053 class wxMBConv_cocoa : public wxMBConv
2054 {
2055 public:
2056     wxMBConv_cocoa()
2057     {
2058         Init(CFStringGetSystemEncoding()) ;
2059     }
2060
2061 #if wxUSE_FONTMAP
2062     wxMBConv_cocoa(const wxChar* name)
2063     {
2064         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2065     }
2066 #endif
2067
2068     wxMBConv_cocoa(wxFontEncoding encoding)
2069     {
2070         Init( wxCFStringEncFromFontEnc(encoding) );
2071     }
2072
2073     ~wxMBConv_cocoa()
2074     {
2075     }
2076
2077     void Init( CFStringEncoding encoding)
2078     {
2079         m_encoding = encoding ;
2080     }
2081
2082     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2083     {
2084         wxASSERT(szUnConv);
2085
2086         CFStringRef theString = CFStringCreateWithBytes (
2087                                                 NULL, //the allocator
2088                                                 (const UInt8*)szUnConv,
2089                                                 strlen(szUnConv),
2090                                                 m_encoding,
2091                                                 false //no BOM/external representation
2092                                                 );
2093
2094         wxASSERT(theString);
2095
2096         size_t nOutLength = CFStringGetLength(theString);
2097
2098         if (szOut == NULL)
2099         {
2100             CFRelease(theString);
2101             return nOutLength;
2102         }
2103
2104         CFRange theRange = { 0, nOutSize };
2105
2106 #if SIZEOF_WCHAR_T == 4
2107         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2108 #endif
2109
2110         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2111
2112         CFRelease(theString);
2113
2114         szUniCharBuffer[nOutLength] = '\0' ;
2115
2116 #if SIZEOF_WCHAR_T == 4
2117         wxMBConvUTF16 converter ;
2118         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2119         delete[] szUniCharBuffer;
2120 #endif
2121
2122         return nOutLength;
2123     }
2124
2125     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2126     {
2127         wxASSERT(szUnConv);
2128
2129         size_t nRealOutSize;
2130         size_t nBufSize = wxWcslen(szUnConv);
2131         UniChar* szUniBuffer = (UniChar*) szUnConv;
2132
2133 #if SIZEOF_WCHAR_T == 4
2134         wxMBConvUTF16 converter ;
2135         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2136         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2137         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2138         nBufSize /= sizeof(UniChar);
2139 #endif
2140
2141         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2142                                 NULL, //allocator
2143                                 szUniBuffer,
2144                                 nBufSize,
2145                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2146                             );
2147
2148         wxASSERT(theString);
2149
2150         //Note that CER puts a BOM when converting to unicode
2151         //so we  check and use getchars instead in that case
2152         if (m_encoding == kCFStringEncodingUnicode)
2153         {
2154             if (szOut != NULL)
2155                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2156
2157             nRealOutSize = CFStringGetLength(theString) + 1;
2158         }
2159         else
2160         {
2161             CFStringGetBytes(
2162                 theString,
2163                 CFRangeMake(0, CFStringGetLength(theString)),
2164                 m_encoding,
2165                 0, //what to put in characters that can't be converted -
2166                     //0 tells CFString to return NULL if it meets such a character
2167                 false, //not an external representation
2168                 (UInt8*) szOut,
2169                 nOutSize,
2170                 (CFIndex*) &nRealOutSize
2171                         );
2172         }
2173
2174         CFRelease(theString);
2175
2176 #if SIZEOF_WCHAR_T == 4
2177         delete[] szUniBuffer;
2178 #endif
2179
2180         return  nRealOutSize - 1;
2181     }
2182
2183     bool IsOk() const
2184     {
2185         return m_encoding != kCFStringEncodingInvalidId &&
2186               CFStringIsEncodingAvailable(m_encoding);
2187     }
2188
2189 private:
2190     CFStringEncoding m_encoding ;
2191 };
2192
2193 #endif // defined(__WXCOCOA__)
2194
2195 // ============================================================================
2196 // Mac conversion classes
2197 // ============================================================================
2198
2199 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2200
2201 class wxMBConv_mac : public wxMBConv
2202 {
2203 public:
2204     wxMBConv_mac()
2205     {
2206         Init(CFStringGetSystemEncoding()) ;
2207     }
2208
2209 #if wxUSE_FONTMAP
2210     wxMBConv_mac(const wxChar* name)
2211     {
2212         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2213     }
2214 #endif
2215
2216     wxMBConv_mac(wxFontEncoding encoding)
2217     {
2218         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2219     }
2220
2221     ~wxMBConv_mac()
2222     {
2223         OSStatus status = noErr ;
2224         status = TECDisposeConverter(m_MB2WC_converter);
2225         status = TECDisposeConverter(m_WC2MB_converter);
2226     }
2227
2228
2229     void Init( TextEncodingBase encoding)
2230     {
2231         OSStatus status = noErr ;
2232         m_char_encoding = encoding ;
2233         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2234
2235         status = TECCreateConverter(&m_MB2WC_converter,
2236                                     m_char_encoding,
2237                                     m_unicode_encoding);
2238         status = TECCreateConverter(&m_WC2MB_converter,
2239                                     m_unicode_encoding,
2240                                     m_char_encoding);
2241     }
2242
2243     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2244     {
2245         OSStatus status = noErr ;
2246         ByteCount byteOutLen ;
2247         ByteCount byteInLen = strlen(psz) ;
2248         wchar_t *tbuf = NULL ;
2249         UniChar* ubuf = NULL ;
2250         size_t res = 0 ;
2251
2252         if (buf == NULL)
2253         {
2254             //apple specs say at least 32
2255             n = wxMax( 32 , byteInLen ) ;
2256             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2257         }
2258         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2259 #if SIZEOF_WCHAR_T == 4
2260         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2261 #else
2262         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2263 #endif
2264         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2265           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2266 #if SIZEOF_WCHAR_T == 4
2267         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2268         // is not properly terminated we get random characters at the end
2269         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2270         wxMBConvUTF16 converter ;
2271         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2272         free( ubuf ) ;
2273 #else
2274         res = byteOutLen / sizeof( UniChar ) ;
2275 #endif
2276         if ( buf == NULL )
2277              free(tbuf) ;
2278
2279         if ( buf  && res < n)
2280             buf[res] = 0;
2281
2282         return res ;
2283     }
2284
2285     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2286     {
2287         OSStatus status = noErr ;
2288         ByteCount byteOutLen ;
2289         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2290
2291         char *tbuf = NULL ;
2292
2293         if (buf == NULL)
2294         {
2295             //apple specs say at least 32
2296             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2297             tbuf = (char*) malloc( n ) ;
2298         }
2299
2300         ByteCount byteBufferLen = n ;
2301         UniChar* ubuf = NULL ;
2302 #if SIZEOF_WCHAR_T == 4
2303         wxMBConvUTF16 converter ;
2304         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2305         byteInLen = unicharlen ;
2306         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2307         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2308 #else
2309         ubuf = (UniChar*) psz ;
2310 #endif
2311         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2312             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2313 #if SIZEOF_WCHAR_T == 4
2314         free( ubuf ) ;
2315 #endif
2316         if ( buf == NULL )
2317             free(tbuf) ;
2318
2319         size_t res = byteOutLen ;
2320         if ( buf  && res < n)
2321         {
2322             buf[res] = 0;
2323
2324             //we need to double-trip to verify it didn't insert any ? in place
2325             //of bogus characters
2326             wxWCharBuffer wcBuf(n);
2327             size_t pszlen = wxWcslen(psz);
2328             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2329                         wxWcslen(wcBuf) != pszlen ||
2330                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2331             {
2332                 // we didn't obtain the same thing we started from, hence
2333                 // the conversion was lossy and we consider that it failed
2334                 return (size_t)-1;
2335             }
2336         }
2337
2338         return res ;
2339     }
2340
2341     bool IsOk() const
2342         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2343
2344 private:
2345     TECObjectRef m_MB2WC_converter ;
2346     TECObjectRef m_WC2MB_converter ;
2347
2348     TextEncodingBase m_char_encoding ;
2349     TextEncodingBase m_unicode_encoding ;
2350 };
2351
2352 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2353
2354 // ============================================================================
2355 // wxEncodingConverter based conversion classes
2356 // ============================================================================
2357
2358 #if wxUSE_FONTMAP
2359
2360 class wxMBConv_wxwin : public wxMBConv
2361 {
2362 private:
2363     void Init()
2364     {
2365         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2366                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2367     }
2368
2369 public:
2370     // temporarily just use wxEncodingConverter stuff,
2371     // so that it works while a better implementation is built
2372     wxMBConv_wxwin(const wxChar* name)
2373     {
2374         if (name)
2375             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2376         else
2377             m_enc = wxFONTENCODING_SYSTEM;
2378
2379         Init();
2380     }
2381
2382     wxMBConv_wxwin(wxFontEncoding enc)
2383     {
2384         m_enc = enc;
2385
2386         Init();
2387     }
2388
2389     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2390     {
2391         size_t inbuf = strlen(psz);
2392         if (buf)
2393         {
2394             if (!m2w.Convert(psz,buf))
2395                 return (size_t)-1;
2396         }
2397         return inbuf;
2398     }
2399
2400     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2401     {
2402         const size_t inbuf = wxWcslen(psz);
2403         if (buf)
2404         {
2405             if (!w2m.Convert(psz,buf))
2406                 return (size_t)-1;
2407         }
2408
2409         return inbuf;
2410     }
2411
2412     bool IsOk() const { return m_ok; }
2413
2414 public:
2415     wxFontEncoding m_enc;
2416     wxEncodingConverter m2w, w2m;
2417
2418     // were we initialized successfully?
2419     bool m_ok;
2420
2421     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2422 };
2423
2424 // make the constructors available for unit testing
2425 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2426 {
2427     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2428     if ( !result->IsOk() )
2429     {
2430         delete result;
2431         return 0;
2432     }
2433     return result;
2434 }
2435
2436 #endif // wxUSE_FONTMAP
2437
2438 // ============================================================================
2439 // wxCSConv implementation
2440 // ============================================================================
2441
2442 void wxCSConv::Init()
2443 {
2444     m_name = NULL;
2445     m_convReal =  NULL;
2446     m_deferred = true;
2447 }
2448
2449 wxCSConv::wxCSConv(const wxChar *charset)
2450 {
2451     Init();
2452
2453     if ( charset )
2454     {
2455         SetName(charset);
2456     }
2457
2458     m_encoding = wxFONTENCODING_SYSTEM;
2459 }
2460
2461 wxCSConv::wxCSConv(wxFontEncoding encoding)
2462 {
2463     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2464     {
2465         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2466
2467         encoding = wxFONTENCODING_SYSTEM;
2468     }
2469
2470     Init();
2471
2472     m_encoding = encoding;
2473 }
2474
// dtor: all the cleanup is done by Clear()
wxCSConv::~wxCSConv()
{
    Clear();
}
2479
2480 wxCSConv::wxCSConv(const wxCSConv& conv)
2481         : wxMBConv()
2482 {
2483     Init();
2484
2485     SetName(conv.m_name);
2486     m_encoding = conv.m_encoding;
2487 }
2488
2489 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2490 {
2491     Clear();
2492
2493     SetName(conv.m_name);
2494     m_encoding = conv.m_encoding;
2495
2496     return *this;
2497 }
2498
2499 void wxCSConv::Clear()
2500 {
2501     free(m_name);
2502     delete m_convReal;
2503
2504     m_name = NULL;
2505     m_convReal = NULL;
2506 }
2507
2508 void wxCSConv::SetName(const wxChar *charset)
2509 {
2510     if (charset)
2511     {
2512         m_name = wxStrdup(charset);
2513         m_deferred = true;
2514     }
2515 }
2516
2517 #if wxUSE_FONTMAP
2518 #include "wx/hashmap.h"
2519
// map from the encoding to the charset name which iconv accepted for it
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// filled by wxCSConv::DoCreate(): an empty string is stored for an encoding
// for which none of the names worked with iconv
static wxEncodingNameCache gs_nameCache;
2524 #endif
2525
2526 wxMBConv *wxCSConv::DoCreate() const
2527 {
2528 #if wxUSE_FONTMAP
2529     wxLogTrace(TRACE_STRCONV,
2530                wxT("creating conversion for %s"),
2531                (m_name ? m_name
2532                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2533 #endif // wxUSE_FONTMAP
2534
2535     // check for the special case of ASCII or ISO8859-1 charset: as we have
2536     // special knowledge of it anyhow, we don't need to create a special
2537     // conversion object
2538     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2539     {
2540         // don't convert at all
2541         return NULL;
2542     }
2543
2544     // we trust OS to do conversion better than we can so try external
2545     // conversion methods first
2546     //
2547     // the full order is:
2548     //      1. OS conversion (iconv() under Unix or Win32 API)
2549     //      2. hard coded conversions for UTF
2550     //      3. wxEncodingConverter as fall back
2551
2552     // step (1)
2553 #ifdef HAVE_ICONV
2554 #if !wxUSE_FONTMAP
2555     if ( m_name )
2556 #endif // !wxUSE_FONTMAP
2557     {
2558         wxString name(m_name);
2559         wxFontEncoding encoding(m_encoding);
2560
2561         if ( !name.empty() )
2562         {
2563             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2564             if ( conv->IsOk() )
2565                 return conv;
2566
2567             delete conv;
2568
2569 #if wxUSE_FONTMAP
2570             encoding =
2571                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2572 #endif // wxUSE_FONTMAP
2573         }
2574 #if wxUSE_FONTMAP
2575         {
2576             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2577             if ( it != gs_nameCache.end() )
2578             {
2579                 if ( it->second.empty() )
2580                     return NULL;
2581
2582                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2583                 if ( conv->IsOk() )
2584                     return conv;
2585
2586                 delete conv;
2587             }
2588
2589             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2590
2591             for ( ; *names; ++names )
2592             {
2593                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2594                 if ( conv->IsOk() )
2595                 {
2596                     gs_nameCache[encoding] = *names;
2597                     return conv;
2598                 }
2599
2600                 delete conv;
2601             }
2602
2603             gs_nameCache[encoding] = _T(""); // cache the failure
2604         }
2605 #endif // wxUSE_FONTMAP
2606     }
2607 #endif // HAVE_ICONV
2608
2609 #ifdef wxHAVE_WIN32_MB2WC
2610     {
2611 #if wxUSE_FONTMAP
2612         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2613                                       : new wxMBConv_win32(m_encoding);
2614         if ( conv->IsOk() )
2615             return conv;
2616
2617         delete conv;
2618 #else
2619         return NULL;
2620 #endif
2621     }
2622 #endif // wxHAVE_WIN32_MB2WC
2623 #if defined(__WXMAC__)
2624     {
2625         // leave UTF16 and UTF32 to the built-ins of wx
2626         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2627             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2628         {
2629
2630 #if wxUSE_FONTMAP
2631             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2632                                         : new wxMBConv_mac(m_encoding);
2633 #else
2634             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2635 #endif
2636             if ( conv->IsOk() )
2637                  return conv;
2638
2639             delete conv;
2640         }
2641     }
2642 #endif
2643 #if defined(__WXCOCOA__)
2644     {
2645         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2646         {
2647
2648 #if wxUSE_FONTMAP
2649             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2650                                           : new wxMBConv_cocoa(m_encoding);
2651 #else
2652             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2653 #endif
2654             if ( conv->IsOk() )
2655                  return conv;
2656
2657             delete conv;
2658         }
2659     }
2660 #endif
2661     // step (2)
2662     wxFontEncoding enc = m_encoding;
2663 #if wxUSE_FONTMAP
2664     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2665     {
2666         // use "false" to suppress interactive dialogs -- we can be called from
2667         // anywhere and popping up a dialog from here is the last thing we want to
2668         // do
2669         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2670     }
2671 #endif // wxUSE_FONTMAP
2672
2673     switch ( enc )
2674     {
2675         case wxFONTENCODING_UTF7:
2676              return new wxMBConvUTF7;
2677
2678         case wxFONTENCODING_UTF8:
2679              return new wxMBConvUTF8;
2680
2681         case wxFONTENCODING_UTF16BE:
2682              return new wxMBConvUTF16BE;
2683
2684         case wxFONTENCODING_UTF16LE:
2685              return new wxMBConvUTF16LE;
2686
2687         case wxFONTENCODING_UTF32BE:
2688              return new wxMBConvUTF32BE;
2689
2690         case wxFONTENCODING_UTF32LE:
2691              return new wxMBConvUTF32LE;
2692
2693         default:
2694              // nothing to do but put here to suppress gcc warnings
2695              ;
2696     }
2697
2698     // step (3)
2699 #if wxUSE_FONTMAP
2700     {
2701         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2702                                       : new wxMBConv_wxwin(m_encoding);
2703         if ( conv->IsOk() )
2704             return conv;
2705
2706         delete conv;
2707     }
2708 #endif // wxUSE_FONTMAP
2709
2710     // NB: This is a hack to prevent deadlock. What could otherwise happen
2711     //     in Unicode build: wxConvLocal creation ends up being here
2712     //     because of some failure and logs the error. But wxLog will try to
2713     //     attach timestamp, for which it will need wxConvLocal (to convert
2714     //     time to char* and then wchar_t*), but that fails, tries to log
2715     //     error, but wxLog has a (already locked) critical section that
2716     //     guards static buffer.
2717     static bool alreadyLoggingError = false;
2718     if (!alreadyLoggingError)
2719     {
2720         alreadyLoggingError = true;
2721         wxLogError(_("Cannot convert from the charset '%s'!"),
2722                    m_name ? m_name
2723                       :
2724 #if wxUSE_FONTMAP
2725                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2726 #else // !wxUSE_FONTMAP
2727                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2728 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2729               );
2730         alreadyLoggingError = false;
2731     }
2732
2733     return NULL;
2734 }
2735
2736 void wxCSConv::CreateConvIfNeeded() const
2737 {
2738     if ( m_deferred )
2739     {
2740         wxCSConv *self = (wxCSConv *)this; // const_cast
2741
2742 #if wxUSE_INTL
2743         // if we don't have neither the name nor the encoding, use the default
2744         // encoding for this system
2745         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2746         {
2747             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2748         }
2749 #endif // wxUSE_INTL
2750
2751         self->m_convReal = DoCreate();
2752         self->m_deferred = false;
2753     }
2754 }
2755
2756 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2757 {
2758     CreateConvIfNeeded();
2759
2760     if (m_convReal)
2761         return m_convReal->MB2WC(buf, psz, n);
2762
2763     // latin-1 (direct)
2764     size_t len = strlen(psz);
2765
2766     if (buf)
2767     {
2768         for (size_t c = 0; c <= len; c++)
2769             buf[c] = (unsigned char)(psz[c]);
2770     }
2771
2772     return len;
2773 }
2774
2775 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2776 {
2777     CreateConvIfNeeded();
2778
2779     if (m_convReal)
2780         return m_convReal->WC2MB(buf, psz, n);
2781
2782     // latin-1 (direct)
2783     const size_t len = wxWcslen(psz);
2784     if (buf)
2785     {
2786         for (size_t c = 0; c <= len; c++)
2787         {
2788             if (psz[c] > 0xFF)
2789                 return (size_t)-1;
2790             buf[c] = (char)psz[c];
2791         }
2792     }
2793     else
2794     {
2795         for (size_t c = 0; c <= len; c++)
2796         {
2797             if (psz[c] > 0xFF)
2798                 return (size_t)-1;
2799         }
2800     }
2801
2802     return len;
2803 }
2804
2805 // ----------------------------------------------------------------------------
2806 // globals
2807 // ----------------------------------------------------------------------------
2808
// the object behind wxConvLibc: its class depends on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;

// the exported references and pointers to the objects above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent initially points to the libc converter
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
// wxConvFileName points to the UTF-8 converter if __WXOSX__ is defined and
// to the libc one otherwise
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
                                    wxConvUTF8Obj;
#else
                                    wxConvLibcObj;
#endif
2834
2835
2836 #else // !wxUSE_WCHAR_T
2837
2838 // stand-ins in absence of wchar_t
2839 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2840                                 wxConvISO8859_1,
2841                                 wxConvLocal,
2842                                 wxConvUTF8;
2843
2844 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2845
2846