src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
  79 #define TRACE_STRCONV _T("strconv")
  80
  81 // ============================================================================
  82 // implementation
  83 // ============================================================================
  84
  85 // ----------------------------------------------------------------------------
  86 // UTF-16 en/decoding to/from UCS-4
  87 // ----------------------------------------------------------------------------
  88
  89
  90 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  91 {
  92     if (input<=0xffff)
  93     {
  94         if (output)
  95             *output = (wxUint16) input;
  96         return 1;
  97     }
  98     else if (input>=0x110000)
  99     {
 100         return (size_t)-1;
 101     }
 102     else
 103     {
 104         if (output)
 105         {
 106             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 107             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 108         }
 109         return 2;
 110     }
 111 }
 112
 113 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 114 {
 115     if ((*input<0xd800) || (*input>0xdfff))
 116     {
 117         output = *input;
 118         return 1;
 119     }
 120     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 121     {
 122         output = *input;
 123         return (size_t)-1;
 124     }
 125     else
 126     {
 127         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 128         return 2;
 129     }
 130 }
 131
 132
 133 // ----------------------------------------------------------------------------
 134 // wxMBConv
 135 // ----------------------------------------------------------------------------
 136
 137 wxMBConv::~wxMBConv()
 138 {
 139     // nothing to do here (necessary for Darwin linking probably)
 140 }
 141
 142 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 143 {
 144     if ( psz )
 145     {
 146         // calculate the length of the buffer needed first
 147         size_t nLen = MB2WC(NULL, psz, 0);
 148         if ( nLen != (size_t)-1 )
 149         {
 150             // now do the actual conversion
 151             wxWCharBuffer buf(nLen);
 152             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 153             if ( nLen != (size_t)-1 )
 154             {
 155                 return buf;
 156             }
 157         }
 158     }
 159
 160     wxWCharBuffer buf((wchar_t *)NULL);
 161
 162     return buf;
 163 }
 164
 165 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 166 {
 167     if ( pwz )
 168     {
 169         size_t nLen = WC2MB(NULL, pwz, 0);
 170         if ( nLen != (size_t)-1 )
 171         {
 172             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 173             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 174             if ( nLen != (size_t)-1 )
 175             {
 176                 return buf;
 177             }
 178         }
 179     }
 180
 181     wxCharBuffer buf((char *)NULL);
 182
 183     return buf;
 184 }
 185
 186 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 187 {
 188     wxASSERT(pOutSize != NULL);
 189
 190     const char* szEnd = szString + nStringLen + 1;
 191     const char* szPos = szString;
 192     const char* szStart = szPos;
 193
 194     size_t nActualLength = 0;
 195     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 196
 197     wxWCharBuffer theBuffer(nCurrentSize);
 198
 199     //Convert the string until the length() is reached, continuing the
 200     //loop every time a null character is reached
 201     while(szPos != szEnd)
 202     {
 203         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 204
 205         //Get the length of the current (sub)string
 206         size_t nLen = MB2WC(NULL, szPos, 0);
 207
 208         //Invalid conversion?
 209         if( nLen == (size_t)-1 )
 210         {
 211             *pOutSize = 0;
 212             theBuffer.data()[0u] = wxT('\0');
 213             return theBuffer;
 214         }
 215
 216
 217         //Increase the actual length (+1 for current null character)
 218         nActualLength += nLen + 1;
 219
 220         //if buffer too big, realloc the buffer
 221         if (nActualLength > (nCurrentSize+1))
 222         {
 223             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 224             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 225             theBuffer = theNewBuffer;
 226             nCurrentSize <<= 1;
 227         }
 228
 229         //Convert the current (sub)string
 230         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 231         {
 232             *pOutSize = 0;
 233             theBuffer.data()[0u] = wxT('\0');
 234             return theBuffer;
 235         }
 236
 237         //Increment to next (sub)string
 238         //Note that we have to use strlen instead of nLen here
 239         //because XX2XX gives us the size of the output buffer,
 240         //which is not necessarily the length of the string
 241         szPos += strlen(szPos) + 1;
 242     }
 243
 244     //success - return actual length and the buffer
 245     *pOutSize = nActualLength;
 246     return theBuffer;
 247 }
 248
 249 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 250 {
 251     wxASSERT(pOutSize != NULL);
 252
 253     const wchar_t* szEnd = szString + nStringLen + 1;
 254     const wchar_t* szPos = szString;
 255     const wchar_t* szStart = szPos;
 256
 257     size_t nActualLength = 0;
 258     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 259
 260     wxCharBuffer theBuffer(nCurrentSize);
 261
 262     //Convert the string until the length() is reached, continuing the
 263     //loop every time a null character is reached
 264     while(szPos != szEnd)
 265     {
 266         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 267
 268         //Get the length of the current (sub)string
 269         size_t nLen = WC2MB(NULL, szPos, 0);
 270
 271         //Invalid conversion?
 272         if( nLen == (size_t)-1 )
 273         {
 274             *pOutSize = 0;
 275             theBuffer.data()[0u] = wxT('\0');
 276             return theBuffer;
 277         }
 278
 279         //Increase the actual length (+1 for current null character)
 280         nActualLength += nLen + 1;
 281
 282         //if buffer too big, realloc the buffer
 283         if (nActualLength > (nCurrentSize+1))
 284         {
 285             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 286             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 287             theBuffer = theNewBuffer;
 288             nCurrentSize <<= 1;
 289         }
 290
 291         //Convert the current (sub)string
 292         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 293         {
 294             *pOutSize = 0;
 295             theBuffer.data()[0u] = wxT('\0');
 296             return theBuffer;
 297         }
 298
 299         //Increment to next (sub)string
 300         //Note that we have to use wxWcslen instead of nLen here
 301         //because XX2XX gives us the size of the output buffer,
 302         //which is not necessarily the length of the string
 303         szPos += wxWcslen(szPos) + 1;
 304     }
 305
 306     //success - return actual length and the buffer
 307     *pOutSize = nActualLength;
 308     return theBuffer;
 309 }
 310
 311 // ----------------------------------------------------------------------------
 312 // wxMBConvLibc
 313 // ----------------------------------------------------------------------------
 314
 315 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 316 {
 317     return wxMB2WC(buf, psz, n);
 318 }
 319
 320 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 321 {
 322     return wxWC2MB(buf, psz, n);
 323 }
 324
 325 #ifdef __UNIX__
 326
 327 // ----------------------------------------------------------------------------
 328 // wxConvBrokenFileNames
 329 // ----------------------------------------------------------------------------
 330
 331 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 332 {
 333     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 334                   || wxStricmp(charset, _T("UTF8")) == 0  )
 335         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 336     else
 337         m_conv = new wxCSConv(charset);
 338 }
 339
 340 size_t
 341 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 342                              const char *psz,
 343                              size_t outputSize) const
 344 {
 345     return m_conv->MB2WC( outputBuf, psz, outputSize );
 346 }
 347
 348 size_t
 349 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 350                              const wchar_t *psz,
 351                              size_t outputSize) const
 352 {
 353     return m_conv->WC2MB( outputBuf, psz, outputSize );
 354 }
 355
 356 #endif
 357
 358 // ----------------------------------------------------------------------------
 359 // UTF-7
 360 // ----------------------------------------------------------------------------
 361
 362 // Implementation (C) 2004 Fredrik Roubert
 363
 364 //
 365 // BASE64 decoding table
 366 //
 367 static const unsigned char utf7unb64[] =
 368 {
 369     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 370     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 371     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 372     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 373     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 374     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 375     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 376     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 377     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 378     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 379     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 380     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 381     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 382     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 383     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 384     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 385     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 386     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 387     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 388     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 389     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 390     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 391     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 392     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 393     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 394     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 395     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 396     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 397     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 398     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 399     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 400     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 401 };
 402
 403 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 404 {
 405     size_t len = 0;
 406
 407     while (*psz && ((!buf) || (len < n)))
 408     {
 409         unsigned char cc = *psz++;
 410         if (cc != '+')
 411         {
 412             // plain ASCII char
 413             if (buf)
 414                 *buf++ = cc;
 415             len++;
 416         }
 417         else if (*psz == '-')
 418         {
 419             // encoded plus sign
 420             if (buf)
 421                 *buf++ = cc;
 422             len++;
 423             psz++;
 424         }
 425         else
 426         {
 427             // BASE64 encoded string
 428             bool lsb;
 429             unsigned char c;
 430             unsigned int d, l;
 431             for (lsb = false, d = 0, l = 0;
 432                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 433             {
 434                 d <<= 6;
 435                 d += cc;
 436                 for (l += 6; l >= 8; lsb = !lsb)
 437                 {
 438                     c = (unsigned char)((d >> (l -= 8)) % 256);
 439                     if (lsb)
 440                     {
 441                         if (buf)
 442                             *buf++ |= c;
 443                         len ++;
 444                     }
 445                     else
 446                         if (buf)
 447                             *buf = (wchar_t)(c << 8);
 448                 }
 449             }
 450             if (*psz == '-')
 451                 psz++;
 452         }
 453     }
 454     if (buf && (len < n))
 455         *buf = 0;
 456     return len;
 457 }
 458
 459 //
 460 // BASE64 encoding table
 461 //
 462 static const unsigned char utf7enb64[] =
 463 {
 464     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 465     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 466     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 467     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 468     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 469     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 470     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 471     '4', '5', '6', '7', '8', '9', '+', '/'
 472 };
 473
 474 //
 475 // UTF-7 encoding table
 476 //
 477 // 0 - Set D (directly encoded characters)
 478 // 1 - Set O (optional direct characters)
 479 // 2 - whitespace characters (optional)
 480 // 3 - special characters
 481 //
 482 static const unsigned char utf7encode[128] =
 483 {
 484     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 485     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 486     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 487     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 488     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 489     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 490     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 491     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 492 };
 493
 494 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 495 {
 496
 497
 498     size_t len = 0;
 499
 500     while (*psz && ((!buf) || (len < n)))
 501     {
 502         wchar_t cc = *psz++;
 503         if (cc < 0x80 && utf7encode[cc] < 1)
 504         {
 505             // plain ASCII char
 506             if (buf)
 507                 *buf++ = (char)cc;
 508             len++;
 509         }
 510 #ifndef WC_UTF16
 511         else if (((wxUint32)cc) > 0xffff)
 512         {
 513             // no surrogate pair generation (yet?)
 514             return (size_t)-1;
 515         }
 516 #endif
 517         else
 518         {
 519             if (buf)
 520                 *buf++ = '+';
 521             len++;
 522             if (cc != '+')
 523             {
 524                 // BASE64 encode string
 525                 unsigned int lsb, d, l;
 526                 for (d = 0, l = 0;; psz++)
 527                 {
 528                     for (lsb = 0; lsb < 2; lsb ++)
 529                     {
 530                         d <<= 8;
 531                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 532
 533                         for (l += 8; l >= 6; )
 534                         {
 535                             l -= 6;
 536                             if (buf)
 537                                 *buf++ = utf7enb64[(d >> l) % 64];
 538                             len++;
 539                         }
 540                     }
 541                     cc = *psz;
 542                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 543                         break;
 544                 }
 545                 if (l != 0)
 546                 {
 547                     if (buf)
 548                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 549                     len++;
 550                 }
 551             }
 552             if (buf)
 553                 *buf++ = '-';
 554             len++;
 555         }
 556     }
 557     if (buf && (len < n))
 558         *buf = 0;
 559     return len;
 560 }
 561
 562 // ----------------------------------------------------------------------------
 563 // UTF-8
 564 // ----------------------------------------------------------------------------
 565
 566 static wxUint32 utf8_max[]=
 567     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 568
// Boundaries of the private use area range used to (temporarily) remap
// bytes that are invalid in a UTF-8 encoded string: such a byte b is mapped
// to wxUnicodePUA + b. wxUnicodePUAEnd is one past the last value used.
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 573
 574 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 575 {
 576     size_t len = 0;
 577
 578     while (*psz && ((!buf) || (len < n)))
 579     {
 580         const char *opsz = psz;
 581         bool invalid = false;
 582         unsigned char cc = *psz++, fc = cc;
 583         unsigned cnt;
 584         for (cnt = 0; fc & 0x80; cnt++)
 585             fc <<= 1;
 586         if (!cnt)
 587         {
 588             // plain ASCII char
 589             if (buf)
 590                 *buf++ = cc;
 591             len++;
 592
 593             // escape the escape character for octal escapes
 594             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 595                     && cc == '\\' && (!buf || len < n))
 596             {
 597                 if (buf)
 598                     *buf++ = cc;
 599                 len++;
 600             }
 601         }
 602         else
 603         {
 604             cnt--;
 605             if (!cnt)
 606             {
 607                 // invalid UTF-8 sequence
 608                 invalid = true;
 609             }
 610             else
 611             {
 612                 unsigned ocnt = cnt - 1;
 613                 wxUint32 res = cc & (0x3f >> cnt);
 614                 while (cnt--)
 615                 {
 616                     cc = *psz;
 617                     if ((cc & 0xC0) != 0x80)
 618                     {
 619                         // invalid UTF-8 sequence
 620                         invalid = true;
 621                         break;
 622                     }
 623                     psz++;
 624                     res = (res << 6) | (cc & 0x3f);
 625                 }
 626                 if (invalid || res <= utf8_max[ocnt])
 627                 {
 628                     // illegal UTF-8 encoding
 629                     invalid = true;
 630                 }
 631                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 632                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 633                 {
 634                     // if one of our PUA characters turns up externally
 635                     // it must also be treated as an illegal sequence
 636                     // (a bit like you have to escape an escape character)
 637                     invalid = true;
 638                 }
 639                 else
 640                 {
 641 #ifdef WC_UTF16
 642                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 643                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 644                     if (pa == (size_t)-1)
 645                     {
 646                         invalid = true;
 647                     }
 648                     else
 649                     {
 650                         if (buf)
 651                             buf += pa;
 652                         len += pa;
 653                     }
 654 #else // !WC_UTF16
 655                     if (buf)
 656                         *buf++ = (wchar_t)res;
 657                     len++;
 658 #endif // WC_UTF16/!WC_UTF16
 659                 }
 660             }
 661             if (invalid)
 662             {
 663                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 664                 {
 665                     while (opsz < psz && (!buf || len < n))
 666                     {
 667 #ifdef WC_UTF16
 668                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 669                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 670                         wxASSERT(pa != (size_t)-1);
 671                         if (buf)
 672                             buf += pa;
 673                         opsz++;
 674                         len += pa;
 675 #else
 676                         if (buf)
 677                             *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
 678                         opsz++;
 679                         len++;
 680 #endif
 681                     }
 682                 }
 683                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 684                 {
 685                     while (opsz < psz && (!buf || len < n))
 686                     {
 687                         if ( buf && len + 3 < n )
 688                         {
 689                             unsigned char on = *opsz;
 690                             *buf++ = L'\\';
 691                             *buf++ = (wchar_t)( L'0' + on / 0100 );
 692                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
 693                             *buf++ = (wchar_t)( L'0' + on % 010 );
 694                         }
 695                         opsz++;
 696                         len += 4;
 697                     }
 698                 }
 699                 else // MAP_INVALID_UTF8_NOT
 700                 {
 701                     return (size_t)-1;
 702                 }
 703             }
 704         }
 705     }
 706     if (buf && (len < n))
 707         *buf = 0;
 708     return len;
 709 }
 710
 711 static inline bool isoctal(wchar_t wch)
 712 {
 713     return L'0' <= wch && wch <= L'7';
 714 }
 715
 716 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 717 {
 718     size_t len = 0;
 719
 720     while (*psz && ((!buf) || (len < n)))
 721     {
 722         wxUint32 cc;
 723 #ifdef WC_UTF16
 724         // cast is ok for WC_UTF16
 725         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 726         psz += (pa == (size_t)-1) ? 1 : pa;
 727 #else
 728         cc=(*psz++) & 0x7fffffff;
 729 #endif
 730
 731         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 732                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 733         {
 734             if (buf)
 735                 *buf++ = (char)(cc - wxUnicodePUA);
 736             len++;
 737         }
 738         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 739                     && cc == L'\\' && psz[0] == L'\\' )
 740         {
 741             if (buf)
 742                 *buf++ = (char)cc;
 743             psz++;
 744             len++;
 745         }
 746         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 747                     cc == L'\\' &&
 748                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 749         {
 750             if (buf)
 751             {
 752                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 753                                  (psz[1] - L'0')*010 +
 754                                  (psz[2] - L'0'));
 755             }
 756
 757             psz += 3;
 758             len++;
 759         }
 760         else
 761         {
 762             unsigned cnt;
 763             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 764             if (!cnt)
 765             {
 766                 // plain ASCII char
 767                 if (buf)
 768                     *buf++ = (char) cc;
 769                 len++;
 770             }
 771
 772             else
 773             {
 774                 len += cnt + 1;
 775                 if (buf)
 776                 {
 777                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 778                     while (cnt--)
 779                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 780                 }
 781             }
 782         }
 783     }
 784
 785     if (buf && (len<n))
 786         *buf = 0;
 787
 788     return len;
 789 }
 790
 791 // ----------------------------------------------------------------------------
 792 // UTF-16
 793 // ----------------------------------------------------------------------------
 794
 795 #ifdef WORDS_BIGENDIAN
 796     #define wxMBConvUTF16straight wxMBConvUTF16BE
 797     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 798 #else
 799     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 800     #define wxMBConvUTF16straight wxMBConvUTF16LE
 801 #endif
 802
 803
 804 #ifdef WC_UTF16
 805
 806 // copy 16bit MB to 16bit String
 807 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 808 {
 809     size_t len=0;
 810
 811     while (*(wxUint16*)psz && (!buf || len < n))
 812     {
 813         if (buf)
 814             *buf++ = *(wxUint16*)psz;
 815         len++;
 816
 817         psz += sizeof(wxUint16);
 818     }
 819     if (buf && len<n)   *buf=0;
 820
 821     return len;
 822 }
 823
 824
 825 // copy 16bit String to 16bit MB
 826 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 827 {
 828     size_t len=0;
 829
 830     while (*psz && (!buf || len < n))
 831     {
 832         if (buf)
 833         {
 834             *(wxUint16*)buf = *psz;
 835             buf += sizeof(wxUint16);
 836         }
 837         len += sizeof(wxUint16);
 838         psz++;
 839     }
 840     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 841
 842     return len;
 843 }
 844
 845
 846 // swap 16bit MB to 16bit String
 847 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 848 {
 849     size_t len=0;
 850
 851     while (*(wxUint16*)psz && (!buf || len < n))
 852     {
 853         if (buf)
 854         {
 855             ((char *)buf)[0] = psz[1];
 856             ((char *)buf)[1] = psz[0];
 857             buf++;
 858         }
 859         len++;
 860         psz += sizeof(wxUint16);
 861     }
 862     if (buf && len<n)   *buf=0;
 863
 864     return len;
 865 }
 866
 867
 868 // swap 16bit MB to 16bit String
 869 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 870 {
 871     size_t len=0;
 872
 873     while (*psz && (!buf || len < n))
 874     {
 875         if (buf)
 876         {
 877             *buf++ = ((char*)psz)[1];
 878             *buf++ = ((char*)psz)[0];
 879         }
 880         len += sizeof(wxUint16);
 881         psz++;
 882     }
 883     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 884
 885     return len;
 886 }
 887
 888
 889 #else // WC_UTF16
 890
 891
 892 // copy 16bit MB to 32bit String
 893 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 894 {
 895     size_t len=0;
 896
 897     while (*(wxUint16*)psz && (!buf || len < n))
 898     {
 899         wxUint32 cc;
 900         size_t pa=decode_utf16((wxUint16*)psz, cc);
 901         if (pa == (size_t)-1)
 902             return pa;
 903
 904         if (buf)
 905             *buf++ = (wchar_t)cc;
 906         len++;
 907         psz += pa * sizeof(wxUint16);
 908     }
 909     if (buf && len<n)   *buf=0;
 910
 911     return len;
 912 }
 913
 914
 915 // copy 32bit String to 16bit MB
 916 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 917 {
 918     size_t len=0;
 919
 920     while (*psz && (!buf || len < n))
 921     {
 922         wxUint16 cc[2];
 923         size_t pa=encode_utf16(*psz, cc);
 924
 925         if (pa == (size_t)-1)
 926             return pa;
 927
 928         if (buf)
 929         {
 930             *(wxUint16*)buf = cc[0];
 931             buf += sizeof(wxUint16);
 932             if (pa > 1)
 933             {
 934                 *(wxUint16*)buf = cc[1];
 935                 buf += sizeof(wxUint16);
 936             }
 937         }
 938
 939         len += pa*sizeof(wxUint16);
 940         psz++;
 941     }
 942     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 943
 944     return len;
 945 }
 946
 947
 948 // swap 16bit MB to 32bit String
 949 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 950 {
 951     size_t len=0;
 952
 953     while (*(wxUint16*)psz && (!buf || len < n))
 954     {
 955         wxUint32 cc;
 956         char tmp[4];
 957         tmp[0]=psz[1];  tmp[1]=psz[0];
 958         tmp[2]=psz[3];  tmp[3]=psz[2];
 959
 960         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 961         if (pa == (size_t)-1)
 962             return pa;
 963
 964         if (buf)
 965             *buf++ = (wchar_t)cc;
 966
 967         len++;
 968         psz += pa * sizeof(wxUint16);
 969     }
 970     if (buf && len<n)   *buf=0;
 971
 972     return len;
 973 }
 974
 975
 976 // swap 32bit String to 16bit MB
 977 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 978 {
 979     size_t len=0;
 980
 981     while (*psz && (!buf || len < n))
 982     {
 983         wxUint16 cc[2];
 984         size_t pa=encode_utf16(*psz, cc);
 985
 986         if (pa == (size_t)-1)
 987             return pa;
 988
 989         if (buf)
 990         {
 991             *buf++ = ((char*)cc)[1];
 992             *buf++ = ((char*)cc)[0];
 993             if (pa > 1)
 994             {
 995                 *buf++ = ((char*)cc)[3];
 996                 *buf++ = ((char*)cc)[2];
 997             }
 998         }
 999
1000         len += pa*sizeof(wxUint16);
1001         psz++;
1002     }
1003     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1004
1005     return len;
1006 }
1007
1008 #endif // WC_UTF16
1009
1010
1011 // ----------------------------------------------------------------------------
1012 // UTF-32
1013 // ----------------------------------------------------------------------------
1014
1015 #ifdef WORDS_BIGENDIAN
1016 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1017 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1018 #else
1019 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1020 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1021 #endif
1022
1023
1024 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1025 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1026
1027
1028 #ifdef WC_UTF16
1029
1030 // copy 32bit MB to 16bit String
1031 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1032 {
1033     size_t len=0;
1034
1035     while (*(wxUint32*)psz && (!buf || len < n))
1036     {
1037         wxUint16 cc[2];
1038
1039         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1040         if (pa == (size_t)-1)
1041             return pa;
1042
1043         if (buf)
1044         {
1045             *buf++ = cc[0];
1046             if (pa > 1)
1047                 *buf++ = cc[1];
1048         }
1049         len += pa;
1050         psz += sizeof(wxUint32);
1051     }
1052     if (buf && len<n)   *buf=0;
1053
1054     return len;
1055 }
1056
1057
1058 // copy 16bit String to 32bit MB
1059 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1060 {
1061     size_t len=0;
1062
1063     while (*psz && (!buf || len < n))
1064     {
1065         wxUint32 cc;
1066
1067         // cast is ok for WC_UTF16
1068         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1069         if (pa == (size_t)-1)
1070             return pa;
1071
1072         if (buf)
1073         {
1074             *(wxUint32*)buf = cc;
1075             buf += sizeof(wxUint32);
1076         }
1077         len += sizeof(wxUint32);
1078         psz += pa;
1079     }
1080
1081     if (buf && len<=n-sizeof(wxUint32))
1082         *(wxUint32*)buf=0;
1083
1084     return len;
1085 }
1086
1087
1088
1089 // swap 32bit MB to 16bit String
1090 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1091 {
1092     size_t len=0;
1093
1094     while (*(wxUint32*)psz && (!buf || len < n))
1095     {
1096         char tmp[4];
1097         tmp[0] = psz[3];   tmp[1] = psz[2];
1098         tmp[2] = psz[1];   tmp[3] = psz[0];
1099
1100
1101         wxUint16 cc[2];
1102
1103         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1104         if (pa == (size_t)-1)
1105             return pa;
1106
1107         if (buf)
1108         {
1109             *buf++ = cc[0];
1110             if (pa > 1)
1111                 *buf++ = cc[1];
1112         }
1113         len += pa;
1114         psz += sizeof(wxUint32);
1115     }
1116
1117     if (buf && len<n)
1118         *buf=0;
1119
1120     return len;
1121 }
1122
1123
1124 // swap 16bit String to 32bit MB
1125 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126 {
1127     size_t len=0;
1128
1129     while (*psz && (!buf || len < n))
1130     {
1131         char cc[4];
1132
1133         // cast is ok for WC_UTF16
1134         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1135         if (pa == (size_t)-1)
1136             return pa;
1137
1138         if (buf)
1139         {
1140             *buf++ = cc[3];
1141             *buf++ = cc[2];
1142             *buf++ = cc[1];
1143             *buf++ = cc[0];
1144         }
1145         len += sizeof(wxUint32);
1146         psz += pa;
1147     }
1148
1149     if (buf && len<=n-sizeof(wxUint32))
1150         *(wxUint32*)buf=0;
1151
1152     return len;
1153 }
1154
1155 #else // WC_UTF16
1156
1157
1158 // copy 32bit MB to 32bit String
1159 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1160 {
1161     size_t len=0;
1162
1163     while (*(wxUint32*)psz && (!buf || len < n))
1164     {
1165         if (buf)
1166             *buf++ = (wchar_t)(*(wxUint32*)psz);
1167         len++;
1168         psz += sizeof(wxUint32);
1169     }
1170
1171     if (buf && len<n)
1172         *buf=0;
1173
1174     return len;
1175 }
1176
1177
1178 // copy 32bit String to 32bit MB
1179 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1180 {
1181     size_t len=0;
1182
1183     while (*psz && (!buf || len < n))
1184     {
1185         if (buf)
1186         {
1187             *(wxUint32*)buf = *psz;
1188             buf += sizeof(wxUint32);
1189         }
1190
1191         len += sizeof(wxUint32);
1192         psz++;
1193     }
1194
1195     if (buf && len<=n-sizeof(wxUint32))
1196         *(wxUint32*)buf=0;
1197
1198     return len;
1199 }
1200
1201
1202 // swap 32bit MB to 32bit String
1203 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1204 {
1205     size_t len=0;
1206
1207     while (*(wxUint32*)psz && (!buf || len < n))
1208     {
1209         if (buf)
1210         {
1211             ((char *)buf)[0] = psz[3];
1212             ((char *)buf)[1] = psz[2];
1213             ((char *)buf)[2] = psz[1];
1214             ((char *)buf)[3] = psz[0];
1215             buf++;
1216         }
1217         len++;
1218         psz += sizeof(wxUint32);
1219     }
1220
1221     if (buf && len<n)
1222         *buf=0;
1223
1224     return len;
1225 }
1226
1227
1228 // swap 32bit String to 32bit MB
1229 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1230 {
1231     size_t len=0;
1232
1233     while (*psz && (!buf || len < n))
1234     {
1235         if (buf)
1236         {
1237             *buf++ = ((char *)psz)[3];
1238             *buf++ = ((char *)psz)[2];
1239             *buf++ = ((char *)psz)[1];
1240             *buf++ = ((char *)psz)[0];
1241         }
1242         len += sizeof(wxUint32);
1243         psz++;
1244     }
1245
1246     if (buf && len<=n-sizeof(wxUint32))
1247         *(wxUint32*)buf=0;
1248
1249     return len;
1250 }
1251
1252
1253 #endif // WC_UTF16
1254
1255
1256 // ============================================================================
1257 // The classes doing conversion using the iconv_xxx() functions
1258 // ============================================================================
1259
1260 #ifdef HAVE_ICONV
1261
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if the output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when a _real_ error occurs, the number of
//     bytes left in the input buffer is non-zero. Hence this alternative test
//     for iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// NB: the macro arguments are parenthesized so that arbitrary expressions
//     and not only plain variables can be passed to ICONV_FAILED()
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type used for the second
// iconv() argument
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// value of a conversion descriptor which is not open
#define ICONV_T_INVALID ((iconv_t)-1)
1280
1281 #if SIZEOF_WCHAR_T == 4
1282     #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1283     #define WC_ENC      wxFONTENCODING_UTF32
1284 #elif SIZEOF_WCHAR_T == 2
1285     #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1286     #define WC_ENC      wxFONTENCODING_UTF16
1287 #else // sizeof(wchar_t) != 2 nor 4
1288     // does this ever happen?
1289     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1290 #endif
1291
1292 // ----------------------------------------------------------------------------
1293 // wxMBConv_iconv: encapsulates an iconv character set
1294 // ----------------------------------------------------------------------------
1295
1296 class wxMBConv_iconv : public wxMBConv
1297 {
1298 public:
1299     wxMBConv_iconv(const wxChar *name);
1300     virtual ~wxMBConv_iconv();
1301
1302     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1303     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1304
1305     bool IsOk() const
1306         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1307
1308 protected:
1309     // the iconv handlers used to translate from multibyte to wide char and in
1310     // the other direction
1311     iconv_t m2w,
1312             w2m;
1313 #if wxUSE_THREADS
1314     // guards access to m2w and w2m objects
1315     wxMutex m_iconvMutex;
1316 #endif
1317
1318 private:
1319     // the name (for iconv_open()) of a wide char charset -- if none is
1320     // available on this machine, it will remain NULL
1321     static wxString ms_wcCharsetName;
1322
1323     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1324     // different endian-ness than the native one
1325     static bool ms_wcNeedsSwap;
1326 };
1327
1328 // make the constructor available for unit testing
1329 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1330 {
1331     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1332     if ( !result->IsOk() )
1333     {
1334         delete result;
1335         return 0;
1336     }
1337     return result;
1338 }
1339
1340 wxString wxMBConv_iconv::ms_wcCharsetName;
1341 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1342
1343 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1344 {
1345     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1346     // names for the charsets
1347     const wxCharBuffer cname(wxString(name).ToAscii());
1348
1349     // check for charset that represents wchar_t:
1350     if ( ms_wcCharsetName.empty() )
1351     {
1352         wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1353
1354 #if wxUSE_FONTMAP
1355         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1356 #else // !wxUSE_FONTMAP
1357         static const wxChar *names[] =
1358         {
1359 #if SIZEOF_WCHAR_T == 4
1360             _T("UCS-4"),
1361 #elif SIZEOF_WCHAR_T = 2
1362             _T("UCS-2"),
1363 #endif
1364             NULL
1365         };
1366 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1367
1368         for ( ; *names && ms_wcCharsetName.empty(); ++names )
1369         {
1370             const wxString nameCS(*names);
1371
1372             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1373             wxString nameXE(nameCS);
1374             #ifdef WORDS_BIGENDIAN
1375                 nameXE += _T("BE");
1376             #else // little endian
1377                 nameXE += _T("LE");
1378             #endif
1379
1380             wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1381                        nameXE.c_str());
1382
1383             m2w = iconv_open(nameXE.ToAscii(), cname);
1384             if ( m2w == ICONV_T_INVALID )
1385             {
1386                 // try charset w/o bytesex info (e.g. "UCS4")
1387                 wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1388                            nameCS.c_str());
1389                 m2w = iconv_open(nameCS.ToAscii(), cname);
1390
1391                 // and check for bytesex ourselves:
1392                 if ( m2w != ICONV_T_INVALID )
1393                 {
1394                     char    buf[2], *bufPtr;
1395                     wchar_t wbuf[2], *wbufPtr;
1396                     size_t  insz, outsz;
1397                     size_t  res;
1398
1399                     buf[0] = 'A';
1400                     buf[1] = 0;
1401                     wbuf[0] = 0;
1402                     insz = 2;
1403                     outsz = SIZEOF_WCHAR_T * 2;
1404                     wbufPtr = wbuf;
1405                     bufPtr = buf;
1406
1407                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1408                                 (char**)&wbufPtr, &outsz);
1409
1410                     if (ICONV_FAILED(res, insz))
1411                     {
1412                         wxLogLastError(wxT("iconv"));
1413                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1414                                    nameCS.c_str());
1415                     }
1416                     else // ok, can convert to this encoding, remember it
1417                     {
1418                         ms_wcCharsetName = nameCS;
1419                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1420                     }
1421                 }
1422             }
1423             else // use charset not requiring byte swapping
1424             {
1425                 ms_wcCharsetName = nameXE;
1426             }
1427         }
1428
1429         wxLogTrace(TRACE_STRCONV,
1430                    wxT("iconv wchar_t charset is \"%s\"%s"),
1431                    ms_wcCharsetName.empty() ? _T("<none>")
1432                                             : ms_wcCharsetName.c_str(),
1433                    ms_wcNeedsSwap ? _T(" (needs swap)")
1434                                   : _T(""));
1435     }
1436     else // we already have ms_wcCharsetName
1437     {
1438         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1439     }
1440
1441     if ( ms_wcCharsetName.empty() )
1442     {
1443         w2m = ICONV_T_INVALID;
1444     }
1445     else
1446     {
1447         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1448         if ( w2m == ICONV_T_INVALID )
1449         {
1450             wxLogTrace(TRACE_STRCONV,
1451                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1452                        ms_wcCharsetName.c_str(), cname.data());
1453         }
1454     }
1455 }
1456
1457 wxMBConv_iconv::~wxMBConv_iconv()
1458 {
1459     if ( m2w != ICONV_T_INVALID )
1460         iconv_close(m2w);
1461     if ( w2m != ICONV_T_INVALID )
1462         iconv_close(w2m);
1463 }
1464
1465 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1466 {
1467 #if wxUSE_THREADS
1468     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1469     //     Unfortunately there is a couple of global wxCSConv objects such as
1470     //     wxConvLocal that are used all over wx code, so we have to make sure
1471     //     the handle is used by at most one thread at the time. Otherwise
1472     //     only a few wx classes would be safe to use from non-main threads
1473     //     as MB<->WC conversion would fail "randomly".
1474     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1475 #endif
1476
1477     size_t inbuf = strlen(psz);
1478     size_t outbuf = n * SIZEOF_WCHAR_T;
1479     size_t res, cres;
1480     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1481     wchar_t *bufPtr = buf;
1482     const char *pszPtr = psz;
1483
1484     if (buf)
1485     {
1486         // have destination buffer, convert there
1487         cres = iconv(m2w,
1488                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1489                      (char**)&bufPtr, &outbuf);
1490         res = n - (outbuf / SIZEOF_WCHAR_T);
1491
1492         if (ms_wcNeedsSwap)
1493         {
1494             // convert to native endianness
1495             for ( unsigned i = 0; i < res; i++ )
1496                 buf[n] = WC_BSWAP(buf[i]);
1497         }
1498
1499         // NB: iconv was given only strlen(psz) characters on input, and so
1500         //     it couldn't convert the trailing zero. Let's do it ourselves
1501         //     if there's some room left for it in the output buffer.
1502         if (res < n)
1503             buf[res] = 0;
1504     }
1505     else
1506     {
1507         // no destination buffer... convert using temp buffer
1508         // to calculate destination buffer requirement
1509         wchar_t tbuf[8];
1510         res = 0;
1511         do {
1512             bufPtr = tbuf;
1513             outbuf = 8*SIZEOF_WCHAR_T;
1514
1515             cres = iconv(m2w,
1516                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1517                          (char**)&bufPtr, &outbuf );
1518
1519             res += 8-(outbuf/SIZEOF_WCHAR_T);
1520         } while ((cres==(size_t)-1) && (errno==E2BIG));
1521     }
1522
1523     if (ICONV_FAILED(cres, inbuf))
1524     {
1525         //VS: it is ok if iconv fails, hence trace only
1526         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1527         return (size_t)-1;
1528     }
1529
1530     return res;
1531 }
1532
1533 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1534 {
1535 #if wxUSE_THREADS
1536     // NB: explained in MB2WC
1537     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1538 #endif
1539
1540     size_t inlen = wxWcslen(psz);
1541     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1542     size_t outbuf = n;
1543     size_t res, cres;
1544
1545     wchar_t *tmpbuf = 0;
1546
1547     if (ms_wcNeedsSwap)
1548     {
1549         // need to copy to temp buffer to switch endianness
1550         // (doing WC_BSWAP twice on the original buffer won't help, as it
1551         //  could be in read-only memory, or be accessed in some other thread)
1552         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1553         for ( size_t i = 0; i < inlen; i++ )
1554             tmpbuf[n] = WC_BSWAP(psz[i]);
1555         tmpbuf[inlen] = L'\0';
1556         psz = tmpbuf;
1557     }
1558
1559     if (buf)
1560     {
1561         // have destination buffer, convert there
1562         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1563
1564         res = n-outbuf;
1565
1566         // NB: iconv was given only wcslen(psz) characters on input, and so
1567         //     it couldn't convert the trailing zero. Let's do it ourselves
1568         //     if there's some room left for it in the output buffer.
1569         if (res < n)
1570             buf[0] = 0;
1571     }
1572     else
1573     {
1574         // no destination buffer... convert using temp buffer
1575         // to calculate destination buffer requirement
1576         char tbuf[16];
1577         res = 0;
1578         do {
1579             buf = tbuf; outbuf = 16;
1580
1581             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1582
1583             res += 16 - outbuf;
1584         } while ((cres==(size_t)-1) && (errno==E2BIG));
1585     }
1586
1587     if (ms_wcNeedsSwap)
1588     {
1589         free(tmpbuf);
1590     }
1591
1592     if (ICONV_FAILED(cres, inbuf))
1593     {
1594         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1595         return (size_t)-1;
1596     }
1597
1598     return res;
1599 }
1600
1601 #endif // HAVE_ICONV
1602
1603
1604 // ============================================================================
1605 // Win32 conversion classes
1606 // ============================================================================
1607
1608 #ifdef wxHAVE_WIN32_MB2WC
1609
1610 // from utils.cpp
1611 #if wxUSE_FONTMAP
1612 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1613 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1614 #endif
1615
1616 class wxMBConv_win32 : public wxMBConv
1617 {
1618 public:
1619     wxMBConv_win32()
1620     {
1621         m_CodePage = CP_ACP;
1622     }
1623
1624 #if wxUSE_FONTMAP
1625     wxMBConv_win32(const wxChar* name)
1626     {
1627         m_CodePage = wxCharsetToCodepage(name);
1628     }
1629
1630     wxMBConv_win32(wxFontEncoding encoding)
1631     {
1632         m_CodePage = wxEncodingToCodepage(encoding);
1633     }
1634 #endif
1635
1636     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1637     {
1638         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1639         // the behaviour is not compatible with the Unix version (using iconv)
1640         // and break the library itself, e.g. wxTextInputStream::NextChar()
1641         // wouldn't work if reading an incomplete MB char didn't result in an
1642         // error
1643         //
1644         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1645         // an error (tested under Windows Server 2003) and apparently it is
1646         // done on purpose, i.e. the function accepts any input in this case
1647         // and although I'd prefer to return error on ill-formed output, our
1648         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1649         // explicitly ill-formed according to RFC 2152) neither so we don't
1650         // even have any fallback here...
1651         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1652
1653         const size_t len = ::MultiByteToWideChar
1654                              (
1655                                 m_CodePage,     // code page
1656                                 flags,          // flags: fall on error
1657                                 psz,            // input string
1658                                 -1,             // its length (NUL-terminated)
1659                                 buf,            // output string
1660                                 buf ? n : 0     // size of output buffer
1661                              );
1662
1663         // note that it returns count of written chars for buf != NULL and size
1664         // of the needed buffer for buf == NULL so in either case the length of
1665         // the string (which never includes the terminating NUL) is one less
1666         return len ? len - 1 : (size_t)-1;
1667     }
1668
1669     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1670     {
1671         /*
1672             we have a problem here: by default, WideCharToMultiByte() may
1673             replace characters unrepresentable in the target code page with bad
1674             quality approximations such as turning "1/2" symbol (U+00BD) into
1675             "1" for the code pages which don't have it and we, obviously, want
1676             to avoid this at any price
1677
1678             the trouble is that this function does it _silently_, i.e. it won't
1679             even tell us whether it did or not... Win98/2000 and higher provide
1680             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1681             we have to resort to a round trip, i.e. check that converting back
1682             results in the same string -- this is, of course, expensive but
1683             otherwise we simply can't be sure to not garble the data.
1684          */
1685
1686         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1687         // it doesn't work with CJK encodings (which we test for rather roughly
1688         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1689         // supporting it
1690         BOOL usedDef wxDUMMY_INITIALIZE(false);
1691         BOOL *pUsedDef;
1692         int flags;
1693         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1694         {
1695             // it's our lucky day
1696             flags = WC_NO_BEST_FIT_CHARS;
1697             pUsedDef = &usedDef;
1698         }
1699         else // old system or unsupported encoding
1700         {
1701             flags = 0;
1702             pUsedDef = NULL;
1703         }
1704
1705         const size_t len = ::WideCharToMultiByte
1706                              (
1707                                 m_CodePage,     // code page
1708                                 flags,          // either none or no best fit
1709                                 pwz,            // input string
1710                                 -1,             // it is (wide) NUL-terminated
1711                                 buf,            // output buffer
1712                                 buf ? n : 0,    // and its size
1713                                 NULL,           // default "replacement" char
1714                                 pUsedDef        // [out] was it used?
1715                              );
1716
1717         if ( !len )
1718         {
1719             // function totally failed
1720             return (size_t)-1;
1721         }
1722
1723         // if we were really converting, check if we succeeded
1724         if ( buf )
1725         {
1726             if ( flags )
1727             {
1728                 // check if the conversion failed, i.e. if any replacements
1729                 // were done
1730                 if ( usedDef )
1731                     return (size_t)-1;
1732             }
1733             else // we must resort to double tripping...
1734             {
1735                 wxWCharBuffer wcBuf(n);
1736                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1737                         wcscmp(wcBuf, pwz) != 0 )
1738                 {
1739                     // we didn't obtain the same thing we started from, hence
1740                     // the conversion was lossy and we consider that it failed
1741                     return (size_t)-1;
1742                 }
1743             }
1744         }
1745
1746         // see the comment above for the reason of "len - 1"
1747         return len - 1;
1748     }
1749
1750     bool IsOk() const { return m_CodePage != -1; }
1751
1752 private:
1753     static bool CanUseNoBestFit()
1754     {
1755         static int s_isWin98Or2k = -1;
1756
1757         if ( s_isWin98Or2k == -1 )
1758         {
1759             int verMaj, verMin;
1760             switch ( wxGetOsVersion(&verMaj, &verMin) )
1761             {
1762                 case wxWIN95:
1763                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1764                     break;
1765
1766                 case wxWINDOWS_NT:
1767                     s_isWin98Or2k = verMaj >= 5;
1768                     break;
1769
1770                 default:
1771                     // unknown, be conseravtive by default
1772                     s_isWin98Or2k = 0;
1773             }
1774
1775             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1776         }
1777
1778         return s_isWin98Or2k == 1;
1779     }
1780
1781     long m_CodePage;
1782 };
1783
1784 #endif // wxHAVE_WIN32_MB2WC
1785
1786 // ============================================================================
1787 // Cocoa conversion classes
1788 // ============================================================================
1789
1790 #if defined(__WXCOCOA__)
1791
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1795
1796 #include <CoreFoundation/CFString.h>
1797 #include <CoreFoundation/CFStringEncodingExt.h>
1798
1799 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1800 {
1801     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1802     if ( encoding == wxFONTENCODING_DEFAULT )
1803     {
1804         enc = CFStringGetSystemEncoding();
1805     }
1806     else switch( encoding)
1807     {
1808         case wxFONTENCODING_ISO8859_1 :
1809             enc = kCFStringEncodingISOLatin1 ;
1810             break ;
1811         case wxFONTENCODING_ISO8859_2 :
1812             enc = kCFStringEncodingISOLatin2;
1813             break ;
1814         case wxFONTENCODING_ISO8859_3 :
1815             enc = kCFStringEncodingISOLatin3 ;
1816             break ;
1817         case wxFONTENCODING_ISO8859_4 :
1818             enc = kCFStringEncodingISOLatin4;
1819             break ;
1820         case wxFONTENCODING_ISO8859_5 :
1821             enc = kCFStringEncodingISOLatinCyrillic;
1822             break ;
1823         case wxFONTENCODING_ISO8859_6 :
1824             enc = kCFStringEncodingISOLatinArabic;
1825             break ;
1826         case wxFONTENCODING_ISO8859_7 :
1827             enc = kCFStringEncodingISOLatinGreek;
1828             break ;
1829         case wxFONTENCODING_ISO8859_8 :
1830             enc = kCFStringEncodingISOLatinHebrew;
1831             break ;
1832         case wxFONTENCODING_ISO8859_9 :
1833             enc = kCFStringEncodingISOLatin5;
1834             break ;
1835         case wxFONTENCODING_ISO8859_10 :
1836             enc = kCFStringEncodingISOLatin6;
1837             break ;
1838         case wxFONTENCODING_ISO8859_11 :
1839             enc = kCFStringEncodingISOLatinThai;
1840             break ;
1841         case wxFONTENCODING_ISO8859_13 :
1842             enc = kCFStringEncodingISOLatin7;
1843             break ;
1844         case wxFONTENCODING_ISO8859_14 :
1845             enc = kCFStringEncodingISOLatin8;
1846             break ;
1847         case wxFONTENCODING_ISO8859_15 :
1848             enc = kCFStringEncodingISOLatin9;
1849             break ;
1850
1851         case wxFONTENCODING_KOI8 :
1852             enc = kCFStringEncodingKOI8_R;
1853             break ;
1854         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1855             enc = kCFStringEncodingDOSRussian;
1856             break ;
1857
1858 //      case wxFONTENCODING_BULGARIAN :
1859 //          enc = ;
1860 //          break ;
1861
1862         case wxFONTENCODING_CP437 :
1863             enc =kCFStringEncodingDOSLatinUS ;
1864             break ;
1865         case wxFONTENCODING_CP850 :
1866             enc = kCFStringEncodingDOSLatin1;
1867             break ;
1868         case wxFONTENCODING_CP852 :
1869             enc = kCFStringEncodingDOSLatin2;
1870             break ;
1871         case wxFONTENCODING_CP855 :
1872             enc = kCFStringEncodingDOSCyrillic;
1873             break ;
1874         case wxFONTENCODING_CP866 :
1875             enc =kCFStringEncodingDOSRussian ;
1876             break ;
1877         case wxFONTENCODING_CP874 :
1878             enc = kCFStringEncodingDOSThai;
1879             break ;
1880         case wxFONTENCODING_CP932 :
1881             enc = kCFStringEncodingDOSJapanese;
1882             break ;
1883         case wxFONTENCODING_CP936 :
1884             enc =kCFStringEncodingDOSChineseSimplif ;
1885             break ;
1886         case wxFONTENCODING_CP949 :
1887             enc = kCFStringEncodingDOSKorean;
1888             break ;
1889         case wxFONTENCODING_CP950 :
1890             enc = kCFStringEncodingDOSChineseTrad;
1891             break ;
1892         case wxFONTENCODING_CP1250 :
1893             enc = kCFStringEncodingWindowsLatin2;
1894             break ;
1895         case wxFONTENCODING_CP1251 :
1896             enc =kCFStringEncodingWindowsCyrillic ;
1897             break ;
1898         case wxFONTENCODING_CP1252 :
1899             enc =kCFStringEncodingWindowsLatin1 ;
1900             break ;
1901         case wxFONTENCODING_CP1253 :
1902             enc = kCFStringEncodingWindowsGreek;
1903             break ;
1904         case wxFONTENCODING_CP1254 :
1905             enc = kCFStringEncodingWindowsLatin5;
1906             break ;
1907         case wxFONTENCODING_CP1255 :
1908             enc =kCFStringEncodingWindowsHebrew ;
1909             break ;
1910         case wxFONTENCODING_CP1256 :
1911             enc =kCFStringEncodingWindowsArabic ;
1912             break ;
1913         case wxFONTENCODING_CP1257 :
1914             enc = kCFStringEncodingWindowsBalticRim;
1915             break ;
1916 //   This only really encodes to UTF7 (if that) evidently
1917 //        case wxFONTENCODING_UTF7 :
1918 //            enc = kCFStringEncodingNonLossyASCII ;
1919 //            break ;
1920         case wxFONTENCODING_UTF8 :
1921             enc = kCFStringEncodingUTF8 ;
1922             break ;
1923         case wxFONTENCODING_EUC_JP :
1924             enc = kCFStringEncodingEUC_JP;
1925             break ;
1926         case wxFONTENCODING_UTF16 :
1927             enc = kCFStringEncodingUnicode ;
1928             break ;
1929         case wxFONTENCODING_MACROMAN :
1930             enc = kCFStringEncodingMacRoman ;
1931             break ;
1932         case wxFONTENCODING_MACJAPANESE :
1933             enc = kCFStringEncodingMacJapanese ;
1934             break ;
1935         case wxFONTENCODING_MACCHINESETRAD :
1936             enc = kCFStringEncodingMacChineseTrad ;
1937             break ;
1938         case wxFONTENCODING_MACKOREAN :
1939             enc = kCFStringEncodingMacKorean ;
1940             break ;
1941         case wxFONTENCODING_MACARABIC :
1942             enc = kCFStringEncodingMacArabic ;
1943             break ;
1944         case wxFONTENCODING_MACHEBREW :
1945             enc = kCFStringEncodingMacHebrew ;
1946             break ;
1947         case wxFONTENCODING_MACGREEK :
1948             enc = kCFStringEncodingMacGreek ;
1949             break ;
1950         case wxFONTENCODING_MACCYRILLIC :
1951             enc = kCFStringEncodingMacCyrillic ;
1952             break ;
1953         case wxFONTENCODING_MACDEVANAGARI :
1954             enc = kCFStringEncodingMacDevanagari ;
1955             break ;
1956         case wxFONTENCODING_MACGURMUKHI :
1957             enc = kCFStringEncodingMacGurmukhi ;
1958             break ;
1959         case wxFONTENCODING_MACGUJARATI :
1960             enc = kCFStringEncodingMacGujarati ;
1961             break ;
1962         case wxFONTENCODING_MACORIYA :
1963             enc = kCFStringEncodingMacOriya ;
1964             break ;
1965         case wxFONTENCODING_MACBENGALI :
1966             enc = kCFStringEncodingMacBengali ;
1967             break ;
1968         case wxFONTENCODING_MACTAMIL :
1969             enc = kCFStringEncodingMacTamil ;
1970             break ;
1971         case wxFONTENCODING_MACTELUGU :
1972             enc = kCFStringEncodingMacTelugu ;
1973             break ;
1974         case wxFONTENCODING_MACKANNADA :
1975             enc = kCFStringEncodingMacKannada ;
1976             break ;
1977         case wxFONTENCODING_MACMALAJALAM :
1978             enc = kCFStringEncodingMacMalayalam ;
1979             break ;
1980         case wxFONTENCODING_MACSINHALESE :
1981             enc = kCFStringEncodingMacSinhalese ;
1982             break ;
1983         case wxFONTENCODING_MACBURMESE :
1984             enc = kCFStringEncodingMacBurmese ;
1985             break ;
1986         case wxFONTENCODING_MACKHMER :
1987             enc = kCFStringEncodingMacKhmer ;
1988             break ;
1989         case wxFONTENCODING_MACTHAI :
1990             enc = kCFStringEncodingMacThai ;
1991             break ;
1992         case wxFONTENCODING_MACLAOTIAN :
1993             enc = kCFStringEncodingMacLaotian ;
1994             break ;
1995         case wxFONTENCODING_MACGEORGIAN :
1996             enc = kCFStringEncodingMacGeorgian ;
1997             break ;
1998         case wxFONTENCODING_MACARMENIAN :
1999             enc = kCFStringEncodingMacArmenian ;
2000             break ;
2001         case wxFONTENCODING_MACCHINESESIMP :
2002             enc = kCFStringEncodingMacChineseSimp ;
2003             break ;
2004         case wxFONTENCODING_MACTIBETAN :
2005             enc = kCFStringEncodingMacTibetan ;
2006             break ;
2007         case wxFONTENCODING_MACMONGOLIAN :
2008             enc = kCFStringEncodingMacMongolian ;
2009             break ;
2010         case wxFONTENCODING_MACETHIOPIC :
2011             enc = kCFStringEncodingMacEthiopic ;
2012             break ;
2013         case wxFONTENCODING_MACCENTRALEUR :
2014             enc = kCFStringEncodingMacCentralEurRoman ;
2015             break ;
2016         case wxFONTENCODING_MACVIATNAMESE :
2017             enc = kCFStringEncodingMacVietnamese ;
2018             break ;
2019         case wxFONTENCODING_MACARABICEXT :
2020             enc = kCFStringEncodingMacExtArabic ;
2021             break ;
2022         case wxFONTENCODING_MACSYMBOL :
2023             enc = kCFStringEncodingMacSymbol ;
2024             break ;
2025         case wxFONTENCODING_MACDINGBATS :
2026             enc = kCFStringEncodingMacDingbats ;
2027             break ;
2028         case wxFONTENCODING_MACTURKISH :
2029             enc = kCFStringEncodingMacTurkish ;
2030             break ;
2031         case wxFONTENCODING_MACCROATIAN :
2032             enc = kCFStringEncodingMacCroatian ;
2033             break ;
2034         case wxFONTENCODING_MACICELANDIC :
2035             enc = kCFStringEncodingMacIcelandic ;
2036             break ;
2037         case wxFONTENCODING_MACROMANIAN :
2038             enc = kCFStringEncodingMacRomanian ;
2039             break ;
2040         case wxFONTENCODING_MACCELTIC :
2041             enc = kCFStringEncodingMacCeltic ;
2042             break ;
2043         case wxFONTENCODING_MACGAELIC :
2044             enc = kCFStringEncodingMacGaelic ;
2045             break ;
2046 //      case wxFONTENCODING_MACKEYBOARD :
2047 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2048 //          break ;
2049         default :
2050             // because gcc is picky
2051             break ;
2052     } ;
2053     return enc ;
2054 }
2055
2056 class wxMBConv_cocoa : public wxMBConv
2057 {
2058 public:
2059     wxMBConv_cocoa()
2060     {
2061         Init(CFStringGetSystemEncoding()) ;
2062     }
2063
2064 #if wxUSE_FONTMAP
2065     wxMBConv_cocoa(const wxChar* name)
2066     {
2067         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2068     }
2069 #endif
2070
2071     wxMBConv_cocoa(wxFontEncoding encoding)
2072     {
2073         Init( wxCFStringEncFromFontEnc(encoding) );
2074     }
2075
2076     ~wxMBConv_cocoa()
2077     {
2078     }
2079
2080     void Init( CFStringEncoding encoding)
2081     {
2082         m_encoding = encoding ;
2083     }
2084
2085     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2086     {
2087         wxASSERT(szUnConv);
2088
2089         CFStringRef theString = CFStringCreateWithBytes (
2090                                                 NULL, //the allocator
2091                                                 (const UInt8*)szUnConv,
2092                                                 strlen(szUnConv),
2093                                                 m_encoding,
2094                                                 false //no BOM/external representation
2095                                                 );
2096
2097         wxASSERT(theString);
2098
2099         size_t nOutLength = CFStringGetLength(theString);
2100
2101         if (szOut == NULL)
2102         {
2103             CFRelease(theString);
2104             return nOutLength;
2105         }
2106
2107         CFRange theRange = { 0, nOutSize };
2108
2109 #if SIZEOF_WCHAR_T == 4
2110         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2111 #endif
2112
2113         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2114
2115         CFRelease(theString);
2116
2117         szUniCharBuffer[nOutLength] = '\0' ;
2118
2119 #if SIZEOF_WCHAR_T == 4
2120         wxMBConvUTF16 converter ;
2121         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2122         delete[] szUniCharBuffer;
2123 #endif
2124
2125         return nOutLength;
2126     }
2127
2128     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2129     {
2130         wxASSERT(szUnConv);
2131
2132         size_t nRealOutSize;
2133         size_t nBufSize = wxWcslen(szUnConv);
2134         UniChar* szUniBuffer = (UniChar*) szUnConv;
2135
2136 #if SIZEOF_WCHAR_T == 4
2137         wxMBConvUTF16 converter ;
2138         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2139         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2140         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2141         nBufSize /= sizeof(UniChar);
2142 #endif
2143
2144         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2145                                 NULL, //allocator
2146                                 szUniBuffer,
2147                                 nBufSize,
2148                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2149                             );
2150
2151         wxASSERT(theString);
2152
2153         //Note that CER puts a BOM when converting to unicode
2154         //so we  check and use getchars instead in that case
2155         if (m_encoding == kCFStringEncodingUnicode)
2156         {
2157             if (szOut != NULL)
2158                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2159
2160             nRealOutSize = CFStringGetLength(theString) + 1;
2161         }
2162         else
2163         {
2164             CFStringGetBytes(
2165                 theString,
2166                 CFRangeMake(0, CFStringGetLength(theString)),
2167                 m_encoding,
2168                 0, //what to put in characters that can't be converted -
2169                     //0 tells CFString to return NULL if it meets such a character
2170                 false, //not an external representation
2171                 (UInt8*) szOut,
2172                 nOutSize,
2173                 (CFIndex*) &nRealOutSize
2174                         );
2175         }
2176
2177         CFRelease(theString);
2178
2179 #if SIZEOF_WCHAR_T == 4
2180         delete[] szUniBuffer;
2181 #endif
2182
2183         return  nRealOutSize - 1;
2184     }
2185
2186     bool IsOk() const
2187     {
2188         return m_encoding != kCFStringEncodingInvalidId &&
2189               CFStringIsEncodingAvailable(m_encoding);
2190     }
2191
2192 private:
2193     CFStringEncoding m_encoding ;
2194 };
2195
2196 #endif // defined(__WXCOCOA__)
2197
2198 // ============================================================================
2199 // Mac conversion classes
2200 // ============================================================================
2201
2202 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2203
2204 class wxMBConv_mac : public wxMBConv
2205 {
2206 public:
2207     wxMBConv_mac()
2208     {
2209         Init(CFStringGetSystemEncoding()) ;
2210     }
2211
2212 #if wxUSE_FONTMAP
2213     wxMBConv_mac(const wxChar* name)
2214     {
2215         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2216     }
2217 #endif
2218
2219     wxMBConv_mac(wxFontEncoding encoding)
2220     {
2221         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2222     }
2223
2224     ~wxMBConv_mac()
2225     {
2226         OSStatus status = noErr ;
2227         status = TECDisposeConverter(m_MB2WC_converter);
2228         status = TECDisposeConverter(m_WC2MB_converter);
2229     }
2230
2231
2232     void Init( TextEncodingBase encoding)
2233     {
2234         OSStatus status = noErr ;
2235         m_char_encoding = encoding ;
2236         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2237
2238         status = TECCreateConverter(&m_MB2WC_converter,
2239                                     m_char_encoding,
2240                                     m_unicode_encoding);
2241         status = TECCreateConverter(&m_WC2MB_converter,
2242                                     m_unicode_encoding,
2243                                     m_char_encoding);
2244     }
2245
2246     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2247     {
2248         OSStatus status = noErr ;
2249         ByteCount byteOutLen ;
2250         ByteCount byteInLen = strlen(psz) ;
2251         wchar_t *tbuf = NULL ;
2252         UniChar* ubuf = NULL ;
2253         size_t res = 0 ;
2254
2255         if (buf == NULL)
2256         {
2257             //apple specs say at least 32
2258             n = wxMax( 32 , byteInLen ) ;
2259             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2260         }
2261         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2262 #if SIZEOF_WCHAR_T == 4
2263         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2264 #else
2265         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2266 #endif
2267         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2268           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2269 #if SIZEOF_WCHAR_T == 4
2270         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2271         // is not properly terminated we get random characters at the end
2272         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2273         wxMBConvUTF16 converter ;
2274         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2275         free( ubuf ) ;
2276 #else
2277         res = byteOutLen / sizeof( UniChar ) ;
2278 #endif
2279         if ( buf == NULL )
2280              free(tbuf) ;
2281
2282         if ( buf  && res < n)
2283             buf[res] = 0;
2284
2285         return res ;
2286     }
2287
2288     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2289     {
2290         OSStatus status = noErr ;
2291         ByteCount byteOutLen ;
2292         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2293
2294         char *tbuf = NULL ;
2295
2296         if (buf == NULL)
2297         {
2298             //apple specs say at least 32
2299             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2300             tbuf = (char*) malloc( n ) ;
2301         }
2302
2303         ByteCount byteBufferLen = n ;
2304         UniChar* ubuf = NULL ;
2305 #if SIZEOF_WCHAR_T == 4
2306         wxMBConvUTF16 converter ;
2307         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2308         byteInLen = unicharlen ;
2309         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2310         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2311 #else
2312         ubuf = (UniChar*) psz ;
2313 #endif
2314         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2315             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2316 #if SIZEOF_WCHAR_T == 4
2317         free( ubuf ) ;
2318 #endif
2319         if ( buf == NULL )
2320             free(tbuf) ;
2321
2322         size_t res = byteOutLen ;
2323         if ( buf  && res < n)
2324         {
2325             buf[res] = 0;
2326
2327             //we need to double-trip to verify it didn't insert any ? in place
2328             //of bogus characters
2329             wxWCharBuffer wcBuf(n);
2330             size_t pszlen = wxWcslen(psz);
2331             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2332                         wxWcslen(wcBuf) != pszlen ||
2333                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2334             {
2335                 // we didn't obtain the same thing we started from, hence
2336                 // the conversion was lossy and we consider that it failed
2337                 return (size_t)-1;
2338             }
2339         }
2340
2341         return res ;
2342     }
2343
2344     bool IsOk() const
2345         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2346
2347 private:
2348     TECObjectRef m_MB2WC_converter ;
2349     TECObjectRef m_WC2MB_converter ;
2350
2351     TextEncodingBase m_char_encoding ;
2352     TextEncodingBase m_unicode_encoding ;
2353 };
2354
2355 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2356
2357 // ============================================================================
2358 // wxEncodingConverter based conversion classes
2359 // ============================================================================
2360
2361 #if wxUSE_FONTMAP
2362
2363 class wxMBConv_wxwin : public wxMBConv
2364 {
2365 private:
2366     void Init()
2367     {
2368         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2369                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2370     }
2371
2372 public:
2373     // temporarily just use wxEncodingConverter stuff,
2374     // so that it works while a better implementation is built
2375     wxMBConv_wxwin(const wxChar* name)
2376     {
2377         if (name)
2378             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2379         else
2380             m_enc = wxFONTENCODING_SYSTEM;
2381
2382         Init();
2383     }
2384
2385     wxMBConv_wxwin(wxFontEncoding enc)
2386     {
2387         m_enc = enc;
2388
2389         Init();
2390     }
2391
2392     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2393     {
2394         size_t inbuf = strlen(psz);
2395         if (buf)
2396         {
2397             if (!m2w.Convert(psz,buf))
2398                 return (size_t)-1;
2399         }
2400         return inbuf;
2401     }
2402
2403     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2404     {
2405         const size_t inbuf = wxWcslen(psz);
2406         if (buf)
2407         {
2408             if (!w2m.Convert(psz,buf))
2409                 return (size_t)-1;
2410         }
2411
2412         return inbuf;
2413     }
2414
2415     bool IsOk() const { return m_ok; }
2416
2417 public:
2418     wxFontEncoding m_enc;
2419     wxEncodingConverter m2w, w2m;
2420
2421     // were we initialized successfully?
2422     bool m_ok;
2423
2424     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2425 };
2426
2427 // make the constructors available for unit testing
2428 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2429 {
2430     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2431     if ( !result->IsOk() )
2432     {
2433         delete result;
2434         return 0;
2435     }
2436     return result;
2437 }
2438
2439 #endif // wxUSE_FONTMAP
2440
2441 // ============================================================================
2442 // wxCSConv implementation
2443 // ============================================================================
2444
2445 void wxCSConv::Init()
2446 {
2447     m_name = NULL;
2448     m_convReal =  NULL;
2449     m_deferred = true;
2450 }
2451
2452 wxCSConv::wxCSConv(const wxChar *charset)
2453 {
2454     Init();
2455
2456     if ( charset )
2457     {
2458         SetName(charset);
2459     }
2460
2461     m_encoding = wxFONTENCODING_SYSTEM;
2462 }
2463
2464 wxCSConv::wxCSConv(wxFontEncoding encoding)
2465 {
2466     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2467     {
2468         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2469
2470         encoding = wxFONTENCODING_SYSTEM;
2471     }
2472
2473     Init();
2474
2475     m_encoding = encoding;
2476 }
2477
// Release the charset name and the real converter, if any (done by Clear()).
wxCSConv::~wxCSConv()
{
    Clear();
}
2482
2483 wxCSConv::wxCSConv(const wxCSConv& conv)
2484         : wxMBConv()
2485 {
2486     Init();
2487
2488     SetName(conv.m_name);
2489     m_encoding = conv.m_encoding;
2490 }
2491
2492 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2493 {
2494     Clear();
2495
2496     SetName(conv.m_name);
2497     m_encoding = conv.m_encoding;
2498
2499     return *this;
2500 }
2501
2502 void wxCSConv::Clear()
2503 {
2504     free(m_name);
2505     delete m_convReal;
2506
2507     m_name = NULL;
2508     m_convReal = NULL;
2509 }
2510
2511 void wxCSConv::SetName(const wxChar *charset)
2512 {
2513     if (charset)
2514     {
2515         m_name = wxStrdup(charset);
2516         m_deferred = true;
2517     }
2518 }
2519
2520 #if wxUSE_FONTMAP
2521 #include "wx/hashmap.h"
2522
// map from font encodings to charset names
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache used by wxCSConv::DoCreate(): for each encoding already tried it
// contains the charset name with which an iconv-based converter could be
// created, or an empty string if this failed for all names of the encoding.
static wxEncodingNameCache gs_nameCache;
2527 #endif
2528
2529 wxMBConv *wxCSConv::DoCreate() const
2530 {
2531 #if wxUSE_FONTMAP
2532     wxLogTrace(TRACE_STRCONV,
2533                wxT("creating conversion for %s"),
2534                (m_name ? m_name
2535                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2536 #endif // wxUSE_FONTMAP
2537
2538     // check for the special case of ASCII or ISO8859-1 charset: as we have
2539     // special knowledge of it anyhow, we don't need to create a special
2540     // conversion object
2541     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2542     {
2543         // don't convert at all
2544         return NULL;
2545     }
2546
2547     // we trust OS to do conversion better than we can so try external
2548     // conversion methods first
2549     //
2550     // the full order is:
2551     //      1. OS conversion (iconv() under Unix or Win32 API)
2552     //      2. hard coded conversions for UTF
2553     //      3. wxEncodingConverter as fall back
2554
2555     // step (1)
2556 #ifdef HAVE_ICONV
2557 #if !wxUSE_FONTMAP
2558     if ( m_name )
2559 #endif // !wxUSE_FONTMAP
2560     {
2561         wxString name(m_name);
2562         wxFontEncoding encoding(m_encoding);
2563
2564         if ( !name.empty() )
2565         {
2566             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2567             if ( conv->IsOk() )
2568                 return conv;
2569
2570             delete conv;
2571
2572 #if wxUSE_FONTMAP
2573             encoding =
2574                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2575 #endif // wxUSE_FONTMAP
2576         }
2577 #if wxUSE_FONTMAP
2578         {
2579             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2580             if ( it != gs_nameCache.end() )
2581             {
2582                 if ( it->second.empty() )
2583                     return NULL;
2584
2585                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2586                 if ( conv->IsOk() )
2587                     return conv;
2588
2589                 delete conv;
2590             }
2591
2592             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2593
2594             for ( ; *names; ++names )
2595             {
2596                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2597                 if ( conv->IsOk() )
2598                 {
2599                     gs_nameCache[encoding] = *names;
2600                     return conv;
2601                 }
2602
2603                 delete conv;
2604             }
2605
2606             gs_nameCache[encoding] = _T(""); // cache the failure
2607         }
2608 #endif // wxUSE_FONTMAP
2609     }
2610 #endif // HAVE_ICONV
2611
2612 #ifdef wxHAVE_WIN32_MB2WC
2613     {
2614 #if wxUSE_FONTMAP
2615         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2616                                       : new wxMBConv_win32(m_encoding);
2617         if ( conv->IsOk() )
2618             return conv;
2619
2620         delete conv;
2621 #else
2622         return NULL;
2623 #endif
2624     }
2625 #endif // wxHAVE_WIN32_MB2WC
2626 #if defined(__WXMAC__)
2627     {
2628         // leave UTF16 and UTF32 to the built-ins of wx
2629         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2630             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2631         {
2632
2633 #if wxUSE_FONTMAP
2634             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2635                                         : new wxMBConv_mac(m_encoding);
2636 #else
2637             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2638 #endif
2639             if ( conv->IsOk() )
2640                  return conv;
2641
2642             delete conv;
2643         }
2644     }
2645 #endif
2646 #if defined(__WXCOCOA__)
2647     {
2648         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2649         {
2650
2651 #if wxUSE_FONTMAP
2652             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2653                                           : new wxMBConv_cocoa(m_encoding);
2654 #else
2655             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2656 #endif
2657             if ( conv->IsOk() )
2658                  return conv;
2659
2660             delete conv;
2661         }
2662     }
2663 #endif
2664     // step (2)
2665     wxFontEncoding enc = m_encoding;
2666 #if wxUSE_FONTMAP
2667     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2668     {
2669         // use "false" to suppress interactive dialogs -- we can be called from
2670         // anywhere and popping up a dialog from here is the last thing we want to
2671         // do
2672         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2673     }
2674 #endif // wxUSE_FONTMAP
2675
2676     switch ( enc )
2677     {
2678         case wxFONTENCODING_UTF7:
2679              return new wxMBConvUTF7;
2680
2681         case wxFONTENCODING_UTF8:
2682              return new wxMBConvUTF8;
2683
2684         case wxFONTENCODING_UTF16BE:
2685              return new wxMBConvUTF16BE;
2686
2687         case wxFONTENCODING_UTF16LE:
2688              return new wxMBConvUTF16LE;
2689
2690         case wxFONTENCODING_UTF32BE:
2691              return new wxMBConvUTF32BE;
2692
2693         case wxFONTENCODING_UTF32LE:
2694              return new wxMBConvUTF32LE;
2695
2696         default:
2697              // nothing to do but put here to suppress gcc warnings
2698              ;
2699     }
2700
2701     // step (3)
2702 #if wxUSE_FONTMAP
2703     {
2704         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2705                                       : new wxMBConv_wxwin(m_encoding);
2706         if ( conv->IsOk() )
2707             return conv;
2708
2709         delete conv;
2710     }
2711 #endif // wxUSE_FONTMAP
2712
2713     // NB: This is a hack to prevent deadlock. What could otherwise happen
2714     //     in Unicode build: wxConvLocal creation ends up being here
2715     //     because of some failure and logs the error. But wxLog will try to
2716     //     attach timestamp, for which it will need wxConvLocal (to convert
2717     //     time to char* and then wchar_t*), but that fails, tries to log
2718     //     error, but wxLog has a (already locked) critical section that
2719     //     guards static buffer.
2720     static bool alreadyLoggingError = false;
2721     if (!alreadyLoggingError)
2722     {
2723         alreadyLoggingError = true;
2724         wxLogError(_("Cannot convert from the charset '%s'!"),
2725                    m_name ? m_name
2726                       :
2727 #if wxUSE_FONTMAP
2728                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2729 #else // !wxUSE_FONTMAP
2730                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2731 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2732               );
2733         alreadyLoggingError = false;
2734     }
2735
2736     return NULL;
2737 }
2738
2739 void wxCSConv::CreateConvIfNeeded() const
2740 {
2741     if ( m_deferred )
2742     {
2743         wxCSConv *self = (wxCSConv *)this; // const_cast
2744
2745 #if wxUSE_INTL
2746         // if we don't have neither the name nor the encoding, use the default
2747         // encoding for this system
2748         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2749         {
2750             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2751         }
2752 #endif // wxUSE_INTL
2753
2754         self->m_convReal = DoCreate();
2755         self->m_deferred = false;
2756     }
2757 }
2758
2759 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2760 {
2761     CreateConvIfNeeded();
2762
2763     if (m_convReal)
2764         return m_convReal->MB2WC(buf, psz, n);
2765
2766     // latin-1 (direct)
2767     size_t len = strlen(psz);
2768
2769     if (buf)
2770     {
2771         for (size_t c = 0; c <= len; c++)
2772             buf[c] = (unsigned char)(psz[c]);
2773     }
2774
2775     return len;
2776 }
2777
2778 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2779 {
2780     CreateConvIfNeeded();
2781
2782     if (m_convReal)
2783         return m_convReal->WC2MB(buf, psz, n);
2784
2785     // latin-1 (direct)
2786     const size_t len = wxWcslen(psz);
2787     if (buf)
2788     {
2789         for (size_t c = 0; c <= len; c++)
2790         {
2791             if (psz[c] > 0xFF)
2792                 return (size_t)-1;
2793             buf[c] = (char)psz[c];
2794         }
2795     }
2796     else
2797     {
2798         for (size_t c = 0; c <= len; c++)
2799         {
2800             if (psz[c] > 0xFF)
2801                 return (size_t)-1;
2802         }
2803     }
2804
2805     return len;
2806 }
2807
2808 // ----------------------------------------------------------------------------
2809 // globals
2810 // ----------------------------------------------------------------------------
2811
// the object behind wxConvLibc: its class depends on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;

// the exported converters are references (or pointers) to the static
// objects defined above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent initially points to the libc converter
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
// wxConvFileName points to the UTF-8 converter when __WXOSX__ is defined
// and to the libc one otherwise
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
                                    wxConvUTF8Obj;
#else
                                    wxConvLibcObj;
#endif
2837
2838
2839 #else // !wxUSE_WCHAR_T
2840
// stand-ins in absence of wchar_t: in this branch (wxUSE_WCHAR_T disabled)
// the predefined converters are plain wxMBConv objects
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2846
2847 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T