src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27   #pragma hdrstop
  28 #endif
  29
  30 #ifndef WX_PRECOMP
  31     #include "wx/intl.h"
  32     #include "wx/log.h"
  33 #endif // WX_PRECOMP
  34
  35 #include "wx/strconv.h"
  36
  37 #if wxUSE_WCHAR_T
  38
  39 #ifdef __WINDOWS__
  40     #include "wx/msw/private.h"
  41     #include "wx/msw/missing.h"
  42 #endif
  43
  44 #ifndef __WXWINCE__
  45 #include <errno.h>
  46 #endif
  47
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include <stdlib.h>
  51
  52 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  53     #define wxHAVE_WIN32_MB2WC
  54 #endif // __WIN32__ but !__WXMICROWIN__
  55
  56 #ifdef __SALFORDC__
  57     #include <clib.h>
  58 #endif
  59
  60 #ifdef HAVE_ICONV
  61     #include <iconv.h>
  62     #include "wx/thread.h"
  63 #endif
  64
  65 #include "wx/encconv.h"
  66 #include "wx/fontmap.h"
  67 #include "wx/utils.h"
  68
  69 #ifdef __WXMAC__
  70 #ifndef __DARWIN__
  71 #include <ATSUnicode.h>
  72 #include <TextCommon.h>
  73 #include <TextEncodingConverter.h>
  74 #endif
  75
  76 #include  "wx/mac/private.h"  // includes mac headers
  77 #endif
  78
  79 #define TRACE_STRCONV _T("strconv")
  80
  81 // ============================================================================
  82 // implementation
  83 // ============================================================================
  84
  85 // ----------------------------------------------------------------------------
  86 // UTF-16 en/decoding to/from UCS-4
  87 // ----------------------------------------------------------------------------
  88
  89
  90 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
  91 {
  92     if (input<=0xffff)
  93     {
  94         if (output)
  95             *output = (wxUint16) input;
  96         return 1;
  97     }
  98     else if (input>=0x110000)
  99     {
 100         return (size_t)-1;
 101     }
 102     else
 103     {
 104         if (output)
 105         {
 106             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 107             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 108         }
 109         return 2;
 110     }
 111 }
 112
 113 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 114 {
 115     if ((*input<0xd800) || (*input>0xdfff))
 116     {
 117         output = *input;
 118         return 1;
 119     }
 120     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 121     {
 122         output = *input;
 123         return (size_t)-1;
 124     }
 125     else
 126     {
 127         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 128         return 2;
 129     }
 130 }
 131
 132
 133 // ----------------------------------------------------------------------------
 134 // wxMBConv
 135 // ----------------------------------------------------------------------------
 136
// Out-of-line definition of the destructor; the body is intentionally empty.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 141
 142 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 143 {
 144     if ( psz )
 145     {
 146         // calculate the length of the buffer needed first
 147         size_t nLen = MB2WC(NULL, psz, 0);
 148         if ( nLen != (size_t)-1 )
 149         {
 150             // now do the actual conversion
 151             wxWCharBuffer buf(nLen);
 152             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 153             if ( nLen != (size_t)-1 )
 154             {
 155                 return buf;
 156             }
 157         }
 158     }
 159
 160     wxWCharBuffer buf((wchar_t *)NULL);
 161
 162     return buf;
 163 }
 164
 165 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 166 {
 167     if ( pwz )
 168     {
 169         size_t nLen = WC2MB(NULL, pwz, 0);
 170         if ( nLen != (size_t)-1 )
 171         {
 172             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 173             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 174             if ( nLen != (size_t)-1 )
 175             {
 176                 return buf;
 177             }
 178         }
 179     }
 180
 181     wxCharBuffer buf((char *)NULL);
 182
 183     return buf;
 184 }
 185
 186 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 187 {
 188     wxASSERT(pOutSize != NULL);
 189
 190     const char* szEnd = szString + nStringLen + 1;
 191     const char* szPos = szString;
 192     const char* szStart = szPos;
 193
 194     size_t nActualLength = 0;
 195     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 196
 197     wxWCharBuffer theBuffer(nCurrentSize);
 198
 199     //Convert the string until the length() is reached, continuing the
 200     //loop every time a null character is reached
 201     while(szPos != szEnd)
 202     {
 203         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 204
 205         //Get the length of the current (sub)string
 206         size_t nLen = MB2WC(NULL, szPos, 0);
 207
 208         //Invalid conversion?
 209         if( nLen == (size_t)-1 )
 210         {
 211             *pOutSize = 0;
 212             theBuffer.data()[0u] = wxT('\0');
 213             return theBuffer;
 214         }
 215
 216
 217         //Increase the actual length (+1 for current null character)
 218         nActualLength += nLen + 1;
 219
 220         //if buffer too big, realloc the buffer
 221         if (nActualLength > (nCurrentSize+1))
 222         {
 223             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 224             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 225             theBuffer = theNewBuffer;
 226             nCurrentSize <<= 1;
 227         }
 228
 229         //Convert the current (sub)string
 230         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 231         {
 232             *pOutSize = 0;
 233             theBuffer.data()[0u] = wxT('\0');
 234             return theBuffer;
 235         }
 236
 237         //Increment to next (sub)string
 238         //Note that we have to use strlen instead of nLen here
 239         //because XX2XX gives us the size of the output buffer,
 240         //which is not necessarily the length of the string
 241         szPos += strlen(szPos) + 1;
 242     }
 243
 244     //success - return actual length and the buffer
 245     *pOutSize = nActualLength;
 246     return theBuffer;
 247 }
 248
 249 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 250 {
 251     wxASSERT(pOutSize != NULL);
 252
 253     const wchar_t* szEnd = szString + nStringLen + 1;
 254     const wchar_t* szPos = szString;
 255     const wchar_t* szStart = szPos;
 256
 257     size_t nActualLength = 0;
 258     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 259
 260     wxCharBuffer theBuffer(nCurrentSize);
 261
 262     //Convert the string until the length() is reached, continuing the
 263     //loop every time a null character is reached
 264     while(szPos != szEnd)
 265     {
 266         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 267
 268         //Get the length of the current (sub)string
 269         size_t nLen = WC2MB(NULL, szPos, 0);
 270
 271         //Invalid conversion?
 272         if( nLen == (size_t)-1 )
 273         {
 274             *pOutSize = 0;
 275             theBuffer.data()[0u] = wxT('\0');
 276             return theBuffer;
 277         }
 278
 279         //Increase the actual length (+1 for current null character)
 280         nActualLength += nLen + 1;
 281
 282         //if buffer too big, realloc the buffer
 283         if (nActualLength > (nCurrentSize+1))
 284         {
 285             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 286             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 287             theBuffer = theNewBuffer;
 288             nCurrentSize <<= 1;
 289         }
 290
 291         //Convert the current (sub)string
 292         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 293         {
 294             *pOutSize = 0;
 295             theBuffer.data()[0u] = wxT('\0');
 296             return theBuffer;
 297         }
 298
 299         //Increment to next (sub)string
 300         //Note that we have to use wxWcslen instead of nLen here
 301         //because XX2XX gives us the size of the output buffer,
 302         //which is not necessarily the length of the string
 303         szPos += wxWcslen(szPos) + 1;
 304     }
 305
 306     //success - return actual length and the buffer
 307     *pOutSize = nActualLength;
 308     return theBuffer;
 309 }
 310
 311 // ----------------------------------------------------------------------------
 312 // wxMBConvLibc
 313 // ----------------------------------------------------------------------------
 314
 315 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 316 {
 317     return wxMB2WC(buf, psz, n);
 318 }
 319
 320 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 321 {
 322     return wxWC2MB(buf, psz, n);
 323 }
 324
 325 #ifdef __UNIX__
 326
 327 // ----------------------------------------------------------------------------
 328 // wxConvBrokenFileNames
 329 // ----------------------------------------------------------------------------
 330
 331 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 332 {
 333     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 334                   || wxStricmp(charset, _T("UTF8")) == 0  )
 335         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 336     else
 337         m_conv = new wxCSConv(charset);
 338 }
 339
 340 size_t
 341 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 342                              const char *psz,
 343                              size_t outputSize) const
 344 {
 345     return m_conv->MB2WC( outputBuf, psz, outputSize );
 346 }
 347
 348 size_t
 349 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 350                              const wchar_t *psz,
 351                              size_t outputSize) const
 352 {
 353     return m_conv->WC2MB( outputBuf, psz, outputSize );
 354 }
 355
 356 #endif
 357
 358 // ----------------------------------------------------------------------------
 359 // UTF-7
 360 // ----------------------------------------------------------------------------
 361
 362 // Implementation (C) 2004 Fredrik Roubert
 363
//
// BASE64 decoding table
//
// Indexed by byte value: contains the 6 bit value (0..63) of the characters
// of the BASE64 alphabet ('A'-'Z', 'a'-'z', '0'-'9', '+' and '/') and 0xff
// for all the other bytes.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x00 - 0x07
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x08 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x10 - 0x17
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x18 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x20 - 0x27
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // 0x28 - 0x2f: '+', '/'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // 0x30 - 0x37: '0'-'7'
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x38 - 0x3f: '8', '9'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 0x40 - 0x47: 'A'-'G'
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, // 0x48 - 0x4f: 'H'-'O'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, // 0x50 - 0x57: 'P'-'W'
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x58 - 0x5f: 'X'-'Z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 0x60 - 0x67: 'a'-'g'
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, // 0x68 - 0x6f: 'h'-'o'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, // 0x70 - 0x77: 'p'-'w'
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x78 - 0x7f: 'x'-'z'
    // 0x80 - 0xff: no valid characters at all
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 402
 403 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 404 {
 405     size_t len = 0;
 406
 407     while (*psz && ((!buf) || (len < n)))
 408     {
 409         unsigned char cc = *psz++;
 410         if (cc != '+')
 411         {
 412             // plain ASCII char
 413             if (buf)
 414                 *buf++ = cc;
 415             len++;
 416         }
 417         else if (*psz == '-')
 418         {
 419             // encoded plus sign
 420             if (buf)
 421                 *buf++ = cc;
 422             len++;
 423             psz++;
 424         }
 425         else
 426         {
 427             // BASE64 encoded string
 428             bool lsb;
 429             unsigned char c;
 430             unsigned int d, l;
 431             for (lsb = false, d = 0, l = 0;
 432                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 433             {
 434                 d <<= 6;
 435                 d += cc;
 436                 for (l += 6; l >= 8; lsb = !lsb)
 437                 {
 438                     c = (unsigned char)((d >> (l -= 8)) % 256);
 439                     if (lsb)
 440                     {
 441                         if (buf)
 442                             *buf++ |= c;
 443                         len ++;
 444                     }
 445                     else
 446                         if (buf)
 447                             *buf = (wchar_t)(c << 8);
 448                 }
 449             }
 450             if (*psz == '-')
 451                 psz++;
 452         }
 453     }
 454     if (buf && (len < n))
 455         *buf = 0;
 456     return len;
 457 }
 458
//
// BASE64 encoding table
//
// Maps a 6 bit value (0..63) to the character representing it.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', //  0 -  7
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', //  8 - 15
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 - 23
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 - 31
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 - 39
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 - 47
    'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 - 55
    '4', '5', '6', '7', '8', '9', '+', '/'  // 56 - 63
};
 473
//
// UTF-7 encoding table
//
// Indexed by the (7 bit) character code, gives the class of the character:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0x10 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3, // 0x20 - 0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // 0x30 - 0x3f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, // 0x50 - 0x5f
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3  // 0x70 - 0x7f
};
 493
 494 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 495 {
 496
 497
 498     size_t len = 0;
 499
 500     while (*psz && ((!buf) || (len < n)))
 501     {
 502         wchar_t cc = *psz++;
 503         if (cc < 0x80 && utf7encode[cc] < 1)
 504         {
 505             // plain ASCII char
 506             if (buf)
 507                 *buf++ = (char)cc;
 508             len++;
 509         }
 510 #ifndef WC_UTF16
 511         else if (((wxUint32)cc) > 0xffff)
 512         {
 513             // no surrogate pair generation (yet?)
 514             return (size_t)-1;
 515         }
 516 #endif
 517         else
 518         {
 519             if (buf)
 520                 *buf++ = '+';
 521             len++;
 522             if (cc != '+')
 523             {
 524                 // BASE64 encode string
 525                 unsigned int lsb, d, l;
 526                 for (d = 0, l = 0;; psz++)
 527                 {
 528                     for (lsb = 0; lsb < 2; lsb ++)
 529                     {
 530                         d <<= 8;
 531                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 532
 533                         for (l += 8; l >= 6; )
 534                         {
 535                             l -= 6;
 536                             if (buf)
 537                                 *buf++ = utf7enb64[(d >> l) % 64];
 538                             len++;
 539                         }
 540                     }
 541                     cc = *psz;
 542                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 543                         break;
 544                 }
 545                 if (l != 0)
 546                 {
 547                     if (buf)
 548                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 549                     len++;
 550                 }
 551             }
 552             if (buf)
 553                 *buf++ = '-';
 554             len++;
 555         }
 556     }
 557     if (buf && (len < n))
 558         *buf = 0;
 559     return len;
 560 }
 561
 562 // ----------------------------------------------------------------------------
 563 // UTF-8
 564 // ----------------------------------------------------------------------------
 565
// utf8_max[i] is the greatest value which can be encoded using i + 1 bytes;
// the last element ensures that looking for the first element not less
// than a given 32 bit value always stops
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 573
 574 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 575 {
 576     size_t len = 0;
 577
 578     while (*psz && ((!buf) || (len < n)))
 579     {
 580         const char *opsz = psz;
 581         bool invalid = false;
 582         unsigned char cc = *psz++, fc = cc;
 583         unsigned cnt;
 584         for (cnt = 0; fc & 0x80; cnt++)
 585             fc <<= 1;
 586         if (!cnt)
 587         {
 588             // plain ASCII char
 589             if (buf)
 590                 *buf++ = cc;
 591             len++;
 592
 593             // escape the escape character for octal escapes
 594             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 595                     && cc == '\\' && (!buf || len < n))
 596             {
 597                 if (buf)
 598                     *buf++ = cc;
 599                 len++;
 600             }
 601         }
 602         else
 603         {
 604             cnt--;
 605             if (!cnt)
 606             {
 607                 // invalid UTF-8 sequence
 608                 invalid = true;
 609             }
 610             else
 611             {
 612                 unsigned ocnt = cnt - 1;
 613                 wxUint32 res = cc & (0x3f >> cnt);
 614                 while (cnt--)
 615                 {
 616                     cc = *psz;
 617                     if ((cc & 0xC0) != 0x80)
 618                     {
 619                         // invalid UTF-8 sequence
 620                         invalid = true;
 621                         break;
 622                     }
 623                     psz++;
 624                     res = (res << 6) | (cc & 0x3f);
 625                 }
 626                 if (invalid || res <= utf8_max[ocnt])
 627                 {
 628                     // illegal UTF-8 encoding
 629                     invalid = true;
 630                 }
 631                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 632                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 633                 {
 634                     // if one of our PUA characters turns up externally
 635                     // it must also be treated as an illegal sequence
 636                     // (a bit like you have to escape an escape character)
 637                     invalid = true;
 638                 }
 639                 else
 640                 {
 641 #ifdef WC_UTF16
 642                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 643                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 644                     if (pa == (size_t)-1)
 645                     {
 646                         invalid = true;
 647                     }
 648                     else
 649                     {
 650                         if (buf)
 651                             buf += pa;
 652                         len += pa;
 653                     }
 654 #else // !WC_UTF16
 655                     if (buf)
 656                         *buf++ = res;
 657                     len++;
 658 #endif // WC_UTF16/!WC_UTF16
 659                 }
 660             }
 661             if (invalid)
 662             {
 663                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 664                 {
 665                     while (opsz < psz && (!buf || len < n))
 666                     {
 667 #ifdef WC_UTF16
 668                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 669                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 670                         wxASSERT(pa != (size_t)-1);
 671                         if (buf)
 672                             buf += pa;
 673                         opsz++;
 674                         len += pa;
 675 #else
 676                         if (buf)
 677                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 678                         opsz++;
 679                         len++;
 680 #endif
 681                     }
 682                 }
 683                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 684                 {
 685                     while (opsz < psz && (!buf || len < n))
 686                     {
 687                         if ( buf && len + 3 < n )
 688                         {
 689                             unsigned char on = *opsz;
 690                             *buf++ = L'\\';
 691                             *buf++ = (wchar_t)( L'0' + on / 0100 );
 692                             *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
 693                             *buf++ = (wchar_t)( L'0' + on % 010 );
 694                         }
 695                         opsz++;
 696                         len += 4;
 697                     }
 698                 }
 699                 else // MAP_INVALID_UTF8_NOT
 700                 {
 701                     return (size_t)-1;
 702                 }
 703             }
 704         }
 705     }
 706     if (buf && (len < n))
 707         *buf = 0;
 708     return len;
 709 }
 710
 711 static inline bool isoctal(wchar_t wch)
 712 {
 713     return L'0' <= wch && wch <= L'7';
 714 }
 715
 716 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 717 {
 718     size_t len = 0;
 719
 720     while (*psz && ((!buf) || (len < n)))
 721     {
 722         wxUint32 cc;
 723 #ifdef WC_UTF16
 724         // cast is ok for WC_UTF16
 725         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 726         psz += (pa == (size_t)-1) ? 1 : pa;
 727 #else
 728         cc=(*psz++) & 0x7fffffff;
 729 #endif
 730
 731         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 732                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 733         {
 734             if (buf)
 735                 *buf++ = (char)(cc - wxUnicodePUA);
 736             len++;
 737         }
 738         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 739                     && cc == L'\\' && psz[0] == L'\\' )
 740         {
 741             if (buf)
 742                 *buf++ = (char)cc;
 743             psz++;
 744             len++;
 745         }
 746         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 747                     cc == L'\\' &&
 748                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 749         {
 750             if (buf)
 751             {
 752                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 753                                  (psz[1] - L'0')*010 +
 754                                  (psz[2] - L'0'));
 755             }
 756
 757             psz += 3;
 758             len++;
 759         }
 760         else
 761         {
 762             unsigned cnt;
 763             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 764             if (!cnt)
 765             {
 766                 // plain ASCII char
 767                 if (buf)
 768                     *buf++ = (char) cc;
 769                 len++;
 770             }
 771
 772             else
 773             {
 774                 len += cnt + 1;
 775                 if (buf)
 776                 {
 777                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 778                     while (cnt--)
 779                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 780                 }
 781             }
 782         }
 783     }
 784
 785     if (buf && (len<n))
 786         *buf = 0;
 787
 788     return len;
 789 }
 790
 791 // ----------------------------------------------------------------------------
 792 // UTF-16
 793 // ----------------------------------------------------------------------------
 794
 795 #ifdef WORDS_BIGENDIAN
 796     #define wxMBConvUTF16straight wxMBConvUTF16BE
 797     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 798 #else
 799     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 800     #define wxMBConvUTF16straight wxMBConvUTF16LE
 801 #endif
 802
 803
 804 #ifdef WC_UTF16
 805
 806 // copy 16bit MB to 16bit String
 807 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 808 {
 809     size_t len=0;
 810
 811     while (*(wxUint16*)psz && (!buf || len < n))
 812     {
 813         if (buf)
 814             *buf++ = *(wxUint16*)psz;
 815         len++;
 816
 817         psz += sizeof(wxUint16);
 818     }
 819     if (buf && len<n)   *buf=0;
 820
 821     return len;
 822 }
 823
 824
 825 // copy 16bit String to 16bit MB
 826 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 827 {
 828     size_t len=0;
 829
 830     while (*psz && (!buf || len < n))
 831     {
 832         if (buf)
 833         {
 834             *(wxUint16*)buf = *psz;
 835             buf += sizeof(wxUint16);
 836         }
 837         len += sizeof(wxUint16);
 838         psz++;
 839     }
 840     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 841
 842     return len;
 843 }
 844
 845
 846 // swap 16bit MB to 16bit String
 847 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 848 {
 849     size_t len=0;
 850
 851     while (*(wxUint16*)psz && (!buf || len < n))
 852     {
 853         if (buf)
 854         {
 855             ((char *)buf)[0] = psz[1];
 856             ((char *)buf)[1] = psz[0];
 857             buf++;
 858         }
 859         len++;
 860         psz += sizeof(wxUint16);
 861     }
 862     if (buf && len<n)   *buf=0;
 863
 864     return len;
 865 }
 866
 867
 868 // swap 16bit MB to 16bit String
 869 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 870 {
 871     size_t len=0;
 872
 873     while (*psz && (!buf || len < n))
 874     {
 875         if (buf)
 876         {
 877             *buf++ = ((char*)psz)[1];
 878             *buf++ = ((char*)psz)[0];
 879         }
 880         len += sizeof(wxUint16);
 881         psz++;
 882     }
 883     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 884
 885     return len;
 886 }
 887
 888
 889 #else // WC_UTF16
 890
 891
 892 // copy 16bit MB to 32bit String
 893 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 894 {
 895     size_t len=0;
 896
 897     while (*(wxUint16*)psz && (!buf || len < n))
 898     {
 899         wxUint32 cc;
 900         size_t pa=decode_utf16((wxUint16*)psz, cc);
 901         if (pa == (size_t)-1)
 902             return pa;
 903
 904         if (buf)
 905             *buf++ = cc;
 906         len++;
 907         psz += pa * sizeof(wxUint16);
 908     }
 909     if (buf && len<n)   *buf=0;
 910
 911     return len;
 912 }
 913
 914
 915 // copy 32bit String to 16bit MB
 916 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 917 {
 918     size_t len=0;
 919
 920     while (*psz && (!buf || len < n))
 921     {
 922         wxUint16 cc[2];
 923         size_t pa=encode_utf16(*psz, cc);
 924
 925         if (pa == (size_t)-1)
 926             return pa;
 927
 928         if (buf)
 929         {
 930             *(wxUint16*)buf = cc[0];
 931             buf += sizeof(wxUint16);
 932             if (pa > 1)
 933             {
 934                 *(wxUint16*)buf = cc[1];
 935                 buf += sizeof(wxUint16);
 936             }
 937         }
 938
 939         len += pa*sizeof(wxUint16);
 940         psz++;
 941     }
 942     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 943
 944     return len;
 945 }
 946
 947
 948 // swap 16bit MB to 32bit String
 949 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 950 {
 951     size_t len=0;
 952
 953     while (*(wxUint16*)psz && (!buf || len < n))
 954     {
 955         wxUint32 cc;
 956         char tmp[4];
 957         tmp[0]=psz[1];  tmp[1]=psz[0];
 958         tmp[2]=psz[3];  tmp[3]=psz[2];
 959
 960         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 961         if (pa == (size_t)-1)
 962             return pa;
 963
 964         if (buf)
 965             *buf++ = cc;
 966
 967         len++;
 968         psz += pa * sizeof(wxUint16);
 969     }
 970     if (buf && len<n)   *buf=0;
 971
 972     return len;
 973 }
 974
 975
 976 // swap 32bit String to 16bit MB
 977 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 978 {
 979     size_t len=0;
 980
 981     while (*psz && (!buf || len < n))
 982     {
 983         wxUint16 cc[2];
 984         size_t pa=encode_utf16(*psz, cc);
 985
 986         if (pa == (size_t)-1)
 987             return pa;
 988
 989         if (buf)
 990         {
 991             *buf++ = ((char*)cc)[1];
 992             *buf++ = ((char*)cc)[0];
 993             if (pa > 1)
 994             {
 995                 *buf++ = ((char*)cc)[3];
 996                 *buf++ = ((char*)cc)[2];
 997             }
 998         }
 999
1000         len += pa*sizeof(wxUint16);
1001         psz++;
1002     }
1003     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1004
1005     return len;
1006 }
1007
1008 #endif // WC_UTF16
1009
1010
1011 // ----------------------------------------------------------------------------
1012 // UTF-32
1013 // ----------------------------------------------------------------------------
1014
// "straight" is the UTF-32 converter class for the native byte order and
// "swap" the one for the opposite byte order
#ifndef WORDS_BIGENDIAN
    #define wxMBConvUTF32straight  wxMBConvUTF32LE
    #define wxMBConvUTF32swap      wxMBConvUTF32BE
#else // big endian
    #define wxMBConvUTF32straight  wxMBConvUTF32BE
    #define wxMBConvUTF32swap      wxMBConvUTF32LE
#endif
1022
1023
// the global UTF-32 converter objects, one for each byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1026
1027
1028 #ifdef WC_UTF16
1029
1030 // copy 32bit MB to 16bit String
1031 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1032 {
1033     size_t len=0;
1034
1035     while (*(wxUint32*)psz && (!buf || len < n))
1036     {
1037         wxUint16 cc[2];
1038
1039         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1040         if (pa == (size_t)-1)
1041             return pa;
1042
1043         if (buf)
1044         {
1045             *buf++ = cc[0];
1046             if (pa > 1)
1047                 *buf++ = cc[1];
1048         }
1049         len += pa;
1050         psz += sizeof(wxUint32);
1051     }
1052     if (buf && len<n)   *buf=0;
1053
1054     return len;
1055 }
1056
1057
1058 // copy 16bit String to 32bit MB
1059 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1060 {
1061     size_t len=0;
1062
1063     while (*psz && (!buf || len < n))
1064     {
1065         wxUint32 cc;
1066
1067         // cast is ok for WC_UTF16
1068         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1069         if (pa == (size_t)-1)
1070             return pa;
1071
1072         if (buf)
1073         {
1074             *(wxUint32*)buf = cc;
1075             buf += sizeof(wxUint32);
1076         }
1077         len += sizeof(wxUint32);
1078         psz += pa;
1079     }
1080
1081     if (buf && len<=n-sizeof(wxUint32))
1082         *(wxUint32*)buf=0;
1083
1084     return len;
1085 }
1086
1087
1088
1089 // swap 32bit MB to 16bit String
1090 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1091 {
1092     size_t len=0;
1093
1094     while (*(wxUint32*)psz && (!buf || len < n))
1095     {
1096         char tmp[4];
1097         tmp[0] = psz[3];   tmp[1] = psz[2];
1098         tmp[2] = psz[1];   tmp[3] = psz[0];
1099
1100
1101         wxUint16 cc[2];
1102
1103         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1104         if (pa == (size_t)-1)
1105             return pa;
1106
1107         if (buf)
1108         {
1109             *buf++ = cc[0];
1110             if (pa > 1)
1111                 *buf++ = cc[1];
1112         }
1113         len += pa;
1114         psz += sizeof(wxUint32);
1115     }
1116
1117     if (buf && len<n)
1118         *buf=0;
1119
1120     return len;
1121 }
1122
1123
1124 // swap 16bit String to 32bit MB
1125 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1126 {
1127     size_t len=0;
1128
1129     while (*psz && (!buf || len < n))
1130     {
1131         char cc[4];
1132
1133         // cast is ok for WC_UTF16
1134         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1135         if (pa == (size_t)-1)
1136             return pa;
1137
1138         if (buf)
1139         {
1140             *buf++ = cc[3];
1141             *buf++ = cc[2];
1142             *buf++ = cc[1];
1143             *buf++ = cc[0];
1144         }
1145         len += sizeof(wxUint32);
1146         psz += pa;
1147     }
1148
1149     if (buf && len<=n-sizeof(wxUint32))
1150         *(wxUint32*)buf=0;
1151
1152     return len;
1153 }
1154
1155 #else // WC_UTF16
1156
1157
1158 // copy 32bit MB to 32bit String
1159 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1160 {
1161     size_t len=0;
1162
1163     while (*(wxUint32*)psz && (!buf || len < n))
1164     {
1165         if (buf)
1166             *buf++ = *(wxUint32*)psz;
1167         len++;
1168         psz += sizeof(wxUint32);
1169     }
1170
1171     if (buf && len<n)
1172         *buf=0;
1173
1174     return len;
1175 }
1176
1177
1178 // copy 32bit String to 32bit MB
1179 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1180 {
1181     size_t len=0;
1182
1183     while (*psz && (!buf || len < n))
1184     {
1185         if (buf)
1186         {
1187             *(wxUint32*)buf = *psz;
1188             buf += sizeof(wxUint32);
1189         }
1190
1191         len += sizeof(wxUint32);
1192         psz++;
1193     }
1194
1195     if (buf && len<=n-sizeof(wxUint32))
1196         *(wxUint32*)buf=0;
1197
1198     return len;
1199 }
1200
1201
1202 // swap 32bit MB to 32bit String
1203 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1204 {
1205     size_t len=0;
1206
1207     while (*(wxUint32*)psz && (!buf || len < n))
1208     {
1209         if (buf)
1210         {
1211             ((char *)buf)[0] = psz[3];
1212             ((char *)buf)[1] = psz[2];
1213             ((char *)buf)[2] = psz[1];
1214             ((char *)buf)[3] = psz[0];
1215             buf++;
1216         }
1217         len++;
1218         psz += sizeof(wxUint32);
1219     }
1220
1221     if (buf && len<n)
1222         *buf=0;
1223
1224     return len;
1225 }
1226
1227
1228 // swap 32bit String to 32bit MB
1229 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1230 {
1231     size_t len=0;
1232
1233     while (*psz && (!buf || len < n))
1234     {
1235         if (buf)
1236         {
1237             *buf++ = ((char *)psz)[3];
1238             *buf++ = ((char *)psz)[2];
1239             *buf++ = ((char *)psz)[1];
1240             *buf++ = ((char *)psz)[0];
1241         }
1242         len += sizeof(wxUint32);
1243         psz++;
1244     }
1245
1246     if (buf && len<=n-sizeof(wxUint32))
1247         *(wxUint32*)buf=0;
1248
1249     return len;
1250 }
1251
1252
1253 #endif // WC_UTF16
1254
1255
1256 // ============================================================================
1257 // The classes doing conversion using the iconv_xxx() functions
1258 // ============================================================================
1259
1260 #ifdef HAVE_ICONV
1261
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) is true if the iconv() call which returned cres
// and left bufLeft bytes in its input buffer should be considered as failed.
// The macro arguments are parenthesized so that arbitrary expressions can be
// passed to it.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type expected by iconv()
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// the value used for the descriptors which couldn't be opened
#define ICONV_T_INVALID ((iconv_t)-1)
1280
// select, according to the size of wchar_t, the macro used to byte-swap a
// wide character (WC_BSWAP) and the font encoding whose names are tried as
// iconv charset names for wchar_t (WC_ENC)
#if SIZEOF_WCHAR_T == 4
    #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF32
#elif SIZEOF_WCHAR_T == 2
    #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF16
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
1291
1292 // ----------------------------------------------------------------------------
1293 // wxMBConv_iconv: encapsulates an iconv character set
1294 // ----------------------------------------------------------------------------
1295
1296 class wxMBConv_iconv : public wxMBConv
1297 {
1298 public:
1299     wxMBConv_iconv(const wxChar *name);
1300     virtual ~wxMBConv_iconv();
1301
1302     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1303     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1304
1305     bool IsOk() const
1306         { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1307
1308 protected:
1309     // the iconv handlers used to translate from multibyte to wide char and in
1310     // the other direction
1311     iconv_t m2w,
1312             w2m;
1313 #if wxUSE_THREADS
1314     // guards access to m2w and w2m objects
1315     wxMutex m_iconvMutex;
1316 #endif
1317
1318 private:
1319     // the name (for iconv_open()) of a wide char charset -- if none is
1320     // available on this machine, it will remain NULL
1321     static wxString ms_wcCharsetName;
1322
1323     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1324     // different endian-ness than the native one
1325     static bool ms_wcNeedsSwap;
1326 };
1327
1328 // make the constructor available for unit testing
1329 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1330 {
1331     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1332     if ( !result->IsOk() )
1333     {
1334         delete result;
1335         return 0;
1336     }
1337     return result;
1338 }
1339
// definitions of the static members: both are set by the wxMBConv_iconv ctor
// when it finds the iconv charset to use for wchar_t
wxString wxMBConv_iconv::ms_wcCharsetName;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1342
1343 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1344 {
1345     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1346     // names for the charsets
1347     const wxCharBuffer cname(wxString(name).ToAscii());
1348
1349     // check for charset that represents wchar_t:
1350     if ( ms_wcCharsetName.empty() )
1351     {
1352 #if wxUSE_FONTMAP
1353         const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1354 #else // !wxUSE_FONTMAP
1355         static const wxChar *names[] =
1356         {
1357 #if SIZEOF_WCHAR_T == 4
1358             _T("UCS-4"),
1359 #elif SIZEOF_WCHAR_T = 2
1360             _T("UCS-2"),
1361 #endif
1362             NULL
1363         };
1364 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1365
1366         for ( ; *names; ++names )
1367         {
1368             const wxString nameCS(*names);
1369
1370             // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1371             wxString nameXE(nameCS);
1372             #ifdef WORDS_BIGENDIAN
1373                 nameXE += _T("BE");
1374             #else // little endian
1375                 nameXE += _T("LE");
1376             #endif
1377
1378             m2w = iconv_open(nameXE.ToAscii(), cname);
1379             if ( m2w == ICONV_T_INVALID )
1380             {
1381                 // try charset w/o bytesex info (e.g. "UCS4")
1382                 m2w = iconv_open(nameCS.ToAscii(), cname);
1383
1384                 // and check for bytesex ourselves:
1385                 if ( m2w != ICONV_T_INVALID )
1386                 {
1387                     char    buf[2], *bufPtr;
1388                     wchar_t wbuf[2], *wbufPtr;
1389                     size_t  insz, outsz;
1390                     size_t  res;
1391
1392                     buf[0] = 'A';
1393                     buf[1] = 0;
1394                     wbuf[0] = 0;
1395                     insz = 2;
1396                     outsz = SIZEOF_WCHAR_T * 2;
1397                     wbufPtr = wbuf;
1398                     bufPtr = buf;
1399
1400                     res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1401                                 (char**)&wbufPtr, &outsz);
1402
1403                     if (ICONV_FAILED(res, insz))
1404                     {
1405                         wxLogLastError(wxT("iconv"));
1406                         wxLogError(_("Conversion to charset '%s' doesn't work."),
1407                                    nameCS.c_str());
1408                     }
1409                     else // ok, can convert to this encoding, remember it
1410                     {
1411                         ms_wcCharsetName = nameCS;
1412                         ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1413                     }
1414                 }
1415             }
1416             else // use charset not requiring byte swapping
1417             {
1418                 ms_wcCharsetName = nameXE;
1419             }
1420         }
1421
1422         wxLogTrace(TRACE_STRCONV,
1423                    wxT("iconv wchar_t charset is \"%s\"%s"),
1424                    ms_wcCharsetName.empty() ? _T("<none>")
1425                                             : ms_wcCharsetName.c_str(),
1426                    ms_wcNeedsSwap ? _T(" (needs swap)")
1427                                   : _T(""));
1428     }
1429     else // we already have ms_wcCharsetName
1430     {
1431         m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1432     }
1433
1434     if ( ms_wcCharsetName.empty() )
1435     {
1436         w2m = ICONV_T_INVALID;
1437     }
1438     else
1439     {
1440         w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1441         if ( w2m == ICONV_T_INVALID )
1442         {
1443             wxLogTrace(TRACE_STRCONV,
1444                        wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1445                        ms_wcCharsetName.c_str(), cname.data());
1446         }
1447     }
1448 }
1449
1450 wxMBConv_iconv::~wxMBConv_iconv()
1451 {
1452     if ( m2w != ICONV_T_INVALID )
1453         iconv_close(m2w);
1454     if ( w2m != ICONV_T_INVALID )
1455         iconv_close(w2m);
1456 }
1457
1458 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1459 {
1460 #if wxUSE_THREADS
1461     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1462     //     Unfortunately there is a couple of global wxCSConv objects such as
1463     //     wxConvLocal that are used all over wx code, so we have to make sure
1464     //     the handle is used by at most one thread at the time. Otherwise
1465     //     only a few wx classes would be safe to use from non-main threads
1466     //     as MB<->WC conversion would fail "randomly".
1467     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1468 #endif
1469
1470     size_t inbuf = strlen(psz);
1471     size_t outbuf = n * SIZEOF_WCHAR_T;
1472     size_t res, cres;
1473     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1474     wchar_t *bufPtr = buf;
1475     const char *pszPtr = psz;
1476
1477     if (buf)
1478     {
1479         // have destination buffer, convert there
1480         cres = iconv(m2w,
1481                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1482                      (char**)&bufPtr, &outbuf);
1483         res = n - (outbuf / SIZEOF_WCHAR_T);
1484
1485         if (ms_wcNeedsSwap)
1486         {
1487             // convert to native endianness
1488             for ( unsigned i = 0; i < res; i++ )
1489                 buf[n] = WC_BSWAP(buf[i]);
1490         }
1491
1492         // NB: iconv was given only strlen(psz) characters on input, and so
1493         //     it couldn't convert the trailing zero. Let's do it ourselves
1494         //     if there's some room left for it in the output buffer.
1495         if (res < n)
1496             buf[res] = 0;
1497     }
1498     else
1499     {
1500         // no destination buffer... convert using temp buffer
1501         // to calculate destination buffer requirement
1502         wchar_t tbuf[8];
1503         res = 0;
1504         do {
1505             bufPtr = tbuf;
1506             outbuf = 8*SIZEOF_WCHAR_T;
1507
1508             cres = iconv(m2w,
1509                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1510                          (char**)&bufPtr, &outbuf );
1511
1512             res += 8-(outbuf/SIZEOF_WCHAR_T);
1513         } while ((cres==(size_t)-1) && (errno==E2BIG));
1514     }
1515
1516     if (ICONV_FAILED(cres, inbuf))
1517     {
1518         //VS: it is ok if iconv fails, hence trace only
1519         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1520         return (size_t)-1;
1521     }
1522
1523     return res;
1524 }
1525
1526 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1527 {
1528 #if wxUSE_THREADS
1529     // NB: explained in MB2WC
1530     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1531 #endif
1532
1533     size_t inlen = wxWcslen(psz);
1534     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1535     size_t outbuf = n;
1536     size_t res, cres;
1537
1538     wchar_t *tmpbuf = 0;
1539
1540     if (ms_wcNeedsSwap)
1541     {
1542         // need to copy to temp buffer to switch endianness
1543         // (doing WC_BSWAP twice on the original buffer won't help, as it
1544         //  could be in read-only memory, or be accessed in some other thread)
1545         tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1546         for ( size_t i = 0; i < inlen; i++ )
1547             tmpbuf[n] = WC_BSWAP(psz[i]);
1548         tmpbuf[inlen] = L'\0';
1549         psz = tmpbuf;
1550     }
1551
1552     if (buf)
1553     {
1554         // have destination buffer, convert there
1555         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1556
1557         res = n-outbuf;
1558
1559         // NB: iconv was given only wcslen(psz) characters on input, and so
1560         //     it couldn't convert the trailing zero. Let's do it ourselves
1561         //     if there's some room left for it in the output buffer.
1562         if (res < n)
1563             buf[0] = 0;
1564     }
1565     else
1566     {
1567         // no destination buffer... convert using temp buffer
1568         // to calculate destination buffer requirement
1569         char tbuf[16];
1570         res = 0;
1571         do {
1572             buf = tbuf; outbuf = 16;
1573
1574             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1575
1576             res += 16 - outbuf;
1577         } while ((cres==(size_t)-1) && (errno==E2BIG));
1578     }
1579
1580     if (ms_wcNeedsSwap)
1581     {
1582         free(tmpbuf);
1583     }
1584
1585     if (ICONV_FAILED(cres, inbuf))
1586     {
1587         //VS: it is ok if iconv fails, hence trace only
1588         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1589         return (size_t)-1;
1590     }
1591
1592     return res;
1593 }
1594
1595 #endif // HAVE_ICONV
1596
1597
1598 // ============================================================================
1599 // Win32 conversion classes
1600 // ============================================================================
1601
1602 #ifdef wxHAVE_WIN32_MB2WC
1603
// from utils.cpp
//
// these functions are used by the wxMBConv_win32 ctors to find the code page
// for a charset name or a font encoding
// NOTE(review): IsOk() below treats -1 as invalid, so they presumably return
// -1 on failure -- confirm in utils.cpp
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1609
1610 class wxMBConv_win32 : public wxMBConv
1611 {
1612 public:
1613     wxMBConv_win32()
1614     {
1615         m_CodePage = CP_ACP;
1616     }
1617
1618 #if wxUSE_FONTMAP
1619     wxMBConv_win32(const wxChar* name)
1620     {
1621         m_CodePage = wxCharsetToCodepage(name);
1622     }
1623
1624     wxMBConv_win32(wxFontEncoding encoding)
1625     {
1626         m_CodePage = wxEncodingToCodepage(encoding);
1627     }
1628 #endif
1629
1630     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1631     {
1632         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1633         // the behaviour is not compatible with the Unix version (using iconv)
1634         // and break the library itself, e.g. wxTextInputStream::NextChar()
1635         // wouldn't work if reading an incomplete MB char didn't result in an
1636         // error
1637         //
1638         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1639         // an error (tested under Windows Server 2003) and apparently it is
1640         // done on purpose, i.e. the function accepts any input in this case
1641         // and although I'd prefer to return error on ill-formed output, our
1642         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1643         // explicitly ill-formed according to RFC 2152) neither so we don't
1644         // even have any fallback here...
1645         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1646
1647         const size_t len = ::MultiByteToWideChar
1648                              (
1649                                 m_CodePage,     // code page
1650                                 flags,          // flags: fall on error
1651                                 psz,            // input string
1652                                 -1,             // its length (NUL-terminated)
1653                                 buf,            // output string
1654                                 buf ? n : 0     // size of output buffer
1655                              );
1656
1657         // note that it returns count of written chars for buf != NULL and size
1658         // of the needed buffer for buf == NULL so in either case the length of
1659         // the string (which never includes the terminating NUL) is one less
1660         return len ? len - 1 : (size_t)-1;
1661     }
1662
1663     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1664     {
1665         /*
1666             we have a problem here: by default, WideCharToMultiByte() may
1667             replace characters unrepresentable in the target code page with bad
1668             quality approximations such as turning "1/2" symbol (U+00BD) into
1669             "1" for the code pages which don't have it and we, obviously, want
1670             to avoid this at any price
1671
1672             the trouble is that this function does it _silently_, i.e. it won't
1673             even tell us whether it did or not... Win98/2000 and higher provide
1674             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1675             we have to resort to a round trip, i.e. check that converting back
1676             results in the same string -- this is, of course, expensive but
1677             otherwise we simply can't be sure to not garble the data.
1678          */
1679
1680         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1681         // it doesn't work with CJK encodings (which we test for rather roughly
1682         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1683         // supporting it
1684         BOOL usedDef wxDUMMY_INITIALIZE(false);
1685         BOOL *pUsedDef;
1686         int flags;
1687         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1688         {
1689             // it's our lucky day
1690             flags = WC_NO_BEST_FIT_CHARS;
1691             pUsedDef = &usedDef;
1692         }
1693         else // old system or unsupported encoding
1694         {
1695             flags = 0;
1696             pUsedDef = NULL;
1697         }
1698
1699         const size_t len = ::WideCharToMultiByte
1700                              (
1701                                 m_CodePage,     // code page
1702                                 flags,          // either none or no best fit
1703                                 pwz,            // input string
1704                                 -1,             // it is (wide) NUL-terminated
1705                                 buf,            // output buffer
1706                                 buf ? n : 0,    // and its size
1707                                 NULL,           // default "replacement" char
1708                                 pUsedDef        // [out] was it used?
1709                              );
1710
1711         if ( !len )
1712         {
1713             // function totally failed
1714             return (size_t)-1;
1715         }
1716
1717         // if we were really converting, check if we succeeded
1718         if ( buf )
1719         {
1720             if ( flags )
1721             {
1722                 // check if the conversion failed, i.e. if any replacements
1723                 // were done
1724                 if ( usedDef )
1725                     return (size_t)-1;
1726             }
1727             else // we must resort to double tripping...
1728             {
1729                 wxWCharBuffer wcBuf(n);
1730                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1731                         wcscmp(wcBuf, pwz) != 0 )
1732                 {
1733                     // we didn't obtain the same thing we started from, hence
1734                     // the conversion was lossy and we consider that it failed
1735                     return (size_t)-1;
1736                 }
1737             }
1738         }
1739
1740         // see the comment above for the reason of "len - 1"
1741         return len - 1;
1742     }
1743
1744     bool IsOk() const { return m_CodePage != -1; }
1745
1746 private:
1747     static bool CanUseNoBestFit()
1748     {
1749         static int s_isWin98Or2k = -1;
1750
1751         if ( s_isWin98Or2k == -1 )
1752         {
1753             int verMaj, verMin;
1754             switch ( wxGetOsVersion(&verMaj, &verMin) )
1755             {
1756                 case wxWIN95:
1757                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1758                     break;
1759
1760                 case wxWINDOWS_NT:
1761                     s_isWin98Or2k = verMaj >= 5;
1762                     break;
1763
1764                 default:
1765                     // unknown, be conseravtive by default
1766                     s_isWin98Or2k = 0;
1767             }
1768
1769             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1770         }
1771
1772         return s_isWin98Or2k == 1;
1773     }
1774
1775     long m_CodePage;
1776 };
1777
1778 #endif // wxHAVE_WIN32_MB2WC
1779
1780 // ============================================================================
1781 // Cocoa conversion classes
1782 // ============================================================================
1783
1784 #if defined(__WXCOCOA__)
1785
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1789
1790 #include <CoreFoundation/CFString.h>
1791 #include <CoreFoundation/CFStringEncodingExt.h>
1792
1793 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1794 {
1795     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1796     if ( encoding == wxFONTENCODING_DEFAULT )
1797     {
1798         enc = CFStringGetSystemEncoding();
1799     }
1800     else switch( encoding)
1801     {
1802         case wxFONTENCODING_ISO8859_1 :
1803             enc = kCFStringEncodingISOLatin1 ;
1804             break ;
1805         case wxFONTENCODING_ISO8859_2 :
1806             enc = kCFStringEncodingISOLatin2;
1807             break ;
1808         case wxFONTENCODING_ISO8859_3 :
1809             enc = kCFStringEncodingISOLatin3 ;
1810             break ;
1811         case wxFONTENCODING_ISO8859_4 :
1812             enc = kCFStringEncodingISOLatin4;
1813             break ;
1814         case wxFONTENCODING_ISO8859_5 :
1815             enc = kCFStringEncodingISOLatinCyrillic;
1816             break ;
1817         case wxFONTENCODING_ISO8859_6 :
1818             enc = kCFStringEncodingISOLatinArabic;
1819             break ;
1820         case wxFONTENCODING_ISO8859_7 :
1821             enc = kCFStringEncodingISOLatinGreek;
1822             break ;
1823         case wxFONTENCODING_ISO8859_8 :
1824             enc = kCFStringEncodingISOLatinHebrew;
1825             break ;
1826         case wxFONTENCODING_ISO8859_9 :
1827             enc = kCFStringEncodingISOLatin5;
1828             break ;
1829         case wxFONTENCODING_ISO8859_10 :
1830             enc = kCFStringEncodingISOLatin6;
1831             break ;
1832         case wxFONTENCODING_ISO8859_11 :
1833             enc = kCFStringEncodingISOLatinThai;
1834             break ;
1835         case wxFONTENCODING_ISO8859_13 :
1836             enc = kCFStringEncodingISOLatin7;
1837             break ;
1838         case wxFONTENCODING_ISO8859_14 :
1839             enc = kCFStringEncodingISOLatin8;
1840             break ;
1841         case wxFONTENCODING_ISO8859_15 :
1842             enc = kCFStringEncodingISOLatin9;
1843             break ;
1844
1845         case wxFONTENCODING_KOI8 :
1846             enc = kCFStringEncodingKOI8_R;
1847             break ;
1848         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1849             enc = kCFStringEncodingDOSRussian;
1850             break ;
1851
1852 //      case wxFONTENCODING_BULGARIAN :
1853 //          enc = ;
1854 //          break ;
1855
1856         case wxFONTENCODING_CP437 :
1857             enc =kCFStringEncodingDOSLatinUS ;
1858             break ;
1859         case wxFONTENCODING_CP850 :
1860             enc = kCFStringEncodingDOSLatin1;
1861             break ;
1862         case wxFONTENCODING_CP852 :
1863             enc = kCFStringEncodingDOSLatin2;
1864             break ;
1865         case wxFONTENCODING_CP855 :
1866             enc = kCFStringEncodingDOSCyrillic;
1867             break ;
1868         case wxFONTENCODING_CP866 :
1869             enc =kCFStringEncodingDOSRussian ;
1870             break ;
1871         case wxFONTENCODING_CP874 :
1872             enc = kCFStringEncodingDOSThai;
1873             break ;
1874         case wxFONTENCODING_CP932 :
1875             enc = kCFStringEncodingDOSJapanese;
1876             break ;
1877         case wxFONTENCODING_CP936 :
1878             enc =kCFStringEncodingDOSChineseSimplif ;
1879             break ;
1880         case wxFONTENCODING_CP949 :
1881             enc = kCFStringEncodingDOSKorean;
1882             break ;
1883         case wxFONTENCODING_CP950 :
1884             enc = kCFStringEncodingDOSChineseTrad;
1885             break ;
1886         case wxFONTENCODING_CP1250 :
1887             enc = kCFStringEncodingWindowsLatin2;
1888             break ;
1889         case wxFONTENCODING_CP1251 :
1890             enc =kCFStringEncodingWindowsCyrillic ;
1891             break ;
1892         case wxFONTENCODING_CP1252 :
1893             enc =kCFStringEncodingWindowsLatin1 ;
1894             break ;
1895         case wxFONTENCODING_CP1253 :
1896             enc = kCFStringEncodingWindowsGreek;
1897             break ;
1898         case wxFONTENCODING_CP1254 :
1899             enc = kCFStringEncodingWindowsLatin5;
1900             break ;
1901         case wxFONTENCODING_CP1255 :
1902             enc =kCFStringEncodingWindowsHebrew ;
1903             break ;
1904         case wxFONTENCODING_CP1256 :
1905             enc =kCFStringEncodingWindowsArabic ;
1906             break ;
1907         case wxFONTENCODING_CP1257 :
1908             enc = kCFStringEncodingWindowsBalticRim;
1909             break ;
1910 //   This only really encodes to UTF7 (if that) evidently
1911 //        case wxFONTENCODING_UTF7 :
1912 //            enc = kCFStringEncodingNonLossyASCII ;
1913 //            break ;
1914         case wxFONTENCODING_UTF8 :
1915             enc = kCFStringEncodingUTF8 ;
1916             break ;
1917         case wxFONTENCODING_EUC_JP :
1918             enc = kCFStringEncodingEUC_JP;
1919             break ;
1920         case wxFONTENCODING_UTF16 :
1921             enc = kCFStringEncodingUnicode ;
1922             break ;
1923         case wxFONTENCODING_MACROMAN :
1924             enc = kCFStringEncodingMacRoman ;
1925             break ;
1926         case wxFONTENCODING_MACJAPANESE :
1927             enc = kCFStringEncodingMacJapanese ;
1928             break ;
1929         case wxFONTENCODING_MACCHINESETRAD :
1930             enc = kCFStringEncodingMacChineseTrad ;
1931             break ;
1932         case wxFONTENCODING_MACKOREAN :
1933             enc = kCFStringEncodingMacKorean ;
1934             break ;
1935         case wxFONTENCODING_MACARABIC :
1936             enc = kCFStringEncodingMacArabic ;
1937             break ;
1938         case wxFONTENCODING_MACHEBREW :
1939             enc = kCFStringEncodingMacHebrew ;
1940             break ;
1941         case wxFONTENCODING_MACGREEK :
1942             enc = kCFStringEncodingMacGreek ;
1943             break ;
1944         case wxFONTENCODING_MACCYRILLIC :
1945             enc = kCFStringEncodingMacCyrillic ;
1946             break ;
1947         case wxFONTENCODING_MACDEVANAGARI :
1948             enc = kCFStringEncodingMacDevanagari ;
1949             break ;
1950         case wxFONTENCODING_MACGURMUKHI :
1951             enc = kCFStringEncodingMacGurmukhi ;
1952             break ;
1953         case wxFONTENCODING_MACGUJARATI :
1954             enc = kCFStringEncodingMacGujarati ;
1955             break ;
1956         case wxFONTENCODING_MACORIYA :
1957             enc = kCFStringEncodingMacOriya ;
1958             break ;
1959         case wxFONTENCODING_MACBENGALI :
1960             enc = kCFStringEncodingMacBengali ;
1961             break ;
1962         case wxFONTENCODING_MACTAMIL :
1963             enc = kCFStringEncodingMacTamil ;
1964             break ;
1965         case wxFONTENCODING_MACTELUGU :
1966             enc = kCFStringEncodingMacTelugu ;
1967             break ;
1968         case wxFONTENCODING_MACKANNADA :
1969             enc = kCFStringEncodingMacKannada ;
1970             break ;
1971         case wxFONTENCODING_MACMALAJALAM :
1972             enc = kCFStringEncodingMacMalayalam ;
1973             break ;
1974         case wxFONTENCODING_MACSINHALESE :
1975             enc = kCFStringEncodingMacSinhalese ;
1976             break ;
1977         case wxFONTENCODING_MACBURMESE :
1978             enc = kCFStringEncodingMacBurmese ;
1979             break ;
1980         case wxFONTENCODING_MACKHMER :
1981             enc = kCFStringEncodingMacKhmer ;
1982             break ;
1983         case wxFONTENCODING_MACTHAI :
1984             enc = kCFStringEncodingMacThai ;
1985             break ;
1986         case wxFONTENCODING_MACLAOTIAN :
1987             enc = kCFStringEncodingMacLaotian ;
1988             break ;
1989         case wxFONTENCODING_MACGEORGIAN :
1990             enc = kCFStringEncodingMacGeorgian ;
1991             break ;
1992         case wxFONTENCODING_MACARMENIAN :
1993             enc = kCFStringEncodingMacArmenian ;
1994             break ;
1995         case wxFONTENCODING_MACCHINESESIMP :
1996             enc = kCFStringEncodingMacChineseSimp ;
1997             break ;
1998         case wxFONTENCODING_MACTIBETAN :
1999             enc = kCFStringEncodingMacTibetan ;
2000             break ;
2001         case wxFONTENCODING_MACMONGOLIAN :
2002             enc = kCFStringEncodingMacMongolian ;
2003             break ;
2004         case wxFONTENCODING_MACETHIOPIC :
2005             enc = kCFStringEncodingMacEthiopic ;
2006             break ;
2007         case wxFONTENCODING_MACCENTRALEUR :
2008             enc = kCFStringEncodingMacCentralEurRoman ;
2009             break ;
2010         case wxFONTENCODING_MACVIATNAMESE :
2011             enc = kCFStringEncodingMacVietnamese ;
2012             break ;
2013         case wxFONTENCODING_MACARABICEXT :
2014             enc = kCFStringEncodingMacExtArabic ;
2015             break ;
2016         case wxFONTENCODING_MACSYMBOL :
2017             enc = kCFStringEncodingMacSymbol ;
2018             break ;
2019         case wxFONTENCODING_MACDINGBATS :
2020             enc = kCFStringEncodingMacDingbats ;
2021             break ;
2022         case wxFONTENCODING_MACTURKISH :
2023             enc = kCFStringEncodingMacTurkish ;
2024             break ;
2025         case wxFONTENCODING_MACCROATIAN :
2026             enc = kCFStringEncodingMacCroatian ;
2027             break ;
2028         case wxFONTENCODING_MACICELANDIC :
2029             enc = kCFStringEncodingMacIcelandic ;
2030             break ;
2031         case wxFONTENCODING_MACROMANIAN :
2032             enc = kCFStringEncodingMacRomanian ;
2033             break ;
2034         case wxFONTENCODING_MACCELTIC :
2035             enc = kCFStringEncodingMacCeltic ;
2036             break ;
2037         case wxFONTENCODING_MACGAELIC :
2038             enc = kCFStringEncodingMacGaelic ;
2039             break ;
2040 //      case wxFONTENCODING_MACKEYBOARD :
2041 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2042 //          break ;
2043         default :
2044             // because gcc is picky
2045             break ;
2046     } ;
2047     return enc ;
2048 }
2049
2050 class wxMBConv_cocoa : public wxMBConv
2051 {
2052 public:
2053     wxMBConv_cocoa()
2054     {
2055         Init(CFStringGetSystemEncoding()) ;
2056     }
2057
2058 #if wxUSE_FONTMAP
2059     wxMBConv_cocoa(const wxChar* name)
2060     {
2061         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2062     }
2063 #endif
2064
2065     wxMBConv_cocoa(wxFontEncoding encoding)
2066     {
2067         Init( wxCFStringEncFromFontEnc(encoding) );
2068     }
2069
2070     ~wxMBConv_cocoa()
2071     {
2072     }
2073
2074     void Init( CFStringEncoding encoding)
2075     {
2076         m_encoding = encoding ;
2077     }
2078
2079     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2080     {
2081         wxASSERT(szUnConv);
2082
2083         CFStringRef theString = CFStringCreateWithBytes (
2084                                                 NULL, //the allocator
2085                                                 (const UInt8*)szUnConv,
2086                                                 strlen(szUnConv),
2087                                                 m_encoding,
2088                                                 false //no BOM/external representation
2089                                                 );
2090
2091         wxASSERT(theString);
2092
2093         size_t nOutLength = CFStringGetLength(theString);
2094
2095         if (szOut == NULL)
2096         {
2097             CFRelease(theString);
2098             return nOutLength;
2099         }
2100
2101         CFRange theRange = { 0, nOutSize };
2102
2103 #if SIZEOF_WCHAR_T == 4
2104         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2105 #endif
2106
2107         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2108
2109         CFRelease(theString);
2110
2111         szUniCharBuffer[nOutLength] = '\0' ;
2112
2113 #if SIZEOF_WCHAR_T == 4
2114         wxMBConvUTF16 converter ;
2115         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2116         delete[] szUniCharBuffer;
2117 #endif
2118
2119         return nOutLength;
2120     }
2121
2122     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2123     {
2124         wxASSERT(szUnConv);
2125
2126         size_t nRealOutSize;
2127         size_t nBufSize = wxWcslen(szUnConv);
2128         UniChar* szUniBuffer = (UniChar*) szUnConv;
2129
2130 #if SIZEOF_WCHAR_T == 4
2131         wxMBConvUTF16 converter ;
2132         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2133         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2134         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2135         nBufSize /= sizeof(UniChar);
2136 #endif
2137
2138         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2139                                 NULL, //allocator
2140                                 szUniBuffer,
2141                                 nBufSize,
2142                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2143                             );
2144
2145         wxASSERT(theString);
2146
2147         //Note that CER puts a BOM when converting to unicode
2148         //so we  check and use getchars instead in that case
2149         if (m_encoding == kCFStringEncodingUnicode)
2150         {
2151             if (szOut != NULL)
2152                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2153
2154             nRealOutSize = CFStringGetLength(theString) + 1;
2155         }
2156         else
2157         {
2158             CFStringGetBytes(
2159                 theString,
2160                 CFRangeMake(0, CFStringGetLength(theString)),
2161                 m_encoding,
2162                 0, //what to put in characters that can't be converted -
2163                     //0 tells CFString to return NULL if it meets such a character
2164                 false, //not an external representation
2165                 (UInt8*) szOut,
2166                 nOutSize,
2167                 (CFIndex*) &nRealOutSize
2168                         );
2169         }
2170
2171         CFRelease(theString);
2172
2173 #if SIZEOF_WCHAR_T == 4
2174         delete[] szUniBuffer;
2175 #endif
2176
2177         return  nRealOutSize - 1;
2178     }
2179
2180     bool IsOk() const
2181     {
2182         return m_encoding != kCFStringEncodingInvalidId &&
2183               CFStringIsEncodingAvailable(m_encoding);
2184     }
2185
2186 private:
2187     CFStringEncoding m_encoding ;
2188 };
2189
2190 #endif // defined(__WXCOCOA__)
2191
2192 // ============================================================================
2193 // Mac conversion classes
2194 // ============================================================================
2195
2196 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2197
2198 class wxMBConv_mac : public wxMBConv
2199 {
2200 public:
2201     wxMBConv_mac()
2202     {
2203         Init(CFStringGetSystemEncoding()) ;
2204     }
2205
2206 #if wxUSE_FONTMAP
2207     wxMBConv_mac(const wxChar* name)
2208     {
2209         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2210     }
2211 #endif
2212
2213     wxMBConv_mac(wxFontEncoding encoding)
2214     {
2215         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2216     }
2217
2218     ~wxMBConv_mac()
2219     {
2220         OSStatus status = noErr ;
2221         status = TECDisposeConverter(m_MB2WC_converter);
2222         status = TECDisposeConverter(m_WC2MB_converter);
2223     }
2224
2225
2226     void Init( TextEncodingBase encoding)
2227     {
2228         OSStatus status = noErr ;
2229         m_char_encoding = encoding ;
2230         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2231
2232         status = TECCreateConverter(&m_MB2WC_converter,
2233                                     m_char_encoding,
2234                                     m_unicode_encoding);
2235         status = TECCreateConverter(&m_WC2MB_converter,
2236                                     m_unicode_encoding,
2237                                     m_char_encoding);
2238     }
2239
2240     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2241     {
2242         OSStatus status = noErr ;
2243         ByteCount byteOutLen ;
2244         ByteCount byteInLen = strlen(psz) ;
2245         wchar_t *tbuf = NULL ;
2246         UniChar* ubuf = NULL ;
2247         size_t res = 0 ;
2248
2249         if (buf == NULL)
2250         {
2251             //apple specs say at least 32
2252             n = wxMax( 32 , byteInLen ) ;
2253             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2254         }
2255         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2256 #if SIZEOF_WCHAR_T == 4
2257         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2258 #else
2259         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2260 #endif
2261         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2262           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2263 #if SIZEOF_WCHAR_T == 4
2264         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2265         // is not properly terminated we get random characters at the end
2266         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2267         wxMBConvUTF16 converter ;
2268         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2269         free( ubuf ) ;
2270 #else
2271         res = byteOutLen / sizeof( UniChar ) ;
2272 #endif
2273         if ( buf == NULL )
2274              free(tbuf) ;
2275
2276         if ( buf  && res < n)
2277             buf[res] = 0;
2278
2279         return res ;
2280     }
2281
2282     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2283     {
2284         OSStatus status = noErr ;
2285         ByteCount byteOutLen ;
2286         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2287
2288         char *tbuf = NULL ;
2289
2290         if (buf == NULL)
2291         {
2292             //apple specs say at least 32
2293             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2294             tbuf = (char*) malloc( n ) ;
2295         }
2296
2297         ByteCount byteBufferLen = n ;
2298         UniChar* ubuf = NULL ;
2299 #if SIZEOF_WCHAR_T == 4
2300         wxMBConvUTF16 converter ;
2301         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2302         byteInLen = unicharlen ;
2303         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2304         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2305 #else
2306         ubuf = (UniChar*) psz ;
2307 #endif
2308         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2309             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2310 #if SIZEOF_WCHAR_T == 4
2311         free( ubuf ) ;
2312 #endif
2313         if ( buf == NULL )
2314             free(tbuf) ;
2315
2316         size_t res = byteOutLen ;
2317         if ( buf  && res < n)
2318         {
2319             buf[res] = 0;
2320
2321             //we need to double-trip to verify it didn't insert any ? in place
2322             //of bogus characters
2323             wxWCharBuffer wcBuf(n);
2324             size_t pszlen = wxWcslen(psz);
2325             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2326                         wxWcslen(wcBuf) != pszlen ||
2327                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2328             {
2329                 // we didn't obtain the same thing we started from, hence
2330                 // the conversion was lossy and we consider that it failed
2331                 return (size_t)-1;
2332             }
2333         }
2334
2335         return res ;
2336     }
2337
2338     bool IsOk() const
2339         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2340
2341 private:
2342     TECObjectRef m_MB2WC_converter ;
2343     TECObjectRef m_WC2MB_converter ;
2344
2345     TextEncodingBase m_char_encoding ;
2346     TextEncodingBase m_unicode_encoding ;
2347 };
2348
2349 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2350
2351 // ============================================================================
2352 // wxEncodingConverter based conversion classes
2353 // ============================================================================
2354
2355 #if wxUSE_FONTMAP
2356
2357 class wxMBConv_wxwin : public wxMBConv
2358 {
2359 private:
2360     void Init()
2361     {
2362         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2363                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2364     }
2365
2366 public:
2367     // temporarily just use wxEncodingConverter stuff,
2368     // so that it works while a better implementation is built
2369     wxMBConv_wxwin(const wxChar* name)
2370     {
2371         if (name)
2372             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2373         else
2374             m_enc = wxFONTENCODING_SYSTEM;
2375
2376         Init();
2377     }
2378
2379     wxMBConv_wxwin(wxFontEncoding enc)
2380     {
2381         m_enc = enc;
2382
2383         Init();
2384     }
2385
2386     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2387     {
2388         size_t inbuf = strlen(psz);
2389         if (buf)
2390         {
2391             if (!m2w.Convert(psz,buf))
2392                 return (size_t)-1;
2393         }
2394         return inbuf;
2395     }
2396
2397     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2398     {
2399         const size_t inbuf = wxWcslen(psz);
2400         if (buf)
2401         {
2402             if (!w2m.Convert(psz,buf))
2403                 return (size_t)-1;
2404         }
2405
2406         return inbuf;
2407     }
2408
2409     bool IsOk() const { return m_ok; }
2410
2411 public:
2412     wxFontEncoding m_enc;
2413     wxEncodingConverter m2w, w2m;
2414
2415     // were we initialized successfully?
2416     bool m_ok;
2417
2418     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2419 };
2420
2421 // make the constructors available for unit testing
2422 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2423 {
2424     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2425     if ( !result->IsOk() )
2426     {
2427         delete result;
2428         return 0;
2429     }
2430     return result;
2431 }
2432
2433 #endif // wxUSE_FONTMAP
2434
2435 // ============================================================================
2436 // wxCSConv implementation
2437 // ============================================================================
2438
2439 void wxCSConv::Init()
2440 {
2441     m_name = NULL;
2442     m_convReal =  NULL;
2443     m_deferred = true;
2444 }
2445
2446 wxCSConv::wxCSConv(const wxChar *charset)
2447 {
2448     Init();
2449
2450     if ( charset )
2451     {
2452         SetName(charset);
2453     }
2454
2455     m_encoding = wxFONTENCODING_SYSTEM;
2456 }
2457
2458 wxCSConv::wxCSConv(wxFontEncoding encoding)
2459 {
2460     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2461     {
2462         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2463
2464         encoding = wxFONTENCODING_SYSTEM;
2465     }
2466
2467     Init();
2468
2469     m_encoding = encoding;
2470 }
2471
2472 wxCSConv::~wxCSConv()
2473 {
2474     Clear();
2475 }
2476
2477 wxCSConv::wxCSConv(const wxCSConv& conv)
2478         : wxMBConv()
2479 {
2480     Init();
2481
2482     SetName(conv.m_name);
2483     m_encoding = conv.m_encoding;
2484 }
2485
2486 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2487 {
2488     Clear();
2489
2490     SetName(conv.m_name);
2491     m_encoding = conv.m_encoding;
2492
2493     return *this;
2494 }
2495
2496 void wxCSConv::Clear()
2497 {
2498     free(m_name);
2499     delete m_convReal;
2500
2501     m_name = NULL;
2502     m_convReal = NULL;
2503 }
2504
2505 void wxCSConv::SetName(const wxChar *charset)
2506 {
2507     if (charset)
2508     {
2509         m_name = wxStrdup(charset);
2510         m_deferred = true;
2511     }
2512 }
2513
2514 #if wxUSE_FONTMAP
2515 #include "wx/hashmap.h"
2516
// hash map type associating a charset name (wxString) with a wxFontEncoding
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache filled by wxCSConv::DoCreate() when iconv is used: for each encoding
// already tried it holds the charset name for which an iconv converter could
// be created, or an empty string if none of the names worked
static wxEncodingNameCache gs_nameCache;
2521 #endif
2522
2523 wxMBConv *wxCSConv::DoCreate() const
2524 {
2525 #if wxUSE_FONTMAP
2526     wxLogTrace(TRACE_STRCONV,
2527                wxT("creating conversion for %s"),
2528                (m_name ? m_name
2529                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2530 #endif // wxUSE_FONTMAP
2531
2532     // check for the special case of ASCII or ISO8859-1 charset: as we have
2533     // special knowledge of it anyhow, we don't need to create a special
2534     // conversion object
2535     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2536     {
2537         // don't convert at all
2538         return NULL;
2539     }
2540
2541     // we trust OS to do conversion better than we can so try external
2542     // conversion methods first
2543     //
2544     // the full order is:
2545     //      1. OS conversion (iconv() under Unix or Win32 API)
2546     //      2. hard coded conversions for UTF
2547     //      3. wxEncodingConverter as fall back
2548
2549     // step (1)
2550 #ifdef HAVE_ICONV
2551 #if !wxUSE_FONTMAP
2552     if ( m_name )
2553 #endif // !wxUSE_FONTMAP
2554     {
2555         wxString name(m_name);
2556         wxFontEncoding encoding(m_encoding);
2557
2558         if ( !name.empty() )
2559         {
2560             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2561             if ( conv->IsOk() )
2562                 return conv;
2563
2564             delete conv;
2565
2566 #if wxUSE_FONTMAP
2567             encoding =
2568                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2569 #endif // wxUSE_FONTMAP
2570         }
2571 #if wxUSE_FONTMAP
2572         {
2573             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2574             if ( it != gs_nameCache.end() )
2575             {
2576                 if ( it->second.empty() )
2577                     return NULL;
2578
2579                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2580                 if ( conv->IsOk() )
2581                     return conv;
2582
2583                 delete conv;
2584             }
2585
2586             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2587
2588             for ( ; *names; ++names )
2589             {
2590                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2591                 if ( conv->IsOk() )
2592                 {
2593                     gs_nameCache[encoding] = *names;
2594                     return conv;
2595                 }
2596
2597                 delete conv;
2598             }
2599
2600             gs_nameCache[encoding] = _T(""); // cache the failure
2601         }
2602 #endif // wxUSE_FONTMAP
2603     }
2604 #endif // HAVE_ICONV
2605
2606 #ifdef wxHAVE_WIN32_MB2WC
2607     {
2608 #if wxUSE_FONTMAP
2609         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2610                                       : new wxMBConv_win32(m_encoding);
2611         if ( conv->IsOk() )
2612             return conv;
2613
2614         delete conv;
2615 #else
2616         return NULL;
2617 #endif
2618     }
2619 #endif // wxHAVE_WIN32_MB2WC
2620 #if defined(__WXMAC__)
2621     {
2622         // leave UTF16 and UTF32 to the built-ins of wx
2623         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2624             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2625         {
2626
2627 #if wxUSE_FONTMAP
2628             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2629                                         : new wxMBConv_mac(m_encoding);
2630 #else
2631             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2632 #endif
2633             if ( conv->IsOk() )
2634                  return conv;
2635
2636             delete conv;
2637         }
2638     }
2639 #endif
2640 #if defined(__WXCOCOA__)
2641     {
2642         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2643         {
2644
2645 #if wxUSE_FONTMAP
2646             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2647                                           : new wxMBConv_cocoa(m_encoding);
2648 #else
2649             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2650 #endif
2651             if ( conv->IsOk() )
2652                  return conv;
2653
2654             delete conv;
2655         }
2656     }
2657 #endif
2658     // step (2)
2659     wxFontEncoding enc = m_encoding;
2660 #if wxUSE_FONTMAP
2661     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2662     {
2663         // use "false" to suppress interactive dialogs -- we can be called from
2664         // anywhere and popping up a dialog from here is the last thing we want to
2665         // do
2666         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2667     }
2668 #endif // wxUSE_FONTMAP
2669
2670     switch ( enc )
2671     {
2672         case wxFONTENCODING_UTF7:
2673              return new wxMBConvUTF7;
2674
2675         case wxFONTENCODING_UTF8:
2676              return new wxMBConvUTF8;
2677
2678         case wxFONTENCODING_UTF16BE:
2679              return new wxMBConvUTF16BE;
2680
2681         case wxFONTENCODING_UTF16LE:
2682              return new wxMBConvUTF16LE;
2683
2684         case wxFONTENCODING_UTF32BE:
2685              return new wxMBConvUTF32BE;
2686
2687         case wxFONTENCODING_UTF32LE:
2688              return new wxMBConvUTF32LE;
2689
2690         default:
2691              // nothing to do but put here to suppress gcc warnings
2692              ;
2693     }
2694
2695     // step (3)
2696 #if wxUSE_FONTMAP
2697     {
2698         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2699                                       : new wxMBConv_wxwin(m_encoding);
2700         if ( conv->IsOk() )
2701             return conv;
2702
2703         delete conv;
2704     }
2705 #endif // wxUSE_FONTMAP
2706
2707     // NB: This is a hack to prevent deadlock. What could otherwise happen
2708     //     in Unicode build: wxConvLocal creation ends up being here
2709     //     because of some failure and logs the error. But wxLog will try to
2710     //     attach timestamp, for which it will need wxConvLocal (to convert
2711     //     time to char* and then wchar_t*), but that fails, tries to log
2712     //     error, but wxLog has a (already locked) critical section that
2713     //     guards static buffer.
2714     static bool alreadyLoggingError = false;
2715     if (!alreadyLoggingError)
2716     {
2717         alreadyLoggingError = true;
2718         wxLogError(_("Cannot convert from the charset '%s'!"),
2719                    m_name ? m_name
2720                       :
2721 #if wxUSE_FONTMAP
2722                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2723 #else // !wxUSE_FONTMAP
2724                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2725 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2726               );
2727         alreadyLoggingError = false;
2728     }
2729
2730     return NULL;
2731 }
2732
2733 void wxCSConv::CreateConvIfNeeded() const
2734 {
2735     if ( m_deferred )
2736     {
2737         wxCSConv *self = (wxCSConv *)this; // const_cast
2738
2739 #if wxUSE_INTL
2740         // if we don't have neither the name nor the encoding, use the default
2741         // encoding for this system
2742         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2743         {
2744             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2745         }
2746 #endif // wxUSE_INTL
2747
2748         self->m_convReal = DoCreate();
2749         self->m_deferred = false;
2750     }
2751 }
2752
2753 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2754 {
2755     CreateConvIfNeeded();
2756
2757     if (m_convReal)
2758         return m_convReal->MB2WC(buf, psz, n);
2759
2760     // latin-1 (direct)
2761     size_t len = strlen(psz);
2762
2763     if (buf)
2764     {
2765         for (size_t c = 0; c <= len; c++)
2766             buf[c] = (unsigned char)(psz[c]);
2767     }
2768
2769     return len;
2770 }
2771
2772 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2773 {
2774     CreateConvIfNeeded();
2775
2776     if (m_convReal)
2777         return m_convReal->WC2MB(buf, psz, n);
2778
2779     // latin-1 (direct)
2780     const size_t len = wxWcslen(psz);
2781     if (buf)
2782     {
2783         for (size_t c = 0; c <= len; c++)
2784         {
2785             if (psz[c] > 0xFF)
2786                 return (size_t)-1;
2787             buf[c] = (char)psz[c];
2788         }
2789     }
2790     else
2791     {
2792         for (size_t c = 0; c <= len; c++)
2793         {
2794             if (psz[c] > 0xFF)
2795                 return (size_t)-1;
2796         }
2797     }
2798
2799     return len;
2800 }
2801
2802 // ----------------------------------------------------------------------------
2803 // globals
2804 // ----------------------------------------------------------------------------
2805
2806 #ifdef __WINDOWS__
2807     static wxMBConv_win32 wxConvLibcObj;
2808 #elif defined(__WXMAC__) && !defined(__MACH__)
2809     static wxMBConv_mac wxConvLibcObj ;
2810 #else
2811     static wxMBConvLibc wxConvLibcObj;
2812 #endif
2813
2814 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2815 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2816 static wxMBConvUTF7 wxConvUTF7Obj;
2817 static wxMBConvUTF8 wxConvUTF8Obj;
2818
2819 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2820 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2821 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2822 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2823 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2824 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2825 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2826 #ifdef __WXOSX__
2827                                     wxConvUTF8Obj;
2828 #else
2829                                     wxConvLibcObj;
2830 #endif
2831
2832
2833 #else // !wxUSE_WCHAR_T
2834
2835 // stand-ins in absence of wchar_t
2836 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2837                                 wxConvISO8859_1,
2838                                 wxConvLocal,
2839                                 wxConvUTF8;
2840
2841 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2842
2843