src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/private.h"
  45     #include "wx/msw/missing.h"
  46 #endif
  47
  48 #ifndef __WXWINCE__
  49 #include <errno.h>
  50 #endif
  51
  52 #include <ctype.h>
  53 #include <string.h>
  54 #include <stdlib.h>
  55
  56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  57     #define wxHAVE_WIN32_MB2WC
  58 #endif // __WIN32__ but !__WXMICROWIN__
  59
  60 // ----------------------------------------------------------------------------
  61 // headers
  62 // ----------------------------------------------------------------------------
  63
  64 #ifdef __SALFORDC__
  65     #include <clib.h>
  66 #endif
  67
  68 #ifdef HAVE_ICONV
  69     #include <iconv.h>
  70     #include "wx/thread.h"
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #include <ATSUnicode.h>
  79 #include <TextCommon.h>
  80 #include <TextEncodingConverter.h>
  81
  82 #include  "wx/mac/private.h"  // includes mac headers
  83 #endif
  84 // ----------------------------------------------------------------------------
  85 // macros
  86 // ----------------------------------------------------------------------------
  87
// Byte-swap, in place, the first len elements of the array str: BSWAP_UCS4
// swaps each element with wxUINT32_SWAP_ALWAYS and BSWAP_UTF16 with
// wxUINT16_SWAP_ALWAYS.
//
// Both expand to a braced block (not an expression) and paste their arguments
// in without parentheses, with len re-evaluated on every iteration, so only
// pass simple expressions without side effects.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }

// Settings depending on the size of wchar_t:
//  - WC_NAME, WC_NAME_BEST: names of the encoding of wchar_t strings, the
//    latter with explicit endianness (presumably passed to iconv in a part of
//    this file not visible here -- confirm)
//  - WC_BSWAP: the byte swapping macro matching the size of wchar_t
//  - WC_UTF16: only defined when wchar_t is 2 bytes; the code below tests it
//    to decide whether surrogate pairs have to be produced/consumed
#if SIZEOF_WCHAR_T == 4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#elif SIZEOF_WCHAR_T == 2
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
 112
 113 // ============================================================================
 114 // implementation
 115 // ============================================================================
 116
 117 // ----------------------------------------------------------------------------
 118 // UTF-16 en/decoding to/from UCS-4
 119 // ----------------------------------------------------------------------------
 120
 121
 122 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 123 {
 124     if (input<=0xffff)
 125     {
 126         if (output)
 127             *output = (wxUint16) input;
 128         return 1;
 129     }
 130     else if (input>=0x110000)
 131     {
 132         return (size_t)-1;
 133     }
 134     else
 135     {
 136         if (output)
 137         {
 138             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 139             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 140         }
 141         return 2;
 142     }
 143 }
 144
 145 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 146 {
 147     if ((*input<0xd800) || (*input>0xdfff))
 148     {
 149         output = *input;
 150         return 1;
 151     }
 152     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 153     {
 154         output = *input;
 155         return (size_t)-1;
 156     }
 157     else
 158     {
 159         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 160         return 2;
 161     }
 162 }
 163
 164
 165 // ----------------------------------------------------------------------------
 166 // wxMBConv
 167 // ----------------------------------------------------------------------------
 168
// Destructor defined out of line; there is nothing for it to release.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 173
 174 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 175 {
 176     if ( psz )
 177     {
 178         // calculate the length of the buffer needed first
 179         size_t nLen = MB2WC(NULL, psz, 0);
 180         if ( nLen != (size_t)-1 )
 181         {
 182             // now do the actual conversion
 183             wxWCharBuffer buf(nLen);
 184             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 185             if ( nLen != (size_t)-1 )
 186             {
 187                 return buf;
 188             }
 189         }
 190     }
 191
 192     wxWCharBuffer buf((wchar_t *)NULL);
 193
 194     return buf;
 195 }
 196
 197 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 198 {
 199     if ( pwz )
 200     {
 201         size_t nLen = WC2MB(NULL, pwz, 0);
 202         if ( nLen != (size_t)-1 )
 203         {
 204             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 205             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 206             if ( nLen != (size_t)-1 )
 207             {
 208                 return buf;
 209             }
 210         }
 211     }
 212
 213     wxCharBuffer buf((char *)NULL);
 214
 215     return buf;
 216 }
 217
 218 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 219 {
 220     wxASSERT(pOutSize != NULL);
 221
 222     const char* szEnd = szString + nStringLen + 1;
 223     const char* szPos = szString;
 224     const char* szStart = szPos;
 225
 226     size_t nActualLength = 0;
 227     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 228
 229     wxWCharBuffer theBuffer(nCurrentSize);
 230
 231     //Convert the string until the length() is reached, continuing the
 232     //loop every time a null character is reached
 233     while(szPos != szEnd)
 234     {
 235         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 236
 237         //Get the length of the current (sub)string
 238         size_t nLen = MB2WC(NULL, szPos, 0);
 239
 240         //Invalid conversion?
 241         if( nLen == (size_t)-1 )
 242         {
 243             *pOutSize = 0;
 244             theBuffer.data()[0u] = wxT('\0');
 245             return theBuffer;
 246         }
 247
 248
 249         //Increase the actual length (+1 for current null character)
 250         nActualLength += nLen + 1;
 251
 252         //if buffer too big, realloc the buffer
 253         if (nActualLength > (nCurrentSize+1))
 254         {
 255             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 256             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 257             theBuffer = theNewBuffer;
 258             nCurrentSize <<= 1;
 259         }
 260
 261         //Convert the current (sub)string
 262         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 263         {
 264             *pOutSize = 0;
 265             theBuffer.data()[0u] = wxT('\0');
 266             return theBuffer;
 267         }
 268
 269         //Increment to next (sub)string
 270         //Note that we have to use strlen instead of nLen here
 271         //because XX2XX gives us the size of the output buffer,
 272         //which is not necessarily the length of the string
 273         szPos += strlen(szPos) + 1;
 274     }
 275
 276     //success - return actual length and the buffer
 277     *pOutSize = nActualLength;
 278     return theBuffer;
 279 }
 280
 281 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 282 {
 283     wxASSERT(pOutSize != NULL);
 284
 285     const wchar_t* szEnd = szString + nStringLen + 1;
 286     const wchar_t* szPos = szString;
 287     const wchar_t* szStart = szPos;
 288
 289     size_t nActualLength = 0;
 290     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 291
 292     wxCharBuffer theBuffer(nCurrentSize);
 293
 294     //Convert the string until the length() is reached, continuing the
 295     //loop every time a null character is reached
 296     while(szPos != szEnd)
 297     {
 298         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 299
 300         //Get the length of the current (sub)string
 301         size_t nLen = WC2MB(NULL, szPos, 0);
 302
 303         //Invalid conversion?
 304         if( nLen == (size_t)-1 )
 305         {
 306             *pOutSize = 0;
 307             theBuffer.data()[0u] = wxT('\0');
 308             return theBuffer;
 309         }
 310
 311         //Increase the actual length (+1 for current null character)
 312         nActualLength += nLen + 1;
 313
 314         //if buffer too big, realloc the buffer
 315         if (nActualLength > (nCurrentSize+1))
 316         {
 317             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 318             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 319             theBuffer = theNewBuffer;
 320             nCurrentSize <<= 1;
 321         }
 322
 323         //Convert the current (sub)string
 324         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 325         {
 326             *pOutSize = 0;
 327             theBuffer.data()[0u] = wxT('\0');
 328             return theBuffer;
 329         }
 330
 331         //Increment to next (sub)string
 332         //Note that we have to use wxWcslen instead of nLen here
 333         //because XX2XX gives us the size of the output buffer,
 334         //which is not necessarily the length of the string
 335         szPos += wxWcslen(szPos) + 1;
 336     }
 337
 338     //success - return actual length and the buffer
 339     *pOutSize = nActualLength;
 340     return theBuffer;
 341 }
 342
 343 // ----------------------------------------------------------------------------
 344 // wxMBConvLibc
 345 // ----------------------------------------------------------------------------
 346
// Multibyte to wide character conversion: simply forwards all arguments to
// wxMB2WC() and returns its result unchanged.
// NOTE(review): wxMB2WC() is not defined in the visible part of this file; it
// is presumably a wrapper around the C library mbstowcs() -- confirm.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 351
// Wide character to multibyte conversion: simply forwards all arguments to
// wxWC2MB() and returns its result unchanged.
// NOTE(review): wxWC2MB() is not defined in the visible part of this file; it
// is presumably a wrapper around the C library wcstombs() -- confirm.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 356
 357 #ifdef __UNIX__
 358
 359 // ----------------------------------------------------------------------------
 360 // wxConvBrokenFileNames
 361 // ----------------------------------------------------------------------------
 362
 363 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 364 {
 365     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 366                   || wxStricmp(charset, _T("UTF8")) == 0  )
 367         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 368     else
 369         m_conv = new wxCSConv(charset);
 370 }
 371
// Multibyte to wide character conversion: forwards all arguments to the
// converter stored in m_conv and returns its result unchanged.
size_t
wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
                             const char *psz,
                             size_t outputSize) const
{
    return m_conv->MB2WC( outputBuf, psz, outputSize );
}
 379
// Wide character to multibyte conversion: forwards all arguments to the
// converter stored in m_conv and returns its result unchanged.
size_t
wxConvBrokenFileNames::WC2MB(char *outputBuf,
                             const wchar_t *psz,
                             size_t outputSize) const
{
    return m_conv->WC2MB( outputBuf, psz, outputSize );
}
 387
 388 #endif
 389
 390 // ----------------------------------------------------------------------------
 391 // UTF-7
 392 // ----------------------------------------------------------------------------
 393
 394 // Implementation (C) 2004 Fredrik Roubert
 395
 396 //
 397 // BASE64 decoding table
 398 //
// Indexed by byte value (all 256 values are covered): gives the 6 bit value
// of a BASE64 digit ('A'-'Z' are 0x00-0x19, 'a'-'z' 0x1a-0x33, '0'-'9'
// 0x34-0x3d, '+' 0x3e and '/' 0x3f) and 0xff for every other byte, including
// NUL and '-'.
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 434
 435 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 436 {
 437     size_t len = 0;
 438
 439     while (*psz && ((!buf) || (len < n)))
 440     {
 441         unsigned char cc = *psz++;
 442         if (cc != '+')
 443         {
 444             // plain ASCII char
 445             if (buf)
 446                 *buf++ = cc;
 447             len++;
 448         }
 449         else if (*psz == '-')
 450         {
 451             // encoded plus sign
 452             if (buf)
 453                 *buf++ = cc;
 454             len++;
 455             psz++;
 456         }
 457         else
 458         {
 459             // BASE64 encoded string
 460             bool lsb;
 461             unsigned char c;
 462             unsigned int d, l;
 463             for (lsb = false, d = 0, l = 0;
 464                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 465             {
 466                 d <<= 6;
 467                 d += cc;
 468                 for (l += 6; l >= 8; lsb = !lsb)
 469                 {
 470                     c = (unsigned char)((d >> (l -= 8)) % 256);
 471                     if (lsb)
 472                     {
 473                         if (buf)
 474                             *buf++ |= c;
 475                         len ++;
 476                     }
 477                     else
 478                         if (buf)
 479                             *buf = (wchar_t)(c << 8);
 480                 }
 481             }
 482             if (*psz == '-')
 483                 psz++;
 484         }
 485     }
 486     if (buf && (len < n))
 487         *buf = 0;
 488     return len;
 489 }
 490
 491 //
 492 // BASE64 encoding table
 493 //
// Indexed by a 6 bit value (0..63): gives the BASE64 digit representing it.
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 505
 506 //
 507 // UTF-7 encoding table
 508 //
 509 // 0 - Set D (directly encoded characters)
 510 // 1 - Set O (optional direct characters)
 511 // 2 - whitespace characters (optional)
 512 // 3 - special characters
 513 //
// Indexed by ASCII code (0..127 only, callers must check the range before
// indexing): gives the class of the character, one row per 16 codes.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 525
 526 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 527 {
 528
 529
 530     size_t len = 0;
 531
 532     while (*psz && ((!buf) || (len < n)))
 533     {
 534         wchar_t cc = *psz++;
 535         if (cc < 0x80 && utf7encode[cc] < 1)
 536         {
 537             // plain ASCII char
 538             if (buf)
 539                 *buf++ = (char)cc;
 540             len++;
 541         }
 542 #ifndef WC_UTF16
 543         else if (((wxUint32)cc) > 0xffff)
 544         {
 545             // no surrogate pair generation (yet?)
 546             return (size_t)-1;
 547         }
 548 #endif
 549         else
 550         {
 551             if (buf)
 552                 *buf++ = '+';
 553             len++;
 554             if (cc != '+')
 555             {
 556                 // BASE64 encode string
 557                 unsigned int lsb, d, l;
 558                 for (d = 0, l = 0;; psz++)
 559                 {
 560                     for (lsb = 0; lsb < 2; lsb ++)
 561                     {
 562                         d <<= 8;
 563                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 564
 565                         for (l += 8; l >= 6; )
 566                         {
 567                             l -= 6;
 568                             if (buf)
 569                                 *buf++ = utf7enb64[(d >> l) % 64];
 570                             len++;
 571                         }
 572                     }
 573                     cc = *psz;
 574                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 575                         break;
 576                 }
 577                 if (l != 0)
 578                 {
 579                     if (buf)
 580                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 581                     len++;
 582                 }
 583             }
 584             if (buf)
 585                 *buf++ = '-';
 586             len++;
 587         }
 588     }
 589     if (buf && (len < n))
 590         *buf = 0;
 591     return len;
 592 }
 593
 594 // ----------------------------------------------------------------------------
 595 // UTF-8
 596 // ----------------------------------------------------------------------------
 597
// utf8_max[i] is the greatest value which can be encoded using i + 1 bytes;
// the last entry, the greatest 32 bit value, stops any search for the first
// entry not less than a given value.
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string: there is one value for each of
// the 256 possible bytes, the end value itself is not part of the range
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 605
// Convert the NUL-terminated UTF-8 string psz to wide characters.
//
// If buf is NULL nothing is stored and only the length of the result is
// computed. Otherwise the result is stored in buf, n being its size in wide
// characters, followed by a terminating NUL if there is still room for it.
//
// Bytes which don't form a valid sequence are handled according to m_options:
//  - MAP_INVALID_UTF8_TO_PUA: each of them becomes wxUnicodePUA + the byte
//  - MAP_INVALID_UTF8_TO_OCTAL: each of them becomes a backslash followed by
//    3 octal digits, and a backslash in the input is doubled
//  - otherwise the conversion fails
//
// Returns the number of wide characters produced, not counting the NUL, or
// (size_t)-1 on failure.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // start of the current sequence, needed to go over its bytes again
        // if it turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;
        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // cnt is now the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence
                // (a continuation byte where a lead byte should be)
                invalid = true;
            }
            else
            {
                // ocnt indexes the greatest value encodable with one byte
                // less than this sequence has
                unsigned ocnt = cnt - 1;
                // payload bits of the lead byte
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence
                        // (the offending byte is not consumed, it may start
                        // the next sequence)
                        invalid = true;
                        break;
                    }
                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }
                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding
                    // (broken sequence or a value which should have been
                    // encoded using fewer bytes)
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                    // NOTE(review): encode_utf16() stores two units for
                    // values above 0xffff while the loop condition only
                    // guarantees room for one -- confirm that the callers
                    // always leave enough space
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == (size_t)-1)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }
            if (invalid)
            {
                // go over the bytes consumed so far one by one
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUuint16 if WC_UTF16
                        // NOTE(review): every value in the PUA range used
                        // here is above 0xffff and so takes two units while
                        // only len < n is checked -- confirm
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != (size_t)-1);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = wxUnicodePUA + (unsigned char)*opsz;
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only stored if all 4 characters fit
                        // but is counted in len in any case
                        if ( buf && len + 3 < n )
                        {
                            // this n is the byte being escaped, it hides the
                            // buffer size parameter inside this block
                            unsigned char n = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + n / 0100 );
                            *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + n % 010 );
                        }
                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return (size_t)-1;
                }
            }
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 742
 743 static inline bool isoctal(wchar_t wch)
 744 {
 745     return L'0' <= wch && wch <= L'7';
 746 }
 747
 748 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 749 {
 750     size_t len = 0;
 751
 752     while (*psz && ((!buf) || (len < n)))
 753     {
 754         wxUint32 cc;
 755 #ifdef WC_UTF16
 756         // cast is ok for WC_UTF16
 757         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 758         psz += (pa == (size_t)-1) ? 1 : pa;
 759 #else
 760         cc=(*psz++) & 0x7fffffff;
 761 #endif
 762
 763         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 764                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 765         {
 766             if (buf)
 767                 *buf++ = (char)(cc - wxUnicodePUA);
 768             len++;
 769         }
 770         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 771                     && cc == L'\\' && psz[0] == L'\\' )
 772         {
 773             if (buf)
 774                 *buf++ = (char)cc;
 775             psz++;
 776             len++;
 777         }
 778         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 779                     cc == L'\\' &&
 780                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 781         {
 782             if (buf)
 783             {
 784                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 785                                  (psz[1] - L'0')*010 +
 786                                  (psz[2] - L'0'));
 787             }
 788
 789             psz += 3;
 790             len++;
 791         }
 792         else
 793         {
 794             unsigned cnt;
 795             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 796             if (!cnt)
 797             {
 798                 // plain ASCII char
 799                 if (buf)
 800                     *buf++ = (char) cc;
 801                 len++;
 802             }
 803
 804             else
 805             {
 806                 len += cnt + 1;
 807                 if (buf)
 808                 {
 809                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 810                     while (cnt--)
 811                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 812                 }
 813             }
 814         }
 815     }
 816
 817     if (buf && (len<n))
 818         *buf = 0;
 819
 820     return len;
 821 }
 822
 823 // ----------------------------------------------------------------------------
 824 // UTF-16
 825 // ----------------------------------------------------------------------------
 826
// The UTF-16 conversions below are written once for data in the byte order of
// this machine ("straight") and once for data in the opposite one ("swap");
// these aliases attach each implementation to the right class, LE or BE,
// according to the endianness of the machine.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 834
 835
 836 #ifdef WC_UTF16
 837
 838 // copy 16bit MB to 16bit String
 839 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 840 {
 841     size_t len=0;
 842
 843     while (*(wxUint16*)psz && (!buf || len < n))
 844     {
 845         if (buf)
 846             *buf++ = *(wxUint16*)psz;
 847         len++;
 848
 849         psz += sizeof(wxUint16);
 850     }
 851     if (buf && len<n)   *buf=0;
 852
 853     return len;
 854 }
 855
 856
 857 // copy 16bit String to 16bit MB
 858 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 859 {
 860     size_t len=0;
 861
 862     while (*psz && (!buf || len < n))
 863     {
 864         if (buf)
 865         {
 866             *(wxUint16*)buf = *psz;
 867             buf += sizeof(wxUint16);
 868         }
 869         len += sizeof(wxUint16);
 870         psz++;
 871     }
 872     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 873
 874     return len;
 875 }
 876
 877
 878 // swap 16bit MB to 16bit String
 879 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 880 {
 881     size_t len=0;
 882
 883     while (*(wxUint16*)psz && (!buf || len < n))
 884     {
 885         if (buf)
 886         {
 887             ((char *)buf)[0] = psz[1];
 888             ((char *)buf)[1] = psz[0];
 889             buf++;
 890         }
 891         len++;
 892         psz += sizeof(wxUint16);
 893     }
 894     if (buf && len<n)   *buf=0;
 895
 896     return len;
 897 }
 898
 899
 900 // swap 16bit MB to 16bit String
 901 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 902 {
 903     size_t len=0;
 904
 905     while (*psz && (!buf || len < n))
 906     {
 907         if (buf)
 908         {
 909             *buf++ = ((char*)psz)[1];
 910             *buf++ = ((char*)psz)[0];
 911         }
 912         len += sizeof(wxUint16);
 913         psz++;
 914     }
 915     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 916
 917     return len;
 918 }
 919
 920
 921 #else // WC_UTF16
 922
 923
 924 // copy 16bit MB to 32bit String
 925 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 926 {
 927     size_t len=0;
 928
 929     while (*(wxUint16*)psz && (!buf || len < n))
 930     {
 931         wxUint32 cc;
 932         size_t pa=decode_utf16((wxUint16*)psz, cc);
 933         if (pa == (size_t)-1)
 934             return pa;
 935
 936         if (buf)
 937             *buf++ = cc;
 938         len++;
 939         psz += pa * sizeof(wxUint16);
 940     }
 941     if (buf && len<n)   *buf=0;
 942
 943     return len;
 944 }
 945
 946
 947 // copy 32bit String to 16bit MB
 948 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 949 {
 950     size_t len=0;
 951
 952     while (*psz && (!buf || len < n))
 953     {
 954         wxUint16 cc[2];
 955         size_t pa=encode_utf16(*psz, cc);
 956
 957         if (pa == (size_t)-1)
 958             return pa;
 959
 960         if (buf)
 961         {
 962             *(wxUint16*)buf = cc[0];
 963             buf += sizeof(wxUint16);
 964             if (pa > 1)
 965             {
 966                 *(wxUint16*)buf = cc[1];
 967                 buf += sizeof(wxUint16);
 968             }
 969         }
 970
 971         len += pa*sizeof(wxUint16);
 972         psz++;
 973     }
 974     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 975
 976     return len;
 977 }
 978
 979
 980 // swap 16bit MB to 32bit String
 981 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 982 {
 983     size_t len=0;
 984
 985     while (*(wxUint16*)psz && (!buf || len < n))
 986     {
 987         wxUint32 cc;
 988         char tmp[4];
 989         tmp[0]=psz[1];  tmp[1]=psz[0];
 990         tmp[2]=psz[3];  tmp[3]=psz[2];
 991
 992         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 993         if (pa == (size_t)-1)
 994             return pa;
 995
 996         if (buf)
 997             *buf++ = cc;
 998
 999         len++;
1000         psz += pa * sizeof(wxUint16);
1001     }
1002     if (buf && len<n)   *buf=0;
1003
1004     return len;
1005 }
1006
1007
1008 // swap 32bit String to 16bit MB
1009 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1010 {
1011     size_t len=0;
1012
1013     while (*psz && (!buf || len < n))
1014     {
1015         wxUint16 cc[2];
1016         size_t pa=encode_utf16(*psz, cc);
1017
1018         if (pa == (size_t)-1)
1019             return pa;
1020
1021         if (buf)
1022         {
1023             *buf++ = ((char*)cc)[1];
1024             *buf++ = ((char*)cc)[0];
1025             if (pa > 1)
1026             {
1027                 *buf++ = ((char*)cc)[3];
1028                 *buf++ = ((char*)cc)[2];
1029             }
1030         }
1031
1032         len += pa*sizeof(wxUint16);
1033         psz++;
1034     }
1035     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1036
1037     return len;
1038 }
1039
1040 #endif // WC_UTF16
1041
1042
1043 // ----------------------------------------------------------------------------
1044 // UTF-32
1045 // ----------------------------------------------------------------------------
1046
1047 #ifdef WORDS_BIGENDIAN
1048 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1049 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1050 #else
1051 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1052 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1053 #endif
1054
1055
1056 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1057 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1058
1059
1060 #ifdef WC_UTF16
1061
1062 // copy 32bit MB to 16bit String
1063 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1064 {
1065     size_t len=0;
1066
1067     while (*(wxUint32*)psz && (!buf || len < n))
1068     {
1069         wxUint16 cc[2];
1070
1071         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1072         if (pa == (size_t)-1)
1073             return pa;
1074
1075         if (buf)
1076         {
1077             *buf++ = cc[0];
1078             if (pa > 1)
1079                 *buf++ = cc[1];
1080         }
1081         len += pa;
1082         psz += sizeof(wxUint32);
1083     }
1084     if (buf && len<n)   *buf=0;
1085
1086     return len;
1087 }
1088
1089
1090 // copy 16bit String to 32bit MB
1091 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1092 {
1093     size_t len=0;
1094
1095     while (*psz && (!buf || len < n))
1096     {
1097         wxUint32 cc;
1098
1099         // cast is ok for WC_UTF16
1100         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1101         if (pa == (size_t)-1)
1102             return pa;
1103
1104         if (buf)
1105         {
1106             *(wxUint32*)buf = cc;
1107             buf += sizeof(wxUint32);
1108         }
1109         len += sizeof(wxUint32);
1110         psz += pa;
1111     }
1112
1113     if (buf && len<=n-sizeof(wxUint32))
1114         *(wxUint32*)buf=0;
1115
1116     return len;
1117 }
1118
1119
1120
1121 // swap 32bit MB to 16bit String
1122 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1123 {
1124     size_t len=0;
1125
1126     while (*(wxUint32*)psz && (!buf || len < n))
1127     {
1128         char tmp[4];
1129         tmp[0] = psz[3];   tmp[1] = psz[2];
1130         tmp[2] = psz[1];   tmp[3] = psz[0];
1131
1132
1133         wxUint16 cc[2];
1134
1135         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1136         if (pa == (size_t)-1)
1137             return pa;
1138
1139         if (buf)
1140         {
1141             *buf++ = cc[0];
1142             if (pa > 1)
1143                 *buf++ = cc[1];
1144         }
1145         len += pa;
1146         psz += sizeof(wxUint32);
1147     }
1148
1149     if (buf && len<n)
1150         *buf=0;
1151
1152     return len;
1153 }
1154
1155
1156 // swap 16bit String to 32bit MB
1157 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1158 {
1159     size_t len=0;
1160
1161     while (*psz && (!buf || len < n))
1162     {
1163         char cc[4];
1164
1165         // cast is ok for WC_UTF16
1166         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1167         if (pa == (size_t)-1)
1168             return pa;
1169
1170         if (buf)
1171         {
1172             *buf++ = cc[3];
1173             *buf++ = cc[2];
1174             *buf++ = cc[1];
1175             *buf++ = cc[0];
1176         }
1177         len += sizeof(wxUint32);
1178         psz += pa;
1179     }
1180
1181     if (buf && len<=n-sizeof(wxUint32))
1182         *(wxUint32*)buf=0;
1183
1184     return len;
1185 }
1186
1187 #else // WC_UTF16
1188
1189
1190 // copy 32bit MB to 32bit String
1191 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1192 {
1193     size_t len=0;
1194
1195     while (*(wxUint32*)psz && (!buf || len < n))
1196     {
1197         if (buf)
1198             *buf++ = *(wxUint32*)psz;
1199         len++;
1200         psz += sizeof(wxUint32);
1201     }
1202
1203     if (buf && len<n)
1204         *buf=0;
1205
1206     return len;
1207 }
1208
1209
1210 // copy 32bit String to 32bit MB
1211 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1212 {
1213     size_t len=0;
1214
1215     while (*psz && (!buf || len < n))
1216     {
1217         if (buf)
1218         {
1219             *(wxUint32*)buf = *psz;
1220             buf += sizeof(wxUint32);
1221         }
1222
1223         len += sizeof(wxUint32);
1224         psz++;
1225     }
1226
1227     if (buf && len<=n-sizeof(wxUint32))
1228         *(wxUint32*)buf=0;
1229
1230     return len;
1231 }
1232
1233
1234 // swap 32bit MB to 32bit String
1235 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1236 {
1237     size_t len=0;
1238
1239     while (*(wxUint32*)psz && (!buf || len < n))
1240     {
1241         if (buf)
1242         {
1243             ((char *)buf)[0] = psz[3];
1244             ((char *)buf)[1] = psz[2];
1245             ((char *)buf)[2] = psz[1];
1246             ((char *)buf)[3] = psz[0];
1247             buf++;
1248         }
1249         len++;
1250         psz += sizeof(wxUint32);
1251     }
1252
1253     if (buf && len<n)
1254         *buf=0;
1255
1256     return len;
1257 }
1258
1259
1260 // swap 32bit String to 32bit MB
1261 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1262 {
1263     size_t len=0;
1264
1265     while (*psz && (!buf || len < n))
1266     {
1267         if (buf)
1268         {
1269             *buf++ = ((char *)psz)[3];
1270             *buf++ = ((char *)psz)[2];
1271             *buf++ = ((char *)psz)[1];
1272             *buf++ = ((char *)psz)[0];
1273         }
1274         len += sizeof(wxUint32);
1275         psz++;
1276     }
1277
1278     if (buf && len<=n-sizeof(wxUint32))
1279         *(wxUint32*)buf=0;
1280
1281     return len;
1282 }
1283
1284
1285 #endif // WC_UTF16
1286
1287
1288 // ============================================================================
1289 // The classes doing conversion using the iconv_xxx() functions
1290 // ============================================================================
1291
1292 #ifdef HAVE_ICONV
1293
1294 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1295 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1296 //     (unless there's yet another bug in glibc) the only case when iconv()
1297 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1298 //     left in the input buffer -- when _real_ error occurs,
1299 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1300 //     iconv() failure.
1301 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if an iconv() call which returned cres
// and left bufLeft bytes of input unconverted should be treated as failed.
// The arguments are parenthesized so that passing an expression (and not
// just a plain variable) is evaluated as a whole.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type which iconv() expects for
// its second argument (ICONV_CONST is defined elsewhere)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1310
1311 // ----------------------------------------------------------------------------
1312 // wxMBConv_iconv: encapsulates an iconv character set
1313 // ----------------------------------------------------------------------------
1314
1315 class wxMBConv_iconv : public wxMBConv
1316 {
1317 public:
1318     wxMBConv_iconv(const wxChar *name);
1319     virtual ~wxMBConv_iconv();
1320
1321     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1322     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1323
1324     bool IsOk() const
1325         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1326
1327 protected:
1328     // the iconv handlers used to translate from multibyte to wide char and in
1329     // the other direction
1330     iconv_t m2w,
1331             w2m;
1332 #if wxUSE_THREADS
1333     // guards access to m2w and w2m objects
1334     wxMutex m_iconvMutex;
1335 #endif
1336
1337 private:
1338     // the name (for iconv_open()) of a wide char charset -- if none is
1339     // available on this machine, it will remain NULL
1340     static const char *ms_wcCharsetName;
1341
1342     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1343     // different endian-ness than the native one
1344     static bool ms_wcNeedsSwap;
1345 };
1346
1347 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1348 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1349
1350 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1351 {
1352     // Do it the hard way
1353     char cname[100];
1354     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1355         cname[i] = (char) name[i];
1356
1357     // check for charset that represents wchar_t:
1358     if (ms_wcCharsetName == NULL)
1359     {
1360         ms_wcNeedsSwap = false;
1361
1362         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1363         ms_wcCharsetName = WC_NAME_BEST;
1364         m2w = iconv_open(ms_wcCharsetName, cname);
1365
1366         if (m2w == (iconv_t)-1)
1367         {
1368             // try charset w/o bytesex info (e.g. "UCS4")
1369             // and check for bytesex ourselves:
1370             ms_wcCharsetName = WC_NAME;
1371             m2w = iconv_open(ms_wcCharsetName, cname);
1372
1373             // last bet, try if it knows WCHAR_T pseudo-charset
1374             if (m2w == (iconv_t)-1)
1375             {
1376                 ms_wcCharsetName = "WCHAR_T";
1377                 m2w = iconv_open(ms_wcCharsetName, cname);
1378             }
1379
1380             if (m2w != (iconv_t)-1)
1381             {
1382                 char    buf[2], *bufPtr;
1383                 wchar_t wbuf[2], *wbufPtr;
1384                 size_t  insz, outsz;
1385                 size_t  res;
1386
1387                 buf[0] = 'A';
1388                 buf[1] = 0;
1389                 wbuf[0] = 0;
1390                 insz = 2;
1391                 outsz = SIZEOF_WCHAR_T * 2;
1392                 wbufPtr = wbuf;
1393                 bufPtr = buf;
1394
1395                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1396                             (char**)&wbufPtr, &outsz);
1397
1398                 if (ICONV_FAILED(res, insz))
1399                 {
1400                     ms_wcCharsetName = NULL;
1401                     wxLogLastError(wxT("iconv"));
1402                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1403                 }
1404                 else
1405                 {
1406                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1407                 }
1408             }
1409             else
1410             {
1411                 ms_wcCharsetName = NULL;
1412
1413                 // VS: we must not output an error here, since wxWidgets will safely
1414                 //     fall back to using wxEncodingConverter.
1415                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1416                 //wxLogError(
1417             }
1418         }
1419         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1420     }
1421     else // we already have ms_wcCharsetName
1422     {
1423         m2w = iconv_open(ms_wcCharsetName, cname);
1424     }
1425
1426     // NB: don't ever pass NULL to iconv_open(), it may crash!
1427     if ( ms_wcCharsetName )
1428     {
1429         w2m = iconv_open( cname, ms_wcCharsetName);
1430     }
1431     else
1432     {
1433         w2m = (iconv_t)-1;
1434     }
1435 }
1436
1437 wxMBConv_iconv::~wxMBConv_iconv()
1438 {
1439     if ( m2w != (iconv_t)-1 )
1440         iconv_close(m2w);
1441     if ( w2m != (iconv_t)-1 )
1442         iconv_close(w2m);
1443 }
1444
1445 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1446 {
1447 #if wxUSE_THREADS
1448     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1449     //     Unfortunately there is a couple of global wxCSConv objects such as
1450     //     wxConvLocal that are used all over wx code, so we have to make sure
1451     //     the handle is used by at most one thread at the time. Otherwise
1452     //     only a few wx classes would be safe to use from non-main threads
1453     //     as MB<->WC conversion would fail "randomly".
1454     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1455 #endif
1456
1457     size_t inbuf = strlen(psz);
1458     size_t outbuf = n * SIZEOF_WCHAR_T;
1459     size_t res, cres;
1460     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1461     wchar_t *bufPtr = buf;
1462     const char *pszPtr = psz;
1463
1464     if (buf)
1465     {
1466         // have destination buffer, convert there
1467         cres = iconv(m2w,
1468                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1469                      (char**)&bufPtr, &outbuf);
1470         res = n - (outbuf / SIZEOF_WCHAR_T);
1471
1472         if (ms_wcNeedsSwap)
1473         {
1474             // convert to native endianness
1475             WC_BSWAP(buf /* _not_ bufPtr */, res)
1476         }
1477
1478         // NB: iconv was given only strlen(psz) characters on input, and so
1479         //     it couldn't convert the trailing zero. Let's do it ourselves
1480         //     if there's some room left for it in the output buffer.
1481         if (res < n)
1482             buf[res] = 0;
1483     }
1484     else
1485     {
1486         // no destination buffer... convert using temp buffer
1487         // to calculate destination buffer requirement
1488         wchar_t tbuf[8];
1489         res = 0;
1490         do {
1491             bufPtr = tbuf;
1492             outbuf = 8*SIZEOF_WCHAR_T;
1493
1494             cres = iconv(m2w,
1495                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1496                          (char**)&bufPtr, &outbuf );
1497
1498             res += 8-(outbuf/SIZEOF_WCHAR_T);
1499         } while ((cres==(size_t)-1) && (errno==E2BIG));
1500     }
1501
1502     if (ICONV_FAILED(cres, inbuf))
1503     {
1504         //VS: it is ok if iconv fails, hence trace only
1505         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1506         return (size_t)-1;
1507     }
1508
1509     return res;
1510 }
1511
1512 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1513 {
1514 #if wxUSE_THREADS
1515     // NB: explained in MB2WC
1516     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1517 #endif
1518
1519     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1520     size_t outbuf = n;
1521     size_t res, cres;
1522
1523     wchar_t *tmpbuf = 0;
1524
1525     if (ms_wcNeedsSwap)
1526     {
1527         // need to copy to temp buffer to switch endianness
1528         // this absolutely doesn't rock!
1529         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1530         //  could be in read-only memory, or be accessed in some other thread)
1531         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1532         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1533         WC_BSWAP(tmpbuf, inbuf)
1534         psz=tmpbuf;
1535     }
1536
1537     if (buf)
1538     {
1539         // have destination buffer, convert there
1540         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1541
1542         res = n-outbuf;
1543
1544         // NB: iconv was given only wcslen(psz) characters on input, and so
1545         //     it couldn't convert the trailing zero. Let's do it ourselves
1546         //     if there's some room left for it in the output buffer.
1547         if (res < n)
1548             buf[0] = 0;
1549     }
1550     else
1551     {
1552         // no destination buffer... convert using temp buffer
1553         // to calculate destination buffer requirement
1554         char tbuf[16];
1555         res = 0;
1556         do {
1557             buf = tbuf; outbuf = 16;
1558
1559             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1560
1561             res += 16 - outbuf;
1562         } while ((cres==(size_t)-1) && (errno==E2BIG));
1563     }
1564
1565     if (ms_wcNeedsSwap)
1566     {
1567         free(tmpbuf);
1568     }
1569
1570     if (ICONV_FAILED(cres, inbuf))
1571     {
1572         //VS: it is ok if iconv fails, hence trace only
1573         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1574         return (size_t)-1;
1575     }
1576
1577     return res;
1578 }
1579
1580 #endif // HAVE_ICONV
1581
1582
1583 // ============================================================================
1584 // Win32 conversion classes
1585 // ============================================================================
1586
1587 #ifdef wxHAVE_WIN32_MB2WC
1588
1589 // from utils.cpp
1590 #if wxUSE_FONTMAP
1591 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1592 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1593 #endif
1594
1595 class wxMBConv_win32 : public wxMBConv
1596 {
1597 public:
1598     wxMBConv_win32()
1599     {
1600         m_CodePage = CP_ACP;
1601     }
1602
1603 #if wxUSE_FONTMAP
1604     wxMBConv_win32(const wxChar* name)
1605     {
1606         m_CodePage = wxCharsetToCodepage(name);
1607     }
1608
1609     wxMBConv_win32(wxFontEncoding encoding)
1610     {
1611         m_CodePage = wxEncodingToCodepage(encoding);
1612     }
1613 #endif
1614
1615     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1616     {
1617         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1618         // the behaviour is not compatible with the Unix version (using iconv)
1619         // and break the library itself, e.g. wxTextInputStream::NextChar()
1620         // wouldn't work if reading an incomplete MB char didn't result in an
1621         // error
1622         //
1623         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1624         // an error (tested under Windows Server 2003) and apparently it is
1625         // done on purpose, i.e. the function accepts any input in this case
1626         // and although I'd prefer to return error on ill-formed output, our
1627         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1628         // explicitly ill-formed according to RFC 2152) neither so we don't
1629         // even have any fallback here...
1630         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1631
1632         const size_t len = ::MultiByteToWideChar
1633                              (
1634                                 m_CodePage,     // code page
1635                                 flags,          // flags: fall on error
1636                                 psz,            // input string
1637                                 -1,             // its length (NUL-terminated)
1638                                 buf,            // output string
1639                                 buf ? n : 0     // size of output buffer
1640                              );
1641
1642         // note that it returns count of written chars for buf != NULL and size
1643         // of the needed buffer for buf == NULL so in either case the length of
1644         // the string (which never includes the terminating NUL) is one less
1645         return len ? len - 1 : (size_t)-1;
1646     }
1647
1648     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1649     {
1650         /*
1651             we have a problem here: by default, WideCharToMultiByte() may
1652             replace characters unrepresentable in the target code page with bad
1653             quality approximations such as turning "1/2" symbol (U+00BD) into
1654             "1" for the code pages which don't have it and we, obviously, want
1655             to avoid this at any price
1656
1657             the trouble is that this function does it _silently_, i.e. it won't
1658             even tell us whether it did or not... Win98/2000 and higher provide
1659             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1660             we have to resort to a round trip, i.e. check that converting back
1661             results in the same string -- this is, of course, expensive but
1662             otherwise we simply can't be sure to not garble the data.
1663          */
1664
1665         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1666         // it doesn't work with CJK encodings (which we test for rather roughly
1667         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1668         // supporting it
1669         BOOL usedDef wxDUMMY_INITIALIZE(false);
1670         BOOL *pUsedDef;
1671         int flags;
1672         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1673         {
1674             // it's our lucky day
1675             flags = WC_NO_BEST_FIT_CHARS;
1676             pUsedDef = &usedDef;
1677         }
1678         else // old system or unsupported encoding
1679         {
1680             flags = 0;
1681             pUsedDef = NULL;
1682         }
1683
1684         const size_t len = ::WideCharToMultiByte
1685                              (
1686                                 m_CodePage,     // code page
1687                                 flags,          // either none or no best fit
1688                                 pwz,            // input string
1689                                 -1,             // it is (wide) NUL-terminated
1690                                 buf,            // output buffer
1691                                 buf ? n : 0,    // and its size
1692                                 NULL,           // default "replacement" char
1693                                 pUsedDef        // [out] was it used?
1694                              );
1695
1696         if ( !len )
1697         {
1698             // function totally failed
1699             return (size_t)-1;
1700         }
1701
1702         // if we were really converting, check if we succeeded
1703         if ( buf )
1704         {
1705             if ( flags )
1706             {
1707                 // check if the conversion failed, i.e. if any replacements
1708                 // were done
1709                 if ( usedDef )
1710                     return (size_t)-1;
1711             }
1712             else // we must resort to double tripping...
1713             {
1714                 wxWCharBuffer wcBuf(n);
1715                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1716                         wcscmp(wcBuf, pwz) != 0 )
1717                 {
1718                     // we didn't obtain the same thing we started from, hence
1719                     // the conversion was lossy and we consider that it failed
1720                     return (size_t)-1;
1721                 }
1722             }
1723         }
1724
1725         // see the comment above for the reason of "len - 1"
1726         return len - 1;
1727     }
1728
1729     bool IsOk() const { return m_CodePage != -1; }
1730
1731 private:
1732     static bool CanUseNoBestFit()
1733     {
1734         static int s_isWin98Or2k = -1;
1735
1736         if ( s_isWin98Or2k == -1 )
1737         {
1738             int verMaj, verMin;
1739             switch ( wxGetOsVersion(&verMaj, &verMin) )
1740             {
1741                 case wxWIN95:
1742                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1743                     break;
1744
1745                 case wxWINDOWS_NT:
1746                     s_isWin98Or2k = verMaj >= 5;
1747                     break;
1748
1749                 default:
1750                     // unknown, be conseravtive by default
1751                     s_isWin98Or2k = 0;
1752             }
1753
1754             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1755         }
1756
1757         return s_isWin98Or2k == 1;
1758     }
1759
1760     long m_CodePage;
1761 };
1762
1763 #endif // wxHAVE_WIN32_MB2WC
1764
1765 // ============================================================================
1766 // Cocoa conversion classes
1767 // ============================================================================
1768
1769 #if defined(__WXCOCOA__)
1770
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1774
1775 #include <CoreFoundation/CFString.h>
1776 #include <CoreFoundation/CFStringEncodingExt.h>
1777
1778 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1779 {
1780     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1781     if ( encoding == wxFONTENCODING_DEFAULT )
1782     {
1783         enc = CFStringGetSystemEncoding();
1784     }
1785     else switch( encoding)
1786     {
1787         case wxFONTENCODING_ISO8859_1 :
1788             enc = kCFStringEncodingISOLatin1 ;
1789             break ;
1790         case wxFONTENCODING_ISO8859_2 :
1791             enc = kCFStringEncodingISOLatin2;
1792             break ;
1793         case wxFONTENCODING_ISO8859_3 :
1794             enc = kCFStringEncodingISOLatin3 ;
1795             break ;
1796         case wxFONTENCODING_ISO8859_4 :
1797             enc = kCFStringEncodingISOLatin4;
1798             break ;
1799         case wxFONTENCODING_ISO8859_5 :
1800             enc = kCFStringEncodingISOLatinCyrillic;
1801             break ;
1802         case wxFONTENCODING_ISO8859_6 :
1803             enc = kCFStringEncodingISOLatinArabic;
1804             break ;
1805         case wxFONTENCODING_ISO8859_7 :
1806             enc = kCFStringEncodingISOLatinGreek;
1807             break ;
1808         case wxFONTENCODING_ISO8859_8 :
1809             enc = kCFStringEncodingISOLatinHebrew;
1810             break ;
1811         case wxFONTENCODING_ISO8859_9 :
1812             enc = kCFStringEncodingISOLatin5;
1813             break ;
1814         case wxFONTENCODING_ISO8859_10 :
1815             enc = kCFStringEncodingISOLatin6;
1816             break ;
1817         case wxFONTENCODING_ISO8859_11 :
1818             enc = kCFStringEncodingISOLatinThai;
1819             break ;
1820         case wxFONTENCODING_ISO8859_13 :
1821             enc = kCFStringEncodingISOLatin7;
1822             break ;
1823         case wxFONTENCODING_ISO8859_14 :
1824             enc = kCFStringEncodingISOLatin8;
1825             break ;
1826         case wxFONTENCODING_ISO8859_15 :
1827             enc = kCFStringEncodingISOLatin9;
1828             break ;
1829
1830         case wxFONTENCODING_KOI8 :
1831             enc = kCFStringEncodingKOI8_R;
1832             break ;
1833         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1834             enc = kCFStringEncodingDOSRussian;
1835             break ;
1836
1837 //      case wxFONTENCODING_BULGARIAN :
1838 //          enc = ;
1839 //          break ;
1840
1841         case wxFONTENCODING_CP437 :
1842             enc =kCFStringEncodingDOSLatinUS ;
1843             break ;
1844         case wxFONTENCODING_CP850 :
1845             enc = kCFStringEncodingDOSLatin1;
1846             break ;
1847         case wxFONTENCODING_CP852 :
1848             enc = kCFStringEncodingDOSLatin2;
1849             break ;
1850         case wxFONTENCODING_CP855 :
1851             enc = kCFStringEncodingDOSCyrillic;
1852             break ;
1853         case wxFONTENCODING_CP866 :
1854             enc =kCFStringEncodingDOSRussian ;
1855             break ;
1856         case wxFONTENCODING_CP874 :
1857             enc = kCFStringEncodingDOSThai;
1858             break ;
1859         case wxFONTENCODING_CP932 :
1860             enc = kCFStringEncodingDOSJapanese;
1861             break ;
1862         case wxFONTENCODING_CP936 :
1863             enc =kCFStringEncodingDOSChineseSimplif ;
1864             break ;
1865         case wxFONTENCODING_CP949 :
1866             enc = kCFStringEncodingDOSKorean;
1867             break ;
1868         case wxFONTENCODING_CP950 :
1869             enc = kCFStringEncodingDOSChineseTrad;
1870             break ;
1871         case wxFONTENCODING_CP1250 :
1872             enc = kCFStringEncodingWindowsLatin2;
1873             break ;
1874         case wxFONTENCODING_CP1251 :
1875             enc =kCFStringEncodingWindowsCyrillic ;
1876             break ;
1877         case wxFONTENCODING_CP1252 :
1878             enc =kCFStringEncodingWindowsLatin1 ;
1879             break ;
1880         case wxFONTENCODING_CP1253 :
1881             enc = kCFStringEncodingWindowsGreek;
1882             break ;
1883         case wxFONTENCODING_CP1254 :
1884             enc = kCFStringEncodingWindowsLatin5;
1885             break ;
1886         case wxFONTENCODING_CP1255 :
1887             enc =kCFStringEncodingWindowsHebrew ;
1888             break ;
1889         case wxFONTENCODING_CP1256 :
1890             enc =kCFStringEncodingWindowsArabic ;
1891             break ;
1892         case wxFONTENCODING_CP1257 :
1893             enc = kCFStringEncodingWindowsBalticRim;
1894             break ;
1895 //   This only really encodes to UTF7 (if that) evidently
1896 //        case wxFONTENCODING_UTF7 :
1897 //            enc = kCFStringEncodingNonLossyASCII ;
1898 //            break ;
1899         case wxFONTENCODING_UTF8 :
1900             enc = kCFStringEncodingUTF8 ;
1901             break ;
1902         case wxFONTENCODING_EUC_JP :
1903             enc = kCFStringEncodingEUC_JP;
1904             break ;
1905         case wxFONTENCODING_UTF16 :
1906             enc = kCFStringEncodingUnicode ;
1907             break ;
1908         case wxFONTENCODING_MACROMAN :
1909             enc = kCFStringEncodingMacRoman ;
1910             break ;
1911         case wxFONTENCODING_MACJAPANESE :
1912             enc = kCFStringEncodingMacJapanese ;
1913             break ;
1914         case wxFONTENCODING_MACCHINESETRAD :
1915             enc = kCFStringEncodingMacChineseTrad ;
1916             break ;
1917         case wxFONTENCODING_MACKOREAN :
1918             enc = kCFStringEncodingMacKorean ;
1919             break ;
1920         case wxFONTENCODING_MACARABIC :
1921             enc = kCFStringEncodingMacArabic ;
1922             break ;
1923         case wxFONTENCODING_MACHEBREW :
1924             enc = kCFStringEncodingMacHebrew ;
1925             break ;
1926         case wxFONTENCODING_MACGREEK :
1927             enc = kCFStringEncodingMacGreek ;
1928             break ;
1929         case wxFONTENCODING_MACCYRILLIC :
1930             enc = kCFStringEncodingMacCyrillic ;
1931             break ;
1932         case wxFONTENCODING_MACDEVANAGARI :
1933             enc = kCFStringEncodingMacDevanagari ;
1934             break ;
1935         case wxFONTENCODING_MACGURMUKHI :
1936             enc = kCFStringEncodingMacGurmukhi ;
1937             break ;
1938         case wxFONTENCODING_MACGUJARATI :
1939             enc = kCFStringEncodingMacGujarati ;
1940             break ;
1941         case wxFONTENCODING_MACORIYA :
1942             enc = kCFStringEncodingMacOriya ;
1943             break ;
1944         case wxFONTENCODING_MACBENGALI :
1945             enc = kCFStringEncodingMacBengali ;
1946             break ;
1947         case wxFONTENCODING_MACTAMIL :
1948             enc = kCFStringEncodingMacTamil ;
1949             break ;
1950         case wxFONTENCODING_MACTELUGU :
1951             enc = kCFStringEncodingMacTelugu ;
1952             break ;
1953         case wxFONTENCODING_MACKANNADA :
1954             enc = kCFStringEncodingMacKannada ;
1955             break ;
1956         case wxFONTENCODING_MACMALAJALAM :
1957             enc = kCFStringEncodingMacMalayalam ;
1958             break ;
1959         case wxFONTENCODING_MACSINHALESE :
1960             enc = kCFStringEncodingMacSinhalese ;
1961             break ;
1962         case wxFONTENCODING_MACBURMESE :
1963             enc = kCFStringEncodingMacBurmese ;
1964             break ;
1965         case wxFONTENCODING_MACKHMER :
1966             enc = kCFStringEncodingMacKhmer ;
1967             break ;
1968         case wxFONTENCODING_MACTHAI :
1969             enc = kCFStringEncodingMacThai ;
1970             break ;
1971         case wxFONTENCODING_MACLAOTIAN :
1972             enc = kCFStringEncodingMacLaotian ;
1973             break ;
1974         case wxFONTENCODING_MACGEORGIAN :
1975             enc = kCFStringEncodingMacGeorgian ;
1976             break ;
1977         case wxFONTENCODING_MACARMENIAN :
1978             enc = kCFStringEncodingMacArmenian ;
1979             break ;
1980         case wxFONTENCODING_MACCHINESESIMP :
1981             enc = kCFStringEncodingMacChineseSimp ;
1982             break ;
1983         case wxFONTENCODING_MACTIBETAN :
1984             enc = kCFStringEncodingMacTibetan ;
1985             break ;
1986         case wxFONTENCODING_MACMONGOLIAN :
1987             enc = kCFStringEncodingMacMongolian ;
1988             break ;
1989         case wxFONTENCODING_MACETHIOPIC :
1990             enc = kCFStringEncodingMacEthiopic ;
1991             break ;
1992         case wxFONTENCODING_MACCENTRALEUR :
1993             enc = kCFStringEncodingMacCentralEurRoman ;
1994             break ;
1995         case wxFONTENCODING_MACVIATNAMESE :
1996             enc = kCFStringEncodingMacVietnamese ;
1997             break ;
1998         case wxFONTENCODING_MACARABICEXT :
1999             enc = kCFStringEncodingMacExtArabic ;
2000             break ;
2001         case wxFONTENCODING_MACSYMBOL :
2002             enc = kCFStringEncodingMacSymbol ;
2003             break ;
2004         case wxFONTENCODING_MACDINGBATS :
2005             enc = kCFStringEncodingMacDingbats ;
2006             break ;
2007         case wxFONTENCODING_MACTURKISH :
2008             enc = kCFStringEncodingMacTurkish ;
2009             break ;
2010         case wxFONTENCODING_MACCROATIAN :
2011             enc = kCFStringEncodingMacCroatian ;
2012             break ;
2013         case wxFONTENCODING_MACICELANDIC :
2014             enc = kCFStringEncodingMacIcelandic ;
2015             break ;
2016         case wxFONTENCODING_MACROMANIAN :
2017             enc = kCFStringEncodingMacRomanian ;
2018             break ;
2019         case wxFONTENCODING_MACCELTIC :
2020             enc = kCFStringEncodingMacCeltic ;
2021             break ;
2022         case wxFONTENCODING_MACGAELIC :
2023             enc = kCFStringEncodingMacGaelic ;
2024             break ;
2025 //      case wxFONTENCODING_MACKEYBOARD :
2026 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2027 //          break ;
2028         default :
2029             // because gcc is picky
2030             break ;
2031     } ;
2032     return enc ;
2033 }
2034
2035 class wxMBConv_cocoa : public wxMBConv
2036 {
2037 public:
2038     wxMBConv_cocoa()
2039     {
2040         Init(CFStringGetSystemEncoding()) ;
2041     }
2042
2043 #if wxUSE_FONTMAP
2044     wxMBConv_cocoa(const wxChar* name)
2045     {
2046         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2047     }
2048 #endif
2049
2050     wxMBConv_cocoa(wxFontEncoding encoding)
2051     {
2052         Init( wxCFStringEncFromFontEnc(encoding) );
2053     }
2054
2055     ~wxMBConv_cocoa()
2056     {
2057     }
2058
2059     void Init( CFStringEncoding encoding)
2060     {
2061         m_encoding = encoding ;
2062     }
2063
2064     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2065     {
2066         wxASSERT(szUnConv);
2067
2068         CFStringRef theString = CFStringCreateWithBytes (
2069                                                 NULL, //the allocator
2070                                                 (const UInt8*)szUnConv,
2071                                                 strlen(szUnConv),
2072                                                 m_encoding,
2073                                                 false //no BOM/external representation
2074                                                 );
2075
2076         wxASSERT(theString);
2077
2078         size_t nOutLength = CFStringGetLength(theString);
2079
2080         if (szOut == NULL)
2081         {
2082             CFRelease(theString);
2083             return nOutLength;
2084         }
2085
2086         CFRange theRange = { 0, nOutSize };
2087
2088 #if SIZEOF_WCHAR_T == 4
2089         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2090 #endif
2091
2092         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2093
2094         CFRelease(theString);
2095
2096         szUniCharBuffer[nOutLength] = '\0' ;
2097
2098 #if SIZEOF_WCHAR_T == 4
2099         wxMBConvUTF16 converter ;
2100         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2101         delete[] szUniCharBuffer;
2102 #endif
2103
2104         return nOutLength;
2105     }
2106
2107     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2108     {
2109         wxASSERT(szUnConv);
2110
2111         size_t nRealOutSize;
2112         size_t nBufSize = wxWcslen(szUnConv);
2113         UniChar* szUniBuffer = (UniChar*) szUnConv;
2114
2115 #if SIZEOF_WCHAR_T == 4
2116         wxMBConvUTF16 converter ;
2117         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2118         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2119         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2120         nBufSize /= sizeof(UniChar);
2121 #endif
2122
2123         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2124                                 NULL, //allocator
2125                                 szUniBuffer,
2126                                 nBufSize,
2127                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2128                             );
2129
2130         wxASSERT(theString);
2131
2132         //Note that CER puts a BOM when converting to unicode
2133         //so we  check and use getchars instead in that case
2134         if (m_encoding == kCFStringEncodingUnicode)
2135         {
2136             if (szOut != NULL)
2137                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2138
2139             nRealOutSize = CFStringGetLength(theString) + 1;
2140         }
2141         else
2142         {
2143             CFStringGetBytes(
2144                 theString,
2145                 CFRangeMake(0, CFStringGetLength(theString)),
2146                 m_encoding,
2147                 0, //what to put in characters that can't be converted -
2148                     //0 tells CFString to return NULL if it meets such a character
2149                 false, //not an external representation
2150                 (UInt8*) szOut,
2151                 nOutSize,
2152                 (CFIndex*) &nRealOutSize
2153                         );
2154         }
2155
2156         CFRelease(theString);
2157
2158 #if SIZEOF_WCHAR_T == 4
2159         delete[] szUniBuffer;
2160 #endif
2161
2162         return  nRealOutSize - 1;
2163     }
2164
2165     bool IsOk() const
2166     {
2167         return m_encoding != kCFStringEncodingInvalidId &&
2168               CFStringIsEncodingAvailable(m_encoding);
2169     }
2170
2171 private:
2172     CFStringEncoding m_encoding ;
2173 };
2174
2175 #endif // defined(__WXCOCOA__)
2176
2177 // ============================================================================
2178 // Mac conversion classes
2179 // ============================================================================
2180
2181 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2182
2183 class wxMBConv_mac : public wxMBConv
2184 {
2185 public:
2186     wxMBConv_mac()
2187     {
2188         Init(CFStringGetSystemEncoding()) ;
2189     }
2190
2191 #if wxUSE_FONTMAP
2192     wxMBConv_mac(const wxChar* name)
2193     {
2194         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2195     }
2196 #endif
2197
2198     wxMBConv_mac(wxFontEncoding encoding)
2199     {
2200         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2201     }
2202
2203     ~wxMBConv_mac()
2204     {
2205         OSStatus status = noErr ;
2206         status = TECDisposeConverter(m_MB2WC_converter);
2207         status = TECDisposeConverter(m_WC2MB_converter);
2208     }
2209
2210
2211     void Init( TextEncodingBase encoding)
2212     {
2213         OSStatus status = noErr ;
2214         m_char_encoding = encoding ;
2215         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2216
2217         status = TECCreateConverter(&m_MB2WC_converter,
2218                                     m_char_encoding,
2219                                     m_unicode_encoding);
2220         status = TECCreateConverter(&m_WC2MB_converter,
2221                                     m_unicode_encoding,
2222                                     m_char_encoding);
2223     }
2224
2225     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2226     {
2227         OSStatus status = noErr ;
2228         ByteCount byteOutLen ;
2229         ByteCount byteInLen = strlen(psz) ;
2230         wchar_t *tbuf = NULL ;
2231         UniChar* ubuf = NULL ;
2232         size_t res = 0 ;
2233
2234         if (buf == NULL)
2235         {
2236             //apple specs say at least 32
2237             n = wxMax( 32 , byteInLen ) ;
2238             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2239         }
2240         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2241 #if SIZEOF_WCHAR_T == 4
2242         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2243 #else
2244         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2245 #endif
2246         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2247           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2248 #if SIZEOF_WCHAR_T == 4
2249         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2250         // is not properly terminated we get random characters at the end
2251         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2252         wxMBConvUTF16 converter ;
2253         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2254         free( ubuf ) ;
2255 #else
2256         res = byteOutLen / sizeof( UniChar ) ;
2257 #endif
2258         if ( buf == NULL )
2259              free(tbuf) ;
2260
2261         if ( buf  && res < n)
2262             buf[res] = 0;
2263
2264         return res ;
2265     }
2266
2267     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2268     {
2269         OSStatus status = noErr ;
2270         ByteCount byteOutLen ;
2271         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2272
2273         char *tbuf = NULL ;
2274
2275         if (buf == NULL)
2276         {
2277             //apple specs say at least 32
2278             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2279             tbuf = (char*) malloc( n ) ;
2280         }
2281
2282         ByteCount byteBufferLen = n ;
2283         UniChar* ubuf = NULL ;
2284 #if SIZEOF_WCHAR_T == 4
2285         wxMBConvUTF16 converter ;
2286         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2287         byteInLen = unicharlen ;
2288         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2289         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2290 #else
2291         ubuf = (UniChar*) psz ;
2292 #endif
2293         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2294             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2295 #if SIZEOF_WCHAR_T == 4
2296         free( ubuf ) ;
2297 #endif
2298         if ( buf == NULL )
2299             free(tbuf) ;
2300
2301         size_t res = byteOutLen ;
2302         if ( buf  && res < n)
2303         {
2304             buf[res] = 0;
2305
2306             //we need to double-trip to verify it didn't insert any ? in place
2307             //of bogus characters
2308             wxWCharBuffer wcBuf(n);
2309             size_t pszlen = wxWcslen(psz);
2310             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2311                         wxWcslen(wcBuf) != pszlen ||
2312                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2313             {
2314                 // we didn't obtain the same thing we started from, hence
2315                 // the conversion was lossy and we consider that it failed
2316                 return (size_t)-1;
2317             }
2318         }
2319
2320         return res ;
2321     }
2322
2323     bool IsOk() const
2324         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2325
2326 private:
2327     TECObjectRef m_MB2WC_converter ;
2328     TECObjectRef m_WC2MB_converter ;
2329
2330     TextEncodingBase m_char_encoding ;
2331     TextEncodingBase m_unicode_encoding ;
2332 };
2333
2334 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2335
2336 // ============================================================================
2337 // wxEncodingConverter based conversion classes
2338 // ============================================================================
2339
2340 #if wxUSE_FONTMAP
2341
2342 class wxMBConv_wxwin : public wxMBConv
2343 {
2344 private:
2345     void Init()
2346     {
2347         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2348                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2349     }
2350
2351 public:
2352     // temporarily just use wxEncodingConverter stuff,
2353     // so that it works while a better implementation is built
2354     wxMBConv_wxwin(const wxChar* name)
2355     {
2356         if (name)
2357             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2358         else
2359             m_enc = wxFONTENCODING_SYSTEM;
2360
2361         Init();
2362     }
2363
2364     wxMBConv_wxwin(wxFontEncoding enc)
2365     {
2366         m_enc = enc;
2367
2368         Init();
2369     }
2370
2371     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2372     {
2373         size_t inbuf = strlen(psz);
2374         if (buf)
2375         {
2376             if (!m2w.Convert(psz,buf))
2377                 return (size_t)-1;
2378         }
2379         return inbuf;
2380     }
2381
2382     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2383     {
2384         const size_t inbuf = wxWcslen(psz);
2385         if (buf)
2386         {
2387             if (!w2m.Convert(psz,buf))
2388                 return (size_t)-1;
2389         }
2390
2391         return inbuf;
2392     }
2393
2394     bool IsOk() const { return m_ok; }
2395
2396 public:
2397     wxFontEncoding m_enc;
2398     wxEncodingConverter m2w, w2m;
2399
2400     // were we initialized successfully?
2401     bool m_ok;
2402
2403     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2404 };
2405
2406 #endif // wxUSE_FONTMAP
2407
2408 // ============================================================================
2409 // wxCSConv implementation
2410 // ============================================================================
2411
2412 void wxCSConv::Init()
2413 {
2414     m_name = NULL;
2415     m_convReal =  NULL;
2416     m_deferred = true;
2417 }
2418
2419 wxCSConv::wxCSConv(const wxChar *charset)
2420 {
2421     Init();
2422
2423     if ( charset )
2424     {
2425         SetName(charset);
2426     }
2427
2428     m_encoding = wxFONTENCODING_SYSTEM;
2429 }
2430
2431 wxCSConv::wxCSConv(wxFontEncoding encoding)
2432 {
2433     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2434     {
2435         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2436
2437         encoding = wxFONTENCODING_SYSTEM;
2438     }
2439
2440     Init();
2441
2442     m_encoding = encoding;
2443 }
2444
2445 wxCSConv::~wxCSConv()
2446 {
2447     Clear();
2448 }
2449
2450 wxCSConv::wxCSConv(const wxCSConv& conv)
2451         : wxMBConv()
2452 {
2453     Init();
2454
2455     SetName(conv.m_name);
2456     m_encoding = conv.m_encoding;
2457 }
2458
2459 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2460 {
2461     Clear();
2462
2463     SetName(conv.m_name);
2464     m_encoding = conv.m_encoding;
2465
2466     return *this;
2467 }
2468
2469 void wxCSConv::Clear()
2470 {
2471     free(m_name);
2472     delete m_convReal;
2473
2474     m_name = NULL;
2475     m_convReal = NULL;
2476 }
2477
2478 void wxCSConv::SetName(const wxChar *charset)
2479 {
2480     if (charset)
2481     {
2482         m_name = wxStrdup(charset);
2483         m_deferred = true;
2484     }
2485 }
2486
2487 wxMBConv *wxCSConv::DoCreate() const
2488 {
2489     // check for the special case of ASCII or ISO8859-1 charset: as we have
2490     // special knowledge of it anyhow, we don't need to create a special
2491     // conversion object
2492     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2493     {
2494         // don't convert at all
2495         return NULL;
2496     }
2497
2498     // we trust OS to do conversion better than we can so try external
2499     // conversion methods first
2500     //
2501     // the full order is:
2502     //      1. OS conversion (iconv() under Unix or Win32 API)
2503     //      2. hard coded conversions for UTF
2504     //      3. wxEncodingConverter as fall back
2505
2506     // step (1)
2507 #ifdef HAVE_ICONV
2508 #if !wxUSE_FONTMAP
2509     if ( m_name )
2510 #endif // !wxUSE_FONTMAP
2511     {
2512         wxString name(m_name);
2513
2514 #if wxUSE_FONTMAP
2515         if ( name.empty() )
2516             name = wxFontMapperBase::GetEncodingName(m_encoding);
2517 #endif // wxUSE_FONTMAP
2518
2519         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2520         if ( conv->IsOk() )
2521             return conv;
2522
2523         delete conv;
2524     }
2525 #endif // HAVE_ICONV
2526
2527 #ifdef wxHAVE_WIN32_MB2WC
2528     {
2529 #if wxUSE_FONTMAP
2530         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2531                                       : new wxMBConv_win32(m_encoding);
2532         if ( conv->IsOk() )
2533             return conv;
2534
2535         delete conv;
2536 #else
2537         return NULL;
2538 #endif
2539     }
2540 #endif // wxHAVE_WIN32_MB2WC
2541 #if defined(__WXMAC__)
2542     {
2543         // leave UTF16 and UTF32 to the built-ins of wx
2544         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2545             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2546         {
2547
2548 #if wxUSE_FONTMAP
2549             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2550                                         : new wxMBConv_mac(m_encoding);
2551 #else
2552             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2553 #endif
2554             if ( conv->IsOk() )
2555                  return conv;
2556
2557             delete conv;
2558         }
2559     }
2560 #endif
2561 #if defined(__WXCOCOA__)
2562     {
2563         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2564         {
2565
2566 #if wxUSE_FONTMAP
2567             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2568                                           : new wxMBConv_cocoa(m_encoding);
2569 #else
2570             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2571 #endif
2572             if ( conv->IsOk() )
2573                  return conv;
2574
2575             delete conv;
2576         }
2577     }
2578 #endif
2579     // step (2)
2580     wxFontEncoding enc = m_encoding;
2581 #if wxUSE_FONTMAP
2582     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2583     {
2584         // use "false" to suppress interactive dialogs -- we can be called from
2585         // anywhere and popping up a dialog from here is the last thing we want to
2586         // do
2587         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2588     }
2589 #endif // wxUSE_FONTMAP
2590
2591     switch ( enc )
2592     {
2593         case wxFONTENCODING_UTF7:
2594              return new wxMBConvUTF7;
2595
2596         case wxFONTENCODING_UTF8:
2597              return new wxMBConvUTF8;
2598
2599         case wxFONTENCODING_UTF16BE:
2600              return new wxMBConvUTF16BE;
2601
2602         case wxFONTENCODING_UTF16LE:
2603              return new wxMBConvUTF16LE;
2604
2605         case wxFONTENCODING_UTF32BE:
2606              return new wxMBConvUTF32BE;
2607
2608         case wxFONTENCODING_UTF32LE:
2609              return new wxMBConvUTF32LE;
2610
2611         default:
2612              // nothing to do but put here to suppress gcc warnings
2613              ;
2614     }
2615
2616     // step (3)
2617 #if wxUSE_FONTMAP
2618     {
2619         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2620                                       : new wxMBConv_wxwin(m_encoding);
2621         if ( conv->IsOk() )
2622             return conv;
2623
2624         delete conv;
2625     }
2626 #endif // wxUSE_FONTMAP
2627
2628     // NB: This is a hack to prevent deadlock. What could otherwise happen
2629     //     in Unicode build: wxConvLocal creation ends up being here
2630     //     because of some failure and logs the error. But wxLog will try to
2631     //     attach timestamp, for which it will need wxConvLocal (to convert
2632     //     time to char* and then wchar_t*), but that fails, tries to log
2633     //     error, but wxLog has a (already locked) critical section that
2634     //     guards static buffer.
2635     static bool alreadyLoggingError = false;
2636     if (!alreadyLoggingError)
2637     {
2638         alreadyLoggingError = true;
2639         wxLogError(_("Cannot convert from the charset '%s'!"),
2640                    m_name ? m_name
2641                       :
2642 #if wxUSE_FONTMAP
2643                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2644 #else // !wxUSE_FONTMAP
2645                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2646 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2647               );
2648         alreadyLoggingError = false;
2649     }
2650
2651     return NULL;
2652 }
2653
2654 void wxCSConv::CreateConvIfNeeded() const
2655 {
2656     if ( m_deferred )
2657     {
2658         wxCSConv *self = (wxCSConv *)this; // const_cast
2659
2660 #if wxUSE_INTL
2661         // if we don't have neither the name nor the encoding, use the default
2662         // encoding for this system
2663         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2664         {
2665             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2666         }
2667 #endif // wxUSE_INTL
2668
2669         self->m_convReal = DoCreate();
2670         self->m_deferred = false;
2671     }
2672 }
2673
2674 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2675 {
2676     CreateConvIfNeeded();
2677
2678     if (m_convReal)
2679         return m_convReal->MB2WC(buf, psz, n);
2680
2681     // latin-1 (direct)
2682     size_t len = strlen(psz);
2683
2684     if (buf)
2685     {
2686         for (size_t c = 0; c <= len; c++)
2687             buf[c] = (unsigned char)(psz[c]);
2688     }
2689
2690     return len;
2691 }
2692
2693 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2694 {
2695     CreateConvIfNeeded();
2696
2697     if (m_convReal)
2698         return m_convReal->WC2MB(buf, psz, n);
2699
2700     // latin-1 (direct)
2701     const size_t len = wxWcslen(psz);
2702     if (buf)
2703     {
2704         for (size_t c = 0; c <= len; c++)
2705         {
2706             if (psz[c] > 0xFF)
2707                 return (size_t)-1;
2708             buf[c] = (char)psz[c];
2709         }
2710     }
2711     else
2712     {
2713         for (size_t c = 0; c <= len; c++)
2714         {
2715             if (psz[c] > 0xFF)
2716                 return (size_t)-1;
2717         }
2718     }
2719
2720     return len;
2721 }
2722
2723 // ----------------------------------------------------------------------------
2724 // globals
2725 // ----------------------------------------------------------------------------
2726
// the object behind wxConvLibc: its type is chosen depending on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;

// the exported references and pointers, all bound to the static objects above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent initially points to the libc converter
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
// wxConvFileName points to the UTF-8 converter if __WXOSX__ is defined and
// to the libc one otherwise
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
                                    wxConvUTF8Obj;
#else
                                    wxConvLibcObj;
#endif
2752
2753
2754 #else // !wxUSE_WCHAR_T
2755
// stand-ins in absence of wchar_t: plain wxMBConv objects defined under the
// same names as some of the converters available in the wxUSE_WCHAR_T build
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2761
2762 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2763
2764