src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73     #include "wx/thread.h"
  74 #endif
  75
  76 #include "wx/encconv.h"
  77 #include "wx/fontmap.h"
  78 #include "wx/utils.h"
  79
  80 #ifdef __WXMAC__
  81 #include <ATSUnicode.h>
  82 #include <TextCommon.h>
  83 #include <TextEncodingConverter.h>
  84
  85 #include  "wx/mac/private.h"  // includes mac headers
  86 #endif
  87 // ----------------------------------------------------------------------------
  88 // macros
  89 // ----------------------------------------------------------------------------
  90
// Byte-swap, in place, the elements str[0] .. str[len-1]: BSWAP_UCS4 applies
// wxUINT32_SWAP_ALWAYS to each element and BSWAP_UTF16 applies
// wxUINT16_SWAP_ALWAYS.
// NOTE(review): both macros expand to a bare { } block and use their
// arguments unparenthesized, so only simple expressions should be passed.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  93
  94 #if SIZEOF_WCHAR_T == 4
  95     #define WC_NAME         "UCS4"
  96     #define WC_BSWAP         BSWAP_UCS4
  97     #ifdef WORDS_BIGENDIAN
  98       #define WC_NAME_BEST  "UCS-4BE"
  99     #else
 100       #define WC_NAME_BEST  "UCS-4LE"
 101     #endif
 102 #elif SIZEOF_WCHAR_T == 2
 103     #define WC_NAME         "UTF16"
 104     #define WC_BSWAP         BSWAP_UTF16
 105     #define WC_UTF16
 106     #ifdef WORDS_BIGENDIAN
 107       #define WC_NAME_BEST  "UTF-16BE"
 108     #else
 109       #define WC_NAME_BEST  "UTF-16LE"
 110     #endif
 111 #else // sizeof(wchar_t) != 2 nor 4
 112     // does this ever happen?
 113     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 114 #endif
 115
 116 // ============================================================================
 117 // implementation
 118 // ============================================================================
 119
 120 // ----------------------------------------------------------------------------
 121 // UTF-16 en/decoding to/from UCS-4
 122 // ----------------------------------------------------------------------------
 123
 124
 125 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 126 {
 127     if (input<=0xffff)
 128     {
 129         if (output)
 130             *output = (wxUint16) input;
 131         return 1;
 132     }
 133     else if (input>=0x110000)
 134     {
 135         return (size_t)-1;
 136     }
 137     else
 138     {
 139         if (output)
 140         {
 141             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 142             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 143         }
 144         return 2;
 145     }
 146 }
 147
 148 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 149 {
 150     if ((*input<0xd800) || (*input>0xdfff))
 151     {
 152         output = *input;
 153         return 1;
 154     }
 155     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 156     {
 157         output = *input;
 158         return (size_t)-1;
 159     }
 160     else
 161     {
 162         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 163         return 2;
 164     }
 165 }
 166
 167
 168 // ----------------------------------------------------------------------------
 169 // wxMBConv
 170 // ----------------------------------------------------------------------------
 171
 172 wxMBConv::~wxMBConv()
 173 {
 174     // nothing to do here (necessary for Darwin linking probably)
 175 }
 176
 177 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 178 {
 179     if ( psz )
 180     {
 181         // calculate the length of the buffer needed first
 182         size_t nLen = MB2WC(NULL, psz, 0);
 183         if ( nLen != (size_t)-1 )
 184         {
 185             // now do the actual conversion
 186             wxWCharBuffer buf(nLen);
 187             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 188             if ( nLen != (size_t)-1 )
 189             {
 190                 return buf;
 191             }
 192         }
 193     }
 194
 195     wxWCharBuffer buf((wchar_t *)NULL);
 196
 197     return buf;
 198 }
 199
 200 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 201 {
 202     if ( pwz )
 203     {
 204         size_t nLen = WC2MB(NULL, pwz, 0);
 205         if ( nLen != (size_t)-1 )
 206         {
 207             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 208             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 209             if ( nLen != (size_t)-1 )
 210             {
 211                 return buf;
 212             }
 213         }
 214     }
 215
 216     wxCharBuffer buf((char *)NULL);
 217
 218     return buf;
 219 }
 220
 221 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 222 {
 223     wxASSERT(pOutSize != NULL);
 224
 225     const char* szEnd = szString + nStringLen + 1;
 226     const char* szPos = szString;
 227     const char* szStart = szPos;
 228
 229     size_t nActualLength = 0;
 230     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 231
 232     wxWCharBuffer theBuffer(nCurrentSize);
 233
 234     //Convert the string until the length() is reached, continuing the
 235     //loop every time a null character is reached
 236     while(szPos != szEnd)
 237     {
 238         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 239
 240         //Get the length of the current (sub)string
 241         size_t nLen = MB2WC(NULL, szPos, 0);
 242
 243         //Invalid conversion?
 244         if( nLen == (size_t)-1 )
 245         {
 246             *pOutSize = 0;
 247             theBuffer.data()[0u] = wxT('\0');
 248             return theBuffer;
 249         }
 250
 251
 252         //Increase the actual length (+1 for current null character)
 253         nActualLength += nLen + 1;
 254
 255         //if buffer too big, realloc the buffer
 256         if (nActualLength > (nCurrentSize+1))
 257         {
 258             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 259             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 260             theBuffer = theNewBuffer;
 261             nCurrentSize <<= 1;
 262         }
 263
 264         //Convert the current (sub)string
 265         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 266         {
 267             *pOutSize = 0;
 268             theBuffer.data()[0u] = wxT('\0');
 269             return theBuffer;
 270         }
 271
 272         //Increment to next (sub)string
 273         //Note that we have to use strlen instead of nLen here
 274         //because XX2XX gives us the size of the output buffer,
 275         //which is not necessarily the length of the string
 276         szPos += strlen(szPos) + 1;
 277     }
 278
 279     //success - return actual length and the buffer
 280     *pOutSize = nActualLength;
 281     return theBuffer;
 282 }
 283
 284 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 285 {
 286     wxASSERT(pOutSize != NULL);
 287
 288     const wchar_t* szEnd = szString + nStringLen + 1;
 289     const wchar_t* szPos = szString;
 290     const wchar_t* szStart = szPos;
 291
 292     size_t nActualLength = 0;
 293     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 294
 295     wxCharBuffer theBuffer(nCurrentSize);
 296
 297     //Convert the string until the length() is reached, continuing the
 298     //loop every time a null character is reached
 299     while(szPos != szEnd)
 300     {
 301         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 302
 303         //Get the length of the current (sub)string
 304         size_t nLen = WC2MB(NULL, szPos, 0);
 305
 306         //Invalid conversion?
 307         if( nLen == (size_t)-1 )
 308         {
 309             *pOutSize = 0;
 310             theBuffer.data()[0u] = wxT('\0');
 311             return theBuffer;
 312         }
 313
 314         //Increase the actual length (+1 for current null character)
 315         nActualLength += nLen + 1;
 316
 317         //if buffer too big, realloc the buffer
 318         if (nActualLength > (nCurrentSize+1))
 319         {
 320             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 321             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 322             theBuffer = theNewBuffer;
 323             nCurrentSize <<= 1;
 324         }
 325
 326         //Convert the current (sub)string
 327         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 328         {
 329             *pOutSize = 0;
 330             theBuffer.data()[0u] = wxT('\0');
 331             return theBuffer;
 332         }
 333
 334         //Increment to next (sub)string
 335         //Note that we have to use wxWcslen instead of nLen here
 336         //because XX2XX gives us the size of the output buffer,
 337         //which is not necessarily the length of the string
 338         szPos += wxWcslen(szPos) + 1;
 339     }
 340
 341     //success - return actual length and the buffer
 342     *pOutSize = nActualLength;
 343     return theBuffer;
 344 }
 345
 346 // ----------------------------------------------------------------------------
 347 // wxMBConvLibc
 348 // ----------------------------------------------------------------------------
 349
 350 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 351 {
 352     return wxMB2WC(buf, psz, n);
 353 }
 354
 355 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 356 {
 357     return wxWC2MB(buf, psz, n);
 358 }
 359
 360 #ifdef __UNIX__
 361
 362 // ----------------------------------------------------------------------------
 363 // wxConvBrokenFileNames
 364 // ----------------------------------------------------------------------------
 365
 366 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 367 {
 368     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 369                   || wxStricmp(charset, _T("UTF8")) == 0  )
 370         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 371     else
 372         m_conv = new wxCSConv(charset);
 373 }
 374
 375 size_t
 376 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 377                              const char *psz,
 378                              size_t outputSize) const
 379 {
 380     return m_conv->MB2WC( outputBuf, psz, outputSize );
 381 }
 382
 383 size_t
 384 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 385                              const wchar_t *psz,
 386                              size_t outputSize) const
 387 {
 388     return m_conv->WC2MB( outputBuf, psz, outputSize );
 389 }
 390
 391 #endif
 392
 393 // ----------------------------------------------------------------------------
 394 // UTF-7
 395 // ----------------------------------------------------------------------------
 396
 397 // Implementation (C) 2004 Fredrik Roubert
 398
 399 //
 400 // BASE64 decoding table
 401 //
// Indexed by the value of an input byte (0..255, 8 entries per row): gives
// the 6 bit value of the BASE64 digit ('A'-'Z' => 0x00-0x19, 'a'-'z' =>
// 0x1a-0x33, '0'-'9' => 0x34-0x3d, '+' => 0x3e, '/' => 0x3f) or 0xff if the
// byte is not a BASE64 digit; wxMBConvUTF7::MB2WC() uses the 0xff entries to
// detect the end of an encoded run.
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 437
 438 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 439 {
 440     size_t len = 0;
 441
 442     while (*psz && ((!buf) || (len < n)))
 443     {
 444         unsigned char cc = *psz++;
 445         if (cc != '+')
 446         {
 447             // plain ASCII char
 448             if (buf)
 449                 *buf++ = cc;
 450             len++;
 451         }
 452         else if (*psz == '-')
 453         {
 454             // encoded plus sign
 455             if (buf)
 456                 *buf++ = cc;
 457             len++;
 458             psz++;
 459         }
 460         else
 461         {
 462             // BASE64 encoded string
 463             bool lsb;
 464             unsigned char c;
 465             unsigned int d, l;
 466             for (lsb = false, d = 0, l = 0;
 467                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 468             {
 469                 d <<= 6;
 470                 d += cc;
 471                 for (l += 6; l >= 8; lsb = !lsb)
 472                 {
 473                     c = (unsigned char)((d >> (l -= 8)) % 256);
 474                     if (lsb)
 475                     {
 476                         if (buf)
 477                             *buf++ |= c;
 478                         len ++;
 479                     }
 480                     else
 481                         if (buf)
 482                             *buf = (wchar_t)(c << 8);
 483                 }
 484             }
 485             if (*psz == '-')
 486                 psz++;
 487         }
 488     }
 489     if (buf && (len < n))
 490         *buf = 0;
 491     return len;
 492 }
 493
 494 //
 495 // BASE64 encoding table
 496 //
// Indexed by a 6 bit value (0..63): gives the corresponding BASE64 digit,
// used by wxMBConvUTF7::WC2MB() to produce the encoded runs.
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 508
 509 //
 510 // UTF-7 encoding table
 511 //
 512 // 0 - Set D (directly encoded characters)
 513 // 1 - Set O (optional direct characters)
 514 // 2 - whitespace characters (optional)
 515 // 3 - special characters
 516 //
// Indexed by the character code (0..127, 16 entries per row): gives the
// class (0..3, see the legend above) of each ASCII character.
// wxMBConvUTF7::WC2MB() copies only the characters of class 0 to the output
// unchanged, all the others are BASE64 encoded.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 528
 529 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 530 {
 531
 532
 533     size_t len = 0;
 534
 535     while (*psz && ((!buf) || (len < n)))
 536     {
 537         wchar_t cc = *psz++;
 538         if (cc < 0x80 && utf7encode[cc] < 1)
 539         {
 540             // plain ASCII char
 541             if (buf)
 542                 *buf++ = (char)cc;
 543             len++;
 544         }
 545 #ifndef WC_UTF16
 546         else if (((wxUint32)cc) > 0xffff)
 547         {
 548             // no surrogate pair generation (yet?)
 549             return (size_t)-1;
 550         }
 551 #endif
 552         else
 553         {
 554             if (buf)
 555                 *buf++ = '+';
 556             len++;
 557             if (cc != '+')
 558             {
 559                 // BASE64 encode string
 560                 unsigned int lsb, d, l;
 561                 for (d = 0, l = 0;; psz++)
 562                 {
 563                     for (lsb = 0; lsb < 2; lsb ++)
 564                     {
 565                         d <<= 8;
 566                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 567
 568                         for (l += 8; l >= 6; )
 569                         {
 570                             l -= 6;
 571                             if (buf)
 572                                 *buf++ = utf7enb64[(d >> l) % 64];
 573                             len++;
 574                         }
 575                     }
 576                     cc = *psz;
 577                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 578                         break;
 579                 }
 580                 if (l != 0)
 581                 {
 582                     if (buf)
 583                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 584                     len++;
 585                 }
 586             }
 587             if (buf)
 588                 *buf++ = '-';
 589             len++;
 590         }
 591     }
 592     if (buf && (len < n))
 593         *buf = 0;
 594     return len;
 595 }
 596
 597 // ----------------------------------------------------------------------------
 598 // UTF-8
 599 // ----------------------------------------------------------------------------
 600
// utf8_max[i] is the largest value which fits in an (i + 1) byte sequence:
// wxMBConvUTF8::MB2WC() uses it to reject values which would have fit in a
// shorter sequence and wxMBConvUTF8::WC2MB() scans it to find the number of
// continuation bytes needed for a character.
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 603
// boundaries of the private use area to which the bytes that are invalid in
// a UTF-8 encoded string are (temporarily) remapped: the byte b becomes the
// character wxUnicodePUA + b, wxUnicodePUAEnd is one past the last such
// character
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 608
 609 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 610 {
 611     size_t len = 0;
 612
 613     while (*psz && ((!buf) || (len < n)))
 614     {
 615         const char *opsz = psz;
 616         bool invalid = false;
 617         unsigned char cc = *psz++, fc = cc;
 618         unsigned cnt;
 619         for (cnt = 0; fc & 0x80; cnt++)
 620             fc <<= 1;
 621         if (!cnt)
 622         {
 623             // plain ASCII char
 624             if (buf)
 625                 *buf++ = cc;
 626             len++;
 627
 628             // escape the escape character for octal escapes
 629             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 630                     && cc == '\\' && (!buf || len < n))
 631             {
 632                 if (buf)
 633                     *buf++ = cc;
 634                 len++;
 635             }
 636         }
 637         else
 638         {
 639             cnt--;
 640             if (!cnt)
 641             {
 642                 // invalid UTF-8 sequence
 643                 invalid = true;
 644             }
 645             else
 646             {
 647                 unsigned ocnt = cnt - 1;
 648                 wxUint32 res = cc & (0x3f >> cnt);
 649                 while (cnt--)
 650                 {
 651                     cc = *psz;
 652                     if ((cc & 0xC0) != 0x80)
 653                     {
 654                         // invalid UTF-8 sequence
 655                         invalid = true;
 656                         break;
 657                     }
 658                     psz++;
 659                     res = (res << 6) | (cc & 0x3f);
 660                 }
 661                 if (invalid || res <= utf8_max[ocnt])
 662                 {
 663                     // illegal UTF-8 encoding
 664                     invalid = true;
 665                 }
 666                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 667                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 668                 {
 669                     // if one of our PUA characters turns up externally
 670                     // it must also be treated as an illegal sequence
 671                     // (a bit like you have to escape an escape character)
 672                     invalid = true;
 673                 }
 674                 else
 675                 {
 676 #ifdef WC_UTF16
 677                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 678                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 679                     if (pa == (size_t)-1)
 680                     {
 681                         invalid = true;
 682                     }
 683                     else
 684                     {
 685                         if (buf)
 686                             buf += pa;
 687                         len += pa;
 688                     }
 689 #else // !WC_UTF16
 690                     if (buf)
 691                         *buf++ = res;
 692                     len++;
 693 #endif // WC_UTF16/!WC_UTF16
 694                 }
 695             }
 696             if (invalid)
 697             {
 698                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 699                 {
 700                     while (opsz < psz && (!buf || len < n))
 701                     {
 702 #ifdef WC_UTF16
 703                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 704                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 705                         wxASSERT(pa != (size_t)-1);
 706                         if (buf)
 707                             buf += pa;
 708                         opsz++;
 709                         len += pa;
 710 #else
 711                         if (buf)
 712                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 713                         opsz++;
 714                         len++;
 715 #endif
 716                     }
 717                 }
 718                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 719                 {
 720                     while (opsz < psz && (!buf || len < n))
 721                     {
 722                         if ( buf && len + 3 < n )
 723                         {
 724                             unsigned char n = *opsz;
 725                             *buf++ = L'\\';
 726                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 727                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 728                             *buf++ = (wchar_t)( L'0' + n % 010 );
 729                         }
 730                         opsz++;
 731                         len += 4;
 732                     }
 733                 }
 734                 else // MAP_INVALID_UTF8_NOT
 735                 {
 736                     return (size_t)-1;
 737                 }
 738             }
 739         }
 740     }
 741     if (buf && (len < n))
 742         *buf = 0;
 743     return len;
 744 }
 745
 746 static inline bool isoctal(wchar_t wch)
 747 {
 748     return L'0' <= wch && wch <= L'7';
 749 }
 750
 751 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 752 {
 753     size_t len = 0;
 754
 755     while (*psz && ((!buf) || (len < n)))
 756     {
 757         wxUint32 cc;
 758 #ifdef WC_UTF16
 759         // cast is ok for WC_UTF16
 760         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 761         psz += (pa == (size_t)-1) ? 1 : pa;
 762 #else
 763         cc=(*psz++) & 0x7fffffff;
 764 #endif
 765
 766         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 767                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 768         {
 769             if (buf)
 770                 *buf++ = (char)(cc - wxUnicodePUA);
 771             len++;
 772         }
 773         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 774                     && cc == L'\\' && psz[0] == L'\\' )
 775         {
 776             if (buf)
 777                 *buf++ = (char)cc;
 778             psz++;
 779             len++;
 780         }
 781         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 782                     cc == L'\\' &&
 783                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 784         {
 785             if (buf)
 786             {
 787                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 788                                  (psz[1] - L'0')*010 +
 789                                  (psz[2] - L'0'));
 790             }
 791
 792             psz += 3;
 793             len++;
 794         }
 795         else
 796         {
 797             unsigned cnt;
 798             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 799             if (!cnt)
 800             {
 801                 // plain ASCII char
 802                 if (buf)
 803                     *buf++ = (char) cc;
 804                 len++;
 805             }
 806
 807             else
 808             {
 809                 len += cnt + 1;
 810                 if (buf)
 811                 {
 812                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 813                     while (cnt--)
 814                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 815                 }
 816             }
 817         }
 818     }
 819
 820     if (buf && (len<n))
 821         *buf = 0;
 822
 823     return len;
 824 }
 825
 826 // ----------------------------------------------------------------------------
 827 // UTF-16
 828 // ----------------------------------------------------------------------------
 829
// The functions below are written once for the "straight" case, in which the
// 16 bit units are copied to or from the byte string as they are, and once
// for the "swap" case, in which the 2 bytes of each unit are exchanged: these
// macros map them to the BE or LE class depending on whether
// WORDS_BIGENDIAN is defined.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 837
 838
 839 #ifdef WC_UTF16
 840
 841 // copy 16bit MB to 16bit String
 842 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 843 {
 844     size_t len=0;
 845
 846     while (*(wxUint16*)psz && (!buf || len < n))
 847     {
 848         if (buf)
 849             *buf++ = *(wxUint16*)psz;
 850         len++;
 851
 852         psz += sizeof(wxUint16);
 853     }
 854     if (buf && len<n)   *buf=0;
 855
 856     return len;
 857 }
 858
 859
 860 // copy 16bit String to 16bit MB
 861 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 862 {
 863     size_t len=0;
 864
 865     while (*psz && (!buf || len < n))
 866     {
 867         if (buf)
 868         {
 869             *(wxUint16*)buf = *psz;
 870             buf += sizeof(wxUint16);
 871         }
 872         len += sizeof(wxUint16);
 873         psz++;
 874     }
 875     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 876
 877     return len;
 878 }
 879
 880
 881 // swap 16bit MB to 16bit String
 882 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 883 {
 884     size_t len=0;
 885
 886     while (*(wxUint16*)psz && (!buf || len < n))
 887     {
 888         if (buf)
 889         {
 890             ((char *)buf)[0] = psz[1];
 891             ((char *)buf)[1] = psz[0];
 892             buf++;
 893         }
 894         len++;
 895         psz += sizeof(wxUint16);
 896     }
 897     if (buf && len<n)   *buf=0;
 898
 899     return len;
 900 }
 901
 902
// swap 16bit String to 16bit MB
 904 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 905 {
 906     size_t len=0;
 907
 908     while (*psz && (!buf || len < n))
 909     {
 910         if (buf)
 911         {
 912             *buf++ = ((char*)psz)[1];
 913             *buf++ = ((char*)psz)[0];
 914         }
 915         len += sizeof(wxUint16);
 916         psz++;
 917     }
 918     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 919
 920     return len;
 921 }
 922
 923
 924 #else // WC_UTF16
 925
 926
 927 // copy 16bit MB to 32bit String
 928 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 929 {
 930     size_t len=0;
 931
 932     while (*(wxUint16*)psz && (!buf || len < n))
 933     {
 934         wxUint32 cc;
 935         size_t pa=decode_utf16((wxUint16*)psz, cc);
 936         if (pa == (size_t)-1)
 937             return pa;
 938
 939         if (buf)
 940             *buf++ = cc;
 941         len++;
 942         psz += pa * sizeof(wxUint16);
 943     }
 944     if (buf && len<n)   *buf=0;
 945
 946     return len;
 947 }
 948
 949
 950 // copy 32bit String to 16bit MB
 951 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 952 {
 953     size_t len=0;
 954
 955     while (*psz && (!buf || len < n))
 956     {
 957         wxUint16 cc[2];
 958         size_t pa=encode_utf16(*psz, cc);
 959
 960         if (pa == (size_t)-1)
 961             return pa;
 962
 963         if (buf)
 964         {
 965             *(wxUint16*)buf = cc[0];
 966             buf += sizeof(wxUint16);
 967             if (pa > 1)
 968             {
 969                 *(wxUint16*)buf = cc[1];
 970                 buf += sizeof(wxUint16);
 971             }
 972         }
 973
 974         len += pa*sizeof(wxUint16);
 975         psz++;
 976     }
 977     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 978
 979     return len;
 980 }
 981
 982
 983 // swap 16bit MB to 32bit String
 984 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 985 {
 986     size_t len=0;
 987
 988     while (*(wxUint16*)psz && (!buf || len < n))
 989     {
 990         wxUint32 cc;
 991         char tmp[4];
 992         tmp[0]=psz[1];  tmp[1]=psz[0];
 993         tmp[2]=psz[3];  tmp[3]=psz[2];
 994
 995         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 996         if (pa == (size_t)-1)
 997             return pa;
 998
 999         if (buf)
1000             *buf++ = cc;
1001
1002         len++;
1003         psz += pa * sizeof(wxUint16);
1004     }
1005     if (buf && len<n)   *buf=0;
1006
1007     return len;
1008 }
1009
1010
1011 // swap 32bit String to 16bit MB
1012 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1013 {
1014     size_t len=0;
1015
1016     while (*psz && (!buf || len < n))
1017     {
1018         wxUint16 cc[2];
1019         size_t pa=encode_utf16(*psz, cc);
1020
1021         if (pa == (size_t)-1)
1022             return pa;
1023
1024         if (buf)
1025         {
1026             *buf++ = ((char*)cc)[1];
1027             *buf++ = ((char*)cc)[0];
1028             if (pa > 1)
1029             {
1030                 *buf++ = ((char*)cc)[3];
1031                 *buf++ = ((char*)cc)[2];
1032             }
1033         }
1034
1035         len += pa*sizeof(wxUint16);
1036         psz++;
1037     }
1038     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1039
1040     return len;
1041 }
1042
1043 #endif // WC_UTF16
1044
1045
1046 // ----------------------------------------------------------------------------
1047 // UTF-32
1048 // ----------------------------------------------------------------------------
1049
// Alias the two UTF-32 converter classes as "straight" and "swap" so that the
// member functions below can be defined under these names: when
// WORDS_BIGENDIAN is defined "straight" is the BE class and "swap" the LE
// one, otherwise it is the other way round.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global UTF-32 converter objects, one for each byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1061
1062
1063 #ifdef WC_UTF16
1064
1065 // copy 32bit MB to 16bit String
1066 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1067 {
1068     size_t len=0;
1069
1070     while (*(wxUint32*)psz && (!buf || len < n))
1071     {
1072         wxUint16 cc[2];
1073
1074         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1075         if (pa == (size_t)-1)
1076             return pa;
1077
1078         if (buf)
1079         {
1080             *buf++ = cc[0];
1081             if (pa > 1)
1082                 *buf++ = cc[1];
1083         }
1084         len += pa;
1085         psz += sizeof(wxUint32);
1086     }
1087     if (buf && len<n)   *buf=0;
1088
1089     return len;
1090 }
1091
1092
1093 // copy 16bit String to 32bit MB
1094 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1095 {
1096     size_t len=0;
1097
1098     while (*psz && (!buf || len < n))
1099     {
1100         wxUint32 cc;
1101
1102         // cast is ok for WC_UTF16
1103         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1104         if (pa == (size_t)-1)
1105             return pa;
1106
1107         if (buf)
1108         {
1109             *(wxUint32*)buf = cc;
1110             buf += sizeof(wxUint32);
1111         }
1112         len += sizeof(wxUint32);
1113         psz += pa;
1114     }
1115
1116     if (buf && len<=n-sizeof(wxUint32))
1117         *(wxUint32*)buf=0;
1118
1119     return len;
1120 }
1121
1122
1123
1124 // swap 32bit MB to 16bit String
1125 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1126 {
1127     size_t len=0;
1128
1129     while (*(wxUint32*)psz && (!buf || len < n))
1130     {
1131         char tmp[4];
1132         tmp[0] = psz[3];   tmp[1] = psz[2];
1133         tmp[2] = psz[1];   tmp[3] = psz[0];
1134
1135
1136         wxUint16 cc[2];
1137
1138         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1139         if (pa == (size_t)-1)
1140             return pa;
1141
1142         if (buf)
1143         {
1144             *buf++ = cc[0];
1145             if (pa > 1)
1146                 *buf++ = cc[1];
1147         }
1148         len += pa;
1149         psz += sizeof(wxUint32);
1150     }
1151
1152     if (buf && len<n)
1153         *buf=0;
1154
1155     return len;
1156 }
1157
1158
1159 // swap 16bit String to 32bit MB
1160 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1161 {
1162     size_t len=0;
1163
1164     while (*psz && (!buf || len < n))
1165     {
1166         char cc[4];
1167
1168         // cast is ok for WC_UTF16
1169         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1170         if (pa == (size_t)-1)
1171             return pa;
1172
1173         if (buf)
1174         {
1175             *buf++ = cc[3];
1176             *buf++ = cc[2];
1177             *buf++ = cc[1];
1178             *buf++ = cc[0];
1179         }
1180         len += sizeof(wxUint32);
1181         psz += pa;
1182     }
1183
1184     if (buf && len<=n-sizeof(wxUint32))
1185         *(wxUint32*)buf=0;
1186
1187     return len;
1188 }
1189
1190 #else // WC_UTF16
1191
1192
1193 // copy 32bit MB to 32bit String
1194 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1195 {
1196     size_t len=0;
1197
1198     while (*(wxUint32*)psz && (!buf || len < n))
1199     {
1200         if (buf)
1201             *buf++ = *(wxUint32*)psz;
1202         len++;
1203         psz += sizeof(wxUint32);
1204     }
1205
1206     if (buf && len<n)
1207         *buf=0;
1208
1209     return len;
1210 }
1211
1212
1213 // copy 32bit String to 32bit MB
1214 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1215 {
1216     size_t len=0;
1217
1218     while (*psz && (!buf || len < n))
1219     {
1220         if (buf)
1221         {
1222             *(wxUint32*)buf = *psz;
1223             buf += sizeof(wxUint32);
1224         }
1225
1226         len += sizeof(wxUint32);
1227         psz++;
1228     }
1229
1230     if (buf && len<=n-sizeof(wxUint32))
1231         *(wxUint32*)buf=0;
1232
1233     return len;
1234 }
1235
1236
1237 // swap 32bit MB to 32bit String
1238 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1239 {
1240     size_t len=0;
1241
1242     while (*(wxUint32*)psz && (!buf || len < n))
1243     {
1244         if (buf)
1245         {
1246             ((char *)buf)[0] = psz[3];
1247             ((char *)buf)[1] = psz[2];
1248             ((char *)buf)[2] = psz[1];
1249             ((char *)buf)[3] = psz[0];
1250             buf++;
1251         }
1252         len++;
1253         psz += sizeof(wxUint32);
1254     }
1255
1256     if (buf && len<n)
1257         *buf=0;
1258
1259     return len;
1260 }
1261
1262
1263 // swap 32bit String to 32bit MB
1264 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1265 {
1266     size_t len=0;
1267
1268     while (*psz && (!buf || len < n))
1269     {
1270         if (buf)
1271         {
1272             *buf++ = ((char *)psz)[3];
1273             *buf++ = ((char *)psz)[2];
1274             *buf++ = ((char *)psz)[1];
1275             *buf++ = ((char *)psz)[0];
1276         }
1277         len += sizeof(wxUint32);
1278         psz++;
1279     }
1280
1281     if (buf && len<=n-sizeof(wxUint32))
1282         *(wxUint32*)buf=0;
1283
1284     return len;
1285 }
1286
1287
1288 #endif // WC_UTF16
1289
1290
1291 // ============================================================================
1292 // The classes doing conversion using the iconv_xxx() functions
1293 // ============================================================================
1294
1295 #ifdef HAVE_ICONV
1296
1297 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1298 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1299 //     (unless there's yet another bug in glibc) the only case when iconv()
1300 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1301 //     left in the input buffer -- when _real_ error occurs,
1302 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1303 //     iconv() failure.
1304 //     [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
// with glibc up to 2.1 a (size_t)-1 result only counts as a failure if it is
// not the spurious E2BIG with no input left
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
// everywhere else any (size_t)-1 result is a failure; bufLeft is not used
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// casts the address of an input pointer to the pointer type built from
// ICONV_CONST (which is defined outside of this section) so that it can be
// passed to iconv()
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1313
1314 // ----------------------------------------------------------------------------
1315 // wxMBConv_iconv: encapsulates an iconv character set
1316 // ----------------------------------------------------------------------------
1317
1318 class wxMBConv_iconv : public wxMBConv
1319 {
1320 public:
1321     wxMBConv_iconv(const wxChar *name);
1322     virtual ~wxMBConv_iconv();
1323
1324     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1325     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1326
1327     bool IsOk() const
1328         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1329
1330 protected:
1331     // the iconv handlers used to translate from multibyte to wide char and in
1332     // the other direction
1333     iconv_t m2w,
1334             w2m;
1335 #if wxUSE_THREADS
1336     // guards access to m2w and w2m objects
1337     wxMutex m_iconvMutex;
1338 #endif
1339
1340 private:
1341     // the name (for iconv_open()) of a wide char charset -- if none is
1342     // available on this machine, it will remain NULL
1343     static const char *ms_wcCharsetName;
1344
1345     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1346     // different endian-ness than the native one
1347     static bool ms_wcNeedsSwap;
1348 };
1349
1350 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1351 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1352
1353 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1354 {
1355     // Do it the hard way
1356     char cname[100];
1357     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1358         cname[i] = (char) name[i];
1359
1360     // check for charset that represents wchar_t:
1361     if (ms_wcCharsetName == NULL)
1362     {
1363         ms_wcNeedsSwap = false;
1364
1365         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1366         ms_wcCharsetName = WC_NAME_BEST;
1367         m2w = iconv_open(ms_wcCharsetName, cname);
1368
1369         if (m2w == (iconv_t)-1)
1370         {
1371             // try charset w/o bytesex info (e.g. "UCS4")
1372             // and check for bytesex ourselves:
1373             ms_wcCharsetName = WC_NAME;
1374             m2w = iconv_open(ms_wcCharsetName, cname);
1375
1376             // last bet, try if it knows WCHAR_T pseudo-charset
1377             if (m2w == (iconv_t)-1)
1378             {
1379                 ms_wcCharsetName = "WCHAR_T";
1380                 m2w = iconv_open(ms_wcCharsetName, cname);
1381             }
1382
1383             if (m2w != (iconv_t)-1)
1384             {
1385                 char    buf[2], *bufPtr;
1386                 wchar_t wbuf[2], *wbufPtr;
1387                 size_t  insz, outsz;
1388                 size_t  res;
1389
1390                 buf[0] = 'A';
1391                 buf[1] = 0;
1392                 wbuf[0] = 0;
1393                 insz = 2;
1394                 outsz = SIZEOF_WCHAR_T * 2;
1395                 wbufPtr = wbuf;
1396                 bufPtr = buf;
1397
1398                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1399                             (char**)&wbufPtr, &outsz);
1400
1401                 if (ICONV_FAILED(res, insz))
1402                 {
1403                     ms_wcCharsetName = NULL;
1404                     wxLogLastError(wxT("iconv"));
1405                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1406                 }
1407                 else
1408                 {
1409                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1410                 }
1411             }
1412             else
1413             {
1414                 ms_wcCharsetName = NULL;
1415
1416                 // VS: we must not output an error here, since wxWidgets will safely
1417                 //     fall back to using wxEncodingConverter.
1418                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1419                 //wxLogError(
1420             }
1421         }
1422         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1423     }
1424     else // we already have ms_wcCharsetName
1425     {
1426         m2w = iconv_open(ms_wcCharsetName, cname);
1427     }
1428
1429     // NB: don't ever pass NULL to iconv_open(), it may crash!
1430     if ( ms_wcCharsetName )
1431     {
1432         w2m = iconv_open( cname, ms_wcCharsetName);
1433     }
1434     else
1435     {
1436         w2m = (iconv_t)-1;
1437     }
1438 }
1439
1440 wxMBConv_iconv::~wxMBConv_iconv()
1441 {
1442     if ( m2w != (iconv_t)-1 )
1443         iconv_close(m2w);
1444     if ( w2m != (iconv_t)-1 )
1445         iconv_close(w2m);
1446 }
1447
1448 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1449 {
1450 #if wxUSE_THREADS
1451     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1452     //     Unfortunately there is a couple of global wxCSConv objects such as
1453     //     wxConvLocal that are used all over wx code, so we have to make sure
1454     //     the handle is used by at most one thread at the time. Otherwise
1455     //     only a few wx classes would be safe to use from non-main threads
1456     //     as MB<->WC conversion would fail "randomly".
1457     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1458 #endif
1459
1460     size_t inbuf = strlen(psz);
1461     size_t outbuf = n * SIZEOF_WCHAR_T;
1462     size_t res, cres;
1463     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1464     wchar_t *bufPtr = buf;
1465     const char *pszPtr = psz;
1466
1467     if (buf)
1468     {
1469         // have destination buffer, convert there
1470         cres = iconv(m2w,
1471                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1472                      (char**)&bufPtr, &outbuf);
1473         res = n - (outbuf / SIZEOF_WCHAR_T);
1474
1475         if (ms_wcNeedsSwap)
1476         {
1477             // convert to native endianness
1478             WC_BSWAP(buf /* _not_ bufPtr */, res)
1479         }
1480
1481         // NB: iconv was given only strlen(psz) characters on input, and so
1482         //     it couldn't convert the trailing zero. Let's do it ourselves
1483         //     if there's some room left for it in the output buffer.
1484         if (res < n)
1485             buf[res] = 0;
1486     }
1487     else
1488     {
1489         // no destination buffer... convert using temp buffer
1490         // to calculate destination buffer requirement
1491         wchar_t tbuf[8];
1492         res = 0;
1493         do {
1494             bufPtr = tbuf;
1495             outbuf = 8*SIZEOF_WCHAR_T;
1496
1497             cres = iconv(m2w,
1498                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1499                          (char**)&bufPtr, &outbuf );
1500
1501             res += 8-(outbuf/SIZEOF_WCHAR_T);
1502         } while ((cres==(size_t)-1) && (errno==E2BIG));
1503     }
1504
1505     if (ICONV_FAILED(cres, inbuf))
1506     {
1507         //VS: it is ok if iconv fails, hence trace only
1508         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1509         return (size_t)-1;
1510     }
1511
1512     return res;
1513 }
1514
1515 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1516 {
1517 #if wxUSE_THREADS
1518     // NB: explained in MB2WC
1519     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1520 #endif
1521
1522     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1523     size_t outbuf = n;
1524     size_t res, cres;
1525
1526     wchar_t *tmpbuf = 0;
1527
1528     if (ms_wcNeedsSwap)
1529     {
1530         // need to copy to temp buffer to switch endianness
1531         // this absolutely doesn't rock!
1532         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1533         //  could be in read-only memory, or be accessed in some other thread)
1534         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1535         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1536         WC_BSWAP(tmpbuf, inbuf)
1537         psz=tmpbuf;
1538     }
1539
1540     if (buf)
1541     {
1542         // have destination buffer, convert there
1543         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1544
1545         res = n-outbuf;
1546
1547         // NB: iconv was given only wcslen(psz) characters on input, and so
1548         //     it couldn't convert the trailing zero. Let's do it ourselves
1549         //     if there's some room left for it in the output buffer.
1550         if (res < n)
1551             buf[0] = 0;
1552     }
1553     else
1554     {
1555         // no destination buffer... convert using temp buffer
1556         // to calculate destination buffer requirement
1557         char tbuf[16];
1558         res = 0;
1559         do {
1560             buf = tbuf; outbuf = 16;
1561
1562             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1563
1564             res += 16 - outbuf;
1565         } while ((cres==(size_t)-1) && (errno==E2BIG));
1566     }
1567
1568     if (ms_wcNeedsSwap)
1569     {
1570         free(tmpbuf);
1571     }
1572
1573     if (ICONV_FAILED(cres, inbuf))
1574     {
1575         //VS: it is ok if iconv fails, hence trace only
1576         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1577         return (size_t)-1;
1578     }
1579
1580     return res;
1581 }
1582
1583 #endif // HAVE_ICONV
1584
1585
1586 // ============================================================================
1587 // Win32 conversion classes
1588 // ============================================================================
1589
1590 #ifdef wxHAVE_WIN32_MB2WC
1591
1592 // from utils.cpp
1593 #if wxUSE_FONTMAP
1594 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1595 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1596 #endif
1597
// wxMBConv implementation converting between multibyte strings in a given
// Windows code page and wide strings by calling ::MultiByteToWideChar() and
// ::WideCharToMultiByte().
class wxMBConv_win32 : public wxMBConv
{
public:
    // default ctor: uses the code page CP_ACP
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // uses the code page returned by wxCharsetToCodepage() for this name
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    // uses the code page returned by wxEncodingToCodepage() for this encoding
    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // Converts the NUL-terminated multibyte string psz to wide characters.
    //
    // buf is the output buffer of n wide characters or NULL to only compute
    // the length of the result.
    //
    // Returns the length of the result, not counting the terminating NUL, or
    // (size_t)-1 if ::MultiByteToWideChar() returned 0.
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and break the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
        // an error (tested under Windows Server 2003) and apparently it is
        // done on purpose, i.e. the function accepts any input in this case
        // and although I'd prefer to return error on ill-formed output, our
        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) neither so we don't
        // even have any fallback here...
        int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;

        const size_t len = ::MultiByteToWideChar
                             (
                                m_CodePage,     // code page
                                flags,          // flags: fail on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                             );

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        return len ? len - 1 : (size_t)-1;
    }

    // Converts the NUL-terminated wide string pwz to the code page of this
    // object.
    //
    // buf is the output buffer of n bytes or NULL to only compute the length
    // of the result (no check for lossy conversion is done in this case).
    //
    // Returns the length of the result, not counting the terminating NUL, or
    // (size_t)-1 if ::WideCharToMultiByte() returned 0 or if the conversion
    // into buf was detected to be lossy.
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        BOOL usedDef wxDUMMY_INITIALIZE(false);
        BOOL *pUsedDef;
        int flags;
        if ( CanUseNoBestFit() && m_CodePage < 50000 )
        {
            // it's our lucky day
            flags = WC_NO_BEST_FIT_CHARS;
            pUsedDef = &usedDef;
        }
        else // old system or unsupported encoding
        {
            flags = 0;
            pUsedDef = NULL;
        }

        const size_t len = ::WideCharToMultiByte
                             (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                             );

        if ( !len )
        {
            // function totally failed
            return (size_t)-1;
        }

        // if we were really converting, check if we succeeded
        if ( buf )
        {
            if ( flags )
            {
                // check if the conversion failed, i.e. if any replacements
                // were done
                if ( usedDef )
                    return (size_t)-1;
            }
            else // we must resort to double tripping...
            {
                // convert the result back and compare it with the input
                wxWCharBuffer wcBuf(n);
                if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                        wcscmp(wcBuf, pwz) != 0 )
                {
                    // we didn't obtain the same thing we started from, hence
                    // the conversion was lossy and we consider that it failed
                    return (size_t)-1;
                }
            }
        }

        // see the comment above for the reason of "len - 1"
        return len - 1;
    }

    // the object can be used unless the code page is -1
    bool IsOk() const { return m_CodePage != -1; }

private:
    // Returns true if the WC_NO_BEST_FIT_CHARS flag can be used on this
    // system; the OS version is checked during the first call only and the
    // answer is cached in a static variable.
    static bool CanUseNoBestFit()
    {
        // -1 means "not determined yet", otherwise it is 0 or 1
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            switch ( wxGetOsVersion(&verMaj, &verMin) )
            {
                case wxWIN95:
                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                    break;

                case wxWINDOWS_NT:
                    s_isWin98Or2k = verMaj >= 5;
                    break;

                default:
                    // unknown, be conservative by default
                    s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    // the code page passed to the Win32 conversion functions, IsOk() treats
    // the value of -1 as invalid
    long m_CodePage;
};
1765
1766 #endif // wxHAVE_WIN32_MB2WC
1767
1768 // ============================================================================
1769 // Cocoa conversion classes
1770 // ============================================================================
1771
1772 #if defined(__WXCOCOA__)
1773
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1777
1778 #include <CoreFoundation/CFString.h>
1779 #include <CoreFoundation/CFStringEncodingExt.h>
1780
1781 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1782 {
1783     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1784     if ( encoding == wxFONTENCODING_DEFAULT )
1785     {
1786         enc = CFStringGetSystemEncoding();
1787     }
1788     else switch( encoding)
1789     {
1790         case wxFONTENCODING_ISO8859_1 :
1791             enc = kCFStringEncodingISOLatin1 ;
1792             break ;
1793         case wxFONTENCODING_ISO8859_2 :
1794             enc = kCFStringEncodingISOLatin2;
1795             break ;
1796         case wxFONTENCODING_ISO8859_3 :
1797             enc = kCFStringEncodingISOLatin3 ;
1798             break ;
1799         case wxFONTENCODING_ISO8859_4 :
1800             enc = kCFStringEncodingISOLatin4;
1801             break ;
1802         case wxFONTENCODING_ISO8859_5 :
1803             enc = kCFStringEncodingISOLatinCyrillic;
1804             break ;
1805         case wxFONTENCODING_ISO8859_6 :
1806             enc = kCFStringEncodingISOLatinArabic;
1807             break ;
1808         case wxFONTENCODING_ISO8859_7 :
1809             enc = kCFStringEncodingISOLatinGreek;
1810             break ;
1811         case wxFONTENCODING_ISO8859_8 :
1812             enc = kCFStringEncodingISOLatinHebrew;
1813             break ;
1814         case wxFONTENCODING_ISO8859_9 :
1815             enc = kCFStringEncodingISOLatin5;
1816             break ;
1817         case wxFONTENCODING_ISO8859_10 :
1818             enc = kCFStringEncodingISOLatin6;
1819             break ;
1820         case wxFONTENCODING_ISO8859_11 :
1821             enc = kCFStringEncodingISOLatinThai;
1822             break ;
1823         case wxFONTENCODING_ISO8859_13 :
1824             enc = kCFStringEncodingISOLatin7;
1825             break ;
1826         case wxFONTENCODING_ISO8859_14 :
1827             enc = kCFStringEncodingISOLatin8;
1828             break ;
1829         case wxFONTENCODING_ISO8859_15 :
1830             enc = kCFStringEncodingISOLatin9;
1831             break ;
1832
1833         case wxFONTENCODING_KOI8 :
1834             enc = kCFStringEncodingKOI8_R;
1835             break ;
1836         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1837             enc = kCFStringEncodingDOSRussian;
1838             break ;
1839
1840 //      case wxFONTENCODING_BULGARIAN :
1841 //          enc = ;
1842 //          break ;
1843
1844         case wxFONTENCODING_CP437 :
1845             enc =kCFStringEncodingDOSLatinUS ;
1846             break ;
1847         case wxFONTENCODING_CP850 :
1848             enc = kCFStringEncodingDOSLatin1;
1849             break ;
1850         case wxFONTENCODING_CP852 :
1851             enc = kCFStringEncodingDOSLatin2;
1852             break ;
1853         case wxFONTENCODING_CP855 :
1854             enc = kCFStringEncodingDOSCyrillic;
1855             break ;
1856         case wxFONTENCODING_CP866 :
1857             enc =kCFStringEncodingDOSRussian ;
1858             break ;
1859         case wxFONTENCODING_CP874 :
1860             enc = kCFStringEncodingDOSThai;
1861             break ;
1862         case wxFONTENCODING_CP932 :
1863             enc = kCFStringEncodingDOSJapanese;
1864             break ;
1865         case wxFONTENCODING_CP936 :
1866             enc =kCFStringEncodingDOSChineseSimplif ;
1867             break ;
1868         case wxFONTENCODING_CP949 :
1869             enc = kCFStringEncodingDOSKorean;
1870             break ;
1871         case wxFONTENCODING_CP950 :
1872             enc = kCFStringEncodingDOSChineseTrad;
1873             break ;
1874         case wxFONTENCODING_CP1250 :
1875             enc = kCFStringEncodingWindowsLatin2;
1876             break ;
1877         case wxFONTENCODING_CP1251 :
1878             enc =kCFStringEncodingWindowsCyrillic ;
1879             break ;
1880         case wxFONTENCODING_CP1252 :
1881             enc =kCFStringEncodingWindowsLatin1 ;
1882             break ;
1883         case wxFONTENCODING_CP1253 :
1884             enc = kCFStringEncodingWindowsGreek;
1885             break ;
1886         case wxFONTENCODING_CP1254 :
1887             enc = kCFStringEncodingWindowsLatin5;
1888             break ;
1889         case wxFONTENCODING_CP1255 :
1890             enc =kCFStringEncodingWindowsHebrew ;
1891             break ;
1892         case wxFONTENCODING_CP1256 :
1893             enc =kCFStringEncodingWindowsArabic ;
1894             break ;
1895         case wxFONTENCODING_CP1257 :
1896             enc = kCFStringEncodingWindowsBalticRim;
1897             break ;
1898 //   This only really encodes to UTF7 (if that) evidently
1899 //        case wxFONTENCODING_UTF7 :
1900 //            enc = kCFStringEncodingNonLossyASCII ;
1901 //            break ;
1902         case wxFONTENCODING_UTF8 :
1903             enc = kCFStringEncodingUTF8 ;
1904             break ;
1905         case wxFONTENCODING_EUC_JP :
1906             enc = kCFStringEncodingEUC_JP;
1907             break ;
1908         case wxFONTENCODING_UTF16 :
1909             enc = kCFStringEncodingUnicode ;
1910             break ;
1911         case wxFONTENCODING_MACROMAN :
1912             enc = kCFStringEncodingMacRoman ;
1913             break ;
1914         case wxFONTENCODING_MACJAPANESE :
1915             enc = kCFStringEncodingMacJapanese ;
1916             break ;
1917         case wxFONTENCODING_MACCHINESETRAD :
1918             enc = kCFStringEncodingMacChineseTrad ;
1919             break ;
1920         case wxFONTENCODING_MACKOREAN :
1921             enc = kCFStringEncodingMacKorean ;
1922             break ;
1923         case wxFONTENCODING_MACARABIC :
1924             enc = kCFStringEncodingMacArabic ;
1925             break ;
1926         case wxFONTENCODING_MACHEBREW :
1927             enc = kCFStringEncodingMacHebrew ;
1928             break ;
1929         case wxFONTENCODING_MACGREEK :
1930             enc = kCFStringEncodingMacGreek ;
1931             break ;
1932         case wxFONTENCODING_MACCYRILLIC :
1933             enc = kCFStringEncodingMacCyrillic ;
1934             break ;
1935         case wxFONTENCODING_MACDEVANAGARI :
1936             enc = kCFStringEncodingMacDevanagari ;
1937             break ;
1938         case wxFONTENCODING_MACGURMUKHI :
1939             enc = kCFStringEncodingMacGurmukhi ;
1940             break ;
1941         case wxFONTENCODING_MACGUJARATI :
1942             enc = kCFStringEncodingMacGujarati ;
1943             break ;
1944         case wxFONTENCODING_MACORIYA :
1945             enc = kCFStringEncodingMacOriya ;
1946             break ;
1947         case wxFONTENCODING_MACBENGALI :
1948             enc = kCFStringEncodingMacBengali ;
1949             break ;
1950         case wxFONTENCODING_MACTAMIL :
1951             enc = kCFStringEncodingMacTamil ;
1952             break ;
1953         case wxFONTENCODING_MACTELUGU :
1954             enc = kCFStringEncodingMacTelugu ;
1955             break ;
1956         case wxFONTENCODING_MACKANNADA :
1957             enc = kCFStringEncodingMacKannada ;
1958             break ;
1959         case wxFONTENCODING_MACMALAJALAM :
1960             enc = kCFStringEncodingMacMalayalam ;
1961             break ;
1962         case wxFONTENCODING_MACSINHALESE :
1963             enc = kCFStringEncodingMacSinhalese ;
1964             break ;
1965         case wxFONTENCODING_MACBURMESE :
1966             enc = kCFStringEncodingMacBurmese ;
1967             break ;
1968         case wxFONTENCODING_MACKHMER :
1969             enc = kCFStringEncodingMacKhmer ;
1970             break ;
1971         case wxFONTENCODING_MACTHAI :
1972             enc = kCFStringEncodingMacThai ;
1973             break ;
1974         case wxFONTENCODING_MACLAOTIAN :
1975             enc = kCFStringEncodingMacLaotian ;
1976             break ;
1977         case wxFONTENCODING_MACGEORGIAN :
1978             enc = kCFStringEncodingMacGeorgian ;
1979             break ;
1980         case wxFONTENCODING_MACARMENIAN :
1981             enc = kCFStringEncodingMacArmenian ;
1982             break ;
1983         case wxFONTENCODING_MACCHINESESIMP :
1984             enc = kCFStringEncodingMacChineseSimp ;
1985             break ;
1986         case wxFONTENCODING_MACTIBETAN :
1987             enc = kCFStringEncodingMacTibetan ;
1988             break ;
1989         case wxFONTENCODING_MACMONGOLIAN :
1990             enc = kCFStringEncodingMacMongolian ;
1991             break ;
1992         case wxFONTENCODING_MACETHIOPIC :
1993             enc = kCFStringEncodingMacEthiopic ;
1994             break ;
1995         case wxFONTENCODING_MACCENTRALEUR :
1996             enc = kCFStringEncodingMacCentralEurRoman ;
1997             break ;
1998         case wxFONTENCODING_MACVIATNAMESE :
1999             enc = kCFStringEncodingMacVietnamese ;
2000             break ;
2001         case wxFONTENCODING_MACARABICEXT :
2002             enc = kCFStringEncodingMacExtArabic ;
2003             break ;
2004         case wxFONTENCODING_MACSYMBOL :
2005             enc = kCFStringEncodingMacSymbol ;
2006             break ;
2007         case wxFONTENCODING_MACDINGBATS :
2008             enc = kCFStringEncodingMacDingbats ;
2009             break ;
2010         case wxFONTENCODING_MACTURKISH :
2011             enc = kCFStringEncodingMacTurkish ;
2012             break ;
2013         case wxFONTENCODING_MACCROATIAN :
2014             enc = kCFStringEncodingMacCroatian ;
2015             break ;
2016         case wxFONTENCODING_MACICELANDIC :
2017             enc = kCFStringEncodingMacIcelandic ;
2018             break ;
2019         case wxFONTENCODING_MACROMANIAN :
2020             enc = kCFStringEncodingMacRomanian ;
2021             break ;
2022         case wxFONTENCODING_MACCELTIC :
2023             enc = kCFStringEncodingMacCeltic ;
2024             break ;
2025         case wxFONTENCODING_MACGAELIC :
2026             enc = kCFStringEncodingMacGaelic ;
2027             break ;
2028 //      case wxFONTENCODING_MACKEYBOARD :
2029 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2030 //          break ;
2031         default :
2032             // because gcc is picky
2033             break ;
2034     } ;
2035     return enc ;
2036 }
2037
2038 class wxMBConv_cocoa : public wxMBConv
2039 {
2040 public:
2041     wxMBConv_cocoa()
2042     {
2043         Init(CFStringGetSystemEncoding()) ;
2044     }
2045
2046 #if wxUSE_FONTMAP
2047     wxMBConv_cocoa(const wxChar* name)
2048     {
2049         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2050     }
2051 #endif
2052
2053     wxMBConv_cocoa(wxFontEncoding encoding)
2054     {
2055         Init( wxCFStringEncFromFontEnc(encoding) );
2056     }
2057
2058     ~wxMBConv_cocoa()
2059     {
2060     }
2061
2062     void Init( CFStringEncoding encoding)
2063     {
2064         m_encoding = encoding ;
2065     }
2066
2067     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2068     {
2069         wxASSERT(szUnConv);
2070
2071         CFStringRef theString = CFStringCreateWithBytes (
2072                                                 NULL, //the allocator
2073                                                 (const UInt8*)szUnConv,
2074                                                 strlen(szUnConv),
2075                                                 m_encoding,
2076                                                 false //no BOM/external representation
2077                                                 );
2078
2079         wxASSERT(theString);
2080
2081         size_t nOutLength = CFStringGetLength(theString);
2082
2083         if (szOut == NULL)
2084         {
2085             CFRelease(theString);
2086             return nOutLength;
2087         }
2088
2089         CFRange theRange = { 0, nOutSize };
2090
2091 #if SIZEOF_WCHAR_T == 4
2092         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2093 #endif
2094
2095         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2096
2097         CFRelease(theString);
2098
2099         szUniCharBuffer[nOutLength] = '\0' ;
2100
2101 #if SIZEOF_WCHAR_T == 4
2102         wxMBConvUTF16 converter ;
2103         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2104         delete[] szUniCharBuffer;
2105 #endif
2106
2107         return nOutLength;
2108     }
2109
2110     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2111     {
2112         wxASSERT(szUnConv);
2113
2114         size_t nRealOutSize;
2115         size_t nBufSize = wxWcslen(szUnConv);
2116         UniChar* szUniBuffer = (UniChar*) szUnConv;
2117
2118 #if SIZEOF_WCHAR_T == 4
2119         wxMBConvUTF16 converter ;
2120         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2121         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2122         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2123         nBufSize /= sizeof(UniChar);
2124 #endif
2125
2126         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2127                                 NULL, //allocator
2128                                 szUniBuffer,
2129                                 nBufSize,
2130                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2131                             );
2132
2133         wxASSERT(theString);
2134
2135         //Note that CER puts a BOM when converting to unicode
2136         //so we  check and use getchars instead in that case
2137         if (m_encoding == kCFStringEncodingUnicode)
2138         {
2139             if (szOut != NULL)
2140                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2141
2142             nRealOutSize = CFStringGetLength(theString) + 1;
2143         }
2144         else
2145         {
2146             CFStringGetBytes(
2147                 theString,
2148                 CFRangeMake(0, CFStringGetLength(theString)),
2149                 m_encoding,
2150                 0, //what to put in characters that can't be converted -
2151                     //0 tells CFString to return NULL if it meets such a character
2152                 false, //not an external representation
2153                 (UInt8*) szOut,
2154                 nOutSize,
2155                 (CFIndex*) &nRealOutSize
2156                         );
2157         }
2158
2159         CFRelease(theString);
2160
2161 #if SIZEOF_WCHAR_T == 4
2162         delete[] szUniBuffer;
2163 #endif
2164
2165         return  nRealOutSize - 1;
2166     }
2167
2168     bool IsOk() const
2169     {
2170         return m_encoding != kCFStringEncodingInvalidId &&
2171               CFStringIsEncodingAvailable(m_encoding);
2172     }
2173
2174 private:
2175     CFStringEncoding m_encoding ;
2176 };
2177
2178 #endif // defined(__WXCOCOA__)
2179
2180 // ============================================================================
2181 // Mac conversion classes
2182 // ============================================================================
2183
2184 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2185
2186 class wxMBConv_mac : public wxMBConv
2187 {
2188 public:
2189     wxMBConv_mac()
2190     {
2191         Init(CFStringGetSystemEncoding()) ;
2192     }
2193
2194 #if wxUSE_FONTMAP
2195     wxMBConv_mac(const wxChar* name)
2196     {
2197         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2198     }
2199 #endif
2200
2201     wxMBConv_mac(wxFontEncoding encoding)
2202     {
2203         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2204     }
2205
2206     ~wxMBConv_mac()
2207     {
2208         OSStatus status = noErr ;
2209         status = TECDisposeConverter(m_MB2WC_converter);
2210         status = TECDisposeConverter(m_WC2MB_converter);
2211     }
2212
2213
2214     void Init( TextEncodingBase encoding)
2215     {
2216         OSStatus status = noErr ;
2217         m_char_encoding = encoding ;
2218         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2219
2220         status = TECCreateConverter(&m_MB2WC_converter,
2221                                     m_char_encoding,
2222                                     m_unicode_encoding);
2223         status = TECCreateConverter(&m_WC2MB_converter,
2224                                     m_unicode_encoding,
2225                                     m_char_encoding);
2226     }
2227
2228     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2229     {
2230         OSStatus status = noErr ;
2231         ByteCount byteOutLen ;
2232         ByteCount byteInLen = strlen(psz) ;
2233         wchar_t *tbuf = NULL ;
2234         UniChar* ubuf = NULL ;
2235         size_t res = 0 ;
2236
2237         if (buf == NULL)
2238         {
2239             //apple specs say at least 32
2240             n = wxMax( 32 , byteInLen ) ;
2241             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2242         }
2243         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2244 #if SIZEOF_WCHAR_T == 4
2245         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2246 #else
2247         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2248 #endif
2249         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2250           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2251 #if SIZEOF_WCHAR_T == 4
2252         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2253         // is not properly terminated we get random characters at the end
2254         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2255         wxMBConvUTF16 converter ;
2256         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2257         free( ubuf ) ;
2258 #else
2259         res = byteOutLen / sizeof( UniChar ) ;
2260 #endif
2261         if ( buf == NULL )
2262              free(tbuf) ;
2263
2264         if ( buf  && res < n)
2265             buf[res] = 0;
2266
2267         return res ;
2268     }
2269
2270     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2271     {
2272         OSStatus status = noErr ;
2273         ByteCount byteOutLen ;
2274         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2275
2276         char *tbuf = NULL ;
2277
2278         if (buf == NULL)
2279         {
2280             //apple specs say at least 32
2281             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2282             tbuf = (char*) malloc( n ) ;
2283         }
2284
2285         ByteCount byteBufferLen = n ;
2286         UniChar* ubuf = NULL ;
2287 #if SIZEOF_WCHAR_T == 4
2288         wxMBConvUTF16 converter ;
2289         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2290         byteInLen = unicharlen ;
2291         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2292         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2293 #else
2294         ubuf = (UniChar*) psz ;
2295 #endif
2296         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2297             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2298 #if SIZEOF_WCHAR_T == 4
2299         free( ubuf ) ;
2300 #endif
2301         if ( buf == NULL )
2302             free(tbuf) ;
2303
2304         size_t res = byteOutLen ;
2305         if ( buf  && res < n)
2306         {
2307             buf[res] = 0;
2308
2309             //we need to double-trip to verify it didn't insert any ? in place
2310             //of bogus characters
2311             wxWCharBuffer wcBuf(n);
2312             size_t pszlen = wxWcslen(psz);
2313             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2314                         wxWcslen(wcBuf) != pszlen ||
2315                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2316             {
2317                 // we didn't obtain the same thing we started from, hence
2318                 // the conversion was lossy and we consider that it failed
2319                 return (size_t)-1;
2320             }
2321         }
2322
2323         return res ;
2324     }
2325
2326     bool IsOk() const
2327         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2328
2329 private:
2330     TECObjectRef m_MB2WC_converter ;
2331     TECObjectRef m_WC2MB_converter ;
2332
2333     TextEncodingBase m_char_encoding ;
2334     TextEncodingBase m_unicode_encoding ;
2335 };
2336
2337 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2338
2339 // ============================================================================
2340 // wxEncodingConverter based conversion classes
2341 // ============================================================================
2342
2343 #if wxUSE_FONTMAP
2344
2345 class wxMBConv_wxwin : public wxMBConv
2346 {
2347 private:
2348     void Init()
2349     {
2350         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2351                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2352     }
2353
2354 public:
2355     // temporarily just use wxEncodingConverter stuff,
2356     // so that it works while a better implementation is built
2357     wxMBConv_wxwin(const wxChar* name)
2358     {
2359         if (name)
2360             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2361         else
2362             m_enc = wxFONTENCODING_SYSTEM;
2363
2364         Init();
2365     }
2366
2367     wxMBConv_wxwin(wxFontEncoding enc)
2368     {
2369         m_enc = enc;
2370
2371         Init();
2372     }
2373
2374     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2375     {
2376         size_t inbuf = strlen(psz);
2377         if (buf)
2378         {
2379             if (!m2w.Convert(psz,buf))
2380                 return (size_t)-1;
2381         }
2382         return inbuf;
2383     }
2384
2385     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2386     {
2387         const size_t inbuf = wxWcslen(psz);
2388         if (buf)
2389         {
2390             if (!w2m.Convert(psz,buf))
2391                 return (size_t)-1;
2392         }
2393
2394         return inbuf;
2395     }
2396
2397     bool IsOk() const { return m_ok; }
2398
2399 public:
2400     wxFontEncoding m_enc;
2401     wxEncodingConverter m2w, w2m;
2402
2403     // were we initialized successfully?
2404     bool m_ok;
2405
2406     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2407 };
2408
2409 #endif // wxUSE_FONTMAP
2410
2411 // ============================================================================
2412 // wxCSConv implementation
2413 // ============================================================================
2414
2415 void wxCSConv::Init()
2416 {
2417     m_name = NULL;
2418     m_convReal =  NULL;
2419     m_deferred = true;
2420 }
2421
2422 wxCSConv::wxCSConv(const wxChar *charset)
2423 {
2424     Init();
2425
2426     if ( charset )
2427     {
2428         SetName(charset);
2429     }
2430
2431     m_encoding = wxFONTENCODING_SYSTEM;
2432 }
2433
2434 wxCSConv::wxCSConv(wxFontEncoding encoding)
2435 {
2436     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2437     {
2438         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2439
2440         encoding = wxFONTENCODING_SYSTEM;
2441     }
2442
2443     Init();
2444
2445     m_encoding = encoding;
2446 }
2447
2448 wxCSConv::~wxCSConv()
2449 {
2450     Clear();
2451 }
2452
2453 wxCSConv::wxCSConv(const wxCSConv& conv)
2454         : wxMBConv()
2455 {
2456     Init();
2457
2458     SetName(conv.m_name);
2459     m_encoding = conv.m_encoding;
2460 }
2461
2462 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2463 {
2464     Clear();
2465
2466     SetName(conv.m_name);
2467     m_encoding = conv.m_encoding;
2468
2469     return *this;
2470 }
2471
2472 void wxCSConv::Clear()
2473 {
2474     free(m_name);
2475     delete m_convReal;
2476
2477     m_name = NULL;
2478     m_convReal = NULL;
2479 }
2480
2481 void wxCSConv::SetName(const wxChar *charset)
2482 {
2483     if (charset)
2484     {
2485         m_name = wxStrdup(charset);
2486         m_deferred = true;
2487     }
2488 }
2489
2490 wxMBConv *wxCSConv::DoCreate() const
2491 {
2492     // check for the special case of ASCII or ISO8859-1 charset: as we have
2493     // special knowledge of it anyhow, we don't need to create a special
2494     // conversion object
2495     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2496     {
2497         // don't convert at all
2498         return NULL;
2499     }
2500
2501     // we trust OS to do conversion better than we can so try external
2502     // conversion methods first
2503     //
2504     // the full order is:
2505     //      1. OS conversion (iconv() under Unix or Win32 API)
2506     //      2. hard coded conversions for UTF
2507     //      3. wxEncodingConverter as fall back
2508
2509     // step (1)
2510 #ifdef HAVE_ICONV
2511 #if !wxUSE_FONTMAP
2512     if ( m_name )
2513 #endif // !wxUSE_FONTMAP
2514     {
2515         wxString name(m_name);
2516
2517 #if wxUSE_FONTMAP
2518         if ( name.empty() )
2519             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2520 #endif // wxUSE_FONTMAP
2521
2522         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2523         if ( conv->IsOk() )
2524             return conv;
2525
2526         delete conv;
2527     }
2528 #endif // HAVE_ICONV
2529
2530 #ifdef wxHAVE_WIN32_MB2WC
2531     {
2532 #if wxUSE_FONTMAP
2533         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2534                                       : new wxMBConv_win32(m_encoding);
2535         if ( conv->IsOk() )
2536             return conv;
2537
2538         delete conv;
2539 #else
2540         return NULL;
2541 #endif
2542     }
2543 #endif // wxHAVE_WIN32_MB2WC
2544 #if defined(__WXMAC__)
2545     {
2546         // leave UTF16 and UTF32 to the built-ins of wx
2547         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2548             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2549         {
2550
2551 #if wxUSE_FONTMAP
2552             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2553                                         : new wxMBConv_mac(m_encoding);
2554 #else
2555             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2556 #endif
2557             if ( conv->IsOk() )
2558                  return conv;
2559
2560             delete conv;
2561         }
2562     }
2563 #endif
2564 #if defined(__WXCOCOA__)
2565     {
2566         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2567         {
2568
2569 #if wxUSE_FONTMAP
2570             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2571                                           : new wxMBConv_cocoa(m_encoding);
2572 #else
2573             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2574 #endif
2575             if ( conv->IsOk() )
2576                  return conv;
2577
2578             delete conv;
2579         }
2580     }
2581 #endif
2582     // step (2)
2583     wxFontEncoding enc = m_encoding;
2584 #if wxUSE_FONTMAP
2585     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2586     {
2587         // use "false" to suppress interactive dialogs -- we can be called from
2588         // anywhere and popping up a dialog from here is the last thing we want to
2589         // do
2590         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2591     }
2592 #endif // wxUSE_FONTMAP
2593
2594     switch ( enc )
2595     {
2596         case wxFONTENCODING_UTF7:
2597              return new wxMBConvUTF7;
2598
2599         case wxFONTENCODING_UTF8:
2600              return new wxMBConvUTF8;
2601
2602         case wxFONTENCODING_UTF16BE:
2603              return new wxMBConvUTF16BE;
2604
2605         case wxFONTENCODING_UTF16LE:
2606              return new wxMBConvUTF16LE;
2607
2608         case wxFONTENCODING_UTF32BE:
2609              return new wxMBConvUTF32BE;
2610
2611         case wxFONTENCODING_UTF32LE:
2612              return new wxMBConvUTF32LE;
2613
2614         default:
2615              // nothing to do but put here to suppress gcc warnings
2616              ;
2617     }
2618
2619     // step (3)
2620 #if wxUSE_FONTMAP
2621     {
2622         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2623                                       : new wxMBConv_wxwin(m_encoding);
2624         if ( conv->IsOk() )
2625             return conv;
2626
2627         delete conv;
2628     }
2629 #endif // wxUSE_FONTMAP
2630
2631     // NB: This is a hack to prevent deadlock. What could otherwise happen
2632     //     in Unicode build: wxConvLocal creation ends up being here
2633     //     because of some failure and logs the error. But wxLog will try to
2634     //     attach timestamp, for which it will need wxConvLocal (to convert
2635     //     time to char* and then wchar_t*), but that fails, tries to log
2636     //     error, but wxLog has a (already locked) critical section that
2637     //     guards static buffer.
2638     static bool alreadyLoggingError = false;
2639     if (!alreadyLoggingError)
2640     {
2641         alreadyLoggingError = true;
2642         wxLogError(_("Cannot convert from the charset '%s'!"),
2643                    m_name ? m_name
2644                       :
2645 #if wxUSE_FONTMAP
2646                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2647 #else // !wxUSE_FONTMAP
2648                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2649 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2650               );
2651         alreadyLoggingError = false;
2652     }
2653
2654     return NULL;
2655 }
2656
2657 void wxCSConv::CreateConvIfNeeded() const
2658 {
2659     if ( m_deferred )
2660     {
2661         wxCSConv *self = (wxCSConv *)this; // const_cast
2662
2663 #if wxUSE_INTL
2664         // if we don't have neither the name nor the encoding, use the default
2665         // encoding for this system
2666         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2667         {
2668             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2669         }
2670 #endif // wxUSE_INTL
2671
2672         self->m_convReal = DoCreate();
2673         self->m_deferred = false;
2674     }
2675 }
2676
2677 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2678 {
2679     CreateConvIfNeeded();
2680
2681     if (m_convReal)
2682         return m_convReal->MB2WC(buf, psz, n);
2683
2684     // latin-1 (direct)
2685     size_t len = strlen(psz);
2686
2687     if (buf)
2688     {
2689         for (size_t c = 0; c <= len; c++)
2690             buf[c] = (unsigned char)(psz[c]);
2691     }
2692
2693     return len;
2694 }
2695
2696 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2697 {
2698     CreateConvIfNeeded();
2699
2700     if (m_convReal)
2701         return m_convReal->WC2MB(buf, psz, n);
2702
2703     // latin-1 (direct)
2704     const size_t len = wxWcslen(psz);
2705     if (buf)
2706     {
2707         for (size_t c = 0; c <= len; c++)
2708         {
2709             if (psz[c] > 0xFF)
2710                 return (size_t)-1;
2711             buf[c] = (char)psz[c];
2712         }
2713     }
2714     else
2715     {
2716         for (size_t c = 0; c <= len; c++)
2717         {
2718             if (psz[c] > 0xFF)
2719                 return (size_t)-1;
2720         }
2721     }
2722
2723     return len;
2724 }
2725
2726 // ----------------------------------------------------------------------------
2727 // globals
2728 // ----------------------------------------------------------------------------
2729
2730 #ifdef __WINDOWS__
2731     static wxMBConv_win32 wxConvLibcObj;
2732 #elif defined(__WXMAC__) && !defined(__MACH__)
2733     static wxMBConv_mac wxConvLibcObj ;
2734 #else
2735     static wxMBConvLibc wxConvLibcObj;
2736 #endif
2737
2738 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2739 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2740 static wxMBConvUTF7 wxConvUTF7Obj;
2741 static wxMBConvUTF8 wxConvUTF8Obj;
2742
2743 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2744 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2745 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2746 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2747 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2748 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2749 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2750 #ifdef __WXOSX__
2751                                     wxConvUTF8Obj;
2752 #else
2753                                     wxConvLibcObj;
2754 #endif
2755
2756
2757 #else // !wxUSE_WCHAR_T
2758
2759 // stand-ins in absence of wchar_t
2760 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2761                                 wxConvISO8859_1,
2762                                 wxConvLocal,
2763                                 wxConvUTF8;
2764
2765 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2766
2767