src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58 #ifdef HAVE_LANGINFO_H
  59   #include <langinfo.h>
  60 #endif
  61
  62 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  63     #define wxHAVE_WIN32_MB2WC
  64 #endif // __WIN32__ but !__WXMICROWIN__
  65
  66 // ----------------------------------------------------------------------------
  67 // headers
  68 // ----------------------------------------------------------------------------
  69
  70 #ifdef __SALFORDC__
  71     #include <clib.h>
  72 #endif
  73
  74 #ifdef HAVE_ICONV
  75     #include <iconv.h>
  76     #include "wx/thread.h"
  77 #endif
  78
  79 #include "wx/encconv.h"
  80 #include "wx/fontmap.h"
  81 #include "wx/utils.h"
  82
  83 #ifdef __WXMAC__
  84 #include <ATSUnicode.h>
  85 #include <TextCommon.h>
  86 #include <TextEncodingConverter.h>
  87
  88 #include  "wx/mac/private.h"  // includes mac headers
  89 #endif
  90 // ----------------------------------------------------------------------------
  91 // macros
  92 // ----------------------------------------------------------------------------
  93
  94 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  95 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  96
  97 #if SIZEOF_WCHAR_T == 4
  98     #define WC_NAME         "UCS4"
  99     #define WC_BSWAP         BSWAP_UCS4
 100     #ifdef WORDS_BIGENDIAN
 101       #define WC_NAME_BEST  "UCS-4BE"
 102     #else
 103       #define WC_NAME_BEST  "UCS-4LE"
 104     #endif
 105 #elif SIZEOF_WCHAR_T == 2
 106     #define WC_NAME         "UTF16"
 107     #define WC_BSWAP         BSWAP_UTF16
 108     #define WC_UTF16
 109     #ifdef WORDS_BIGENDIAN
 110       #define WC_NAME_BEST  "UTF-16BE"
 111     #else
 112       #define WC_NAME_BEST  "UTF-16LE"
 113     #endif
 114 #else // sizeof(wchar_t) != 2 nor 4
 115     // does this ever happen?
 116     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 117 #endif
 118
 119 // ============================================================================
 120 // implementation
 121 // ============================================================================
 122
 123 // ----------------------------------------------------------------------------
 124 // UTF-16 en/decoding to/from UCS-4
 125 // ----------------------------------------------------------------------------
 126
 127
 128 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 129 {
 130     if (input<=0xffff)
 131     {
 132         if (output)
 133             *output = (wxUint16) input;
 134         return 1;
 135     }
 136     else if (input>=0x110000)
 137     {
 138         return (size_t)-1;
 139     }
 140     else
 141     {
 142         if (output)
 143         {
 144             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 145             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 146         }
 147         return 2;
 148     }
 149 }
 150
 151 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 152 {
 153     if ((*input<0xd800) || (*input>0xdfff))
 154     {
 155         output = *input;
 156         return 1;
 157     }
 158     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 159     {
 160         output = *input;
 161         return (size_t)-1;
 162     }
 163     else
 164     {
 165         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 166         return 2;
 167     }
 168 }
 169
 170
 171 // ----------------------------------------------------------------------------
 172 // wxMBConv
 173 // ----------------------------------------------------------------------------
 174
 175 wxMBConv::~wxMBConv()
 176 {
 177     // nothing to do here (necessary for Darwin linking probably)
 178 }
 179
 180 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 181 {
 182     if ( psz )
 183     {
 184         // calculate the length of the buffer needed first
 185         size_t nLen = MB2WC(NULL, psz, 0);
 186         if ( nLen != (size_t)-1 )
 187         {
 188             // now do the actual conversion
 189             wxWCharBuffer buf(nLen);
 190             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 191             if ( nLen != (size_t)-1 )
 192             {
 193                 return buf;
 194             }
 195         }
 196     }
 197
 198     wxWCharBuffer buf((wchar_t *)NULL);
 199
 200     return buf;
 201 }
 202
 203 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 204 {
 205     if ( pwz )
 206     {
 207         size_t nLen = WC2MB(NULL, pwz, 0);
 208         if ( nLen != (size_t)-1 )
 209         {
 210             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 211             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 212             if ( nLen != (size_t)-1 )
 213             {
 214                 return buf;
 215             }
 216         }
 217     }
 218
 219     wxCharBuffer buf((char *)NULL);
 220
 221     return buf;
 222 }
 223
 224 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 225 {
 226     wxASSERT(pOutSize != NULL);
 227
 228     const char* szEnd = szString + nStringLen + 1;
 229     const char* szPos = szString;
 230     const char* szStart = szPos;
 231
 232     size_t nActualLength = 0;
 233     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 234
 235     wxWCharBuffer theBuffer(nCurrentSize);
 236
 237     //Convert the string until the length() is reached, continuing the
 238     //loop every time a null character is reached
 239     while(szPos != szEnd)
 240     {
 241         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 242
 243         //Get the length of the current (sub)string
 244         size_t nLen = MB2WC(NULL, szPos, 0);
 245
 246         //Invalid conversion?
 247         if( nLen == (size_t)-1 )
 248         {
 249             *pOutSize = 0;
 250             theBuffer.data()[0u] = wxT('\0');
 251             return theBuffer;
 252         }
 253
 254
 255         //Increase the actual length (+1 for current null character)
 256         nActualLength += nLen + 1;
 257
 258         //if buffer too big, realloc the buffer
 259         if (nActualLength > (nCurrentSize+1))
 260         {
 261             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 262             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 263             theBuffer = theNewBuffer;
 264             nCurrentSize <<= 1;
 265         }
 266
 267         //Convert the current (sub)string
 268         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 269         {
 270             *pOutSize = 0;
 271             theBuffer.data()[0u] = wxT('\0');
 272             return theBuffer;
 273         }
 274
 275         //Increment to next (sub)string
 276         //Note that we have to use strlen here instead of nLen
 277         //here because XX2XX gives us the size of the output buffer,
 278         //not neccessarly the length of the string
 279         szPos += strlen(szPos) + 1;
 280     }
 281
 282     //success - return actual length and the buffer
 283     *pOutSize = nActualLength;
 284     return theBuffer;
 285 }
 286
 287 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 288 {
 289     wxASSERT(pOutSize != NULL);
 290
 291     const wchar_t* szEnd = szString + nStringLen + 1;
 292     const wchar_t* szPos = szString;
 293     const wchar_t* szStart = szPos;
 294
 295     size_t nActualLength = 0;
 296     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 297
 298     wxCharBuffer theBuffer(nCurrentSize);
 299
 300     //Convert the string until the length() is reached, continuing the
 301     //loop every time a null character is reached
 302     while(szPos != szEnd)
 303     {
 304         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 305
 306         //Get the length of the current (sub)string
 307         size_t nLen = WC2MB(NULL, szPos, 0);
 308
 309         //Invalid conversion?
 310         if( nLen == (size_t)-1 )
 311         {
 312             *pOutSize = 0;
 313             theBuffer.data()[0u] = wxT('\0');
 314             return theBuffer;
 315         }
 316
 317         //Increase the actual length (+1 for current null character)
 318         nActualLength += nLen + 1;
 319
 320         //if buffer too big, realloc the buffer
 321         if (nActualLength > (nCurrentSize+1))
 322         {
 323             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 324             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 325             theBuffer = theNewBuffer;
 326             nCurrentSize <<= 1;
 327         }
 328
 329         //Convert the current (sub)string
 330         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 331         {
 332             *pOutSize = 0;
 333             theBuffer.data()[0u] = wxT('\0');
 334             return theBuffer;
 335         }
 336
 337         //Increment to next (sub)string
 338         //Note that we have to use wxWcslen here instead of nLen
 339         //here because XX2XX gives us the size of the output buffer,
 340         //not neccessarly the length of the string
 341         szPos += wxWcslen(szPos) + 1;
 342     }
 343
 344     //success - return actual length and the buffer
 345     *pOutSize = nActualLength;
 346     return theBuffer;
 347 }
 348
 349 // ----------------------------------------------------------------------------
 350 // wxMBConvLibc
 351 // ----------------------------------------------------------------------------
 352
 353 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 354 {
 355     return wxMB2WC(buf, psz, n);
 356 }
 357
 358 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 359 {
 360     return wxWC2MB(buf, psz, n);
 361 }
 362
 363 // ----------------------------------------------------------------------------
 364 // wxConvBrokenFileNames is made for GTK2 in Unicode mode when
 365 // files are accidentally written in an encoding which is not
 366 // the system encoding. Typically, the system encoding will be
 367 // UTF8 but there might be files stored in ISO8859-1 on disk.
 368 // ----------------------------------------------------------------------------
 369
 370 class wxConvBrokenFileNames: public wxMBConvLibc
 371 {
 372 public:
 373     wxConvBrokenFileNames() : m_utf8conv(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL) { }
 374     virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
 375     virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
 376     inline bool UseUTF8() const;
 377 private:
 378     wxMBConvUTF8 m_utf8conv;
 379 };
 380
 381 bool wxConvBrokenFileNames::UseUTF8() const
 382 {
 383 #if defined HAVE_LANGINFO_H && defined CODESET
 384     char *codeset = nl_langinfo(CODESET);
 385     return strcmp(codeset, "UTF-8") == 0;
 386 #else
 387     return false;
 388 #endif
 389 }
 390
 391 size_t wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const
 392 {
 393     if (UseUTF8())
 394         return m_utf8conv.MB2WC( outputBuf, psz, outputSize );
 395     else
 396         return wxMBConvLibc::MB2WC( outputBuf, psz, outputSize );
 397 }
 398
 399 size_t wxConvBrokenFileNames::WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const
 400 {
 401     if (UseUTF8())
 402         return m_utf8conv.WC2MB( outputBuf, psz, outputSize );
 403     else
 404         return wxMBConvLibc::WC2MB( outputBuf, psz, outputSize );
 405 }
 406
 407 // ----------------------------------------------------------------------------
 408 // UTF-7
 409 // ----------------------------------------------------------------------------
 410
 411 // Implementation (C) 2004 Fredrik Roubert
 412
 413 //
 414 // BASE64 decoding table
 415 //
 416 static const unsigned char utf7unb64[] =
 417 {
 418     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 419     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 420     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 421     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 422     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 423     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 424     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 425     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 426     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 427     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 428     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 429     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 431     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 432     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 433     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 434     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 435     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 436     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 437     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 438     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 439     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 440     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 441     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 442     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 443     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 444     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 450 };
 451
 452 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 453 {
 454     size_t len = 0;
 455
 456     while (*psz && ((!buf) || (len < n)))
 457     {
 458         unsigned char cc = *psz++;
 459         if (cc != '+')
 460         {
 461             // plain ASCII char
 462             if (buf)
 463                 *buf++ = cc;
 464             len++;
 465         }
 466         else if (*psz == '-')
 467         {
 468             // encoded plus sign
 469             if (buf)
 470                 *buf++ = cc;
 471             len++;
 472             psz++;
 473         }
 474         else
 475         {
 476             // BASE64 encoded string
 477             bool lsb;
 478             unsigned char c;
 479             unsigned int d, l;
 480             for (lsb = false, d = 0, l = 0;
 481                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 482             {
 483                 d <<= 6;
 484                 d += cc;
 485                 for (l += 6; l >= 8; lsb = !lsb)
 486                 {
 487                     c = (unsigned char)((d >> (l -= 8)) % 256);
 488                     if (lsb)
 489                     {
 490                         if (buf)
 491                             *buf++ |= c;
 492                         len ++;
 493                     }
 494                     else
 495                         if (buf)
 496                             *buf = (wchar_t)(c << 8);
 497                 }
 498             }
 499             if (*psz == '-')
 500                 psz++;
 501         }
 502     }
 503     if (buf && (len < n))
 504         *buf = 0;
 505     return len;
 506 }
 507
 508 //
 509 // BASE64 encoding table
 510 //
 511 static const unsigned char utf7enb64[] =
 512 {
 513     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 514     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 515     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 516     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 517     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 518     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 519     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 520     '4', '5', '6', '7', '8', '9', '+', '/'
 521 };
 522
 523 //
 524 // UTF-7 encoding table
 525 //
 526 // 0 - Set D (directly encoded characters)
 527 // 1 - Set O (optional direct characters)
 528 // 2 - whitespace characters (optional)
 529 // 3 - special characters
 530 //
 531 static const unsigned char utf7encode[128] =
 532 {
 533     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 534     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 535     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 536     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 537     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 538     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 539     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 540     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 541 };
 542
 543 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 544 {
 545
 546
 547     size_t len = 0;
 548
 549     while (*psz && ((!buf) || (len < n)))
 550     {
 551         wchar_t cc = *psz++;
 552         if (cc < 0x80 && utf7encode[cc] < 1)
 553         {
 554             // plain ASCII char
 555             if (buf)
 556                 *buf++ = (char)cc;
 557             len++;
 558         }
 559 #ifndef WC_UTF16
 560         else if (((wxUint32)cc) > 0xffff)
 561             {
 562             // no surrogate pair generation (yet?)
 563             return (size_t)-1;
 564         }
 565 #endif
 566         else
 567         {
 568             if (buf)
 569                 *buf++ = '+';
 570             len++;
 571             if (cc != '+')
 572             {
 573                 // BASE64 encode string
 574                 unsigned int lsb, d, l;
 575                 for (d = 0, l = 0;; psz++)
 576                 {
 577                     for (lsb = 0; lsb < 2; lsb ++)
 578                     {
 579                         d <<= 8;
 580                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 581
 582                         for (l += 8; l >= 6; )
 583                         {
 584                             l -= 6;
 585                             if (buf)
 586                                 *buf++ = utf7enb64[(d >> l) % 64];
 587                             len++;
 588                         }
 589                     }
 590                     cc = *psz;
 591                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 592                         break;
 593                 }
 594                 if (l != 0)
 595                 {
 596                     if (buf)
 597                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 598                     len++;
 599                 }
 600             }
 601             if (buf)
 602                 *buf++ = '-';
 603             len++;
 604         }
 605     }
 606     if (buf && (len < n))
 607         *buf = 0;
 608     return len;
 609 }
 610
 611 // ----------------------------------------------------------------------------
 612 // UTF-8
 613 // ----------------------------------------------------------------------------
 614
 615 static wxUint32 utf8_max[]=
 616     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 617
 618 const wxUint32 wxUnicodePUA = 0x100000;
 619 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 620
 621 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 622 {
 623     size_t len = 0;
 624
 625     while (*psz && ((!buf) || (len < n)))
 626     {
 627         const char *opsz = psz;
 628         bool invalid = false;
 629         unsigned char cc = *psz++, fc = cc;
 630         unsigned cnt;
 631         for (cnt = 0; fc & 0x80; cnt++)
 632             fc <<= 1;
 633         if (!cnt)
 634         {
 635             // plain ASCII char
 636             if (buf)
 637                 *buf++ = cc;
 638             len++;
 639         }
 640         else
 641         {
 642             cnt--;
 643             if (!cnt)
 644             {
 645                 // invalid UTF-8 sequence
 646                 invalid = true;
 647             }
 648             else
 649             {
 650                 unsigned ocnt = cnt - 1;
 651                 wxUint32 res = cc & (0x3f >> cnt);
 652                 while (cnt--)
 653                 {
 654                     cc = *psz;
 655                     if ((cc & 0xC0) != 0x80)
 656                     {
 657                         // invalid UTF-8 sequence
 658                         invalid = true;
 659                         break;
 660                     }
 661                     psz++;
 662                     res = (res << 6) | (cc & 0x3f);
 663                 }
 664                 if (invalid || res <= utf8_max[ocnt])
 665                 {
 666                     // illegal UTF-8 encoding
 667                     invalid = true;
 668                 }
 669                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 670                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 671                 {
 672                     // if one of our PUA characters turns up externally
 673                     // it must also be treated as an illegal sequence
 674                     // (a bit like you have to escape an escape character)
 675                     invalid = true;
 676                 }
 677                 else
 678                 {
 679 #ifdef WC_UTF16
 680                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 681                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 682                     if (pa == (size_t)-1)
 683                     {
 684                         invalid = true;
 685                     }
 686                     else
 687                     {
 688                         if (buf)
 689                             buf += pa;
 690                         len += pa;
 691                     }
 692 #else // !WC_UTF16
 693                     if (buf)
 694                         *buf++ = res;
 695                     len++;
 696 #endif // WC_UTF16/!WC_UTF16
 697                 }
 698             }
 699             if (invalid)
 700             {
 701                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 702                 {
 703                     while (opsz < psz && (!buf || len < n))
 704                     {
 705 #ifdef WC_UTF16
 706                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 707                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 708                         wxASSERT(pa != (size_t)-1);
 709                         if (buf)
 710                             buf += pa;
 711                         opsz++;
 712                         len += pa;
 713 #else
 714                         if (buf)
 715                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 716                         opsz++;
 717                         len++;
 718 #endif
 719                     }
 720                 }
 721                 else
 722                 if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 723                 {
 724                     while (opsz < psz && (!buf || len < n))
 725                     {
 726                         wchar_t str[6];
 727                         wxSnprintf( str, 5, L"\\%o", (int) (unsigned char) *opsz );
 728                         if (buf)
 729                             *buf++ = str[0];
 730                         if (buf)
 731                             *buf++ = str[1];
 732                         if (buf)
 733                             *buf++ = str[2];
 734                         if (buf)
 735                             *buf++ = str[3];
 736                         opsz++;
 737                         len += 4;
 738                     }
 739                 }
 740                 else
 741                 {
 742                     return (size_t)-1;
 743                 }
 744             }
 745         }
 746     }
 747     if (buf && (len < n))
 748         *buf = 0;
 749     return len;
 750 }
 751
 752 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 753 {
 754     size_t len = 0;
 755
 756     while (*psz && ((!buf) || (len < n)))
 757     {
 758         wxUint32 cc;
 759 #ifdef WC_UTF16
 760         // cast is ok for WC_UTF16
 761         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 762         psz += (pa == (size_t)-1) ? 1 : pa;
 763 #else
 764         cc=(*psz++) & 0x7fffffff;
 765 #endif
 766         if ((m_options & MAP_INVALID_UTF8_TO_PUA)
 767             && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd)
 768         {
 769             if (buf)
 770                 *buf++ = (char)(cc - wxUnicodePUA);
 771             len++;
 772         }
 773         else
 774         if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 775             && cc == L'\\')
 776         {
 777             wchar_t str[4];
 778             str[0] = *psz; psz++;
 779             str[1] = *psz; psz++;
 780             str[2] = *psz; psz++;
 781             str[3] = 0;
 782             int octal;
 783             wxSscanf( str, L"%o", &octal );
 784             if (buf)
 785                 *buf++ = (char) octal;
 786             len++;
 787         }
 788         else
 789         {
 790             unsigned cnt;
 791             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 792             if (!cnt)
 793             {
 794                 // plain ASCII char
 795                 if (buf)
 796                     *buf++ = (char) cc;
 797                 len++;
 798             }
 799
 800             else
 801             {
 802                 len += cnt + 1;
 803                 if (buf)
 804                 {
 805                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 806                     while (cnt--)
 807                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 808                 }
 809             }
 810         }
 811     }
 812
 813     if (buf && (len<n)) *buf = 0;
 814
 815     return len;
 816 }
 817
 818 // ----------------------------------------------------------------------------
 819 // UTF-16
 820 // ----------------------------------------------------------------------------
 821
 822 #ifdef WORDS_BIGENDIAN
 823     #define wxMBConvUTF16straight wxMBConvUTF16BE
 824     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 825 #else
 826     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 827     #define wxMBConvUTF16straight wxMBConvUTF16LE
 828 #endif
 829
 830
 831 #ifdef WC_UTF16
 832
 833 // copy 16bit MB to 16bit String
 834 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 835 {
 836     size_t len=0;
 837
 838     while (*(wxUint16*)psz && (!buf || len < n))
 839     {
 840         if (buf)
 841             *buf++ = *(wxUint16*)psz;
 842         len++;
 843
 844         psz += sizeof(wxUint16);
 845     }
 846     if (buf && len<n)   *buf=0;
 847
 848     return len;
 849 }
 850
 851
 852 // copy 16bit String to 16bit MB
 853 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 854 {
 855     size_t len=0;
 856
 857     while (*psz && (!buf || len < n))
 858     {
 859         if (buf)
 860         {
 861             *(wxUint16*)buf = *psz;
 862             buf += sizeof(wxUint16);
 863         }
 864         len += sizeof(wxUint16);
 865         psz++;
 866     }
 867     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 868
 869     return len;
 870 }
 871
 872
 873 // swap 16bit MB to 16bit String
 874 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 875 {
 876     size_t len=0;
 877
 878     while (*(wxUint16*)psz && (!buf || len < n))
 879     {
 880         if (buf)
 881         {
 882             ((char *)buf)[0] = psz[1];
 883             ((char *)buf)[1] = psz[0];
 884             buf++;
 885         }
 886         len++;
 887         psz += sizeof(wxUint16);
 888     }
 889     if (buf && len<n)   *buf=0;
 890
 891     return len;
 892 }
 893
 894
 895 // swap 16bit MB to 16bit String
 896 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 897 {
 898     size_t len=0;
 899
 900     while (*psz && (!buf || len < n))
 901     {
 902         if (buf)
 903         {
 904             *buf++ = ((char*)psz)[1];
 905             *buf++ = ((char*)psz)[0];
 906         }
 907         len += sizeof(wxUint16);
 908         psz++;
 909     }
 910     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 911
 912     return len;
 913 }
 914
 915
 916 #else // WC_UTF16
 917
 918
 919 // copy 16bit MB to 32bit String
 920 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 921 {
 922     size_t len=0;
 923
 924     while (*(wxUint16*)psz && (!buf || len < n))
 925     {
 926         wxUint32 cc;
 927         size_t pa=decode_utf16((wxUint16*)psz, cc);
 928         if (pa == (size_t)-1)
 929             return pa;
 930
 931         if (buf)
 932             *buf++ = cc;
 933         len++;
 934         psz += pa * sizeof(wxUint16);
 935     }
 936     if (buf && len<n)   *buf=0;
 937
 938     return len;
 939 }
 940
 941
 942 // copy 32bit String to 16bit MB
 943 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 944 {
 945     size_t len=0;
 946
 947     while (*psz && (!buf || len < n))
 948     {
 949         wxUint16 cc[2];
 950         size_t pa=encode_utf16(*psz, cc);
 951
 952         if (pa == (size_t)-1)
 953             return pa;
 954
 955         if (buf)
 956         {
 957             *(wxUint16*)buf = cc[0];
 958             buf += sizeof(wxUint16);
 959             if (pa > 1)
 960             {
 961                 *(wxUint16*)buf = cc[1];
 962                 buf += sizeof(wxUint16);
 963             }
 964         }
 965
 966         len += pa*sizeof(wxUint16);
 967         psz++;
 968     }
 969     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 970
 971     return len;
 972 }
 973
 974
 975 // swap 16bit MB to 32bit String
 976 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 977 {
 978     size_t len=0;
 979
 980     while (*(wxUint16*)psz && (!buf || len < n))
 981     {
 982         wxUint32 cc;
 983         char tmp[4];
 984         tmp[0]=psz[1];  tmp[1]=psz[0];
 985         tmp[2]=psz[3];  tmp[3]=psz[2];
 986
 987         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 988         if (pa == (size_t)-1)
 989             return pa;
 990
 991         if (buf)
 992             *buf++ = cc;
 993
 994         len++;
 995         psz += pa * sizeof(wxUint16);
 996     }
 997     if (buf && len<n)   *buf=0;
 998
 999     return len;
1000 }
1001
1002
1003 // swap 32bit String to 16bit MB
1004 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1005 {
1006     size_t len=0;
1007
1008     while (*psz && (!buf || len < n))
1009     {
1010         wxUint16 cc[2];
1011         size_t pa=encode_utf16(*psz, cc);
1012
1013         if (pa == (size_t)-1)
1014             return pa;
1015
1016         if (buf)
1017         {
1018             *buf++ = ((char*)cc)[1];
1019             *buf++ = ((char*)cc)[0];
1020             if (pa > 1)
1021             {
1022                 *buf++ = ((char*)cc)[3];
1023                 *buf++ = ((char*)cc)[2];
1024             }
1025         }
1026
1027         len += pa*sizeof(wxUint16);
1028         psz++;
1029     }
1030     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1031
1032     return len;
1033 }
1034
1035 #endif // WC_UTF16
1036
1037
1038 // ----------------------------------------------------------------------------
1039 // UTF-32
1040 // ----------------------------------------------------------------------------
1041
// The member functions below are written against the aliases "straight" and
// "swap": when WORDS_BIGENDIAN is defined "straight" stands for the BE class
// and "swap" for the LE one, otherwise it is the other way round.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global UTF-32 converter objects, one for each byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1053
1054
1055 #ifdef WC_UTF16
1056
1057 // copy 32bit MB to 16bit String
1058 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1059 {
1060     size_t len=0;
1061
1062     while (*(wxUint32*)psz && (!buf || len < n))
1063     {
1064         wxUint16 cc[2];
1065
1066         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1067         if (pa == (size_t)-1)
1068             return pa;
1069
1070         if (buf)
1071         {
1072             *buf++ = cc[0];
1073             if (pa > 1)
1074                 *buf++ = cc[1];
1075         }
1076         len += pa;
1077         psz += sizeof(wxUint32);
1078     }
1079     if (buf && len<n)   *buf=0;
1080
1081     return len;
1082 }
1083
1084
1085 // copy 16bit String to 32bit MB
1086 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1087 {
1088     size_t len=0;
1089
1090     while (*psz && (!buf || len < n))
1091     {
1092         wxUint32 cc;
1093
1094         // cast is ok for WC_UTF16
1095         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1096         if (pa == (size_t)-1)
1097             return pa;
1098
1099         if (buf)
1100         {
1101             *(wxUint32*)buf = cc;
1102             buf += sizeof(wxUint32);
1103         }
1104         len += sizeof(wxUint32);
1105         psz += pa;
1106     }
1107
1108     if (buf && len<=n-sizeof(wxUint32))
1109         *(wxUint32*)buf=0;
1110
1111     return len;
1112 }
1113
1114
1115
1116 // swap 32bit MB to 16bit String
1117 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1118 {
1119     size_t len=0;
1120
1121     while (*(wxUint32*)psz && (!buf || len < n))
1122     {
1123         char tmp[4];
1124         tmp[0] = psz[3];   tmp[1] = psz[2];
1125         tmp[2] = psz[1];   tmp[3] = psz[0];
1126
1127
1128         wxUint16 cc[2];
1129
1130         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1131         if (pa == (size_t)-1)
1132             return pa;
1133
1134         if (buf)
1135         {
1136             *buf++ = cc[0];
1137             if (pa > 1)
1138                 *buf++ = cc[1];
1139         }
1140         len += pa;
1141         psz += sizeof(wxUint32);
1142     }
1143
1144     if (buf && len<n)
1145         *buf=0;
1146
1147     return len;
1148 }
1149
1150
1151 // swap 16bit String to 32bit MB
1152 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1153 {
1154     size_t len=0;
1155
1156     while (*psz && (!buf || len < n))
1157     {
1158         char cc[4];
1159
1160         // cast is ok for WC_UTF16
1161         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1162         if (pa == (size_t)-1)
1163             return pa;
1164
1165         if (buf)
1166         {
1167             *buf++ = cc[3];
1168             *buf++ = cc[2];
1169             *buf++ = cc[1];
1170             *buf++ = cc[0];
1171         }
1172         len += sizeof(wxUint32);
1173         psz += pa;
1174     }
1175
1176     if (buf && len<=n-sizeof(wxUint32))
1177         *(wxUint32*)buf=0;
1178
1179     return len;
1180 }
1181
1182 #else // WC_UTF16
1183
1184
1185 // copy 32bit MB to 32bit String
1186 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1187 {
1188     size_t len=0;
1189
1190     while (*(wxUint32*)psz && (!buf || len < n))
1191     {
1192         if (buf)
1193             *buf++ = *(wxUint32*)psz;
1194         len++;
1195         psz += sizeof(wxUint32);
1196     }
1197
1198     if (buf && len<n)
1199         *buf=0;
1200
1201     return len;
1202 }
1203
1204
1205 // copy 32bit String to 32bit MB
1206 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1207 {
1208     size_t len=0;
1209
1210     while (*psz && (!buf || len < n))
1211     {
1212         if (buf)
1213         {
1214             *(wxUint32*)buf = *psz;
1215             buf += sizeof(wxUint32);
1216         }
1217
1218         len += sizeof(wxUint32);
1219         psz++;
1220     }
1221
1222     if (buf && len<=n-sizeof(wxUint32))
1223         *(wxUint32*)buf=0;
1224
1225     return len;
1226 }
1227
1228
1229 // swap 32bit MB to 32bit String
1230 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1231 {
1232     size_t len=0;
1233
1234     while (*(wxUint32*)psz && (!buf || len < n))
1235     {
1236         if (buf)
1237         {
1238             ((char *)buf)[0] = psz[3];
1239             ((char *)buf)[1] = psz[2];
1240             ((char *)buf)[2] = psz[1];
1241             ((char *)buf)[3] = psz[0];
1242             buf++;
1243         }
1244         len++;
1245         psz += sizeof(wxUint32);
1246     }
1247
1248     if (buf && len<n)
1249         *buf=0;
1250
1251     return len;
1252 }
1253
1254
1255 // swap 32bit String to 32bit MB
1256 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1257 {
1258     size_t len=0;
1259
1260     while (*psz && (!buf || len < n))
1261     {
1262         if (buf)
1263         {
1264             *buf++ = ((char *)psz)[3];
1265             *buf++ = ((char *)psz)[2];
1266             *buf++ = ((char *)psz)[1];
1267             *buf++ = ((char *)psz)[0];
1268         }
1269         len += sizeof(wxUint32);
1270         psz++;
1271     }
1272
1273     if (buf && len<=n-sizeof(wxUint32))
1274         *(wxUint32*)buf=0;
1275
1276     return len;
1277 }
1278
1279
1280 #endif // WC_UTF16
1281
1282
1283 // ============================================================================
1284 // The classes doing conversion using the iconv_xxx() functions
1285 // ============================================================================
1286
1287 #ifdef HAVE_ICONV
1288
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) is true if an iconv() call must be treated as
// failed: cres is the value iconv() returned and bufLeft the number of input
// bytes it left unconverted. The arguments are not parenthesised in the
// expansion, so only simple expressions should be passed.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
// affected glibc: (size_t)-1 with E2BIG and no input left is not an error
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
// everywhere else (size_t)-1 always means failure, bufLeft is not looked at
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// Casts x to "ICONV_CONST char **"; used below on the address of the input
// pointer passed to iconv(). ICONV_CONST is not defined in this part of the
// file (presumably it comes from the build configuration -- TODO confirm).
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1305
1306 // ----------------------------------------------------------------------------
1307 // wxMBConv_iconv: encapsulates an iconv character set
1308 // ----------------------------------------------------------------------------
1309
1310 class wxMBConv_iconv : public wxMBConv
1311 {
1312 public:
1313     wxMBConv_iconv(const wxChar *name);
1314     virtual ~wxMBConv_iconv();
1315
1316     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1317     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1318
1319     bool IsOk() const
1320         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1321
1322 protected:
1323     // the iconv handlers used to translate from multibyte to wide char and in
1324     // the other direction
1325     iconv_t m2w,
1326             w2m;
1327 #if wxUSE_THREADS
1328     // guards access to m2w and w2m objects
1329     wxMutex m_iconvMutex;
1330 #endif
1331
1332 private:
1333     // the name (for iconv_open()) of a wide char charset -- if none is
1334     // available on this machine, it will remain NULL
1335     static const char *ms_wcCharsetName;
1336
1337     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1338     // different endian-ness than the native one
1339     static bool ms_wcNeedsSwap;
1340 };
1341
1342 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1343 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1344
1345 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1346 {
1347     // Do it the hard way
1348     char cname[100];
1349     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1350         cname[i] = (char) name[i];
1351
1352     // check for charset that represents wchar_t:
1353     if (ms_wcCharsetName == NULL)
1354     {
1355         ms_wcNeedsSwap = false;
1356
1357         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1358         ms_wcCharsetName = WC_NAME_BEST;
1359         m2w = iconv_open(ms_wcCharsetName, cname);
1360
1361         if (m2w == (iconv_t)-1)
1362         {
1363             // try charset w/o bytesex info (e.g. "UCS4")
1364             // and check for bytesex ourselves:
1365             ms_wcCharsetName = WC_NAME;
1366             m2w = iconv_open(ms_wcCharsetName, cname);
1367
1368             // last bet, try if it knows WCHAR_T pseudo-charset
1369             if (m2w == (iconv_t)-1)
1370             {
1371                 ms_wcCharsetName = "WCHAR_T";
1372                 m2w = iconv_open(ms_wcCharsetName, cname);
1373             }
1374
1375             if (m2w != (iconv_t)-1)
1376             {
1377                 char    buf[2], *bufPtr;
1378                 wchar_t wbuf[2], *wbufPtr;
1379                 size_t  insz, outsz;
1380                 size_t  res;
1381
1382                 buf[0] = 'A';
1383                 buf[1] = 0;
1384                 wbuf[0] = 0;
1385                 insz = 2;
1386                 outsz = SIZEOF_WCHAR_T * 2;
1387                 wbufPtr = wbuf;
1388                 bufPtr = buf;
1389
1390                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1391                             (char**)&wbufPtr, &outsz);
1392
1393                 if (ICONV_FAILED(res, insz))
1394                 {
1395                     ms_wcCharsetName = NULL;
1396                     wxLogLastError(wxT("iconv"));
1397                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1398                 }
1399                 else
1400                 {
1401                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1402                 }
1403             }
1404             else
1405             {
1406                 ms_wcCharsetName = NULL;
1407
1408                 // VS: we must not output an error here, since wxWidgets will safely
1409                 //     fall back to using wxEncodingConverter.
1410                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1411                 //wxLogError(
1412             }
1413         }
1414         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1415     }
1416     else // we already have ms_wcCharsetName
1417     {
1418         m2w = iconv_open(ms_wcCharsetName, cname);
1419     }
1420
1421     // NB: don't ever pass NULL to iconv_open(), it may crash!
1422     if ( ms_wcCharsetName )
1423     {
1424         w2m = iconv_open( cname, ms_wcCharsetName);
1425     }
1426     else
1427     {
1428         w2m = (iconv_t)-1;
1429     }
1430 }
1431
1432 wxMBConv_iconv::~wxMBConv_iconv()
1433 {
1434     if ( m2w != (iconv_t)-1 )
1435         iconv_close(m2w);
1436     if ( w2m != (iconv_t)-1 )
1437         iconv_close(w2m);
1438 }
1439
1440 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1441 {
1442 #if wxUSE_THREADS
1443     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1444     //     Unfortunately there is a couple of global wxCSConv objects such as
1445     //     wxConvLocal that are used all over wx code, so we have to make sure
1446     //     the handle is used by at most one thread at the time. Otherwise
1447     //     only a few wx classes would be safe to use from non-main threads
1448     //     as MB<->WC conversion would fail "randomly".
1449     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1450 #endif
1451
1452     size_t inbuf = strlen(psz);
1453     size_t outbuf = n * SIZEOF_WCHAR_T;
1454     size_t res, cres;
1455     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1456     wchar_t *bufPtr = buf;
1457     const char *pszPtr = psz;
1458
1459     if (buf)
1460     {
1461         // have destination buffer, convert there
1462         cres = iconv(m2w,
1463                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1464                      (char**)&bufPtr, &outbuf);
1465         res = n - (outbuf / SIZEOF_WCHAR_T);
1466
1467         if (ms_wcNeedsSwap)
1468         {
1469             // convert to native endianness
1470             WC_BSWAP(buf /* _not_ bufPtr */, res)
1471         }
1472
1473         // NB: iconv was given only strlen(psz) characters on input, and so
1474         //     it couldn't convert the trailing zero. Let's do it ourselves
1475         //     if there's some room left for it in the output buffer.
1476         if (res < n)
1477             buf[res] = 0;
1478     }
1479     else
1480     {
1481         // no destination buffer... convert using temp buffer
1482         // to calculate destination buffer requirement
1483         wchar_t tbuf[8];
1484         res = 0;
1485         do {
1486             bufPtr = tbuf;
1487             outbuf = 8*SIZEOF_WCHAR_T;
1488
1489             cres = iconv(m2w,
1490                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1491                          (char**)&bufPtr, &outbuf );
1492
1493             res += 8-(outbuf/SIZEOF_WCHAR_T);
1494         } while ((cres==(size_t)-1) && (errno==E2BIG));
1495     }
1496
1497     if (ICONV_FAILED(cres, inbuf))
1498     {
1499         //VS: it is ok if iconv fails, hence trace only
1500         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1501         return (size_t)-1;
1502     }
1503
1504     return res;
1505 }
1506
1507 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1508 {
1509 #if wxUSE_THREADS
1510     // NB: explained in MB2WC
1511     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1512 #endif
1513
1514     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1515     size_t outbuf = n;
1516     size_t res, cres;
1517
1518     wchar_t *tmpbuf = 0;
1519
1520     if (ms_wcNeedsSwap)
1521     {
1522         // need to copy to temp buffer to switch endianness
1523         // this absolutely doesn't rock!
1524         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1525         //  could be in read-only memory, or be accessed in some other thread)
1526         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1527         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1528         WC_BSWAP(tmpbuf, inbuf)
1529         psz=tmpbuf;
1530     }
1531
1532     if (buf)
1533     {
1534         // have destination buffer, convert there
1535         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1536
1537         res = n-outbuf;
1538
1539         // NB: iconv was given only wcslen(psz) characters on input, and so
1540         //     it couldn't convert the trailing zero. Let's do it ourselves
1541         //     if there's some room left for it in the output buffer.
1542         if (res < n)
1543             buf[0] = 0;
1544     }
1545     else
1546     {
1547         // no destination buffer... convert using temp buffer
1548         // to calculate destination buffer requirement
1549         char tbuf[16];
1550         res = 0;
1551         do {
1552             buf = tbuf; outbuf = 16;
1553
1554             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1555
1556             res += 16 - outbuf;
1557         } while ((cres==(size_t)-1) && (errno==E2BIG));
1558     }
1559
1560     if (ms_wcNeedsSwap)
1561     {
1562         free(tmpbuf);
1563     }
1564
1565     if (ICONV_FAILED(cres, inbuf))
1566     {
1567         //VS: it is ok if iconv fails, hence trace only
1568         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1569         return (size_t)-1;
1570     }
1571
1572     return res;
1573 }
1574
1575 #endif // HAVE_ICONV
1576
1577
1578 // ============================================================================
1579 // Win32 conversion classes
1580 // ============================================================================
1581
1582 #ifdef wxHAVE_WIN32_MB2WC
1583
1584 // from utils.cpp
1585 #if wxUSE_FONTMAP
1586 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1587 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1588 #endif
1589
1590 class wxMBConv_win32 : public wxMBConv
1591 {
1592 public:
1593     wxMBConv_win32()
1594     {
1595         m_CodePage = CP_ACP;
1596     }
1597
1598 #if wxUSE_FONTMAP
1599     wxMBConv_win32(const wxChar* name)
1600     {
1601         m_CodePage = wxCharsetToCodepage(name);
1602     }
1603
1604     wxMBConv_win32(wxFontEncoding encoding)
1605     {
1606         m_CodePage = wxEncodingToCodepage(encoding);
1607     }
1608 #endif
1609
1610     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1611     {
1612         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1613         // the behaviour is not compatible with the Unix version (using iconv)
1614         // and break the library itself, e.g. wxTextInputStream::NextChar()
1615         // wouldn't work if reading an incomplete MB char didn't result in an
1616         // error
1617         //
1618         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1619         // an error (tested under Windows Server 2003) and apparently it is
1620         // done on purpose, i.e. the function accepts any input in this case
1621         // and although I'd prefer to return error on ill-formed output, our
1622         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1623         // explicitly ill-formed according to RFC 2152) neither so we don't
1624         // even have any fallback here...
1625         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1626
1627         const size_t len = ::MultiByteToWideChar
1628                              (
1629                                 m_CodePage,     // code page
1630                                 flags,          // flags: fall on error
1631                                 psz,            // input string
1632                                 -1,             // its length (NUL-terminated)
1633                                 buf,            // output string
1634                                 buf ? n : 0     // size of output buffer
1635                              );
1636
1637         // note that it returns count of written chars for buf != NULL and size
1638         // of the needed buffer for buf == NULL so in either case the length of
1639         // the string (which never includes the terminating NUL) is one less
1640         return len ? len - 1 : (size_t)-1;
1641     }
1642
1643     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1644     {
1645         /*
1646             we have a problem here: by default, WideCharToMultiByte() may
1647             replace characters unrepresentable in the target code page with bad
1648             quality approximations such as turning "1/2" symbol (U+00BD) into
1649             "1" for the code pages which don't have it and we, obviously, want
1650             to avoid this at any price
1651
1652             the trouble is that this function does it _silently_, i.e. it won't
1653             even tell us whether it did or not... Win98/2000 and higher provide
1654             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1655             we have to resort to a round trip, i.e. check that converting back
1656             results in the same string -- this is, of course, expensive but
1657             otherwise we simply can't be sure to not garble the data.
1658          */
1659
1660         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1661         // it doesn't work with CJK encodings (which we test for rather roughly
1662         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1663         // supporting it
1664         BOOL usedDef wxDUMMY_INITIALIZE(false);
1665         BOOL *pUsedDef;
1666         int flags;
1667         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1668         {
1669             // it's our lucky day
1670             flags = WC_NO_BEST_FIT_CHARS;
1671             pUsedDef = &usedDef;
1672         }
1673         else // old system or unsupported encoding
1674         {
1675             flags = 0;
1676             pUsedDef = NULL;
1677         }
1678
1679         const size_t len = ::WideCharToMultiByte
1680                              (
1681                                 m_CodePage,     // code page
1682                                 flags,          // either none or no best fit
1683                                 pwz,            // input string
1684                                 -1,             // it is (wide) NUL-terminated
1685                                 buf,            // output buffer
1686                                 buf ? n : 0,    // and its size
1687                                 NULL,           // default "replacement" char
1688                                 pUsedDef        // [out] was it used?
1689                              );
1690
1691         if ( !len )
1692         {
1693             // function totally failed
1694             return (size_t)-1;
1695         }
1696
1697         // if we were really converting, check if we succeeded
1698         if ( buf )
1699         {
1700             if ( flags )
1701             {
1702                 // check if the conversion failed, i.e. if any replacements
1703                 // were done
1704                 if ( usedDef )
1705                     return (size_t)-1;
1706             }
1707             else // we must resort to double tripping...
1708             {
1709                 wxWCharBuffer wcBuf(n);
1710                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1711                         wcscmp(wcBuf, pwz) != 0 )
1712                 {
1713                     // we didn't obtain the same thing we started from, hence
1714                     // the conversion was lossy and we consider that it failed
1715                     return (size_t)-1;
1716                 }
1717             }
1718         }
1719
1720         // see the comment above for the reason of "len - 1"
1721         return len - 1;
1722     }
1723
1724     bool IsOk() const { return m_CodePage != -1; }
1725
1726 private:
1727     static bool CanUseNoBestFit()
1728     {
1729         static int s_isWin98Or2k = -1;
1730
1731         if ( s_isWin98Or2k == -1 )
1732         {
1733             int verMaj, verMin;
1734             switch ( wxGetOsVersion(&verMaj, &verMin) )
1735             {
1736                 case wxWIN95:
1737                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1738                     break;
1739
1740                 case wxWINDOWS_NT:
1741                     s_isWin98Or2k = verMaj >= 5;
1742                     break;
1743
1744                 default:
1745                     // unknown, be conseravtive by default
1746                     s_isWin98Or2k = 0;
1747             }
1748
1749             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1750         }
1751
1752         return s_isWin98Or2k == 1;
1753     }
1754
1755     long m_CodePage;
1756 };
1757
1758 #endif // wxHAVE_WIN32_MB2WC
1759
1760 // ============================================================================
1761 // Cocoa conversion classes
1762 // ============================================================================
1763
1764 #if defined(__WXCOCOA__)
1765
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1769
1770 #include <CoreFoundation/CFString.h>
1771 #include <CoreFoundation/CFStringEncodingExt.h>
1772
1773 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1774 {
1775     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1776     if ( encoding == wxFONTENCODING_DEFAULT )
1777     {
1778         enc = CFStringGetSystemEncoding();
1779     }
1780     else switch( encoding)
1781     {
1782         case wxFONTENCODING_ISO8859_1 :
1783             enc = kCFStringEncodingISOLatin1 ;
1784             break ;
1785         case wxFONTENCODING_ISO8859_2 :
1786             enc = kCFStringEncodingISOLatin2;
1787             break ;
1788         case wxFONTENCODING_ISO8859_3 :
1789             enc = kCFStringEncodingISOLatin3 ;
1790             break ;
1791         case wxFONTENCODING_ISO8859_4 :
1792             enc = kCFStringEncodingISOLatin4;
1793             break ;
1794         case wxFONTENCODING_ISO8859_5 :
1795             enc = kCFStringEncodingISOLatinCyrillic;
1796             break ;
1797         case wxFONTENCODING_ISO8859_6 :
1798             enc = kCFStringEncodingISOLatinArabic;
1799             break ;
1800         case wxFONTENCODING_ISO8859_7 :
1801             enc = kCFStringEncodingISOLatinGreek;
1802             break ;
1803         case wxFONTENCODING_ISO8859_8 :
1804             enc = kCFStringEncodingISOLatinHebrew;
1805             break ;
1806         case wxFONTENCODING_ISO8859_9 :
1807             enc = kCFStringEncodingISOLatin5;
1808             break ;
1809         case wxFONTENCODING_ISO8859_10 :
1810             enc = kCFStringEncodingISOLatin6;
1811             break ;
1812         case wxFONTENCODING_ISO8859_11 :
1813             enc = kCFStringEncodingISOLatinThai;
1814             break ;
1815         case wxFONTENCODING_ISO8859_13 :
1816             enc = kCFStringEncodingISOLatin7;
1817             break ;
1818         case wxFONTENCODING_ISO8859_14 :
1819             enc = kCFStringEncodingISOLatin8;
1820             break ;
1821         case wxFONTENCODING_ISO8859_15 :
1822             enc = kCFStringEncodingISOLatin9;
1823             break ;
1824
1825         case wxFONTENCODING_KOI8 :
1826             enc = kCFStringEncodingKOI8_R;
1827             break ;
1828         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1829             enc = kCFStringEncodingDOSRussian;
1830             break ;
1831
1832 //      case wxFONTENCODING_BULGARIAN :
1833 //          enc = ;
1834 //          break ;
1835
1836         case wxFONTENCODING_CP437 :
1837             enc =kCFStringEncodingDOSLatinUS ;
1838             break ;
1839         case wxFONTENCODING_CP850 :
1840             enc = kCFStringEncodingDOSLatin1;
1841             break ;
1842         case wxFONTENCODING_CP852 :
1843             enc = kCFStringEncodingDOSLatin2;
1844             break ;
1845         case wxFONTENCODING_CP855 :
1846             enc = kCFStringEncodingDOSCyrillic;
1847             break ;
1848         case wxFONTENCODING_CP866 :
1849             enc =kCFStringEncodingDOSRussian ;
1850             break ;
1851         case wxFONTENCODING_CP874 :
1852             enc = kCFStringEncodingDOSThai;
1853             break ;
1854         case wxFONTENCODING_CP932 :
1855             enc = kCFStringEncodingDOSJapanese;
1856             break ;
1857         case wxFONTENCODING_CP936 :
1858             enc =kCFStringEncodingDOSChineseSimplif ;
1859             break ;
1860         case wxFONTENCODING_CP949 :
1861             enc = kCFStringEncodingDOSKorean;
1862             break ;
1863         case wxFONTENCODING_CP950 :
1864             enc = kCFStringEncodingDOSChineseTrad;
1865             break ;
1866         case wxFONTENCODING_CP1250 :
1867             enc = kCFStringEncodingWindowsLatin2;
1868             break ;
1869         case wxFONTENCODING_CP1251 :
1870             enc =kCFStringEncodingWindowsCyrillic ;
1871             break ;
1872         case wxFONTENCODING_CP1252 :
1873             enc =kCFStringEncodingWindowsLatin1 ;
1874             break ;
1875         case wxFONTENCODING_CP1253 :
1876             enc = kCFStringEncodingWindowsGreek;
1877             break ;
1878         case wxFONTENCODING_CP1254 :
1879             enc = kCFStringEncodingWindowsLatin5;
1880             break ;
1881         case wxFONTENCODING_CP1255 :
1882             enc =kCFStringEncodingWindowsHebrew ;
1883             break ;
1884         case wxFONTENCODING_CP1256 :
1885             enc =kCFStringEncodingWindowsArabic ;
1886             break ;
1887         case wxFONTENCODING_CP1257 :
1888             enc = kCFStringEncodingWindowsBalticRim;
1889             break ;
1890 //   This only really encodes to UTF7 (if that) evidently
1891 //        case wxFONTENCODING_UTF7 :
1892 //            enc = kCFStringEncodingNonLossyASCII ;
1893 //            break ;
1894         case wxFONTENCODING_UTF8 :
1895             enc = kCFStringEncodingUTF8 ;
1896             break ;
1897         case wxFONTENCODING_EUC_JP :
1898             enc = kCFStringEncodingEUC_JP;
1899             break ;
1900         case wxFONTENCODING_UTF16 :
1901             enc = kCFStringEncodingUnicode ;
1902             break ;
1903         case wxFONTENCODING_MACROMAN :
1904             enc = kCFStringEncodingMacRoman ;
1905             break ;
1906         case wxFONTENCODING_MACJAPANESE :
1907             enc = kCFStringEncodingMacJapanese ;
1908             break ;
1909         case wxFONTENCODING_MACCHINESETRAD :
1910             enc = kCFStringEncodingMacChineseTrad ;
1911             break ;
1912         case wxFONTENCODING_MACKOREAN :
1913             enc = kCFStringEncodingMacKorean ;
1914             break ;
1915         case wxFONTENCODING_MACARABIC :
1916             enc = kCFStringEncodingMacArabic ;
1917             break ;
1918         case wxFONTENCODING_MACHEBREW :
1919             enc = kCFStringEncodingMacHebrew ;
1920             break ;
1921         case wxFONTENCODING_MACGREEK :
1922             enc = kCFStringEncodingMacGreek ;
1923             break ;
1924         case wxFONTENCODING_MACCYRILLIC :
1925             enc = kCFStringEncodingMacCyrillic ;
1926             break ;
1927         case wxFONTENCODING_MACDEVANAGARI :
1928             enc = kCFStringEncodingMacDevanagari ;
1929             break ;
1930         case wxFONTENCODING_MACGURMUKHI :
1931             enc = kCFStringEncodingMacGurmukhi ;
1932             break ;
1933         case wxFONTENCODING_MACGUJARATI :
1934             enc = kCFStringEncodingMacGujarati ;
1935             break ;
1936         case wxFONTENCODING_MACORIYA :
1937             enc = kCFStringEncodingMacOriya ;
1938             break ;
1939         case wxFONTENCODING_MACBENGALI :
1940             enc = kCFStringEncodingMacBengali ;
1941             break ;
1942         case wxFONTENCODING_MACTAMIL :
1943             enc = kCFStringEncodingMacTamil ;
1944             break ;
1945         case wxFONTENCODING_MACTELUGU :
1946             enc = kCFStringEncodingMacTelugu ;
1947             break ;
1948         case wxFONTENCODING_MACKANNADA :
1949             enc = kCFStringEncodingMacKannada ;
1950             break ;
1951         case wxFONTENCODING_MACMALAJALAM :
1952             enc = kCFStringEncodingMacMalayalam ;
1953             break ;
1954         case wxFONTENCODING_MACSINHALESE :
1955             enc = kCFStringEncodingMacSinhalese ;
1956             break ;
1957         case wxFONTENCODING_MACBURMESE :
1958             enc = kCFStringEncodingMacBurmese ;
1959             break ;
1960         case wxFONTENCODING_MACKHMER :
1961             enc = kCFStringEncodingMacKhmer ;
1962             break ;
1963         case wxFONTENCODING_MACTHAI :
1964             enc = kCFStringEncodingMacThai ;
1965             break ;
1966         case wxFONTENCODING_MACLAOTIAN :
1967             enc = kCFStringEncodingMacLaotian ;
1968             break ;
1969         case wxFONTENCODING_MACGEORGIAN :
1970             enc = kCFStringEncodingMacGeorgian ;
1971             break ;
1972         case wxFONTENCODING_MACARMENIAN :
1973             enc = kCFStringEncodingMacArmenian ;
1974             break ;
1975         case wxFONTENCODING_MACCHINESESIMP :
1976             enc = kCFStringEncodingMacChineseSimp ;
1977             break ;
1978         case wxFONTENCODING_MACTIBETAN :
1979             enc = kCFStringEncodingMacTibetan ;
1980             break ;
1981         case wxFONTENCODING_MACMONGOLIAN :
1982             enc = kCFStringEncodingMacMongolian ;
1983             break ;
1984         case wxFONTENCODING_MACETHIOPIC :
1985             enc = kCFStringEncodingMacEthiopic ;
1986             break ;
1987         case wxFONTENCODING_MACCENTRALEUR :
1988             enc = kCFStringEncodingMacCentralEurRoman ;
1989             break ;
1990         case wxFONTENCODING_MACVIATNAMESE :
1991             enc = kCFStringEncodingMacVietnamese ;
1992             break ;
1993         case wxFONTENCODING_MACARABICEXT :
1994             enc = kCFStringEncodingMacExtArabic ;
1995             break ;
1996         case wxFONTENCODING_MACSYMBOL :
1997             enc = kCFStringEncodingMacSymbol ;
1998             break ;
1999         case wxFONTENCODING_MACDINGBATS :
2000             enc = kCFStringEncodingMacDingbats ;
2001             break ;
2002         case wxFONTENCODING_MACTURKISH :
2003             enc = kCFStringEncodingMacTurkish ;
2004             break ;
2005         case wxFONTENCODING_MACCROATIAN :
2006             enc = kCFStringEncodingMacCroatian ;
2007             break ;
2008         case wxFONTENCODING_MACICELANDIC :
2009             enc = kCFStringEncodingMacIcelandic ;
2010             break ;
2011         case wxFONTENCODING_MACROMANIAN :
2012             enc = kCFStringEncodingMacRomanian ;
2013             break ;
2014         case wxFONTENCODING_MACCELTIC :
2015             enc = kCFStringEncodingMacCeltic ;
2016             break ;
2017         case wxFONTENCODING_MACGAELIC :
2018             enc = kCFStringEncodingMacGaelic ;
2019             break ;
2020 //      case wxFONTENCODING_MACKEYBOARD :
2021 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2022 //          break ;
2023         default :
2024             // because gcc is picky
2025             break ;
2026     } ;
2027     return enc ;
2028 }
2029
2030 class wxMBConv_cocoa : public wxMBConv
2031 {
2032 public:
2033     wxMBConv_cocoa()
2034     {
2035         Init(CFStringGetSystemEncoding()) ;
2036     }
2037
2038 #if wxUSE_FONTMAP
2039     wxMBConv_cocoa(const wxChar* name)
2040     {
2041         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2042     }
2043 #endif
2044
2045     wxMBConv_cocoa(wxFontEncoding encoding)
2046     {
2047         Init( wxCFStringEncFromFontEnc(encoding) );
2048     }
2049
2050     ~wxMBConv_cocoa()
2051     {
2052     }
2053
2054     void Init( CFStringEncoding encoding)
2055     {
2056         m_encoding = encoding ;
2057     }
2058
2059     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2060     {
2061         wxASSERT(szUnConv);
2062
2063         CFStringRef theString = CFStringCreateWithBytes (
2064                                                 NULL, //the allocator
2065                                                 (const UInt8*)szUnConv,
2066                                                 strlen(szUnConv),
2067                                                 m_encoding,
2068                                                 false //no BOM/external representation
2069                                                 );
2070
2071         wxASSERT(theString);
2072
2073         size_t nOutLength = CFStringGetLength(theString);
2074
2075         if (szOut == NULL)
2076         {
2077             CFRelease(theString);
2078             return nOutLength;
2079         }
2080
2081         CFRange theRange = { 0, nOutSize };
2082
2083 #if SIZEOF_WCHAR_T == 4
2084         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2085 #endif
2086
2087         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2088
2089         CFRelease(theString);
2090
2091         szUniCharBuffer[nOutLength] = '\0' ;
2092
2093 #if SIZEOF_WCHAR_T == 4
2094         wxMBConvUTF16 converter ;
2095         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2096         delete[] szUniCharBuffer;
2097 #endif
2098
2099         return nOutLength;
2100     }
2101
2102     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2103     {
2104         wxASSERT(szUnConv);
2105
2106         size_t nRealOutSize;
2107         size_t nBufSize = wxWcslen(szUnConv);
2108         UniChar* szUniBuffer = (UniChar*) szUnConv;
2109
2110 #if SIZEOF_WCHAR_T == 4
2111         wxMBConvUTF16BE converter ;
2112         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2113         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2114         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2115         nBufSize /= sizeof(UniChar);
2116 #endif
2117
2118         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2119                                 NULL, //allocator
2120                                 szUniBuffer,
2121                                 nBufSize,
2122                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2123                             );
2124
2125         wxASSERT(theString);
2126
2127         //Note that CER puts a BOM when converting to unicode
2128         //so we  check and use getchars instead in that case
2129         if (m_encoding == kCFStringEncodingUnicode)
2130         {
2131             if (szOut != NULL)
2132                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2133
2134             nRealOutSize = CFStringGetLength(theString) + 1;
2135         }
2136         else
2137         {
2138             CFStringGetBytes(
2139                 theString,
2140                 CFRangeMake(0, CFStringGetLength(theString)),
2141                 m_encoding,
2142                 0, //what to put in characters that can't be converted -
2143                     //0 tells CFString to return NULL if it meets such a character
2144                 false, //not an external representation
2145                 (UInt8*) szOut,
2146                 nOutSize,
2147                 (CFIndex*) &nRealOutSize
2148                         );
2149         }
2150
2151         CFRelease(theString);
2152
2153 #if SIZEOF_WCHAR_T == 4
2154         delete[] szUniBuffer;
2155 #endif
2156
2157         return  nRealOutSize - 1;
2158     }
2159
2160     bool IsOk() const
2161     {
2162         return m_encoding != kCFStringEncodingInvalidId &&
2163               CFStringIsEncodingAvailable(m_encoding);
2164     }
2165
2166 private:
2167     CFStringEncoding m_encoding ;
2168 };
2169
2170 #endif // defined(__WXCOCOA__)
2171
2172 // ============================================================================
2173 // Mac conversion classes
2174 // ============================================================================
2175
2176 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2177
2178 class wxMBConv_mac : public wxMBConv
2179 {
2180 public:
2181     wxMBConv_mac()
2182     {
2183         Init(CFStringGetSystemEncoding()) ;
2184     }
2185
2186 #if wxUSE_FONTMAP
2187     wxMBConv_mac(const wxChar* name)
2188     {
2189         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2190     }
2191 #endif
2192
2193     wxMBConv_mac(wxFontEncoding encoding)
2194     {
2195         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2196     }
2197
2198     ~wxMBConv_mac()
2199     {
2200         OSStatus status = noErr ;
2201         status = TECDisposeConverter(m_MB2WC_converter);
2202         status = TECDisposeConverter(m_WC2MB_converter);
2203     }
2204
2205
2206     void Init( TextEncodingBase encoding)
2207     {
2208         OSStatus status = noErr ;
2209         m_char_encoding = encoding ;
2210         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2211
2212         status = TECCreateConverter(&m_MB2WC_converter,
2213                                     m_char_encoding,
2214                                     m_unicode_encoding);
2215         status = TECCreateConverter(&m_WC2MB_converter,
2216                                     m_unicode_encoding,
2217                                     m_char_encoding);
2218     }
2219
2220     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2221     {
2222         OSStatus status = noErr ;
2223         ByteCount byteOutLen ;
2224         ByteCount byteInLen = strlen(psz) ;
2225         wchar_t *tbuf = NULL ;
2226         UniChar* ubuf = NULL ;
2227         size_t res = 0 ;
2228
2229         if (buf == NULL)
2230         {
2231             //apple specs say at least 32
2232             n = wxMax( 32 , byteInLen ) ;
2233             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2234         }
2235         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2236 #if SIZEOF_WCHAR_T == 4
2237         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2238 #else
2239         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2240 #endif
2241         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2242           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2243 #if SIZEOF_WCHAR_T == 4
2244         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2245         // is not properly terminated we get random characters at the end
2246         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2247         wxMBConvUTF16BE converter ;
2248         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2249         free( ubuf ) ;
2250 #else
2251         res = byteOutLen / sizeof( UniChar ) ;
2252 #endif
2253         if ( buf == NULL )
2254              free(tbuf) ;
2255
2256         if ( buf  && res < n)
2257             buf[res] = 0;
2258
2259         return res ;
2260     }
2261
2262     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2263     {
2264         OSStatus status = noErr ;
2265         ByteCount byteOutLen ;
2266         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2267
2268         char *tbuf = NULL ;
2269
2270         if (buf == NULL)
2271         {
2272             //apple specs say at least 32
2273             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2274             tbuf = (char*) malloc( n ) ;
2275         }
2276
2277         ByteCount byteBufferLen = n ;
2278         UniChar* ubuf = NULL ;
2279 #if SIZEOF_WCHAR_T == 4
2280         wxMBConvUTF16BE converter ;
2281         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2282         byteInLen = unicharlen ;
2283         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2284         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2285 #else
2286         ubuf = (UniChar*) psz ;
2287 #endif
2288         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2289             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2290 #if SIZEOF_WCHAR_T == 4
2291         free( ubuf ) ;
2292 #endif
2293         if ( buf == NULL )
2294             free(tbuf) ;
2295
2296         size_t res = byteOutLen ;
2297         if ( buf  && res < n)
2298         {
2299             buf[res] = 0;
2300
2301             //we need to double-trip to verify it didn't insert any ? in place
2302             //of bogus characters
2303             wxWCharBuffer wcBuf(n);
2304             size_t pszlen = wxWcslen(psz);
2305             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2306                         wxWcslen(wcBuf) != pszlen ||
2307                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2308             {
2309                 // we didn't obtain the same thing we started from, hence
2310                 // the conversion was lossy and we consider that it failed
2311                 return (size_t)-1;
2312             }
2313         }
2314
2315         return res ;
2316     }
2317
2318     bool IsOk() const
2319         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2320
2321 private:
2322     TECObjectRef m_MB2WC_converter ;
2323     TECObjectRef m_WC2MB_converter ;
2324
2325     TextEncodingBase m_char_encoding ;
2326     TextEncodingBase m_unicode_encoding ;
2327 };
2328
2329 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2330
2331 // ============================================================================
2332 // wxEncodingConverter based conversion classes
2333 // ============================================================================
2334
2335 #if wxUSE_FONTMAP
2336
2337 class wxMBConv_wxwin : public wxMBConv
2338 {
2339 private:
2340     void Init()
2341     {
2342         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2343                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2344     }
2345
2346 public:
2347     // temporarily just use wxEncodingConverter stuff,
2348     // so that it works while a better implementation is built
2349     wxMBConv_wxwin(const wxChar* name)
2350     {
2351         if (name)
2352             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2353         else
2354             m_enc = wxFONTENCODING_SYSTEM;
2355
2356         Init();
2357     }
2358
2359     wxMBConv_wxwin(wxFontEncoding enc)
2360     {
2361         m_enc = enc;
2362
2363         Init();
2364     }
2365
2366     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2367     {
2368         size_t inbuf = strlen(psz);
2369         if (buf)
2370         {
2371             if (!m2w.Convert(psz,buf))
2372                 return (size_t)-1;
2373         }
2374         return inbuf;
2375     }
2376
2377     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2378     {
2379         const size_t inbuf = wxWcslen(psz);
2380         if (buf)
2381         {
2382             if (!w2m.Convert(psz,buf))
2383                 return (size_t)-1;
2384         }
2385
2386         return inbuf;
2387     }
2388
2389     bool IsOk() const { return m_ok; }
2390
2391 public:
2392     wxFontEncoding m_enc;
2393     wxEncodingConverter m2w, w2m;
2394
2395     // were we initialized successfully?
2396     bool m_ok;
2397
2398     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2399 };
2400
2401 #endif // wxUSE_FONTMAP
2402
2403 // ============================================================================
2404 // wxCSConv implementation
2405 // ============================================================================
2406
2407 void wxCSConv::Init()
2408 {
2409     m_name = NULL;
2410     m_convReal =  NULL;
2411     m_deferred = true;
2412 }
2413
2414 wxCSConv::wxCSConv(const wxChar *charset)
2415 {
2416     Init();
2417
2418     if ( charset )
2419     {
2420         SetName(charset);
2421     }
2422
2423     m_encoding = wxFONTENCODING_SYSTEM;
2424 }
2425
2426 wxCSConv::wxCSConv(wxFontEncoding encoding)
2427 {
2428     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2429     {
2430         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2431
2432         encoding = wxFONTENCODING_SYSTEM;
2433     }
2434
2435     Init();
2436
2437     m_encoding = encoding;
2438 }
2439
2440 wxCSConv::~wxCSConv()
2441 {
2442     Clear();
2443 }
2444
2445 wxCSConv::wxCSConv(const wxCSConv& conv)
2446         : wxMBConv()
2447 {
2448     Init();
2449
2450     SetName(conv.m_name);
2451     m_encoding = conv.m_encoding;
2452 }
2453
2454 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2455 {
2456     Clear();
2457
2458     SetName(conv.m_name);
2459     m_encoding = conv.m_encoding;
2460
2461     return *this;
2462 }
2463
2464 void wxCSConv::Clear()
2465 {
2466     free(m_name);
2467     delete m_convReal;
2468
2469     m_name = NULL;
2470     m_convReal = NULL;
2471 }
2472
2473 void wxCSConv::SetName(const wxChar *charset)
2474 {
2475     if (charset)
2476     {
2477         m_name = wxStrdup(charset);
2478         m_deferred = true;
2479     }
2480 }
2481
2482 wxMBConv *wxCSConv::DoCreate() const
2483 {
2484     // check for the special case of ASCII or ISO8859-1 charset: as we have
2485     // special knowledge of it anyhow, we don't need to create a special
2486     // conversion object
2487     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2488     {
2489         // don't convert at all
2490         return NULL;
2491     }
2492
2493     // we trust OS to do conversion better than we can so try external
2494     // conversion methods first
2495     //
2496     // the full order is:
2497     //      1. OS conversion (iconv() under Unix or Win32 API)
2498     //      2. hard coded conversions for UTF
2499     //      3. wxEncodingConverter as fall back
2500
2501     // step (1)
2502 #ifdef HAVE_ICONV
2503 #if !wxUSE_FONTMAP
2504     if ( m_name )
2505 #endif // !wxUSE_FONTMAP
2506     {
2507         wxString name(m_name);
2508
2509 #if wxUSE_FONTMAP
2510         if ( name.empty() )
2511             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2512 #endif // wxUSE_FONTMAP
2513
2514         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2515         if ( conv->IsOk() )
2516             return conv;
2517
2518         delete conv;
2519     }
2520 #endif // HAVE_ICONV
2521
2522 #ifdef wxHAVE_WIN32_MB2WC
2523     {
2524 #if wxUSE_FONTMAP
2525         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2526                                       : new wxMBConv_win32(m_encoding);
2527         if ( conv->IsOk() )
2528             return conv;
2529
2530         delete conv;
2531 #else
2532         return NULL;
2533 #endif
2534     }
2535 #endif // wxHAVE_WIN32_MB2WC
2536 #if defined(__WXMAC__)
2537     {
2538         // leave UTF16 and UTF32 to the built-ins of wx
2539         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2540             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2541         {
2542
2543 #if wxUSE_FONTMAP
2544             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2545                                         : new wxMBConv_mac(m_encoding);
2546 #else
2547             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2548 #endif
2549             if ( conv->IsOk() )
2550                  return conv;
2551
2552             delete conv;
2553         }
2554     }
2555 #endif
2556 #if defined(__WXCOCOA__)
2557     {
2558         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2559         {
2560
2561 #if wxUSE_FONTMAP
2562             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2563                                           : new wxMBConv_cocoa(m_encoding);
2564 #else
2565             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2566 #endif
2567             if ( conv->IsOk() )
2568                  return conv;
2569
2570             delete conv;
2571         }
2572     }
2573 #endif
2574     // step (2)
2575     wxFontEncoding enc = m_encoding;
2576 #if wxUSE_FONTMAP
2577     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2578     {
2579         // use "false" to suppress interactive dialogs -- we can be called from
2580         // anywhere and popping up a dialog from here is the last thing we want to
2581         // do
2582         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2583     }
2584 #endif // wxUSE_FONTMAP
2585
2586     switch ( enc )
2587     {
2588         case wxFONTENCODING_UTF7:
2589              return new wxMBConvUTF7;
2590
2591         case wxFONTENCODING_UTF8:
2592              return new wxMBConvUTF8;
2593
2594         case wxFONTENCODING_UTF16BE:
2595              return new wxMBConvUTF16BE;
2596
2597         case wxFONTENCODING_UTF16LE:
2598              return new wxMBConvUTF16LE;
2599
2600         case wxFONTENCODING_UTF32BE:
2601              return new wxMBConvUTF32BE;
2602
2603         case wxFONTENCODING_UTF32LE:
2604              return new wxMBConvUTF32LE;
2605
2606         default:
2607              // nothing to do but put here to suppress gcc warnings
2608              ;
2609     }
2610
2611     // step (3)
2612 #if wxUSE_FONTMAP
2613     {
2614         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2615                                       : new wxMBConv_wxwin(m_encoding);
2616         if ( conv->IsOk() )
2617             return conv;
2618
2619         delete conv;
2620     }
2621 #endif // wxUSE_FONTMAP
2622
2623     // NB: This is a hack to prevent deadlock. What could otherwise happen
2624     //     in Unicode build: wxConvLocal creation ends up being here
2625     //     because of some failure and logs the error. But wxLog will try to
2626     //     attach timestamp, for which it will need wxConvLocal (to convert
2627     //     time to char* and then wchar_t*), but that fails, tries to log
2628     //     error, but wxLog has a (already locked) critical section that
2629     //     guards static buffer.
2630     static bool alreadyLoggingError = false;
2631     if (!alreadyLoggingError)
2632     {
2633         alreadyLoggingError = true;
2634         wxLogError(_("Cannot convert from the charset '%s'!"),
2635                    m_name ? m_name
2636                       :
2637 #if wxUSE_FONTMAP
2638                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2639 #else // !wxUSE_FONTMAP
2640                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2641 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2642               );
2643         alreadyLoggingError = false;
2644     }
2645
2646     return NULL;
2647 }
2648
2649 void wxCSConv::CreateConvIfNeeded() const
2650 {
2651     if ( m_deferred )
2652     {
2653         wxCSConv *self = (wxCSConv *)this; // const_cast
2654
2655 #if wxUSE_INTL
2656         // if we don't have neither the name nor the encoding, use the default
2657         // encoding for this system
2658         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2659         {
2660             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2661         }
2662 #endif // wxUSE_INTL
2663
2664         self->m_convReal = DoCreate();
2665         self->m_deferred = false;
2666     }
2667 }
2668
2669 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2670 {
2671     CreateConvIfNeeded();
2672
2673     if (m_convReal)
2674         return m_convReal->MB2WC(buf, psz, n);
2675
2676     // latin-1 (direct)
2677     size_t len = strlen(psz);
2678
2679     if (buf)
2680     {
2681         for (size_t c = 0; c <= len; c++)
2682             buf[c] = (unsigned char)(psz[c]);
2683     }
2684
2685     return len;
2686 }
2687
2688 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2689 {
2690     CreateConvIfNeeded();
2691
2692     if (m_convReal)
2693         return m_convReal->WC2MB(buf, psz, n);
2694
2695     // latin-1 (direct)
2696     const size_t len = wxWcslen(psz);
2697     if (buf)
2698     {
2699         for (size_t c = 0; c <= len; c++)
2700         {
2701             if (psz[c] > 0xFF)
2702                 return (size_t)-1;
2703             buf[c] = (char)psz[c];
2704         }
2705     }
2706     else
2707     {
2708         for (size_t c = 0; c <= len; c++)
2709         {
2710             if (psz[c] > 0xFF)
2711                 return (size_t)-1;
2712         }
2713     }
2714
2715     return len;
2716 }
2717
2718 // ----------------------------------------------------------------------------
2719 // globals
2720 // ----------------------------------------------------------------------------
2721
2722 #ifdef __WINDOWS__
2723     static wxMBConv_win32 wxConvLibcObj;
2724 #elif defined(__WXMAC__) && !defined(__MACH__)
2725     static wxMBConv_mac wxConvLibcObj ;
2726 #else
2727     static wxMBConvLibc wxConvLibcObj;
2728 #endif
2729
2730 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2731 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2732 static wxMBConvUTF7 wxConvUTF7Obj;
2733 static wxMBConvUTF8 wxConvUTF8Obj;
2734 static wxConvBrokenFileNames wxConvBrokenFileNamesObj;
2735
2736 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2737 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2738 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2739 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2740 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2741 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2742 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2743 #ifdef __WXOSX__
2744                                     wxConvUTF8Obj;
2745 #elif __WXGTK20__
2746                                     wxConvBrokenFileNamesObj;
2747 #else
2748                                     wxConvLibcObj;
2749 #endif
2750
2751
2752 #else // !wxUSE_WCHAR_T
2753
2754 // stand-ins in absence of wchar_t
2755 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2756                                 wxConvISO8859_1,
2757                                 wxConvLocal,
2758                                 wxConvUTF8;
2759
2760 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2761
2762