src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58 #ifdef HAVE_LANGINFO_H
  59   #include <langinfo.h>
  60 #endif
  61
  62 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  63     #define wxHAVE_WIN32_MB2WC
  64 #endif // __WIN32__ but !__WXMICROWIN__
  65
  66 // ----------------------------------------------------------------------------
  67 // headers
  68 // ----------------------------------------------------------------------------
  69
  70 #ifdef __SALFORDC__
  71     #include <clib.h>
  72 #endif
  73
  74 #ifdef HAVE_ICONV
  75     #include <iconv.h>
  76     #include "wx/thread.h"
  77 #endif
  78
  79 #include "wx/encconv.h"
  80 #include "wx/fontmap.h"
  81 #include "wx/utils.h"
  82
  83 #ifdef __WXMAC__
  84 #include <ATSUnicode.h>
  85 #include <TextCommon.h>
  86 #include <TextEncodingConverter.h>
  87
  88 #include  "wx/mac/private.h"  // includes mac headers
  89 #endif
  90 // ----------------------------------------------------------------------------
  91 // macros
  92 // ----------------------------------------------------------------------------
  93
  94 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  95 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  96
  97 #if SIZEOF_WCHAR_T == 4
  98     #define WC_NAME         "UCS4"
  99     #define WC_BSWAP         BSWAP_UCS4
 100     #ifdef WORDS_BIGENDIAN
 101       #define WC_NAME_BEST  "UCS-4BE"
 102     #else
 103       #define WC_NAME_BEST  "UCS-4LE"
 104     #endif
 105 #elif SIZEOF_WCHAR_T == 2
 106     #define WC_NAME         "UTF16"
 107     #define WC_BSWAP         BSWAP_UTF16
 108     #define WC_UTF16
 109     #ifdef WORDS_BIGENDIAN
 110       #define WC_NAME_BEST  "UTF-16BE"
 111     #else
 112       #define WC_NAME_BEST  "UTF-16LE"
 113     #endif
 114 #else // sizeof(wchar_t) != 2 nor 4
 115     // does this ever happen?
 116     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 117 #endif
 118
 119 // ============================================================================
 120 // implementation
 121 // ============================================================================
 122
 123 // ----------------------------------------------------------------------------
 124 // UTF-16 en/decoding to/from UCS-4
 125 // ----------------------------------------------------------------------------
 126
 127
 128 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 129 {
 130     if (input<=0xffff)
 131     {
 132         if (output)
 133             *output = (wxUint16) input;
 134         return 1;
 135     }
 136     else if (input>=0x110000)
 137     {
 138         return (size_t)-1;
 139     }
 140     else
 141     {
 142         if (output)
 143         {
 144             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 145             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 146         }
 147         return 2;
 148     }
 149 }
 150
 151 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 152 {
 153     if ((*input<0xd800) || (*input>0xdfff))
 154     {
 155         output = *input;
 156         return 1;
 157     }
 158     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 159     {
 160         output = *input;
 161         return (size_t)-1;
 162     }
 163     else
 164     {
 165         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 166         return 2;
 167     }
 168 }
 169
 170
 171 // ----------------------------------------------------------------------------
 172 // wxMBConv
 173 // ----------------------------------------------------------------------------
 174
 175 wxMBConv::~wxMBConv()
 176 {
 177     // nothing to do here (necessary for Darwin linking probably)
 178 }
 179
 180 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 181 {
 182     if ( psz )
 183     {
 184         // calculate the length of the buffer needed first
 185         size_t nLen = MB2WC(NULL, psz, 0);
 186         if ( nLen != (size_t)-1 )
 187         {
 188             // now do the actual conversion
 189             wxWCharBuffer buf(nLen);
 190             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 191             if ( nLen != (size_t)-1 )
 192             {
 193                 return buf;
 194             }
 195         }
 196     }
 197
 198     wxWCharBuffer buf((wchar_t *)NULL);
 199
 200     return buf;
 201 }
 202
 203 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 204 {
 205     if ( pwz )
 206     {
 207         size_t nLen = WC2MB(NULL, pwz, 0);
 208         if ( nLen != (size_t)-1 )
 209         {
 210             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 211             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 212             if ( nLen != (size_t)-1 )
 213             {
 214                 return buf;
 215             }
 216         }
 217     }
 218
 219     wxCharBuffer buf((char *)NULL);
 220
 221     return buf;
 222 }
 223
 224 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 225 {
 226     wxASSERT(pOutSize != NULL);
 227
 228     const char* szEnd = szString + nStringLen + 1;
 229     const char* szPos = szString;
 230     const char* szStart = szPos;
 231
 232     size_t nActualLength = 0;
 233     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 234
 235     wxWCharBuffer theBuffer(nCurrentSize);
 236
 237     //Convert the string until the length() is reached, continuing the
 238     //loop every time a null character is reached
 239     while(szPos != szEnd)
 240     {
 241         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 242
 243         //Get the length of the current (sub)string
 244         size_t nLen = MB2WC(NULL, szPos, 0);
 245
 246         //Invalid conversion?
 247         if( nLen == (size_t)-1 )
 248         {
 249             *pOutSize = 0;
 250             theBuffer.data()[0u] = wxT('\0');
 251             return theBuffer;
 252         }
 253
 254
 255         //Increase the actual length (+1 for current null character)
 256         nActualLength += nLen + 1;
 257
 258         //if buffer too big, realloc the buffer
 259         if (nActualLength > (nCurrentSize+1))
 260         {
 261             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 262             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 263             theBuffer = theNewBuffer;
 264             nCurrentSize <<= 1;
 265         }
 266
 267         //Convert the current (sub)string
 268         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 269         {
 270             *pOutSize = 0;
 271             theBuffer.data()[0u] = wxT('\0');
 272             return theBuffer;
 273         }
 274
 275         //Increment to next (sub)string
 276         //Note that we have to use strlen here instead of nLen
 277         //here because XX2XX gives us the size of the output buffer,
 278         //not neccessarly the length of the string
 279         szPos += strlen(szPos) + 1;
 280     }
 281
 282     //success - return actual length and the buffer
 283     *pOutSize = nActualLength;
 284     return theBuffer;
 285 }
 286
 287 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 288 {
 289     wxASSERT(pOutSize != NULL);
 290
 291     const wchar_t* szEnd = szString + nStringLen + 1;
 292     const wchar_t* szPos = szString;
 293     const wchar_t* szStart = szPos;
 294
 295     size_t nActualLength = 0;
 296     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 297
 298     wxCharBuffer theBuffer(nCurrentSize);
 299
 300     //Convert the string until the length() is reached, continuing the
 301     //loop every time a null character is reached
 302     while(szPos != szEnd)
 303     {
 304         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 305
 306         //Get the length of the current (sub)string
 307         size_t nLen = WC2MB(NULL, szPos, 0);
 308
 309         //Invalid conversion?
 310         if( nLen == (size_t)-1 )
 311         {
 312             *pOutSize = 0;
 313             theBuffer.data()[0u] = wxT('\0');
 314             return theBuffer;
 315         }
 316
 317         //Increase the actual length (+1 for current null character)
 318         nActualLength += nLen + 1;
 319
 320         //if buffer too big, realloc the buffer
 321         if (nActualLength > (nCurrentSize+1))
 322         {
 323             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 324             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 325             theBuffer = theNewBuffer;
 326             nCurrentSize <<= 1;
 327         }
 328
 329         //Convert the current (sub)string
 330         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 331         {
 332             *pOutSize = 0;
 333             theBuffer.data()[0u] = wxT('\0');
 334             return theBuffer;
 335         }
 336
 337         //Increment to next (sub)string
 338         //Note that we have to use wxWcslen here instead of nLen
 339         //here because XX2XX gives us the size of the output buffer,
 340         //not neccessarly the length of the string
 341         szPos += wxWcslen(szPos) + 1;
 342     }
 343
 344     //success - return actual length and the buffer
 345     *pOutSize = nActualLength;
 346     return theBuffer;
 347 }
 348
 349 // ----------------------------------------------------------------------------
 350 // wxMBConvLibc
 351 // ----------------------------------------------------------------------------
 352
 353 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 354 {
 355     return wxMB2WC(buf, psz, n);
 356 }
 357
 358 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 359 {
 360     return wxWC2MB(buf, psz, n);
 361 }
 362
 363 // ----------------------------------------------------------------------------
 364 // wxConvBrokenFileNames is made for GTK2 in Unicode mode when
 365 // files are accidentally written in an encoding which is not
 366 // the system encoding. Typically, the system encoding will be
 367 // UTF8 but there might be files stored in ISO8859-1 on disk.
 368 // ----------------------------------------------------------------------------
 369
 370 class wxConvBrokenFileNames: public wxMBConvLibc
 371 {
 372 public:
 373     wxConvBrokenFileNames() : m_utf8conv(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL) { }
 374     virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
 375     virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
 376     inline bool UseUTF8() const;
 377 private:
 378     wxMBConvUTF8 m_utf8conv;
 379 };
 380
 381 bool wxConvBrokenFileNames::UseUTF8() const
 382 {
 383 #if defined HAVE_LANGINFO_H && defined CODESET
 384     char *codeset = nl_langinfo(CODESET);
 385     return strcmp(codeset, "UTF-8") == 0;
 386 #else
 387     return false;
 388 #endif
 389 }
 390
 391 size_t wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const
 392 {
 393     if (UseUTF8())
 394         return m_utf8conv.MB2WC( outputBuf, psz, outputSize );
 395     else
 396         return wxMBConvLibc::MB2WC( outputBuf, psz, outputSize );
 397 }
 398
 399 size_t wxConvBrokenFileNames::WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const
 400 {
 401     if (UseUTF8())
 402         return m_utf8conv.WC2MB( outputBuf, psz, outputSize );
 403     else
 404         return wxMBConvLibc::WC2MB( outputBuf, psz, outputSize );
 405 }
 406
 407 // ----------------------------------------------------------------------------
 408 // UTF-7
 409 // ----------------------------------------------------------------------------
 410
 411 // Implementation (C) 2004 Fredrik Roubert
 412
 413 //
 414 // BASE64 decoding table
 415 //
 416 static const unsigned char utf7unb64[] =
 417 {
 418     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 419     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 420     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 421     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 422     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 423     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 424     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 425     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 426     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 427     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 428     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 429     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 431     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 432     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 433     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 434     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 435     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 436     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 437     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 438     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 439     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 440     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 441     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 442     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 443     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 444     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 450 };
 451
 452 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 453 {
 454     size_t len = 0;
 455
 456     while (*psz && ((!buf) || (len < n)))
 457     {
 458         unsigned char cc = *psz++;
 459         if (cc != '+')
 460         {
 461             // plain ASCII char
 462             if (buf)
 463                 *buf++ = cc;
 464             len++;
 465         }
 466         else if (*psz == '-')
 467         {
 468             // encoded plus sign
 469             if (buf)
 470                 *buf++ = cc;
 471             len++;
 472             psz++;
 473         }
 474         else
 475         {
 476             // BASE64 encoded string
 477             bool lsb;
 478             unsigned char c;
 479             unsigned int d, l;
 480             for (lsb = false, d = 0, l = 0;
 481                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 482             {
 483                 d <<= 6;
 484                 d += cc;
 485                 for (l += 6; l >= 8; lsb = !lsb)
 486                 {
 487                     c = (unsigned char)((d >> (l -= 8)) % 256);
 488                     if (lsb)
 489                     {
 490                         if (buf)
 491                             *buf++ |= c;
 492                         len ++;
 493                     }
 494                     else
 495                         if (buf)
 496                             *buf = (wchar_t)(c << 8);
 497                 }
 498             }
 499             if (*psz == '-')
 500                 psz++;
 501         }
 502     }
 503     if (buf && (len < n))
 504         *buf = 0;
 505     return len;
 506 }
 507
 508 //
 509 // BASE64 encoding table
 510 //
 511 static const unsigned char utf7enb64[] =
 512 {
 513     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 514     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 515     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 516     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 517     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 518     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 519     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 520     '4', '5', '6', '7', '8', '9', '+', '/'
 521 };
 522
 523 //
 524 // UTF-7 encoding table
 525 //
 526 // 0 - Set D (directly encoded characters)
 527 // 1 - Set O (optional direct characters)
 528 // 2 - whitespace characters (optional)
 529 // 3 - special characters
 530 //
 531 static const unsigned char utf7encode[128] =
 532 {
 533     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 534     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 535     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 536     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 537     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 538     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 539     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 540     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 541 };
 542
 543 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 544 {
 545
 546
 547     size_t len = 0;
 548
 549     while (*psz && ((!buf) || (len < n)))
 550     {
 551         wchar_t cc = *psz++;
 552         if (cc < 0x80 && utf7encode[cc] < 1)
 553         {
 554             // plain ASCII char
 555             if (buf)
 556                 *buf++ = (char)cc;
 557             len++;
 558         }
 559 #ifndef WC_UTF16
 560         else if (((wxUint32)cc) > 0xffff)
 561             {
 562             // no surrogate pair generation (yet?)
 563             return (size_t)-1;
 564         }
 565 #endif
 566         else
 567         {
 568             if (buf)
 569                 *buf++ = '+';
 570             len++;
 571             if (cc != '+')
 572             {
 573                 // BASE64 encode string
 574                 unsigned int lsb, d, l;
 575                 for (d = 0, l = 0;; psz++)
 576                 {
 577                     for (lsb = 0; lsb < 2; lsb ++)
 578                     {
 579                         d <<= 8;
 580                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 581
 582                         for (l += 8; l >= 6; )
 583                         {
 584                             l -= 6;
 585                             if (buf)
 586                                 *buf++ = utf7enb64[(d >> l) % 64];
 587                             len++;
 588                         }
 589                     }
 590                     cc = *psz;
 591                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 592                         break;
 593                 }
 594                 if (l != 0)
 595                 {
 596                     if (buf)
 597                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 598                     len++;
 599                 }
 600             }
 601             if (buf)
 602                 *buf++ = '-';
 603             len++;
 604         }
 605     }
 606     if (buf && (len < n))
 607         *buf = 0;
 608     return len;
 609 }
 610
 611 // ----------------------------------------------------------------------------
 612 // UTF-8
 613 // ----------------------------------------------------------------------------
 614
 615 static wxUint32 utf8_max[]=
 616     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 617
 618 // boundaries of the private use area we use to (temporarily) remap invalid
 619 // characters invalid in a UTF-8 encoded string
 620 const wxUint32 wxUnicodePUA = 0x100000;
 621 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 622
 623 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 624 {
 625     size_t len = 0;
 626
 627     while (*psz && ((!buf) || (len < n)))
 628     {
 629         const char *opsz = psz;
 630         bool invalid = false;
 631         unsigned char cc = *psz++, fc = cc;
 632         unsigned cnt;
 633         for (cnt = 0; fc & 0x80; cnt++)
 634             fc <<= 1;
 635         if (!cnt)
 636         {
 637             // plain ASCII char
 638             if (buf)
 639                 *buf++ = cc;
 640             len++;
 641         }
 642         else
 643         {
 644             cnt--;
 645             if (!cnt)
 646             {
 647                 // invalid UTF-8 sequence
 648                 invalid = true;
 649             }
 650             else
 651             {
 652                 unsigned ocnt = cnt - 1;
 653                 wxUint32 res = cc & (0x3f >> cnt);
 654                 while (cnt--)
 655                 {
 656                     cc = *psz;
 657                     if ((cc & 0xC0) != 0x80)
 658                     {
 659                         // invalid UTF-8 sequence
 660                         invalid = true;
 661                         break;
 662                     }
 663                     psz++;
 664                     res = (res << 6) | (cc & 0x3f);
 665                 }
 666                 if (invalid || res <= utf8_max[ocnt])
 667                 {
 668                     // illegal UTF-8 encoding
 669                     invalid = true;
 670                 }
 671                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 672                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 673                 {
 674                     // if one of our PUA characters turns up externally
 675                     // it must also be treated as an illegal sequence
 676                     // (a bit like you have to escape an escape character)
 677                     invalid = true;
 678                 }
 679                 else
 680                 {
 681 #ifdef WC_UTF16
 682                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 683                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 684                     if (pa == (size_t)-1)
 685                     {
 686                         invalid = true;
 687                     }
 688                     else
 689                     {
 690                         if (buf)
 691                             buf += pa;
 692                         len += pa;
 693                     }
 694 #else // !WC_UTF16
 695                     if (buf)
 696                         *buf++ = res;
 697                     len++;
 698 #endif // WC_UTF16/!WC_UTF16
 699                 }
 700             }
 701             if (invalid)
 702             {
 703                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 704                 {
 705                     while (opsz < psz && (!buf || len < n))
 706                     {
 707 #ifdef WC_UTF16
 708                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 709                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 710                         wxASSERT(pa != (size_t)-1);
 711                         if (buf)
 712                             buf += pa;
 713                         opsz++;
 714                         len += pa;
 715 #else
 716                         if (buf)
 717                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 718                         opsz++;
 719                         len++;
 720 #endif
 721                     }
 722                 }
 723                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 724                 {
 725                     while (opsz < psz && (!buf || len < n))
 726                     {
 727                         if ( buf && len + 3 < n )
 728                         {
 729                             unsigned char n = *opsz;
 730                             *buf++ = L'\\';
 731                             *buf++ = L'0' + n / 0100;
 732                             *buf++ = L'0' + (n % 0100) / 010;
 733                             *buf++ = L'0' + n % 010;
 734                         }
 735                         opsz++;
 736                         len += 4;
 737                     }
 738                 }
 739                 else // MAP_INVALID_UTF8_NOT
 740                 {
 741                     return (size_t)-1;
 742                 }
 743             }
 744         }
 745     }
 746     if (buf && (len < n))
 747         *buf = 0;
 748     return len;
 749 }
 750
 751 static inline bool isoctal(wchar_t wch)
 752 {
 753     return L'0' <= wch && wch <= L'7';
 754 }
 755
 756 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 757 {
 758     size_t len = 0;
 759
 760     while (*psz && ((!buf) || (len < n)))
 761     {
 762         wxUint32 cc;
 763 #ifdef WC_UTF16
 764         // cast is ok for WC_UTF16
 765         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 766         psz += (pa == (size_t)-1) ? 1 : pa;
 767 #else
 768         cc=(*psz++) & 0x7fffffff;
 769 #endif
 770
 771         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 772                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 773         {
 774             if (buf)
 775                 *buf++ = (char)(cc - wxUnicodePUA);
 776             len++;
 777         }
 778         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 779                     cc == L'\\' &&
 780                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 781         {
 782             if (buf)
 783             {
 784                 *buf++ = (char) (psz[0] - L'0')*0100 +
 785                                 (psz[1] - L'0')*010 +
 786                                 (psz[2] - L'0');
 787             }
 788
 789             psz += 3;
 790             len++;
 791         }
 792         else
 793         {
 794             unsigned cnt;
 795             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 796             if (!cnt)
 797             {
 798                 // plain ASCII char
 799                 if (buf)
 800                     *buf++ = (char) cc;
 801                 len++;
 802             }
 803
 804             else
 805             {
 806                 len += cnt + 1;
 807                 if (buf)
 808                 {
 809                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 810                     while (cnt--)
 811                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 812                 }
 813             }
 814         }
 815     }
 816
 817     if (buf && (len<n))
 818         *buf = 0;
 819
 820     return len;
 821 }
 822
 823 // ----------------------------------------------------------------------------
 824 // UTF-16
 825 // ----------------------------------------------------------------------------
 826
 827 #ifdef WORDS_BIGENDIAN
 828     #define wxMBConvUTF16straight wxMBConvUTF16BE
 829     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 830 #else
 831     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 832     #define wxMBConvUTF16straight wxMBConvUTF16LE
 833 #endif
 834
 835
 836 #ifdef WC_UTF16
 837
 838 // copy 16bit MB to 16bit String
 839 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 840 {
 841     size_t len=0;
 842
 843     while (*(wxUint16*)psz && (!buf || len < n))
 844     {
 845         if (buf)
 846             *buf++ = *(wxUint16*)psz;
 847         len++;
 848
 849         psz += sizeof(wxUint16);
 850     }
 851     if (buf && len<n)   *buf=0;
 852
 853     return len;
 854 }
 855
 856
 857 // copy 16bit String to 16bit MB
 858 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 859 {
 860     size_t len=0;
 861
 862     while (*psz && (!buf || len < n))
 863     {
 864         if (buf)
 865         {
 866             *(wxUint16*)buf = *psz;
 867             buf += sizeof(wxUint16);
 868         }
 869         len += sizeof(wxUint16);
 870         psz++;
 871     }
 872     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 873
 874     return len;
 875 }
 876
 877
 878 // swap 16bit MB to 16bit String
 879 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 880 {
 881     size_t len=0;
 882
 883     while (*(wxUint16*)psz && (!buf || len < n))
 884     {
 885         if (buf)
 886         {
 887             ((char *)buf)[0] = psz[1];
 888             ((char *)buf)[1] = psz[0];
 889             buf++;
 890         }
 891         len++;
 892         psz += sizeof(wxUint16);
 893     }
 894     if (buf && len<n)   *buf=0;
 895
 896     return len;
 897 }
 898
 899
 900 // swap 16bit MB to 16bit String
 901 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 902 {
 903     size_t len=0;
 904
 905     while (*psz && (!buf || len < n))
 906     {
 907         if (buf)
 908         {
 909             *buf++ = ((char*)psz)[1];
 910             *buf++ = ((char*)psz)[0];
 911         }
 912         len += sizeof(wxUint16);
 913         psz++;
 914     }
 915     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 916
 917     return len;
 918 }
 919
 920
 921 #else // WC_UTF16
 922
 923
 924 // copy 16bit MB to 32bit String
 925 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 926 {
 927     size_t len=0;
 928
 929     while (*(wxUint16*)psz && (!buf || len < n))
 930     {
 931         wxUint32 cc;
 932         size_t pa=decode_utf16((wxUint16*)psz, cc);
 933         if (pa == (size_t)-1)
 934             return pa;
 935
 936         if (buf)
 937             *buf++ = cc;
 938         len++;
 939         psz += pa * sizeof(wxUint16);
 940     }
 941     if (buf && len<n)   *buf=0;
 942
 943     return len;
 944 }
 945
 946
 947 // copy 32bit String to 16bit MB
 948 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 949 {
 950     size_t len=0;
 951
 952     while (*psz && (!buf || len < n))
 953     {
 954         wxUint16 cc[2];
 955         size_t pa=encode_utf16(*psz, cc);
 956
 957         if (pa == (size_t)-1)
 958             return pa;
 959
 960         if (buf)
 961         {
 962             *(wxUint16*)buf = cc[0];
 963             buf += sizeof(wxUint16);
 964             if (pa > 1)
 965             {
 966                 *(wxUint16*)buf = cc[1];
 967                 buf += sizeof(wxUint16);
 968             }
 969         }
 970
 971         len += pa*sizeof(wxUint16);
 972         psz++;
 973     }
 974     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 975
 976     return len;
 977 }
 978
 979
 980 // swap 16bit MB to 32bit String
 981 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 982 {
 983     size_t len=0;
 984
 985     while (*(wxUint16*)psz && (!buf || len < n))
 986     {
 987         wxUint32 cc;
 988         char tmp[4];
 989         tmp[0]=psz[1];  tmp[1]=psz[0];
 990         tmp[2]=psz[3];  tmp[3]=psz[2];
 991
 992         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 993         if (pa == (size_t)-1)
 994             return pa;
 995
 996         if (buf)
 997             *buf++ = cc;
 998
 999         len++;
1000         psz += pa * sizeof(wxUint16);
1001     }
1002     if (buf && len<n)   *buf=0;
1003
1004     return len;
1005 }
1006
1007
1008 // swap 32bit String to 16bit MB
1009 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1010 {
1011     size_t len=0;
1012
1013     while (*psz && (!buf || len < n))
1014     {
1015         wxUint16 cc[2];
1016         size_t pa=encode_utf16(*psz, cc);
1017
1018         if (pa == (size_t)-1)
1019             return pa;
1020
1021         if (buf)
1022         {
1023             *buf++ = ((char*)cc)[1];
1024             *buf++ = ((char*)cc)[0];
1025             if (pa > 1)
1026             {
1027                 *buf++ = ((char*)cc)[3];
1028                 *buf++ = ((char*)cc)[2];
1029             }
1030         }
1031
1032         len += pa*sizeof(wxUint16);
1033         psz++;
1034     }
1035     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1036
1037     return len;
1038 }
1039
1040 #endif // WC_UTF16
1041
1042
1043 // ----------------------------------------------------------------------------
1044 // UTF-32
1045 // ----------------------------------------------------------------------------
1046
// The conversion functions below are written once as "straight" (bytes are
// copied in the order they are stored) and once as "swap" (bytes are
// reversed); WORDS_BIGENDIAN decides which of wxMBConvUTF32BE and
// wxMBConvUTF32LE each of these two names stands for.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// global instances of the two UTF-32 converters
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1058
1059
1060 #ifdef WC_UTF16
1061
1062 // copy 32bit MB to 16bit String
1063 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1064 {
1065     size_t len=0;
1066
1067     while (*(wxUint32*)psz && (!buf || len < n))
1068     {
1069         wxUint16 cc[2];
1070
1071         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1072         if (pa == (size_t)-1)
1073             return pa;
1074
1075         if (buf)
1076         {
1077             *buf++ = cc[0];
1078             if (pa > 1)
1079                 *buf++ = cc[1];
1080         }
1081         len += pa;
1082         psz += sizeof(wxUint32);
1083     }
1084     if (buf && len<n)   *buf=0;
1085
1086     return len;
1087 }
1088
1089
1090 // copy 16bit String to 32bit MB
1091 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1092 {
1093     size_t len=0;
1094
1095     while (*psz && (!buf || len < n))
1096     {
1097         wxUint32 cc;
1098
1099         // cast is ok for WC_UTF16
1100         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1101         if (pa == (size_t)-1)
1102             return pa;
1103
1104         if (buf)
1105         {
1106             *(wxUint32*)buf = cc;
1107             buf += sizeof(wxUint32);
1108         }
1109         len += sizeof(wxUint32);
1110         psz += pa;
1111     }
1112
1113     if (buf && len<=n-sizeof(wxUint32))
1114         *(wxUint32*)buf=0;
1115
1116     return len;
1117 }
1118
1119
1120
1121 // swap 32bit MB to 16bit String
1122 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1123 {
1124     size_t len=0;
1125
1126     while (*(wxUint32*)psz && (!buf || len < n))
1127     {
1128         char tmp[4];
1129         tmp[0] = psz[3];   tmp[1] = psz[2];
1130         tmp[2] = psz[1];   tmp[3] = psz[0];
1131
1132
1133         wxUint16 cc[2];
1134
1135         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1136         if (pa == (size_t)-1)
1137             return pa;
1138
1139         if (buf)
1140         {
1141             *buf++ = cc[0];
1142             if (pa > 1)
1143                 *buf++ = cc[1];
1144         }
1145         len += pa;
1146         psz += sizeof(wxUint32);
1147     }
1148
1149     if (buf && len<n)
1150         *buf=0;
1151
1152     return len;
1153 }
1154
1155
1156 // swap 16bit String to 32bit MB
1157 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1158 {
1159     size_t len=0;
1160
1161     while (*psz && (!buf || len < n))
1162     {
1163         char cc[4];
1164
1165         // cast is ok for WC_UTF16
1166         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1167         if (pa == (size_t)-1)
1168             return pa;
1169
1170         if (buf)
1171         {
1172             *buf++ = cc[3];
1173             *buf++ = cc[2];
1174             *buf++ = cc[1];
1175             *buf++ = cc[0];
1176         }
1177         len += sizeof(wxUint32);
1178         psz += pa;
1179     }
1180
1181     if (buf && len<=n-sizeof(wxUint32))
1182         *(wxUint32*)buf=0;
1183
1184     return len;
1185 }
1186
1187 #else // WC_UTF16
1188
1189
1190 // copy 32bit MB to 32bit String
1191 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1192 {
1193     size_t len=0;
1194
1195     while (*(wxUint32*)psz && (!buf || len < n))
1196     {
1197         if (buf)
1198             *buf++ = *(wxUint32*)psz;
1199         len++;
1200         psz += sizeof(wxUint32);
1201     }
1202
1203     if (buf && len<n)
1204         *buf=0;
1205
1206     return len;
1207 }
1208
1209
1210 // copy 32bit String to 32bit MB
1211 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1212 {
1213     size_t len=0;
1214
1215     while (*psz && (!buf || len < n))
1216     {
1217         if (buf)
1218         {
1219             *(wxUint32*)buf = *psz;
1220             buf += sizeof(wxUint32);
1221         }
1222
1223         len += sizeof(wxUint32);
1224         psz++;
1225     }
1226
1227     if (buf && len<=n-sizeof(wxUint32))
1228         *(wxUint32*)buf=0;
1229
1230     return len;
1231 }
1232
1233
1234 // swap 32bit MB to 32bit String
1235 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1236 {
1237     size_t len=0;
1238
1239     while (*(wxUint32*)psz && (!buf || len < n))
1240     {
1241         if (buf)
1242         {
1243             ((char *)buf)[0] = psz[3];
1244             ((char *)buf)[1] = psz[2];
1245             ((char *)buf)[2] = psz[1];
1246             ((char *)buf)[3] = psz[0];
1247             buf++;
1248         }
1249         len++;
1250         psz += sizeof(wxUint32);
1251     }
1252
1253     if (buf && len<n)
1254         *buf=0;
1255
1256     return len;
1257 }
1258
1259
1260 // swap 32bit String to 32bit MB
1261 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1262 {
1263     size_t len=0;
1264
1265     while (*psz && (!buf || len < n))
1266     {
1267         if (buf)
1268         {
1269             *buf++ = ((char *)psz)[3];
1270             *buf++ = ((char *)psz)[2];
1271             *buf++ = ((char *)psz)[1];
1272             *buf++ = ((char *)psz)[0];
1273         }
1274         len += sizeof(wxUint32);
1275         psz++;
1276     }
1277
1278     if (buf && len<=n-sizeof(wxUint32))
1279         *(wxUint32*)buf=0;
1280
1281     return len;
1282 }
1283
1284
1285 #endif // WC_UTF16
1286
1287
1288 // ============================================================================
1289 // The classes doing conversion using the iconv_xxx() functions
1290 // ============================================================================
1291
1292 #ifdef HAVE_ICONV
1293
1294 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1295 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1296 //     (unless there's yet another bug in glibc) the only case when iconv()
1297 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1298 //     left in the input buffer -- when _real_ error occurs,
1299 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1300 //     iconv() failure.
1301 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): cres is the value returned by iconv() and
// bufLeft the number of input bytes it left unconverted
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
// old glibc: (size_t)-1 with errno == E2BIG and no input left is not an error
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
// everywhere else any (size_t)-1 return is a failure, bufLeft is not used
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// casts the address of the input pointer to the type used for the second
// argument of iconv() in this file; ICONV_CONST is not defined here --
// presumably it comes from the build configuration, TODO confirm
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1310
1311 // ----------------------------------------------------------------------------
1312 // wxMBConv_iconv: encapsulates an iconv character set
1313 // ----------------------------------------------------------------------------
1314
// Converter between a named multibyte charset and wchar_t implemented with
// iconv(): it owns one conversion descriptor per direction, both are opened
// by the ctor and closed by the dtor.
class wxMBConv_iconv : public wxMBConv
{
public:
    // name is the charset name which is passed (narrowed to char) to
    // iconv_open()
    wxMBConv_iconv(const wxChar *name);
    virtual ~wxMBConv_iconv();

    // both functions return (size_t)-1 if iconv() fails; a NULL buf can be
    // passed to only compute the size of the result
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // true only if the descriptors for both directions could be opened
    bool IsOk() const
        { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction; (iconv_t)-1 marks a handler which is not open
    iconv_t m2w,
            w2m;
#if wxUSE_THREADS
    // guards access to m2w and w2m objects
    wxMutex m_iconvMutex;
#endif

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain NULL
    //
    // it is shared by all objects of this class and determined by the ctor
    // of the first one for which iconv_open() succeeds
    static const char *ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;
};

// initially nothing is known about the wide char charset
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1349
1350 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1351 {
1352     // Do it the hard way
1353     char cname[100];
1354     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1355         cname[i] = (char) name[i];
1356
1357     // check for charset that represents wchar_t:
1358     if (ms_wcCharsetName == NULL)
1359     {
1360         ms_wcNeedsSwap = false;
1361
1362         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1363         ms_wcCharsetName = WC_NAME_BEST;
1364         m2w = iconv_open(ms_wcCharsetName, cname);
1365
1366         if (m2w == (iconv_t)-1)
1367         {
1368             // try charset w/o bytesex info (e.g. "UCS4")
1369             // and check for bytesex ourselves:
1370             ms_wcCharsetName = WC_NAME;
1371             m2w = iconv_open(ms_wcCharsetName, cname);
1372
1373             // last bet, try if it knows WCHAR_T pseudo-charset
1374             if (m2w == (iconv_t)-1)
1375             {
1376                 ms_wcCharsetName = "WCHAR_T";
1377                 m2w = iconv_open(ms_wcCharsetName, cname);
1378             }
1379
1380             if (m2w != (iconv_t)-1)
1381             {
1382                 char    buf[2], *bufPtr;
1383                 wchar_t wbuf[2], *wbufPtr;
1384                 size_t  insz, outsz;
1385                 size_t  res;
1386
1387                 buf[0] = 'A';
1388                 buf[1] = 0;
1389                 wbuf[0] = 0;
1390                 insz = 2;
1391                 outsz = SIZEOF_WCHAR_T * 2;
1392                 wbufPtr = wbuf;
1393                 bufPtr = buf;
1394
1395                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1396                             (char**)&wbufPtr, &outsz);
1397
1398                 if (ICONV_FAILED(res, insz))
1399                 {
1400                     ms_wcCharsetName = NULL;
1401                     wxLogLastError(wxT("iconv"));
1402                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1403                 }
1404                 else
1405                 {
1406                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1407                 }
1408             }
1409             else
1410             {
1411                 ms_wcCharsetName = NULL;
1412
1413                 // VS: we must not output an error here, since wxWidgets will safely
1414                 //     fall back to using wxEncodingConverter.
1415                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1416                 //wxLogError(
1417             }
1418         }
1419         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1420     }
1421     else // we already have ms_wcCharsetName
1422     {
1423         m2w = iconv_open(ms_wcCharsetName, cname);
1424     }
1425
1426     // NB: don't ever pass NULL to iconv_open(), it may crash!
1427     if ( ms_wcCharsetName )
1428     {
1429         w2m = iconv_open( cname, ms_wcCharsetName);
1430     }
1431     else
1432     {
1433         w2m = (iconv_t)-1;
1434     }
1435 }
1436
1437 wxMBConv_iconv::~wxMBConv_iconv()
1438 {
1439     if ( m2w != (iconv_t)-1 )
1440         iconv_close(m2w);
1441     if ( w2m != (iconv_t)-1 )
1442         iconv_close(w2m);
1443 }
1444
1445 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1446 {
1447 #if wxUSE_THREADS
1448     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1449     //     Unfortunately there is a couple of global wxCSConv objects such as
1450     //     wxConvLocal that are used all over wx code, so we have to make sure
1451     //     the handle is used by at most one thread at the time. Otherwise
1452     //     only a few wx classes would be safe to use from non-main threads
1453     //     as MB<->WC conversion would fail "randomly".
1454     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1455 #endif
1456
1457     size_t inbuf = strlen(psz);
1458     size_t outbuf = n * SIZEOF_WCHAR_T;
1459     size_t res, cres;
1460     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1461     wchar_t *bufPtr = buf;
1462     const char *pszPtr = psz;
1463
1464     if (buf)
1465     {
1466         // have destination buffer, convert there
1467         cres = iconv(m2w,
1468                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1469                      (char**)&bufPtr, &outbuf);
1470         res = n - (outbuf / SIZEOF_WCHAR_T);
1471
1472         if (ms_wcNeedsSwap)
1473         {
1474             // convert to native endianness
1475             WC_BSWAP(buf /* _not_ bufPtr */, res)
1476         }
1477
1478         // NB: iconv was given only strlen(psz) characters on input, and so
1479         //     it couldn't convert the trailing zero. Let's do it ourselves
1480         //     if there's some room left for it in the output buffer.
1481         if (res < n)
1482             buf[res] = 0;
1483     }
1484     else
1485     {
1486         // no destination buffer... convert using temp buffer
1487         // to calculate destination buffer requirement
1488         wchar_t tbuf[8];
1489         res = 0;
1490         do {
1491             bufPtr = tbuf;
1492             outbuf = 8*SIZEOF_WCHAR_T;
1493
1494             cres = iconv(m2w,
1495                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1496                          (char**)&bufPtr, &outbuf );
1497
1498             res += 8-(outbuf/SIZEOF_WCHAR_T);
1499         } while ((cres==(size_t)-1) && (errno==E2BIG));
1500     }
1501
1502     if (ICONV_FAILED(cres, inbuf))
1503     {
1504         //VS: it is ok if iconv fails, hence trace only
1505         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1506         return (size_t)-1;
1507     }
1508
1509     return res;
1510 }
1511
1512 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1513 {
1514 #if wxUSE_THREADS
1515     // NB: explained in MB2WC
1516     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1517 #endif
1518
1519     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1520     size_t outbuf = n;
1521     size_t res, cres;
1522
1523     wchar_t *tmpbuf = 0;
1524
1525     if (ms_wcNeedsSwap)
1526     {
1527         // need to copy to temp buffer to switch endianness
1528         // this absolutely doesn't rock!
1529         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1530         //  could be in read-only memory, or be accessed in some other thread)
1531         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1532         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1533         WC_BSWAP(tmpbuf, inbuf)
1534         psz=tmpbuf;
1535     }
1536
1537     if (buf)
1538     {
1539         // have destination buffer, convert there
1540         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1541
1542         res = n-outbuf;
1543
1544         // NB: iconv was given only wcslen(psz) characters on input, and so
1545         //     it couldn't convert the trailing zero. Let's do it ourselves
1546         //     if there's some room left for it in the output buffer.
1547         if (res < n)
1548             buf[0] = 0;
1549     }
1550     else
1551     {
1552         // no destination buffer... convert using temp buffer
1553         // to calculate destination buffer requirement
1554         char tbuf[16];
1555         res = 0;
1556         do {
1557             buf = tbuf; outbuf = 16;
1558
1559             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1560
1561             res += 16 - outbuf;
1562         } while ((cres==(size_t)-1) && (errno==E2BIG));
1563     }
1564
1565     if (ms_wcNeedsSwap)
1566     {
1567         free(tmpbuf);
1568     }
1569
1570     if (ICONV_FAILED(cres, inbuf))
1571     {
1572         //VS: it is ok if iconv fails, hence trace only
1573         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1574         return (size_t)-1;
1575     }
1576
1577     return res;
1578 }
1579
1580 #endif // HAVE_ICONV
1581
1582
1583 // ============================================================================
1584 // Win32 conversion classes
1585 // ============================================================================
1586
1587 #ifdef wxHAVE_WIN32_MB2WC
1588
// from utils.cpp
//
// These two functions are declared here directly and only when wxUSE_FONTMAP
// is on; wxMBConv_win32 uses them to turn a charset name or a font encoding
// into the code page it stores.
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1594
// Converter using the Win32 MultiByteToWideChar() and WideCharToMultiByte()
// functions with the code page stored in m_CodePage.
class wxMBConv_win32 : public wxMBConv
{
public:
    // default ctor uses CP_ACP as the code page
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // use the code page which wxCharsetToCodepage() returns for this name
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    // use the code page which wxEncodingToCodepage() returns for this encoding
    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // Convert the NUL-terminated multibyte string psz to wide characters.
    //
    // buf may be NULL to only compute the length of the result, otherwise n
    // is its size in wide characters. Returns the length of the result
    // without the terminating NUL or (size_t)-1 on error.
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and breaks the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
        // an error (tested under Windows Server 2003) and apparently it is
        // done on purpose, i.e. the function accepts any input in this case
        // and although I'd prefer to return error on ill-formed output, our
        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) either so we don't
        // even have any fallback here...
        int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;

        const size_t len = ::MultiByteToWideChar
                             (
                                m_CodePage,     // code page
                                flags,          // flags: fail on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                             );

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        //
        // a return value of 0 is mapped to our error value (size_t)-1
        return len ? len - 1 : (size_t)-1;
    }

    // Convert the NUL-terminated wide string pwz to multibyte.
    //
    // buf may be NULL to only compute the length of the result, otherwise n
    // is its size in bytes. Returns the length of the result without the
    // terminating NUL or (size_t)-1 if the function failed or if, when really
    // converting, the conversion was found to be lossy.
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        BOOL usedDef wxDUMMY_INITIALIZE(false);
        BOOL *pUsedDef;
        int flags;
        if ( CanUseNoBestFit() && m_CodePage < 50000 )
        {
            // it's our lucky day
            flags = WC_NO_BEST_FIT_CHARS;
            pUsedDef = &usedDef;
        }
        else // old system or unsupported encoding
        {
            flags = 0;
            pUsedDef = NULL;
        }

        const size_t len = ::WideCharToMultiByte
                             (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                             );

        if ( !len )
        {
            // function totally failed
            return (size_t)-1;
        }

        // if we were really converting, check if we succeeded
        if ( buf )
        {
            if ( flags )
            {
                // check if the conversion failed, i.e. if any replacements
                // were done
                if ( usedDef )
                    return (size_t)-1;
            }
            else // we must resort to double tripping...
            {
                // convert the result back and compare it with the input
                wxWCharBuffer wcBuf(n);
                if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                        wcscmp(wcBuf, pwz) != 0 )
                {
                    // we didn't obtain the same thing we started from, hence
                    // the conversion was lossy and we consider that it failed
                    return (size_t)-1;
                }
            }
        }

        // as in MB2WC(), the value returned by the Win32 function is one more
        // than the length of the string, hence "len - 1"
        return len - 1;
    }

    // the code page is considered valid unless it is -1
    bool IsOk() const { return m_CodePage != -1; }

private:
    // returns true if the Windows version we run under is recent enough for
    // WC_NO_BEST_FIT_CHARS to be used; the check is done only once and its
    // result is remembered in a static variable
    static bool CanUseNoBestFit()
    {
        // -1 means "not determined yet", otherwise it is 0 or 1
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            switch ( wxGetOsVersion(&verMaj, &verMin) )
            {
                case wxWIN95:
                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                    break;

                case wxWINDOWS_NT:
                    s_isWin98Or2k = verMaj >= 5;
                    break;

                default:
                    // unknown, be conservative by default
                    s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    // the code page passed to the Win32 conversion functions
    long m_CodePage;
};
1762
1763 #endif // wxHAVE_WIN32_MB2WC
1764
1765 // ============================================================================
1766 // Cocoa conversion classes
1767 // ============================================================================
1768
1769 #if defined(__WXCOCOA__)
1770
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1774
1775 #include <CoreFoundation/CFString.h>
1776 #include <CoreFoundation/CFStringEncodingExt.h>
1777
1778 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1779 {
1780     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1781     if ( encoding == wxFONTENCODING_DEFAULT )
1782     {
1783         enc = CFStringGetSystemEncoding();
1784     }
1785     else switch( encoding)
1786     {
1787         case wxFONTENCODING_ISO8859_1 :
1788             enc = kCFStringEncodingISOLatin1 ;
1789             break ;
1790         case wxFONTENCODING_ISO8859_2 :
1791             enc = kCFStringEncodingISOLatin2;
1792             break ;
1793         case wxFONTENCODING_ISO8859_3 :
1794             enc = kCFStringEncodingISOLatin3 ;
1795             break ;
1796         case wxFONTENCODING_ISO8859_4 :
1797             enc = kCFStringEncodingISOLatin4;
1798             break ;
1799         case wxFONTENCODING_ISO8859_5 :
1800             enc = kCFStringEncodingISOLatinCyrillic;
1801             break ;
1802         case wxFONTENCODING_ISO8859_6 :
1803             enc = kCFStringEncodingISOLatinArabic;
1804             break ;
1805         case wxFONTENCODING_ISO8859_7 :
1806             enc = kCFStringEncodingISOLatinGreek;
1807             break ;
1808         case wxFONTENCODING_ISO8859_8 :
1809             enc = kCFStringEncodingISOLatinHebrew;
1810             break ;
1811         case wxFONTENCODING_ISO8859_9 :
1812             enc = kCFStringEncodingISOLatin5;
1813             break ;
1814         case wxFONTENCODING_ISO8859_10 :
1815             enc = kCFStringEncodingISOLatin6;
1816             break ;
1817         case wxFONTENCODING_ISO8859_11 :
1818             enc = kCFStringEncodingISOLatinThai;
1819             break ;
1820         case wxFONTENCODING_ISO8859_13 :
1821             enc = kCFStringEncodingISOLatin7;
1822             break ;
1823         case wxFONTENCODING_ISO8859_14 :
1824             enc = kCFStringEncodingISOLatin8;
1825             break ;
1826         case wxFONTENCODING_ISO8859_15 :
1827             enc = kCFStringEncodingISOLatin9;
1828             break ;
1829
1830         case wxFONTENCODING_KOI8 :
1831             enc = kCFStringEncodingKOI8_R;
1832             break ;
1833         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1834             enc = kCFStringEncodingDOSRussian;
1835             break ;
1836
1837 //      case wxFONTENCODING_BULGARIAN :
1838 //          enc = ;
1839 //          break ;
1840
1841         case wxFONTENCODING_CP437 :
1842             enc =kCFStringEncodingDOSLatinUS ;
1843             break ;
1844         case wxFONTENCODING_CP850 :
1845             enc = kCFStringEncodingDOSLatin1;
1846             break ;
1847         case wxFONTENCODING_CP852 :
1848             enc = kCFStringEncodingDOSLatin2;
1849             break ;
1850         case wxFONTENCODING_CP855 :
1851             enc = kCFStringEncodingDOSCyrillic;
1852             break ;
1853         case wxFONTENCODING_CP866 :
1854             enc =kCFStringEncodingDOSRussian ;
1855             break ;
1856         case wxFONTENCODING_CP874 :
1857             enc = kCFStringEncodingDOSThai;
1858             break ;
1859         case wxFONTENCODING_CP932 :
1860             enc = kCFStringEncodingDOSJapanese;
1861             break ;
1862         case wxFONTENCODING_CP936 :
1863             enc =kCFStringEncodingDOSChineseSimplif ;
1864             break ;
1865         case wxFONTENCODING_CP949 :
1866             enc = kCFStringEncodingDOSKorean;
1867             break ;
1868         case wxFONTENCODING_CP950 :
1869             enc = kCFStringEncodingDOSChineseTrad;
1870             break ;
1871         case wxFONTENCODING_CP1250 :
1872             enc = kCFStringEncodingWindowsLatin2;
1873             break ;
1874         case wxFONTENCODING_CP1251 :
1875             enc =kCFStringEncodingWindowsCyrillic ;
1876             break ;
1877         case wxFONTENCODING_CP1252 :
1878             enc =kCFStringEncodingWindowsLatin1 ;
1879             break ;
1880         case wxFONTENCODING_CP1253 :
1881             enc = kCFStringEncodingWindowsGreek;
1882             break ;
1883         case wxFONTENCODING_CP1254 :
1884             enc = kCFStringEncodingWindowsLatin5;
1885             break ;
1886         case wxFONTENCODING_CP1255 :
1887             enc =kCFStringEncodingWindowsHebrew ;
1888             break ;
1889         case wxFONTENCODING_CP1256 :
1890             enc =kCFStringEncodingWindowsArabic ;
1891             break ;
1892         case wxFONTENCODING_CP1257 :
1893             enc = kCFStringEncodingWindowsBalticRim;
1894             break ;
1895 //   This only really encodes to UTF7 (if that) evidently
1896 //        case wxFONTENCODING_UTF7 :
1897 //            enc = kCFStringEncodingNonLossyASCII ;
1898 //            break ;
1899         case wxFONTENCODING_UTF8 :
1900             enc = kCFStringEncodingUTF8 ;
1901             break ;
1902         case wxFONTENCODING_EUC_JP :
1903             enc = kCFStringEncodingEUC_JP;
1904             break ;
1905         case wxFONTENCODING_UTF16 :
1906             enc = kCFStringEncodingUnicode ;
1907             break ;
1908         case wxFONTENCODING_MACROMAN :
1909             enc = kCFStringEncodingMacRoman ;
1910             break ;
1911         case wxFONTENCODING_MACJAPANESE :
1912             enc = kCFStringEncodingMacJapanese ;
1913             break ;
1914         case wxFONTENCODING_MACCHINESETRAD :
1915             enc = kCFStringEncodingMacChineseTrad ;
1916             break ;
1917         case wxFONTENCODING_MACKOREAN :
1918             enc = kCFStringEncodingMacKorean ;
1919             break ;
1920         case wxFONTENCODING_MACARABIC :
1921             enc = kCFStringEncodingMacArabic ;
1922             break ;
1923         case wxFONTENCODING_MACHEBREW :
1924             enc = kCFStringEncodingMacHebrew ;
1925             break ;
1926         case wxFONTENCODING_MACGREEK :
1927             enc = kCFStringEncodingMacGreek ;
1928             break ;
1929         case wxFONTENCODING_MACCYRILLIC :
1930             enc = kCFStringEncodingMacCyrillic ;
1931             break ;
1932         case wxFONTENCODING_MACDEVANAGARI :
1933             enc = kCFStringEncodingMacDevanagari ;
1934             break ;
1935         case wxFONTENCODING_MACGURMUKHI :
1936             enc = kCFStringEncodingMacGurmukhi ;
1937             break ;
1938         case wxFONTENCODING_MACGUJARATI :
1939             enc = kCFStringEncodingMacGujarati ;
1940             break ;
1941         case wxFONTENCODING_MACORIYA :
1942             enc = kCFStringEncodingMacOriya ;
1943             break ;
1944         case wxFONTENCODING_MACBENGALI :
1945             enc = kCFStringEncodingMacBengali ;
1946             break ;
1947         case wxFONTENCODING_MACTAMIL :
1948             enc = kCFStringEncodingMacTamil ;
1949             break ;
1950         case wxFONTENCODING_MACTELUGU :
1951             enc = kCFStringEncodingMacTelugu ;
1952             break ;
1953         case wxFONTENCODING_MACKANNADA :
1954             enc = kCFStringEncodingMacKannada ;
1955             break ;
1956         case wxFONTENCODING_MACMALAJALAM :
1957             enc = kCFStringEncodingMacMalayalam ;
1958             break ;
1959         case wxFONTENCODING_MACSINHALESE :
1960             enc = kCFStringEncodingMacSinhalese ;
1961             break ;
1962         case wxFONTENCODING_MACBURMESE :
1963             enc = kCFStringEncodingMacBurmese ;
1964             break ;
1965         case wxFONTENCODING_MACKHMER :
1966             enc = kCFStringEncodingMacKhmer ;
1967             break ;
1968         case wxFONTENCODING_MACTHAI :
1969             enc = kCFStringEncodingMacThai ;
1970             break ;
1971         case wxFONTENCODING_MACLAOTIAN :
1972             enc = kCFStringEncodingMacLaotian ;
1973             break ;
1974         case wxFONTENCODING_MACGEORGIAN :
1975             enc = kCFStringEncodingMacGeorgian ;
1976             break ;
1977         case wxFONTENCODING_MACARMENIAN :
1978             enc = kCFStringEncodingMacArmenian ;
1979             break ;
1980         case wxFONTENCODING_MACCHINESESIMP :
1981             enc = kCFStringEncodingMacChineseSimp ;
1982             break ;
1983         case wxFONTENCODING_MACTIBETAN :
1984             enc = kCFStringEncodingMacTibetan ;
1985             break ;
1986         case wxFONTENCODING_MACMONGOLIAN :
1987             enc = kCFStringEncodingMacMongolian ;
1988             break ;
1989         case wxFONTENCODING_MACETHIOPIC :
1990             enc = kCFStringEncodingMacEthiopic ;
1991             break ;
1992         case wxFONTENCODING_MACCENTRALEUR :
1993             enc = kCFStringEncodingMacCentralEurRoman ;
1994             break ;
1995         case wxFONTENCODING_MACVIATNAMESE :
1996             enc = kCFStringEncodingMacVietnamese ;
1997             break ;
1998         case wxFONTENCODING_MACARABICEXT :
1999             enc = kCFStringEncodingMacExtArabic ;
2000             break ;
2001         case wxFONTENCODING_MACSYMBOL :
2002             enc = kCFStringEncodingMacSymbol ;
2003             break ;
2004         case wxFONTENCODING_MACDINGBATS :
2005             enc = kCFStringEncodingMacDingbats ;
2006             break ;
2007         case wxFONTENCODING_MACTURKISH :
2008             enc = kCFStringEncodingMacTurkish ;
2009             break ;
2010         case wxFONTENCODING_MACCROATIAN :
2011             enc = kCFStringEncodingMacCroatian ;
2012             break ;
2013         case wxFONTENCODING_MACICELANDIC :
2014             enc = kCFStringEncodingMacIcelandic ;
2015             break ;
2016         case wxFONTENCODING_MACROMANIAN :
2017             enc = kCFStringEncodingMacRomanian ;
2018             break ;
2019         case wxFONTENCODING_MACCELTIC :
2020             enc = kCFStringEncodingMacCeltic ;
2021             break ;
2022         case wxFONTENCODING_MACGAELIC :
2023             enc = kCFStringEncodingMacGaelic ;
2024             break ;
2025 //      case wxFONTENCODING_MACKEYBOARD :
2026 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2027 //          break ;
2028         default :
2029             // because gcc is picky
2030             break ;
2031     } ;
2032     return enc ;
2033 }
2034
2035 class wxMBConv_cocoa : public wxMBConv
2036 {
2037 public:
2038     wxMBConv_cocoa()
2039     {
2040         Init(CFStringGetSystemEncoding()) ;
2041     }
2042
2043 #if wxUSE_FONTMAP
2044     wxMBConv_cocoa(const wxChar* name)
2045     {
2046         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2047     }
2048 #endif
2049
2050     wxMBConv_cocoa(wxFontEncoding encoding)
2051     {
2052         Init( wxCFStringEncFromFontEnc(encoding) );
2053     }
2054
2055     ~wxMBConv_cocoa()
2056     {
2057     }
2058
2059     void Init( CFStringEncoding encoding)
2060     {
2061         m_encoding = encoding ;
2062     }
2063
2064     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2065     {
2066         wxASSERT(szUnConv);
2067
2068         CFStringRef theString = CFStringCreateWithBytes (
2069                                                 NULL, //the allocator
2070                                                 (const UInt8*)szUnConv,
2071                                                 strlen(szUnConv),
2072                                                 m_encoding,
2073                                                 false //no BOM/external representation
2074                                                 );
2075
2076         wxASSERT(theString);
2077
2078         size_t nOutLength = CFStringGetLength(theString);
2079
2080         if (szOut == NULL)
2081         {
2082             CFRelease(theString);
2083             return nOutLength;
2084         }
2085
2086         CFRange theRange = { 0, nOutSize };
2087
2088 #if SIZEOF_WCHAR_T == 4
2089         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2090 #endif
2091
2092         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2093
2094         CFRelease(theString);
2095
2096         szUniCharBuffer[nOutLength] = '\0' ;
2097
2098 #if SIZEOF_WCHAR_T == 4
2099         wxMBConvUTF16 converter ;
2100         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2101         delete[] szUniCharBuffer;
2102 #endif
2103
2104         return nOutLength;
2105     }
2106
2107     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2108     {
2109         wxASSERT(szUnConv);
2110
2111         size_t nRealOutSize;
2112         size_t nBufSize = wxWcslen(szUnConv);
2113         UniChar* szUniBuffer = (UniChar*) szUnConv;
2114
2115 #if SIZEOF_WCHAR_T == 4
2116         wxMBConvUTF16BE converter ;
2117         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2118         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2119         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2120         nBufSize /= sizeof(UniChar);
2121 #endif
2122
2123         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2124                                 NULL, //allocator
2125                                 szUniBuffer,
2126                                 nBufSize,
2127                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2128                             );
2129
2130         wxASSERT(theString);
2131
2132         //Note that CER puts a BOM when converting to unicode
2133         //so we  check and use getchars instead in that case
2134         if (m_encoding == kCFStringEncodingUnicode)
2135         {
2136             if (szOut != NULL)
2137                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2138
2139             nRealOutSize = CFStringGetLength(theString) + 1;
2140         }
2141         else
2142         {
2143             CFStringGetBytes(
2144                 theString,
2145                 CFRangeMake(0, CFStringGetLength(theString)),
2146                 m_encoding,
2147                 0, //what to put in characters that can't be converted -
2148                     //0 tells CFString to return NULL if it meets such a character
2149                 false, //not an external representation
2150                 (UInt8*) szOut,
2151                 nOutSize,
2152                 (CFIndex*) &nRealOutSize
2153                         );
2154         }
2155
2156         CFRelease(theString);
2157
2158 #if SIZEOF_WCHAR_T == 4
2159         delete[] szUniBuffer;
2160 #endif
2161
2162         return  nRealOutSize - 1;
2163     }
2164
2165     bool IsOk() const
2166     {
2167         return m_encoding != kCFStringEncodingInvalidId &&
2168               CFStringIsEncodingAvailable(m_encoding);
2169     }
2170
2171 private:
2172     CFStringEncoding m_encoding ;
2173 };
2174
2175 #endif // defined(__WXCOCOA__)
2176
2177 // ============================================================================
2178 // Mac conversion classes
2179 // ============================================================================
2180
2181 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2182
2183 class wxMBConv_mac : public wxMBConv
2184 {
2185 public:
2186     wxMBConv_mac()
2187     {
2188         Init(CFStringGetSystemEncoding()) ;
2189     }
2190
2191 #if wxUSE_FONTMAP
2192     wxMBConv_mac(const wxChar* name)
2193     {
2194         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2195     }
2196 #endif
2197
2198     wxMBConv_mac(wxFontEncoding encoding)
2199     {
2200         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2201     }
2202
2203     ~wxMBConv_mac()
2204     {
2205         OSStatus status = noErr ;
2206         status = TECDisposeConverter(m_MB2WC_converter);
2207         status = TECDisposeConverter(m_WC2MB_converter);
2208     }
2209
2210
2211     void Init( TextEncodingBase encoding)
2212     {
2213         OSStatus status = noErr ;
2214         m_char_encoding = encoding ;
2215         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2216
2217         status = TECCreateConverter(&m_MB2WC_converter,
2218                                     m_char_encoding,
2219                                     m_unicode_encoding);
2220         status = TECCreateConverter(&m_WC2MB_converter,
2221                                     m_unicode_encoding,
2222                                     m_char_encoding);
2223     }
2224
2225     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2226     {
2227         OSStatus status = noErr ;
2228         ByteCount byteOutLen ;
2229         ByteCount byteInLen = strlen(psz) ;
2230         wchar_t *tbuf = NULL ;
2231         UniChar* ubuf = NULL ;
2232         size_t res = 0 ;
2233
2234         if (buf == NULL)
2235         {
2236             //apple specs say at least 32
2237             n = wxMax( 32 , byteInLen ) ;
2238             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2239         }
2240         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2241 #if SIZEOF_WCHAR_T == 4
2242         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2243 #else
2244         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2245 #endif
2246         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2247           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2248 #if SIZEOF_WCHAR_T == 4
2249         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2250         // is not properly terminated we get random characters at the end
2251         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2252         wxMBConvUTF16BE converter ;
2253         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2254         free( ubuf ) ;
2255 #else
2256         res = byteOutLen / sizeof( UniChar ) ;
2257 #endif
2258         if ( buf == NULL )
2259              free(tbuf) ;
2260
2261         if ( buf  && res < n)
2262             buf[res] = 0;
2263
2264         return res ;
2265     }
2266
2267     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2268     {
2269         OSStatus status = noErr ;
2270         ByteCount byteOutLen ;
2271         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2272
2273         char *tbuf = NULL ;
2274
2275         if (buf == NULL)
2276         {
2277             //apple specs say at least 32
2278             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2279             tbuf = (char*) malloc( n ) ;
2280         }
2281
2282         ByteCount byteBufferLen = n ;
2283         UniChar* ubuf = NULL ;
2284 #if SIZEOF_WCHAR_T == 4
2285         wxMBConvUTF16BE converter ;
2286         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2287         byteInLen = unicharlen ;
2288         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2289         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2290 #else
2291         ubuf = (UniChar*) psz ;
2292 #endif
2293         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2294             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2295 #if SIZEOF_WCHAR_T == 4
2296         free( ubuf ) ;
2297 #endif
2298         if ( buf == NULL )
2299             free(tbuf) ;
2300
2301         size_t res = byteOutLen ;
2302         if ( buf  && res < n)
2303         {
2304             buf[res] = 0;
2305
2306             //we need to double-trip to verify it didn't insert any ? in place
2307             //of bogus characters
2308             wxWCharBuffer wcBuf(n);
2309             size_t pszlen = wxWcslen(psz);
2310             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2311                         wxWcslen(wcBuf) != pszlen ||
2312                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2313             {
2314                 // we didn't obtain the same thing we started from, hence
2315                 // the conversion was lossy and we consider that it failed
2316                 return (size_t)-1;
2317             }
2318         }
2319
2320         return res ;
2321     }
2322
2323     bool IsOk() const
2324         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2325
2326 private:
2327     TECObjectRef m_MB2WC_converter ;
2328     TECObjectRef m_WC2MB_converter ;
2329
2330     TextEncodingBase m_char_encoding ;
2331     TextEncodingBase m_unicode_encoding ;
2332 };
2333
2334 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2335
2336 // ============================================================================
2337 // wxEncodingConverter based conversion classes
2338 // ============================================================================
2339
2340 #if wxUSE_FONTMAP
2341
2342 class wxMBConv_wxwin : public wxMBConv
2343 {
2344 private:
2345     void Init()
2346     {
2347         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2348                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2349     }
2350
2351 public:
2352     // temporarily just use wxEncodingConverter stuff,
2353     // so that it works while a better implementation is built
2354     wxMBConv_wxwin(const wxChar* name)
2355     {
2356         if (name)
2357             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2358         else
2359             m_enc = wxFONTENCODING_SYSTEM;
2360
2361         Init();
2362     }
2363
2364     wxMBConv_wxwin(wxFontEncoding enc)
2365     {
2366         m_enc = enc;
2367
2368         Init();
2369     }
2370
2371     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2372     {
2373         size_t inbuf = strlen(psz);
2374         if (buf)
2375         {
2376             if (!m2w.Convert(psz,buf))
2377                 return (size_t)-1;
2378         }
2379         return inbuf;
2380     }
2381
2382     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2383     {
2384         const size_t inbuf = wxWcslen(psz);
2385         if (buf)
2386         {
2387             if (!w2m.Convert(psz,buf))
2388                 return (size_t)-1;
2389         }
2390
2391         return inbuf;
2392     }
2393
2394     bool IsOk() const { return m_ok; }
2395
2396 public:
2397     wxFontEncoding m_enc;
2398     wxEncodingConverter m2w, w2m;
2399
2400     // were we initialized successfully?
2401     bool m_ok;
2402
2403     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2404 };
2405
2406 #endif // wxUSE_FONTMAP
2407
2408 // ============================================================================
2409 // wxCSConv implementation
2410 // ============================================================================
2411
2412 void wxCSConv::Init()
2413 {
2414     m_name = NULL;
2415     m_convReal =  NULL;
2416     m_deferred = true;
2417 }
2418
2419 wxCSConv::wxCSConv(const wxChar *charset)
2420 {
2421     Init();
2422
2423     if ( charset )
2424     {
2425         SetName(charset);
2426     }
2427
2428     m_encoding = wxFONTENCODING_SYSTEM;
2429 }
2430
2431 wxCSConv::wxCSConv(wxFontEncoding encoding)
2432 {
2433     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2434     {
2435         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2436
2437         encoding = wxFONTENCODING_SYSTEM;
2438     }
2439
2440     Init();
2441
2442     m_encoding = encoding;
2443 }
2444
2445 wxCSConv::~wxCSConv()
2446 {
2447     Clear();
2448 }
2449
2450 wxCSConv::wxCSConv(const wxCSConv& conv)
2451         : wxMBConv()
2452 {
2453     Init();
2454
2455     SetName(conv.m_name);
2456     m_encoding = conv.m_encoding;
2457 }
2458
2459 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2460 {
2461     Clear();
2462
2463     SetName(conv.m_name);
2464     m_encoding = conv.m_encoding;
2465
2466     return *this;
2467 }
2468
2469 void wxCSConv::Clear()
2470 {
2471     free(m_name);
2472     delete m_convReal;
2473
2474     m_name = NULL;
2475     m_convReal = NULL;
2476 }
2477
2478 void wxCSConv::SetName(const wxChar *charset)
2479 {
2480     if (charset)
2481     {
2482         m_name = wxStrdup(charset);
2483         m_deferred = true;
2484     }
2485 }
2486
2487 wxMBConv *wxCSConv::DoCreate() const
2488 {
2489     // check for the special case of ASCII or ISO8859-1 charset: as we have
2490     // special knowledge of it anyhow, we don't need to create a special
2491     // conversion object
2492     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2493     {
2494         // don't convert at all
2495         return NULL;
2496     }
2497
2498     // we trust OS to do conversion better than we can so try external
2499     // conversion methods first
2500     //
2501     // the full order is:
2502     //      1. OS conversion (iconv() under Unix or Win32 API)
2503     //      2. hard coded conversions for UTF
2504     //      3. wxEncodingConverter as fall back
2505
2506     // step (1)
2507 #ifdef HAVE_ICONV
2508 #if !wxUSE_FONTMAP
2509     if ( m_name )
2510 #endif // !wxUSE_FONTMAP
2511     {
2512         wxString name(m_name);
2513
2514 #if wxUSE_FONTMAP
2515         if ( name.empty() )
2516             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2517 #endif // wxUSE_FONTMAP
2518
2519         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2520         if ( conv->IsOk() )
2521             return conv;
2522
2523         delete conv;
2524     }
2525 #endif // HAVE_ICONV
2526
2527 #ifdef wxHAVE_WIN32_MB2WC
2528     {
2529 #if wxUSE_FONTMAP
2530         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2531                                       : new wxMBConv_win32(m_encoding);
2532         if ( conv->IsOk() )
2533             return conv;
2534
2535         delete conv;
2536 #else
2537         return NULL;
2538 #endif
2539     }
2540 #endif // wxHAVE_WIN32_MB2WC
2541 #if defined(__WXMAC__)
2542     {
2543         // leave UTF16 and UTF32 to the built-ins of wx
2544         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2545             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2546         {
2547
2548 #if wxUSE_FONTMAP
2549             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2550                                         : new wxMBConv_mac(m_encoding);
2551 #else
2552             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2553 #endif
2554             if ( conv->IsOk() )
2555                  return conv;
2556
2557             delete conv;
2558         }
2559     }
2560 #endif
2561 #if defined(__WXCOCOA__)
2562     {
2563         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2564         {
2565
2566 #if wxUSE_FONTMAP
2567             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2568                                           : new wxMBConv_cocoa(m_encoding);
2569 #else
2570             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2571 #endif
2572             if ( conv->IsOk() )
2573                  return conv;
2574
2575             delete conv;
2576         }
2577     }
2578 #endif
2579     // step (2)
2580     wxFontEncoding enc = m_encoding;
2581 #if wxUSE_FONTMAP
2582     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2583     {
2584         // use "false" to suppress interactive dialogs -- we can be called from
2585         // anywhere and popping up a dialog from here is the last thing we want to
2586         // do
2587         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2588     }
2589 #endif // wxUSE_FONTMAP
2590
2591     switch ( enc )
2592     {
2593         case wxFONTENCODING_UTF7:
2594              return new wxMBConvUTF7;
2595
2596         case wxFONTENCODING_UTF8:
2597              return new wxMBConvUTF8;
2598
2599         case wxFONTENCODING_UTF16BE:
2600              return new wxMBConvUTF16BE;
2601
2602         case wxFONTENCODING_UTF16LE:
2603              return new wxMBConvUTF16LE;
2604
2605         case wxFONTENCODING_UTF32BE:
2606              return new wxMBConvUTF32BE;
2607
2608         case wxFONTENCODING_UTF32LE:
2609              return new wxMBConvUTF32LE;
2610
2611         default:
2612              // nothing to do but put here to suppress gcc warnings
2613              ;
2614     }
2615
2616     // step (3)
2617 #if wxUSE_FONTMAP
2618     {
2619         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2620                                       : new wxMBConv_wxwin(m_encoding);
2621         if ( conv->IsOk() )
2622             return conv;
2623
2624         delete conv;
2625     }
2626 #endif // wxUSE_FONTMAP
2627
2628     // NB: This is a hack to prevent deadlock. What could otherwise happen
2629     //     in Unicode build: wxConvLocal creation ends up being here
2630     //     because of some failure and logs the error. But wxLog will try to
2631     //     attach timestamp, for which it will need wxConvLocal (to convert
2632     //     time to char* and then wchar_t*), but that fails, tries to log
2633     //     error, but wxLog has a (already locked) critical section that
2634     //     guards static buffer.
2635     static bool alreadyLoggingError = false;
2636     if (!alreadyLoggingError)
2637     {
2638         alreadyLoggingError = true;
2639         wxLogError(_("Cannot convert from the charset '%s'!"),
2640                    m_name ? m_name
2641                       :
2642 #if wxUSE_FONTMAP
2643                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2644 #else // !wxUSE_FONTMAP
2645                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2646 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2647               );
2648         alreadyLoggingError = false;
2649     }
2650
2651     return NULL;
2652 }
2653
2654 void wxCSConv::CreateConvIfNeeded() const
2655 {
2656     if ( m_deferred )
2657     {
2658         wxCSConv *self = (wxCSConv *)this; // const_cast
2659
2660 #if wxUSE_INTL
2661         // if we don't have neither the name nor the encoding, use the default
2662         // encoding for this system
2663         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2664         {
2665             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2666         }
2667 #endif // wxUSE_INTL
2668
2669         self->m_convReal = DoCreate();
2670         self->m_deferred = false;
2671     }
2672 }
2673
2674 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2675 {
2676     CreateConvIfNeeded();
2677
2678     if (m_convReal)
2679         return m_convReal->MB2WC(buf, psz, n);
2680
2681     // latin-1 (direct)
2682     size_t len = strlen(psz);
2683
2684     if (buf)
2685     {
2686         for (size_t c = 0; c <= len; c++)
2687             buf[c] = (unsigned char)(psz[c]);
2688     }
2689
2690     return len;
2691 }
2692
2693 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2694 {
2695     CreateConvIfNeeded();
2696
2697     if (m_convReal)
2698         return m_convReal->WC2MB(buf, psz, n);
2699
2700     // latin-1 (direct)
2701     const size_t len = wxWcslen(psz);
2702     if (buf)
2703     {
2704         for (size_t c = 0; c <= len; c++)
2705         {
2706             if (psz[c] > 0xFF)
2707                 return (size_t)-1;
2708             buf[c] = (char)psz[c];
2709         }
2710     }
2711     else
2712     {
2713         for (size_t c = 0; c <= len; c++)
2714         {
2715             if (psz[c] > 0xFF)
2716                 return (size_t)-1;
2717         }
2718     }
2719
2720     return len;
2721 }
2722
2723 // ----------------------------------------------------------------------------
2724 // globals
2725 // ----------------------------------------------------------------------------
2726
2727 #ifdef __WINDOWS__
2728     static wxMBConv_win32 wxConvLibcObj;
2729 #elif defined(__WXMAC__) && !defined(__MACH__)
2730     static wxMBConv_mac wxConvLibcObj ;
2731 #else
2732     static wxMBConvLibc wxConvLibcObj;
2733 #endif
2734
2735 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2736 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2737 static wxMBConvUTF7 wxConvUTF7Obj;
2738 static wxMBConvUTF8 wxConvUTF8Obj;
2739 static wxConvBrokenFileNames wxConvBrokenFileNamesObj;
2740
2741 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2742 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2743 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2744 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2745 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2746 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2747 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2748 #ifdef __WXOSX__
2749                                     wxConvUTF8Obj;
2750 #elif __WXGTK20__
2751                                     wxConvBrokenFileNamesObj;
2752 #else
2753                                     wxConvLibcObj;
2754 #endif
2755
2756
2757 #else // !wxUSE_WCHAR_T
2758
2759 // stand-ins in absence of wchar_t
2760 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2761                                 wxConvISO8859_1,
2762                                 wxConvLocal,
2763                                 wxConvUTF8;
2764
2765 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2766
2767