src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58 #ifdef HAVE_LANGINFO_H
  59   #include <langinfo.h>
  60 #endif
  61
  62 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  63     #define wxHAVE_WIN32_MB2WC
  64 #endif // __WIN32__ but !__WXMICROWIN__
  65
  66 // ----------------------------------------------------------------------------
  67 // headers
  68 // ----------------------------------------------------------------------------
  69
  70 #ifdef __SALFORDC__
  71     #include <clib.h>
  72 #endif
  73
  74 #ifdef HAVE_ICONV
  75     #include <iconv.h>
  76     #include "wx/thread.h"
  77 #endif
  78
  79 #include "wx/encconv.h"
  80 #include "wx/fontmap.h"
  81 #include "wx/utils.h"
  82
  83 #ifdef __WXMAC__
  84 #include <ATSUnicode.h>
  85 #include <TextCommon.h>
  86 #include <TextEncodingConverter.h>
  87
  88 #include  "wx/mac/private.h"  // includes mac headers
  89 #endif
  90 // ----------------------------------------------------------------------------
  91 // macros
  92 // ----------------------------------------------------------------------------
  93
  94 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  95 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  96
  97 #if SIZEOF_WCHAR_T == 4
  98     #define WC_NAME         "UCS4"
  99     #define WC_BSWAP         BSWAP_UCS4
 100     #ifdef WORDS_BIGENDIAN
 101       #define WC_NAME_BEST  "UCS-4BE"
 102     #else
 103       #define WC_NAME_BEST  "UCS-4LE"
 104     #endif
 105 #elif SIZEOF_WCHAR_T == 2
 106     #define WC_NAME         "UTF16"
 107     #define WC_BSWAP         BSWAP_UTF16
 108     #define WC_UTF16
 109     #ifdef WORDS_BIGENDIAN
 110       #define WC_NAME_BEST  "UTF-16BE"
 111     #else
 112       #define WC_NAME_BEST  "UTF-16LE"
 113     #endif
 114 #else // sizeof(wchar_t) != 2 nor 4
 115     // does this ever happen?
 116     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 117 #endif
 118
 119 // ============================================================================
 120 // implementation
 121 // ============================================================================
 122
 123 // ----------------------------------------------------------------------------
 124 // UTF-16 en/decoding to/from UCS-4
 125 // ----------------------------------------------------------------------------
 126
 127
 128 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 129 {
 130     if (input<=0xffff)
 131     {
 132         if (output)
 133             *output = (wxUint16) input;
 134         return 1;
 135     }
 136     else if (input>=0x110000)
 137     {
 138         return (size_t)-1;
 139     }
 140     else
 141     {
 142         if (output)
 143         {
 144             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 145             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 146         }
 147         return 2;
 148     }
 149 }
 150
 151 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 152 {
 153     if ((*input<0xd800) || (*input>0xdfff))
 154     {
 155         output = *input;
 156         return 1;
 157     }
 158     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 159     {
 160         output = *input;
 161         return (size_t)-1;
 162     }
 163     else
 164     {
 165         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 166         return 2;
 167     }
 168 }
 169
 170
 171 // ----------------------------------------------------------------------------
 172 // wxMBConv
 173 // ----------------------------------------------------------------------------
 174
 175 wxMBConv::~wxMBConv()
 176 {
 177     // nothing to do here (necessary for Darwin linking probably)
 178 }
 179
 180 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 181 {
 182     if ( psz )
 183     {
 184         // calculate the length of the buffer needed first
 185         size_t nLen = MB2WC(NULL, psz, 0);
 186         if ( nLen != (size_t)-1 )
 187         {
 188             // now do the actual conversion
 189             wxWCharBuffer buf(nLen);
 190             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 191             if ( nLen != (size_t)-1 )
 192             {
 193                 return buf;
 194             }
 195         }
 196     }
 197
 198     wxWCharBuffer buf((wchar_t *)NULL);
 199
 200     return buf;
 201 }
 202
 203 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 204 {
 205     if ( pwz )
 206     {
 207         size_t nLen = WC2MB(NULL, pwz, 0);
 208         if ( nLen != (size_t)-1 )
 209         {
 210             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 211             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 212             if ( nLen != (size_t)-1 )
 213             {
 214                 return buf;
 215             }
 216         }
 217     }
 218
 219     wxCharBuffer buf((char *)NULL);
 220
 221     return buf;
 222 }
 223
 224 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 225 {
 226     wxASSERT(pOutSize != NULL);
 227
 228     const char* szEnd = szString + nStringLen + 1;
 229     const char* szPos = szString;
 230     const char* szStart = szPos;
 231
 232     size_t nActualLength = 0;
 233     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 234
 235     wxWCharBuffer theBuffer(nCurrentSize);
 236
 237     //Convert the string until the length() is reached, continuing the
 238     //loop every time a null character is reached
 239     while(szPos != szEnd)
 240     {
 241         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 242
 243         //Get the length of the current (sub)string
 244         size_t nLen = MB2WC(NULL, szPos, 0);
 245
 246         //Invalid conversion?
 247         if( nLen == (size_t)-1 )
 248         {
 249             *pOutSize = 0;
 250             theBuffer.data()[0u] = wxT('\0');
 251             return theBuffer;
 252         }
 253
 254
 255         //Increase the actual length (+1 for current null character)
 256         nActualLength += nLen + 1;
 257
 258         //if buffer too big, realloc the buffer
 259         if (nActualLength > (nCurrentSize+1))
 260         {
 261             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 262             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 263             theBuffer = theNewBuffer;
 264             nCurrentSize <<= 1;
 265         }
 266
 267         //Convert the current (sub)string
 268         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 269         {
 270             *pOutSize = 0;
 271             theBuffer.data()[0u] = wxT('\0');
 272             return theBuffer;
 273         }
 274
 275         //Increment to next (sub)string
 276         //Note that we have to use strlen here instead of nLen
 277         //here because XX2XX gives us the size of the output buffer,
 278         //not neccessarly the length of the string
 279         szPos += strlen(szPos) + 1;
 280     }
 281
 282     //success - return actual length and the buffer
 283     *pOutSize = nActualLength;
 284     return theBuffer;
 285 }
 286
 287 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 288 {
 289     wxASSERT(pOutSize != NULL);
 290
 291     const wchar_t* szEnd = szString + nStringLen + 1;
 292     const wchar_t* szPos = szString;
 293     const wchar_t* szStart = szPos;
 294
 295     size_t nActualLength = 0;
 296     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 297
 298     wxCharBuffer theBuffer(nCurrentSize);
 299
 300     //Convert the string until the length() is reached, continuing the
 301     //loop every time a null character is reached
 302     while(szPos != szEnd)
 303     {
 304         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 305
 306         //Get the length of the current (sub)string
 307         size_t nLen = WC2MB(NULL, szPos, 0);
 308
 309         //Invalid conversion?
 310         if( nLen == (size_t)-1 )
 311         {
 312             *pOutSize = 0;
 313             theBuffer.data()[0u] = wxT('\0');
 314             return theBuffer;
 315         }
 316
 317         //Increase the actual length (+1 for current null character)
 318         nActualLength += nLen + 1;
 319
 320         //if buffer too big, realloc the buffer
 321         if (nActualLength > (nCurrentSize+1))
 322         {
 323             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 324             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 325             theBuffer = theNewBuffer;
 326             nCurrentSize <<= 1;
 327         }
 328
 329         //Convert the current (sub)string
 330         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 331         {
 332             *pOutSize = 0;
 333             theBuffer.data()[0u] = wxT('\0');
 334             return theBuffer;
 335         }
 336
 337         //Increment to next (sub)string
 338         //Note that we have to use wxWcslen here instead of nLen
 339         //here because XX2XX gives us the size of the output buffer,
 340         //not neccessarly the length of the string
 341         szPos += wxWcslen(szPos) + 1;
 342     }
 343
 344     //success - return actual length and the buffer
 345     *pOutSize = nActualLength;
 346     return theBuffer;
 347 }
 348
 349 // ----------------------------------------------------------------------------
 350 // wxMBConvLibc
 351 // ----------------------------------------------------------------------------
 352
 353 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 354 {
 355     return wxMB2WC(buf, psz, n);
 356 }
 357
 358 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 359 {
 360     return wxWC2MB(buf, psz, n);
 361 }
 362
 363 #ifdef __WXGTK20__
 364
 365 // ----------------------------------------------------------------------------
 366 // wxConvBrokenFileNames is made for GTK2 in Unicode mode when
 367 // files are accidentally written in an encoding which is not
 368 // the system encoding. Typically, the system encoding will be
 369 // UTF8 but there might be files stored in ISO8859-1 on disk.
 370 // ----------------------------------------------------------------------------
 371
 372 class wxConvBrokenFileNames : public wxMBConv
 373 {
 374 public:
 375     wxConvBrokenFileNames();
 376     virtual ~wxConvBrokenFileNames() { delete m_conv; }
 377
 378     virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
 379     virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
 380
 381 private:
 382     // the conversion object we forward to
 383     wxMBConv *m_conv;
 384 };
 385
 386 wxConvBrokenFileNames::wxConvBrokenFileNames()
 387 {
 388     // decide which conversion to use for the file names
 389
 390     // (1) this variable exists for the sole purpose of specifying the encoding
 391     //     of the filenames for GTK+ programs, so use it if it is set
 392     const wxChar *encName = wxGetenv(_T("G_FILENAME_ENCODING"));
 393     if ( encName )
 394     {
 395         m_conv = new wxCSConv(encName);
 396     }
 397     else // no G_FILENAME_ENCODING
 398     {
 399         // (2) if a non default locale is set, assume that the user wants his
 400         //     filenames in this locale too
 401         switch ( wxLocale::GetSystemEncoding() )
 402         {
 403             default:
 404                 m_conv = new wxMBConvLibc;
 405                 break;
 406
 407             // (3) finally use UTF-8 by default
 408             case wxFONTENCODING_SYSTEM:
 409             case wxFONTENCODING_UTF8:
 410                 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 411                 break;
 412         }
 413     }
 414 }
 415
 416 size_t
 417 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 418                              const char *psz,
 419                              size_t outputSize) const
 420 {
 421     return m_conv->MB2WC( outputBuf, psz, outputSize );
 422 }
 423
 424 size_t
 425 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 426                              const wchar_t *psz,
 427                              size_t outputSize) const
 428 {
 429     return m_conv->WC2MB( outputBuf, psz, outputSize );
 430 }
 431
 432 #endif // __WXGTK20__
 433
 434 // ----------------------------------------------------------------------------
 435 // UTF-7
 436 // ----------------------------------------------------------------------------
 437
 438 // Implementation (C) 2004 Fredrik Roubert
 439
 440 //
 441 // BASE64 decoding table
 442 //
 443 static const unsigned char utf7unb64[] =
 444 {
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 450     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 451     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 452     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 453     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 454     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 455     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 456     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 457     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 458     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 459     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 460     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 461     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 462     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 463     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 464     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 465     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 466     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 467     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 468     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 469     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 470     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 471     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 472     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 473     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 474     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 475     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 476     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 477 };
 478
 479 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 480 {
 481     size_t len = 0;
 482
 483     while (*psz && ((!buf) || (len < n)))
 484     {
 485         unsigned char cc = *psz++;
 486         if (cc != '+')
 487         {
 488             // plain ASCII char
 489             if (buf)
 490                 *buf++ = cc;
 491             len++;
 492         }
 493         else if (*psz == '-')
 494         {
 495             // encoded plus sign
 496             if (buf)
 497                 *buf++ = cc;
 498             len++;
 499             psz++;
 500         }
 501         else
 502         {
 503             // BASE64 encoded string
 504             bool lsb;
 505             unsigned char c;
 506             unsigned int d, l;
 507             for (lsb = false, d = 0, l = 0;
 508                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 509             {
 510                 d <<= 6;
 511                 d += cc;
 512                 for (l += 6; l >= 8; lsb = !lsb)
 513                 {
 514                     c = (unsigned char)((d >> (l -= 8)) % 256);
 515                     if (lsb)
 516                     {
 517                         if (buf)
 518                             *buf++ |= c;
 519                         len ++;
 520                     }
 521                     else
 522                         if (buf)
 523                             *buf = (wchar_t)(c << 8);
 524                 }
 525             }
 526             if (*psz == '-')
 527                 psz++;
 528         }
 529     }
 530     if (buf && (len < n))
 531         *buf = 0;
 532     return len;
 533 }
 534
 535 //
 536 // BASE64 encoding table
 537 //
 538 static const unsigned char utf7enb64[] =
 539 {
 540     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 541     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 542     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 543     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 544     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 545     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 546     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 547     '4', '5', '6', '7', '8', '9', '+', '/'
 548 };
 549
 550 //
 551 // UTF-7 encoding table
 552 //
 553 // 0 - Set D (directly encoded characters)
 554 // 1 - Set O (optional direct characters)
 555 // 2 - whitespace characters (optional)
 556 // 3 - special characters
 557 //
 558 static const unsigned char utf7encode[128] =
 559 {
 560     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 561     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 562     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 563     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 564     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 565     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 566     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 567     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 568 };
 569
 570 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 571 {
 572
 573
 574     size_t len = 0;
 575
 576     while (*psz && ((!buf) || (len < n)))
 577     {
 578         wchar_t cc = *psz++;
 579         if (cc < 0x80 && utf7encode[cc] < 1)
 580         {
 581             // plain ASCII char
 582             if (buf)
 583                 *buf++ = (char)cc;
 584             len++;
 585         }
 586 #ifndef WC_UTF16
 587         else if (((wxUint32)cc) > 0xffff)
 588             {
 589             // no surrogate pair generation (yet?)
 590             return (size_t)-1;
 591         }
 592 #endif
 593         else
 594         {
 595             if (buf)
 596                 *buf++ = '+';
 597             len++;
 598             if (cc != '+')
 599             {
 600                 // BASE64 encode string
 601                 unsigned int lsb, d, l;
 602                 for (d = 0, l = 0;; psz++)
 603                 {
 604                     for (lsb = 0; lsb < 2; lsb ++)
 605                     {
 606                         d <<= 8;
 607                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 608
 609                         for (l += 8; l >= 6; )
 610                         {
 611                             l -= 6;
 612                             if (buf)
 613                                 *buf++ = utf7enb64[(d >> l) % 64];
 614                             len++;
 615                         }
 616                     }
 617                     cc = *psz;
 618                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 619                         break;
 620                 }
 621                 if (l != 0)
 622                 {
 623                     if (buf)
 624                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 625                     len++;
 626                 }
 627             }
 628             if (buf)
 629                 *buf++ = '-';
 630             len++;
 631         }
 632     }
 633     if (buf && (len < n))
 634         *buf = 0;
 635     return len;
 636 }
 637
 638 // ----------------------------------------------------------------------------
 639 // UTF-8
 640 // ----------------------------------------------------------------------------
 641
 642 static wxUint32 utf8_max[]=
 643     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 644
 645 // boundaries of the private use area we use to (temporarily) remap invalid
 646 // characters invalid in a UTF-8 encoded string
 647 const wxUint32 wxUnicodePUA = 0x100000;
 648 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 649
 650 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 651 {
 652     size_t len = 0;
 653
 654     while (*psz && ((!buf) || (len < n)))
 655     {
 656         const char *opsz = psz;
 657         bool invalid = false;
 658         unsigned char cc = *psz++, fc = cc;
 659         unsigned cnt;
 660         for (cnt = 0; fc & 0x80; cnt++)
 661             fc <<= 1;
 662         if (!cnt)
 663         {
 664             // plain ASCII char
 665             if (buf)
 666                 *buf++ = cc;
 667             len++;
 668         }
 669         else
 670         {
 671             cnt--;
 672             if (!cnt)
 673             {
 674                 // invalid UTF-8 sequence
 675                 invalid = true;
 676             }
 677             else
 678             {
 679                 unsigned ocnt = cnt - 1;
 680                 wxUint32 res = cc & (0x3f >> cnt);
 681                 while (cnt--)
 682                 {
 683                     cc = *psz;
 684                     if ((cc & 0xC0) != 0x80)
 685                     {
 686                         // invalid UTF-8 sequence
 687                         invalid = true;
 688                         break;
 689                     }
 690                     psz++;
 691                     res = (res << 6) | (cc & 0x3f);
 692                 }
 693                 if (invalid || res <= utf8_max[ocnt])
 694                 {
 695                     // illegal UTF-8 encoding
 696                     invalid = true;
 697                 }
 698                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 699                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 700                 {
 701                     // if one of our PUA characters turns up externally
 702                     // it must also be treated as an illegal sequence
 703                     // (a bit like you have to escape an escape character)
 704                     invalid = true;
 705                 }
 706                 else
 707                 {
 708 #ifdef WC_UTF16
 709                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 710                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 711                     if (pa == (size_t)-1)
 712                     {
 713                         invalid = true;
 714                     }
 715                     else
 716                     {
 717                         if (buf)
 718                             buf += pa;
 719                         len += pa;
 720                     }
 721 #else // !WC_UTF16
 722                     if (buf)
 723                         *buf++ = res;
 724                     len++;
 725 #endif // WC_UTF16/!WC_UTF16
 726                 }
 727             }
 728             if (invalid)
 729             {
 730                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 731                 {
 732                     while (opsz < psz && (!buf || len < n))
 733                     {
 734 #ifdef WC_UTF16
 735                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 736                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 737                         wxASSERT(pa != (size_t)-1);
 738                         if (buf)
 739                             buf += pa;
 740                         opsz++;
 741                         len += pa;
 742 #else
 743                         if (buf)
 744                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 745                         opsz++;
 746                         len++;
 747 #endif
 748                     }
 749                 }
 750                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 751                 {
 752                     while (opsz < psz && (!buf || len < n))
 753                     {
 754                         if ( buf && len + 3 < n )
 755                         {
 756                             unsigned char n = *opsz;
 757                             *buf++ = L'\\';
 758                             *buf++ = L'0' + n / 0100;
 759                             *buf++ = L'0' + (n % 0100) / 010;
 760                             *buf++ = L'0' + n % 010;
 761                         }
 762                         opsz++;
 763                         len += 4;
 764                     }
 765                 }
 766                 else // MAP_INVALID_UTF8_NOT
 767                 {
 768                     return (size_t)-1;
 769                 }
 770             }
 771         }
 772     }
 773     if (buf && (len < n))
 774         *buf = 0;
 775     return len;
 776 }
 777
 778 static inline bool isoctal(wchar_t wch)
 779 {
 780     return L'0' <= wch && wch <= L'7';
 781 }
 782
 783 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 784 {
 785     size_t len = 0;
 786
 787     while (*psz && ((!buf) || (len < n)))
 788     {
 789         wxUint32 cc;
 790 #ifdef WC_UTF16
 791         // cast is ok for WC_UTF16
 792         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 793         psz += (pa == (size_t)-1) ? 1 : pa;
 794 #else
 795         cc=(*psz++) & 0x7fffffff;
 796 #endif
 797
 798         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 799                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 800         {
 801             if (buf)
 802                 *buf++ = (char)(cc - wxUnicodePUA);
 803             len++;
 804         }
 805         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 806                     cc == L'\\' &&
 807                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 808         {
 809             if (buf)
 810             {
 811                 *buf++ = (char) (psz[0] - L'0')*0100 +
 812                                 (psz[1] - L'0')*010 +
 813                                 (psz[2] - L'0');
 814             }
 815
 816             psz += 3;
 817             len++;
 818         }
 819         else
 820         {
 821             unsigned cnt;
 822             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 823             if (!cnt)
 824             {
 825                 // plain ASCII char
 826                 if (buf)
 827                     *buf++ = (char) cc;
 828                 len++;
 829             }
 830
 831             else
 832             {
 833                 len += cnt + 1;
 834                 if (buf)
 835                 {
 836                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 837                     while (cnt--)
 838                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 839                 }
 840             }
 841         }
 842     }
 843
 844     if (buf && (len<n))
 845         *buf = 0;
 846
 847     return len;
 848 }
 849
 850 // ----------------------------------------------------------------------------
 851 // UTF-16
 852 // ----------------------------------------------------------------------------
 853
 854 #ifdef WORDS_BIGENDIAN
 855     #define wxMBConvUTF16straight wxMBConvUTF16BE
 856     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 857 #else
 858     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 859     #define wxMBConvUTF16straight wxMBConvUTF16LE
 860 #endif
 861
 862
 863 #ifdef WC_UTF16
 864
 865 // copy 16bit MB to 16bit String
 866 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 867 {
 868     size_t len=0;
 869
 870     while (*(wxUint16*)psz && (!buf || len < n))
 871     {
 872         if (buf)
 873             *buf++ = *(wxUint16*)psz;
 874         len++;
 875
 876         psz += sizeof(wxUint16);
 877     }
 878     if (buf && len<n)   *buf=0;
 879
 880     return len;
 881 }
 882
 883
 884 // copy 16bit String to 16bit MB
 885 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 886 {
 887     size_t len=0;
 888
 889     while (*psz && (!buf || len < n))
 890     {
 891         if (buf)
 892         {
 893             *(wxUint16*)buf = *psz;
 894             buf += sizeof(wxUint16);
 895         }
 896         len += sizeof(wxUint16);
 897         psz++;
 898     }
 899     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 900
 901     return len;
 902 }
 903
 904
 905 // swap 16bit MB to 16bit String
 906 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 907 {
 908     size_t len=0;
 909
 910     while (*(wxUint16*)psz && (!buf || len < n))
 911     {
 912         if (buf)
 913         {
 914             ((char *)buf)[0] = psz[1];
 915             ((char *)buf)[1] = psz[0];
 916             buf++;
 917         }
 918         len++;
 919         psz += sizeof(wxUint16);
 920     }
 921     if (buf && len<n)   *buf=0;
 922
 923     return len;
 924 }
 925
 926
 927 // swap 16bit String to 16bit MB
 928 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 929 {
 930     size_t len=0;
 931
 932     while (*psz && (!buf || len < n))
 933     {
 934         if (buf)
 935         {
 936             *buf++ = ((char*)psz)[1];
 937             *buf++ = ((char*)psz)[0];
 938         }
 939         len += sizeof(wxUint16);
 940         psz++;
 941     }
 942     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 943
 944     return len;
 945 }
 946
 947
 948 #else // WC_UTF16
 949
 950
 951 // copy 16bit MB to 32bit String
 952 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 953 {
 954     size_t len=0;
 955
 956     while (*(wxUint16*)psz && (!buf || len < n))
 957     {
 958         wxUint32 cc;
 959         size_t pa=decode_utf16((wxUint16*)psz, cc);
 960         if (pa == (size_t)-1)
 961             return pa;
 962
 963         if (buf)
 964             *buf++ = cc;
 965         len++;
 966         psz += pa * sizeof(wxUint16);
 967     }
 968     if (buf && len<n)   *buf=0;
 969
 970     return len;
 971 }
 972
 973
 974 // copy 32bit String to 16bit MB
 975 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 976 {
 977     size_t len=0;
 978
 979     while (*psz && (!buf || len < n))
 980     {
 981         wxUint16 cc[2];
 982         size_t pa=encode_utf16(*psz, cc);
 983
 984         if (pa == (size_t)-1)
 985             return pa;
 986
 987         if (buf)
 988         {
 989             *(wxUint16*)buf = cc[0];
 990             buf += sizeof(wxUint16);
 991             if (pa > 1)
 992             {
 993                 *(wxUint16*)buf = cc[1];
 994                 buf += sizeof(wxUint16);
 995             }
 996         }
 997
 998         len += pa*sizeof(wxUint16);
 999         psz++;
1000     }
1001     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1002
1003     return len;
1004 }
1005
1006
1007 // swap 16bit MB to 32bit String
1008 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1009 {
1010     size_t len=0;
1011
1012     while (*(wxUint16*)psz && (!buf || len < n))
1013     {
1014         wxUint32 cc;
1015         char tmp[4];
1016         tmp[0]=psz[1];  tmp[1]=psz[0];
1017         tmp[2]=psz[3];  tmp[3]=psz[2];
1018
1019         size_t pa=decode_utf16((wxUint16*)tmp, cc);
1020         if (pa == (size_t)-1)
1021             return pa;
1022
1023         if (buf)
1024             *buf++ = cc;
1025
1026         len++;
1027         psz += pa * sizeof(wxUint16);
1028     }
1029     if (buf && len<n)   *buf=0;
1030
1031     return len;
1032 }
1033
1034
1035 // swap 32bit String to 16bit MB
1036 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1037 {
1038     size_t len=0;
1039
1040     while (*psz && (!buf || len < n))
1041     {
1042         wxUint16 cc[2];
1043         size_t pa=encode_utf16(*psz, cc);
1044
1045         if (pa == (size_t)-1)
1046             return pa;
1047
1048         if (buf)
1049         {
1050             *buf++ = ((char*)cc)[1];
1051             *buf++ = ((char*)cc)[0];
1052             if (pa > 1)
1053             {
1054                 *buf++ = ((char*)cc)[3];
1055                 *buf++ = ((char*)cc)[2];
1056             }
1057         }
1058
1059         len += pa*sizeof(wxUint16);
1060         psz++;
1061     }
1062     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1063
1064     return len;
1065 }
1066
1067 #endif // WC_UTF16
1068
1069
1070 // ----------------------------------------------------------------------------
1071 // UTF-32
1072 // ----------------------------------------------------------------------------
1073
// The UTF-32 conversion functions below are implemented once as "straight"
// (bytes are copied in the order they are stored) and once as "swap" (the
// 4 bytes of each unit are reversed). These macros map the two
// implementations to the LE and BE classes: if WORDS_BIGENDIAN is defined
// the straight version is the BE one, otherwise it is the LE one.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global instances of both converters
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1085
1086
1087 #ifdef WC_UTF16
1088
1089 // copy 32bit MB to 16bit String
1090 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1091 {
1092     size_t len=0;
1093
1094     while (*(wxUint32*)psz && (!buf || len < n))
1095     {
1096         wxUint16 cc[2];
1097
1098         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1099         if (pa == (size_t)-1)
1100             return pa;
1101
1102         if (buf)
1103         {
1104             *buf++ = cc[0];
1105             if (pa > 1)
1106                 *buf++ = cc[1];
1107         }
1108         len += pa;
1109         psz += sizeof(wxUint32);
1110     }
1111     if (buf && len<n)   *buf=0;
1112
1113     return len;
1114 }
1115
1116
1117 // copy 16bit String to 32bit MB
1118 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1119 {
1120     size_t len=0;
1121
1122     while (*psz && (!buf || len < n))
1123     {
1124         wxUint32 cc;
1125
1126         // cast is ok for WC_UTF16
1127         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1128         if (pa == (size_t)-1)
1129             return pa;
1130
1131         if (buf)
1132         {
1133             *(wxUint32*)buf = cc;
1134             buf += sizeof(wxUint32);
1135         }
1136         len += sizeof(wxUint32);
1137         psz += pa;
1138     }
1139
1140     if (buf && len<=n-sizeof(wxUint32))
1141         *(wxUint32*)buf=0;
1142
1143     return len;
1144 }
1145
1146
1147
1148 // swap 32bit MB to 16bit String
1149 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1150 {
1151     size_t len=0;
1152
1153     while (*(wxUint32*)psz && (!buf || len < n))
1154     {
1155         char tmp[4];
1156         tmp[0] = psz[3];   tmp[1] = psz[2];
1157         tmp[2] = psz[1];   tmp[3] = psz[0];
1158
1159
1160         wxUint16 cc[2];
1161
1162         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1163         if (pa == (size_t)-1)
1164             return pa;
1165
1166         if (buf)
1167         {
1168             *buf++ = cc[0];
1169             if (pa > 1)
1170                 *buf++ = cc[1];
1171         }
1172         len += pa;
1173         psz += sizeof(wxUint32);
1174     }
1175
1176     if (buf && len<n)
1177         *buf=0;
1178
1179     return len;
1180 }
1181
1182
1183 // swap 16bit String to 32bit MB
1184 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1185 {
1186     size_t len=0;
1187
1188     while (*psz && (!buf || len < n))
1189     {
1190         char cc[4];
1191
1192         // cast is ok for WC_UTF16
1193         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1194         if (pa == (size_t)-1)
1195             return pa;
1196
1197         if (buf)
1198         {
1199             *buf++ = cc[3];
1200             *buf++ = cc[2];
1201             *buf++ = cc[1];
1202             *buf++ = cc[0];
1203         }
1204         len += sizeof(wxUint32);
1205         psz += pa;
1206     }
1207
1208     if (buf && len<=n-sizeof(wxUint32))
1209         *(wxUint32*)buf=0;
1210
1211     return len;
1212 }
1213
1214 #else // WC_UTF16
1215
1216
1217 // copy 32bit MB to 32bit String
1218 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1219 {
1220     size_t len=0;
1221
1222     while (*(wxUint32*)psz && (!buf || len < n))
1223     {
1224         if (buf)
1225             *buf++ = *(wxUint32*)psz;
1226         len++;
1227         psz += sizeof(wxUint32);
1228     }
1229
1230     if (buf && len<n)
1231         *buf=0;
1232
1233     return len;
1234 }
1235
1236
1237 // copy 32bit String to 32bit MB
1238 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1239 {
1240     size_t len=0;
1241
1242     while (*psz && (!buf || len < n))
1243     {
1244         if (buf)
1245         {
1246             *(wxUint32*)buf = *psz;
1247             buf += sizeof(wxUint32);
1248         }
1249
1250         len += sizeof(wxUint32);
1251         psz++;
1252     }
1253
1254     if (buf && len<=n-sizeof(wxUint32))
1255         *(wxUint32*)buf=0;
1256
1257     return len;
1258 }
1259
1260
1261 // swap 32bit MB to 32bit String
1262 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1263 {
1264     size_t len=0;
1265
1266     while (*(wxUint32*)psz && (!buf || len < n))
1267     {
1268         if (buf)
1269         {
1270             ((char *)buf)[0] = psz[3];
1271             ((char *)buf)[1] = psz[2];
1272             ((char *)buf)[2] = psz[1];
1273             ((char *)buf)[3] = psz[0];
1274             buf++;
1275         }
1276         len++;
1277         psz += sizeof(wxUint32);
1278     }
1279
1280     if (buf && len<n)
1281         *buf=0;
1282
1283     return len;
1284 }
1285
1286
1287 // swap 32bit String to 32bit MB
1288 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1289 {
1290     size_t len=0;
1291
1292     while (*psz && (!buf || len < n))
1293     {
1294         if (buf)
1295         {
1296             *buf++ = ((char *)psz)[3];
1297             *buf++ = ((char *)psz)[2];
1298             *buf++ = ((char *)psz)[1];
1299             *buf++ = ((char *)psz)[0];
1300         }
1301         len += sizeof(wxUint32);
1302         psz++;
1303     }
1304
1305     if (buf && len<=n-sizeof(wxUint32))
1306         *(wxUint32*)buf=0;
1307
1308     return len;
1309 }
1310
1311
1312 #endif // WC_UTF16
1313
1314
1315 // ============================================================================
1316 // The classes doing conversion using the iconv_xxx() functions
1317 // ============================================================================
1318
1319 #ifdef HAVE_ICONV
1320
1321 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1322 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1323 //     (unless there's yet another bug in glibc) the only case when iconv()
1324 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1325 //     left in the input buffer -- when _real_ error occurs,
1326 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1327 //     iconv() failure.
1328 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): cres is the value returned by iconv() and
// bufLeft the number of input bytes it left unconverted. With glibc 2.0 and
// 2.1 a (size_t)-1 result with errno == E2BIG and no input left is not
// considered to be an error, otherwise any (size_t)-1 result is.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// cast used for the input buffer argument of iconv() calls in this file
// (ICONV_CONST is not defined here, presumably it comes from the build
// configuration -- TODO confirm)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1337
1338 // ----------------------------------------------------------------------------
1339 // wxMBConv_iconv: encapsulates an iconv character set
1340 // ----------------------------------------------------------------------------
1341
// wxMBConv implementation converting between the charset with the given
// name and wchar_t using two iconv conversion descriptors, one for each
// direction.
class wxMBConv_iconv : public wxMBConv
{
public:
    // opens the conversion descriptors for the charset called name; use
    // IsOk() to check whether this succeeded
    wxMBConv_iconv(const wxChar *name);

    // closes the descriptors which were successfully opened
    virtual ~wxMBConv_iconv();

    // convert psz to wide chars: n is the size of buf in wide characters and
    // buf may be NULL to only compute the length; returns (size_t)-1 if
    // iconv() failed
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;

    // convert psz to multibyte: n is the size of buf in bytes and buf may be
    // NULL to only compute the length; returns (size_t)-1 if iconv() failed
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // true only if both conversion descriptors are valid
    bool IsOk() const
        { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction
    iconv_t m2w,
            w2m;
#if wxUSE_THREADS
    // guards access to m2w and w2m objects
    wxMutex m_iconvMutex;
#endif

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain NULL
    static const char *ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;
};

// both are (re)computed by the constructor for as long as no usable wide
// char charset has been found
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1376
1377 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1378 {
1379     // Do it the hard way
1380     char cname[100];
1381     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1382         cname[i] = (char) name[i];
1383
1384     // check for charset that represents wchar_t:
1385     if (ms_wcCharsetName == NULL)
1386     {
1387         ms_wcNeedsSwap = false;
1388
1389         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1390         ms_wcCharsetName = WC_NAME_BEST;
1391         m2w = iconv_open(ms_wcCharsetName, cname);
1392
1393         if (m2w == (iconv_t)-1)
1394         {
1395             // try charset w/o bytesex info (e.g. "UCS4")
1396             // and check for bytesex ourselves:
1397             ms_wcCharsetName = WC_NAME;
1398             m2w = iconv_open(ms_wcCharsetName, cname);
1399
1400             // last bet, try if it knows WCHAR_T pseudo-charset
1401             if (m2w == (iconv_t)-1)
1402             {
1403                 ms_wcCharsetName = "WCHAR_T";
1404                 m2w = iconv_open(ms_wcCharsetName, cname);
1405             }
1406
1407             if (m2w != (iconv_t)-1)
1408             {
1409                 char    buf[2], *bufPtr;
1410                 wchar_t wbuf[2], *wbufPtr;
1411                 size_t  insz, outsz;
1412                 size_t  res;
1413
1414                 buf[0] = 'A';
1415                 buf[1] = 0;
1416                 wbuf[0] = 0;
1417                 insz = 2;
1418                 outsz = SIZEOF_WCHAR_T * 2;
1419                 wbufPtr = wbuf;
1420                 bufPtr = buf;
1421
1422                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1423                             (char**)&wbufPtr, &outsz);
1424
1425                 if (ICONV_FAILED(res, insz))
1426                 {
1427                     ms_wcCharsetName = NULL;
1428                     wxLogLastError(wxT("iconv"));
1429                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1430                 }
1431                 else
1432                 {
1433                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1434                 }
1435             }
1436             else
1437             {
1438                 ms_wcCharsetName = NULL;
1439
1440                 // VS: we must not output an error here, since wxWidgets will safely
1441                 //     fall back to using wxEncodingConverter.
1442                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1443                 //wxLogError(
1444             }
1445         }
1446         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1447     }
1448     else // we already have ms_wcCharsetName
1449     {
1450         m2w = iconv_open(ms_wcCharsetName, cname);
1451     }
1452
1453     // NB: don't ever pass NULL to iconv_open(), it may crash!
1454     if ( ms_wcCharsetName )
1455     {
1456         w2m = iconv_open( cname, ms_wcCharsetName);
1457     }
1458     else
1459     {
1460         w2m = (iconv_t)-1;
1461     }
1462 }
1463
1464 wxMBConv_iconv::~wxMBConv_iconv()
1465 {
1466     if ( m2w != (iconv_t)-1 )
1467         iconv_close(m2w);
1468     if ( w2m != (iconv_t)-1 )
1469         iconv_close(w2m);
1470 }
1471
1472 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1473 {
1474 #if wxUSE_THREADS
1475     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1476     //     Unfortunately there is a couple of global wxCSConv objects such as
1477     //     wxConvLocal that are used all over wx code, so we have to make sure
1478     //     the handle is used by at most one thread at the time. Otherwise
1479     //     only a few wx classes would be safe to use from non-main threads
1480     //     as MB<->WC conversion would fail "randomly".
1481     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1482 #endif
1483
1484     size_t inbuf = strlen(psz);
1485     size_t outbuf = n * SIZEOF_WCHAR_T;
1486     size_t res, cres;
1487     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1488     wchar_t *bufPtr = buf;
1489     const char *pszPtr = psz;
1490
1491     if (buf)
1492     {
1493         // have destination buffer, convert there
1494         cres = iconv(m2w,
1495                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1496                      (char**)&bufPtr, &outbuf);
1497         res = n - (outbuf / SIZEOF_WCHAR_T);
1498
1499         if (ms_wcNeedsSwap)
1500         {
1501             // convert to native endianness
1502             WC_BSWAP(buf /* _not_ bufPtr */, res)
1503         }
1504
1505         // NB: iconv was given only strlen(psz) characters on input, and so
1506         //     it couldn't convert the trailing zero. Let's do it ourselves
1507         //     if there's some room left for it in the output buffer.
1508         if (res < n)
1509             buf[res] = 0;
1510     }
1511     else
1512     {
1513         // no destination buffer... convert using temp buffer
1514         // to calculate destination buffer requirement
1515         wchar_t tbuf[8];
1516         res = 0;
1517         do {
1518             bufPtr = tbuf;
1519             outbuf = 8*SIZEOF_WCHAR_T;
1520
1521             cres = iconv(m2w,
1522                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1523                          (char**)&bufPtr, &outbuf );
1524
1525             res += 8-(outbuf/SIZEOF_WCHAR_T);
1526         } while ((cres==(size_t)-1) && (errno==E2BIG));
1527     }
1528
1529     if (ICONV_FAILED(cres, inbuf))
1530     {
1531         //VS: it is ok if iconv fails, hence trace only
1532         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1533         return (size_t)-1;
1534     }
1535
1536     return res;
1537 }
1538
1539 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1540 {
1541 #if wxUSE_THREADS
1542     // NB: explained in MB2WC
1543     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1544 #endif
1545
1546     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1547     size_t outbuf = n;
1548     size_t res, cres;
1549
1550     wchar_t *tmpbuf = 0;
1551
1552     if (ms_wcNeedsSwap)
1553     {
1554         // need to copy to temp buffer to switch endianness
1555         // this absolutely doesn't rock!
1556         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1557         //  could be in read-only memory, or be accessed in some other thread)
1558         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1559         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1560         WC_BSWAP(tmpbuf, inbuf)
1561         psz=tmpbuf;
1562     }
1563
1564     if (buf)
1565     {
1566         // have destination buffer, convert there
1567         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1568
1569         res = n-outbuf;
1570
1571         // NB: iconv was given only wcslen(psz) characters on input, and so
1572         //     it couldn't convert the trailing zero. Let's do it ourselves
1573         //     if there's some room left for it in the output buffer.
1574         if (res < n)
1575             buf[0] = 0;
1576     }
1577     else
1578     {
1579         // no destination buffer... convert using temp buffer
1580         // to calculate destination buffer requirement
1581         char tbuf[16];
1582         res = 0;
1583         do {
1584             buf = tbuf; outbuf = 16;
1585
1586             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1587
1588             res += 16 - outbuf;
1589         } while ((cres==(size_t)-1) && (errno==E2BIG));
1590     }
1591
1592     if (ms_wcNeedsSwap)
1593     {
1594         free(tmpbuf);
1595     }
1596
1597     if (ICONV_FAILED(cres, inbuf))
1598     {
1599         //VS: it is ok if iconv fails, hence trace only
1600         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1601         return (size_t)-1;
1602     }
1603
1604     return res;
1605 }
1606
1607 #endif // HAVE_ICONV
1608
1609
1610 // ============================================================================
1611 // Win32 conversion classes
1612 // ============================================================================
1613
1614 #ifdef wxHAVE_WIN32_MB2WC
1615
// from utils.cpp: these functions are used by wxMBConv_win32 constructors to
// initialize its code page from a charset name or a font encoding
// (presumably they return -1 on failure as this is what
// wxMBConv_win32::IsOk() tests for -- TODO confirm)
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1621
// wxMBConv implementation using the Win32 MultiByteToWideChar() and
// WideCharToMultiByte() functions with the code page chosen in the
// constructor.
class wxMBConv_win32 : public wxMBConv
{
public:
    // default ctor uses CP_ACP as the code page
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // use the code page returned by wxCharsetToCodepage() for this charset
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    // use the code page returned by wxEncodingToCodepage() for this encoding
    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // convert the NUL-terminated string psz to wide chars: buf may be NULL
    // to only compute the length, otherwise n is passed as the output buffer
    // size; returns the length of the result without the terminating NUL or
    // (size_t)-1 if MultiByteToWideChar() returned 0
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and break the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
        // an error (tested under Windows Server 2003) and apparently it is
        // done on purpose, i.e. the function accepts any input in this case
        // and although I'd prefer to return error on ill-formed output, our
        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) neither so we don't
        // even have any fallback here...
        int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;

        const size_t len = ::MultiByteToWideChar
                             (
                                m_CodePage,     // code page
                                flags,          // flags: fail on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                             );

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        return len ? len - 1 : (size_t)-1;
    }

    // convert the NUL-terminated wide string pwz to multibyte: buf may be
    // NULL to only compute the length, otherwise n is passed as the output
    // buffer size; returns the length of the result without the terminating
    // NUL or (size_t)-1 if the conversion failed or was detected to be lossy
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        BOOL usedDef wxDUMMY_INITIALIZE(false);
        BOOL *pUsedDef;
        int flags;
        if ( CanUseNoBestFit() && m_CodePage < 50000 )
        {
            // it's our lucky day
            flags = WC_NO_BEST_FIT_CHARS;
            pUsedDef = &usedDef;
        }
        else // old system or unsupported encoding
        {
            flags = 0;
            pUsedDef = NULL;
        }

        const size_t len = ::WideCharToMultiByte
                             (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                             );

        if ( !len )
        {
            // function totally failed
            return (size_t)-1;
        }

        // if we were really converting, check if we succeeded
        if ( buf )
        {
            if ( flags )
            {
                // check if the conversion failed, i.e. if any replacements
                // were done
                if ( usedDef )
                    return (size_t)-1;
            }
            else // we must resort to double tripping...
            {
                // convert the result back to wide chars and compare it with
                // the original string
                wxWCharBuffer wcBuf(n);
                if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                        wcscmp(wcBuf, pwz) != 0 )
                {
                    // we didn't obtain the same thing we started from, hence
                    // the conversion was lossy and we consider that it failed
                    return (size_t)-1;
                }
            }
        }

        // see the comment above for the reason of "len - 1"
        return len - 1;
    }

    // the code page is valid unless it is -1
    bool IsOk() const { return m_CodePage != -1; }

private:
    // returns true if WC_NO_BEST_FIT_CHARS may be used on the running
    // system; the OS version is checked only once and the result is cached
    // in a static variable
    static bool CanUseNoBestFit()
    {
        // -1 means "not determined yet", otherwise it is 0 or 1
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            switch ( wxGetOsVersion(&verMaj, &verMin) )
            {
                case wxWIN95:
                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                    break;

                case wxWINDOWS_NT:
                    s_isWin98Or2k = verMaj >= 5;
                    break;

                default:
                    // unknown, be conservative by default
                    s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    // the code page used for the conversions in both directions
    long m_CodePage;
};
1789
1790 #endif // wxHAVE_WIN32_MB2WC
1791
1792 // ============================================================================
1793 // Cocoa conversion classes
1794 // ============================================================================
1795
1796 #if defined(__WXCOCOA__)
1797
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1801
1802 #include <CoreFoundation/CFString.h>
1803 #include <CoreFoundation/CFStringEncodingExt.h>
1804
1805 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1806 {
1807     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1808     if ( encoding == wxFONTENCODING_DEFAULT )
1809     {
1810         enc = CFStringGetSystemEncoding();
1811     }
1812     else switch( encoding)
1813     {
1814         case wxFONTENCODING_ISO8859_1 :
1815             enc = kCFStringEncodingISOLatin1 ;
1816             break ;
1817         case wxFONTENCODING_ISO8859_2 :
1818             enc = kCFStringEncodingISOLatin2;
1819             break ;
1820         case wxFONTENCODING_ISO8859_3 :
1821             enc = kCFStringEncodingISOLatin3 ;
1822             break ;
1823         case wxFONTENCODING_ISO8859_4 :
1824             enc = kCFStringEncodingISOLatin4;
1825             break ;
1826         case wxFONTENCODING_ISO8859_5 :
1827             enc = kCFStringEncodingISOLatinCyrillic;
1828             break ;
1829         case wxFONTENCODING_ISO8859_6 :
1830             enc = kCFStringEncodingISOLatinArabic;
1831             break ;
1832         case wxFONTENCODING_ISO8859_7 :
1833             enc = kCFStringEncodingISOLatinGreek;
1834             break ;
1835         case wxFONTENCODING_ISO8859_8 :
1836             enc = kCFStringEncodingISOLatinHebrew;
1837             break ;
1838         case wxFONTENCODING_ISO8859_9 :
1839             enc = kCFStringEncodingISOLatin5;
1840             break ;
1841         case wxFONTENCODING_ISO8859_10 :
1842             enc = kCFStringEncodingISOLatin6;
1843             break ;
1844         case wxFONTENCODING_ISO8859_11 :
1845             enc = kCFStringEncodingISOLatinThai;
1846             break ;
1847         case wxFONTENCODING_ISO8859_13 :
1848             enc = kCFStringEncodingISOLatin7;
1849             break ;
1850         case wxFONTENCODING_ISO8859_14 :
1851             enc = kCFStringEncodingISOLatin8;
1852             break ;
1853         case wxFONTENCODING_ISO8859_15 :
1854             enc = kCFStringEncodingISOLatin9;
1855             break ;
1856
1857         case wxFONTENCODING_KOI8 :
1858             enc = kCFStringEncodingKOI8_R;
1859             break ;
1860         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1861             enc = kCFStringEncodingDOSRussian;
1862             break ;
1863
1864 //      case wxFONTENCODING_BULGARIAN :
1865 //          enc = ;
1866 //          break ;
1867
1868         case wxFONTENCODING_CP437 :
1869             enc =kCFStringEncodingDOSLatinUS ;
1870             break ;
1871         case wxFONTENCODING_CP850 :
1872             enc = kCFStringEncodingDOSLatin1;
1873             break ;
1874         case wxFONTENCODING_CP852 :
1875             enc = kCFStringEncodingDOSLatin2;
1876             break ;
1877         case wxFONTENCODING_CP855 :
1878             enc = kCFStringEncodingDOSCyrillic;
1879             break ;
1880         case wxFONTENCODING_CP866 :
1881             enc =kCFStringEncodingDOSRussian ;
1882             break ;
1883         case wxFONTENCODING_CP874 :
1884             enc = kCFStringEncodingDOSThai;
1885             break ;
1886         case wxFONTENCODING_CP932 :
1887             enc = kCFStringEncodingDOSJapanese;
1888             break ;
1889         case wxFONTENCODING_CP936 :
1890             enc =kCFStringEncodingDOSChineseSimplif ;
1891             break ;
1892         case wxFONTENCODING_CP949 :
1893             enc = kCFStringEncodingDOSKorean;
1894             break ;
1895         case wxFONTENCODING_CP950 :
1896             enc = kCFStringEncodingDOSChineseTrad;
1897             break ;
1898         case wxFONTENCODING_CP1250 :
1899             enc = kCFStringEncodingWindowsLatin2;
1900             break ;
1901         case wxFONTENCODING_CP1251 :
1902             enc =kCFStringEncodingWindowsCyrillic ;
1903             break ;
1904         case wxFONTENCODING_CP1252 :
1905             enc =kCFStringEncodingWindowsLatin1 ;
1906             break ;
1907         case wxFONTENCODING_CP1253 :
1908             enc = kCFStringEncodingWindowsGreek;
1909             break ;
1910         case wxFONTENCODING_CP1254 :
1911             enc = kCFStringEncodingWindowsLatin5;
1912             break ;
1913         case wxFONTENCODING_CP1255 :
1914             enc =kCFStringEncodingWindowsHebrew ;
1915             break ;
1916         case wxFONTENCODING_CP1256 :
1917             enc =kCFStringEncodingWindowsArabic ;
1918             break ;
1919         case wxFONTENCODING_CP1257 :
1920             enc = kCFStringEncodingWindowsBalticRim;
1921             break ;
1922 //   This only really encodes to UTF7 (if that) evidently
1923 //        case wxFONTENCODING_UTF7 :
1924 //            enc = kCFStringEncodingNonLossyASCII ;
1925 //            break ;
1926         case wxFONTENCODING_UTF8 :
1927             enc = kCFStringEncodingUTF8 ;
1928             break ;
1929         case wxFONTENCODING_EUC_JP :
1930             enc = kCFStringEncodingEUC_JP;
1931             break ;
1932         case wxFONTENCODING_UTF16 :
1933             enc = kCFStringEncodingUnicode ;
1934             break ;
1935         case wxFONTENCODING_MACROMAN :
1936             enc = kCFStringEncodingMacRoman ;
1937             break ;
1938         case wxFONTENCODING_MACJAPANESE :
1939             enc = kCFStringEncodingMacJapanese ;
1940             break ;
1941         case wxFONTENCODING_MACCHINESETRAD :
1942             enc = kCFStringEncodingMacChineseTrad ;
1943             break ;
1944         case wxFONTENCODING_MACKOREAN :
1945             enc = kCFStringEncodingMacKorean ;
1946             break ;
1947         case wxFONTENCODING_MACARABIC :
1948             enc = kCFStringEncodingMacArabic ;
1949             break ;
1950         case wxFONTENCODING_MACHEBREW :
1951             enc = kCFStringEncodingMacHebrew ;
1952             break ;
1953         case wxFONTENCODING_MACGREEK :
1954             enc = kCFStringEncodingMacGreek ;
1955             break ;
1956         case wxFONTENCODING_MACCYRILLIC :
1957             enc = kCFStringEncodingMacCyrillic ;
1958             break ;
1959         case wxFONTENCODING_MACDEVANAGARI :
1960             enc = kCFStringEncodingMacDevanagari ;
1961             break ;
1962         case wxFONTENCODING_MACGURMUKHI :
1963             enc = kCFStringEncodingMacGurmukhi ;
1964             break ;
1965         case wxFONTENCODING_MACGUJARATI :
1966             enc = kCFStringEncodingMacGujarati ;
1967             break ;
1968         case wxFONTENCODING_MACORIYA :
1969             enc = kCFStringEncodingMacOriya ;
1970             break ;
1971         case wxFONTENCODING_MACBENGALI :
1972             enc = kCFStringEncodingMacBengali ;
1973             break ;
1974         case wxFONTENCODING_MACTAMIL :
1975             enc = kCFStringEncodingMacTamil ;
1976             break ;
1977         case wxFONTENCODING_MACTELUGU :
1978             enc = kCFStringEncodingMacTelugu ;
1979             break ;
1980         case wxFONTENCODING_MACKANNADA :
1981             enc = kCFStringEncodingMacKannada ;
1982             break ;
1983         case wxFONTENCODING_MACMALAJALAM :
1984             enc = kCFStringEncodingMacMalayalam ;
1985             break ;
1986         case wxFONTENCODING_MACSINHALESE :
1987             enc = kCFStringEncodingMacSinhalese ;
1988             break ;
1989         case wxFONTENCODING_MACBURMESE :
1990             enc = kCFStringEncodingMacBurmese ;
1991             break ;
1992         case wxFONTENCODING_MACKHMER :
1993             enc = kCFStringEncodingMacKhmer ;
1994             break ;
1995         case wxFONTENCODING_MACTHAI :
1996             enc = kCFStringEncodingMacThai ;
1997             break ;
1998         case wxFONTENCODING_MACLAOTIAN :
1999             enc = kCFStringEncodingMacLaotian ;
2000             break ;
2001         case wxFONTENCODING_MACGEORGIAN :
2002             enc = kCFStringEncodingMacGeorgian ;
2003             break ;
2004         case wxFONTENCODING_MACARMENIAN :
2005             enc = kCFStringEncodingMacArmenian ;
2006             break ;
2007         case wxFONTENCODING_MACCHINESESIMP :
2008             enc = kCFStringEncodingMacChineseSimp ;
2009             break ;
2010         case wxFONTENCODING_MACTIBETAN :
2011             enc = kCFStringEncodingMacTibetan ;
2012             break ;
2013         case wxFONTENCODING_MACMONGOLIAN :
2014             enc = kCFStringEncodingMacMongolian ;
2015             break ;
2016         case wxFONTENCODING_MACETHIOPIC :
2017             enc = kCFStringEncodingMacEthiopic ;
2018             break ;
2019         case wxFONTENCODING_MACCENTRALEUR :
2020             enc = kCFStringEncodingMacCentralEurRoman ;
2021             break ;
2022         case wxFONTENCODING_MACVIATNAMESE :
2023             enc = kCFStringEncodingMacVietnamese ;
2024             break ;
2025         case wxFONTENCODING_MACARABICEXT :
2026             enc = kCFStringEncodingMacExtArabic ;
2027             break ;
2028         case wxFONTENCODING_MACSYMBOL :
2029             enc = kCFStringEncodingMacSymbol ;
2030             break ;
2031         case wxFONTENCODING_MACDINGBATS :
2032             enc = kCFStringEncodingMacDingbats ;
2033             break ;
2034         case wxFONTENCODING_MACTURKISH :
2035             enc = kCFStringEncodingMacTurkish ;
2036             break ;
2037         case wxFONTENCODING_MACCROATIAN :
2038             enc = kCFStringEncodingMacCroatian ;
2039             break ;
2040         case wxFONTENCODING_MACICELANDIC :
2041             enc = kCFStringEncodingMacIcelandic ;
2042             break ;
2043         case wxFONTENCODING_MACROMANIAN :
2044             enc = kCFStringEncodingMacRomanian ;
2045             break ;
2046         case wxFONTENCODING_MACCELTIC :
2047             enc = kCFStringEncodingMacCeltic ;
2048             break ;
2049         case wxFONTENCODING_MACGAELIC :
2050             enc = kCFStringEncodingMacGaelic ;
2051             break ;
2052 //      case wxFONTENCODING_MACKEYBOARD :
2053 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2054 //          break ;
2055         default :
2056             // because gcc is picky
2057             break ;
2058     } ;
2059     return enc ;
2060 }
2061
2062 class wxMBConv_cocoa : public wxMBConv
2063 {
2064 public:
2065     wxMBConv_cocoa()
2066     {
2067         Init(CFStringGetSystemEncoding()) ;
2068     }
2069
2070 #if wxUSE_FONTMAP
2071     wxMBConv_cocoa(const wxChar* name)
2072     {
2073         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2074     }
2075 #endif
2076
2077     wxMBConv_cocoa(wxFontEncoding encoding)
2078     {
2079         Init( wxCFStringEncFromFontEnc(encoding) );
2080     }
2081
2082     ~wxMBConv_cocoa()
2083     {
2084     }
2085
2086     void Init( CFStringEncoding encoding)
2087     {
2088         m_encoding = encoding ;
2089     }
2090
2091     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2092     {
2093         wxASSERT(szUnConv);
2094
2095         CFStringRef theString = CFStringCreateWithBytes (
2096                                                 NULL, //the allocator
2097                                                 (const UInt8*)szUnConv,
2098                                                 strlen(szUnConv),
2099                                                 m_encoding,
2100                                                 false //no BOM/external representation
2101                                                 );
2102
2103         wxASSERT(theString);
2104
2105         size_t nOutLength = CFStringGetLength(theString);
2106
2107         if (szOut == NULL)
2108         {
2109             CFRelease(theString);
2110             return nOutLength;
2111         }
2112
2113         CFRange theRange = { 0, nOutSize };
2114
2115 #if SIZEOF_WCHAR_T == 4
2116         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2117 #endif
2118
2119         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2120
2121         CFRelease(theString);
2122
2123         szUniCharBuffer[nOutLength] = '\0' ;
2124
2125 #if SIZEOF_WCHAR_T == 4
2126         wxMBConvUTF16 converter ;
2127         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2128         delete[] szUniCharBuffer;
2129 #endif
2130
2131         return nOutLength;
2132     }
2133
2134     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2135     {
2136         wxASSERT(szUnConv);
2137
2138         size_t nRealOutSize;
2139         size_t nBufSize = wxWcslen(szUnConv);
2140         UniChar* szUniBuffer = (UniChar*) szUnConv;
2141
2142 #if SIZEOF_WCHAR_T == 4
2143         wxMBConvUTF16BE converter ;
2144         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2145         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2146         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2147         nBufSize /= sizeof(UniChar);
2148 #endif
2149
2150         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2151                                 NULL, //allocator
2152                                 szUniBuffer,
2153                                 nBufSize,
2154                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2155                             );
2156
2157         wxASSERT(theString);
2158
2159         //Note that CER puts a BOM when converting to unicode
2160         //so we  check and use getchars instead in that case
2161         if (m_encoding == kCFStringEncodingUnicode)
2162         {
2163             if (szOut != NULL)
2164                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2165
2166             nRealOutSize = CFStringGetLength(theString) + 1;
2167         }
2168         else
2169         {
2170             CFStringGetBytes(
2171                 theString,
2172                 CFRangeMake(0, CFStringGetLength(theString)),
2173                 m_encoding,
2174                 0, //what to put in characters that can't be converted -
2175                     //0 tells CFString to return NULL if it meets such a character
2176                 false, //not an external representation
2177                 (UInt8*) szOut,
2178                 nOutSize,
2179                 (CFIndex*) &nRealOutSize
2180                         );
2181         }
2182
2183         CFRelease(theString);
2184
2185 #if SIZEOF_WCHAR_T == 4
2186         delete[] szUniBuffer;
2187 #endif
2188
2189         return  nRealOutSize - 1;
2190     }
2191
2192     bool IsOk() const
2193     {
2194         return m_encoding != kCFStringEncodingInvalidId &&
2195               CFStringIsEncodingAvailable(m_encoding);
2196     }
2197
2198 private:
2199     CFStringEncoding m_encoding ;
2200 };
2201
2202 #endif // defined(__WXCOCOA__)
2203
2204 // ============================================================================
2205 // Mac conversion classes
2206 // ============================================================================
2207
2208 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2209
2210 class wxMBConv_mac : public wxMBConv
2211 {
2212 public:
2213     wxMBConv_mac()
2214     {
2215         Init(CFStringGetSystemEncoding()) ;
2216     }
2217
2218 #if wxUSE_FONTMAP
2219     wxMBConv_mac(const wxChar* name)
2220     {
2221         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2222     }
2223 #endif
2224
2225     wxMBConv_mac(wxFontEncoding encoding)
2226     {
2227         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2228     }
2229
2230     ~wxMBConv_mac()
2231     {
2232         OSStatus status = noErr ;
2233         status = TECDisposeConverter(m_MB2WC_converter);
2234         status = TECDisposeConverter(m_WC2MB_converter);
2235     }
2236
2237
2238     void Init( TextEncodingBase encoding)
2239     {
2240         OSStatus status = noErr ;
2241         m_char_encoding = encoding ;
2242         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2243
2244         status = TECCreateConverter(&m_MB2WC_converter,
2245                                     m_char_encoding,
2246                                     m_unicode_encoding);
2247         status = TECCreateConverter(&m_WC2MB_converter,
2248                                     m_unicode_encoding,
2249                                     m_char_encoding);
2250     }
2251
2252     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2253     {
2254         OSStatus status = noErr ;
2255         ByteCount byteOutLen ;
2256         ByteCount byteInLen = strlen(psz) ;
2257         wchar_t *tbuf = NULL ;
2258         UniChar* ubuf = NULL ;
2259         size_t res = 0 ;
2260
2261         if (buf == NULL)
2262         {
2263             //apple specs say at least 32
2264             n = wxMax( 32 , byteInLen ) ;
2265             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2266         }
2267         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2268 #if SIZEOF_WCHAR_T == 4
2269         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2270 #else
2271         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2272 #endif
2273         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2274           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2275 #if SIZEOF_WCHAR_T == 4
2276         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2277         // is not properly terminated we get random characters at the end
2278         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2279         wxMBConvUTF16BE converter ;
2280         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2281         free( ubuf ) ;
2282 #else
2283         res = byteOutLen / sizeof( UniChar ) ;
2284 #endif
2285         if ( buf == NULL )
2286              free(tbuf) ;
2287
2288         if ( buf  && res < n)
2289             buf[res] = 0;
2290
2291         return res ;
2292     }
2293
2294     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2295     {
2296         OSStatus status = noErr ;
2297         ByteCount byteOutLen ;
2298         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2299
2300         char *tbuf = NULL ;
2301
2302         if (buf == NULL)
2303         {
2304             //apple specs say at least 32
2305             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2306             tbuf = (char*) malloc( n ) ;
2307         }
2308
2309         ByteCount byteBufferLen = n ;
2310         UniChar* ubuf = NULL ;
2311 #if SIZEOF_WCHAR_T == 4
2312         wxMBConvUTF16BE converter ;
2313         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2314         byteInLen = unicharlen ;
2315         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2316         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2317 #else
2318         ubuf = (UniChar*) psz ;
2319 #endif
2320         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2321             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2322 #if SIZEOF_WCHAR_T == 4
2323         free( ubuf ) ;
2324 #endif
2325         if ( buf == NULL )
2326             free(tbuf) ;
2327
2328         size_t res = byteOutLen ;
2329         if ( buf  && res < n)
2330         {
2331             buf[res] = 0;
2332
2333             //we need to double-trip to verify it didn't insert any ? in place
2334             //of bogus characters
2335             wxWCharBuffer wcBuf(n);
2336             size_t pszlen = wxWcslen(psz);
2337             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2338                         wxWcslen(wcBuf) != pszlen ||
2339                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2340             {
2341                 // we didn't obtain the same thing we started from, hence
2342                 // the conversion was lossy and we consider that it failed
2343                 return (size_t)-1;
2344             }
2345         }
2346
2347         return res ;
2348     }
2349
2350     bool IsOk() const
2351         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2352
2353 private:
2354     TECObjectRef m_MB2WC_converter ;
2355     TECObjectRef m_WC2MB_converter ;
2356
2357     TextEncodingBase m_char_encoding ;
2358     TextEncodingBase m_unicode_encoding ;
2359 };
2360
2361 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2362
2363 // ============================================================================
2364 // wxEncodingConverter based conversion classes
2365 // ============================================================================
2366
2367 #if wxUSE_FONTMAP
2368
2369 class wxMBConv_wxwin : public wxMBConv
2370 {
2371 private:
2372     void Init()
2373     {
2374         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2375                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2376     }
2377
2378 public:
2379     // temporarily just use wxEncodingConverter stuff,
2380     // so that it works while a better implementation is built
2381     wxMBConv_wxwin(const wxChar* name)
2382     {
2383         if (name)
2384             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2385         else
2386             m_enc = wxFONTENCODING_SYSTEM;
2387
2388         Init();
2389     }
2390
2391     wxMBConv_wxwin(wxFontEncoding enc)
2392     {
2393         m_enc = enc;
2394
2395         Init();
2396     }
2397
2398     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2399     {
2400         size_t inbuf = strlen(psz);
2401         if (buf)
2402         {
2403             if (!m2w.Convert(psz,buf))
2404                 return (size_t)-1;
2405         }
2406         return inbuf;
2407     }
2408
2409     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2410     {
2411         const size_t inbuf = wxWcslen(psz);
2412         if (buf)
2413         {
2414             if (!w2m.Convert(psz,buf))
2415                 return (size_t)-1;
2416         }
2417
2418         return inbuf;
2419     }
2420
2421     bool IsOk() const { return m_ok; }
2422
2423 public:
2424     wxFontEncoding m_enc;
2425     wxEncodingConverter m2w, w2m;
2426
2427     // were we initialized successfully?
2428     bool m_ok;
2429
2430     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2431 };
2432
2433 #endif // wxUSE_FONTMAP
2434
2435 // ============================================================================
2436 // wxCSConv implementation
2437 // ============================================================================
2438
2439 void wxCSConv::Init()
2440 {
2441     m_name = NULL;
2442     m_convReal =  NULL;
2443     m_deferred = true;
2444 }
2445
2446 wxCSConv::wxCSConv(const wxChar *charset)
2447 {
2448     Init();
2449
2450     if ( charset )
2451     {
2452         SetName(charset);
2453     }
2454
2455     m_encoding = wxFONTENCODING_SYSTEM;
2456 }
2457
2458 wxCSConv::wxCSConv(wxFontEncoding encoding)
2459 {
2460     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2461     {
2462         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2463
2464         encoding = wxFONTENCODING_SYSTEM;
2465     }
2466
2467     Init();
2468
2469     m_encoding = encoding;
2470 }
2471
2472 wxCSConv::~wxCSConv()
2473 {
2474     Clear();
2475 }
2476
2477 wxCSConv::wxCSConv(const wxCSConv& conv)
2478         : wxMBConv()
2479 {
2480     Init();
2481
2482     SetName(conv.m_name);
2483     m_encoding = conv.m_encoding;
2484 }
2485
2486 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2487 {
2488     Clear();
2489
2490     SetName(conv.m_name);
2491     m_encoding = conv.m_encoding;
2492
2493     return *this;
2494 }
2495
2496 void wxCSConv::Clear()
2497 {
2498     free(m_name);
2499     delete m_convReal;
2500
2501     m_name = NULL;
2502     m_convReal = NULL;
2503 }
2504
2505 void wxCSConv::SetName(const wxChar *charset)
2506 {
2507     if (charset)
2508     {
2509         m_name = wxStrdup(charset);
2510         m_deferred = true;
2511     }
2512 }
2513
2514 wxMBConv *wxCSConv::DoCreate() const
2515 {
2516     // check for the special case of ASCII or ISO8859-1 charset: as we have
2517     // special knowledge of it anyhow, we don't need to create a special
2518     // conversion object
2519     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2520     {
2521         // don't convert at all
2522         return NULL;
2523     }
2524
2525     // we trust OS to do conversion better than we can so try external
2526     // conversion methods first
2527     //
2528     // the full order is:
2529     //      1. OS conversion (iconv() under Unix or Win32 API)
2530     //      2. hard coded conversions for UTF
2531     //      3. wxEncodingConverter as fall back
2532
2533     // step (1)
2534 #ifdef HAVE_ICONV
2535 #if !wxUSE_FONTMAP
2536     if ( m_name )
2537 #endif // !wxUSE_FONTMAP
2538     {
2539         wxString name(m_name);
2540
2541 #if wxUSE_FONTMAP
2542         if ( name.empty() )
2543             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2544 #endif // wxUSE_FONTMAP
2545
2546         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2547         if ( conv->IsOk() )
2548             return conv;
2549
2550         delete conv;
2551     }
2552 #endif // HAVE_ICONV
2553
2554 #ifdef wxHAVE_WIN32_MB2WC
2555     {
2556 #if wxUSE_FONTMAP
2557         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2558                                       : new wxMBConv_win32(m_encoding);
2559         if ( conv->IsOk() )
2560             return conv;
2561
2562         delete conv;
2563 #else
2564         return NULL;
2565 #endif
2566     }
2567 #endif // wxHAVE_WIN32_MB2WC
2568 #if defined(__WXMAC__)
2569     {
2570         // leave UTF16 and UTF32 to the built-ins of wx
2571         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2572             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2573         {
2574
2575 #if wxUSE_FONTMAP
2576             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2577                                         : new wxMBConv_mac(m_encoding);
2578 #else
2579             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2580 #endif
2581             if ( conv->IsOk() )
2582                  return conv;
2583
2584             delete conv;
2585         }
2586     }
2587 #endif
2588 #if defined(__WXCOCOA__)
2589     {
2590         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2591         {
2592
2593 #if wxUSE_FONTMAP
2594             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2595                                           : new wxMBConv_cocoa(m_encoding);
2596 #else
2597             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2598 #endif
2599             if ( conv->IsOk() )
2600                  return conv;
2601
2602             delete conv;
2603         }
2604     }
2605 #endif
2606     // step (2)
2607     wxFontEncoding enc = m_encoding;
2608 #if wxUSE_FONTMAP
2609     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2610     {
2611         // use "false" to suppress interactive dialogs -- we can be called from
2612         // anywhere and popping up a dialog from here is the last thing we want to
2613         // do
2614         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2615     }
2616 #endif // wxUSE_FONTMAP
2617
2618     switch ( enc )
2619     {
2620         case wxFONTENCODING_UTF7:
2621              return new wxMBConvUTF7;
2622
2623         case wxFONTENCODING_UTF8:
2624              return new wxMBConvUTF8;
2625
2626         case wxFONTENCODING_UTF16BE:
2627              return new wxMBConvUTF16BE;
2628
2629         case wxFONTENCODING_UTF16LE:
2630              return new wxMBConvUTF16LE;
2631
2632         case wxFONTENCODING_UTF32BE:
2633              return new wxMBConvUTF32BE;
2634
2635         case wxFONTENCODING_UTF32LE:
2636              return new wxMBConvUTF32LE;
2637
2638         default:
2639              // nothing to do but put here to suppress gcc warnings
2640              ;
2641     }
2642
2643     // step (3)
2644 #if wxUSE_FONTMAP
2645     {
2646         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2647                                       : new wxMBConv_wxwin(m_encoding);
2648         if ( conv->IsOk() )
2649             return conv;
2650
2651         delete conv;
2652     }
2653 #endif // wxUSE_FONTMAP
2654
2655     // NB: This is a hack to prevent deadlock. What could otherwise happen
2656     //     in Unicode build: wxConvLocal creation ends up being here
2657     //     because of some failure and logs the error. But wxLog will try to
2658     //     attach timestamp, for which it will need wxConvLocal (to convert
2659     //     time to char* and then wchar_t*), but that fails, tries to log
2660     //     error, but wxLog has a (already locked) critical section that
2661     //     guards static buffer.
2662     static bool alreadyLoggingError = false;
2663     if (!alreadyLoggingError)
2664     {
2665         alreadyLoggingError = true;
2666         wxLogError(_("Cannot convert from the charset '%s'!"),
2667                    m_name ? m_name
2668                       :
2669 #if wxUSE_FONTMAP
2670                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2671 #else // !wxUSE_FONTMAP
2672                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2673 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2674               );
2675         alreadyLoggingError = false;
2676     }
2677
2678     return NULL;
2679 }
2680
2681 void wxCSConv::CreateConvIfNeeded() const
2682 {
2683     if ( m_deferred )
2684     {
2685         wxCSConv *self = (wxCSConv *)this; // const_cast
2686
2687 #if wxUSE_INTL
2688         // if we don't have neither the name nor the encoding, use the default
2689         // encoding for this system
2690         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2691         {
2692             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2693         }
2694 #endif // wxUSE_INTL
2695
2696         self->m_convReal = DoCreate();
2697         self->m_deferred = false;
2698     }
2699 }
2700
2701 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2702 {
2703     CreateConvIfNeeded();
2704
2705     if (m_convReal)
2706         return m_convReal->MB2WC(buf, psz, n);
2707
2708     // latin-1 (direct)
2709     size_t len = strlen(psz);
2710
2711     if (buf)
2712     {
2713         for (size_t c = 0; c <= len; c++)
2714             buf[c] = (unsigned char)(psz[c]);
2715     }
2716
2717     return len;
2718 }
2719
2720 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2721 {
2722     CreateConvIfNeeded();
2723
2724     if (m_convReal)
2725         return m_convReal->WC2MB(buf, psz, n);
2726
2727     // latin-1 (direct)
2728     const size_t len = wxWcslen(psz);
2729     if (buf)
2730     {
2731         for (size_t c = 0; c <= len; c++)
2732         {
2733             if (psz[c] > 0xFF)
2734                 return (size_t)-1;
2735             buf[c] = (char)psz[c];
2736         }
2737     }
2738     else
2739     {
2740         for (size_t c = 0; c <= len; c++)
2741         {
2742             if (psz[c] > 0xFF)
2743                 return (size_t)-1;
2744         }
2745     }
2746
2747     return len;
2748 }
2749
2750 // ----------------------------------------------------------------------------
2751 // globals
2752 // ----------------------------------------------------------------------------
2753
2754 #ifdef __WINDOWS__
2755     static wxMBConv_win32 wxConvLibcObj;
2756 #elif defined(__WXMAC__) && !defined(__MACH__)
2757     static wxMBConv_mac wxConvLibcObj ;
2758 #else
2759     static wxMBConvLibc wxConvLibcObj;
2760 #endif
2761
2762 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2763 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2764 static wxMBConvUTF7 wxConvUTF7Obj;
2765 static wxMBConvUTF8 wxConvUTF8Obj;
2766
2767 #ifdef __WXGTK20__
2768     static wxConvBrokenFileNames wxConvBrokenFileNamesObj;
2769 #endif
2770
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2772 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2773 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2774 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2775 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2776 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2777 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2778 #ifdef __WXOSX__
2779                                     wxConvUTF8Obj;
2780 #elif __WXGTK20__
2781                                     wxConvBrokenFileNamesObj;
2782 #else
2783                                     wxConvLibcObj;
2784 #endif
2785
2786
2787 #else // !wxUSE_WCHAR_T
2788
2789 // stand-ins in absence of wchar_t
2790 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2791                                 wxConvISO8859_1,
2792                                 wxConvLocal,
2793                                 wxConvUTF8;
2794
2795 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2796
2797