src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73     #include "wx/thread.h"
  74 #endif
  75
  76 #include "wx/encconv.h"
  77 #include "wx/fontmap.h"
  78 #include "wx/utils.h"
  79
  80 #ifdef __WXMAC__
  81 #include <ATSUnicode.h>
  82 #include <TextCommon.h>
  83 #include <TextEncodingConverter.h>
  84
  85 #include  "wx/mac/private.h"  // includes mac headers
  86 #endif
  87 // ----------------------------------------------------------------------------
  88 // macros
  89 // ----------------------------------------------------------------------------
  90
  91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  93
  94 #if SIZEOF_WCHAR_T == 4
  95     #define WC_NAME         "UCS4"
  96     #define WC_BSWAP         BSWAP_UCS4
  97     #ifdef WORDS_BIGENDIAN
  98       #define WC_NAME_BEST  "UCS-4BE"
  99     #else
 100       #define WC_NAME_BEST  "UCS-4LE"
 101     #endif
 102 #elif SIZEOF_WCHAR_T == 2
 103     #define WC_NAME         "UTF16"
 104     #define WC_BSWAP         BSWAP_UTF16
 105     #define WC_UTF16
 106     #ifdef WORDS_BIGENDIAN
 107       #define WC_NAME_BEST  "UTF-16BE"
 108     #else
 109       #define WC_NAME_BEST  "UTF-16LE"
 110     #endif
 111 #else // sizeof(wchar_t) != 2 nor 4
 112     // does this ever happen?
 113     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 114 #endif
 115
 116 // ============================================================================
 117 // implementation
 118 // ============================================================================
 119
 120 // ----------------------------------------------------------------------------
 121 // UTF-16 en/decoding to/from UCS-4
 122 // ----------------------------------------------------------------------------
 123
 124
 125 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 126 {
 127     if (input<=0xffff)
 128     {
 129         if (output)
 130             *output = (wxUint16) input;
 131         return 1;
 132     }
 133     else if (input>=0x110000)
 134     {
 135         return (size_t)-1;
 136     }
 137     else
 138     {
 139         if (output)
 140         {
 141             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 142             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 143         }
 144         return 2;
 145     }
 146 }
 147
 148 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 149 {
 150     if ((*input<0xd800) || (*input>0xdfff))
 151     {
 152         output = *input;
 153         return 1;
 154     }
 155     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 156     {
 157         output = *input;
 158         return (size_t)-1;
 159     }
 160     else
 161     {
 162         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 163         return 2;
 164     }
 165 }
 166
 167
 168 // ----------------------------------------------------------------------------
 169 // wxMBConv
 170 // ----------------------------------------------------------------------------
 171
 172 wxMBConv::~wxMBConv()
 173 {
 174     // nothing to do here (necessary for Darwin linking probably)
 175 }
 176
 177 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 178 {
 179     if ( psz )
 180     {
 181         // calculate the length of the buffer needed first
 182         size_t nLen = MB2WC(NULL, psz, 0);
 183         if ( nLen != (size_t)-1 )
 184         {
 185             // now do the actual conversion
 186             wxWCharBuffer buf(nLen);
 187             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 188             if ( nLen != (size_t)-1 )
 189             {
 190                 return buf;
 191             }
 192         }
 193     }
 194
 195     wxWCharBuffer buf((wchar_t *)NULL);
 196
 197     return buf;
 198 }
 199
 200 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 201 {
 202     if ( pwz )
 203     {
 204         size_t nLen = WC2MB(NULL, pwz, 0);
 205         if ( nLen != (size_t)-1 )
 206         {
 207             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 208             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 209             if ( nLen != (size_t)-1 )
 210             {
 211                 return buf;
 212             }
 213         }
 214     }
 215
 216     wxCharBuffer buf((char *)NULL);
 217
 218     return buf;
 219 }
 220
 221 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 222 {
 223     wxASSERT(pOutSize != NULL);
 224
 225     const char* szEnd = szString + nStringLen + 1;
 226     const char* szPos = szString;
 227     const char* szStart = szPos;
 228
 229     size_t nActualLength = 0;
 230     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 231
 232     wxWCharBuffer theBuffer(nCurrentSize);
 233
 234     //Convert the string until the length() is reached, continuing the
 235     //loop every time a null character is reached
 236     while(szPos != szEnd)
 237     {
 238         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 239
 240         //Get the length of the current (sub)string
 241         size_t nLen = MB2WC(NULL, szPos, 0);
 242
 243         //Invalid conversion?
 244         if( nLen == (size_t)-1 )
 245         {
 246             *pOutSize = 0;
 247             theBuffer.data()[0u] = wxT('\0');
 248             return theBuffer;
 249         }
 250
 251
 252         //Increase the actual length (+1 for current null character)
 253         nActualLength += nLen + 1;
 254
 255         //if buffer too big, realloc the buffer
 256         if (nActualLength > (nCurrentSize+1))
 257         {
 258             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 259             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 260             theBuffer = theNewBuffer;
 261             nCurrentSize <<= 1;
 262         }
 263
 264         //Convert the current (sub)string
 265         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 266         {
 267             *pOutSize = 0;
 268             theBuffer.data()[0u] = wxT('\0');
 269             return theBuffer;
 270         }
 271
 272         //Increment to next (sub)string
 273         //Note that we have to use strlen here instead of nLen
 274         //here because XX2XX gives us the size of the output buffer,
 275         //not neccessarly the length of the string
 276         szPos += strlen(szPos) + 1;
 277     }
 278
 279     //success - return actual length and the buffer
 280     *pOutSize = nActualLength;
 281     return theBuffer;
 282 }
 283
 284 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 285 {
 286     wxASSERT(pOutSize != NULL);
 287
 288     const wchar_t* szEnd = szString + nStringLen + 1;
 289     const wchar_t* szPos = szString;
 290     const wchar_t* szStart = szPos;
 291
 292     size_t nActualLength = 0;
 293     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 294
 295     wxCharBuffer theBuffer(nCurrentSize);
 296
 297     //Convert the string until the length() is reached, continuing the
 298     //loop every time a null character is reached
 299     while(szPos != szEnd)
 300     {
 301         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 302
 303         //Get the length of the current (sub)string
 304         size_t nLen = WC2MB(NULL, szPos, 0);
 305
 306         //Invalid conversion?
 307         if( nLen == (size_t)-1 )
 308         {
 309             *pOutSize = 0;
 310             theBuffer.data()[0u] = wxT('\0');
 311             return theBuffer;
 312         }
 313
 314         //Increase the actual length (+1 for current null character)
 315         nActualLength += nLen + 1;
 316
 317         //if buffer too big, realloc the buffer
 318         if (nActualLength > (nCurrentSize+1))
 319         {
 320             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 321             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 322             theBuffer = theNewBuffer;
 323             nCurrentSize <<= 1;
 324         }
 325
 326         //Convert the current (sub)string
 327         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 328         {
 329             *pOutSize = 0;
 330             theBuffer.data()[0u] = wxT('\0');
 331             return theBuffer;
 332         }
 333
 334         //Increment to next (sub)string
 335         //Note that we have to use wxWcslen here instead of nLen
 336         //here because XX2XX gives us the size of the output buffer,
 337         //not neccessarly the length of the string
 338         szPos += wxWcslen(szPos) + 1;
 339     }
 340
 341     //success - return actual length and the buffer
 342     *pOutSize = nActualLength;
 343     return theBuffer;
 344 }
 345
 346 // ----------------------------------------------------------------------------
 347 // wxMBConvLibc
 348 // ----------------------------------------------------------------------------
 349
 350 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 351 {
 352     return wxMB2WC(buf, psz, n);
 353 }
 354
 355 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 356 {
 357     return wxWC2MB(buf, psz, n);
 358 }
 359
 360 #ifdef __UNIX__
 361
 362 // ----------------------------------------------------------------------------
 363 // wxConvBrokenFileNames
 364 // ----------------------------------------------------------------------------
 365
 366 wxConvBrokenFileNames::wxConvBrokenFileNames()
 367 {
 368     // decide which conversion to use for the file names
 369
 370     // (1) this variable exists for the sole purpose of specifying the encoding
 371     //     of the filenames for GTK+ programs, so use it if it is set
 372     wxString encName(wxGetenv(_T("G_FILENAME_ENCODING")));
 373     encName.MakeUpper();
 374     if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
 375     {
 376         m_conv = new wxCSConv(encName);
 377     }
 378     else // no G_FILENAME_ENCODING
 379     {
 380         if ( encName.empty() )
 381             encName = wxLocale::GetSystemEncodingName().Upper();
 382
 383         // (2) if a non default locale is set, assume that the user wants his
 384         //     filenames in this locale too
 385         if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
 386         {
 387             wxSetEnv(_T("G_FILENAME_ENCODING"), encName);
 388             m_conv = new wxMBConvLibc;
 389         }
 390         else
 391         {
 392             // (3) finally use UTF-8 by default
 393             m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 394         }
 395     }
 396 }
 397
 398 size_t
 399 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 400                              const char *psz,
 401                              size_t outputSize) const
 402 {
 403     return m_conv->MB2WC( outputBuf, psz, outputSize );
 404 }
 405
 406 size_t
 407 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 408                              const wchar_t *psz,
 409                              size_t outputSize) const
 410 {
 411     return m_conv->WC2MB( outputBuf, psz, outputSize );
 412 }
 413
 414 #endif
 415
 416 // ----------------------------------------------------------------------------
 417 // UTF-7
 418 // ----------------------------------------------------------------------------
 419
 420 // Implementation (C) 2004 Fredrik Roubert
 421
 422 //
 423 // BASE64 decoding table
 424 //
 425 static const unsigned char utf7unb64[] =
 426 {
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 432     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 433     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 434     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 435     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 436     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 437     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 438     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 439     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 440     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 441     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 442     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 443     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 444     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 450     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 451     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 452     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 453     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 454     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 455     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 456     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 457     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 458     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 459 };
 460
 461 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 462 {
 463     size_t len = 0;
 464
 465     while (*psz && ((!buf) || (len < n)))
 466     {
 467         unsigned char cc = *psz++;
 468         if (cc != '+')
 469         {
 470             // plain ASCII char
 471             if (buf)
 472                 *buf++ = cc;
 473             len++;
 474         }
 475         else if (*psz == '-')
 476         {
 477             // encoded plus sign
 478             if (buf)
 479                 *buf++ = cc;
 480             len++;
 481             psz++;
 482         }
 483         else
 484         {
 485             // BASE64 encoded string
 486             bool lsb;
 487             unsigned char c;
 488             unsigned int d, l;
 489             for (lsb = false, d = 0, l = 0;
 490                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 491             {
 492                 d <<= 6;
 493                 d += cc;
 494                 for (l += 6; l >= 8; lsb = !lsb)
 495                 {
 496                     c = (unsigned char)((d >> (l -= 8)) % 256);
 497                     if (lsb)
 498                     {
 499                         if (buf)
 500                             *buf++ |= c;
 501                         len ++;
 502                     }
 503                     else
 504                         if (buf)
 505                             *buf = (wchar_t)(c << 8);
 506                 }
 507             }
 508             if (*psz == '-')
 509                 psz++;
 510         }
 511     }
 512     if (buf && (len < n))
 513         *buf = 0;
 514     return len;
 515 }
 516
 517 //
 518 // BASE64 encoding table
 519 //
 520 static const unsigned char utf7enb64[] =
 521 {
 522     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 523     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 524     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 525     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 526     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 527     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 528     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 529     '4', '5', '6', '7', '8', '9', '+', '/'
 530 };
 531
 532 //
 533 // UTF-7 encoding table
 534 //
 535 // 0 - Set D (directly encoded characters)
 536 // 1 - Set O (optional direct characters)
 537 // 2 - whitespace characters (optional)
 538 // 3 - special characters
 539 //
 540 static const unsigned char utf7encode[128] =
 541 {
 542     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 543     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 544     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 545     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 546     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 547     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 548     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 549     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 550 };
 551
 552 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 553 {
 554
 555
 556     size_t len = 0;
 557
 558     while (*psz && ((!buf) || (len < n)))
 559     {
 560         wchar_t cc = *psz++;
 561         if (cc < 0x80 && utf7encode[cc] < 1)
 562         {
 563             // plain ASCII char
 564             if (buf)
 565                 *buf++ = (char)cc;
 566             len++;
 567         }
 568 #ifndef WC_UTF16
 569         else if (((wxUint32)cc) > 0xffff)
 570         {
 571             // no surrogate pair generation (yet?)
 572             return (size_t)-1;
 573         }
 574 #endif
 575         else
 576         {
 577             if (buf)
 578                 *buf++ = '+';
 579             len++;
 580             if (cc != '+')
 581             {
 582                 // BASE64 encode string
 583                 unsigned int lsb, d, l;
 584                 for (d = 0, l = 0;; psz++)
 585                 {
 586                     for (lsb = 0; lsb < 2; lsb ++)
 587                     {
 588                         d <<= 8;
 589                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 590
 591                         for (l += 8; l >= 6; )
 592                         {
 593                             l -= 6;
 594                             if (buf)
 595                                 *buf++ = utf7enb64[(d >> l) % 64];
 596                             len++;
 597                         }
 598                     }
 599                     cc = *psz;
 600                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 601                         break;
 602                 }
 603                 if (l != 0)
 604                 {
 605                     if (buf)
 606                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 607                     len++;
 608                 }
 609             }
 610             if (buf)
 611                 *buf++ = '-';
 612             len++;
 613         }
 614     }
 615     if (buf && (len < n))
 616         *buf = 0;
 617     return len;
 618 }
 619
 620 // ----------------------------------------------------------------------------
 621 // UTF-8
 622 // ----------------------------------------------------------------------------
 623
 624 static wxUint32 utf8_max[]=
 625     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 626
 627 // boundaries of the private use area we use to (temporarily) remap invalid
 628 // characters invalid in a UTF-8 encoded string
 629 const wxUint32 wxUnicodePUA = 0x100000;
 630 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 631
 632 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 633 {
 634     size_t len = 0;
 635
 636     while (*psz && ((!buf) || (len < n)))
 637     {
 638         const char *opsz = psz;
 639         bool invalid = false;
 640         unsigned char cc = *psz++, fc = cc;
 641         unsigned cnt;
 642         for (cnt = 0; fc & 0x80; cnt++)
 643             fc <<= 1;
 644         if (!cnt)
 645         {
 646             // plain ASCII char
 647             if (buf)
 648                 *buf++ = cc;
 649             len++;
 650
 651             // escape the escape character for octal escapes
 652             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 653                     && cc == '\\' && (!buf || len < n))
 654             {
 655                 if (buf)
 656                     *buf++ = cc;
 657                 len++;
 658             }
 659         }
 660         else
 661         {
 662             cnt--;
 663             if (!cnt)
 664             {
 665                 // invalid UTF-8 sequence
 666                 invalid = true;
 667             }
 668             else
 669             {
 670                 unsigned ocnt = cnt - 1;
 671                 wxUint32 res = cc & (0x3f >> cnt);
 672                 while (cnt--)
 673                 {
 674                     cc = *psz;
 675                     if ((cc & 0xC0) != 0x80)
 676                     {
 677                         // invalid UTF-8 sequence
 678                         invalid = true;
 679                         break;
 680                     }
 681                     psz++;
 682                     res = (res << 6) | (cc & 0x3f);
 683                 }
 684                 if (invalid || res <= utf8_max[ocnt])
 685                 {
 686                     // illegal UTF-8 encoding
 687                     invalid = true;
 688                 }
 689                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 690                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 691                 {
 692                     // if one of our PUA characters turns up externally
 693                     // it must also be treated as an illegal sequence
 694                     // (a bit like you have to escape an escape character)
 695                     invalid = true;
 696                 }
 697                 else
 698                 {
 699 #ifdef WC_UTF16
 700                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 701                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 702                     if (pa == (size_t)-1)
 703                     {
 704                         invalid = true;
 705                     }
 706                     else
 707                     {
 708                         if (buf)
 709                             buf += pa;
 710                         len += pa;
 711                     }
 712 #else // !WC_UTF16
 713                     if (buf)
 714                         *buf++ = res;
 715                     len++;
 716 #endif // WC_UTF16/!WC_UTF16
 717                 }
 718             }
 719             if (invalid)
 720             {
 721                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 722                 {
 723                     while (opsz < psz && (!buf || len < n))
 724                     {
 725 #ifdef WC_UTF16
 726                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 727                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 728                         wxASSERT(pa != (size_t)-1);
 729                         if (buf)
 730                             buf += pa;
 731                         opsz++;
 732                         len += pa;
 733 #else
 734                         if (buf)
 735                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 736                         opsz++;
 737                         len++;
 738 #endif
 739                     }
 740                 }
 741                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 742                 {
 743                     while (opsz < psz && (!buf || len < n))
 744                     {
 745                         if ( buf && len + 3 < n )
 746                         {
 747                             unsigned char n = *opsz;
 748                             *buf++ = L'\\';
 749                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 750                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 751                             *buf++ = (wchar_t)( L'0' + n % 010 );
 752                         }
 753                         opsz++;
 754                         len += 4;
 755                     }
 756                 }
 757                 else // MAP_INVALID_UTF8_NOT
 758                 {
 759                     return (size_t)-1;
 760                 }
 761             }
 762         }
 763     }
 764     if (buf && (len < n))
 765         *buf = 0;
 766     return len;
 767 }
 768
 769 static inline bool isoctal(wchar_t wch)
 770 {
 771     return L'0' <= wch && wch <= L'7';
 772 }
 773
 774 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 775 {
 776     size_t len = 0;
 777
 778     while (*psz && ((!buf) || (len < n)))
 779     {
 780         wxUint32 cc;
 781 #ifdef WC_UTF16
 782         // cast is ok for WC_UTF16
 783         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 784         psz += (pa == (size_t)-1) ? 1 : pa;
 785 #else
 786         cc=(*psz++) & 0x7fffffff;
 787 #endif
 788
 789         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 790                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 791         {
 792             if (buf)
 793                 *buf++ = (char)(cc - wxUnicodePUA);
 794             len++;
 795         }
 796         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 797                     && cc == L'\\' && psz[0] == L'\\' )
 798         {
 799             if (buf)
 800                 *buf++ = (char)cc;
 801             psz++;
 802             len++;
 803         }
 804         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 805                     cc == L'\\' &&
 806                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 807         {
 808             if (buf)
 809             {
 810                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 811                                  (psz[1] - L'0')*010 +
 812                                  (psz[2] - L'0'));
 813             }
 814
 815             psz += 3;
 816             len++;
 817         }
 818         else
 819         {
 820             unsigned cnt;
 821             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 822             if (!cnt)
 823             {
 824                 // plain ASCII char
 825                 if (buf)
 826                     *buf++ = (char) cc;
 827                 len++;
 828             }
 829
 830             else
 831             {
 832                 len += cnt + 1;
 833                 if (buf)
 834                 {
 835                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 836                     while (cnt--)
 837                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 838                 }
 839             }
 840         }
 841     }
 842
 843     if (buf && (len<n))
 844         *buf = 0;
 845
 846     return len;
 847 }
 848
 849 // ----------------------------------------------------------------------------
 850 // UTF-16
 851 // ----------------------------------------------------------------------------
 852
 853 #ifdef WORDS_BIGENDIAN
 854     #define wxMBConvUTF16straight wxMBConvUTF16BE
 855     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 856 #else
 857     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 858     #define wxMBConvUTF16straight wxMBConvUTF16LE
 859 #endif
 860
 861
 862 #ifdef WC_UTF16
 863
 864 // copy 16bit MB to 16bit String
 865 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 866 {
 867     size_t len=0;
 868
 869     while (*(wxUint16*)psz && (!buf || len < n))
 870     {
 871         if (buf)
 872             *buf++ = *(wxUint16*)psz;
 873         len++;
 874
 875         psz += sizeof(wxUint16);
 876     }
 877     if (buf && len<n)   *buf=0;
 878
 879     return len;
 880 }
 881
 882
 883 // copy 16bit String to 16bit MB
 884 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 885 {
 886     size_t len=0;
 887
 888     while (*psz && (!buf || len < n))
 889     {
 890         if (buf)
 891         {
 892             *(wxUint16*)buf = *psz;
 893             buf += sizeof(wxUint16);
 894         }
 895         len += sizeof(wxUint16);
 896         psz++;
 897     }
 898     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 899
 900     return len;
 901 }
 902
 903
 904 // swap 16bit MB to 16bit String
 905 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 906 {
 907     size_t len=0;
 908
 909     while (*(wxUint16*)psz && (!buf || len < n))
 910     {
 911         if (buf)
 912         {
 913             ((char *)buf)[0] = psz[1];
 914             ((char *)buf)[1] = psz[0];
 915             buf++;
 916         }
 917         len++;
 918         psz += sizeof(wxUint16);
 919     }
 920     if (buf && len<n)   *buf=0;
 921
 922     return len;
 923 }
 924
 925
 926 // swap 16bit MB to 16bit String
 927 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 928 {
 929     size_t len=0;
 930
 931     while (*psz && (!buf || len < n))
 932     {
 933         if (buf)
 934         {
 935             *buf++ = ((char*)psz)[1];
 936             *buf++ = ((char*)psz)[0];
 937         }
 938         len += sizeof(wxUint16);
 939         psz++;
 940     }
 941     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 942
 943     return len;
 944 }
 945
 946
 947 #else // WC_UTF16
 948
 949
 950 // copy 16bit MB to 32bit String
 951 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 952 {
 953     size_t len=0;
 954
 955     while (*(wxUint16*)psz && (!buf || len < n))
 956     {
 957         wxUint32 cc;
 958         size_t pa=decode_utf16((wxUint16*)psz, cc);
 959         if (pa == (size_t)-1)
 960             return pa;
 961
 962         if (buf)
 963             *buf++ = cc;
 964         len++;
 965         psz += pa * sizeof(wxUint16);
 966     }
 967     if (buf && len<n)   *buf=0;
 968
 969     return len;
 970 }
 971
 972
 973 // copy 32bit String to 16bit MB
 974 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 975 {
 976     size_t len=0;
 977
 978     while (*psz && (!buf || len < n))
 979     {
 980         wxUint16 cc[2];
 981         size_t pa=encode_utf16(*psz, cc);
 982
 983         if (pa == (size_t)-1)
 984             return pa;
 985
 986         if (buf)
 987         {
 988             *(wxUint16*)buf = cc[0];
 989             buf += sizeof(wxUint16);
 990             if (pa > 1)
 991             {
 992                 *(wxUint16*)buf = cc[1];
 993                 buf += sizeof(wxUint16);
 994             }
 995         }
 996
 997         len += pa*sizeof(wxUint16);
 998         psz++;
 999     }
1000     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1001
1002     return len;
1003 }
1004
1005
1006 // swap 16bit MB to 32bit String
1007 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1008 {
1009     size_t len=0;
1010
1011     while (*(wxUint16*)psz && (!buf || len < n))
1012     {
1013         wxUint32 cc;
1014         char tmp[4];
1015         tmp[0]=psz[1];  tmp[1]=psz[0];
1016         tmp[2]=psz[3];  tmp[3]=psz[2];
1017
1018         size_t pa=decode_utf16((wxUint16*)tmp, cc);
1019         if (pa == (size_t)-1)
1020             return pa;
1021
1022         if (buf)
1023             *buf++ = cc;
1024
1025         len++;
1026         psz += pa * sizeof(wxUint16);
1027     }
1028     if (buf && len<n)   *buf=0;
1029
1030     return len;
1031 }
1032
1033
1034 // swap 32bit String to 16bit MB
1035 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1036 {
1037     size_t len=0;
1038
1039     while (*psz && (!buf || len < n))
1040     {
1041         wxUint16 cc[2];
1042         size_t pa=encode_utf16(*psz, cc);
1043
1044         if (pa == (size_t)-1)
1045             return pa;
1046
1047         if (buf)
1048         {
1049             *buf++ = ((char*)cc)[1];
1050             *buf++ = ((char*)cc)[0];
1051             if (pa > 1)
1052             {
1053                 *buf++ = ((char*)cc)[3];
1054                 *buf++ = ((char*)cc)[2];
1055             }
1056         }
1057
1058         len += pa*sizeof(wxUint16);
1059         psz++;
1060     }
1061     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1062
1063     return len;
1064 }
1065
1066 #endif // WC_UTF16
1067
1068
1069 // ----------------------------------------------------------------------------
1070 // UTF-32
1071 // ----------------------------------------------------------------------------
1072
// The methods below are written once for "straight" (the 32-bit values are
// copied as they are) and once for "swap" (their bytes are reversed); these
// aliases decide which of the BE/LE classes gets which implementation: on a
// big endian machine BE is the straight one, otherwise LE is.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global instances of the two UTF-32 converters
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1084
1085
1086 #ifdef WC_UTF16
1087
1088 // copy 32bit MB to 16bit String
1089 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1090 {
1091     size_t len=0;
1092
1093     while (*(wxUint32*)psz && (!buf || len < n))
1094     {
1095         wxUint16 cc[2];
1096
1097         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1098         if (pa == (size_t)-1)
1099             return pa;
1100
1101         if (buf)
1102         {
1103             *buf++ = cc[0];
1104             if (pa > 1)
1105                 *buf++ = cc[1];
1106         }
1107         len += pa;
1108         psz += sizeof(wxUint32);
1109     }
1110     if (buf && len<n)   *buf=0;
1111
1112     return len;
1113 }
1114
1115
1116 // copy 16bit String to 32bit MB
1117 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1118 {
1119     size_t len=0;
1120
1121     while (*psz && (!buf || len < n))
1122     {
1123         wxUint32 cc;
1124
1125         // cast is ok for WC_UTF16
1126         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1127         if (pa == (size_t)-1)
1128             return pa;
1129
1130         if (buf)
1131         {
1132             *(wxUint32*)buf = cc;
1133             buf += sizeof(wxUint32);
1134         }
1135         len += sizeof(wxUint32);
1136         psz += pa;
1137     }
1138
1139     if (buf && len<=n-sizeof(wxUint32))
1140         *(wxUint32*)buf=0;
1141
1142     return len;
1143 }
1144
1145
1146
1147 // swap 32bit MB to 16bit String
1148 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1149 {
1150     size_t len=0;
1151
1152     while (*(wxUint32*)psz && (!buf || len < n))
1153     {
1154         char tmp[4];
1155         tmp[0] = psz[3];   tmp[1] = psz[2];
1156         tmp[2] = psz[1];   tmp[3] = psz[0];
1157
1158
1159         wxUint16 cc[2];
1160
1161         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1162         if (pa == (size_t)-1)
1163             return pa;
1164
1165         if (buf)
1166         {
1167             *buf++ = cc[0];
1168             if (pa > 1)
1169                 *buf++ = cc[1];
1170         }
1171         len += pa;
1172         psz += sizeof(wxUint32);
1173     }
1174
1175     if (buf && len<n)
1176         *buf=0;
1177
1178     return len;
1179 }
1180
1181
1182 // swap 16bit String to 32bit MB
1183 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1184 {
1185     size_t len=0;
1186
1187     while (*psz && (!buf || len < n))
1188     {
1189         char cc[4];
1190
1191         // cast is ok for WC_UTF16
1192         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1193         if (pa == (size_t)-1)
1194             return pa;
1195
1196         if (buf)
1197         {
1198             *buf++ = cc[3];
1199             *buf++ = cc[2];
1200             *buf++ = cc[1];
1201             *buf++ = cc[0];
1202         }
1203         len += sizeof(wxUint32);
1204         psz += pa;
1205     }
1206
1207     if (buf && len<=n-sizeof(wxUint32))
1208         *(wxUint32*)buf=0;
1209
1210     return len;
1211 }
1212
1213 #else // WC_UTF16
1214
1215
1216 // copy 32bit MB to 32bit String
1217 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1218 {
1219     size_t len=0;
1220
1221     while (*(wxUint32*)psz && (!buf || len < n))
1222     {
1223         if (buf)
1224             *buf++ = *(wxUint32*)psz;
1225         len++;
1226         psz += sizeof(wxUint32);
1227     }
1228
1229     if (buf && len<n)
1230         *buf=0;
1231
1232     return len;
1233 }
1234
1235
1236 // copy 32bit String to 32bit MB
1237 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1238 {
1239     size_t len=0;
1240
1241     while (*psz && (!buf || len < n))
1242     {
1243         if (buf)
1244         {
1245             *(wxUint32*)buf = *psz;
1246             buf += sizeof(wxUint32);
1247         }
1248
1249         len += sizeof(wxUint32);
1250         psz++;
1251     }
1252
1253     if (buf && len<=n-sizeof(wxUint32))
1254         *(wxUint32*)buf=0;
1255
1256     return len;
1257 }
1258
1259
1260 // swap 32bit MB to 32bit String
1261 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1262 {
1263     size_t len=0;
1264
1265     while (*(wxUint32*)psz && (!buf || len < n))
1266     {
1267         if (buf)
1268         {
1269             ((char *)buf)[0] = psz[3];
1270             ((char *)buf)[1] = psz[2];
1271             ((char *)buf)[2] = psz[1];
1272             ((char *)buf)[3] = psz[0];
1273             buf++;
1274         }
1275         len++;
1276         psz += sizeof(wxUint32);
1277     }
1278
1279     if (buf && len<n)
1280         *buf=0;
1281
1282     return len;
1283 }
1284
1285
1286 // swap 32bit String to 32bit MB
1287 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1288 {
1289     size_t len=0;
1290
1291     while (*psz && (!buf || len < n))
1292     {
1293         if (buf)
1294         {
1295             *buf++ = ((char *)psz)[3];
1296             *buf++ = ((char *)psz)[2];
1297             *buf++ = ((char *)psz)[1];
1298             *buf++ = ((char *)psz)[0];
1299         }
1300         len += sizeof(wxUint32);
1301         psz++;
1302     }
1303
1304     if (buf && len<=n-sizeof(wxUint32))
1305         *(wxUint32*)buf=0;
1306
1307     return len;
1308 }
1309
1310
1311 #endif // WC_UTF16
1312
1313
1314 // ============================================================================
1315 // The classes doing conversion using the iconv_xxx() functions
1316 // ============================================================================
1317
1318 #ifdef HAVE_ICONV
1319
1320 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1321 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1322 //     (unless there's yet another bug in glibc) the only case when iconv()
1323 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1324 //     left in the input buffer -- when _real_ error occurs,
1325 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1326 //     iconv() failure.
1327 //     [This bug does not appear in glibc 2.2.]
// NB: the macro parameters are parenthesized so that passing an expression
//     (e.g. "a | b") can't change the meaning of the comparison
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast a pointer to the input pointer to the type of the second parameter of
// iconv(), whose constness is given by ICONV_CONST
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1336
1337 // ----------------------------------------------------------------------------
1338 // wxMBConv_iconv: encapsulates an iconv character set
1339 // ----------------------------------------------------------------------------
1340
1341 class wxMBConv_iconv : public wxMBConv
1342 {
1343 public:
1344     wxMBConv_iconv(const wxChar *name);
1345     virtual ~wxMBConv_iconv();
1346
1347     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1348     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1349
1350     bool IsOk() const
1351         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1352
1353 protected:
1354     // the iconv handlers used to translate from multibyte to wide char and in
1355     // the other direction
1356     iconv_t m2w,
1357             w2m;
1358 #if wxUSE_THREADS
1359     // guards access to m2w and w2m objects
1360     wxMutex m_iconvMutex;
1361 #endif
1362
1363 private:
1364     // the name (for iconv_open()) of a wide char charset -- if none is
1365     // available on this machine, it will remain NULL
1366     static const char *ms_wcCharsetName;
1367
1368     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1369     // different endian-ness than the native one
1370     static bool ms_wcNeedsSwap;
1371 };
1372
// NULL means that the wide char charset hasn't been determined yet: the
// constructor probes iconv for it as long as this is the case
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;

// updated by the same probe in the constructor
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1375
1376 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1377 {
1378     // Do it the hard way
1379     char cname[100];
1380     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1381         cname[i] = (char) name[i];
1382
1383     // check for charset that represents wchar_t:
1384     if (ms_wcCharsetName == NULL)
1385     {
1386         ms_wcNeedsSwap = false;
1387
1388         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1389         ms_wcCharsetName = WC_NAME_BEST;
1390         m2w = iconv_open(ms_wcCharsetName, cname);
1391
1392         if (m2w == (iconv_t)-1)
1393         {
1394             // try charset w/o bytesex info (e.g. "UCS4")
1395             // and check for bytesex ourselves:
1396             ms_wcCharsetName = WC_NAME;
1397             m2w = iconv_open(ms_wcCharsetName, cname);
1398
1399             // last bet, try if it knows WCHAR_T pseudo-charset
1400             if (m2w == (iconv_t)-1)
1401             {
1402                 ms_wcCharsetName = "WCHAR_T";
1403                 m2w = iconv_open(ms_wcCharsetName, cname);
1404             }
1405
1406             if (m2w != (iconv_t)-1)
1407             {
1408                 char    buf[2], *bufPtr;
1409                 wchar_t wbuf[2], *wbufPtr;
1410                 size_t  insz, outsz;
1411                 size_t  res;
1412
1413                 buf[0] = 'A';
1414                 buf[1] = 0;
1415                 wbuf[0] = 0;
1416                 insz = 2;
1417                 outsz = SIZEOF_WCHAR_T * 2;
1418                 wbufPtr = wbuf;
1419                 bufPtr = buf;
1420
1421                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1422                             (char**)&wbufPtr, &outsz);
1423
1424                 if (ICONV_FAILED(res, insz))
1425                 {
1426                     ms_wcCharsetName = NULL;
1427                     wxLogLastError(wxT("iconv"));
1428                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1429                 }
1430                 else
1431                 {
1432                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1433                 }
1434             }
1435             else
1436             {
1437                 ms_wcCharsetName = NULL;
1438
1439                 // VS: we must not output an error here, since wxWidgets will safely
1440                 //     fall back to using wxEncodingConverter.
1441                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1442                 //wxLogError(
1443             }
1444         }
1445         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1446     }
1447     else // we already have ms_wcCharsetName
1448     {
1449         m2w = iconv_open(ms_wcCharsetName, cname);
1450     }
1451
1452     // NB: don't ever pass NULL to iconv_open(), it may crash!
1453     if ( ms_wcCharsetName )
1454     {
1455         w2m = iconv_open( cname, ms_wcCharsetName);
1456     }
1457     else
1458     {
1459         w2m = (iconv_t)-1;
1460     }
1461 }
1462
1463 wxMBConv_iconv::~wxMBConv_iconv()
1464 {
1465     if ( m2w != (iconv_t)-1 )
1466         iconv_close(m2w);
1467     if ( w2m != (iconv_t)-1 )
1468         iconv_close(w2m);
1469 }
1470
1471 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1472 {
1473 #if wxUSE_THREADS
1474     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1475     //     Unfortunately there is a couple of global wxCSConv objects such as
1476     //     wxConvLocal that are used all over wx code, so we have to make sure
1477     //     the handle is used by at most one thread at the time. Otherwise
1478     //     only a few wx classes would be safe to use from non-main threads
1479     //     as MB<->WC conversion would fail "randomly".
1480     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1481 #endif
1482
1483     size_t inbuf = strlen(psz);
1484     size_t outbuf = n * SIZEOF_WCHAR_T;
1485     size_t res, cres;
1486     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1487     wchar_t *bufPtr = buf;
1488     const char *pszPtr = psz;
1489
1490     if (buf)
1491     {
1492         // have destination buffer, convert there
1493         cres = iconv(m2w,
1494                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1495                      (char**)&bufPtr, &outbuf);
1496         res = n - (outbuf / SIZEOF_WCHAR_T);
1497
1498         if (ms_wcNeedsSwap)
1499         {
1500             // convert to native endianness
1501             WC_BSWAP(buf /* _not_ bufPtr */, res)
1502         }
1503
1504         // NB: iconv was given only strlen(psz) characters on input, and so
1505         //     it couldn't convert the trailing zero. Let's do it ourselves
1506         //     if there's some room left for it in the output buffer.
1507         if (res < n)
1508             buf[res] = 0;
1509     }
1510     else
1511     {
1512         // no destination buffer... convert using temp buffer
1513         // to calculate destination buffer requirement
1514         wchar_t tbuf[8];
1515         res = 0;
1516         do {
1517             bufPtr = tbuf;
1518             outbuf = 8*SIZEOF_WCHAR_T;
1519
1520             cres = iconv(m2w,
1521                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1522                          (char**)&bufPtr, &outbuf );
1523
1524             res += 8-(outbuf/SIZEOF_WCHAR_T);
1525         } while ((cres==(size_t)-1) && (errno==E2BIG));
1526     }
1527
1528     if (ICONV_FAILED(cres, inbuf))
1529     {
1530         //VS: it is ok if iconv fails, hence trace only
1531         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1532         return (size_t)-1;
1533     }
1534
1535     return res;
1536 }
1537
1538 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1539 {
1540 #if wxUSE_THREADS
1541     // NB: explained in MB2WC
1542     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1543 #endif
1544
1545     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1546     size_t outbuf = n;
1547     size_t res, cres;
1548
1549     wchar_t *tmpbuf = 0;
1550
1551     if (ms_wcNeedsSwap)
1552     {
1553         // need to copy to temp buffer to switch endianness
1554         // this absolutely doesn't rock!
1555         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1556         //  could be in read-only memory, or be accessed in some other thread)
1557         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1558         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1559         WC_BSWAP(tmpbuf, inbuf)
1560         psz=tmpbuf;
1561     }
1562
1563     if (buf)
1564     {
1565         // have destination buffer, convert there
1566         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1567
1568         res = n-outbuf;
1569
1570         // NB: iconv was given only wcslen(psz) characters on input, and so
1571         //     it couldn't convert the trailing zero. Let's do it ourselves
1572         //     if there's some room left for it in the output buffer.
1573         if (res < n)
1574             buf[0] = 0;
1575     }
1576     else
1577     {
1578         // no destination buffer... convert using temp buffer
1579         // to calculate destination buffer requirement
1580         char tbuf[16];
1581         res = 0;
1582         do {
1583             buf = tbuf; outbuf = 16;
1584
1585             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1586
1587             res += 16 - outbuf;
1588         } while ((cres==(size_t)-1) && (errno==E2BIG));
1589     }
1590
1591     if (ms_wcNeedsSwap)
1592     {
1593         free(tmpbuf);
1594     }
1595
1596     if (ICONV_FAILED(cres, inbuf))
1597     {
1598         //VS: it is ok if iconv fails, hence trace only
1599         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1600         return (size_t)-1;
1601     }
1602
1603     return res;
1604 }
1605
1606 #endif // HAVE_ICONV
1607
1608
1609 // ============================================================================
1610 // Win32 conversion classes
1611 // ============================================================================
1612
1613 #ifdef wxHAVE_WIN32_MB2WC
1614
1615 // from utils.cpp
1616 #if wxUSE_FONTMAP
// the wxMBConv_win32 constructors below store the values returned by these
// functions as the code page to use; presumably -1 is returned when there is
// no matching code page (this is what IsOk() tests for) -- TODO confirm
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1619 #endif
1620
1621 class wxMBConv_win32 : public wxMBConv
1622 {
1623 public:
1624     wxMBConv_win32()
1625     {
1626         m_CodePage = CP_ACP;
1627     }
1628
1629 #if wxUSE_FONTMAP
1630     wxMBConv_win32(const wxChar* name)
1631     {
1632         m_CodePage = wxCharsetToCodepage(name);
1633     }
1634
1635     wxMBConv_win32(wxFontEncoding encoding)
1636     {
1637         m_CodePage = wxEncodingToCodepage(encoding);
1638     }
1639 #endif
1640
1641     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1642     {
1643         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1644         // the behaviour is not compatible with the Unix version (using iconv)
1645         // and break the library itself, e.g. wxTextInputStream::NextChar()
1646         // wouldn't work if reading an incomplete MB char didn't result in an
1647         // error
1648         //
1649         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1650         // an error (tested under Windows Server 2003) and apparently it is
1651         // done on purpose, i.e. the function accepts any input in this case
1652         // and although I'd prefer to return error on ill-formed output, our
1653         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1654         // explicitly ill-formed according to RFC 2152) neither so we don't
1655         // even have any fallback here...
1656         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1657
1658         const size_t len = ::MultiByteToWideChar
1659                              (
1660                                 m_CodePage,     // code page
1661                                 flags,          // flags: fall on error
1662                                 psz,            // input string
1663                                 -1,             // its length (NUL-terminated)
1664                                 buf,            // output string
1665                                 buf ? n : 0     // size of output buffer
1666                              );
1667
1668         // note that it returns count of written chars for buf != NULL and size
1669         // of the needed buffer for buf == NULL so in either case the length of
1670         // the string (which never includes the terminating NUL) is one less
1671         return len ? len - 1 : (size_t)-1;
1672     }
1673
1674     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1675     {
1676         /*
1677             we have a problem here: by default, WideCharToMultiByte() may
1678             replace characters unrepresentable in the target code page with bad
1679             quality approximations such as turning "1/2" symbol (U+00BD) into
1680             "1" for the code pages which don't have it and we, obviously, want
1681             to avoid this at any price
1682
1683             the trouble is that this function does it _silently_, i.e. it won't
1684             even tell us whether it did or not... Win98/2000 and higher provide
1685             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1686             we have to resort to a round trip, i.e. check that converting back
1687             results in the same string -- this is, of course, expensive but
1688             otherwise we simply can't be sure to not garble the data.
1689          */
1690
1691         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1692         // it doesn't work with CJK encodings (which we test for rather roughly
1693         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1694         // supporting it
1695         BOOL usedDef wxDUMMY_INITIALIZE(false);
1696         BOOL *pUsedDef;
1697         int flags;
1698         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1699         {
1700             // it's our lucky day
1701             flags = WC_NO_BEST_FIT_CHARS;
1702             pUsedDef = &usedDef;
1703         }
1704         else // old system or unsupported encoding
1705         {
1706             flags = 0;
1707             pUsedDef = NULL;
1708         }
1709
1710         const size_t len = ::WideCharToMultiByte
1711                              (
1712                                 m_CodePage,     // code page
1713                                 flags,          // either none or no best fit
1714                                 pwz,            // input string
1715                                 -1,             // it is (wide) NUL-terminated
1716                                 buf,            // output buffer
1717                                 buf ? n : 0,    // and its size
1718                                 NULL,           // default "replacement" char
1719                                 pUsedDef        // [out] was it used?
1720                              );
1721
1722         if ( !len )
1723         {
1724             // function totally failed
1725             return (size_t)-1;
1726         }
1727
1728         // if we were really converting, check if we succeeded
1729         if ( buf )
1730         {
1731             if ( flags )
1732             {
1733                 // check if the conversion failed, i.e. if any replacements
1734                 // were done
1735                 if ( usedDef )
1736                     return (size_t)-1;
1737             }
1738             else // we must resort to double tripping...
1739             {
1740                 wxWCharBuffer wcBuf(n);
1741                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1742                         wcscmp(wcBuf, pwz) != 0 )
1743                 {
1744                     // we didn't obtain the same thing we started from, hence
1745                     // the conversion was lossy and we consider that it failed
1746                     return (size_t)-1;
1747                 }
1748             }
1749         }
1750
1751         // see the comment above for the reason of "len - 1"
1752         return len - 1;
1753     }
1754
1755     bool IsOk() const { return m_CodePage != -1; }
1756
1757 private:
1758     static bool CanUseNoBestFit()
1759     {
1760         static int s_isWin98Or2k = -1;
1761
1762         if ( s_isWin98Or2k == -1 )
1763         {
1764             int verMaj, verMin;
1765             switch ( wxGetOsVersion(&verMaj, &verMin) )
1766             {
1767                 case wxWIN95:
1768                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1769                     break;
1770
1771                 case wxWINDOWS_NT:
1772                     s_isWin98Or2k = verMaj >= 5;
1773                     break;
1774
1775                 default:
1776                     // unknown, be conseravtive by default
1777                     s_isWin98Or2k = 0;
1778             }
1779
1780             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1781         }
1782
1783         return s_isWin98Or2k == 1;
1784     }
1785
1786     long m_CodePage;
1787 };
1788
1789 #endif // wxHAVE_WIN32_MB2WC
1790
1791 // ============================================================================
1792 // Cocoa conversion classes
1793 // ============================================================================
1794
1795 #if defined(__WXCOCOA__)
1796
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1800
1801 #include <CoreFoundation/CFString.h>
1802 #include <CoreFoundation/CFStringEncodingExt.h>
1803
1804 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1805 {
1806     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1807     if ( encoding == wxFONTENCODING_DEFAULT )
1808     {
1809         enc = CFStringGetSystemEncoding();
1810     }
1811     else switch( encoding)
1812     {
1813         case wxFONTENCODING_ISO8859_1 :
1814             enc = kCFStringEncodingISOLatin1 ;
1815             break ;
1816         case wxFONTENCODING_ISO8859_2 :
1817             enc = kCFStringEncodingISOLatin2;
1818             break ;
1819         case wxFONTENCODING_ISO8859_3 :
1820             enc = kCFStringEncodingISOLatin3 ;
1821             break ;
1822         case wxFONTENCODING_ISO8859_4 :
1823             enc = kCFStringEncodingISOLatin4;
1824             break ;
1825         case wxFONTENCODING_ISO8859_5 :
1826             enc = kCFStringEncodingISOLatinCyrillic;
1827             break ;
1828         case wxFONTENCODING_ISO8859_6 :
1829             enc = kCFStringEncodingISOLatinArabic;
1830             break ;
1831         case wxFONTENCODING_ISO8859_7 :
1832             enc = kCFStringEncodingISOLatinGreek;
1833             break ;
1834         case wxFONTENCODING_ISO8859_8 :
1835             enc = kCFStringEncodingISOLatinHebrew;
1836             break ;
1837         case wxFONTENCODING_ISO8859_9 :
1838             enc = kCFStringEncodingISOLatin5;
1839             break ;
1840         case wxFONTENCODING_ISO8859_10 :
1841             enc = kCFStringEncodingISOLatin6;
1842             break ;
1843         case wxFONTENCODING_ISO8859_11 :
1844             enc = kCFStringEncodingISOLatinThai;
1845             break ;
1846         case wxFONTENCODING_ISO8859_13 :
1847             enc = kCFStringEncodingISOLatin7;
1848             break ;
1849         case wxFONTENCODING_ISO8859_14 :
1850             enc = kCFStringEncodingISOLatin8;
1851             break ;
1852         case wxFONTENCODING_ISO8859_15 :
1853             enc = kCFStringEncodingISOLatin9;
1854             break ;
1855
1856         case wxFONTENCODING_KOI8 :
1857             enc = kCFStringEncodingKOI8_R;
1858             break ;
1859         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1860             enc = kCFStringEncodingDOSRussian;
1861             break ;
1862
1863 //      case wxFONTENCODING_BULGARIAN :
1864 //          enc = ;
1865 //          break ;
1866
1867         case wxFONTENCODING_CP437 :
1868             enc =kCFStringEncodingDOSLatinUS ;
1869             break ;
1870         case wxFONTENCODING_CP850 :
1871             enc = kCFStringEncodingDOSLatin1;
1872             break ;
1873         case wxFONTENCODING_CP852 :
1874             enc = kCFStringEncodingDOSLatin2;
1875             break ;
1876         case wxFONTENCODING_CP855 :
1877             enc = kCFStringEncodingDOSCyrillic;
1878             break ;
1879         case wxFONTENCODING_CP866 :
1880             enc =kCFStringEncodingDOSRussian ;
1881             break ;
1882         case wxFONTENCODING_CP874 :
1883             enc = kCFStringEncodingDOSThai;
1884             break ;
1885         case wxFONTENCODING_CP932 :
1886             enc = kCFStringEncodingDOSJapanese;
1887             break ;
1888         case wxFONTENCODING_CP936 :
1889             enc =kCFStringEncodingDOSChineseSimplif ;
1890             break ;
1891         case wxFONTENCODING_CP949 :
1892             enc = kCFStringEncodingDOSKorean;
1893             break ;
1894         case wxFONTENCODING_CP950 :
1895             enc = kCFStringEncodingDOSChineseTrad;
1896             break ;
1897         case wxFONTENCODING_CP1250 :
1898             enc = kCFStringEncodingWindowsLatin2;
1899             break ;
1900         case wxFONTENCODING_CP1251 :
1901             enc =kCFStringEncodingWindowsCyrillic ;
1902             break ;
1903         case wxFONTENCODING_CP1252 :
1904             enc =kCFStringEncodingWindowsLatin1 ;
1905             break ;
1906         case wxFONTENCODING_CP1253 :
1907             enc = kCFStringEncodingWindowsGreek;
1908             break ;
1909         case wxFONTENCODING_CP1254 :
1910             enc = kCFStringEncodingWindowsLatin5;
1911             break ;
1912         case wxFONTENCODING_CP1255 :
1913             enc =kCFStringEncodingWindowsHebrew ;
1914             break ;
1915         case wxFONTENCODING_CP1256 :
1916             enc =kCFStringEncodingWindowsArabic ;
1917             break ;
1918         case wxFONTENCODING_CP1257 :
1919             enc = kCFStringEncodingWindowsBalticRim;
1920             break ;
1921 //   This only really encodes to UTF7 (if that) evidently
1922 //        case wxFONTENCODING_UTF7 :
1923 //            enc = kCFStringEncodingNonLossyASCII ;
1924 //            break ;
1925         case wxFONTENCODING_UTF8 :
1926             enc = kCFStringEncodingUTF8 ;
1927             break ;
1928         case wxFONTENCODING_EUC_JP :
1929             enc = kCFStringEncodingEUC_JP;
1930             break ;
1931         case wxFONTENCODING_UTF16 :
1932             enc = kCFStringEncodingUnicode ;
1933             break ;
1934         case wxFONTENCODING_MACROMAN :
1935             enc = kCFStringEncodingMacRoman ;
1936             break ;
1937         case wxFONTENCODING_MACJAPANESE :
1938             enc = kCFStringEncodingMacJapanese ;
1939             break ;
1940         case wxFONTENCODING_MACCHINESETRAD :
1941             enc = kCFStringEncodingMacChineseTrad ;
1942             break ;
1943         case wxFONTENCODING_MACKOREAN :
1944             enc = kCFStringEncodingMacKorean ;
1945             break ;
1946         case wxFONTENCODING_MACARABIC :
1947             enc = kCFStringEncodingMacArabic ;
1948             break ;
1949         case wxFONTENCODING_MACHEBREW :
1950             enc = kCFStringEncodingMacHebrew ;
1951             break ;
1952         case wxFONTENCODING_MACGREEK :
1953             enc = kCFStringEncodingMacGreek ;
1954             break ;
1955         case wxFONTENCODING_MACCYRILLIC :
1956             enc = kCFStringEncodingMacCyrillic ;
1957             break ;
1958         case wxFONTENCODING_MACDEVANAGARI :
1959             enc = kCFStringEncodingMacDevanagari ;
1960             break ;
1961         case wxFONTENCODING_MACGURMUKHI :
1962             enc = kCFStringEncodingMacGurmukhi ;
1963             break ;
1964         case wxFONTENCODING_MACGUJARATI :
1965             enc = kCFStringEncodingMacGujarati ;
1966             break ;
1967         case wxFONTENCODING_MACORIYA :
1968             enc = kCFStringEncodingMacOriya ;
1969             break ;
1970         case wxFONTENCODING_MACBENGALI :
1971             enc = kCFStringEncodingMacBengali ;
1972             break ;
1973         case wxFONTENCODING_MACTAMIL :
1974             enc = kCFStringEncodingMacTamil ;
1975             break ;
1976         case wxFONTENCODING_MACTELUGU :
1977             enc = kCFStringEncodingMacTelugu ;
1978             break ;
1979         case wxFONTENCODING_MACKANNADA :
1980             enc = kCFStringEncodingMacKannada ;
1981             break ;
1982         case wxFONTENCODING_MACMALAJALAM :
1983             enc = kCFStringEncodingMacMalayalam ;
1984             break ;
1985         case wxFONTENCODING_MACSINHALESE :
1986             enc = kCFStringEncodingMacSinhalese ;
1987             break ;
1988         case wxFONTENCODING_MACBURMESE :
1989             enc = kCFStringEncodingMacBurmese ;
1990             break ;
1991         case wxFONTENCODING_MACKHMER :
1992             enc = kCFStringEncodingMacKhmer ;
1993             break ;
1994         case wxFONTENCODING_MACTHAI :
1995             enc = kCFStringEncodingMacThai ;
1996             break ;
1997         case wxFONTENCODING_MACLAOTIAN :
1998             enc = kCFStringEncodingMacLaotian ;
1999             break ;
2000         case wxFONTENCODING_MACGEORGIAN :
2001             enc = kCFStringEncodingMacGeorgian ;
2002             break ;
2003         case wxFONTENCODING_MACARMENIAN :
2004             enc = kCFStringEncodingMacArmenian ;
2005             break ;
2006         case wxFONTENCODING_MACCHINESESIMP :
2007             enc = kCFStringEncodingMacChineseSimp ;
2008             break ;
2009         case wxFONTENCODING_MACTIBETAN :
2010             enc = kCFStringEncodingMacTibetan ;
2011             break ;
2012         case wxFONTENCODING_MACMONGOLIAN :
2013             enc = kCFStringEncodingMacMongolian ;
2014             break ;
2015         case wxFONTENCODING_MACETHIOPIC :
2016             enc = kCFStringEncodingMacEthiopic ;
2017             break ;
2018         case wxFONTENCODING_MACCENTRALEUR :
2019             enc = kCFStringEncodingMacCentralEurRoman ;
2020             break ;
2021         case wxFONTENCODING_MACVIATNAMESE :
2022             enc = kCFStringEncodingMacVietnamese ;
2023             break ;
2024         case wxFONTENCODING_MACARABICEXT :
2025             enc = kCFStringEncodingMacExtArabic ;
2026             break ;
2027         case wxFONTENCODING_MACSYMBOL :
2028             enc = kCFStringEncodingMacSymbol ;
2029             break ;
2030         case wxFONTENCODING_MACDINGBATS :
2031             enc = kCFStringEncodingMacDingbats ;
2032             break ;
2033         case wxFONTENCODING_MACTURKISH :
2034             enc = kCFStringEncodingMacTurkish ;
2035             break ;
2036         case wxFONTENCODING_MACCROATIAN :
2037             enc = kCFStringEncodingMacCroatian ;
2038             break ;
2039         case wxFONTENCODING_MACICELANDIC :
2040             enc = kCFStringEncodingMacIcelandic ;
2041             break ;
2042         case wxFONTENCODING_MACROMANIAN :
2043             enc = kCFStringEncodingMacRomanian ;
2044             break ;
2045         case wxFONTENCODING_MACCELTIC :
2046             enc = kCFStringEncodingMacCeltic ;
2047             break ;
2048         case wxFONTENCODING_MACGAELIC :
2049             enc = kCFStringEncodingMacGaelic ;
2050             break ;
2051 //      case wxFONTENCODING_MACKEYBOARD :
2052 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2053 //          break ;
2054         default :
2055             // because gcc is picky
2056             break ;
2057     } ;
2058     return enc ;
2059 }
2060
2061 class wxMBConv_cocoa : public wxMBConv
2062 {
2063 public:
2064     wxMBConv_cocoa()
2065     {
2066         Init(CFStringGetSystemEncoding()) ;
2067     }
2068
2069 #if wxUSE_FONTMAP
2070     wxMBConv_cocoa(const wxChar* name)
2071     {
2072         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2073     }
2074 #endif
2075
2076     wxMBConv_cocoa(wxFontEncoding encoding)
2077     {
2078         Init( wxCFStringEncFromFontEnc(encoding) );
2079     }
2080
2081     ~wxMBConv_cocoa()
2082     {
2083     }
2084
2085     void Init( CFStringEncoding encoding)
2086     {
2087         m_encoding = encoding ;
2088     }
2089
2090     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2091     {
2092         wxASSERT(szUnConv);
2093
2094         CFStringRef theString = CFStringCreateWithBytes (
2095                                                 NULL, //the allocator
2096                                                 (const UInt8*)szUnConv,
2097                                                 strlen(szUnConv),
2098                                                 m_encoding,
2099                                                 false //no BOM/external representation
2100                                                 );
2101
2102         wxASSERT(theString);
2103
2104         size_t nOutLength = CFStringGetLength(theString);
2105
2106         if (szOut == NULL)
2107         {
2108             CFRelease(theString);
2109             return nOutLength;
2110         }
2111
2112         CFRange theRange = { 0, nOutSize };
2113
2114 #if SIZEOF_WCHAR_T == 4
2115         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2116 #endif
2117
2118         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2119
2120         CFRelease(theString);
2121
2122         szUniCharBuffer[nOutLength] = '\0' ;
2123
2124 #if SIZEOF_WCHAR_T == 4
2125         wxMBConvUTF16 converter ;
2126         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2127         delete[] szUniCharBuffer;
2128 #endif
2129
2130         return nOutLength;
2131     }
2132
2133     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2134     {
2135         wxASSERT(szUnConv);
2136
2137         size_t nRealOutSize;
2138         size_t nBufSize = wxWcslen(szUnConv);
2139         UniChar* szUniBuffer = (UniChar*) szUnConv;
2140
2141 #if SIZEOF_WCHAR_T == 4
2142         wxMBConvUTF16BE converter ;
2143         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2144         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2145         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2146         nBufSize /= sizeof(UniChar);
2147 #endif
2148
2149         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2150                                 NULL, //allocator
2151                                 szUniBuffer,
2152                                 nBufSize,
2153                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2154                             );
2155
2156         wxASSERT(theString);
2157
2158         //Note that CER puts a BOM when converting to unicode
2159         //so we  check and use getchars instead in that case
2160         if (m_encoding == kCFStringEncodingUnicode)
2161         {
2162             if (szOut != NULL)
2163                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2164
2165             nRealOutSize = CFStringGetLength(theString) + 1;
2166         }
2167         else
2168         {
2169             CFStringGetBytes(
2170                 theString,
2171                 CFRangeMake(0, CFStringGetLength(theString)),
2172                 m_encoding,
2173                 0, //what to put in characters that can't be converted -
2174                     //0 tells CFString to return NULL if it meets such a character
2175                 false, //not an external representation
2176                 (UInt8*) szOut,
2177                 nOutSize,
2178                 (CFIndex*) &nRealOutSize
2179                         );
2180         }
2181
2182         CFRelease(theString);
2183
2184 #if SIZEOF_WCHAR_T == 4
2185         delete[] szUniBuffer;
2186 #endif
2187
2188         return  nRealOutSize - 1;
2189     }
2190
2191     bool IsOk() const
2192     {
2193         return m_encoding != kCFStringEncodingInvalidId &&
2194               CFStringIsEncodingAvailable(m_encoding);
2195     }
2196
2197 private:
2198     CFStringEncoding m_encoding ;
2199 };
2200
2201 #endif // defined(__WXCOCOA__)
2202
2203 // ============================================================================
2204 // Mac conversion classes
2205 // ============================================================================
2206
2207 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2208
2209 class wxMBConv_mac : public wxMBConv
2210 {
2211 public:
2212     wxMBConv_mac()
2213     {
2214         Init(CFStringGetSystemEncoding()) ;
2215     }
2216
2217 #if wxUSE_FONTMAP
2218     wxMBConv_mac(const wxChar* name)
2219     {
2220         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2221     }
2222 #endif
2223
2224     wxMBConv_mac(wxFontEncoding encoding)
2225     {
2226         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2227     }
2228
2229     ~wxMBConv_mac()
2230     {
2231         OSStatus status = noErr ;
2232         status = TECDisposeConverter(m_MB2WC_converter);
2233         status = TECDisposeConverter(m_WC2MB_converter);
2234     }
2235
2236
2237     void Init( TextEncodingBase encoding)
2238     {
2239         OSStatus status = noErr ;
2240         m_char_encoding = encoding ;
2241         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2242
2243         status = TECCreateConverter(&m_MB2WC_converter,
2244                                     m_char_encoding,
2245                                     m_unicode_encoding);
2246         status = TECCreateConverter(&m_WC2MB_converter,
2247                                     m_unicode_encoding,
2248                                     m_char_encoding);
2249     }
2250
2251     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2252     {
2253         OSStatus status = noErr ;
2254         ByteCount byteOutLen ;
2255         ByteCount byteInLen = strlen(psz) ;
2256         wchar_t *tbuf = NULL ;
2257         UniChar* ubuf = NULL ;
2258         size_t res = 0 ;
2259
2260         if (buf == NULL)
2261         {
2262             //apple specs say at least 32
2263             n = wxMax( 32 , byteInLen ) ;
2264             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2265         }
2266         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2267 #if SIZEOF_WCHAR_T == 4
2268         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2269 #else
2270         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2271 #endif
2272         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2273           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2274 #if SIZEOF_WCHAR_T == 4
2275         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2276         // is not properly terminated we get random characters at the end
2277         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2278         wxMBConvUTF16BE converter ;
2279         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2280         free( ubuf ) ;
2281 #else
2282         res = byteOutLen / sizeof( UniChar ) ;
2283 #endif
2284         if ( buf == NULL )
2285              free(tbuf) ;
2286
2287         if ( buf  && res < n)
2288             buf[res] = 0;
2289
2290         return res ;
2291     }
2292
2293     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2294     {
2295         OSStatus status = noErr ;
2296         ByteCount byteOutLen ;
2297         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2298
2299         char *tbuf = NULL ;
2300
2301         if (buf == NULL)
2302         {
2303             //apple specs say at least 32
2304             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2305             tbuf = (char*) malloc( n ) ;
2306         }
2307
2308         ByteCount byteBufferLen = n ;
2309         UniChar* ubuf = NULL ;
2310 #if SIZEOF_WCHAR_T == 4
2311         wxMBConvUTF16BE converter ;
2312         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2313         byteInLen = unicharlen ;
2314         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2315         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2316 #else
2317         ubuf = (UniChar*) psz ;
2318 #endif
2319         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2320             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2321 #if SIZEOF_WCHAR_T == 4
2322         free( ubuf ) ;
2323 #endif
2324         if ( buf == NULL )
2325             free(tbuf) ;
2326
2327         size_t res = byteOutLen ;
2328         if ( buf  && res < n)
2329         {
2330             buf[res] = 0;
2331
2332             //we need to double-trip to verify it didn't insert any ? in place
2333             //of bogus characters
2334             wxWCharBuffer wcBuf(n);
2335             size_t pszlen = wxWcslen(psz);
2336             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2337                         wxWcslen(wcBuf) != pszlen ||
2338                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2339             {
2340                 // we didn't obtain the same thing we started from, hence
2341                 // the conversion was lossy and we consider that it failed
2342                 return (size_t)-1;
2343             }
2344         }
2345
2346         return res ;
2347     }
2348
2349     bool IsOk() const
2350         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2351
2352 private:
2353     TECObjectRef m_MB2WC_converter ;
2354     TECObjectRef m_WC2MB_converter ;
2355
2356     TextEncodingBase m_char_encoding ;
2357     TextEncodingBase m_unicode_encoding ;
2358 };
2359
2360 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2361
2362 // ============================================================================
2363 // wxEncodingConverter based conversion classes
2364 // ============================================================================
2365
2366 #if wxUSE_FONTMAP
2367
2368 class wxMBConv_wxwin : public wxMBConv
2369 {
2370 private:
2371     void Init()
2372     {
2373         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2374                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2375     }
2376
2377 public:
2378     // temporarily just use wxEncodingConverter stuff,
2379     // so that it works while a better implementation is built
2380     wxMBConv_wxwin(const wxChar* name)
2381     {
2382         if (name)
2383             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2384         else
2385             m_enc = wxFONTENCODING_SYSTEM;
2386
2387         Init();
2388     }
2389
2390     wxMBConv_wxwin(wxFontEncoding enc)
2391     {
2392         m_enc = enc;
2393
2394         Init();
2395     }
2396
2397     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2398     {
2399         size_t inbuf = strlen(psz);
2400         if (buf)
2401         {
2402             if (!m2w.Convert(psz,buf))
2403                 return (size_t)-1;
2404         }
2405         return inbuf;
2406     }
2407
2408     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2409     {
2410         const size_t inbuf = wxWcslen(psz);
2411         if (buf)
2412         {
2413             if (!w2m.Convert(psz,buf))
2414                 return (size_t)-1;
2415         }
2416
2417         return inbuf;
2418     }
2419
2420     bool IsOk() const { return m_ok; }
2421
2422 public:
2423     wxFontEncoding m_enc;
2424     wxEncodingConverter m2w, w2m;
2425
2426     // were we initialized successfully?
2427     bool m_ok;
2428
2429     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2430 };
2431
2432 #endif // wxUSE_FONTMAP
2433
2434 // ============================================================================
2435 // wxCSConv implementation
2436 // ============================================================================
2437
2438 void wxCSConv::Init()
2439 {
2440     m_name = NULL;
2441     m_convReal =  NULL;
2442     m_deferred = true;
2443 }
2444
2445 wxCSConv::wxCSConv(const wxChar *charset)
2446 {
2447     Init();
2448
2449     if ( charset )
2450     {
2451         SetName(charset);
2452     }
2453
2454     m_encoding = wxFONTENCODING_SYSTEM;
2455 }
2456
2457 wxCSConv::wxCSConv(wxFontEncoding encoding)
2458 {
2459     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2460     {
2461         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2462
2463         encoding = wxFONTENCODING_SYSTEM;
2464     }
2465
2466     Init();
2467
2468     m_encoding = encoding;
2469 }
2470
2471 wxCSConv::~wxCSConv()
2472 {
2473     Clear();
2474 }
2475
2476 wxCSConv::wxCSConv(const wxCSConv& conv)
2477         : wxMBConv()
2478 {
2479     Init();
2480
2481     SetName(conv.m_name);
2482     m_encoding = conv.m_encoding;
2483 }
2484
2485 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2486 {
2487     Clear();
2488
2489     SetName(conv.m_name);
2490     m_encoding = conv.m_encoding;
2491
2492     return *this;
2493 }
2494
2495 void wxCSConv::Clear()
2496 {
2497     free(m_name);
2498     delete m_convReal;
2499
2500     m_name = NULL;
2501     m_convReal = NULL;
2502 }
2503
2504 void wxCSConv::SetName(const wxChar *charset)
2505 {
2506     if (charset)
2507     {
2508         m_name = wxStrdup(charset);
2509         m_deferred = true;
2510     }
2511 }
2512
2513 wxMBConv *wxCSConv::DoCreate() const
2514 {
2515     // check for the special case of ASCII or ISO8859-1 charset: as we have
2516     // special knowledge of it anyhow, we don't need to create a special
2517     // conversion object
2518     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2519     {
2520         // don't convert at all
2521         return NULL;
2522     }
2523
2524     // we trust OS to do conversion better than we can so try external
2525     // conversion methods first
2526     //
2527     // the full order is:
2528     //      1. OS conversion (iconv() under Unix or Win32 API)
2529     //      2. hard coded conversions for UTF
2530     //      3. wxEncodingConverter as fall back
2531
2532     // step (1)
2533 #ifdef HAVE_ICONV
2534 #if !wxUSE_FONTMAP
2535     if ( m_name )
2536 #endif // !wxUSE_FONTMAP
2537     {
2538         wxString name(m_name);
2539
2540 #if wxUSE_FONTMAP
2541         if ( name.empty() )
2542             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2543 #endif // wxUSE_FONTMAP
2544
2545         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2546         if ( conv->IsOk() )
2547             return conv;
2548
2549         delete conv;
2550     }
2551 #endif // HAVE_ICONV
2552
2553 #ifdef wxHAVE_WIN32_MB2WC
2554     {
2555 #if wxUSE_FONTMAP
2556         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2557                                       : new wxMBConv_win32(m_encoding);
2558         if ( conv->IsOk() )
2559             return conv;
2560
2561         delete conv;
2562 #else
2563         return NULL;
2564 #endif
2565     }
2566 #endif // wxHAVE_WIN32_MB2WC
2567 #if defined(__WXMAC__)
2568     {
2569         // leave UTF16 and UTF32 to the built-ins of wx
2570         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2571             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2572         {
2573
2574 #if wxUSE_FONTMAP
2575             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2576                                         : new wxMBConv_mac(m_encoding);
2577 #else
2578             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2579 #endif
2580             if ( conv->IsOk() )
2581                  return conv;
2582
2583             delete conv;
2584         }
2585     }
2586 #endif
2587 #if defined(__WXCOCOA__)
2588     {
2589         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2590         {
2591
2592 #if wxUSE_FONTMAP
2593             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2594                                           : new wxMBConv_cocoa(m_encoding);
2595 #else
2596             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2597 #endif
2598             if ( conv->IsOk() )
2599                  return conv;
2600
2601             delete conv;
2602         }
2603     }
2604 #endif
2605     // step (2)
2606     wxFontEncoding enc = m_encoding;
2607 #if wxUSE_FONTMAP
2608     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2609     {
2610         // use "false" to suppress interactive dialogs -- we can be called from
2611         // anywhere and popping up a dialog from here is the last thing we want to
2612         // do
2613         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2614     }
2615 #endif // wxUSE_FONTMAP
2616
2617     switch ( enc )
2618     {
2619         case wxFONTENCODING_UTF7:
2620              return new wxMBConvUTF7;
2621
2622         case wxFONTENCODING_UTF8:
2623              return new wxMBConvUTF8;
2624
2625         case wxFONTENCODING_UTF16BE:
2626              return new wxMBConvUTF16BE;
2627
2628         case wxFONTENCODING_UTF16LE:
2629              return new wxMBConvUTF16LE;
2630
2631         case wxFONTENCODING_UTF32BE:
2632              return new wxMBConvUTF32BE;
2633
2634         case wxFONTENCODING_UTF32LE:
2635              return new wxMBConvUTF32LE;
2636
2637         default:
2638              // nothing to do but put here to suppress gcc warnings
2639              ;
2640     }
2641
2642     // step (3)
2643 #if wxUSE_FONTMAP
2644     {
2645         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2646                                       : new wxMBConv_wxwin(m_encoding);
2647         if ( conv->IsOk() )
2648             return conv;
2649
2650         delete conv;
2651     }
2652 #endif // wxUSE_FONTMAP
2653
2654     // NB: This is a hack to prevent deadlock. What could otherwise happen
2655     //     in Unicode build: wxConvLocal creation ends up being here
2656     //     because of some failure and logs the error. But wxLog will try to
2657     //     attach timestamp, for which it will need wxConvLocal (to convert
2658     //     time to char* and then wchar_t*), but that fails, tries to log
2659     //     error, but wxLog has a (already locked) critical section that
2660     //     guards static buffer.
2661     static bool alreadyLoggingError = false;
2662     if (!alreadyLoggingError)
2663     {
2664         alreadyLoggingError = true;
2665         wxLogError(_("Cannot convert from the charset '%s'!"),
2666                    m_name ? m_name
2667                       :
2668 #if wxUSE_FONTMAP
2669                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2670 #else // !wxUSE_FONTMAP
2671                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2672 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2673               );
2674         alreadyLoggingError = false;
2675     }
2676
2677     return NULL;
2678 }
2679
2680 void wxCSConv::CreateConvIfNeeded() const
2681 {
2682     if ( m_deferred )
2683     {
2684         wxCSConv *self = (wxCSConv *)this; // const_cast
2685
2686 #if wxUSE_INTL
2687         // if we don't have neither the name nor the encoding, use the default
2688         // encoding for this system
2689         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2690         {
2691             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2692         }
2693 #endif // wxUSE_INTL
2694
2695         self->m_convReal = DoCreate();
2696         self->m_deferred = false;
2697     }
2698 }
2699
2700 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2701 {
2702     CreateConvIfNeeded();
2703
2704     if (m_convReal)
2705         return m_convReal->MB2WC(buf, psz, n);
2706
2707     // latin-1 (direct)
2708     size_t len = strlen(psz);
2709
2710     if (buf)
2711     {
2712         for (size_t c = 0; c <= len; c++)
2713             buf[c] = (unsigned char)(psz[c]);
2714     }
2715
2716     return len;
2717 }
2718
2719 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2720 {
2721     CreateConvIfNeeded();
2722
2723     if (m_convReal)
2724         return m_convReal->WC2MB(buf, psz, n);
2725
2726     // latin-1 (direct)
2727     const size_t len = wxWcslen(psz);
2728     if (buf)
2729     {
2730         for (size_t c = 0; c <= len; c++)
2731         {
2732             if (psz[c] > 0xFF)
2733                 return (size_t)-1;
2734             buf[c] = (char)psz[c];
2735         }
2736     }
2737     else
2738     {
2739         for (size_t c = 0; c <= len; c++)
2740         {
2741             if (psz[c] > 0xFF)
2742                 return (size_t)-1;
2743         }
2744     }
2745
2746     return len;
2747 }
2748
2749 // ----------------------------------------------------------------------------
2750 // globals
2751 // ----------------------------------------------------------------------------
2752
2753 #ifdef __WINDOWS__
2754     static wxMBConv_win32 wxConvLibcObj;
2755 #elif defined(__WXMAC__) && !defined(__MACH__)
2756     static wxMBConv_mac wxConvLibcObj ;
2757 #else
2758     static wxMBConvLibc wxConvLibcObj;
2759 #endif
2760
2761 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2762 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2763 static wxMBConvUTF7 wxConvUTF7Obj;
2764 static wxMBConvUTF8 wxConvUTF8Obj;
2765
2766 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2767 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2768 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2769 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2770 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2772 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2773 #ifdef __WXOSX__
2774                                     wxConvUTF8Obj;
2775 #else
2776                                     wxConvLibcObj;
2777 #endif
2778
2779
2780 #else // !wxUSE_WCHAR_T
2781
2782 // stand-ins in absence of wchar_t
2783 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2784                                 wxConvISO8859_1,
2785                                 wxConvLocal,
2786                                 wxConvUTF8;
2787
2788 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2789
2790