src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58 #ifdef HAVE_LANGINFO_H
  59   #include <langinfo.h>
  60 #endif
  61
  62 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  63     #define wxHAVE_WIN32_MB2WC
  64 #endif // __WIN32__ but !__WXMICROWIN__
  65
  66 // ----------------------------------------------------------------------------
  67 // headers
  68 // ----------------------------------------------------------------------------
  69
  70 #ifdef __SALFORDC__
  71     #include <clib.h>
  72 #endif
  73
  74 #ifdef HAVE_ICONV
  75     #include <iconv.h>
  76     #include "wx/thread.h"
  77 #endif
  78
  79 #include "wx/encconv.h"
  80 #include "wx/fontmap.h"
  81 #include "wx/utils.h"
  82
  83 #ifdef __WXMAC__
  84 #include <ATSUnicode.h>
  85 #include <TextCommon.h>
  86 #include <TextEncodingConverter.h>
  87
  88 #include  "wx/mac/private.h"  // includes mac headers
  89 #endif
  90 // ----------------------------------------------------------------------------
  91 // macros
  92 // ----------------------------------------------------------------------------
  93
  94 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  95 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  96
  97 #if SIZEOF_WCHAR_T == 4
  98     #define WC_NAME         "UCS4"
  99     #define WC_BSWAP         BSWAP_UCS4
 100     #ifdef WORDS_BIGENDIAN
 101       #define WC_NAME_BEST  "UCS-4BE"
 102     #else
 103       #define WC_NAME_BEST  "UCS-4LE"
 104     #endif
 105 #elif SIZEOF_WCHAR_T == 2
 106     #define WC_NAME         "UTF16"
 107     #define WC_BSWAP         BSWAP_UTF16
 108     #define WC_UTF16
 109     #ifdef WORDS_BIGENDIAN
 110       #define WC_NAME_BEST  "UTF-16BE"
 111     #else
 112       #define WC_NAME_BEST  "UTF-16LE"
 113     #endif
 114 #else // sizeof(wchar_t) != 2 nor 4
 115     // does this ever happen?
 116     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 117 #endif
 118
 119 // ============================================================================
 120 // implementation
 121 // ============================================================================
 122
 123 // ----------------------------------------------------------------------------
 124 // UTF-16 en/decoding to/from UCS-4
 125 // ----------------------------------------------------------------------------
 126
 127
 128 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 129 {
 130     if (input<=0xffff)
 131     {
 132         if (output)
 133             *output = (wxUint16) input;
 134         return 1;
 135     }
 136     else if (input>=0x110000)
 137     {
 138         return (size_t)-1;
 139     }
 140     else
 141     {
 142         if (output)
 143         {
 144             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 145             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 146         }
 147         return 2;
 148     }
 149 }
 150
 151 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 152 {
 153     if ((*input<0xd800) || (*input>0xdfff))
 154     {
 155         output = *input;
 156         return 1;
 157     }
 158     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 159     {
 160         output = *input;
 161         return (size_t)-1;
 162     }
 163     else
 164     {
 165         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 166         return 2;
 167     }
 168 }
 169
 170
 171 // ----------------------------------------------------------------------------
 172 // wxMBConv
 173 // ----------------------------------------------------------------------------
 174
 175 wxMBConv::~wxMBConv()
 176 {
 177     // nothing to do here (necessary for Darwin linking probably)
 178 }
 179
 180 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 181 {
 182     if ( psz )
 183     {
 184         // calculate the length of the buffer needed first
 185         size_t nLen = MB2WC(NULL, psz, 0);
 186         if ( nLen != (size_t)-1 )
 187         {
 188             // now do the actual conversion
 189             wxWCharBuffer buf(nLen);
 190             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 191             if ( nLen != (size_t)-1 )
 192             {
 193                 return buf;
 194             }
 195         }
 196     }
 197
 198     wxWCharBuffer buf((wchar_t *)NULL);
 199
 200     return buf;
 201 }
 202
 203 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 204 {
 205     if ( pwz )
 206     {
 207         size_t nLen = WC2MB(NULL, pwz, 0);
 208         if ( nLen != (size_t)-1 )
 209         {
 210             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 211             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 212             if ( nLen != (size_t)-1 )
 213             {
 214                 return buf;
 215             }
 216         }
 217     }
 218
 219     wxCharBuffer buf((char *)NULL);
 220
 221     return buf;
 222 }
 223
 224 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 225 {
 226     wxASSERT(pOutSize != NULL);
 227
 228     const char* szEnd = szString + nStringLen + 1;
 229     const char* szPos = szString;
 230     const char* szStart = szPos;
 231
 232     size_t nActualLength = 0;
 233     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 234
 235     wxWCharBuffer theBuffer(nCurrentSize);
 236
 237     //Convert the string until the length() is reached, continuing the
 238     //loop every time a null character is reached
 239     while(szPos != szEnd)
 240     {
 241         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 242
 243         //Get the length of the current (sub)string
 244         size_t nLen = MB2WC(NULL, szPos, 0);
 245
 246         //Invalid conversion?
 247         if( nLen == (size_t)-1 )
 248         {
 249             *pOutSize = 0;
 250             theBuffer.data()[0u] = wxT('\0');
 251             return theBuffer;
 252         }
 253
 254
 255         //Increase the actual length (+1 for current null character)
 256         nActualLength += nLen + 1;
 257
 258         //if buffer too big, realloc the buffer
 259         if (nActualLength > (nCurrentSize+1))
 260         {
 261             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 262             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 263             theBuffer = theNewBuffer;
 264             nCurrentSize <<= 1;
 265         }
 266
 267         //Convert the current (sub)string
 268         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 269         {
 270             *pOutSize = 0;
 271             theBuffer.data()[0u] = wxT('\0');
 272             return theBuffer;
 273         }
 274
 275         //Increment to next (sub)string
 276         //Note that we have to use strlen here instead of nLen
 277         //here because XX2XX gives us the size of the output buffer,
 278         //not neccessarly the length of the string
 279         szPos += strlen(szPos) + 1;
 280     }
 281
 282     //success - return actual length and the buffer
 283     *pOutSize = nActualLength;
 284     return theBuffer;
 285 }
 286
 287 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 288 {
 289     wxASSERT(pOutSize != NULL);
 290
 291     const wchar_t* szEnd = szString + nStringLen + 1;
 292     const wchar_t* szPos = szString;
 293     const wchar_t* szStart = szPos;
 294
 295     size_t nActualLength = 0;
 296     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 297
 298     wxCharBuffer theBuffer(nCurrentSize);
 299
 300     //Convert the string until the length() is reached, continuing the
 301     //loop every time a null character is reached
 302     while(szPos != szEnd)
 303     {
 304         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 305
 306         //Get the length of the current (sub)string
 307         size_t nLen = WC2MB(NULL, szPos, 0);
 308
 309         //Invalid conversion?
 310         if( nLen == (size_t)-1 )
 311         {
 312             *pOutSize = 0;
 313             theBuffer.data()[0u] = wxT('\0');
 314             return theBuffer;
 315         }
 316
 317         //Increase the actual length (+1 for current null character)
 318         nActualLength += nLen + 1;
 319
 320         //if buffer too big, realloc the buffer
 321         if (nActualLength > (nCurrentSize+1))
 322         {
 323             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 324             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 325             theBuffer = theNewBuffer;
 326             nCurrentSize <<= 1;
 327         }
 328
 329         //Convert the current (sub)string
 330         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 331         {
 332             *pOutSize = 0;
 333             theBuffer.data()[0u] = wxT('\0');
 334             return theBuffer;
 335         }
 336
 337         //Increment to next (sub)string
 338         //Note that we have to use wxWcslen here instead of nLen
 339         //here because XX2XX gives us the size of the output buffer,
 340         //not neccessarly the length of the string
 341         szPos += wxWcslen(szPos) + 1;
 342     }
 343
 344     //success - return actual length and the buffer
 345     *pOutSize = nActualLength;
 346     return theBuffer;
 347 }
 348
 349 // ----------------------------------------------------------------------------
 350 // wxMBConvLibc
 351 // ----------------------------------------------------------------------------
 352
 353 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 354 {
 355     return wxMB2WC(buf, psz, n);
 356 }
 357
 358 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 359 {
 360     return wxWC2MB(buf, psz, n);
 361 }
 362
 363 #ifdef __UNIX__
 364
 365 // ----------------------------------------------------------------------------
 366 // wxConvBrokenFileNames
 367 // ----------------------------------------------------------------------------
 368
 369 wxConvBrokenFileNames::wxConvBrokenFileNames()
 370 {
 371     // decide which conversion to use for the file names
 372
 373     // (1) this variable exists for the sole purpose of specifying the encoding
 374     //     of the filenames for GTK+ programs, so use it if it is set
 375     const wxChar *encName = wxGetenv(_T("G_FILENAME_ENCODING"));
 376     if ( encName )
 377     {
 378         m_conv = new wxCSConv(encName);
 379     }
 380     else // no G_FILENAME_ENCODING
 381     {
 382         // (2) if a non default locale is set, assume that the user wants his
 383         //     filenames in this locale too
 384         switch ( wxLocale::GetSystemEncoding() )
 385         {
 386             default:
 387                 m_conv = new wxMBConvLibc;
 388                 break;
 389
 390             // (3) finally use UTF-8 by default
 391             case wxFONTENCODING_SYSTEM:
 392             case wxFONTENCODING_UTF8:
 393                 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 394                 break;
 395         }
 396     }
 397 }
 398
 399 size_t
 400 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 401                              const char *psz,
 402                              size_t outputSize) const
 403 {
 404     return m_conv->MB2WC( outputBuf, psz, outputSize );
 405 }
 406
 407 size_t
 408 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 409                              const wchar_t *psz,
 410                              size_t outputSize) const
 411 {
 412     return m_conv->WC2MB( outputBuf, psz, outputSize );
 413 }
 414
 415 #endif
 416
 417 // ----------------------------------------------------------------------------
 418 // UTF-7
 419 // ----------------------------------------------------------------------------
 420
 421 // Implementation (C) 2004 Fredrik Roubert
 422
 423 //
 424 // BASE64 decoding table
 425 //
 426 static const unsigned char utf7unb64[] =
 427 {
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 432     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 433     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 434     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 435     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 436     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 437     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 438     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 439     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 440     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 441     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 442     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 443     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 444     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 450     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 451     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 452     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 453     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 454     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 455     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 456     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 457     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 458     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 459     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 460 };
 461
 462 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 463 {
 464     size_t len = 0;
 465
 466     while (*psz && ((!buf) || (len < n)))
 467     {
 468         unsigned char cc = *psz++;
 469         if (cc != '+')
 470         {
 471             // plain ASCII char
 472             if (buf)
 473                 *buf++ = cc;
 474             len++;
 475         }
 476         else if (*psz == '-')
 477         {
 478             // encoded plus sign
 479             if (buf)
 480                 *buf++ = cc;
 481             len++;
 482             psz++;
 483         }
 484         else
 485         {
 486             // BASE64 encoded string
 487             bool lsb;
 488             unsigned char c;
 489             unsigned int d, l;
 490             for (lsb = false, d = 0, l = 0;
 491                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 492             {
 493                 d <<= 6;
 494                 d += cc;
 495                 for (l += 6; l >= 8; lsb = !lsb)
 496                 {
 497                     c = (unsigned char)((d >> (l -= 8)) % 256);
 498                     if (lsb)
 499                     {
 500                         if (buf)
 501                             *buf++ |= c;
 502                         len ++;
 503                     }
 504                     else
 505                         if (buf)
 506                             *buf = (wchar_t)(c << 8);
 507                 }
 508             }
 509             if (*psz == '-')
 510                 psz++;
 511         }
 512     }
 513     if (buf && (len < n))
 514         *buf = 0;
 515     return len;
 516 }
 517
 518 //
 519 // BASE64 encoding table
 520 //
 521 static const unsigned char utf7enb64[] =
 522 {
 523     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 524     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 525     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 526     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 527     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 528     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 529     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 530     '4', '5', '6', '7', '8', '9', '+', '/'
 531 };
 532
 533 //
 534 // UTF-7 encoding table
 535 //
 536 // 0 - Set D (directly encoded characters)
 537 // 1 - Set O (optional direct characters)
 538 // 2 - whitespace characters (optional)
 539 // 3 - special characters
 540 //
 541 static const unsigned char utf7encode[128] =
 542 {
 543     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 544     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 545     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 546     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 547     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 548     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 549     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 550     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 551 };
 552
 553 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 554 {
 555
 556
 557     size_t len = 0;
 558
 559     while (*psz && ((!buf) || (len < n)))
 560     {
 561         wchar_t cc = *psz++;
 562         if (cc < 0x80 && utf7encode[cc] < 1)
 563         {
 564             // plain ASCII char
 565             if (buf)
 566                 *buf++ = (char)cc;
 567             len++;
 568         }
 569 #ifndef WC_UTF16
 570         else if (((wxUint32)cc) > 0xffff)
 571         {
 572             // no surrogate pair generation (yet?)
 573             return (size_t)-1;
 574         }
 575 #endif
 576         else
 577         {
 578             if (buf)
 579                 *buf++ = '+';
 580             len++;
 581             if (cc != '+')
 582             {
 583                 // BASE64 encode string
 584                 unsigned int lsb, d, l;
 585                 for (d = 0, l = 0;; psz++)
 586                 {
 587                     for (lsb = 0; lsb < 2; lsb ++)
 588                     {
 589                         d <<= 8;
 590                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 591
 592                         for (l += 8; l >= 6; )
 593                         {
 594                             l -= 6;
 595                             if (buf)
 596                                 *buf++ = utf7enb64[(d >> l) % 64];
 597                             len++;
 598                         }
 599                     }
 600                     cc = *psz;
 601                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 602                         break;
 603                 }
 604                 if (l != 0)
 605                 {
 606                     if (buf)
 607                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 608                     len++;
 609                 }
 610             }
 611             if (buf)
 612                 *buf++ = '-';
 613             len++;
 614         }
 615     }
 616     if (buf && (len < n))
 617         *buf = 0;
 618     return len;
 619 }
 620
 621 // ----------------------------------------------------------------------------
 622 // UTF-8
 623 // ----------------------------------------------------------------------------
 624
 625 static wxUint32 utf8_max[]=
 626     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 627
 628 // boundaries of the private use area we use to (temporarily) remap invalid
 629 // characters invalid in a UTF-8 encoded string
 630 const wxUint32 wxUnicodePUA = 0x100000;
 631 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 632
 633 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 634 {
 635     size_t len = 0;
 636
 637     while (*psz && ((!buf) || (len < n)))
 638     {
 639         const char *opsz = psz;
 640         bool invalid = false;
 641         unsigned char cc = *psz++, fc = cc;
 642         unsigned cnt;
 643         for (cnt = 0; fc & 0x80; cnt++)
 644             fc <<= 1;
 645         if (!cnt)
 646         {
 647             // plain ASCII char
 648             if (buf)
 649                 *buf++ = cc;
 650             len++;
 651         }
 652         else
 653         {
 654             cnt--;
 655             if (!cnt)
 656             {
 657                 // invalid UTF-8 sequence
 658                 invalid = true;
 659             }
 660             else
 661             {
 662                 unsigned ocnt = cnt - 1;
 663                 wxUint32 res = cc & (0x3f >> cnt);
 664                 while (cnt--)
 665                 {
 666                     cc = *psz;
 667                     if ((cc & 0xC0) != 0x80)
 668                     {
 669                         // invalid UTF-8 sequence
 670                         invalid = true;
 671                         break;
 672                     }
 673                     psz++;
 674                     res = (res << 6) | (cc & 0x3f);
 675                 }
 676                 if (invalid || res <= utf8_max[ocnt])
 677                 {
 678                     // illegal UTF-8 encoding
 679                     invalid = true;
 680                 }
 681                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 682                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 683                 {
 684                     // if one of our PUA characters turns up externally
 685                     // it must also be treated as an illegal sequence
 686                     // (a bit like you have to escape an escape character)
 687                     invalid = true;
 688                 }
 689                 else
 690                 {
 691 #ifdef WC_UTF16
 692                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 693                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 694                     if (pa == (size_t)-1)
 695                     {
 696                         invalid = true;
 697                     }
 698                     else
 699                     {
 700                         if (buf)
 701                             buf += pa;
 702                         len += pa;
 703                     }
 704 #else // !WC_UTF16
 705                     if (buf)
 706                         *buf++ = res;
 707                     len++;
 708 #endif // WC_UTF16/!WC_UTF16
 709                 }
 710             }
 711             if (invalid)
 712             {
 713                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 714                 {
 715                     while (opsz < psz && (!buf || len < n))
 716                     {
 717 #ifdef WC_UTF16
 718                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 719                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 720                         wxASSERT(pa != (size_t)-1);
 721                         if (buf)
 722                             buf += pa;
 723                         opsz++;
 724                         len += pa;
 725 #else
 726                         if (buf)
 727                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 728                         opsz++;
 729                         len++;
 730 #endif
 731                     }
 732                 }
 733                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 734                 {
 735                     while (opsz < psz && (!buf || len < n))
 736                     {
 737                         if ( buf && len + 3 < n )
 738                         {
 739                             unsigned char n = *opsz;
 740                             *buf++ = L'\\';
 741                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 742                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 743                             *buf++ = (wchar_t)( L'0' + n % 010 );
 744                         }
 745                         opsz++;
 746                         len += 4;
 747                     }
 748                 }
 749                 else // MAP_INVALID_UTF8_NOT
 750                 {
 751                     return (size_t)-1;
 752                 }
 753             }
 754         }
 755     }
 756     if (buf && (len < n))
 757         *buf = 0;
 758     return len;
 759 }
 760
 761 static inline bool isoctal(wchar_t wch)
 762 {
 763     return L'0' <= wch && wch <= L'7';
 764 }
 765
 766 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 767 {
 768     size_t len = 0;
 769
 770     while (*psz && ((!buf) || (len < n)))
 771     {
 772         wxUint32 cc;
 773 #ifdef WC_UTF16
 774         // cast is ok for WC_UTF16
 775         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 776         psz += (pa == (size_t)-1) ? 1 : pa;
 777 #else
 778         cc=(*psz++) & 0x7fffffff;
 779 #endif
 780
 781         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 782                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 783         {
 784             if (buf)
 785                 *buf++ = (char)(cc - wxUnicodePUA);
 786             len++;
 787         }
 788         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 789                     cc == L'\\' &&
 790                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 791         {
 792             if (buf)
 793             {
 794                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 795                                  (psz[1] - L'0')*010 +
 796                                  (psz[2] - L'0'));
 797             }
 798
 799             psz += 3;
 800             len++;
 801         }
 802         else
 803         {
 804             unsigned cnt;
 805             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 806             if (!cnt)
 807             {
 808                 // plain ASCII char
 809                 if (buf)
 810                     *buf++ = (char) cc;
 811                 len++;
 812             }
 813
 814             else
 815             {
 816                 len += cnt + 1;
 817                 if (buf)
 818                 {
 819                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 820                     while (cnt--)
 821                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 822                 }
 823             }
 824         }
 825     }
 826
 827     if (buf && (len<n))
 828         *buf = 0;
 829
 830     return len;
 831 }
 832
 833 // ----------------------------------------------------------------------------
 834 // UTF-16
 835 // ----------------------------------------------------------------------------
 836
 837 #ifdef WORDS_BIGENDIAN
 838     #define wxMBConvUTF16straight wxMBConvUTF16BE
 839     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 840 #else
 841     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 842     #define wxMBConvUTF16straight wxMBConvUTF16LE
 843 #endif
 844
 845
 846 #ifdef WC_UTF16
 847
 848 // copy 16bit MB to 16bit String
 849 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 850 {
 851     size_t len=0;
 852
 853     while (*(wxUint16*)psz && (!buf || len < n))
 854     {
 855         if (buf)
 856             *buf++ = *(wxUint16*)psz;
 857         len++;
 858
 859         psz += sizeof(wxUint16);
 860     }
 861     if (buf && len<n)   *buf=0;
 862
 863     return len;
 864 }
 865
 866
 867 // copy 16bit String to 16bit MB
 868 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 869 {
 870     size_t len=0;
 871
 872     while (*psz && (!buf || len < n))
 873     {
 874         if (buf)
 875         {
 876             *(wxUint16*)buf = *psz;
 877             buf += sizeof(wxUint16);
 878         }
 879         len += sizeof(wxUint16);
 880         psz++;
 881     }
 882     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 883
 884     return len;
 885 }
 886
 887
 888 // swap 16bit MB to 16bit String
 889 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 890 {
 891     size_t len=0;
 892
 893     while (*(wxUint16*)psz && (!buf || len < n))
 894     {
 895         if (buf)
 896         {
 897             ((char *)buf)[0] = psz[1];
 898             ((char *)buf)[1] = psz[0];
 899             buf++;
 900         }
 901         len++;
 902         psz += sizeof(wxUint16);
 903     }
 904     if (buf && len<n)   *buf=0;
 905
 906     return len;
 907 }
 908
 909
 910 // swap 16bit MB to 16bit String
 911 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 912 {
 913     size_t len=0;
 914
 915     while (*psz && (!buf || len < n))
 916     {
 917         if (buf)
 918         {
 919             *buf++ = ((char*)psz)[1];
 920             *buf++ = ((char*)psz)[0];
 921         }
 922         len += sizeof(wxUint16);
 923         psz++;
 924     }
 925     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 926
 927     return len;
 928 }
 929
 930
 931 #else // WC_UTF16
 932
 933
 934 // copy 16bit MB to 32bit String
 935 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 936 {
 937     size_t len=0;
 938
 939     while (*(wxUint16*)psz && (!buf || len < n))
 940     {
 941         wxUint32 cc;
 942         size_t pa=decode_utf16((wxUint16*)psz, cc);
 943         if (pa == (size_t)-1)
 944             return pa;
 945
 946         if (buf)
 947             *buf++ = cc;
 948         len++;
 949         psz += pa * sizeof(wxUint16);
 950     }
 951     if (buf && len<n)   *buf=0;
 952
 953     return len;
 954 }
 955
 956
 957 // copy 32bit String to 16bit MB
 958 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 959 {
 960     size_t len=0;
 961
 962     while (*psz && (!buf || len < n))
 963     {
 964         wxUint16 cc[2];
 965         size_t pa=encode_utf16(*psz, cc);
 966
 967         if (pa == (size_t)-1)
 968             return pa;
 969
 970         if (buf)
 971         {
 972             *(wxUint16*)buf = cc[0];
 973             buf += sizeof(wxUint16);
 974             if (pa > 1)
 975             {
 976                 *(wxUint16*)buf = cc[1];
 977                 buf += sizeof(wxUint16);
 978             }
 979         }
 980
 981         len += pa*sizeof(wxUint16);
 982         psz++;
 983     }
 984     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 985
 986     return len;
 987 }
 988
 989
 990 // swap 16bit MB to 32bit String
 991 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 992 {
 993     size_t len=0;
 994
 995     while (*(wxUint16*)psz && (!buf || len < n))
 996     {
 997         wxUint32 cc;
 998         char tmp[4];
 999         tmp[0]=psz[1];  tmp[1]=psz[0];
1000         tmp[2]=psz[3];  tmp[3]=psz[2];
1001
1002         size_t pa=decode_utf16((wxUint16*)tmp, cc);
1003         if (pa == (size_t)-1)
1004             return pa;
1005
1006         if (buf)
1007             *buf++ = cc;
1008
1009         len++;
1010         psz += pa * sizeof(wxUint16);
1011     }
1012     if (buf && len<n)   *buf=0;
1013
1014     return len;
1015 }
1016
1017
1018 // swap 32bit String to 16bit MB
1019 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1020 {
1021     size_t len=0;
1022
1023     while (*psz && (!buf || len < n))
1024     {
1025         wxUint16 cc[2];
1026         size_t pa=encode_utf16(*psz, cc);
1027
1028         if (pa == (size_t)-1)
1029             return pa;
1030
1031         if (buf)
1032         {
1033             *buf++ = ((char*)cc)[1];
1034             *buf++ = ((char*)cc)[0];
1035             if (pa > 1)
1036             {
1037                 *buf++ = ((char*)cc)[3];
1038                 *buf++ = ((char*)cc)[2];
1039             }
1040         }
1041
1042         len += pa*sizeof(wxUint16);
1043         psz++;
1044     }
1045     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1046
1047     return len;
1048 }
1049
1050 #endif // WC_UTF16
1051
1052
1053 // ----------------------------------------------------------------------------
1054 // UTF-32
1055 // ----------------------------------------------------------------------------
1056
// The UTF-32 converters below are written once as "straight" and "swap"
// variants. These macros map those names onto the public classes: on a
// WORDS_BIGENDIAN machine "straight" is wxMBConvUTF32BE and "swap" is
// wxMBConvUTF32LE, otherwise it is the other way round.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// global instances of the two UTF-32 converters
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1068
1069
1070 #ifdef WC_UTF16
1071
1072 // copy 32bit MB to 16bit String
1073 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1074 {
1075     size_t len=0;
1076
1077     while (*(wxUint32*)psz && (!buf || len < n))
1078     {
1079         wxUint16 cc[2];
1080
1081         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1082         if (pa == (size_t)-1)
1083             return pa;
1084
1085         if (buf)
1086         {
1087             *buf++ = cc[0];
1088             if (pa > 1)
1089                 *buf++ = cc[1];
1090         }
1091         len += pa;
1092         psz += sizeof(wxUint32);
1093     }
1094     if (buf && len<n)   *buf=0;
1095
1096     return len;
1097 }
1098
1099
1100 // copy 16bit String to 32bit MB
1101 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1102 {
1103     size_t len=0;
1104
1105     while (*psz && (!buf || len < n))
1106     {
1107         wxUint32 cc;
1108
1109         // cast is ok for WC_UTF16
1110         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1111         if (pa == (size_t)-1)
1112             return pa;
1113
1114         if (buf)
1115         {
1116             *(wxUint32*)buf = cc;
1117             buf += sizeof(wxUint32);
1118         }
1119         len += sizeof(wxUint32);
1120         psz += pa;
1121     }
1122
1123     if (buf && len<=n-sizeof(wxUint32))
1124         *(wxUint32*)buf=0;
1125
1126     return len;
1127 }
1128
1129
1130
1131 // swap 32bit MB to 16bit String
1132 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1133 {
1134     size_t len=0;
1135
1136     while (*(wxUint32*)psz && (!buf || len < n))
1137     {
1138         char tmp[4];
1139         tmp[0] = psz[3];   tmp[1] = psz[2];
1140         tmp[2] = psz[1];   tmp[3] = psz[0];
1141
1142
1143         wxUint16 cc[2];
1144
1145         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1146         if (pa == (size_t)-1)
1147             return pa;
1148
1149         if (buf)
1150         {
1151             *buf++ = cc[0];
1152             if (pa > 1)
1153                 *buf++ = cc[1];
1154         }
1155         len += pa;
1156         psz += sizeof(wxUint32);
1157     }
1158
1159     if (buf && len<n)
1160         *buf=0;
1161
1162     return len;
1163 }
1164
1165
1166 // swap 16bit String to 32bit MB
1167 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1168 {
1169     size_t len=0;
1170
1171     while (*psz && (!buf || len < n))
1172     {
1173         char cc[4];
1174
1175         // cast is ok for WC_UTF16
1176         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1177         if (pa == (size_t)-1)
1178             return pa;
1179
1180         if (buf)
1181         {
1182             *buf++ = cc[3];
1183             *buf++ = cc[2];
1184             *buf++ = cc[1];
1185             *buf++ = cc[0];
1186         }
1187         len += sizeof(wxUint32);
1188         psz += pa;
1189     }
1190
1191     if (buf && len<=n-sizeof(wxUint32))
1192         *(wxUint32*)buf=0;
1193
1194     return len;
1195 }
1196
1197 #else // WC_UTF16
1198
1199
1200 // copy 32bit MB to 32bit String
1201 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1202 {
1203     size_t len=0;
1204
1205     while (*(wxUint32*)psz && (!buf || len < n))
1206     {
1207         if (buf)
1208             *buf++ = *(wxUint32*)psz;
1209         len++;
1210         psz += sizeof(wxUint32);
1211     }
1212
1213     if (buf && len<n)
1214         *buf=0;
1215
1216     return len;
1217 }
1218
1219
1220 // copy 32bit String to 32bit MB
1221 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1222 {
1223     size_t len=0;
1224
1225     while (*psz && (!buf || len < n))
1226     {
1227         if (buf)
1228         {
1229             *(wxUint32*)buf = *psz;
1230             buf += sizeof(wxUint32);
1231         }
1232
1233         len += sizeof(wxUint32);
1234         psz++;
1235     }
1236
1237     if (buf && len<=n-sizeof(wxUint32))
1238         *(wxUint32*)buf=0;
1239
1240     return len;
1241 }
1242
1243
1244 // swap 32bit MB to 32bit String
1245 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1246 {
1247     size_t len=0;
1248
1249     while (*(wxUint32*)psz && (!buf || len < n))
1250     {
1251         if (buf)
1252         {
1253             ((char *)buf)[0] = psz[3];
1254             ((char *)buf)[1] = psz[2];
1255             ((char *)buf)[2] = psz[1];
1256             ((char *)buf)[3] = psz[0];
1257             buf++;
1258         }
1259         len++;
1260         psz += sizeof(wxUint32);
1261     }
1262
1263     if (buf && len<n)
1264         *buf=0;
1265
1266     return len;
1267 }
1268
1269
1270 // swap 32bit String to 32bit MB
1271 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1272 {
1273     size_t len=0;
1274
1275     while (*psz && (!buf || len < n))
1276     {
1277         if (buf)
1278         {
1279             *buf++ = ((char *)psz)[3];
1280             *buf++ = ((char *)psz)[2];
1281             *buf++ = ((char *)psz)[1];
1282             *buf++ = ((char *)psz)[0];
1283         }
1284         len += sizeof(wxUint32);
1285         psz++;
1286     }
1287
1288     if (buf && len<=n-sizeof(wxUint32))
1289         *(wxUint32*)buf=0;
1290
1291     return len;
1292 }
1293
1294
1295 #endif // WC_UTF16
1296
1297
1298 // ============================================================================
1299 // The classes doing conversion using the iconv_xxx() functions
1300 // ============================================================================
1301
1302 #ifdef HAVE_ICONV
1303
// ICONV_FAILED(cres, bufLeft): true if an iconv() call which returned cres
// and left bufLeft bytes of unconverted input should be treated as failed.
//
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// cast the address of an input pointer to the type used here for the second
// argument of iconv(); ICONV_CONST is defined outside of this part of the
// file
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1320
1321 // ----------------------------------------------------------------------------
1322 // wxMBConv_iconv: encapsulates an iconv character set
1323 // ----------------------------------------------------------------------------
1324
1325 class wxMBConv_iconv : public wxMBConv
1326 {
1327 public:
1328     wxMBConv_iconv(const wxChar *name);
1329     virtual ~wxMBConv_iconv();
1330
1331     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1332     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1333
1334     bool IsOk() const
1335         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1336
1337 protected:
1338     // the iconv handlers used to translate from multibyte to wide char and in
1339     // the other direction
1340     iconv_t m2w,
1341             w2m;
1342 #if wxUSE_THREADS
1343     // guards access to m2w and w2m objects
1344     wxMutex m_iconvMutex;
1345 #endif
1346
1347 private:
1348     // the name (for iconv_open()) of a wide char charset -- if none is
1349     // available on this machine, it will remain NULL
1350     static const char *ms_wcCharsetName;
1351
1352     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1353     // different endian-ness than the native one
1354     static bool ms_wcNeedsSwap;
1355 };
1356
1357 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1358 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1359
1360 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1361 {
1362     // Do it the hard way
1363     char cname[100];
1364     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1365         cname[i] = (char) name[i];
1366
1367     // check for charset that represents wchar_t:
1368     if (ms_wcCharsetName == NULL)
1369     {
1370         ms_wcNeedsSwap = false;
1371
1372         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1373         ms_wcCharsetName = WC_NAME_BEST;
1374         m2w = iconv_open(ms_wcCharsetName, cname);
1375
1376         if (m2w == (iconv_t)-1)
1377         {
1378             // try charset w/o bytesex info (e.g. "UCS4")
1379             // and check for bytesex ourselves:
1380             ms_wcCharsetName = WC_NAME;
1381             m2w = iconv_open(ms_wcCharsetName, cname);
1382
1383             // last bet, try if it knows WCHAR_T pseudo-charset
1384             if (m2w == (iconv_t)-1)
1385             {
1386                 ms_wcCharsetName = "WCHAR_T";
1387                 m2w = iconv_open(ms_wcCharsetName, cname);
1388             }
1389
1390             if (m2w != (iconv_t)-1)
1391             {
1392                 char    buf[2], *bufPtr;
1393                 wchar_t wbuf[2], *wbufPtr;
1394                 size_t  insz, outsz;
1395                 size_t  res;
1396
1397                 buf[0] = 'A';
1398                 buf[1] = 0;
1399                 wbuf[0] = 0;
1400                 insz = 2;
1401                 outsz = SIZEOF_WCHAR_T * 2;
1402                 wbufPtr = wbuf;
1403                 bufPtr = buf;
1404
1405                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1406                             (char**)&wbufPtr, &outsz);
1407
1408                 if (ICONV_FAILED(res, insz))
1409                 {
1410                     ms_wcCharsetName = NULL;
1411                     wxLogLastError(wxT("iconv"));
1412                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1413                 }
1414                 else
1415                 {
1416                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1417                 }
1418             }
1419             else
1420             {
1421                 ms_wcCharsetName = NULL;
1422
1423                 // VS: we must not output an error here, since wxWidgets will safely
1424                 //     fall back to using wxEncodingConverter.
1425                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1426                 //wxLogError(
1427             }
1428         }
1429         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1430     }
1431     else // we already have ms_wcCharsetName
1432     {
1433         m2w = iconv_open(ms_wcCharsetName, cname);
1434     }
1435
1436     // NB: don't ever pass NULL to iconv_open(), it may crash!
1437     if ( ms_wcCharsetName )
1438     {
1439         w2m = iconv_open( cname, ms_wcCharsetName);
1440     }
1441     else
1442     {
1443         w2m = (iconv_t)-1;
1444     }
1445 }
1446
1447 wxMBConv_iconv::~wxMBConv_iconv()
1448 {
1449     if ( m2w != (iconv_t)-1 )
1450         iconv_close(m2w);
1451     if ( w2m != (iconv_t)-1 )
1452         iconv_close(w2m);
1453 }
1454
1455 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1456 {
1457 #if wxUSE_THREADS
1458     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1459     //     Unfortunately there is a couple of global wxCSConv objects such as
1460     //     wxConvLocal that are used all over wx code, so we have to make sure
1461     //     the handle is used by at most one thread at the time. Otherwise
1462     //     only a few wx classes would be safe to use from non-main threads
1463     //     as MB<->WC conversion would fail "randomly".
1464     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1465 #endif
1466
1467     size_t inbuf = strlen(psz);
1468     size_t outbuf = n * SIZEOF_WCHAR_T;
1469     size_t res, cres;
1470     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1471     wchar_t *bufPtr = buf;
1472     const char *pszPtr = psz;
1473
1474     if (buf)
1475     {
1476         // have destination buffer, convert there
1477         cres = iconv(m2w,
1478                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1479                      (char**)&bufPtr, &outbuf);
1480         res = n - (outbuf / SIZEOF_WCHAR_T);
1481
1482         if (ms_wcNeedsSwap)
1483         {
1484             // convert to native endianness
1485             WC_BSWAP(buf /* _not_ bufPtr */, res)
1486         }
1487
1488         // NB: iconv was given only strlen(psz) characters on input, and so
1489         //     it couldn't convert the trailing zero. Let's do it ourselves
1490         //     if there's some room left for it in the output buffer.
1491         if (res < n)
1492             buf[res] = 0;
1493     }
1494     else
1495     {
1496         // no destination buffer... convert using temp buffer
1497         // to calculate destination buffer requirement
1498         wchar_t tbuf[8];
1499         res = 0;
1500         do {
1501             bufPtr = tbuf;
1502             outbuf = 8*SIZEOF_WCHAR_T;
1503
1504             cres = iconv(m2w,
1505                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1506                          (char**)&bufPtr, &outbuf );
1507
1508             res += 8-(outbuf/SIZEOF_WCHAR_T);
1509         } while ((cres==(size_t)-1) && (errno==E2BIG));
1510     }
1511
1512     if (ICONV_FAILED(cres, inbuf))
1513     {
1514         //VS: it is ok if iconv fails, hence trace only
1515         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1516         return (size_t)-1;
1517     }
1518
1519     return res;
1520 }
1521
1522 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1523 {
1524 #if wxUSE_THREADS
1525     // NB: explained in MB2WC
1526     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1527 #endif
1528
1529     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1530     size_t outbuf = n;
1531     size_t res, cres;
1532
1533     wchar_t *tmpbuf = 0;
1534
1535     if (ms_wcNeedsSwap)
1536     {
1537         // need to copy to temp buffer to switch endianness
1538         // this absolutely doesn't rock!
1539         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1540         //  could be in read-only memory, or be accessed in some other thread)
1541         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1542         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1543         WC_BSWAP(tmpbuf, inbuf)
1544         psz=tmpbuf;
1545     }
1546
1547     if (buf)
1548     {
1549         // have destination buffer, convert there
1550         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1551
1552         res = n-outbuf;
1553
1554         // NB: iconv was given only wcslen(psz) characters on input, and so
1555         //     it couldn't convert the trailing zero. Let's do it ourselves
1556         //     if there's some room left for it in the output buffer.
1557         if (res < n)
1558             buf[0] = 0;
1559     }
1560     else
1561     {
1562         // no destination buffer... convert using temp buffer
1563         // to calculate destination buffer requirement
1564         char tbuf[16];
1565         res = 0;
1566         do {
1567             buf = tbuf; outbuf = 16;
1568
1569             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1570
1571             res += 16 - outbuf;
1572         } while ((cres==(size_t)-1) && (errno==E2BIG));
1573     }
1574
1575     if (ms_wcNeedsSwap)
1576     {
1577         free(tmpbuf);
1578     }
1579
1580     if (ICONV_FAILED(cres, inbuf))
1581     {
1582         //VS: it is ok if iconv fails, hence trace only
1583         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1584         return (size_t)-1;
1585     }
1586
1587     return res;
1588 }
1589
1590 #endif // HAVE_ICONV
1591
1592
1593 // ============================================================================
1594 // Win32 conversion classes
1595 // ============================================================================
1596
1597 #ifdef wxHAVE_WIN32_MB2WC
1598
// from utils.cpp
//
// These are used by the wxMBConv_win32 constructors below to obtain the code
// page for a charset name or a font encoding; a result of -1 makes
// wxMBConv_win32::IsOk() return false.
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1604
1605 class wxMBConv_win32 : public wxMBConv
1606 {
1607 public:
1608     wxMBConv_win32()
1609     {
1610         m_CodePage = CP_ACP;
1611     }
1612
1613 #if wxUSE_FONTMAP
1614     wxMBConv_win32(const wxChar* name)
1615     {
1616         m_CodePage = wxCharsetToCodepage(name);
1617     }
1618
1619     wxMBConv_win32(wxFontEncoding encoding)
1620     {
1621         m_CodePage = wxEncodingToCodepage(encoding);
1622     }
1623 #endif
1624
1625     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1626     {
1627         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1628         // the behaviour is not compatible with the Unix version (using iconv)
1629         // and break the library itself, e.g. wxTextInputStream::NextChar()
1630         // wouldn't work if reading an incomplete MB char didn't result in an
1631         // error
1632         //
1633         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1634         // an error (tested under Windows Server 2003) and apparently it is
1635         // done on purpose, i.e. the function accepts any input in this case
1636         // and although I'd prefer to return error on ill-formed output, our
1637         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1638         // explicitly ill-formed according to RFC 2152) neither so we don't
1639         // even have any fallback here...
1640         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1641
1642         const size_t len = ::MultiByteToWideChar
1643                              (
1644                                 m_CodePage,     // code page
1645                                 flags,          // flags: fall on error
1646                                 psz,            // input string
1647                                 -1,             // its length (NUL-terminated)
1648                                 buf,            // output string
1649                                 buf ? n : 0     // size of output buffer
1650                              );
1651
1652         // note that it returns count of written chars for buf != NULL and size
1653         // of the needed buffer for buf == NULL so in either case the length of
1654         // the string (which never includes the terminating NUL) is one less
1655         return len ? len - 1 : (size_t)-1;
1656     }
1657
1658     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1659     {
1660         /*
1661             we have a problem here: by default, WideCharToMultiByte() may
1662             replace characters unrepresentable in the target code page with bad
1663             quality approximations such as turning "1/2" symbol (U+00BD) into
1664             "1" for the code pages which don't have it and we, obviously, want
1665             to avoid this at any price
1666
1667             the trouble is that this function does it _silently_, i.e. it won't
1668             even tell us whether it did or not... Win98/2000 and higher provide
1669             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1670             we have to resort to a round trip, i.e. check that converting back
1671             results in the same string -- this is, of course, expensive but
1672             otherwise we simply can't be sure to not garble the data.
1673          */
1674
1675         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1676         // it doesn't work with CJK encodings (which we test for rather roughly
1677         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1678         // supporting it
1679         BOOL usedDef wxDUMMY_INITIALIZE(false);
1680         BOOL *pUsedDef;
1681         int flags;
1682         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1683         {
1684             // it's our lucky day
1685             flags = WC_NO_BEST_FIT_CHARS;
1686             pUsedDef = &usedDef;
1687         }
1688         else // old system or unsupported encoding
1689         {
1690             flags = 0;
1691             pUsedDef = NULL;
1692         }
1693
1694         const size_t len = ::WideCharToMultiByte
1695                              (
1696                                 m_CodePage,     // code page
1697                                 flags,          // either none or no best fit
1698                                 pwz,            // input string
1699                                 -1,             // it is (wide) NUL-terminated
1700                                 buf,            // output buffer
1701                                 buf ? n : 0,    // and its size
1702                                 NULL,           // default "replacement" char
1703                                 pUsedDef        // [out] was it used?
1704                              );
1705
1706         if ( !len )
1707         {
1708             // function totally failed
1709             return (size_t)-1;
1710         }
1711
1712         // if we were really converting, check if we succeeded
1713         if ( buf )
1714         {
1715             if ( flags )
1716             {
1717                 // check if the conversion failed, i.e. if any replacements
1718                 // were done
1719                 if ( usedDef )
1720                     return (size_t)-1;
1721             }
1722             else // we must resort to double tripping...
1723             {
1724                 wxWCharBuffer wcBuf(n);
1725                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1726                         wcscmp(wcBuf, pwz) != 0 )
1727                 {
1728                     // we didn't obtain the same thing we started from, hence
1729                     // the conversion was lossy and we consider that it failed
1730                     return (size_t)-1;
1731                 }
1732             }
1733         }
1734
1735         // see the comment above for the reason of "len - 1"
1736         return len - 1;
1737     }
1738
1739     bool IsOk() const { return m_CodePage != -1; }
1740
1741 private:
1742     static bool CanUseNoBestFit()
1743     {
1744         static int s_isWin98Or2k = -1;
1745
1746         if ( s_isWin98Or2k == -1 )
1747         {
1748             int verMaj, verMin;
1749             switch ( wxGetOsVersion(&verMaj, &verMin) )
1750             {
1751                 case wxWIN95:
1752                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1753                     break;
1754
1755                 case wxWINDOWS_NT:
1756                     s_isWin98Or2k = verMaj >= 5;
1757                     break;
1758
1759                 default:
1760                     // unknown, be conseravtive by default
1761                     s_isWin98Or2k = 0;
1762             }
1763
1764             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1765         }
1766
1767         return s_isWin98Or2k == 1;
1768     }
1769
1770     long m_CodePage;
1771 };
1772
1773 #endif // wxHAVE_WIN32_MB2WC
1774
1775 // ============================================================================
1776 // Cocoa conversion classes
1777 // ============================================================================
1778
1779 #if defined(__WXCOCOA__)
1780
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1784
1785 #include <CoreFoundation/CFString.h>
1786 #include <CoreFoundation/CFStringEncodingExt.h>
1787
1788 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1789 {
1790     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1791     if ( encoding == wxFONTENCODING_DEFAULT )
1792     {
1793         enc = CFStringGetSystemEncoding();
1794     }
1795     else switch( encoding)
1796     {
1797         case wxFONTENCODING_ISO8859_1 :
1798             enc = kCFStringEncodingISOLatin1 ;
1799             break ;
1800         case wxFONTENCODING_ISO8859_2 :
1801             enc = kCFStringEncodingISOLatin2;
1802             break ;
1803         case wxFONTENCODING_ISO8859_3 :
1804             enc = kCFStringEncodingISOLatin3 ;
1805             break ;
1806         case wxFONTENCODING_ISO8859_4 :
1807             enc = kCFStringEncodingISOLatin4;
1808             break ;
1809         case wxFONTENCODING_ISO8859_5 :
1810             enc = kCFStringEncodingISOLatinCyrillic;
1811             break ;
1812         case wxFONTENCODING_ISO8859_6 :
1813             enc = kCFStringEncodingISOLatinArabic;
1814             break ;
1815         case wxFONTENCODING_ISO8859_7 :
1816             enc = kCFStringEncodingISOLatinGreek;
1817             break ;
1818         case wxFONTENCODING_ISO8859_8 :
1819             enc = kCFStringEncodingISOLatinHebrew;
1820             break ;
1821         case wxFONTENCODING_ISO8859_9 :
1822             enc = kCFStringEncodingISOLatin5;
1823             break ;
1824         case wxFONTENCODING_ISO8859_10 :
1825             enc = kCFStringEncodingISOLatin6;
1826             break ;
1827         case wxFONTENCODING_ISO8859_11 :
1828             enc = kCFStringEncodingISOLatinThai;
1829             break ;
1830         case wxFONTENCODING_ISO8859_13 :
1831             enc = kCFStringEncodingISOLatin7;
1832             break ;
1833         case wxFONTENCODING_ISO8859_14 :
1834             enc = kCFStringEncodingISOLatin8;
1835             break ;
1836         case wxFONTENCODING_ISO8859_15 :
1837             enc = kCFStringEncodingISOLatin9;
1838             break ;
1839
1840         case wxFONTENCODING_KOI8 :
1841             enc = kCFStringEncodingKOI8_R;
1842             break ;
1843         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1844             enc = kCFStringEncodingDOSRussian;
1845             break ;
1846
1847 //      case wxFONTENCODING_BULGARIAN :
1848 //          enc = ;
1849 //          break ;
1850
1851         case wxFONTENCODING_CP437 :
1852             enc =kCFStringEncodingDOSLatinUS ;
1853             break ;
1854         case wxFONTENCODING_CP850 :
1855             enc = kCFStringEncodingDOSLatin1;
1856             break ;
1857         case wxFONTENCODING_CP852 :
1858             enc = kCFStringEncodingDOSLatin2;
1859             break ;
1860         case wxFONTENCODING_CP855 :
1861             enc = kCFStringEncodingDOSCyrillic;
1862             break ;
1863         case wxFONTENCODING_CP866 :
1864             enc =kCFStringEncodingDOSRussian ;
1865             break ;
1866         case wxFONTENCODING_CP874 :
1867             enc = kCFStringEncodingDOSThai;
1868             break ;
1869         case wxFONTENCODING_CP932 :
1870             enc = kCFStringEncodingDOSJapanese;
1871             break ;
1872         case wxFONTENCODING_CP936 :
1873             enc =kCFStringEncodingDOSChineseSimplif ;
1874             break ;
1875         case wxFONTENCODING_CP949 :
1876             enc = kCFStringEncodingDOSKorean;
1877             break ;
1878         case wxFONTENCODING_CP950 :
1879             enc = kCFStringEncodingDOSChineseTrad;
1880             break ;
1881         case wxFONTENCODING_CP1250 :
1882             enc = kCFStringEncodingWindowsLatin2;
1883             break ;
1884         case wxFONTENCODING_CP1251 :
1885             enc =kCFStringEncodingWindowsCyrillic ;
1886             break ;
1887         case wxFONTENCODING_CP1252 :
1888             enc =kCFStringEncodingWindowsLatin1 ;
1889             break ;
1890         case wxFONTENCODING_CP1253 :
1891             enc = kCFStringEncodingWindowsGreek;
1892             break ;
1893         case wxFONTENCODING_CP1254 :
1894             enc = kCFStringEncodingWindowsLatin5;
1895             break ;
1896         case wxFONTENCODING_CP1255 :
1897             enc =kCFStringEncodingWindowsHebrew ;
1898             break ;
1899         case wxFONTENCODING_CP1256 :
1900             enc =kCFStringEncodingWindowsArabic ;
1901             break ;
1902         case wxFONTENCODING_CP1257 :
1903             enc = kCFStringEncodingWindowsBalticRim;
1904             break ;
1905 //   This only really encodes to UTF7 (if that) evidently
1906 //        case wxFONTENCODING_UTF7 :
1907 //            enc = kCFStringEncodingNonLossyASCII ;
1908 //            break ;
1909         case wxFONTENCODING_UTF8 :
1910             enc = kCFStringEncodingUTF8 ;
1911             break ;
1912         case wxFONTENCODING_EUC_JP :
1913             enc = kCFStringEncodingEUC_JP;
1914             break ;
1915         case wxFONTENCODING_UTF16 :
1916             enc = kCFStringEncodingUnicode ;
1917             break ;
1918         case wxFONTENCODING_MACROMAN :
1919             enc = kCFStringEncodingMacRoman ;
1920             break ;
1921         case wxFONTENCODING_MACJAPANESE :
1922             enc = kCFStringEncodingMacJapanese ;
1923             break ;
1924         case wxFONTENCODING_MACCHINESETRAD :
1925             enc = kCFStringEncodingMacChineseTrad ;
1926             break ;
1927         case wxFONTENCODING_MACKOREAN :
1928             enc = kCFStringEncodingMacKorean ;
1929             break ;
1930         case wxFONTENCODING_MACARABIC :
1931             enc = kCFStringEncodingMacArabic ;
1932             break ;
1933         case wxFONTENCODING_MACHEBREW :
1934             enc = kCFStringEncodingMacHebrew ;
1935             break ;
1936         case wxFONTENCODING_MACGREEK :
1937             enc = kCFStringEncodingMacGreek ;
1938             break ;
1939         case wxFONTENCODING_MACCYRILLIC :
1940             enc = kCFStringEncodingMacCyrillic ;
1941             break ;
1942         case wxFONTENCODING_MACDEVANAGARI :
1943             enc = kCFStringEncodingMacDevanagari ;
1944             break ;
1945         case wxFONTENCODING_MACGURMUKHI :
1946             enc = kCFStringEncodingMacGurmukhi ;
1947             break ;
1948         case wxFONTENCODING_MACGUJARATI :
1949             enc = kCFStringEncodingMacGujarati ;
1950             break ;
1951         case wxFONTENCODING_MACORIYA :
1952             enc = kCFStringEncodingMacOriya ;
1953             break ;
1954         case wxFONTENCODING_MACBENGALI :
1955             enc = kCFStringEncodingMacBengali ;
1956             break ;
1957         case wxFONTENCODING_MACTAMIL :
1958             enc = kCFStringEncodingMacTamil ;
1959             break ;
1960         case wxFONTENCODING_MACTELUGU :
1961             enc = kCFStringEncodingMacTelugu ;
1962             break ;
1963         case wxFONTENCODING_MACKANNADA :
1964             enc = kCFStringEncodingMacKannada ;
1965             break ;
1966         case wxFONTENCODING_MACMALAJALAM :
1967             enc = kCFStringEncodingMacMalayalam ;
1968             break ;
1969         case wxFONTENCODING_MACSINHALESE :
1970             enc = kCFStringEncodingMacSinhalese ;
1971             break ;
1972         case wxFONTENCODING_MACBURMESE :
1973             enc = kCFStringEncodingMacBurmese ;
1974             break ;
1975         case wxFONTENCODING_MACKHMER :
1976             enc = kCFStringEncodingMacKhmer ;
1977             break ;
1978         case wxFONTENCODING_MACTHAI :
1979             enc = kCFStringEncodingMacThai ;
1980             break ;
1981         case wxFONTENCODING_MACLAOTIAN :
1982             enc = kCFStringEncodingMacLaotian ;
1983             break ;
1984         case wxFONTENCODING_MACGEORGIAN :
1985             enc = kCFStringEncodingMacGeorgian ;
1986             break ;
1987         case wxFONTENCODING_MACARMENIAN :
1988             enc = kCFStringEncodingMacArmenian ;
1989             break ;
1990         case wxFONTENCODING_MACCHINESESIMP :
1991             enc = kCFStringEncodingMacChineseSimp ;
1992             break ;
1993         case wxFONTENCODING_MACTIBETAN :
1994             enc = kCFStringEncodingMacTibetan ;
1995             break ;
1996         case wxFONTENCODING_MACMONGOLIAN :
1997             enc = kCFStringEncodingMacMongolian ;
1998             break ;
1999         case wxFONTENCODING_MACETHIOPIC :
2000             enc = kCFStringEncodingMacEthiopic ;
2001             break ;
2002         case wxFONTENCODING_MACCENTRALEUR :
2003             enc = kCFStringEncodingMacCentralEurRoman ;
2004             break ;
2005         case wxFONTENCODING_MACVIATNAMESE :
2006             enc = kCFStringEncodingMacVietnamese ;
2007             break ;
2008         case wxFONTENCODING_MACARABICEXT :
2009             enc = kCFStringEncodingMacExtArabic ;
2010             break ;
2011         case wxFONTENCODING_MACSYMBOL :
2012             enc = kCFStringEncodingMacSymbol ;
2013             break ;
2014         case wxFONTENCODING_MACDINGBATS :
2015             enc = kCFStringEncodingMacDingbats ;
2016             break ;
2017         case wxFONTENCODING_MACTURKISH :
2018             enc = kCFStringEncodingMacTurkish ;
2019             break ;
2020         case wxFONTENCODING_MACCROATIAN :
2021             enc = kCFStringEncodingMacCroatian ;
2022             break ;
2023         case wxFONTENCODING_MACICELANDIC :
2024             enc = kCFStringEncodingMacIcelandic ;
2025             break ;
2026         case wxFONTENCODING_MACROMANIAN :
2027             enc = kCFStringEncodingMacRomanian ;
2028             break ;
2029         case wxFONTENCODING_MACCELTIC :
2030             enc = kCFStringEncodingMacCeltic ;
2031             break ;
2032         case wxFONTENCODING_MACGAELIC :
2033             enc = kCFStringEncodingMacGaelic ;
2034             break ;
2035 //      case wxFONTENCODING_MACKEYBOARD :
2036 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2037 //          break ;
2038         default :
2039             // because gcc is picky
2040             break ;
2041     } ;
2042     return enc ;
2043 }
2044
2045 class wxMBConv_cocoa : public wxMBConv
2046 {
2047 public:
2048     wxMBConv_cocoa()
2049     {
2050         Init(CFStringGetSystemEncoding()) ;
2051     }
2052
2053 #if wxUSE_FONTMAP
2054     wxMBConv_cocoa(const wxChar* name)
2055     {
2056         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2057     }
2058 #endif
2059
2060     wxMBConv_cocoa(wxFontEncoding encoding)
2061     {
2062         Init( wxCFStringEncFromFontEnc(encoding) );
2063     }
2064
2065     ~wxMBConv_cocoa()
2066     {
2067     }
2068
2069     void Init( CFStringEncoding encoding)
2070     {
2071         m_encoding = encoding ;
2072     }
2073
2074     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2075     {
2076         wxASSERT(szUnConv);
2077
2078         CFStringRef theString = CFStringCreateWithBytes (
2079                                                 NULL, //the allocator
2080                                                 (const UInt8*)szUnConv,
2081                                                 strlen(szUnConv),
2082                                                 m_encoding,
2083                                                 false //no BOM/external representation
2084                                                 );
2085
2086         wxASSERT(theString);
2087
2088         size_t nOutLength = CFStringGetLength(theString);
2089
2090         if (szOut == NULL)
2091         {
2092             CFRelease(theString);
2093             return nOutLength;
2094         }
2095
2096         CFRange theRange = { 0, nOutSize };
2097
2098 #if SIZEOF_WCHAR_T == 4
2099         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2100 #endif
2101
2102         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2103
2104         CFRelease(theString);
2105
2106         szUniCharBuffer[nOutLength] = '\0' ;
2107
2108 #if SIZEOF_WCHAR_T == 4
2109         wxMBConvUTF16 converter ;
2110         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2111         delete[] szUniCharBuffer;
2112 #endif
2113
2114         return nOutLength;
2115     }
2116
2117     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2118     {
2119         wxASSERT(szUnConv);
2120
2121         size_t nRealOutSize;
2122         size_t nBufSize = wxWcslen(szUnConv);
2123         UniChar* szUniBuffer = (UniChar*) szUnConv;
2124
2125 #if SIZEOF_WCHAR_T == 4
2126         wxMBConvUTF16BE converter ;
2127         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2128         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2129         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2130         nBufSize /= sizeof(UniChar);
2131 #endif
2132
2133         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2134                                 NULL, //allocator
2135                                 szUniBuffer,
2136                                 nBufSize,
2137                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2138                             );
2139
2140         wxASSERT(theString);
2141
2142         //Note that CER puts a BOM when converting to unicode
2143         //so we  check and use getchars instead in that case
2144         if (m_encoding == kCFStringEncodingUnicode)
2145         {
2146             if (szOut != NULL)
2147                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2148
2149             nRealOutSize = CFStringGetLength(theString) + 1;
2150         }
2151         else
2152         {
2153             CFStringGetBytes(
2154                 theString,
2155                 CFRangeMake(0, CFStringGetLength(theString)),
2156                 m_encoding,
2157                 0, //what to put in characters that can't be converted -
2158                     //0 tells CFString to return NULL if it meets such a character
2159                 false, //not an external representation
2160                 (UInt8*) szOut,
2161                 nOutSize,
2162                 (CFIndex*) &nRealOutSize
2163                         );
2164         }
2165
2166         CFRelease(theString);
2167
2168 #if SIZEOF_WCHAR_T == 4
2169         delete[] szUniBuffer;
2170 #endif
2171
2172         return  nRealOutSize - 1;
2173     }
2174
2175     bool IsOk() const
2176     {
2177         return m_encoding != kCFStringEncodingInvalidId &&
2178               CFStringIsEncodingAvailable(m_encoding);
2179     }
2180
2181 private:
2182     CFStringEncoding m_encoding ;
2183 };
2184
2185 #endif // defined(__WXCOCOA__)
2186
2187 // ============================================================================
2188 // Mac conversion classes
2189 // ============================================================================
2190
2191 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2192
2193 class wxMBConv_mac : public wxMBConv
2194 {
2195 public:
2196     wxMBConv_mac()
2197     {
2198         Init(CFStringGetSystemEncoding()) ;
2199     }
2200
2201 #if wxUSE_FONTMAP
2202     wxMBConv_mac(const wxChar* name)
2203     {
2204         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2205     }
2206 #endif
2207
2208     wxMBConv_mac(wxFontEncoding encoding)
2209     {
2210         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2211     }
2212
2213     ~wxMBConv_mac()
2214     {
2215         OSStatus status = noErr ;
2216         status = TECDisposeConverter(m_MB2WC_converter);
2217         status = TECDisposeConverter(m_WC2MB_converter);
2218     }
2219
2220
2221     void Init( TextEncodingBase encoding)
2222     {
2223         OSStatus status = noErr ;
2224         m_char_encoding = encoding ;
2225         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2226
2227         status = TECCreateConverter(&m_MB2WC_converter,
2228                                     m_char_encoding,
2229                                     m_unicode_encoding);
2230         status = TECCreateConverter(&m_WC2MB_converter,
2231                                     m_unicode_encoding,
2232                                     m_char_encoding);
2233     }
2234
2235     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2236     {
2237         OSStatus status = noErr ;
2238         ByteCount byteOutLen ;
2239         ByteCount byteInLen = strlen(psz) ;
2240         wchar_t *tbuf = NULL ;
2241         UniChar* ubuf = NULL ;
2242         size_t res = 0 ;
2243
2244         if (buf == NULL)
2245         {
2246             //apple specs say at least 32
2247             n = wxMax( 32 , byteInLen ) ;
2248             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2249         }
2250         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2251 #if SIZEOF_WCHAR_T == 4
2252         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2253 #else
2254         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2255 #endif
2256         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2257           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2258 #if SIZEOF_WCHAR_T == 4
2259         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2260         // is not properly terminated we get random characters at the end
2261         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2262         wxMBConvUTF16BE converter ;
2263         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2264         free( ubuf ) ;
2265 #else
2266         res = byteOutLen / sizeof( UniChar ) ;
2267 #endif
2268         if ( buf == NULL )
2269              free(tbuf) ;
2270
2271         if ( buf  && res < n)
2272             buf[res] = 0;
2273
2274         return res ;
2275     }
2276
2277     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2278     {
2279         OSStatus status = noErr ;
2280         ByteCount byteOutLen ;
2281         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2282
2283         char *tbuf = NULL ;
2284
2285         if (buf == NULL)
2286         {
2287             //apple specs say at least 32
2288             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2289             tbuf = (char*) malloc( n ) ;
2290         }
2291
2292         ByteCount byteBufferLen = n ;
2293         UniChar* ubuf = NULL ;
2294 #if SIZEOF_WCHAR_T == 4
2295         wxMBConvUTF16BE converter ;
2296         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2297         byteInLen = unicharlen ;
2298         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2299         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2300 #else
2301         ubuf = (UniChar*) psz ;
2302 #endif
2303         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2304             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2305 #if SIZEOF_WCHAR_T == 4
2306         free( ubuf ) ;
2307 #endif
2308         if ( buf == NULL )
2309             free(tbuf) ;
2310
2311         size_t res = byteOutLen ;
2312         if ( buf  && res < n)
2313         {
2314             buf[res] = 0;
2315
2316             //we need to double-trip to verify it didn't insert any ? in place
2317             //of bogus characters
2318             wxWCharBuffer wcBuf(n);
2319             size_t pszlen = wxWcslen(psz);
2320             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2321                         wxWcslen(wcBuf) != pszlen ||
2322                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2323             {
2324                 // we didn't obtain the same thing we started from, hence
2325                 // the conversion was lossy and we consider that it failed
2326                 return (size_t)-1;
2327             }
2328         }
2329
2330         return res ;
2331     }
2332
2333     bool IsOk() const
2334         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2335
2336 private:
2337     TECObjectRef m_MB2WC_converter ;
2338     TECObjectRef m_WC2MB_converter ;
2339
2340     TextEncodingBase m_char_encoding ;
2341     TextEncodingBase m_unicode_encoding ;
2342 };
2343
2344 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2345
2346 // ============================================================================
2347 // wxEncodingConverter based conversion classes
2348 // ============================================================================
2349
2350 #if wxUSE_FONTMAP
2351
2352 class wxMBConv_wxwin : public wxMBConv
2353 {
2354 private:
2355     void Init()
2356     {
2357         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2358                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2359     }
2360
2361 public:
2362     // temporarily just use wxEncodingConverter stuff,
2363     // so that it works while a better implementation is built
2364     wxMBConv_wxwin(const wxChar* name)
2365     {
2366         if (name)
2367             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2368         else
2369             m_enc = wxFONTENCODING_SYSTEM;
2370
2371         Init();
2372     }
2373
2374     wxMBConv_wxwin(wxFontEncoding enc)
2375     {
2376         m_enc = enc;
2377
2378         Init();
2379     }
2380
2381     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2382     {
2383         size_t inbuf = strlen(psz);
2384         if (buf)
2385         {
2386             if (!m2w.Convert(psz,buf))
2387                 return (size_t)-1;
2388         }
2389         return inbuf;
2390     }
2391
2392     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2393     {
2394         const size_t inbuf = wxWcslen(psz);
2395         if (buf)
2396         {
2397             if (!w2m.Convert(psz,buf))
2398                 return (size_t)-1;
2399         }
2400
2401         return inbuf;
2402     }
2403
2404     bool IsOk() const { return m_ok; }
2405
2406 public:
2407     wxFontEncoding m_enc;
2408     wxEncodingConverter m2w, w2m;
2409
2410     // were we initialized successfully?
2411     bool m_ok;
2412
2413     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2414 };
2415
2416 #endif // wxUSE_FONTMAP
2417
2418 // ============================================================================
2419 // wxCSConv implementation
2420 // ============================================================================
2421
2422 void wxCSConv::Init()
2423 {
2424     m_name = NULL;
2425     m_convReal =  NULL;
2426     m_deferred = true;
2427 }
2428
2429 wxCSConv::wxCSConv(const wxChar *charset)
2430 {
2431     Init();
2432
2433     if ( charset )
2434     {
2435         SetName(charset);
2436     }
2437
2438     m_encoding = wxFONTENCODING_SYSTEM;
2439 }
2440
2441 wxCSConv::wxCSConv(wxFontEncoding encoding)
2442 {
2443     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2444     {
2445         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2446
2447         encoding = wxFONTENCODING_SYSTEM;
2448     }
2449
2450     Init();
2451
2452     m_encoding = encoding;
2453 }
2454
2455 wxCSConv::~wxCSConv()
2456 {
2457     Clear();
2458 }
2459
2460 wxCSConv::wxCSConv(const wxCSConv& conv)
2461         : wxMBConv()
2462 {
2463     Init();
2464
2465     SetName(conv.m_name);
2466     m_encoding = conv.m_encoding;
2467 }
2468
2469 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2470 {
2471     Clear();
2472
2473     SetName(conv.m_name);
2474     m_encoding = conv.m_encoding;
2475
2476     return *this;
2477 }
2478
2479 void wxCSConv::Clear()
2480 {
2481     free(m_name);
2482     delete m_convReal;
2483
2484     m_name = NULL;
2485     m_convReal = NULL;
2486 }
2487
2488 void wxCSConv::SetName(const wxChar *charset)
2489 {
2490     if (charset)
2491     {
2492         m_name = wxStrdup(charset);
2493         m_deferred = true;
2494     }
2495 }
2496
2497 wxMBConv *wxCSConv::DoCreate() const
2498 {
2499     // check for the special case of ASCII or ISO8859-1 charset: as we have
2500     // special knowledge of it anyhow, we don't need to create a special
2501     // conversion object
2502     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2503     {
2504         // don't convert at all
2505         return NULL;
2506     }
2507
2508     // we trust OS to do conversion better than we can so try external
2509     // conversion methods first
2510     //
2511     // the full order is:
2512     //      1. OS conversion (iconv() under Unix or Win32 API)
2513     //      2. hard coded conversions for UTF
2514     //      3. wxEncodingConverter as fall back
2515
2516     // step (1)
2517 #ifdef HAVE_ICONV
2518 #if !wxUSE_FONTMAP
2519     if ( m_name )
2520 #endif // !wxUSE_FONTMAP
2521     {
2522         wxString name(m_name);
2523
2524 #if wxUSE_FONTMAP
2525         if ( name.empty() )
2526             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2527 #endif // wxUSE_FONTMAP
2528
2529         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2530         if ( conv->IsOk() )
2531             return conv;
2532
2533         delete conv;
2534     }
2535 #endif // HAVE_ICONV
2536
2537 #ifdef wxHAVE_WIN32_MB2WC
2538     {
2539 #if wxUSE_FONTMAP
2540         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2541                                       : new wxMBConv_win32(m_encoding);
2542         if ( conv->IsOk() )
2543             return conv;
2544
2545         delete conv;
2546 #else
2547         return NULL;
2548 #endif
2549     }
2550 #endif // wxHAVE_WIN32_MB2WC
2551 #if defined(__WXMAC__)
2552     {
2553         // leave UTF16 and UTF32 to the built-ins of wx
2554         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2555             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2556         {
2557
2558 #if wxUSE_FONTMAP
2559             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2560                                         : new wxMBConv_mac(m_encoding);
2561 #else
2562             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2563 #endif
2564             if ( conv->IsOk() )
2565                  return conv;
2566
2567             delete conv;
2568         }
2569     }
2570 #endif
2571 #if defined(__WXCOCOA__)
2572     {
2573         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2574         {
2575
2576 #if wxUSE_FONTMAP
2577             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2578                                           : new wxMBConv_cocoa(m_encoding);
2579 #else
2580             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2581 #endif
2582             if ( conv->IsOk() )
2583                  return conv;
2584
2585             delete conv;
2586         }
2587     }
2588 #endif
2589     // step (2)
2590     wxFontEncoding enc = m_encoding;
2591 #if wxUSE_FONTMAP
2592     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2593     {
2594         // use "false" to suppress interactive dialogs -- we can be called from
2595         // anywhere and popping up a dialog from here is the last thing we want to
2596         // do
2597         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2598     }
2599 #endif // wxUSE_FONTMAP
2600
2601     switch ( enc )
2602     {
2603         case wxFONTENCODING_UTF7:
2604              return new wxMBConvUTF7;
2605
2606         case wxFONTENCODING_UTF8:
2607              return new wxMBConvUTF8;
2608
2609         case wxFONTENCODING_UTF16BE:
2610              return new wxMBConvUTF16BE;
2611
2612         case wxFONTENCODING_UTF16LE:
2613              return new wxMBConvUTF16LE;
2614
2615         case wxFONTENCODING_UTF32BE:
2616              return new wxMBConvUTF32BE;
2617
2618         case wxFONTENCODING_UTF32LE:
2619              return new wxMBConvUTF32LE;
2620
2621         default:
2622              // nothing to do but put here to suppress gcc warnings
2623              ;
2624     }
2625
2626     // step (3)
2627 #if wxUSE_FONTMAP
2628     {
2629         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2630                                       : new wxMBConv_wxwin(m_encoding);
2631         if ( conv->IsOk() )
2632             return conv;
2633
2634         delete conv;
2635     }
2636 #endif // wxUSE_FONTMAP
2637
2638     // NB: This is a hack to prevent deadlock. What could otherwise happen
2639     //     in Unicode build: wxConvLocal creation ends up being here
2640     //     because of some failure and logs the error. But wxLog will try to
2641     //     attach timestamp, for which it will need wxConvLocal (to convert
2642     //     time to char* and then wchar_t*), but that fails, tries to log
2643     //     error, but wxLog has a (already locked) critical section that
2644     //     guards static buffer.
2645     static bool alreadyLoggingError = false;
2646     if (!alreadyLoggingError)
2647     {
2648         alreadyLoggingError = true;
2649         wxLogError(_("Cannot convert from the charset '%s'!"),
2650                    m_name ? m_name
2651                       :
2652 #if wxUSE_FONTMAP
2653                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2654 #else // !wxUSE_FONTMAP
2655                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2656 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2657               );
2658         alreadyLoggingError = false;
2659     }
2660
2661     return NULL;
2662 }
2663
2664 void wxCSConv::CreateConvIfNeeded() const
2665 {
2666     if ( m_deferred )
2667     {
2668         wxCSConv *self = (wxCSConv *)this; // const_cast
2669
2670 #if wxUSE_INTL
2671         // if we don't have neither the name nor the encoding, use the default
2672         // encoding for this system
2673         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2674         {
2675             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2676         }
2677 #endif // wxUSE_INTL
2678
2679         self->m_convReal = DoCreate();
2680         self->m_deferred = false;
2681     }
2682 }
2683
2684 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2685 {
2686     CreateConvIfNeeded();
2687
2688     if (m_convReal)
2689         return m_convReal->MB2WC(buf, psz, n);
2690
2691     // latin-1 (direct)
2692     size_t len = strlen(psz);
2693
2694     if (buf)
2695     {
2696         for (size_t c = 0; c <= len; c++)
2697             buf[c] = (unsigned char)(psz[c]);
2698     }
2699
2700     return len;
2701 }
2702
2703 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2704 {
2705     CreateConvIfNeeded();
2706
2707     if (m_convReal)
2708         return m_convReal->WC2MB(buf, psz, n);
2709
2710     // latin-1 (direct)
2711     const size_t len = wxWcslen(psz);
2712     if (buf)
2713     {
2714         for (size_t c = 0; c <= len; c++)
2715         {
2716             if (psz[c] > 0xFF)
2717                 return (size_t)-1;
2718             buf[c] = (char)psz[c];
2719         }
2720     }
2721     else
2722     {
2723         for (size_t c = 0; c <= len; c++)
2724         {
2725             if (psz[c] > 0xFF)
2726                 return (size_t)-1;
2727         }
2728     }
2729
2730     return len;
2731 }
2732
2733 // ----------------------------------------------------------------------------
2734 // globals
2735 // ----------------------------------------------------------------------------
2736
2737 #ifdef __WINDOWS__
2738     static wxMBConv_win32 wxConvLibcObj;
2739 #elif defined(__WXMAC__) && !defined(__MACH__)
2740     static wxMBConv_mac wxConvLibcObj ;
2741 #else
2742     static wxMBConvLibc wxConvLibcObj;
2743 #endif
2744
2745 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2746 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2747 static wxMBConvUTF7 wxConvUTF7Obj;
2748 static wxMBConvUTF8 wxConvUTF8Obj;
2749
2750 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2751 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2752 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2753 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2754 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2755 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2756 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2757 #ifdef __WXOSX__
2758                                     wxConvUTF8Obj;
2759 #else
2760                                     wxConvLibcObj;
2761 #endif
2762
2763
2764 #else // !wxUSE_WCHAR_T
2765
2766 // stand-ins in absence of wchar_t
2767 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2768                                 wxConvISO8859_1,
2769                                 wxConvLocal,
2770                                 wxConvUTF8;
2771
2772 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2773
2774