src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73     #include "wx/thread.h"
  74 #endif
  75
  76 #include "wx/encconv.h"
  77 #include "wx/fontmap.h"
  78 #include "wx/utils.h"
  79
  80 #ifdef __WXMAC__
  81 #include <ATSUnicode.h>
  82 #include <TextCommon.h>
  83 #include <TextEncodingConverter.h>
  84
  85 #include  "wx/mac/private.h"  // includes mac headers
  86 #endif
  87 // ----------------------------------------------------------------------------
  88 // macros
  89 // ----------------------------------------------------------------------------
  90
  91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  93
  94 #if SIZEOF_WCHAR_T == 4
  95     #define WC_NAME         "UCS4"
  96     #define WC_BSWAP         BSWAP_UCS4
  97     #ifdef WORDS_BIGENDIAN
  98       #define WC_NAME_BEST  "UCS-4BE"
  99     #else
 100       #define WC_NAME_BEST  "UCS-4LE"
 101     #endif
 102 #elif SIZEOF_WCHAR_T == 2
 103     #define WC_NAME         "UTF16"
 104     #define WC_BSWAP         BSWAP_UTF16
 105     #define WC_UTF16
 106     #ifdef WORDS_BIGENDIAN
 107       #define WC_NAME_BEST  "UTF-16BE"
 108     #else
 109       #define WC_NAME_BEST  "UTF-16LE"
 110     #endif
 111 #else // sizeof(wchar_t) != 2 nor 4
 112     // does this ever happen?
 113     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 114 #endif
 115
 116 // ============================================================================
 117 // implementation
 118 // ============================================================================
 119
 120 // ----------------------------------------------------------------------------
 121 // UTF-16 en/decoding to/from UCS-4
 122 // ----------------------------------------------------------------------------
 123
 124
 125 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 126 {
 127     if (input<=0xffff)
 128     {
 129         if (output)
 130             *output = (wxUint16) input;
 131         return 1;
 132     }
 133     else if (input>=0x110000)
 134     {
 135         return (size_t)-1;
 136     }
 137     else
 138     {
 139         if (output)
 140         {
 141             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 142             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 143         }
 144         return 2;
 145     }
 146 }
 147
 148 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 149 {
 150     if ((*input<0xd800) || (*input>0xdfff))
 151     {
 152         output = *input;
 153         return 1;
 154     }
 155     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 156     {
 157         output = *input;
 158         return (size_t)-1;
 159     }
 160     else
 161     {
 162         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 163         return 2;
 164     }
 165 }
 166
 167
 168 // ----------------------------------------------------------------------------
 169 // wxMBConv
 170 // ----------------------------------------------------------------------------
 171
 172 wxMBConv::~wxMBConv()
 173 {
 174     // nothing to do here (necessary for Darwin linking probably)
 175 }
 176
 177 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 178 {
 179     if ( psz )
 180     {
 181         // calculate the length of the buffer needed first
 182         size_t nLen = MB2WC(NULL, psz, 0);
 183         if ( nLen != (size_t)-1 )
 184         {
 185             // now do the actual conversion
 186             wxWCharBuffer buf(nLen);
 187             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 188             if ( nLen != (size_t)-1 )
 189             {
 190                 return buf;
 191             }
 192         }
 193     }
 194
 195     wxWCharBuffer buf((wchar_t *)NULL);
 196
 197     return buf;
 198 }
 199
 200 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 201 {
 202     if ( pwz )
 203     {
 204         size_t nLen = WC2MB(NULL, pwz, 0);
 205         if ( nLen != (size_t)-1 )
 206         {
 207             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 208             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 209             if ( nLen != (size_t)-1 )
 210             {
 211                 return buf;
 212             }
 213         }
 214     }
 215
 216     wxCharBuffer buf((char *)NULL);
 217
 218     return buf;
 219 }
 220
 221 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 222 {
 223     wxASSERT(pOutSize != NULL);
 224
 225     const char* szEnd = szString + nStringLen + 1;
 226     const char* szPos = szString;
 227     const char* szStart = szPos;
 228
 229     size_t nActualLength = 0;
 230     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 231
 232     wxWCharBuffer theBuffer(nCurrentSize);
 233
 234     //Convert the string until the length() is reached, continuing the
 235     //loop every time a null character is reached
 236     while(szPos != szEnd)
 237     {
 238         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 239
 240         //Get the length of the current (sub)string
 241         size_t nLen = MB2WC(NULL, szPos, 0);
 242
 243         //Invalid conversion?
 244         if( nLen == (size_t)-1 )
 245         {
 246             *pOutSize = 0;
 247             theBuffer.data()[0u] = wxT('\0');
 248             return theBuffer;
 249         }
 250
 251
 252         //Increase the actual length (+1 for current null character)
 253         nActualLength += nLen + 1;
 254
 255         //if buffer too big, realloc the buffer
 256         if (nActualLength > (nCurrentSize+1))
 257         {
 258             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 259             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 260             theBuffer = theNewBuffer;
 261             nCurrentSize <<= 1;
 262         }
 263
 264         //Convert the current (sub)string
 265         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 266         {
 267             *pOutSize = 0;
 268             theBuffer.data()[0u] = wxT('\0');
 269             return theBuffer;
 270         }
 271
 272         //Increment to next (sub)string
 273         //Note that we have to use strlen here instead of nLen
 274         //here because XX2XX gives us the size of the output buffer,
 275         //not neccessarly the length of the string
 276         szPos += strlen(szPos) + 1;
 277     }
 278
 279     //success - return actual length and the buffer
 280     *pOutSize = nActualLength;
 281     return theBuffer;
 282 }
 283
 284 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 285 {
 286     wxASSERT(pOutSize != NULL);
 287
 288     const wchar_t* szEnd = szString + nStringLen + 1;
 289     const wchar_t* szPos = szString;
 290     const wchar_t* szStart = szPos;
 291
 292     size_t nActualLength = 0;
 293     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 294
 295     wxCharBuffer theBuffer(nCurrentSize);
 296
 297     //Convert the string until the length() is reached, continuing the
 298     //loop every time a null character is reached
 299     while(szPos != szEnd)
 300     {
 301         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 302
 303         //Get the length of the current (sub)string
 304         size_t nLen = WC2MB(NULL, szPos, 0);
 305
 306         //Invalid conversion?
 307         if( nLen == (size_t)-1 )
 308         {
 309             *pOutSize = 0;
 310             theBuffer.data()[0u] = wxT('\0');
 311             return theBuffer;
 312         }
 313
 314         //Increase the actual length (+1 for current null character)
 315         nActualLength += nLen + 1;
 316
 317         //if buffer too big, realloc the buffer
 318         if (nActualLength > (nCurrentSize+1))
 319         {
 320             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 321             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 322             theBuffer = theNewBuffer;
 323             nCurrentSize <<= 1;
 324         }
 325
 326         //Convert the current (sub)string
 327         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 328         {
 329             *pOutSize = 0;
 330             theBuffer.data()[0u] = wxT('\0');
 331             return theBuffer;
 332         }
 333
 334         //Increment to next (sub)string
 335         //Note that we have to use wxWcslen here instead of nLen
 336         //here because XX2XX gives us the size of the output buffer,
 337         //not neccessarly the length of the string
 338         szPos += wxWcslen(szPos) + 1;
 339     }
 340
 341     //success - return actual length and the buffer
 342     *pOutSize = nActualLength;
 343     return theBuffer;
 344 }
 345
 346 // ----------------------------------------------------------------------------
 347 // wxMBConvLibc
 348 // ----------------------------------------------------------------------------
 349
 350 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 351 {
 352     return wxMB2WC(buf, psz, n);
 353 }
 354
 355 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 356 {
 357     return wxWC2MB(buf, psz, n);
 358 }
 359
 360 #ifdef __UNIX__
 361
 362 // ----------------------------------------------------------------------------
 363 // wxConvBrokenFileNames
 364 // ----------------------------------------------------------------------------
 365
 366 wxConvBrokenFileNames::wxConvBrokenFileNames()
 367 {
 368     // decide which conversion to use for the file names
 369
 370     // (1) this variable exists for the sole purpose of specifying the encoding
 371     //     of the filenames for GTK+ programs, so use it if it is set
 372     wxString encName(wxGetenv(_T("G_FILENAME_ENCODING")));
 373     encName.MakeUpper();
 374     if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
 375     {
 376         m_conv = new wxCSConv(encName);
 377     }
 378     else // no G_FILENAME_ENCODING
 379     {
 380 #if wxUSE_INTL
 381         if ( encName.empty() )
 382             encName = wxLocale::GetSystemEncodingName().Upper();
 383 #endif
 384
 385         // (2) if a non default locale is set, assume that the user wants his
 386         //     filenames in this locale too
 387         if ( !encName.empty() && encName != _T("UTF-8") && encName != _T("UTF8") )
 388         {
 389             wxSetEnv(_T("G_FILENAME_ENCODING"), encName);
 390             m_conv = new wxCSConv(encName);
 391         }
 392         else
 393         {
 394             // (3) finally use UTF-8 by default
 395             m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 396         }
 397     }
 398 }
 399
 400 size_t
 401 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 402                              const char *psz,
 403                              size_t outputSize) const
 404 {
 405     return m_conv->MB2WC( outputBuf, psz, outputSize );
 406 }
 407
 408 size_t
 409 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 410                              const wchar_t *psz,
 411                              size_t outputSize) const
 412 {
 413     return m_conv->WC2MB( outputBuf, psz, outputSize );
 414 }
 415
 416 #endif
 417
 418 // ----------------------------------------------------------------------------
 419 // UTF-7
 420 // ----------------------------------------------------------------------------
 421
 422 // Implementation (C) 2004 Fredrik Roubert
 423
 424 //
 425 // BASE64 decoding table
 426 //
 427 static const unsigned char utf7unb64[] =
 428 {
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 432     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 433     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 434     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 435     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 436     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 437     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 438     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 439     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 440     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 441     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 442     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 443     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 444     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 450     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 451     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 452     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 453     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 454     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 455     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 456     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 457     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 458     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 459     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 460     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 461 };
 462
 463 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 464 {
 465     size_t len = 0;
 466
 467     while (*psz && ((!buf) || (len < n)))
 468     {
 469         unsigned char cc = *psz++;
 470         if (cc != '+')
 471         {
 472             // plain ASCII char
 473             if (buf)
 474                 *buf++ = cc;
 475             len++;
 476         }
 477         else if (*psz == '-')
 478         {
 479             // encoded plus sign
 480             if (buf)
 481                 *buf++ = cc;
 482             len++;
 483             psz++;
 484         }
 485         else
 486         {
 487             // BASE64 encoded string
 488             bool lsb;
 489             unsigned char c;
 490             unsigned int d, l;
 491             for (lsb = false, d = 0, l = 0;
 492                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 493             {
 494                 d <<= 6;
 495                 d += cc;
 496                 for (l += 6; l >= 8; lsb = !lsb)
 497                 {
 498                     c = (unsigned char)((d >> (l -= 8)) % 256);
 499                     if (lsb)
 500                     {
 501                         if (buf)
 502                             *buf++ |= c;
 503                         len ++;
 504                     }
 505                     else
 506                         if (buf)
 507                             *buf = (wchar_t)(c << 8);
 508                 }
 509             }
 510             if (*psz == '-')
 511                 psz++;
 512         }
 513     }
 514     if (buf && (len < n))
 515         *buf = 0;
 516     return len;
 517 }
 518
 519 //
 520 // BASE64 encoding table
 521 //
 522 static const unsigned char utf7enb64[] =
 523 {
 524     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 525     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 526     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 527     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 528     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 529     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 530     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 531     '4', '5', '6', '7', '8', '9', '+', '/'
 532 };
 533
 534 //
 535 // UTF-7 encoding table
 536 //
 537 // 0 - Set D (directly encoded characters)
 538 // 1 - Set O (optional direct characters)
 539 // 2 - whitespace characters (optional)
 540 // 3 - special characters
 541 //
 542 static const unsigned char utf7encode[128] =
 543 {
 544     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 545     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 546     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 547     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 548     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 549     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 550     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 551     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 552 };
 553
 554 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 555 {
 556
 557
 558     size_t len = 0;
 559
 560     while (*psz && ((!buf) || (len < n)))
 561     {
 562         wchar_t cc = *psz++;
 563         if (cc < 0x80 && utf7encode[cc] < 1)
 564         {
 565             // plain ASCII char
 566             if (buf)
 567                 *buf++ = (char)cc;
 568             len++;
 569         }
 570 #ifndef WC_UTF16
 571         else if (((wxUint32)cc) > 0xffff)
 572         {
 573             // no surrogate pair generation (yet?)
 574             return (size_t)-1;
 575         }
 576 #endif
 577         else
 578         {
 579             if (buf)
 580                 *buf++ = '+';
 581             len++;
 582             if (cc != '+')
 583             {
 584                 // BASE64 encode string
 585                 unsigned int lsb, d, l;
 586                 for (d = 0, l = 0;; psz++)
 587                 {
 588                     for (lsb = 0; lsb < 2; lsb ++)
 589                     {
 590                         d <<= 8;
 591                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 592
 593                         for (l += 8; l >= 6; )
 594                         {
 595                             l -= 6;
 596                             if (buf)
 597                                 *buf++ = utf7enb64[(d >> l) % 64];
 598                             len++;
 599                         }
 600                     }
 601                     cc = *psz;
 602                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 603                         break;
 604                 }
 605                 if (l != 0)
 606                 {
 607                     if (buf)
 608                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 609                     len++;
 610                 }
 611             }
 612             if (buf)
 613                 *buf++ = '-';
 614             len++;
 615         }
 616     }
 617     if (buf && (len < n))
 618         *buf = 0;
 619     return len;
 620 }
 621
 622 // ----------------------------------------------------------------------------
 623 // UTF-8
 624 // ----------------------------------------------------------------------------
 625
 626 static wxUint32 utf8_max[]=
 627     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 628
 629 // boundaries of the private use area we use to (temporarily) remap invalid
 630 // characters invalid in a UTF-8 encoded string
 631 const wxUint32 wxUnicodePUA = 0x100000;
 632 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 633
 634 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 635 {
 636     size_t len = 0;
 637
 638     while (*psz && ((!buf) || (len < n)))
 639     {
 640         const char *opsz = psz;
 641         bool invalid = false;
 642         unsigned char cc = *psz++, fc = cc;
 643         unsigned cnt;
 644         for (cnt = 0; fc & 0x80; cnt++)
 645             fc <<= 1;
 646         if (!cnt)
 647         {
 648             // plain ASCII char
 649             if (buf)
 650                 *buf++ = cc;
 651             len++;
 652
 653             // escape the escape character for octal escapes
 654             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 655                     && cc == '\\' && (!buf || len < n))
 656             {
 657                 if (buf)
 658                     *buf++ = cc;
 659                 len++;
 660             }
 661         }
 662         else
 663         {
 664             cnt--;
 665             if (!cnt)
 666             {
 667                 // invalid UTF-8 sequence
 668                 invalid = true;
 669             }
 670             else
 671             {
 672                 unsigned ocnt = cnt - 1;
 673                 wxUint32 res = cc & (0x3f >> cnt);
 674                 while (cnt--)
 675                 {
 676                     cc = *psz;
 677                     if ((cc & 0xC0) != 0x80)
 678                     {
 679                         // invalid UTF-8 sequence
 680                         invalid = true;
 681                         break;
 682                     }
 683                     psz++;
 684                     res = (res << 6) | (cc & 0x3f);
 685                 }
 686                 if (invalid || res <= utf8_max[ocnt])
 687                 {
 688                     // illegal UTF-8 encoding
 689                     invalid = true;
 690                 }
 691                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 692                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 693                 {
 694                     // if one of our PUA characters turns up externally
 695                     // it must also be treated as an illegal sequence
 696                     // (a bit like you have to escape an escape character)
 697                     invalid = true;
 698                 }
 699                 else
 700                 {
 701 #ifdef WC_UTF16
 702                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 703                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 704                     if (pa == (size_t)-1)
 705                     {
 706                         invalid = true;
 707                     }
 708                     else
 709                     {
 710                         if (buf)
 711                             buf += pa;
 712                         len += pa;
 713                     }
 714 #else // !WC_UTF16
 715                     if (buf)
 716                         *buf++ = res;
 717                     len++;
 718 #endif // WC_UTF16/!WC_UTF16
 719                 }
 720             }
 721             if (invalid)
 722             {
 723                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 724                 {
 725                     while (opsz < psz && (!buf || len < n))
 726                     {
 727 #ifdef WC_UTF16
 728                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 729                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 730                         wxASSERT(pa != (size_t)-1);
 731                         if (buf)
 732                             buf += pa;
 733                         opsz++;
 734                         len += pa;
 735 #else
 736                         if (buf)
 737                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 738                         opsz++;
 739                         len++;
 740 #endif
 741                     }
 742                 }
 743                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 744                 {
 745                     while (opsz < psz && (!buf || len < n))
 746                     {
 747                         if ( buf && len + 3 < n )
 748                         {
 749                             unsigned char n = *opsz;
 750                             *buf++ = L'\\';
 751                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 752                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 753                             *buf++ = (wchar_t)( L'0' + n % 010 );
 754                         }
 755                         opsz++;
 756                         len += 4;
 757                     }
 758                 }
 759                 else // MAP_INVALID_UTF8_NOT
 760                 {
 761                     return (size_t)-1;
 762                 }
 763             }
 764         }
 765     }
 766     if (buf && (len < n))
 767         *buf = 0;
 768     return len;
 769 }
 770
 771 static inline bool isoctal(wchar_t wch)
 772 {
 773     return L'0' <= wch && wch <= L'7';
 774 }
 775
 776 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 777 {
 778     size_t len = 0;
 779
 780     while (*psz && ((!buf) || (len < n)))
 781     {
 782         wxUint32 cc;
 783 #ifdef WC_UTF16
 784         // cast is ok for WC_UTF16
 785         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 786         psz += (pa == (size_t)-1) ? 1 : pa;
 787 #else
 788         cc=(*psz++) & 0x7fffffff;
 789 #endif
 790
 791         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 792                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 793         {
 794             if (buf)
 795                 *buf++ = (char)(cc - wxUnicodePUA);
 796             len++;
 797         }
 798         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 799                     && cc == L'\\' && psz[0] == L'\\' )
 800         {
 801             if (buf)
 802                 *buf++ = (char)cc;
 803             psz++;
 804             len++;
 805         }
 806         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 807                     cc == L'\\' &&
 808                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 809         {
 810             if (buf)
 811             {
 812                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 813                                  (psz[1] - L'0')*010 +
 814                                  (psz[2] - L'0'));
 815             }
 816
 817             psz += 3;
 818             len++;
 819         }
 820         else
 821         {
 822             unsigned cnt;
 823             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 824             if (!cnt)
 825             {
 826                 // plain ASCII char
 827                 if (buf)
 828                     *buf++ = (char) cc;
 829                 len++;
 830             }
 831
 832             else
 833             {
 834                 len += cnt + 1;
 835                 if (buf)
 836                 {
 837                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 838                     while (cnt--)
 839                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 840                 }
 841             }
 842         }
 843     }
 844
 845     if (buf && (len<n))
 846         *buf = 0;
 847
 848     return len;
 849 }
 850
 851 // ----------------------------------------------------------------------------
 852 // UTF-16
 853 // ----------------------------------------------------------------------------
 854
 855 #ifdef WORDS_BIGENDIAN
 856     #define wxMBConvUTF16straight wxMBConvUTF16BE
 857     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 858 #else
 859     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 860     #define wxMBConvUTF16straight wxMBConvUTF16LE
 861 #endif
 862
 863
 864 #ifdef WC_UTF16
 865
 866 // copy 16bit MB to 16bit String
 867 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 868 {
 869     size_t len=0;
 870
 871     while (*(wxUint16*)psz && (!buf || len < n))
 872     {
 873         if (buf)
 874             *buf++ = *(wxUint16*)psz;
 875         len++;
 876
 877         psz += sizeof(wxUint16);
 878     }
 879     if (buf && len<n)   *buf=0;
 880
 881     return len;
 882 }
 883
 884
 885 // copy 16bit String to 16bit MB
 886 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 887 {
 888     size_t len=0;
 889
 890     while (*psz && (!buf || len < n))
 891     {
 892         if (buf)
 893         {
 894             *(wxUint16*)buf = *psz;
 895             buf += sizeof(wxUint16);
 896         }
 897         len += sizeof(wxUint16);
 898         psz++;
 899     }
 900     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 901
 902     return len;
 903 }
 904
 905
 906 // swap 16bit MB to 16bit String
 907 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 908 {
 909     size_t len=0;
 910
 911     while (*(wxUint16*)psz && (!buf || len < n))
 912     {
 913         if (buf)
 914         {
 915             ((char *)buf)[0] = psz[1];
 916             ((char *)buf)[1] = psz[0];
 917             buf++;
 918         }
 919         len++;
 920         psz += sizeof(wxUint16);
 921     }
 922     if (buf && len<n)   *buf=0;
 923
 924     return len;
 925 }
 926
 927
 928 // swap 16bit MB to 16bit String
 929 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 930 {
 931     size_t len=0;
 932
 933     while (*psz && (!buf || len < n))
 934     {
 935         if (buf)
 936         {
 937             *buf++ = ((char*)psz)[1];
 938             *buf++ = ((char*)psz)[0];
 939         }
 940         len += sizeof(wxUint16);
 941         psz++;
 942     }
 943     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 944
 945     return len;
 946 }
 947
 948
 949 #else // WC_UTF16
 950
 951
 952 // copy 16bit MB to 32bit String
 953 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 954 {
 955     size_t len=0;
 956
 957     while (*(wxUint16*)psz && (!buf || len < n))
 958     {
 959         wxUint32 cc;
 960         size_t pa=decode_utf16((wxUint16*)psz, cc);
 961         if (pa == (size_t)-1)
 962             return pa;
 963
 964         if (buf)
 965             *buf++ = cc;
 966         len++;
 967         psz += pa * sizeof(wxUint16);
 968     }
 969     if (buf && len<n)   *buf=0;
 970
 971     return len;
 972 }
 973
 974
 975 // copy 32bit String to 16bit MB
 976 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 977 {
 978     size_t len=0;
 979
 980     while (*psz && (!buf || len < n))
 981     {
 982         wxUint16 cc[2];
 983         size_t pa=encode_utf16(*psz, cc);
 984
 985         if (pa == (size_t)-1)
 986             return pa;
 987
 988         if (buf)
 989         {
 990             *(wxUint16*)buf = cc[0];
 991             buf += sizeof(wxUint16);
 992             if (pa > 1)
 993             {
 994                 *(wxUint16*)buf = cc[1];
 995                 buf += sizeof(wxUint16);
 996             }
 997         }
 998
 999         len += pa*sizeof(wxUint16);
1000         psz++;
1001     }
1002     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1003
1004     return len;
1005 }
1006
1007
1008 // swap 16bit MB to 32bit String
1009 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1010 {
1011     size_t len=0;
1012
1013     while (*(wxUint16*)psz && (!buf || len < n))
1014     {
1015         wxUint32 cc;
1016         char tmp[4];
1017         tmp[0]=psz[1];  tmp[1]=psz[0];
1018         tmp[2]=psz[3];  tmp[3]=psz[2];
1019
1020         size_t pa=decode_utf16((wxUint16*)tmp, cc);
1021         if (pa == (size_t)-1)
1022             return pa;
1023
1024         if (buf)
1025             *buf++ = cc;
1026
1027         len++;
1028         psz += pa * sizeof(wxUint16);
1029     }
1030     if (buf && len<n)   *buf=0;
1031
1032     return len;
1033 }
1034
1035
1036 // swap 32bit String to 16bit MB
1037 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1038 {
1039     size_t len=0;
1040
1041     while (*psz && (!buf || len < n))
1042     {
1043         wxUint16 cc[2];
1044         size_t pa=encode_utf16(*psz, cc);
1045
1046         if (pa == (size_t)-1)
1047             return pa;
1048
1049         if (buf)
1050         {
1051             *buf++ = ((char*)cc)[1];
1052             *buf++ = ((char*)cc)[0];
1053             if (pa > 1)
1054             {
1055                 *buf++ = ((char*)cc)[3];
1056                 *buf++ = ((char*)cc)[2];
1057             }
1058         }
1059
1060         len += pa*sizeof(wxUint16);
1061         psz++;
1062     }
1063     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1064
1065     return len;
1066 }
1067
1068 #endif // WC_UTF16
1069
1070
1071 // ----------------------------------------------------------------------------
1072 // UTF-32
1073 // ----------------------------------------------------------------------------
1074
// The conversions below are written once for the "straight" (no byte
// swapping) and once for the "swap" case; these macros map those two names to
// the big and little endian classes: if WORDS_BIGENDIAN is defined the BE
// class is the straight one, otherwise the LE one is.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global UTF-32 converter objects, one per byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1086
1087
1088 #ifdef WC_UTF16
1089
1090 // copy 32bit MB to 16bit String
1091 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1092 {
1093     size_t len=0;
1094
1095     while (*(wxUint32*)psz && (!buf || len < n))
1096     {
1097         wxUint16 cc[2];
1098
1099         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1100         if (pa == (size_t)-1)
1101             return pa;
1102
1103         if (buf)
1104         {
1105             *buf++ = cc[0];
1106             if (pa > 1)
1107                 *buf++ = cc[1];
1108         }
1109         len += pa;
1110         psz += sizeof(wxUint32);
1111     }
1112     if (buf && len<n)   *buf=0;
1113
1114     return len;
1115 }
1116
1117
1118 // copy 16bit String to 32bit MB
1119 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1120 {
1121     size_t len=0;
1122
1123     while (*psz && (!buf || len < n))
1124     {
1125         wxUint32 cc;
1126
1127         // cast is ok for WC_UTF16
1128         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1129         if (pa == (size_t)-1)
1130             return pa;
1131
1132         if (buf)
1133         {
1134             *(wxUint32*)buf = cc;
1135             buf += sizeof(wxUint32);
1136         }
1137         len += sizeof(wxUint32);
1138         psz += pa;
1139     }
1140
1141     if (buf && len<=n-sizeof(wxUint32))
1142         *(wxUint32*)buf=0;
1143
1144     return len;
1145 }
1146
1147
1148
1149 // swap 32bit MB to 16bit String
1150 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1151 {
1152     size_t len=0;
1153
1154     while (*(wxUint32*)psz && (!buf || len < n))
1155     {
1156         char tmp[4];
1157         tmp[0] = psz[3];   tmp[1] = psz[2];
1158         tmp[2] = psz[1];   tmp[3] = psz[0];
1159
1160
1161         wxUint16 cc[2];
1162
1163         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1164         if (pa == (size_t)-1)
1165             return pa;
1166
1167         if (buf)
1168         {
1169             *buf++ = cc[0];
1170             if (pa > 1)
1171                 *buf++ = cc[1];
1172         }
1173         len += pa;
1174         psz += sizeof(wxUint32);
1175     }
1176
1177     if (buf && len<n)
1178         *buf=0;
1179
1180     return len;
1181 }
1182
1183
1184 // swap 16bit String to 32bit MB
1185 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1186 {
1187     size_t len=0;
1188
1189     while (*psz && (!buf || len < n))
1190     {
1191         char cc[4];
1192
1193         // cast is ok for WC_UTF16
1194         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1195         if (pa == (size_t)-1)
1196             return pa;
1197
1198         if (buf)
1199         {
1200             *buf++ = cc[3];
1201             *buf++ = cc[2];
1202             *buf++ = cc[1];
1203             *buf++ = cc[0];
1204         }
1205         len += sizeof(wxUint32);
1206         psz += pa;
1207     }
1208
1209     if (buf && len<=n-sizeof(wxUint32))
1210         *(wxUint32*)buf=0;
1211
1212     return len;
1213 }
1214
1215 #else // WC_UTF16
1216
1217
1218 // copy 32bit MB to 32bit String
1219 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1220 {
1221     size_t len=0;
1222
1223     while (*(wxUint32*)psz && (!buf || len < n))
1224     {
1225         if (buf)
1226             *buf++ = *(wxUint32*)psz;
1227         len++;
1228         psz += sizeof(wxUint32);
1229     }
1230
1231     if (buf && len<n)
1232         *buf=0;
1233
1234     return len;
1235 }
1236
1237
1238 // copy 32bit String to 32bit MB
1239 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1240 {
1241     size_t len=0;
1242
1243     while (*psz && (!buf || len < n))
1244     {
1245         if (buf)
1246         {
1247             *(wxUint32*)buf = *psz;
1248             buf += sizeof(wxUint32);
1249         }
1250
1251         len += sizeof(wxUint32);
1252         psz++;
1253     }
1254
1255     if (buf && len<=n-sizeof(wxUint32))
1256         *(wxUint32*)buf=0;
1257
1258     return len;
1259 }
1260
1261
1262 // swap 32bit MB to 32bit String
1263 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1264 {
1265     size_t len=0;
1266
1267     while (*(wxUint32*)psz && (!buf || len < n))
1268     {
1269         if (buf)
1270         {
1271             ((char *)buf)[0] = psz[3];
1272             ((char *)buf)[1] = psz[2];
1273             ((char *)buf)[2] = psz[1];
1274             ((char *)buf)[3] = psz[0];
1275             buf++;
1276         }
1277         len++;
1278         psz += sizeof(wxUint32);
1279     }
1280
1281     if (buf && len<n)
1282         *buf=0;
1283
1284     return len;
1285 }
1286
1287
1288 // swap 32bit String to 32bit MB
1289 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1290 {
1291     size_t len=0;
1292
1293     while (*psz && (!buf || len < n))
1294     {
1295         if (buf)
1296         {
1297             *buf++ = ((char *)psz)[3];
1298             *buf++ = ((char *)psz)[2];
1299             *buf++ = ((char *)psz)[1];
1300             *buf++ = ((char *)psz)[0];
1301         }
1302         len += sizeof(wxUint32);
1303         psz++;
1304     }
1305
1306     if (buf && len<=n-sizeof(wxUint32))
1307         *(wxUint32*)buf=0;
1308
1309     return len;
1310 }
1311
1312
1313 #endif // WC_UTF16
1314
1315
1316 // ============================================================================
1317 // The classes doing conversion using the iconv_xxx() functions
1318 // ============================================================================
1319
1320 #ifdef HAVE_ICONV
1321
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) takes the value returned by iconv() and the
// number of input bytes left after the call and evaluates to true if the
// conversion must be considered as failed. The arguments are parenthesised
// in the expansion so that arbitrary expressions may be passed.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast a pointer to the input buffer pointer to the type which the second
// iconv() parameter is declared with (ICONV_CONST is defined elsewhere)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1338
1339 // ----------------------------------------------------------------------------
1340 // wxMBConv_iconv: encapsulates an iconv character set
1341 // ----------------------------------------------------------------------------
1342
1343 class wxMBConv_iconv : public wxMBConv
1344 {
1345 public:
1346     wxMBConv_iconv(const wxChar *name);
1347     virtual ~wxMBConv_iconv();
1348
1349     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1350     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1351
1352     bool IsOk() const
1353         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1354
1355 protected:
1356     // the iconv handlers used to translate from multibyte to wide char and in
1357     // the other direction
1358     iconv_t m2w,
1359             w2m;
1360 #if wxUSE_THREADS
1361     // guards access to m2w and w2m objects
1362     wxMutex m_iconvMutex;
1363 #endif
1364
1365 private:
1366     // the name (for iconv_open()) of a wide char charset -- if none is
1367     // available on this machine, it will remain NULL
1368     static const char *ms_wcCharsetName;
1369
1370     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1371     // different endian-ness than the native one
1372     static bool ms_wcNeedsSwap;
1373 };
1374
1375 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1376 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1377
1378 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1379 {
1380     // Do it the hard way
1381     char cname[100];
1382     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1383         cname[i] = (char) name[i];
1384
1385     // check for charset that represents wchar_t:
1386     if (ms_wcCharsetName == NULL)
1387     {
1388         ms_wcNeedsSwap = false;
1389
1390         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1391         ms_wcCharsetName = WC_NAME_BEST;
1392         m2w = iconv_open(ms_wcCharsetName, cname);
1393
1394         if (m2w == (iconv_t)-1)
1395         {
1396             // try charset w/o bytesex info (e.g. "UCS4")
1397             // and check for bytesex ourselves:
1398             ms_wcCharsetName = WC_NAME;
1399             m2w = iconv_open(ms_wcCharsetName, cname);
1400
1401             // last bet, try if it knows WCHAR_T pseudo-charset
1402             if (m2w == (iconv_t)-1)
1403             {
1404                 ms_wcCharsetName = "WCHAR_T";
1405                 m2w = iconv_open(ms_wcCharsetName, cname);
1406             }
1407
1408             if (m2w != (iconv_t)-1)
1409             {
1410                 char    buf[2], *bufPtr;
1411                 wchar_t wbuf[2], *wbufPtr;
1412                 size_t  insz, outsz;
1413                 size_t  res;
1414
1415                 buf[0] = 'A';
1416                 buf[1] = 0;
1417                 wbuf[0] = 0;
1418                 insz = 2;
1419                 outsz = SIZEOF_WCHAR_T * 2;
1420                 wbufPtr = wbuf;
1421                 bufPtr = buf;
1422
1423                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1424                             (char**)&wbufPtr, &outsz);
1425
1426                 if (ICONV_FAILED(res, insz))
1427                 {
1428                     ms_wcCharsetName = NULL;
1429                     wxLogLastError(wxT("iconv"));
1430                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1431                 }
1432                 else
1433                 {
1434                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1435                 }
1436             }
1437             else
1438             {
1439                 ms_wcCharsetName = NULL;
1440
1441                 // VS: we must not output an error here, since wxWidgets will safely
1442                 //     fall back to using wxEncodingConverter.
1443                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1444                 //wxLogError(
1445             }
1446         }
1447         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1448     }
1449     else // we already have ms_wcCharsetName
1450     {
1451         m2w = iconv_open(ms_wcCharsetName, cname);
1452     }
1453
1454     // NB: don't ever pass NULL to iconv_open(), it may crash!
1455     if ( ms_wcCharsetName )
1456     {
1457         w2m = iconv_open( cname, ms_wcCharsetName);
1458     }
1459     else
1460     {
1461         w2m = (iconv_t)-1;
1462     }
1463 }
1464
1465 wxMBConv_iconv::~wxMBConv_iconv()
1466 {
1467     if ( m2w != (iconv_t)-1 )
1468         iconv_close(m2w);
1469     if ( w2m != (iconv_t)-1 )
1470         iconv_close(w2m);
1471 }
1472
1473 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1474 {
1475 #if wxUSE_THREADS
1476     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1477     //     Unfortunately there is a couple of global wxCSConv objects such as
1478     //     wxConvLocal that are used all over wx code, so we have to make sure
1479     //     the handle is used by at most one thread at the time. Otherwise
1480     //     only a few wx classes would be safe to use from non-main threads
1481     //     as MB<->WC conversion would fail "randomly".
1482     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1483 #endif
1484
1485     size_t inbuf = strlen(psz);
1486     size_t outbuf = n * SIZEOF_WCHAR_T;
1487     size_t res, cres;
1488     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1489     wchar_t *bufPtr = buf;
1490     const char *pszPtr = psz;
1491
1492     if (buf)
1493     {
1494         // have destination buffer, convert there
1495         cres = iconv(m2w,
1496                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1497                      (char**)&bufPtr, &outbuf);
1498         res = n - (outbuf / SIZEOF_WCHAR_T);
1499
1500         if (ms_wcNeedsSwap)
1501         {
1502             // convert to native endianness
1503             WC_BSWAP(buf /* _not_ bufPtr */, res)
1504         }
1505
1506         // NB: iconv was given only strlen(psz) characters on input, and so
1507         //     it couldn't convert the trailing zero. Let's do it ourselves
1508         //     if there's some room left for it in the output buffer.
1509         if (res < n)
1510             buf[res] = 0;
1511     }
1512     else
1513     {
1514         // no destination buffer... convert using temp buffer
1515         // to calculate destination buffer requirement
1516         wchar_t tbuf[8];
1517         res = 0;
1518         do {
1519             bufPtr = tbuf;
1520             outbuf = 8*SIZEOF_WCHAR_T;
1521
1522             cres = iconv(m2w,
1523                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1524                          (char**)&bufPtr, &outbuf );
1525
1526             res += 8-(outbuf/SIZEOF_WCHAR_T);
1527         } while ((cres==(size_t)-1) && (errno==E2BIG));
1528     }
1529
1530     if (ICONV_FAILED(cres, inbuf))
1531     {
1532         //VS: it is ok if iconv fails, hence trace only
1533         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1534         return (size_t)-1;
1535     }
1536
1537     return res;
1538 }
1539
1540 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1541 {
1542 #if wxUSE_THREADS
1543     // NB: explained in MB2WC
1544     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1545 #endif
1546
1547     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1548     size_t outbuf = n;
1549     size_t res, cres;
1550
1551     wchar_t *tmpbuf = 0;
1552
1553     if (ms_wcNeedsSwap)
1554     {
1555         // need to copy to temp buffer to switch endianness
1556         // this absolutely doesn't rock!
1557         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1558         //  could be in read-only memory, or be accessed in some other thread)
1559         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1560         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1561         WC_BSWAP(tmpbuf, inbuf)
1562         psz=tmpbuf;
1563     }
1564
1565     if (buf)
1566     {
1567         // have destination buffer, convert there
1568         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1569
1570         res = n-outbuf;
1571
1572         // NB: iconv was given only wcslen(psz) characters on input, and so
1573         //     it couldn't convert the trailing zero. Let's do it ourselves
1574         //     if there's some room left for it in the output buffer.
1575         if (res < n)
1576             buf[0] = 0;
1577     }
1578     else
1579     {
1580         // no destination buffer... convert using temp buffer
1581         // to calculate destination buffer requirement
1582         char tbuf[16];
1583         res = 0;
1584         do {
1585             buf = tbuf; outbuf = 16;
1586
1587             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1588
1589             res += 16 - outbuf;
1590         } while ((cres==(size_t)-1) && (errno==E2BIG));
1591     }
1592
1593     if (ms_wcNeedsSwap)
1594     {
1595         free(tmpbuf);
1596     }
1597
1598     if (ICONV_FAILED(cres, inbuf))
1599     {
1600         //VS: it is ok if iconv fails, hence trace only
1601         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1602         return (size_t)-1;
1603     }
1604
1605     return res;
1606 }
1607
1608 #endif // HAVE_ICONV
1609
1610
1611 // ============================================================================
1612 // Win32 conversion classes
1613 // ============================================================================
1614
1615 #ifdef wxHAVE_WIN32_MB2WC
1616
// from utils.cpp: these functions are only declared here and only when
// wxUSE_FONTMAP is enabled; wxMBConv_win32 below stores their return value as
// the code page to use for the given charset name or font encoding
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1622
1623 class wxMBConv_win32 : public wxMBConv
1624 {
1625 public:
1626     wxMBConv_win32()
1627     {
1628         m_CodePage = CP_ACP;
1629     }
1630
1631 #if wxUSE_FONTMAP
1632     wxMBConv_win32(const wxChar* name)
1633     {
1634         m_CodePage = wxCharsetToCodepage(name);
1635     }
1636
1637     wxMBConv_win32(wxFontEncoding encoding)
1638     {
1639         m_CodePage = wxEncodingToCodepage(encoding);
1640     }
1641 #endif
1642
1643     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1644     {
1645         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1646         // the behaviour is not compatible with the Unix version (using iconv)
1647         // and break the library itself, e.g. wxTextInputStream::NextChar()
1648         // wouldn't work if reading an incomplete MB char didn't result in an
1649         // error
1650         //
1651         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1652         // an error (tested under Windows Server 2003) and apparently it is
1653         // done on purpose, i.e. the function accepts any input in this case
1654         // and although I'd prefer to return error on ill-formed output, our
1655         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1656         // explicitly ill-formed according to RFC 2152) neither so we don't
1657         // even have any fallback here...
1658         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1659
1660         const size_t len = ::MultiByteToWideChar
1661                              (
1662                                 m_CodePage,     // code page
1663                                 flags,          // flags: fall on error
1664                                 psz,            // input string
1665                                 -1,             // its length (NUL-terminated)
1666                                 buf,            // output string
1667                                 buf ? n : 0     // size of output buffer
1668                              );
1669
1670         // note that it returns count of written chars for buf != NULL and size
1671         // of the needed buffer for buf == NULL so in either case the length of
1672         // the string (which never includes the terminating NUL) is one less
1673         return len ? len - 1 : (size_t)-1;
1674     }
1675
1676     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1677     {
1678         /*
1679             we have a problem here: by default, WideCharToMultiByte() may
1680             replace characters unrepresentable in the target code page with bad
1681             quality approximations such as turning "1/2" symbol (U+00BD) into
1682             "1" for the code pages which don't have it and we, obviously, want
1683             to avoid this at any price
1684
1685             the trouble is that this function does it _silently_, i.e. it won't
1686             even tell us whether it did or not... Win98/2000 and higher provide
1687             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1688             we have to resort to a round trip, i.e. check that converting back
1689             results in the same string -- this is, of course, expensive but
1690             otherwise we simply can't be sure to not garble the data.
1691          */
1692
1693         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1694         // it doesn't work with CJK encodings (which we test for rather roughly
1695         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1696         // supporting it
1697         BOOL usedDef wxDUMMY_INITIALIZE(false);
1698         BOOL *pUsedDef;
1699         int flags;
1700         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1701         {
1702             // it's our lucky day
1703             flags = WC_NO_BEST_FIT_CHARS;
1704             pUsedDef = &usedDef;
1705         }
1706         else // old system or unsupported encoding
1707         {
1708             flags = 0;
1709             pUsedDef = NULL;
1710         }
1711
1712         const size_t len = ::WideCharToMultiByte
1713                              (
1714                                 m_CodePage,     // code page
1715                                 flags,          // either none or no best fit
1716                                 pwz,            // input string
1717                                 -1,             // it is (wide) NUL-terminated
1718                                 buf,            // output buffer
1719                                 buf ? n : 0,    // and its size
1720                                 NULL,           // default "replacement" char
1721                                 pUsedDef        // [out] was it used?
1722                              );
1723
1724         if ( !len )
1725         {
1726             // function totally failed
1727             return (size_t)-1;
1728         }
1729
1730         // if we were really converting, check if we succeeded
1731         if ( buf )
1732         {
1733             if ( flags )
1734             {
1735                 // check if the conversion failed, i.e. if any replacements
1736                 // were done
1737                 if ( usedDef )
1738                     return (size_t)-1;
1739             }
1740             else // we must resort to double tripping...
1741             {
1742                 wxWCharBuffer wcBuf(n);
1743                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1744                         wcscmp(wcBuf, pwz) != 0 )
1745                 {
1746                     // we didn't obtain the same thing we started from, hence
1747                     // the conversion was lossy and we consider that it failed
1748                     return (size_t)-1;
1749                 }
1750             }
1751         }
1752
1753         // see the comment above for the reason of "len - 1"
1754         return len - 1;
1755     }
1756
1757     bool IsOk() const { return m_CodePage != -1; }
1758
1759 private:
1760     static bool CanUseNoBestFit()
1761     {
1762         static int s_isWin98Or2k = -1;
1763
1764         if ( s_isWin98Or2k == -1 )
1765         {
1766             int verMaj, verMin;
1767             switch ( wxGetOsVersion(&verMaj, &verMin) )
1768             {
1769                 case wxWIN95:
1770                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1771                     break;
1772
1773                 case wxWINDOWS_NT:
1774                     s_isWin98Or2k = verMaj >= 5;
1775                     break;
1776
1777                 default:
1778                     // unknown, be conseravtive by default
1779                     s_isWin98Or2k = 0;
1780             }
1781
1782             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1783         }
1784
1785         return s_isWin98Or2k == 1;
1786     }
1787
1788     long m_CodePage;
1789 };
1790
1791 #endif // wxHAVE_WIN32_MB2WC
1792
1793 // ============================================================================
1794 // Cocoa conversion classes
1795 // ============================================================================
1796
1797 #if defined(__WXCOCOA__)
1798
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1802
1803 #include <CoreFoundation/CFString.h>
1804 #include <CoreFoundation/CFStringEncodingExt.h>
1805
1806 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1807 {
1808     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1809     if ( encoding == wxFONTENCODING_DEFAULT )
1810     {
1811         enc = CFStringGetSystemEncoding();
1812     }
1813     else switch( encoding)
1814     {
1815         case wxFONTENCODING_ISO8859_1 :
1816             enc = kCFStringEncodingISOLatin1 ;
1817             break ;
1818         case wxFONTENCODING_ISO8859_2 :
1819             enc = kCFStringEncodingISOLatin2;
1820             break ;
1821         case wxFONTENCODING_ISO8859_3 :
1822             enc = kCFStringEncodingISOLatin3 ;
1823             break ;
1824         case wxFONTENCODING_ISO8859_4 :
1825             enc = kCFStringEncodingISOLatin4;
1826             break ;
1827         case wxFONTENCODING_ISO8859_5 :
1828             enc = kCFStringEncodingISOLatinCyrillic;
1829             break ;
1830         case wxFONTENCODING_ISO8859_6 :
1831             enc = kCFStringEncodingISOLatinArabic;
1832             break ;
1833         case wxFONTENCODING_ISO8859_7 :
1834             enc = kCFStringEncodingISOLatinGreek;
1835             break ;
1836         case wxFONTENCODING_ISO8859_8 :
1837             enc = kCFStringEncodingISOLatinHebrew;
1838             break ;
1839         case wxFONTENCODING_ISO8859_9 :
1840             enc = kCFStringEncodingISOLatin5;
1841             break ;
1842         case wxFONTENCODING_ISO8859_10 :
1843             enc = kCFStringEncodingISOLatin6;
1844             break ;
1845         case wxFONTENCODING_ISO8859_11 :
1846             enc = kCFStringEncodingISOLatinThai;
1847             break ;
1848         case wxFONTENCODING_ISO8859_13 :
1849             enc = kCFStringEncodingISOLatin7;
1850             break ;
1851         case wxFONTENCODING_ISO8859_14 :
1852             enc = kCFStringEncodingISOLatin8;
1853             break ;
1854         case wxFONTENCODING_ISO8859_15 :
1855             enc = kCFStringEncodingISOLatin9;
1856             break ;
1857
1858         case wxFONTENCODING_KOI8 :
1859             enc = kCFStringEncodingKOI8_R;
1860             break ;
1861         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1862             enc = kCFStringEncodingDOSRussian;
1863             break ;
1864
1865 //      case wxFONTENCODING_BULGARIAN :
1866 //          enc = ;
1867 //          break ;
1868
1869         case wxFONTENCODING_CP437 :
1870             enc =kCFStringEncodingDOSLatinUS ;
1871             break ;
1872         case wxFONTENCODING_CP850 :
1873             enc = kCFStringEncodingDOSLatin1;
1874             break ;
1875         case wxFONTENCODING_CP852 :
1876             enc = kCFStringEncodingDOSLatin2;
1877             break ;
1878         case wxFONTENCODING_CP855 :
1879             enc = kCFStringEncodingDOSCyrillic;
1880             break ;
1881         case wxFONTENCODING_CP866 :
1882             enc =kCFStringEncodingDOSRussian ;
1883             break ;
1884         case wxFONTENCODING_CP874 :
1885             enc = kCFStringEncodingDOSThai;
1886             break ;
1887         case wxFONTENCODING_CP932 :
1888             enc = kCFStringEncodingDOSJapanese;
1889             break ;
1890         case wxFONTENCODING_CP936 :
1891             enc =kCFStringEncodingDOSChineseSimplif ;
1892             break ;
1893         case wxFONTENCODING_CP949 :
1894             enc = kCFStringEncodingDOSKorean;
1895             break ;
1896         case wxFONTENCODING_CP950 :
1897             enc = kCFStringEncodingDOSChineseTrad;
1898             break ;
1899         case wxFONTENCODING_CP1250 :
1900             enc = kCFStringEncodingWindowsLatin2;
1901             break ;
1902         case wxFONTENCODING_CP1251 :
1903             enc =kCFStringEncodingWindowsCyrillic ;
1904             break ;
1905         case wxFONTENCODING_CP1252 :
1906             enc =kCFStringEncodingWindowsLatin1 ;
1907             break ;
1908         case wxFONTENCODING_CP1253 :
1909             enc = kCFStringEncodingWindowsGreek;
1910             break ;
1911         case wxFONTENCODING_CP1254 :
1912             enc = kCFStringEncodingWindowsLatin5;
1913             break ;
1914         case wxFONTENCODING_CP1255 :
1915             enc =kCFStringEncodingWindowsHebrew ;
1916             break ;
1917         case wxFONTENCODING_CP1256 :
1918             enc =kCFStringEncodingWindowsArabic ;
1919             break ;
1920         case wxFONTENCODING_CP1257 :
1921             enc = kCFStringEncodingWindowsBalticRim;
1922             break ;
1923 //   This only really encodes to UTF7 (if that) evidently
1924 //        case wxFONTENCODING_UTF7 :
1925 //            enc = kCFStringEncodingNonLossyASCII ;
1926 //            break ;
1927         case wxFONTENCODING_UTF8 :
1928             enc = kCFStringEncodingUTF8 ;
1929             break ;
1930         case wxFONTENCODING_EUC_JP :
1931             enc = kCFStringEncodingEUC_JP;
1932             break ;
1933         case wxFONTENCODING_UTF16 :
1934             enc = kCFStringEncodingUnicode ;
1935             break ;
1936         case wxFONTENCODING_MACROMAN :
1937             enc = kCFStringEncodingMacRoman ;
1938             break ;
1939         case wxFONTENCODING_MACJAPANESE :
1940             enc = kCFStringEncodingMacJapanese ;
1941             break ;
1942         case wxFONTENCODING_MACCHINESETRAD :
1943             enc = kCFStringEncodingMacChineseTrad ;
1944             break ;
1945         case wxFONTENCODING_MACKOREAN :
1946             enc = kCFStringEncodingMacKorean ;
1947             break ;
1948         case wxFONTENCODING_MACARABIC :
1949             enc = kCFStringEncodingMacArabic ;
1950             break ;
1951         case wxFONTENCODING_MACHEBREW :
1952             enc = kCFStringEncodingMacHebrew ;
1953             break ;
1954         case wxFONTENCODING_MACGREEK :
1955             enc = kCFStringEncodingMacGreek ;
1956             break ;
1957         case wxFONTENCODING_MACCYRILLIC :
1958             enc = kCFStringEncodingMacCyrillic ;
1959             break ;
1960         case wxFONTENCODING_MACDEVANAGARI :
1961             enc = kCFStringEncodingMacDevanagari ;
1962             break ;
1963         case wxFONTENCODING_MACGURMUKHI :
1964             enc = kCFStringEncodingMacGurmukhi ;
1965             break ;
1966         case wxFONTENCODING_MACGUJARATI :
1967             enc = kCFStringEncodingMacGujarati ;
1968             break ;
1969         case wxFONTENCODING_MACORIYA :
1970             enc = kCFStringEncodingMacOriya ;
1971             break ;
1972         case wxFONTENCODING_MACBENGALI :
1973             enc = kCFStringEncodingMacBengali ;
1974             break ;
1975         case wxFONTENCODING_MACTAMIL :
1976             enc = kCFStringEncodingMacTamil ;
1977             break ;
1978         case wxFONTENCODING_MACTELUGU :
1979             enc = kCFStringEncodingMacTelugu ;
1980             break ;
1981         case wxFONTENCODING_MACKANNADA :
1982             enc = kCFStringEncodingMacKannada ;
1983             break ;
1984         case wxFONTENCODING_MACMALAJALAM :
1985             enc = kCFStringEncodingMacMalayalam ;
1986             break ;
1987         case wxFONTENCODING_MACSINHALESE :
1988             enc = kCFStringEncodingMacSinhalese ;
1989             break ;
1990         case wxFONTENCODING_MACBURMESE :
1991             enc = kCFStringEncodingMacBurmese ;
1992             break ;
1993         case wxFONTENCODING_MACKHMER :
1994             enc = kCFStringEncodingMacKhmer ;
1995             break ;
1996         case wxFONTENCODING_MACTHAI :
1997             enc = kCFStringEncodingMacThai ;
1998             break ;
1999         case wxFONTENCODING_MACLAOTIAN :
2000             enc = kCFStringEncodingMacLaotian ;
2001             break ;
2002         case wxFONTENCODING_MACGEORGIAN :
2003             enc = kCFStringEncodingMacGeorgian ;
2004             break ;
2005         case wxFONTENCODING_MACARMENIAN :
2006             enc = kCFStringEncodingMacArmenian ;
2007             break ;
2008         case wxFONTENCODING_MACCHINESESIMP :
2009             enc = kCFStringEncodingMacChineseSimp ;
2010             break ;
2011         case wxFONTENCODING_MACTIBETAN :
2012             enc = kCFStringEncodingMacTibetan ;
2013             break ;
2014         case wxFONTENCODING_MACMONGOLIAN :
2015             enc = kCFStringEncodingMacMongolian ;
2016             break ;
2017         case wxFONTENCODING_MACETHIOPIC :
2018             enc = kCFStringEncodingMacEthiopic ;
2019             break ;
2020         case wxFONTENCODING_MACCENTRALEUR :
2021             enc = kCFStringEncodingMacCentralEurRoman ;
2022             break ;
2023         case wxFONTENCODING_MACVIATNAMESE :
2024             enc = kCFStringEncodingMacVietnamese ;
2025             break ;
2026         case wxFONTENCODING_MACARABICEXT :
2027             enc = kCFStringEncodingMacExtArabic ;
2028             break ;
2029         case wxFONTENCODING_MACSYMBOL :
2030             enc = kCFStringEncodingMacSymbol ;
2031             break ;
2032         case wxFONTENCODING_MACDINGBATS :
2033             enc = kCFStringEncodingMacDingbats ;
2034             break ;
2035         case wxFONTENCODING_MACTURKISH :
2036             enc = kCFStringEncodingMacTurkish ;
2037             break ;
2038         case wxFONTENCODING_MACCROATIAN :
2039             enc = kCFStringEncodingMacCroatian ;
2040             break ;
2041         case wxFONTENCODING_MACICELANDIC :
2042             enc = kCFStringEncodingMacIcelandic ;
2043             break ;
2044         case wxFONTENCODING_MACROMANIAN :
2045             enc = kCFStringEncodingMacRomanian ;
2046             break ;
2047         case wxFONTENCODING_MACCELTIC :
2048             enc = kCFStringEncodingMacCeltic ;
2049             break ;
2050         case wxFONTENCODING_MACGAELIC :
2051             enc = kCFStringEncodingMacGaelic ;
2052             break ;
2053 //      case wxFONTENCODING_MACKEYBOARD :
2054 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2055 //          break ;
2056         default :
2057             // because gcc is picky
2058             break ;
2059     } ;
2060     return enc ;
2061 }
2062
2063 class wxMBConv_cocoa : public wxMBConv
2064 {
2065 public:
2066     wxMBConv_cocoa()
2067     {
2068         Init(CFStringGetSystemEncoding()) ;
2069     }
2070
2071 #if wxUSE_FONTMAP
2072     wxMBConv_cocoa(const wxChar* name)
2073     {
2074         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2075     }
2076 #endif
2077
2078     wxMBConv_cocoa(wxFontEncoding encoding)
2079     {
2080         Init( wxCFStringEncFromFontEnc(encoding) );
2081     }
2082
2083     ~wxMBConv_cocoa()
2084     {
2085     }
2086
2087     void Init( CFStringEncoding encoding)
2088     {
2089         m_encoding = encoding ;
2090     }
2091
2092     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2093     {
2094         wxASSERT(szUnConv);
2095
2096         CFStringRef theString = CFStringCreateWithBytes (
2097                                                 NULL, //the allocator
2098                                                 (const UInt8*)szUnConv,
2099                                                 strlen(szUnConv),
2100                                                 m_encoding,
2101                                                 false //no BOM/external representation
2102                                                 );
2103
2104         wxASSERT(theString);
2105
2106         size_t nOutLength = CFStringGetLength(theString);
2107
2108         if (szOut == NULL)
2109         {
2110             CFRelease(theString);
2111             return nOutLength;
2112         }
2113
2114         CFRange theRange = { 0, nOutSize };
2115
2116 #if SIZEOF_WCHAR_T == 4
2117         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2118 #endif
2119
2120         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2121
2122         CFRelease(theString);
2123
2124         szUniCharBuffer[nOutLength] = '\0' ;
2125
2126 #if SIZEOF_WCHAR_T == 4
2127         wxMBConvUTF16 converter ;
2128         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2129         delete[] szUniCharBuffer;
2130 #endif
2131
2132         return nOutLength;
2133     }
2134
2135     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2136     {
2137         wxASSERT(szUnConv);
2138
2139         size_t nRealOutSize;
2140         size_t nBufSize = wxWcslen(szUnConv);
2141         UniChar* szUniBuffer = (UniChar*) szUnConv;
2142
2143 #if SIZEOF_WCHAR_T == 4
2144         wxMBConvUTF16BE converter ;
2145         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2146         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2147         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2148         nBufSize /= sizeof(UniChar);
2149 #endif
2150
2151         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2152                                 NULL, //allocator
2153                                 szUniBuffer,
2154                                 nBufSize,
2155                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2156                             );
2157
2158         wxASSERT(theString);
2159
2160         //Note that CER puts a BOM when converting to unicode
2161         //so we  check and use getchars instead in that case
2162         if (m_encoding == kCFStringEncodingUnicode)
2163         {
2164             if (szOut != NULL)
2165                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2166
2167             nRealOutSize = CFStringGetLength(theString) + 1;
2168         }
2169         else
2170         {
2171             CFStringGetBytes(
2172                 theString,
2173                 CFRangeMake(0, CFStringGetLength(theString)),
2174                 m_encoding,
2175                 0, //what to put in characters that can't be converted -
2176                     //0 tells CFString to return NULL if it meets such a character
2177                 false, //not an external representation
2178                 (UInt8*) szOut,
2179                 nOutSize,
2180                 (CFIndex*) &nRealOutSize
2181                         );
2182         }
2183
2184         CFRelease(theString);
2185
2186 #if SIZEOF_WCHAR_T == 4
2187         delete[] szUniBuffer;
2188 #endif
2189
2190         return  nRealOutSize - 1;
2191     }
2192
2193     bool IsOk() const
2194     {
2195         return m_encoding != kCFStringEncodingInvalidId &&
2196               CFStringIsEncodingAvailable(m_encoding);
2197     }
2198
2199 private:
2200     CFStringEncoding m_encoding ;
2201 };
2202
2203 #endif // defined(__WXCOCOA__)
2204
2205 // ============================================================================
2206 // Mac conversion classes
2207 // ============================================================================
2208
2209 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2210
2211 class wxMBConv_mac : public wxMBConv
2212 {
2213 public:
2214     wxMBConv_mac()
2215     {
2216         Init(CFStringGetSystemEncoding()) ;
2217     }
2218
2219 #if wxUSE_FONTMAP
2220     wxMBConv_mac(const wxChar* name)
2221     {
2222         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2223     }
2224 #endif
2225
2226     wxMBConv_mac(wxFontEncoding encoding)
2227     {
2228         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2229     }
2230
2231     ~wxMBConv_mac()
2232     {
2233         OSStatus status = noErr ;
2234         status = TECDisposeConverter(m_MB2WC_converter);
2235         status = TECDisposeConverter(m_WC2MB_converter);
2236     }
2237
2238
2239     void Init( TextEncodingBase encoding)
2240     {
2241         OSStatus status = noErr ;
2242         m_char_encoding = encoding ;
2243         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2244
2245         status = TECCreateConverter(&m_MB2WC_converter,
2246                                     m_char_encoding,
2247                                     m_unicode_encoding);
2248         status = TECCreateConverter(&m_WC2MB_converter,
2249                                     m_unicode_encoding,
2250                                     m_char_encoding);
2251     }
2252
2253     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2254     {
2255         OSStatus status = noErr ;
2256         ByteCount byteOutLen ;
2257         ByteCount byteInLen = strlen(psz) ;
2258         wchar_t *tbuf = NULL ;
2259         UniChar* ubuf = NULL ;
2260         size_t res = 0 ;
2261
2262         if (buf == NULL)
2263         {
2264             //apple specs say at least 32
2265             n = wxMax( 32 , byteInLen ) ;
2266             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2267         }
2268         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2269 #if SIZEOF_WCHAR_T == 4
2270         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2271 #else
2272         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2273 #endif
2274         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2275           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2276 #if SIZEOF_WCHAR_T == 4
2277         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2278         // is not properly terminated we get random characters at the end
2279         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2280         wxMBConvUTF16BE converter ;
2281         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2282         free( ubuf ) ;
2283 #else
2284         res = byteOutLen / sizeof( UniChar ) ;
2285 #endif
2286         if ( buf == NULL )
2287              free(tbuf) ;
2288
2289         if ( buf  && res < n)
2290             buf[res] = 0;
2291
2292         return res ;
2293     }
2294
2295     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2296     {
2297         OSStatus status = noErr ;
2298         ByteCount byteOutLen ;
2299         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2300
2301         char *tbuf = NULL ;
2302
2303         if (buf == NULL)
2304         {
2305             //apple specs say at least 32
2306             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2307             tbuf = (char*) malloc( n ) ;
2308         }
2309
2310         ByteCount byteBufferLen = n ;
2311         UniChar* ubuf = NULL ;
2312 #if SIZEOF_WCHAR_T == 4
2313         wxMBConvUTF16BE converter ;
2314         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2315         byteInLen = unicharlen ;
2316         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2317         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2318 #else
2319         ubuf = (UniChar*) psz ;
2320 #endif
2321         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2322             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2323 #if SIZEOF_WCHAR_T == 4
2324         free( ubuf ) ;
2325 #endif
2326         if ( buf == NULL )
2327             free(tbuf) ;
2328
2329         size_t res = byteOutLen ;
2330         if ( buf  && res < n)
2331         {
2332             buf[res] = 0;
2333
2334             //we need to double-trip to verify it didn't insert any ? in place
2335             //of bogus characters
2336             wxWCharBuffer wcBuf(n);
2337             size_t pszlen = wxWcslen(psz);
2338             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2339                         wxWcslen(wcBuf) != pszlen ||
2340                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2341             {
2342                 // we didn't obtain the same thing we started from, hence
2343                 // the conversion was lossy and we consider that it failed
2344                 return (size_t)-1;
2345             }
2346         }
2347
2348         return res ;
2349     }
2350
2351     bool IsOk() const
2352         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2353
2354 private:
2355     TECObjectRef m_MB2WC_converter ;
2356     TECObjectRef m_WC2MB_converter ;
2357
2358     TextEncodingBase m_char_encoding ;
2359     TextEncodingBase m_unicode_encoding ;
2360 };
2361
2362 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2363
2364 // ============================================================================
2365 // wxEncodingConverter based conversion classes
2366 // ============================================================================
2367
2368 #if wxUSE_FONTMAP
2369
2370 class wxMBConv_wxwin : public wxMBConv
2371 {
2372 private:
2373     void Init()
2374     {
2375         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2376                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2377     }
2378
2379 public:
2380     // temporarily just use wxEncodingConverter stuff,
2381     // so that it works while a better implementation is built
2382     wxMBConv_wxwin(const wxChar* name)
2383     {
2384         if (name)
2385             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2386         else
2387             m_enc = wxFONTENCODING_SYSTEM;
2388
2389         Init();
2390     }
2391
2392     wxMBConv_wxwin(wxFontEncoding enc)
2393     {
2394         m_enc = enc;
2395
2396         Init();
2397     }
2398
2399     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2400     {
2401         size_t inbuf = strlen(psz);
2402         if (buf)
2403         {
2404             if (!m2w.Convert(psz,buf))
2405                 return (size_t)-1;
2406         }
2407         return inbuf;
2408     }
2409
2410     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2411     {
2412         const size_t inbuf = wxWcslen(psz);
2413         if (buf)
2414         {
2415             if (!w2m.Convert(psz,buf))
2416                 return (size_t)-1;
2417         }
2418
2419         return inbuf;
2420     }
2421
2422     bool IsOk() const { return m_ok; }
2423
2424 public:
2425     wxFontEncoding m_enc;
2426     wxEncodingConverter m2w, w2m;
2427
2428     // were we initialized successfully?
2429     bool m_ok;
2430
2431     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2432 };
2433
2434 #endif // wxUSE_FONTMAP
2435
2436 // ============================================================================
2437 // wxCSConv implementation
2438 // ============================================================================
2439
2440 void wxCSConv::Init()
2441 {
2442     m_name = NULL;
2443     m_convReal =  NULL;
2444     m_deferred = true;
2445 }
2446
2447 wxCSConv::wxCSConv(const wxChar *charset)
2448 {
2449     Init();
2450
2451     if ( charset )
2452     {
2453         SetName(charset);
2454     }
2455
2456     m_encoding = wxFONTENCODING_SYSTEM;
2457 }
2458
2459 wxCSConv::wxCSConv(wxFontEncoding encoding)
2460 {
2461     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2462     {
2463         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2464
2465         encoding = wxFONTENCODING_SYSTEM;
2466     }
2467
2468     Init();
2469
2470     m_encoding = encoding;
2471 }
2472
2473 wxCSConv::~wxCSConv()
2474 {
2475     Clear();
2476 }
2477
2478 wxCSConv::wxCSConv(const wxCSConv& conv)
2479         : wxMBConv()
2480 {
2481     Init();
2482
2483     SetName(conv.m_name);
2484     m_encoding = conv.m_encoding;
2485 }
2486
2487 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2488 {
2489     Clear();
2490
2491     SetName(conv.m_name);
2492     m_encoding = conv.m_encoding;
2493
2494     return *this;
2495 }
2496
2497 void wxCSConv::Clear()
2498 {
2499     free(m_name);
2500     delete m_convReal;
2501
2502     m_name = NULL;
2503     m_convReal = NULL;
2504 }
2505
2506 void wxCSConv::SetName(const wxChar *charset)
2507 {
2508     if (charset)
2509     {
2510         m_name = wxStrdup(charset);
2511         m_deferred = true;
2512     }
2513 }
2514
2515 wxMBConv *wxCSConv::DoCreate() const
2516 {
2517     // check for the special case of ASCII or ISO8859-1 charset: as we have
2518     // special knowledge of it anyhow, we don't need to create a special
2519     // conversion object
2520     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2521     {
2522         // don't convert at all
2523         return NULL;
2524     }
2525
2526     // we trust OS to do conversion better than we can so try external
2527     // conversion methods first
2528     //
2529     // the full order is:
2530     //      1. OS conversion (iconv() under Unix or Win32 API)
2531     //      2. hard coded conversions for UTF
2532     //      3. wxEncodingConverter as fall back
2533
2534     // step (1)
2535 #ifdef HAVE_ICONV
2536 #if !wxUSE_FONTMAP
2537     if ( m_name )
2538 #endif // !wxUSE_FONTMAP
2539     {
2540         wxString name(m_name);
2541
2542 #if wxUSE_FONTMAP
2543         if ( name.empty() )
2544             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2545 #endif // wxUSE_FONTMAP
2546
2547         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2548         if ( conv->IsOk() )
2549             return conv;
2550
2551         delete conv;
2552     }
2553 #endif // HAVE_ICONV
2554
2555 #ifdef wxHAVE_WIN32_MB2WC
2556     {
2557 #if wxUSE_FONTMAP
2558         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2559                                       : new wxMBConv_win32(m_encoding);
2560         if ( conv->IsOk() )
2561             return conv;
2562
2563         delete conv;
2564 #else
2565         return NULL;
2566 #endif
2567     }
2568 #endif // wxHAVE_WIN32_MB2WC
2569 #if defined(__WXMAC__)
2570     {
2571         // leave UTF16 and UTF32 to the built-ins of wx
2572         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2573             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2574         {
2575
2576 #if wxUSE_FONTMAP
2577             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2578                                         : new wxMBConv_mac(m_encoding);
2579 #else
2580             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2581 #endif
2582             if ( conv->IsOk() )
2583                  return conv;
2584
2585             delete conv;
2586         }
2587     }
2588 #endif
2589 #if defined(__WXCOCOA__)
2590     {
2591         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2592         {
2593
2594 #if wxUSE_FONTMAP
2595             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2596                                           : new wxMBConv_cocoa(m_encoding);
2597 #else
2598             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2599 #endif
2600             if ( conv->IsOk() )
2601                  return conv;
2602
2603             delete conv;
2604         }
2605     }
2606 #endif
2607     // step (2)
2608     wxFontEncoding enc = m_encoding;
2609 #if wxUSE_FONTMAP
2610     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2611     {
2612         // use "false" to suppress interactive dialogs -- we can be called from
2613         // anywhere and popping up a dialog from here is the last thing we want to
2614         // do
2615         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2616     }
2617 #endif // wxUSE_FONTMAP
2618
2619     switch ( enc )
2620     {
2621         case wxFONTENCODING_UTF7:
2622              return new wxMBConvUTF7;
2623
2624         case wxFONTENCODING_UTF8:
2625              return new wxMBConvUTF8;
2626
2627         case wxFONTENCODING_UTF16BE:
2628              return new wxMBConvUTF16BE;
2629
2630         case wxFONTENCODING_UTF16LE:
2631              return new wxMBConvUTF16LE;
2632
2633         case wxFONTENCODING_UTF32BE:
2634              return new wxMBConvUTF32BE;
2635
2636         case wxFONTENCODING_UTF32LE:
2637              return new wxMBConvUTF32LE;
2638
2639         default:
2640              // nothing to do but put here to suppress gcc warnings
2641              ;
2642     }
2643
2644     // step (3)
2645 #if wxUSE_FONTMAP
2646     {
2647         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2648                                       : new wxMBConv_wxwin(m_encoding);
2649         if ( conv->IsOk() )
2650             return conv;
2651
2652         delete conv;
2653     }
2654 #endif // wxUSE_FONTMAP
2655
2656     // NB: This is a hack to prevent deadlock. What could otherwise happen
2657     //     in Unicode build: wxConvLocal creation ends up being here
2658     //     because of some failure and logs the error. But wxLog will try to
2659     //     attach timestamp, for which it will need wxConvLocal (to convert
2660     //     time to char* and then wchar_t*), but that fails, tries to log
2661     //     error, but wxLog has a (already locked) critical section that
2662     //     guards static buffer.
2663     static bool alreadyLoggingError = false;
2664     if (!alreadyLoggingError)
2665     {
2666         alreadyLoggingError = true;
2667         wxLogError(_("Cannot convert from the charset '%s'!"),
2668                    m_name ? m_name
2669                       :
2670 #if wxUSE_FONTMAP
2671                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2672 #else // !wxUSE_FONTMAP
2673                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2674 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2675               );
2676         alreadyLoggingError = false;
2677     }
2678
2679     return NULL;
2680 }
2681
2682 void wxCSConv::CreateConvIfNeeded() const
2683 {
2684     if ( m_deferred )
2685     {
2686         wxCSConv *self = (wxCSConv *)this; // const_cast
2687
2688 #if wxUSE_INTL
2689         // if we don't have neither the name nor the encoding, use the default
2690         // encoding for this system
2691         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2692         {
2693             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2694         }
2695 #endif // wxUSE_INTL
2696
2697         self->m_convReal = DoCreate();
2698         self->m_deferred = false;
2699     }
2700 }
2701
2702 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2703 {
2704     CreateConvIfNeeded();
2705
2706     if (m_convReal)
2707         return m_convReal->MB2WC(buf, psz, n);
2708
2709     // latin-1 (direct)
2710     size_t len = strlen(psz);
2711
2712     if (buf)
2713     {
2714         for (size_t c = 0; c <= len; c++)
2715             buf[c] = (unsigned char)(psz[c]);
2716     }
2717
2718     return len;
2719 }
2720
2721 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2722 {
2723     CreateConvIfNeeded();
2724
2725     if (m_convReal)
2726         return m_convReal->WC2MB(buf, psz, n);
2727
2728     // latin-1 (direct)
2729     const size_t len = wxWcslen(psz);
2730     if (buf)
2731     {
2732         for (size_t c = 0; c <= len; c++)
2733         {
2734             if (psz[c] > 0xFF)
2735                 return (size_t)-1;
2736             buf[c] = (char)psz[c];
2737         }
2738     }
2739     else
2740     {
2741         for (size_t c = 0; c <= len; c++)
2742         {
2743             if (psz[c] > 0xFF)
2744                 return (size_t)-1;
2745         }
2746     }
2747
2748     return len;
2749 }
2750
2751 // ----------------------------------------------------------------------------
2752 // globals
2753 // ----------------------------------------------------------------------------
2754
2755 #ifdef __WINDOWS__
2756     static wxMBConv_win32 wxConvLibcObj;
2757 #elif defined(__WXMAC__) && !defined(__MACH__)
2758     static wxMBConv_mac wxConvLibcObj ;
2759 #else
2760     static wxMBConvLibc wxConvLibcObj;
2761 #endif
2762
2763 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2764 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2765 static wxMBConvUTF7 wxConvUTF7Obj;
2766 static wxMBConvUTF8 wxConvUTF8Obj;
2767
2768 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2769 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2770 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2771 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2772 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2773 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2774 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2775 #ifdef __WXOSX__
2776                                     wxConvUTF8Obj;
2777 #else
2778                                     wxConvLibcObj;
2779 #endif
2780
2781
2782 #else // !wxUSE_WCHAR_T
2783
2784 // stand-ins in absence of wchar_t
2785 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2786                                 wxConvISO8859_1,
2787                                 wxConvLocal,
2788                                 wxConvUTF8;
2789
2790 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2791
2792