src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73     #include "wx/thread.h"
  74 #endif
  75
  76 #include "wx/encconv.h"
  77 #include "wx/fontmap.h"
  78 #include "wx/utils.h"
  79
  80 #ifdef __WXMAC__
  81 #include <ATSUnicode.h>
  82 #include <TextCommon.h>
  83 #include <TextEncodingConverter.h>
  84
  85 #include  "wx/mac/private.h"  // includes mac headers
  86 #endif
  87 // ----------------------------------------------------------------------------
  88 // macros
  89 // ----------------------------------------------------------------------------
  90
  91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  93
  94 #if SIZEOF_WCHAR_T == 4
  95     #define WC_NAME         "UCS4"
  96     #define WC_BSWAP         BSWAP_UCS4
  97     #ifdef WORDS_BIGENDIAN
  98       #define WC_NAME_BEST  "UCS-4BE"
  99     #else
 100       #define WC_NAME_BEST  "UCS-4LE"
 101     #endif
 102 #elif SIZEOF_WCHAR_T == 2
 103     #define WC_NAME         "UTF16"
 104     #define WC_BSWAP         BSWAP_UTF16
 105     #define WC_UTF16
 106     #ifdef WORDS_BIGENDIAN
 107       #define WC_NAME_BEST  "UTF-16BE"
 108     #else
 109       #define WC_NAME_BEST  "UTF-16LE"
 110     #endif
 111 #else // sizeof(wchar_t) != 2 nor 4
 112     // does this ever happen?
 113     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 114 #endif
 115
 116 // ============================================================================
 117 // implementation
 118 // ============================================================================
 119
 120 // ----------------------------------------------------------------------------
 121 // UTF-16 en/decoding to/from UCS-4
 122 // ----------------------------------------------------------------------------
 123
 124
 125 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 126 {
 127     if (input<=0xffff)
 128     {
 129         if (output)
 130             *output = (wxUint16) input;
 131         return 1;
 132     }
 133     else if (input>=0x110000)
 134     {
 135         return (size_t)-1;
 136     }
 137     else
 138     {
 139         if (output)
 140         {
 141             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 142             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 143         }
 144         return 2;
 145     }
 146 }
 147
 148 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 149 {
 150     if ((*input<0xd800) || (*input>0xdfff))
 151     {
 152         output = *input;
 153         return 1;
 154     }
 155     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 156     {
 157         output = *input;
 158         return (size_t)-1;
 159     }
 160     else
 161     {
 162         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 163         return 2;
 164     }
 165 }
 166
 167
 168 // ----------------------------------------------------------------------------
 169 // wxMBConv
 170 // ----------------------------------------------------------------------------
 171
 172 wxMBConv::~wxMBConv()
 173 {
 174     // nothing to do here (necessary for Darwin linking probably)
 175 }
 176
 177 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 178 {
 179     if ( psz )
 180     {
 181         // calculate the length of the buffer needed first
 182         size_t nLen = MB2WC(NULL, psz, 0);
 183         if ( nLen != (size_t)-1 )
 184         {
 185             // now do the actual conversion
 186             wxWCharBuffer buf(nLen);
 187             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 188             if ( nLen != (size_t)-1 )
 189             {
 190                 return buf;
 191             }
 192         }
 193     }
 194
 195     wxWCharBuffer buf((wchar_t *)NULL);
 196
 197     return buf;
 198 }
 199
 200 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 201 {
 202     if ( pwz )
 203     {
 204         size_t nLen = WC2MB(NULL, pwz, 0);
 205         if ( nLen != (size_t)-1 )
 206         {
 207             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 208             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 209             if ( nLen != (size_t)-1 )
 210             {
 211                 return buf;
 212             }
 213         }
 214     }
 215
 216     wxCharBuffer buf((char *)NULL);
 217
 218     return buf;
 219 }
 220
 221 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 222 {
 223     wxASSERT(pOutSize != NULL);
 224
 225     const char* szEnd = szString + nStringLen + 1;
 226     const char* szPos = szString;
 227     const char* szStart = szPos;
 228
 229     size_t nActualLength = 0;
 230     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 231
 232     wxWCharBuffer theBuffer(nCurrentSize);
 233
 234     //Convert the string until the length() is reached, continuing the
 235     //loop every time a null character is reached
 236     while(szPos != szEnd)
 237     {
 238         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 239
 240         //Get the length of the current (sub)string
 241         size_t nLen = MB2WC(NULL, szPos, 0);
 242
 243         //Invalid conversion?
 244         if( nLen == (size_t)-1 )
 245         {
 246             *pOutSize = 0;
 247             theBuffer.data()[0u] = wxT('\0');
 248             return theBuffer;
 249         }
 250
 251
 252         //Increase the actual length (+1 for current null character)
 253         nActualLength += nLen + 1;
 254
 255         //if buffer too big, realloc the buffer
 256         if (nActualLength > (nCurrentSize+1))
 257         {
 258             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 259             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 260             theBuffer = theNewBuffer;
 261             nCurrentSize <<= 1;
 262         }
 263
 264         //Convert the current (sub)string
 265         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 266         {
 267             *pOutSize = 0;
 268             theBuffer.data()[0u] = wxT('\0');
 269             return theBuffer;
 270         }
 271
 272         //Increment to next (sub)string
 273         //Note that we have to use strlen here instead of nLen
 274         //here because XX2XX gives us the size of the output buffer,
 275         //not neccessarly the length of the string
 276         szPos += strlen(szPos) + 1;
 277     }
 278
 279     //success - return actual length and the buffer
 280     *pOutSize = nActualLength;
 281     return theBuffer;
 282 }
 283
 284 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 285 {
 286     wxASSERT(pOutSize != NULL);
 287
 288     const wchar_t* szEnd = szString + nStringLen + 1;
 289     const wchar_t* szPos = szString;
 290     const wchar_t* szStart = szPos;
 291
 292     size_t nActualLength = 0;
 293     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 294
 295     wxCharBuffer theBuffer(nCurrentSize);
 296
 297     //Convert the string until the length() is reached, continuing the
 298     //loop every time a null character is reached
 299     while(szPos != szEnd)
 300     {
 301         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 302
 303         //Get the length of the current (sub)string
 304         size_t nLen = WC2MB(NULL, szPos, 0);
 305
 306         //Invalid conversion?
 307         if( nLen == (size_t)-1 )
 308         {
 309             *pOutSize = 0;
 310             theBuffer.data()[0u] = wxT('\0');
 311             return theBuffer;
 312         }
 313
 314         //Increase the actual length (+1 for current null character)
 315         nActualLength += nLen + 1;
 316
 317         //if buffer too big, realloc the buffer
 318         if (nActualLength > (nCurrentSize+1))
 319         {
 320             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 321             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 322             theBuffer = theNewBuffer;
 323             nCurrentSize <<= 1;
 324         }
 325
 326         //Convert the current (sub)string
 327         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 328         {
 329             *pOutSize = 0;
 330             theBuffer.data()[0u] = wxT('\0');
 331             return theBuffer;
 332         }
 333
 334         //Increment to next (sub)string
 335         //Note that we have to use wxWcslen here instead of nLen
 336         //here because XX2XX gives us the size of the output buffer,
 337         //not neccessarly the length of the string
 338         szPos += wxWcslen(szPos) + 1;
 339     }
 340
 341     //success - return actual length and the buffer
 342     *pOutSize = nActualLength;
 343     return theBuffer;
 344 }
 345
 346 // ----------------------------------------------------------------------------
 347 // wxMBConvLibc
 348 // ----------------------------------------------------------------------------
 349
 350 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 351 {
 352     return wxMB2WC(buf, psz, n);
 353 }
 354
 355 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 356 {
 357     return wxWC2MB(buf, psz, n);
 358 }
 359
 360 #ifdef __UNIX__
 361
 362 // ----------------------------------------------------------------------------
 363 // wxConvBrokenFileNames
 364 // ----------------------------------------------------------------------------
 365
 366 wxConvBrokenFileNames::wxConvBrokenFileNames()
 367 {
 368     // decide which conversion to use for the file names
 369
 370     // (1) this variable exists for the sole purpose of specifying the encoding
 371     //     of the filenames for GTK+ programs, so use it if it is set
 372     const wxChar *encName = wxGetenv(_T("G_FILENAME_ENCODING"));
 373     if ( encName )
 374     {
 375         m_conv = new wxCSConv(encName);
 376     }
 377     else // no G_FILENAME_ENCODING
 378     {
 379         // (2) if a non default locale is set, assume that the user wants his
 380         //     filenames in this locale too
 381         switch ( wxLocale::GetSystemEncoding() )
 382         {
 383             default:
 384                 m_conv = new wxMBConvLibc;
 385                 break;
 386
 387             // (3) finally use UTF-8 by default
 388             case wxFONTENCODING_SYSTEM:
 389             case wxFONTENCODING_UTF8:
 390                 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 391                 break;
 392         }
 393     }
 394 }
 395
 396 size_t
 397 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 398                              const char *psz,
 399                              size_t outputSize) const
 400 {
 401     return m_conv->MB2WC( outputBuf, psz, outputSize );
 402 }
 403
 404 size_t
 405 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 406                              const wchar_t *psz,
 407                              size_t outputSize) const
 408 {
 409     return m_conv->WC2MB( outputBuf, psz, outputSize );
 410 }
 411
 412 #endif
 413
 414 // ----------------------------------------------------------------------------
 415 // UTF-7
 416 // ----------------------------------------------------------------------------
 417
 418 // Implementation (C) 2004 Fredrik Roubert
 419
 420 //
 421 // BASE64 decoding table
 422 //
 423 static const unsigned char utf7unb64[] =
 424 {
 425     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 426     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 431     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 432     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 433     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 434     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 435     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 436     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 437     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 438     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 439     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 440     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 441     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 442     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 443     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 444     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 445     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 446     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 447     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 448     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 449     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 450     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 451     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 452     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 453     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 454     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 455     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 456     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 457 };
 458
 459 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 460 {
 461     size_t len = 0;
 462
 463     while (*psz && ((!buf) || (len < n)))
 464     {
 465         unsigned char cc = *psz++;
 466         if (cc != '+')
 467         {
 468             // plain ASCII char
 469             if (buf)
 470                 *buf++ = cc;
 471             len++;
 472         }
 473         else if (*psz == '-')
 474         {
 475             // encoded plus sign
 476             if (buf)
 477                 *buf++ = cc;
 478             len++;
 479             psz++;
 480         }
 481         else
 482         {
 483             // BASE64 encoded string
 484             bool lsb;
 485             unsigned char c;
 486             unsigned int d, l;
 487             for (lsb = false, d = 0, l = 0;
 488                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 489             {
 490                 d <<= 6;
 491                 d += cc;
 492                 for (l += 6; l >= 8; lsb = !lsb)
 493                 {
 494                     c = (unsigned char)((d >> (l -= 8)) % 256);
 495                     if (lsb)
 496                     {
 497                         if (buf)
 498                             *buf++ |= c;
 499                         len ++;
 500                     }
 501                     else
 502                         if (buf)
 503                             *buf = (wchar_t)(c << 8);
 504                 }
 505             }
 506             if (*psz == '-')
 507                 psz++;
 508         }
 509     }
 510     if (buf && (len < n))
 511         *buf = 0;
 512     return len;
 513 }
 514
 515 //
 516 // BASE64 encoding table
 517 //
 518 static const unsigned char utf7enb64[] =
 519 {
 520     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 521     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 522     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 523     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 524     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 525     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 526     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 527     '4', '5', '6', '7', '8', '9', '+', '/'
 528 };
 529
 530 //
 531 // UTF-7 encoding table
 532 //
 533 // 0 - Set D (directly encoded characters)
 534 // 1 - Set O (optional direct characters)
 535 // 2 - whitespace characters (optional)
 536 // 3 - special characters
 537 //
 538 static const unsigned char utf7encode[128] =
 539 {
 540     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 541     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 542     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 543     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 544     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 545     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 546     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 547     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 548 };
 549
 550 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 551 {
 552
 553
 554     size_t len = 0;
 555
 556     while (*psz && ((!buf) || (len < n)))
 557     {
 558         wchar_t cc = *psz++;
 559         if (cc < 0x80 && utf7encode[cc] < 1)
 560         {
 561             // plain ASCII char
 562             if (buf)
 563                 *buf++ = (char)cc;
 564             len++;
 565         }
 566 #ifndef WC_UTF16
 567         else if (((wxUint32)cc) > 0xffff)
 568         {
 569             // no surrogate pair generation (yet?)
 570             return (size_t)-1;
 571         }
 572 #endif
 573         else
 574         {
 575             if (buf)
 576                 *buf++ = '+';
 577             len++;
 578             if (cc != '+')
 579             {
 580                 // BASE64 encode string
 581                 unsigned int lsb, d, l;
 582                 for (d = 0, l = 0;; psz++)
 583                 {
 584                     for (lsb = 0; lsb < 2; lsb ++)
 585                     {
 586                         d <<= 8;
 587                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 588
 589                         for (l += 8; l >= 6; )
 590                         {
 591                             l -= 6;
 592                             if (buf)
 593                                 *buf++ = utf7enb64[(d >> l) % 64];
 594                             len++;
 595                         }
 596                     }
 597                     cc = *psz;
 598                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 599                         break;
 600                 }
 601                 if (l != 0)
 602                 {
 603                     if (buf)
 604                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 605                     len++;
 606                 }
 607             }
 608             if (buf)
 609                 *buf++ = '-';
 610             len++;
 611         }
 612     }
 613     if (buf && (len < n))
 614         *buf = 0;
 615     return len;
 616 }
 617
 618 // ----------------------------------------------------------------------------
 619 // UTF-8
 620 // ----------------------------------------------------------------------------
 621
 622 static wxUint32 utf8_max[]=
 623     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 624
 625 // boundaries of the private use area we use to (temporarily) remap invalid
 626 // characters invalid in a UTF-8 encoded string
 627 const wxUint32 wxUnicodePUA = 0x100000;
 628 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 629
 630 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 631 {
 632     size_t len = 0;
 633
 634     while (*psz && ((!buf) || (len < n)))
 635     {
 636         const char *opsz = psz;
 637         bool invalid = false;
 638         unsigned char cc = *psz++, fc = cc;
 639         unsigned cnt;
 640         for (cnt = 0; fc & 0x80; cnt++)
 641             fc <<= 1;
 642         if (!cnt)
 643         {
 644             // plain ASCII char
 645             if (buf)
 646                 *buf++ = cc;
 647             len++;
 648         }
 649         else
 650         {
 651             cnt--;
 652             if (!cnt)
 653             {
 654                 // invalid UTF-8 sequence
 655                 invalid = true;
 656             }
 657             else
 658             {
 659                 unsigned ocnt = cnt - 1;
 660                 wxUint32 res = cc & (0x3f >> cnt);
 661                 while (cnt--)
 662                 {
 663                     cc = *psz;
 664                     if ((cc & 0xC0) != 0x80)
 665                     {
 666                         // invalid UTF-8 sequence
 667                         invalid = true;
 668                         break;
 669                     }
 670                     psz++;
 671                     res = (res << 6) | (cc & 0x3f);
 672                 }
 673                 if (invalid || res <= utf8_max[ocnt])
 674                 {
 675                     // illegal UTF-8 encoding
 676                     invalid = true;
 677                 }
 678                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 679                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 680                 {
 681                     // if one of our PUA characters turns up externally
 682                     // it must also be treated as an illegal sequence
 683                     // (a bit like you have to escape an escape character)
 684                     invalid = true;
 685                 }
 686                 else
 687                 {
 688 #ifdef WC_UTF16
 689                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 690                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 691                     if (pa == (size_t)-1)
 692                     {
 693                         invalid = true;
 694                     }
 695                     else
 696                     {
 697                         if (buf)
 698                             buf += pa;
 699                         len += pa;
 700                     }
 701 #else // !WC_UTF16
 702                     if (buf)
 703                         *buf++ = res;
 704                     len++;
 705 #endif // WC_UTF16/!WC_UTF16
 706                 }
 707             }
 708             if (invalid)
 709             {
 710                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 711                 {
 712                     while (opsz < psz && (!buf || len < n))
 713                     {
 714 #ifdef WC_UTF16
 715                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 716                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 717                         wxASSERT(pa != (size_t)-1);
 718                         if (buf)
 719                             buf += pa;
 720                         opsz++;
 721                         len += pa;
 722 #else
 723                         if (buf)
 724                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 725                         opsz++;
 726                         len++;
 727 #endif
 728                     }
 729                 }
 730                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 731                 {
 732                     while (opsz < psz && (!buf || len < n))
 733                     {
 734                         if ( buf && len + 3 < n )
 735                         {
 736                             unsigned char n = *opsz;
 737                             *buf++ = L'\\';
 738                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 739                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 740                             *buf++ = (wchar_t)( L'0' + n % 010 );
 741                         }
 742                         opsz++;
 743                         len += 4;
 744                     }
 745                 }
 746                 else // MAP_INVALID_UTF8_NOT
 747                 {
 748                     return (size_t)-1;
 749                 }
 750             }
 751         }
 752     }
 753     if (buf && (len < n))
 754         *buf = 0;
 755     return len;
 756 }
 757
 758 static inline bool isoctal(wchar_t wch)
 759 {
 760     return L'0' <= wch && wch <= L'7';
 761 }
 762
 763 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 764 {
 765     size_t len = 0;
 766
 767     while (*psz && ((!buf) || (len < n)))
 768     {
 769         wxUint32 cc;
 770 #ifdef WC_UTF16
 771         // cast is ok for WC_UTF16
 772         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 773         psz += (pa == (size_t)-1) ? 1 : pa;
 774 #else
 775         cc=(*psz++) & 0x7fffffff;
 776 #endif
 777
 778         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 779                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 780         {
 781             if (buf)
 782                 *buf++ = (char)(cc - wxUnicodePUA);
 783             len++;
 784         }
 785         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 786                     cc == L'\\' &&
 787                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 788         {
 789             if (buf)
 790             {
 791                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 792                                  (psz[1] - L'0')*010 +
 793                                  (psz[2] - L'0'));
 794             }
 795
 796             psz += 3;
 797             len++;
 798         }
 799         else
 800         {
 801             unsigned cnt;
 802             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 803             if (!cnt)
 804             {
 805                 // plain ASCII char
 806                 if (buf)
 807                     *buf++ = (char) cc;
 808                 len++;
 809             }
 810
 811             else
 812             {
 813                 len += cnt + 1;
 814                 if (buf)
 815                 {
 816                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 817                     while (cnt--)
 818                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 819                 }
 820             }
 821         }
 822     }
 823
 824     if (buf && (len<n))
 825         *buf = 0;
 826
 827     return len;
 828 }
 829
 830 // ----------------------------------------------------------------------------
 831 // UTF-16
 832 // ----------------------------------------------------------------------------
 833
 834 #ifdef WORDS_BIGENDIAN
 835     #define wxMBConvUTF16straight wxMBConvUTF16BE
 836     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 837 #else
 838     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 839     #define wxMBConvUTF16straight wxMBConvUTF16LE
 840 #endif
 841
 842
 843 #ifdef WC_UTF16
 844
 845 // copy 16bit MB to 16bit String
 846 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 847 {
 848     size_t len=0;
 849
 850     while (*(wxUint16*)psz && (!buf || len < n))
 851     {
 852         if (buf)
 853             *buf++ = *(wxUint16*)psz;
 854         len++;
 855
 856         psz += sizeof(wxUint16);
 857     }
 858     if (buf && len<n)   *buf=0;
 859
 860     return len;
 861 }
 862
 863
 864 // copy 16bit String to 16bit MB
 865 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 866 {
 867     size_t len=0;
 868
 869     while (*psz && (!buf || len < n))
 870     {
 871         if (buf)
 872         {
 873             *(wxUint16*)buf = *psz;
 874             buf += sizeof(wxUint16);
 875         }
 876         len += sizeof(wxUint16);
 877         psz++;
 878     }
 879     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 880
 881     return len;
 882 }
 883
 884
 885 // swap 16bit MB to 16bit String
 886 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 887 {
 888     size_t len=0;
 889
 890     while (*(wxUint16*)psz && (!buf || len < n))
 891     {
 892         if (buf)
 893         {
 894             ((char *)buf)[0] = psz[1];
 895             ((char *)buf)[1] = psz[0];
 896             buf++;
 897         }
 898         len++;
 899         psz += sizeof(wxUint16);
 900     }
 901     if (buf && len<n)   *buf=0;
 902
 903     return len;
 904 }
 905
 906
 907 // swap 16bit MB to 16bit String
 908 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 909 {
 910     size_t len=0;
 911
 912     while (*psz && (!buf || len < n))
 913     {
 914         if (buf)
 915         {
 916             *buf++ = ((char*)psz)[1];
 917             *buf++ = ((char*)psz)[0];
 918         }
 919         len += sizeof(wxUint16);
 920         psz++;
 921     }
 922     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 923
 924     return len;
 925 }
 926
 927
 928 #else // WC_UTF16
 929
 930
 931 // copy 16bit MB to 32bit String
 932 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 933 {
 934     size_t len=0;
 935
 936     while (*(wxUint16*)psz && (!buf || len < n))
 937     {
 938         wxUint32 cc;
 939         size_t pa=decode_utf16((wxUint16*)psz, cc);
 940         if (pa == (size_t)-1)
 941             return pa;
 942
 943         if (buf)
 944             *buf++ = cc;
 945         len++;
 946         psz += pa * sizeof(wxUint16);
 947     }
 948     if (buf && len<n)   *buf=0;
 949
 950     return len;
 951 }
 952
 953
 954 // copy 32bit String to 16bit MB
 955 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 956 {
 957     size_t len=0;
 958
 959     while (*psz && (!buf || len < n))
 960     {
 961         wxUint16 cc[2];
 962         size_t pa=encode_utf16(*psz, cc);
 963
 964         if (pa == (size_t)-1)
 965             return pa;
 966
 967         if (buf)
 968         {
 969             *(wxUint16*)buf = cc[0];
 970             buf += sizeof(wxUint16);
 971             if (pa > 1)
 972             {
 973                 *(wxUint16*)buf = cc[1];
 974                 buf += sizeof(wxUint16);
 975             }
 976         }
 977
 978         len += pa*sizeof(wxUint16);
 979         psz++;
 980     }
 981     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 982
 983     return len;
 984 }
 985
 986
 987 // swap 16bit MB to 32bit String
 988 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 989 {
 990     size_t len=0;
 991
 992     while (*(wxUint16*)psz && (!buf || len < n))
 993     {
 994         wxUint32 cc;
 995         char tmp[4];
 996         tmp[0]=psz[1];  tmp[1]=psz[0];
 997         tmp[2]=psz[3];  tmp[3]=psz[2];
 998
 999         size_t pa=decode_utf16((wxUint16*)tmp, cc);
1000         if (pa == (size_t)-1)
1001             return pa;
1002
1003         if (buf)
1004             *buf++ = cc;
1005
1006         len++;
1007         psz += pa * sizeof(wxUint16);
1008     }
1009     if (buf && len<n)   *buf=0;
1010
1011     return len;
1012 }
1013
1014
1015 // swap 32bit String to 16bit MB
1016 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1017 {
1018     size_t len=0;
1019
1020     while (*psz && (!buf || len < n))
1021     {
1022         wxUint16 cc[2];
1023         size_t pa=encode_utf16(*psz, cc);
1024
1025         if (pa == (size_t)-1)
1026             return pa;
1027
1028         if (buf)
1029         {
1030             *buf++ = ((char*)cc)[1];
1031             *buf++ = ((char*)cc)[0];
1032             if (pa > 1)
1033             {
1034                 *buf++ = ((char*)cc)[3];
1035                 *buf++ = ((char*)cc)[2];
1036             }
1037         }
1038
1039         len += pa*sizeof(wxUint16);
1040         psz++;
1041     }
1042     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1043
1044     return len;
1045 }
1046
1047 #endif // WC_UTF16
1048
1049
1050 // ----------------------------------------------------------------------------
1051 // UTF-32
1052 // ----------------------------------------------------------------------------
1053
// Map the "straight" and "swap" UTF-32 converter names used by the
// implementations below onto the BE/LE classes: when WORDS_BIGENDIAN is
// defined "straight" is the big endian class and "swap" the little endian
// one, otherwise it is the other way round.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// global instances of the two UTF-32 converters
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1065
1066
1067 #ifdef WC_UTF16
1068
1069 // copy 32bit MB to 16bit String
1070 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1071 {
1072     size_t len=0;
1073
1074     while (*(wxUint32*)psz && (!buf || len < n))
1075     {
1076         wxUint16 cc[2];
1077
1078         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1079         if (pa == (size_t)-1)
1080             return pa;
1081
1082         if (buf)
1083         {
1084             *buf++ = cc[0];
1085             if (pa > 1)
1086                 *buf++ = cc[1];
1087         }
1088         len += pa;
1089         psz += sizeof(wxUint32);
1090     }
1091     if (buf && len<n)   *buf=0;
1092
1093     return len;
1094 }
1095
1096
1097 // copy 16bit String to 32bit MB
1098 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1099 {
1100     size_t len=0;
1101
1102     while (*psz && (!buf || len < n))
1103     {
1104         wxUint32 cc;
1105
1106         // cast is ok for WC_UTF16
1107         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1108         if (pa == (size_t)-1)
1109             return pa;
1110
1111         if (buf)
1112         {
1113             *(wxUint32*)buf = cc;
1114             buf += sizeof(wxUint32);
1115         }
1116         len += sizeof(wxUint32);
1117         psz += pa;
1118     }
1119
1120     if (buf && len<=n-sizeof(wxUint32))
1121         *(wxUint32*)buf=0;
1122
1123     return len;
1124 }
1125
1126
1127
1128 // swap 32bit MB to 16bit String
1129 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1130 {
1131     size_t len=0;
1132
1133     while (*(wxUint32*)psz && (!buf || len < n))
1134     {
1135         char tmp[4];
1136         tmp[0] = psz[3];   tmp[1] = psz[2];
1137         tmp[2] = psz[1];   tmp[3] = psz[0];
1138
1139
1140         wxUint16 cc[2];
1141
1142         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1143         if (pa == (size_t)-1)
1144             return pa;
1145
1146         if (buf)
1147         {
1148             *buf++ = cc[0];
1149             if (pa > 1)
1150                 *buf++ = cc[1];
1151         }
1152         len += pa;
1153         psz += sizeof(wxUint32);
1154     }
1155
1156     if (buf && len<n)
1157         *buf=0;
1158
1159     return len;
1160 }
1161
1162
1163 // swap 16bit String to 32bit MB
1164 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1165 {
1166     size_t len=0;
1167
1168     while (*psz && (!buf || len < n))
1169     {
1170         char cc[4];
1171
1172         // cast is ok for WC_UTF16
1173         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1174         if (pa == (size_t)-1)
1175             return pa;
1176
1177         if (buf)
1178         {
1179             *buf++ = cc[3];
1180             *buf++ = cc[2];
1181             *buf++ = cc[1];
1182             *buf++ = cc[0];
1183         }
1184         len += sizeof(wxUint32);
1185         psz += pa;
1186     }
1187
1188     if (buf && len<=n-sizeof(wxUint32))
1189         *(wxUint32*)buf=0;
1190
1191     return len;
1192 }
1193
1194 #else // WC_UTF16
1195
1196
1197 // copy 32bit MB to 32bit String
1198 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1199 {
1200     size_t len=0;
1201
1202     while (*(wxUint32*)psz && (!buf || len < n))
1203     {
1204         if (buf)
1205             *buf++ = *(wxUint32*)psz;
1206         len++;
1207         psz += sizeof(wxUint32);
1208     }
1209
1210     if (buf && len<n)
1211         *buf=0;
1212
1213     return len;
1214 }
1215
1216
1217 // copy 32bit String to 32bit MB
1218 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1219 {
1220     size_t len=0;
1221
1222     while (*psz && (!buf || len < n))
1223     {
1224         if (buf)
1225         {
1226             *(wxUint32*)buf = *psz;
1227             buf += sizeof(wxUint32);
1228         }
1229
1230         len += sizeof(wxUint32);
1231         psz++;
1232     }
1233
1234     if (buf && len<=n-sizeof(wxUint32))
1235         *(wxUint32*)buf=0;
1236
1237     return len;
1238 }
1239
1240
1241 // swap 32bit MB to 32bit String
1242 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1243 {
1244     size_t len=0;
1245
1246     while (*(wxUint32*)psz && (!buf || len < n))
1247     {
1248         if (buf)
1249         {
1250             ((char *)buf)[0] = psz[3];
1251             ((char *)buf)[1] = psz[2];
1252             ((char *)buf)[2] = psz[1];
1253             ((char *)buf)[3] = psz[0];
1254             buf++;
1255         }
1256         len++;
1257         psz += sizeof(wxUint32);
1258     }
1259
1260     if (buf && len<n)
1261         *buf=0;
1262
1263     return len;
1264 }
1265
1266
1267 // swap 32bit String to 32bit MB
1268 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1269 {
1270     size_t len=0;
1271
1272     while (*psz && (!buf || len < n))
1273     {
1274         if (buf)
1275         {
1276             *buf++ = ((char *)psz)[3];
1277             *buf++ = ((char *)psz)[2];
1278             *buf++ = ((char *)psz)[1];
1279             *buf++ = ((char *)psz)[0];
1280         }
1281         len += sizeof(wxUint32);
1282         psz++;
1283     }
1284
1285     if (buf && len<=n-sizeof(wxUint32))
1286         *(wxUint32*)buf=0;
1287
1288     return len;
1289 }
1290
1291
1292 #endif // WC_UTF16
1293
1294
1295 // ============================================================================
1296 // The classes doing conversion using the iconv_xxx() functions
1297 // ============================================================================
1298
1299 #ifdef HAVE_ICONV
1300
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) takes the value returned by iconv() and the
// number of input bytes it left unconverted and evaluates to true if the call
// must be considered as having failed. Note that the glibc <= 2.1 version
// reads errno, so it has to be used before anything else can change it.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// casts the address of an input pointer to the pointer-to-pointer type used
// for the input buffer argument of iconv(); ICONV_CONST is defined elsewhere
// (presumably by the build system, according to the iconv() prototype)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1317
1318 // ----------------------------------------------------------------------------
1319 // wxMBConv_iconv: encapsulates an iconv character set
1320 // ----------------------------------------------------------------------------
1321
1322 class wxMBConv_iconv : public wxMBConv
1323 {
1324 public:
1325     wxMBConv_iconv(const wxChar *name);
1326     virtual ~wxMBConv_iconv();
1327
1328     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1329     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1330
1331     bool IsOk() const
1332         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1333
1334 protected:
1335     // the iconv handlers used to translate from multibyte to wide char and in
1336     // the other direction
1337     iconv_t m2w,
1338             w2m;
1339 #if wxUSE_THREADS
1340     // guards access to m2w and w2m objects
1341     wxMutex m_iconvMutex;
1342 #endif
1343
1344 private:
1345     // the name (for iconv_open()) of a wide char charset -- if none is
1346     // available on this machine, it will remain NULL
1347     static const char *ms_wcCharsetName;
1348
1349     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1350     // different endian-ness than the native one
1351     static bool ms_wcNeedsSwap;
1352 };
1353
// shared by all wxMBConv_iconv objects: both are determined by the
// constructor as long as ms_wcCharsetName is still NULL
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1356
1357 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1358 {
1359     // Do it the hard way
1360     char cname[100];
1361     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1362         cname[i] = (char) name[i];
1363
1364     // check for charset that represents wchar_t:
1365     if (ms_wcCharsetName == NULL)
1366     {
1367         ms_wcNeedsSwap = false;
1368
1369         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1370         ms_wcCharsetName = WC_NAME_BEST;
1371         m2w = iconv_open(ms_wcCharsetName, cname);
1372
1373         if (m2w == (iconv_t)-1)
1374         {
1375             // try charset w/o bytesex info (e.g. "UCS4")
1376             // and check for bytesex ourselves:
1377             ms_wcCharsetName = WC_NAME;
1378             m2w = iconv_open(ms_wcCharsetName, cname);
1379
1380             // last bet, try if it knows WCHAR_T pseudo-charset
1381             if (m2w == (iconv_t)-1)
1382             {
1383                 ms_wcCharsetName = "WCHAR_T";
1384                 m2w = iconv_open(ms_wcCharsetName, cname);
1385             }
1386
1387             if (m2w != (iconv_t)-1)
1388             {
1389                 char    buf[2], *bufPtr;
1390                 wchar_t wbuf[2], *wbufPtr;
1391                 size_t  insz, outsz;
1392                 size_t  res;
1393
1394                 buf[0] = 'A';
1395                 buf[1] = 0;
1396                 wbuf[0] = 0;
1397                 insz = 2;
1398                 outsz = SIZEOF_WCHAR_T * 2;
1399                 wbufPtr = wbuf;
1400                 bufPtr = buf;
1401
1402                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1403                             (char**)&wbufPtr, &outsz);
1404
1405                 if (ICONV_FAILED(res, insz))
1406                 {
1407                     ms_wcCharsetName = NULL;
1408                     wxLogLastError(wxT("iconv"));
1409                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1410                 }
1411                 else
1412                 {
1413                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1414                 }
1415             }
1416             else
1417             {
1418                 ms_wcCharsetName = NULL;
1419
1420                 // VS: we must not output an error here, since wxWidgets will safely
1421                 //     fall back to using wxEncodingConverter.
1422                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1423                 //wxLogError(
1424             }
1425         }
1426         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1427     }
1428     else // we already have ms_wcCharsetName
1429     {
1430         m2w = iconv_open(ms_wcCharsetName, cname);
1431     }
1432
1433     // NB: don't ever pass NULL to iconv_open(), it may crash!
1434     if ( ms_wcCharsetName )
1435     {
1436         w2m = iconv_open( cname, ms_wcCharsetName);
1437     }
1438     else
1439     {
1440         w2m = (iconv_t)-1;
1441     }
1442 }
1443
1444 wxMBConv_iconv::~wxMBConv_iconv()
1445 {
1446     if ( m2w != (iconv_t)-1 )
1447         iconv_close(m2w);
1448     if ( w2m != (iconv_t)-1 )
1449         iconv_close(w2m);
1450 }
1451
1452 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1453 {
1454 #if wxUSE_THREADS
1455     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1456     //     Unfortunately there is a couple of global wxCSConv objects such as
1457     //     wxConvLocal that are used all over wx code, so we have to make sure
1458     //     the handle is used by at most one thread at the time. Otherwise
1459     //     only a few wx classes would be safe to use from non-main threads
1460     //     as MB<->WC conversion would fail "randomly".
1461     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1462 #endif
1463
1464     size_t inbuf = strlen(psz);
1465     size_t outbuf = n * SIZEOF_WCHAR_T;
1466     size_t res, cres;
1467     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1468     wchar_t *bufPtr = buf;
1469     const char *pszPtr = psz;
1470
1471     if (buf)
1472     {
1473         // have destination buffer, convert there
1474         cres = iconv(m2w,
1475                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1476                      (char**)&bufPtr, &outbuf);
1477         res = n - (outbuf / SIZEOF_WCHAR_T);
1478
1479         if (ms_wcNeedsSwap)
1480         {
1481             // convert to native endianness
1482             WC_BSWAP(buf /* _not_ bufPtr */, res)
1483         }
1484
1485         // NB: iconv was given only strlen(psz) characters on input, and so
1486         //     it couldn't convert the trailing zero. Let's do it ourselves
1487         //     if there's some room left for it in the output buffer.
1488         if (res < n)
1489             buf[res] = 0;
1490     }
1491     else
1492     {
1493         // no destination buffer... convert using temp buffer
1494         // to calculate destination buffer requirement
1495         wchar_t tbuf[8];
1496         res = 0;
1497         do {
1498             bufPtr = tbuf;
1499             outbuf = 8*SIZEOF_WCHAR_T;
1500
1501             cres = iconv(m2w,
1502                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1503                          (char**)&bufPtr, &outbuf );
1504
1505             res += 8-(outbuf/SIZEOF_WCHAR_T);
1506         } while ((cres==(size_t)-1) && (errno==E2BIG));
1507     }
1508
1509     if (ICONV_FAILED(cres, inbuf))
1510     {
1511         //VS: it is ok if iconv fails, hence trace only
1512         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1513         return (size_t)-1;
1514     }
1515
1516     return res;
1517 }
1518
1519 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1520 {
1521 #if wxUSE_THREADS
1522     // NB: explained in MB2WC
1523     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1524 #endif
1525
1526     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1527     size_t outbuf = n;
1528     size_t res, cres;
1529
1530     wchar_t *tmpbuf = 0;
1531
1532     if (ms_wcNeedsSwap)
1533     {
1534         // need to copy to temp buffer to switch endianness
1535         // this absolutely doesn't rock!
1536         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1537         //  could be in read-only memory, or be accessed in some other thread)
1538         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1539         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1540         WC_BSWAP(tmpbuf, inbuf)
1541         psz=tmpbuf;
1542     }
1543
1544     if (buf)
1545     {
1546         // have destination buffer, convert there
1547         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1548
1549         res = n-outbuf;
1550
1551         // NB: iconv was given only wcslen(psz) characters on input, and so
1552         //     it couldn't convert the trailing zero. Let's do it ourselves
1553         //     if there's some room left for it in the output buffer.
1554         if (res < n)
1555             buf[0] = 0;
1556     }
1557     else
1558     {
1559         // no destination buffer... convert using temp buffer
1560         // to calculate destination buffer requirement
1561         char tbuf[16];
1562         res = 0;
1563         do {
1564             buf = tbuf; outbuf = 16;
1565
1566             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1567
1568             res += 16 - outbuf;
1569         } while ((cres==(size_t)-1) && (errno==E2BIG));
1570     }
1571
1572     if (ms_wcNeedsSwap)
1573     {
1574         free(tmpbuf);
1575     }
1576
1577     if (ICONV_FAILED(cres, inbuf))
1578     {
1579         //VS: it is ok if iconv fails, hence trace only
1580         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1581         return (size_t)-1;
1582     }
1583
1584     return res;
1585 }
1586
1587 #endif // HAVE_ICONV
1588
1589
1590 // ============================================================================
1591 // Win32 conversion classes
1592 // ============================================================================
1593
1594 #ifdef wxHAVE_WIN32_MB2WC
1595
// from utils.cpp: these functions are used by the wxMBConv_win32
// constructors to obtain the code page for a charset name or a font
// encoding, they are only declared here if wxUSE_FONTMAP is enabled
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1601
// wxMBConv implementation converting between multibyte strings in the code
// page stored in m_CodePage and wide strings using the Win32
// MultiByteToWideChar() and WideCharToMultiByte() functions.
class wxMBConv_win32 : public wxMBConv
{
public:
    // uses the CP_ACP code page
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // uses the code page returned by wxCharsetToCodepage() for this name
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    // uses the code page returned by wxEncodingToCodepage() for this encoding
    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // Converts the NUL-terminated multibyte string psz to wide characters.
    //
    // buf is the destination buffer of n wchar_t or NULL to only compute the
    // length. Returns the length of the converted string, not counting the
    // terminating NUL, or (size_t)-1 if MultiByteToWideChar() returned 0.
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and breaks the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
        // an error (tested under Windows Server 2003) and apparently it is
        // done on purpose, i.e. the function accepts any input in this case
        // and although I'd prefer to return error on ill-formed output, our
        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) either so we don't
        // even have any fallback here...
        int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;

        const size_t len = ::MultiByteToWideChar
                             (
                                m_CodePage,     // code page
                                flags,          // flags: fall on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                             );

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        return len ? len - 1 : (size_t)-1;
    }

    // Converts the NUL-terminated wide string pwz to the multibyte encoding.
    //
    // buf is the destination buffer of n bytes or NULL to only compute the
    // length. Returns the length of the converted string, not counting the
    // terminating NUL, or (size_t)-1 if the conversion failed or, when really
    // converting, if it turned out to be lossy.
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        BOOL usedDef wxDUMMY_INITIALIZE(false);
        BOOL *pUsedDef;
        int flags;
        if ( CanUseNoBestFit() && m_CodePage < 50000 )
        {
            // it's our lucky day
            flags = WC_NO_BEST_FIT_CHARS;
            pUsedDef = &usedDef;
        }
        else // old system or unsupported encoding
        {
            flags = 0;
            pUsedDef = NULL;
        }

        const size_t len = ::WideCharToMultiByte
                             (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                             );

        if ( !len )
        {
            // function totally failed
            return (size_t)-1;
        }

        // if we were really converting, check if we succeeded
        if ( buf )
        {
            if ( flags )
            {
                // check if the conversion failed, i.e. if any replacements
                // were done
                if ( usedDef )
                    return (size_t)-1;
            }
            else // we must resort to double tripping...
            {
                // convert the result back and compare it with the original
                wxWCharBuffer wcBuf(n);
                if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                        wcscmp(wcBuf, pwz) != 0 )
                {
                    // we didn't obtain the same thing we started from, hence
                    // the conversion was lossy and we consider that it failed
                    return (size_t)-1;
                }
            }
        }

        // see the comment above for the reason of "len - 1"
        return len - 1;
    }

    // the code page is -1 if it couldn't be determined
    // NOTE(review): presumably -1 is what wxCharsetToCodepage() and
    // wxEncodingToCodepage() return on failure -- confirm in utils.cpp
    bool IsOk() const { return m_CodePage != -1; }

private:
    // returns true if the OS version, which is queried only once and then
    // cached, is recent enough for WC_NO_BEST_FIT_CHARS to be used
    static bool CanUseNoBestFit()
    {
        // -1 means "not determined yet", otherwise it is 0 or 1
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            switch ( wxGetOsVersion(&verMaj, &verMin) )
            {
                case wxWIN95:
                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                    break;

                case wxWINDOWS_NT:
                    s_isWin98Or2k = verMaj >= 5;
                    break;

                default:
                    // unknown, be conservative by default
                    s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    // the Windows code page used for the conversions
    long m_CodePage;
};
1769
1770 #endif // wxHAVE_WIN32_MB2WC
1771
1772 // ============================================================================
1773 // Cocoa conversion classes
1774 // ============================================================================
1775
1776 #if defined(__WXCOCOA__)
1777
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1781
1782 #include <CoreFoundation/CFString.h>
1783 #include <CoreFoundation/CFStringEncodingExt.h>
1784
1785 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1786 {
1787     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1788     if ( encoding == wxFONTENCODING_DEFAULT )
1789     {
1790         enc = CFStringGetSystemEncoding();
1791     }
1792     else switch( encoding)
1793     {
1794         case wxFONTENCODING_ISO8859_1 :
1795             enc = kCFStringEncodingISOLatin1 ;
1796             break ;
1797         case wxFONTENCODING_ISO8859_2 :
1798             enc = kCFStringEncodingISOLatin2;
1799             break ;
1800         case wxFONTENCODING_ISO8859_3 :
1801             enc = kCFStringEncodingISOLatin3 ;
1802             break ;
1803         case wxFONTENCODING_ISO8859_4 :
1804             enc = kCFStringEncodingISOLatin4;
1805             break ;
1806         case wxFONTENCODING_ISO8859_5 :
1807             enc = kCFStringEncodingISOLatinCyrillic;
1808             break ;
1809         case wxFONTENCODING_ISO8859_6 :
1810             enc = kCFStringEncodingISOLatinArabic;
1811             break ;
1812         case wxFONTENCODING_ISO8859_7 :
1813             enc = kCFStringEncodingISOLatinGreek;
1814             break ;
1815         case wxFONTENCODING_ISO8859_8 :
1816             enc = kCFStringEncodingISOLatinHebrew;
1817             break ;
1818         case wxFONTENCODING_ISO8859_9 :
1819             enc = kCFStringEncodingISOLatin5;
1820             break ;
1821         case wxFONTENCODING_ISO8859_10 :
1822             enc = kCFStringEncodingISOLatin6;
1823             break ;
1824         case wxFONTENCODING_ISO8859_11 :
1825             enc = kCFStringEncodingISOLatinThai;
1826             break ;
1827         case wxFONTENCODING_ISO8859_13 :
1828             enc = kCFStringEncodingISOLatin7;
1829             break ;
1830         case wxFONTENCODING_ISO8859_14 :
1831             enc = kCFStringEncodingISOLatin8;
1832             break ;
1833         case wxFONTENCODING_ISO8859_15 :
1834             enc = kCFStringEncodingISOLatin9;
1835             break ;
1836
1837         case wxFONTENCODING_KOI8 :
1838             enc = kCFStringEncodingKOI8_R;
1839             break ;
1840         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1841             enc = kCFStringEncodingDOSRussian;
1842             break ;
1843
1844 //      case wxFONTENCODING_BULGARIAN :
1845 //          enc = ;
1846 //          break ;
1847
1848         case wxFONTENCODING_CP437 :
1849             enc =kCFStringEncodingDOSLatinUS ;
1850             break ;
1851         case wxFONTENCODING_CP850 :
1852             enc = kCFStringEncodingDOSLatin1;
1853             break ;
1854         case wxFONTENCODING_CP852 :
1855             enc = kCFStringEncodingDOSLatin2;
1856             break ;
1857         case wxFONTENCODING_CP855 :
1858             enc = kCFStringEncodingDOSCyrillic;
1859             break ;
1860         case wxFONTENCODING_CP866 :
1861             enc =kCFStringEncodingDOSRussian ;
1862             break ;
1863         case wxFONTENCODING_CP874 :
1864             enc = kCFStringEncodingDOSThai;
1865             break ;
1866         case wxFONTENCODING_CP932 :
1867             enc = kCFStringEncodingDOSJapanese;
1868             break ;
1869         case wxFONTENCODING_CP936 :
1870             enc =kCFStringEncodingDOSChineseSimplif ;
1871             break ;
1872         case wxFONTENCODING_CP949 :
1873             enc = kCFStringEncodingDOSKorean;
1874             break ;
1875         case wxFONTENCODING_CP950 :
1876             enc = kCFStringEncodingDOSChineseTrad;
1877             break ;
1878         case wxFONTENCODING_CP1250 :
1879             enc = kCFStringEncodingWindowsLatin2;
1880             break ;
1881         case wxFONTENCODING_CP1251 :
1882             enc =kCFStringEncodingWindowsCyrillic ;
1883             break ;
1884         case wxFONTENCODING_CP1252 :
1885             enc =kCFStringEncodingWindowsLatin1 ;
1886             break ;
1887         case wxFONTENCODING_CP1253 :
1888             enc = kCFStringEncodingWindowsGreek;
1889             break ;
1890         case wxFONTENCODING_CP1254 :
1891             enc = kCFStringEncodingWindowsLatin5;
1892             break ;
1893         case wxFONTENCODING_CP1255 :
1894             enc =kCFStringEncodingWindowsHebrew ;
1895             break ;
1896         case wxFONTENCODING_CP1256 :
1897             enc =kCFStringEncodingWindowsArabic ;
1898             break ;
1899         case wxFONTENCODING_CP1257 :
1900             enc = kCFStringEncodingWindowsBalticRim;
1901             break ;
1902 //   This only really encodes to UTF7 (if that) evidently
1903 //        case wxFONTENCODING_UTF7 :
1904 //            enc = kCFStringEncodingNonLossyASCII ;
1905 //            break ;
1906         case wxFONTENCODING_UTF8 :
1907             enc = kCFStringEncodingUTF8 ;
1908             break ;
1909         case wxFONTENCODING_EUC_JP :
1910             enc = kCFStringEncodingEUC_JP;
1911             break ;
1912         case wxFONTENCODING_UTF16 :
1913             enc = kCFStringEncodingUnicode ;
1914             break ;
1915         case wxFONTENCODING_MACROMAN :
1916             enc = kCFStringEncodingMacRoman ;
1917             break ;
1918         case wxFONTENCODING_MACJAPANESE :
1919             enc = kCFStringEncodingMacJapanese ;
1920             break ;
1921         case wxFONTENCODING_MACCHINESETRAD :
1922             enc = kCFStringEncodingMacChineseTrad ;
1923             break ;
1924         case wxFONTENCODING_MACKOREAN :
1925             enc = kCFStringEncodingMacKorean ;
1926             break ;
1927         case wxFONTENCODING_MACARABIC :
1928             enc = kCFStringEncodingMacArabic ;
1929             break ;
1930         case wxFONTENCODING_MACHEBREW :
1931             enc = kCFStringEncodingMacHebrew ;
1932             break ;
1933         case wxFONTENCODING_MACGREEK :
1934             enc = kCFStringEncodingMacGreek ;
1935             break ;
1936         case wxFONTENCODING_MACCYRILLIC :
1937             enc = kCFStringEncodingMacCyrillic ;
1938             break ;
1939         case wxFONTENCODING_MACDEVANAGARI :
1940             enc = kCFStringEncodingMacDevanagari ;
1941             break ;
1942         case wxFONTENCODING_MACGURMUKHI :
1943             enc = kCFStringEncodingMacGurmukhi ;
1944             break ;
1945         case wxFONTENCODING_MACGUJARATI :
1946             enc = kCFStringEncodingMacGujarati ;
1947             break ;
1948         case wxFONTENCODING_MACORIYA :
1949             enc = kCFStringEncodingMacOriya ;
1950             break ;
1951         case wxFONTENCODING_MACBENGALI :
1952             enc = kCFStringEncodingMacBengali ;
1953             break ;
1954         case wxFONTENCODING_MACTAMIL :
1955             enc = kCFStringEncodingMacTamil ;
1956             break ;
1957         case wxFONTENCODING_MACTELUGU :
1958             enc = kCFStringEncodingMacTelugu ;
1959             break ;
1960         case wxFONTENCODING_MACKANNADA :
1961             enc = kCFStringEncodingMacKannada ;
1962             break ;
1963         case wxFONTENCODING_MACMALAJALAM :
1964             enc = kCFStringEncodingMacMalayalam ;
1965             break ;
1966         case wxFONTENCODING_MACSINHALESE :
1967             enc = kCFStringEncodingMacSinhalese ;
1968             break ;
1969         case wxFONTENCODING_MACBURMESE :
1970             enc = kCFStringEncodingMacBurmese ;
1971             break ;
1972         case wxFONTENCODING_MACKHMER :
1973             enc = kCFStringEncodingMacKhmer ;
1974             break ;
1975         case wxFONTENCODING_MACTHAI :
1976             enc = kCFStringEncodingMacThai ;
1977             break ;
1978         case wxFONTENCODING_MACLAOTIAN :
1979             enc = kCFStringEncodingMacLaotian ;
1980             break ;
1981         case wxFONTENCODING_MACGEORGIAN :
1982             enc = kCFStringEncodingMacGeorgian ;
1983             break ;
1984         case wxFONTENCODING_MACARMENIAN :
1985             enc = kCFStringEncodingMacArmenian ;
1986             break ;
1987         case wxFONTENCODING_MACCHINESESIMP :
1988             enc = kCFStringEncodingMacChineseSimp ;
1989             break ;
1990         case wxFONTENCODING_MACTIBETAN :
1991             enc = kCFStringEncodingMacTibetan ;
1992             break ;
1993         case wxFONTENCODING_MACMONGOLIAN :
1994             enc = kCFStringEncodingMacMongolian ;
1995             break ;
1996         case wxFONTENCODING_MACETHIOPIC :
1997             enc = kCFStringEncodingMacEthiopic ;
1998             break ;
1999         case wxFONTENCODING_MACCENTRALEUR :
2000             enc = kCFStringEncodingMacCentralEurRoman ;
2001             break ;
2002         case wxFONTENCODING_MACVIATNAMESE :
2003             enc = kCFStringEncodingMacVietnamese ;
2004             break ;
2005         case wxFONTENCODING_MACARABICEXT :
2006             enc = kCFStringEncodingMacExtArabic ;
2007             break ;
2008         case wxFONTENCODING_MACSYMBOL :
2009             enc = kCFStringEncodingMacSymbol ;
2010             break ;
2011         case wxFONTENCODING_MACDINGBATS :
2012             enc = kCFStringEncodingMacDingbats ;
2013             break ;
2014         case wxFONTENCODING_MACTURKISH :
2015             enc = kCFStringEncodingMacTurkish ;
2016             break ;
2017         case wxFONTENCODING_MACCROATIAN :
2018             enc = kCFStringEncodingMacCroatian ;
2019             break ;
2020         case wxFONTENCODING_MACICELANDIC :
2021             enc = kCFStringEncodingMacIcelandic ;
2022             break ;
2023         case wxFONTENCODING_MACROMANIAN :
2024             enc = kCFStringEncodingMacRomanian ;
2025             break ;
2026         case wxFONTENCODING_MACCELTIC :
2027             enc = kCFStringEncodingMacCeltic ;
2028             break ;
2029         case wxFONTENCODING_MACGAELIC :
2030             enc = kCFStringEncodingMacGaelic ;
2031             break ;
2032 //      case wxFONTENCODING_MACKEYBOARD :
2033 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2034 //          break ;
2035         default :
2036             // because gcc is picky
2037             break ;
2038     } ;
2039     return enc ;
2040 }
2041
2042 class wxMBConv_cocoa : public wxMBConv
2043 {
2044 public:
2045     wxMBConv_cocoa()
2046     {
2047         Init(CFStringGetSystemEncoding()) ;
2048     }
2049
2050 #if wxUSE_FONTMAP
2051     wxMBConv_cocoa(const wxChar* name)
2052     {
2053         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2054     }
2055 #endif
2056
2057     wxMBConv_cocoa(wxFontEncoding encoding)
2058     {
2059         Init( wxCFStringEncFromFontEnc(encoding) );
2060     }
2061
2062     ~wxMBConv_cocoa()
2063     {
2064     }
2065
2066     void Init( CFStringEncoding encoding)
2067     {
2068         m_encoding = encoding ;
2069     }
2070
2071     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2072     {
2073         wxASSERT(szUnConv);
2074
2075         CFStringRef theString = CFStringCreateWithBytes (
2076                                                 NULL, //the allocator
2077                                                 (const UInt8*)szUnConv,
2078                                                 strlen(szUnConv),
2079                                                 m_encoding,
2080                                                 false //no BOM/external representation
2081                                                 );
2082
2083         wxASSERT(theString);
2084
2085         size_t nOutLength = CFStringGetLength(theString);
2086
2087         if (szOut == NULL)
2088         {
2089             CFRelease(theString);
2090             return nOutLength;
2091         }
2092
2093         CFRange theRange = { 0, nOutSize };
2094
2095 #if SIZEOF_WCHAR_T == 4
2096         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2097 #endif
2098
2099         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2100
2101         CFRelease(theString);
2102
2103         szUniCharBuffer[nOutLength] = '\0' ;
2104
2105 #if SIZEOF_WCHAR_T == 4
2106         wxMBConvUTF16 converter ;
2107         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2108         delete[] szUniCharBuffer;
2109 #endif
2110
2111         return nOutLength;
2112     }
2113
2114     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2115     {
2116         wxASSERT(szUnConv);
2117
2118         size_t nRealOutSize;
2119         size_t nBufSize = wxWcslen(szUnConv);
2120         UniChar* szUniBuffer = (UniChar*) szUnConv;
2121
2122 #if SIZEOF_WCHAR_T == 4
2123         wxMBConvUTF16BE converter ;
2124         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2125         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2126         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2127         nBufSize /= sizeof(UniChar);
2128 #endif
2129
2130         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2131                                 NULL, //allocator
2132                                 szUniBuffer,
2133                                 nBufSize,
2134                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2135                             );
2136
2137         wxASSERT(theString);
2138
2139         //Note that CER puts a BOM when converting to unicode
2140         //so we  check and use getchars instead in that case
2141         if (m_encoding == kCFStringEncodingUnicode)
2142         {
2143             if (szOut != NULL)
2144                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2145
2146             nRealOutSize = CFStringGetLength(theString) + 1;
2147         }
2148         else
2149         {
2150             CFStringGetBytes(
2151                 theString,
2152                 CFRangeMake(0, CFStringGetLength(theString)),
2153                 m_encoding,
2154                 0, //what to put in characters that can't be converted -
2155                     //0 tells CFString to return NULL if it meets such a character
2156                 false, //not an external representation
2157                 (UInt8*) szOut,
2158                 nOutSize,
2159                 (CFIndex*) &nRealOutSize
2160                         );
2161         }
2162
2163         CFRelease(theString);
2164
2165 #if SIZEOF_WCHAR_T == 4
2166         delete[] szUniBuffer;
2167 #endif
2168
2169         return  nRealOutSize - 1;
2170     }
2171
2172     bool IsOk() const
2173     {
2174         return m_encoding != kCFStringEncodingInvalidId &&
2175               CFStringIsEncodingAvailable(m_encoding);
2176     }
2177
2178 private:
2179     CFStringEncoding m_encoding ;
2180 };
2181
2182 #endif // defined(__WXCOCOA__)
2183
2184 // ============================================================================
2185 // Mac conversion classes
2186 // ============================================================================
2187
2188 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2189
2190 class wxMBConv_mac : public wxMBConv
2191 {
2192 public:
2193     wxMBConv_mac()
2194     {
2195         Init(CFStringGetSystemEncoding()) ;
2196     }
2197
2198 #if wxUSE_FONTMAP
2199     wxMBConv_mac(const wxChar* name)
2200     {
2201         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2202     }
2203 #endif
2204
2205     wxMBConv_mac(wxFontEncoding encoding)
2206     {
2207         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2208     }
2209
2210     ~wxMBConv_mac()
2211     {
2212         OSStatus status = noErr ;
2213         status = TECDisposeConverter(m_MB2WC_converter);
2214         status = TECDisposeConverter(m_WC2MB_converter);
2215     }
2216
2217
2218     void Init( TextEncodingBase encoding)
2219     {
2220         OSStatus status = noErr ;
2221         m_char_encoding = encoding ;
2222         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2223
2224         status = TECCreateConverter(&m_MB2WC_converter,
2225                                     m_char_encoding,
2226                                     m_unicode_encoding);
2227         status = TECCreateConverter(&m_WC2MB_converter,
2228                                     m_unicode_encoding,
2229                                     m_char_encoding);
2230     }
2231
2232     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2233     {
2234         OSStatus status = noErr ;
2235         ByteCount byteOutLen ;
2236         ByteCount byteInLen = strlen(psz) ;
2237         wchar_t *tbuf = NULL ;
2238         UniChar* ubuf = NULL ;
2239         size_t res = 0 ;
2240
2241         if (buf == NULL)
2242         {
2243             //apple specs say at least 32
2244             n = wxMax( 32 , byteInLen ) ;
2245             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2246         }
2247         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2248 #if SIZEOF_WCHAR_T == 4
2249         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2250 #else
2251         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2252 #endif
2253         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2254           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2255 #if SIZEOF_WCHAR_T == 4
2256         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2257         // is not properly terminated we get random characters at the end
2258         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2259         wxMBConvUTF16BE converter ;
2260         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2261         free( ubuf ) ;
2262 #else
2263         res = byteOutLen / sizeof( UniChar ) ;
2264 #endif
2265         if ( buf == NULL )
2266              free(tbuf) ;
2267
2268         if ( buf  && res < n)
2269             buf[res] = 0;
2270
2271         return res ;
2272     }
2273
2274     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2275     {
2276         OSStatus status = noErr ;
2277         ByteCount byteOutLen ;
2278         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2279
2280         char *tbuf = NULL ;
2281
2282         if (buf == NULL)
2283         {
2284             //apple specs say at least 32
2285             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2286             tbuf = (char*) malloc( n ) ;
2287         }
2288
2289         ByteCount byteBufferLen = n ;
2290         UniChar* ubuf = NULL ;
2291 #if SIZEOF_WCHAR_T == 4
2292         wxMBConvUTF16BE converter ;
2293         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2294         byteInLen = unicharlen ;
2295         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2296         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2297 #else
2298         ubuf = (UniChar*) psz ;
2299 #endif
2300         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2301             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2302 #if SIZEOF_WCHAR_T == 4
2303         free( ubuf ) ;
2304 #endif
2305         if ( buf == NULL )
2306             free(tbuf) ;
2307
2308         size_t res = byteOutLen ;
2309         if ( buf  && res < n)
2310         {
2311             buf[res] = 0;
2312
2313             //we need to double-trip to verify it didn't insert any ? in place
2314             //of bogus characters
2315             wxWCharBuffer wcBuf(n);
2316             size_t pszlen = wxWcslen(psz);
2317             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2318                         wxWcslen(wcBuf) != pszlen ||
2319                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2320             {
2321                 // we didn't obtain the same thing we started from, hence
2322                 // the conversion was lossy and we consider that it failed
2323                 return (size_t)-1;
2324             }
2325         }
2326
2327         return res ;
2328     }
2329
2330     bool IsOk() const
2331         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2332
2333 private:
2334     TECObjectRef m_MB2WC_converter ;
2335     TECObjectRef m_WC2MB_converter ;
2336
2337     TextEncodingBase m_char_encoding ;
2338     TextEncodingBase m_unicode_encoding ;
2339 };
2340
2341 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2342
2343 // ============================================================================
2344 // wxEncodingConverter based conversion classes
2345 // ============================================================================
2346
2347 #if wxUSE_FONTMAP
2348
2349 class wxMBConv_wxwin : public wxMBConv
2350 {
2351 private:
2352     void Init()
2353     {
2354         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2355                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2356     }
2357
2358 public:
2359     // temporarily just use wxEncodingConverter stuff,
2360     // so that it works while a better implementation is built
2361     wxMBConv_wxwin(const wxChar* name)
2362     {
2363         if (name)
2364             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2365         else
2366             m_enc = wxFONTENCODING_SYSTEM;
2367
2368         Init();
2369     }
2370
2371     wxMBConv_wxwin(wxFontEncoding enc)
2372     {
2373         m_enc = enc;
2374
2375         Init();
2376     }
2377
2378     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2379     {
2380         size_t inbuf = strlen(psz);
2381         if (buf)
2382         {
2383             if (!m2w.Convert(psz,buf))
2384                 return (size_t)-1;
2385         }
2386         return inbuf;
2387     }
2388
2389     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2390     {
2391         const size_t inbuf = wxWcslen(psz);
2392         if (buf)
2393         {
2394             if (!w2m.Convert(psz,buf))
2395                 return (size_t)-1;
2396         }
2397
2398         return inbuf;
2399     }
2400
2401     bool IsOk() const { return m_ok; }
2402
2403 public:
2404     wxFontEncoding m_enc;
2405     wxEncodingConverter m2w, w2m;
2406
2407     // were we initialized successfully?
2408     bool m_ok;
2409
2410     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2411 };
2412
2413 #endif // wxUSE_FONTMAP
2414
2415 // ============================================================================
2416 // wxCSConv implementation
2417 // ============================================================================
2418
2419 void wxCSConv::Init()
2420 {
2421     m_name = NULL;
2422     m_convReal =  NULL;
2423     m_deferred = true;
2424 }
2425
2426 wxCSConv::wxCSConv(const wxChar *charset)
2427 {
2428     Init();
2429
2430     if ( charset )
2431     {
2432         SetName(charset);
2433     }
2434
2435     m_encoding = wxFONTENCODING_SYSTEM;
2436 }
2437
2438 wxCSConv::wxCSConv(wxFontEncoding encoding)
2439 {
2440     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2441     {
2442         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2443
2444         encoding = wxFONTENCODING_SYSTEM;
2445     }
2446
2447     Init();
2448
2449     m_encoding = encoding;
2450 }
2451
2452 wxCSConv::~wxCSConv()
2453 {
2454     Clear();
2455 }
2456
2457 wxCSConv::wxCSConv(const wxCSConv& conv)
2458         : wxMBConv()
2459 {
2460     Init();
2461
2462     SetName(conv.m_name);
2463     m_encoding = conv.m_encoding;
2464 }
2465
2466 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2467 {
2468     Clear();
2469
2470     SetName(conv.m_name);
2471     m_encoding = conv.m_encoding;
2472
2473     return *this;
2474 }
2475
2476 void wxCSConv::Clear()
2477 {
2478     free(m_name);
2479     delete m_convReal;
2480
2481     m_name = NULL;
2482     m_convReal = NULL;
2483 }
2484
2485 void wxCSConv::SetName(const wxChar *charset)
2486 {
2487     if (charset)
2488     {
2489         m_name = wxStrdup(charset);
2490         m_deferred = true;
2491     }
2492 }
2493
2494 wxMBConv *wxCSConv::DoCreate() const
2495 {
2496     // check for the special case of ASCII or ISO8859-1 charset: as we have
2497     // special knowledge of it anyhow, we don't need to create a special
2498     // conversion object
2499     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2500     {
2501         // don't convert at all
2502         return NULL;
2503     }
2504
2505     // we trust OS to do conversion better than we can so try external
2506     // conversion methods first
2507     //
2508     // the full order is:
2509     //      1. OS conversion (iconv() under Unix or Win32 API)
2510     //      2. hard coded conversions for UTF
2511     //      3. wxEncodingConverter as fall back
2512
2513     // step (1)
2514 #ifdef HAVE_ICONV
2515 #if !wxUSE_FONTMAP
2516     if ( m_name )
2517 #endif // !wxUSE_FONTMAP
2518     {
2519         wxString name(m_name);
2520
2521 #if wxUSE_FONTMAP
2522         if ( name.empty() )
2523             name = wxFontMapperBase::Get()->GetEncodingName(m_encoding);
2524 #endif // wxUSE_FONTMAP
2525
2526         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2527         if ( conv->IsOk() )
2528             return conv;
2529
2530         delete conv;
2531     }
2532 #endif // HAVE_ICONV
2533
2534 #ifdef wxHAVE_WIN32_MB2WC
2535     {
2536 #if wxUSE_FONTMAP
2537         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2538                                       : new wxMBConv_win32(m_encoding);
2539         if ( conv->IsOk() )
2540             return conv;
2541
2542         delete conv;
2543 #else
2544         return NULL;
2545 #endif
2546     }
2547 #endif // wxHAVE_WIN32_MB2WC
2548 #if defined(__WXMAC__)
2549     {
2550         // leave UTF16 and UTF32 to the built-ins of wx
2551         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2552             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2553         {
2554
2555 #if wxUSE_FONTMAP
2556             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2557                                         : new wxMBConv_mac(m_encoding);
2558 #else
2559             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2560 #endif
2561             if ( conv->IsOk() )
2562                  return conv;
2563
2564             delete conv;
2565         }
2566     }
2567 #endif
2568 #if defined(__WXCOCOA__)
2569     {
2570         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2571         {
2572
2573 #if wxUSE_FONTMAP
2574             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2575                                           : new wxMBConv_cocoa(m_encoding);
2576 #else
2577             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2578 #endif
2579             if ( conv->IsOk() )
2580                  return conv;
2581
2582             delete conv;
2583         }
2584     }
2585 #endif
2586     // step (2)
2587     wxFontEncoding enc = m_encoding;
2588 #if wxUSE_FONTMAP
2589     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2590     {
2591         // use "false" to suppress interactive dialogs -- we can be called from
2592         // anywhere and popping up a dialog from here is the last thing we want to
2593         // do
2594         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2595     }
2596 #endif // wxUSE_FONTMAP
2597
2598     switch ( enc )
2599     {
2600         case wxFONTENCODING_UTF7:
2601              return new wxMBConvUTF7;
2602
2603         case wxFONTENCODING_UTF8:
2604              return new wxMBConvUTF8;
2605
2606         case wxFONTENCODING_UTF16BE:
2607              return new wxMBConvUTF16BE;
2608
2609         case wxFONTENCODING_UTF16LE:
2610              return new wxMBConvUTF16LE;
2611
2612         case wxFONTENCODING_UTF32BE:
2613              return new wxMBConvUTF32BE;
2614
2615         case wxFONTENCODING_UTF32LE:
2616              return new wxMBConvUTF32LE;
2617
2618         default:
2619              // nothing to do but put here to suppress gcc warnings
2620              ;
2621     }
2622
2623     // step (3)
2624 #if wxUSE_FONTMAP
2625     {
2626         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2627                                       : new wxMBConv_wxwin(m_encoding);
2628         if ( conv->IsOk() )
2629             return conv;
2630
2631         delete conv;
2632     }
2633 #endif // wxUSE_FONTMAP
2634
2635     // NB: This is a hack to prevent deadlock. What could otherwise happen
2636     //     in Unicode build: wxConvLocal creation ends up being here
2637     //     because of some failure and logs the error. But wxLog will try to
2638     //     attach timestamp, for which it will need wxConvLocal (to convert
2639     //     time to char* and then wchar_t*), but that fails, tries to log
2640     //     error, but wxLog has a (already locked) critical section that
2641     //     guards static buffer.
2642     static bool alreadyLoggingError = false;
2643     if (!alreadyLoggingError)
2644     {
2645         alreadyLoggingError = true;
2646         wxLogError(_("Cannot convert from the charset '%s'!"),
2647                    m_name ? m_name
2648                       :
2649 #if wxUSE_FONTMAP
2650                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2651 #else // !wxUSE_FONTMAP
2652                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2653 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2654               );
2655         alreadyLoggingError = false;
2656     }
2657
2658     return NULL;
2659 }
2660
2661 void wxCSConv::CreateConvIfNeeded() const
2662 {
2663     if ( m_deferred )
2664     {
2665         wxCSConv *self = (wxCSConv *)this; // const_cast
2666
2667 #if wxUSE_INTL
2668         // if we don't have neither the name nor the encoding, use the default
2669         // encoding for this system
2670         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2671         {
2672             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2673         }
2674 #endif // wxUSE_INTL
2675
2676         self->m_convReal = DoCreate();
2677         self->m_deferred = false;
2678     }
2679 }
2680
2681 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2682 {
2683     CreateConvIfNeeded();
2684
2685     if (m_convReal)
2686         return m_convReal->MB2WC(buf, psz, n);
2687
2688     // latin-1 (direct)
2689     size_t len = strlen(psz);
2690
2691     if (buf)
2692     {
2693         for (size_t c = 0; c <= len; c++)
2694             buf[c] = (unsigned char)(psz[c]);
2695     }
2696
2697     return len;
2698 }
2699
2700 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2701 {
2702     CreateConvIfNeeded();
2703
2704     if (m_convReal)
2705         return m_convReal->WC2MB(buf, psz, n);
2706
2707     // latin-1 (direct)
2708     const size_t len = wxWcslen(psz);
2709     if (buf)
2710     {
2711         for (size_t c = 0; c <= len; c++)
2712         {
2713             if (psz[c] > 0xFF)
2714                 return (size_t)-1;
2715             buf[c] = (char)psz[c];
2716         }
2717     }
2718     else
2719     {
2720         for (size_t c = 0; c <= len; c++)
2721         {
2722             if (psz[c] > 0xFF)
2723                 return (size_t)-1;
2724         }
2725     }
2726
2727     return len;
2728 }
2729
2730 // ----------------------------------------------------------------------------
2731 // globals
2732 // ----------------------------------------------------------------------------
2733
2734 #ifdef __WINDOWS__
2735     static wxMBConv_win32 wxConvLibcObj;
2736 #elif defined(__WXMAC__) && !defined(__MACH__)
2737     static wxMBConv_mac wxConvLibcObj ;
2738 #else
2739     static wxMBConvLibc wxConvLibcObj;
2740 #endif
2741
2742 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2743 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2744 static wxMBConvUTF7 wxConvUTF7Obj;
2745 static wxMBConvUTF8 wxConvUTF8Obj;
2746
2747 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2748 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2749 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2750 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2751 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2752 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2753 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2754 #ifdef __WXOSX__
2755                                     wxConvUTF8Obj;
2756 #else
2757                                     wxConvLibcObj;
2758 #endif
2759
2760
2761 #else // !wxUSE_WCHAR_T
2762
2763 // stand-ins in absence of wchar_t
2764 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2765                                 wxConvISO8859_1,
2766                                 wxConvLocal,
2767                                 wxConvUTF8;
2768
2769 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2770
2771