src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/private.h"
  45     #include "wx/msw/missing.h"
  46 #endif
  47
  48 #ifndef __WXWINCE__
  49 #include <errno.h>
  50 #endif
  51
  52 #include <ctype.h>
  53 #include <string.h>
  54 #include <stdlib.h>
  55
  56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  57     #define wxHAVE_WIN32_MB2WC
  58 #endif // __WIN32__ but !__WXMICROWIN__
  59
  60 // ----------------------------------------------------------------------------
  61 // headers
  62 // ----------------------------------------------------------------------------
  63
  64 #ifdef __SALFORDC__
  65     #include <clib.h>
  66 #endif
  67
  68 #ifdef HAVE_ICONV
  69     #include <iconv.h>
  70     #include "wx/thread.h"
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #ifndef __DARWIN__
  79 #include <ATSUnicode.h>
  80 #include <TextCommon.h>
  81 #include <TextEncodingConverter.h>
  82 #endif
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86 // ----------------------------------------------------------------------------
  87 // macros
  88 // ----------------------------------------------------------------------------
  89
  90 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  91 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  92
  93 #if SIZEOF_WCHAR_T == 4
  94     #define WC_NAME         "UCS4"
  95     #define WC_BSWAP         BSWAP_UCS4
  96     #ifdef WORDS_BIGENDIAN
  97       #define WC_NAME_BEST  "UCS-4BE"
  98     #else
  99       #define WC_NAME_BEST  "UCS-4LE"
 100     #endif
 101 #elif SIZEOF_WCHAR_T == 2
 102     #define WC_NAME         "UTF16"
 103     #define WC_BSWAP         BSWAP_UTF16
 104     #define WC_UTF16
 105     #ifdef WORDS_BIGENDIAN
 106       #define WC_NAME_BEST  "UTF-16BE"
 107     #else
 108       #define WC_NAME_BEST  "UTF-16LE"
 109     #endif
 110 #else // sizeof(wchar_t) != 2 nor 4
 111     // does this ever happen?
 112     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 113 #endif
 114
 115 // ============================================================================
 116 // implementation
 117 // ============================================================================
 118
 119 // ----------------------------------------------------------------------------
 120 // UTF-16 en/decoding to/from UCS-4
 121 // ----------------------------------------------------------------------------
 122
 123
 124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 125 {
 126     if (input<=0xffff)
 127     {
 128         if (output)
 129             *output = (wxUint16) input;
 130         return 1;
 131     }
 132     else if (input>=0x110000)
 133     {
 134         return (size_t)-1;
 135     }
 136     else
 137     {
 138         if (output)
 139         {
 140             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 141             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 142         }
 143         return 2;
 144     }
 145 }
 146
 147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 148 {
 149     if ((*input<0xd800) || (*input>0xdfff))
 150     {
 151         output = *input;
 152         return 1;
 153     }
 154     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 155     {
 156         output = *input;
 157         return (size_t)-1;
 158     }
 159     else
 160     {
 161         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 162         return 2;
 163     }
 164 }
 165
 166
 167 // ----------------------------------------------------------------------------
 168 // wxMBConv
 169 // ----------------------------------------------------------------------------
 170
 171 wxMBConv::~wxMBConv()
 172 {
 173     // nothing to do here (necessary for Darwin linking probably)
 174 }
 175
 176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 177 {
 178     if ( psz )
 179     {
 180         // calculate the length of the buffer needed first
 181         size_t nLen = MB2WC(NULL, psz, 0);
 182         if ( nLen != (size_t)-1 )
 183         {
 184             // now do the actual conversion
 185             wxWCharBuffer buf(nLen);
 186             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 187             if ( nLen != (size_t)-1 )
 188             {
 189                 return buf;
 190             }
 191         }
 192     }
 193
 194     wxWCharBuffer buf((wchar_t *)NULL);
 195
 196     return buf;
 197 }
 198
 199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 200 {
 201     if ( pwz )
 202     {
 203         size_t nLen = WC2MB(NULL, pwz, 0);
 204         if ( nLen != (size_t)-1 )
 205         {
 206             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 207             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 208             if ( nLen != (size_t)-1 )
 209             {
 210                 return buf;
 211             }
 212         }
 213     }
 214
 215     wxCharBuffer buf((char *)NULL);
 216
 217     return buf;
 218 }
 219
 220 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 221 {
 222     wxASSERT(pOutSize != NULL);
 223
 224     const char* szEnd = szString + nStringLen + 1;
 225     const char* szPos = szString;
 226     const char* szStart = szPos;
 227
 228     size_t nActualLength = 0;
 229     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 230
 231     wxWCharBuffer theBuffer(nCurrentSize);
 232
 233     //Convert the string until the length() is reached, continuing the
 234     //loop every time a null character is reached
 235     while(szPos != szEnd)
 236     {
 237         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 238
 239         //Get the length of the current (sub)string
 240         size_t nLen = MB2WC(NULL, szPos, 0);
 241
 242         //Invalid conversion?
 243         if( nLen == (size_t)-1 )
 244         {
 245             *pOutSize = 0;
 246             theBuffer.data()[0u] = wxT('\0');
 247             return theBuffer;
 248         }
 249
 250
 251         //Increase the actual length (+1 for current null character)
 252         nActualLength += nLen + 1;
 253
 254         //if buffer too big, realloc the buffer
 255         if (nActualLength > (nCurrentSize+1))
 256         {
 257             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 258             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 259             theBuffer = theNewBuffer;
 260             nCurrentSize <<= 1;
 261         }
 262
 263         //Convert the current (sub)string
 264         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 265         {
 266             *pOutSize = 0;
 267             theBuffer.data()[0u] = wxT('\0');
 268             return theBuffer;
 269         }
 270
 271         //Increment to next (sub)string
 272         //Note that we have to use strlen instead of nLen here
 273         //because XX2XX gives us the size of the output buffer,
 274         //which is not necessarily the length of the string
 275         szPos += strlen(szPos) + 1;
 276     }
 277
 278     //success - return actual length and the buffer
 279     *pOutSize = nActualLength;
 280     return theBuffer;
 281 }
 282
 283 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 284 {
 285     wxASSERT(pOutSize != NULL);
 286
 287     const wchar_t* szEnd = szString + nStringLen + 1;
 288     const wchar_t* szPos = szString;
 289     const wchar_t* szStart = szPos;
 290
 291     size_t nActualLength = 0;
 292     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 293
 294     wxCharBuffer theBuffer(nCurrentSize);
 295
 296     //Convert the string until the length() is reached, continuing the
 297     //loop every time a null character is reached
 298     while(szPos != szEnd)
 299     {
 300         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 301
 302         //Get the length of the current (sub)string
 303         size_t nLen = WC2MB(NULL, szPos, 0);
 304
 305         //Invalid conversion?
 306         if( nLen == (size_t)-1 )
 307         {
 308             *pOutSize = 0;
 309             theBuffer.data()[0u] = wxT('\0');
 310             return theBuffer;
 311         }
 312
 313         //Increase the actual length (+1 for current null character)
 314         nActualLength += nLen + 1;
 315
 316         //if buffer too big, realloc the buffer
 317         if (nActualLength > (nCurrentSize+1))
 318         {
 319             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 320             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 321             theBuffer = theNewBuffer;
 322             nCurrentSize <<= 1;
 323         }
 324
 325         //Convert the current (sub)string
 326         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 327         {
 328             *pOutSize = 0;
 329             theBuffer.data()[0u] = wxT('\0');
 330             return theBuffer;
 331         }
 332
 333         //Increment to next (sub)string
 334         //Note that we have to use wxWcslen instead of nLen here
 335         //because XX2XX gives us the size of the output buffer,
 336         //which is not necessarily the length of the string
 337         szPos += wxWcslen(szPos) + 1;
 338     }
 339
 340     //success - return actual length and the buffer
 341     *pOutSize = nActualLength;
 342     return theBuffer;
 343 }
 344
 345 // ----------------------------------------------------------------------------
 346 // wxMBConvLibc
 347 // ----------------------------------------------------------------------------
 348
 349 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 350 {
 351     return wxMB2WC(buf, psz, n);
 352 }
 353
 354 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 355 {
 356     return wxWC2MB(buf, psz, n);
 357 }
 358
 359 #ifdef __UNIX__
 360
 361 // ----------------------------------------------------------------------------
 362 // wxConvBrokenFileNames
 363 // ----------------------------------------------------------------------------
 364
 365 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 366 {
 367     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 368                   || wxStricmp(charset, _T("UTF8")) == 0  )
 369         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 370     else
 371         m_conv = new wxCSConv(charset);
 372 }
 373
 374 size_t
 375 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 376                              const char *psz,
 377                              size_t outputSize) const
 378 {
 379     return m_conv->MB2WC( outputBuf, psz, outputSize );
 380 }
 381
 382 size_t
 383 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 384                              const wchar_t *psz,
 385                              size_t outputSize) const
 386 {
 387     return m_conv->WC2MB( outputBuf, psz, outputSize );
 388 }
 389
 390 #endif
 391
 392 // ----------------------------------------------------------------------------
 393 // UTF-7
 394 // ----------------------------------------------------------------------------
 395
 396 // Implementation (C) 2004 Fredrik Roubert
 397
 398 //
 399 // BASE64 decoding table
 400 //
 401 static const unsigned char utf7unb64[] =
 402 {
 403     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 404     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 405     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 406     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 407     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 408     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 409     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 410     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 411     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 412     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 413     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 414     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 415     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 416     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 417     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 418     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 419     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 420     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 421     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 422     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 423     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 424     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 425     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 426     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 432     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 433     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 434     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 435 };
 436
 437 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 438 {
 439     size_t len = 0;
 440
 441     while (*psz && ((!buf) || (len < n)))
 442     {
 443         unsigned char cc = *psz++;
 444         if (cc != '+')
 445         {
 446             // plain ASCII char
 447             if (buf)
 448                 *buf++ = cc;
 449             len++;
 450         }
 451         else if (*psz == '-')
 452         {
 453             // encoded plus sign
 454             if (buf)
 455                 *buf++ = cc;
 456             len++;
 457             psz++;
 458         }
 459         else
 460         {
 461             // BASE64 encoded string
 462             bool lsb;
 463             unsigned char c;
 464             unsigned int d, l;
 465             for (lsb = false, d = 0, l = 0;
 466                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 467             {
 468                 d <<= 6;
 469                 d += cc;
 470                 for (l += 6; l >= 8; lsb = !lsb)
 471                 {
 472                     c = (unsigned char)((d >> (l -= 8)) % 256);
 473                     if (lsb)
 474                     {
 475                         if (buf)
 476                             *buf++ |= c;
 477                         len ++;
 478                     }
 479                     else
 480                         if (buf)
 481                             *buf = (wchar_t)(c << 8);
 482                 }
 483             }
 484             if (*psz == '-')
 485                 psz++;
 486         }
 487     }
 488     if (buf && (len < n))
 489         *buf = 0;
 490     return len;
 491 }
 492
 493 //
 494 // BASE64 encoding table
 495 //
 496 static const unsigned char utf7enb64[] =
 497 {
 498     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 499     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 500     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 501     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 502     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 503     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 504     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 505     '4', '5', '6', '7', '8', '9', '+', '/'
 506 };
 507
 508 //
 509 // UTF-7 encoding table
 510 //
 511 // 0 - Set D (directly encoded characters)
 512 // 1 - Set O (optional direct characters)
 513 // 2 - whitespace characters (optional)
 514 // 3 - special characters
 515 //
 516 static const unsigned char utf7encode[128] =
 517 {
 518     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 519     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 520     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 521     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 522     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 523     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 524     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 525     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 526 };
 527
 528 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 529 {
 530
 531
 532     size_t len = 0;
 533
 534     while (*psz && ((!buf) || (len < n)))
 535     {
 536         wchar_t cc = *psz++;
 537         if (cc < 0x80 && utf7encode[cc] < 1)
 538         {
 539             // plain ASCII char
 540             if (buf)
 541                 *buf++ = (char)cc;
 542             len++;
 543         }
 544 #ifndef WC_UTF16
 545         else if (((wxUint32)cc) > 0xffff)
 546         {
 547             // no surrogate pair generation (yet?)
 548             return (size_t)-1;
 549         }
 550 #endif
 551         else
 552         {
 553             if (buf)
 554                 *buf++ = '+';
 555             len++;
 556             if (cc != '+')
 557             {
 558                 // BASE64 encode string
 559                 unsigned int lsb, d, l;
 560                 for (d = 0, l = 0;; psz++)
 561                 {
 562                     for (lsb = 0; lsb < 2; lsb ++)
 563                     {
 564                         d <<= 8;
 565                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 566
 567                         for (l += 8; l >= 6; )
 568                         {
 569                             l -= 6;
 570                             if (buf)
 571                                 *buf++ = utf7enb64[(d >> l) % 64];
 572                             len++;
 573                         }
 574                     }
 575                     cc = *psz;
 576                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 577                         break;
 578                 }
 579                 if (l != 0)
 580                 {
 581                     if (buf)
 582                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 583                     len++;
 584                 }
 585             }
 586             if (buf)
 587                 *buf++ = '-';
 588             len++;
 589         }
 590     }
 591     if (buf && (len < n))
 592         *buf = 0;
 593     return len;
 594 }
 595
 596 // ----------------------------------------------------------------------------
 597 // UTF-8
 598 // ----------------------------------------------------------------------------
 599
 600 static wxUint32 utf8_max[]=
 601     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 602
 603 // boundaries of the private use area we use to (temporarily) remap invalid
 604 // characters invalid in a UTF-8 encoded string
 605 const wxUint32 wxUnicodePUA = 0x100000;
 606 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 607
 608 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 609 {
 610     size_t len = 0;
 611
 612     while (*psz && ((!buf) || (len < n)))
 613     {
 614         const char *opsz = psz;
 615         bool invalid = false;
 616         unsigned char cc = *psz++, fc = cc;
 617         unsigned cnt;
 618         for (cnt = 0; fc & 0x80; cnt++)
 619             fc <<= 1;
 620         if (!cnt)
 621         {
 622             // plain ASCII char
 623             if (buf)
 624                 *buf++ = cc;
 625             len++;
 626
 627             // escape the escape character for octal escapes
 628             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 629                     && cc == '\\' && (!buf || len < n))
 630             {
 631                 if (buf)
 632                     *buf++ = cc;
 633                 len++;
 634             }
 635         }
 636         else
 637         {
 638             cnt--;
 639             if (!cnt)
 640             {
 641                 // invalid UTF-8 sequence
 642                 invalid = true;
 643             }
 644             else
 645             {
 646                 unsigned ocnt = cnt - 1;
 647                 wxUint32 res = cc & (0x3f >> cnt);
 648                 while (cnt--)
 649                 {
 650                     cc = *psz;
 651                     if ((cc & 0xC0) != 0x80)
 652                     {
 653                         // invalid UTF-8 sequence
 654                         invalid = true;
 655                         break;
 656                     }
 657                     psz++;
 658                     res = (res << 6) | (cc & 0x3f);
 659                 }
 660                 if (invalid || res <= utf8_max[ocnt])
 661                 {
 662                     // illegal UTF-8 encoding
 663                     invalid = true;
 664                 }
 665                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 666                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 667                 {
 668                     // if one of our PUA characters turns up externally
 669                     // it must also be treated as an illegal sequence
 670                     // (a bit like you have to escape an escape character)
 671                     invalid = true;
 672                 }
 673                 else
 674                 {
 675 #ifdef WC_UTF16
 676                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 677                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 678                     if (pa == (size_t)-1)
 679                     {
 680                         invalid = true;
 681                     }
 682                     else
 683                     {
 684                         if (buf)
 685                             buf += pa;
 686                         len += pa;
 687                     }
 688 #else // !WC_UTF16
 689                     if (buf)
 690                         *buf++ = res;
 691                     len++;
 692 #endif // WC_UTF16/!WC_UTF16
 693                 }
 694             }
 695             if (invalid)
 696             {
 697                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 698                 {
 699                     while (opsz < psz && (!buf || len < n))
 700                     {
 701 #ifdef WC_UTF16
 702                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 703                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 704                         wxASSERT(pa != (size_t)-1);
 705                         if (buf)
 706                             buf += pa;
 707                         opsz++;
 708                         len += pa;
 709 #else
 710                         if (buf)
 711                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 712                         opsz++;
 713                         len++;
 714 #endif
 715                     }
 716                 }
 717                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 718                 {
 719                     while (opsz < psz && (!buf || len < n))
 720                     {
 721                         if ( buf && len + 3 < n )
 722                         {
 723                             unsigned char n = *opsz;
 724                             *buf++ = L'\\';
 725                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 726                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 727                             *buf++ = (wchar_t)( L'0' + n % 010 );
 728                         }
 729                         opsz++;
 730                         len += 4;
 731                     }
 732                 }
 733                 else // MAP_INVALID_UTF8_NOT
 734                 {
 735                     return (size_t)-1;
 736                 }
 737             }
 738         }
 739     }
 740     if (buf && (len < n))
 741         *buf = 0;
 742     return len;
 743 }
 744
 745 static inline bool isoctal(wchar_t wch)
 746 {
 747     return L'0' <= wch && wch <= L'7';
 748 }
 749
 750 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 751 {
 752     size_t len = 0;
 753
 754     while (*psz && ((!buf) || (len < n)))
 755     {
 756         wxUint32 cc;
 757 #ifdef WC_UTF16
 758         // cast is ok for WC_UTF16
 759         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 760         psz += (pa == (size_t)-1) ? 1 : pa;
 761 #else
 762         cc=(*psz++) & 0x7fffffff;
 763 #endif
 764
 765         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 766                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 767         {
 768             if (buf)
 769                 *buf++ = (char)(cc - wxUnicodePUA);
 770             len++;
 771         }
 772         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 773                     && cc == L'\\' && psz[0] == L'\\' )
 774         {
 775             if (buf)
 776                 *buf++ = (char)cc;
 777             psz++;
 778             len++;
 779         }
 780         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 781                     cc == L'\\' &&
 782                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 783         {
 784             if (buf)
 785             {
 786                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 787                                  (psz[1] - L'0')*010 +
 788                                  (psz[2] - L'0'));
 789             }
 790
 791             psz += 3;
 792             len++;
 793         }
 794         else
 795         {
 796             unsigned cnt;
 797             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 798             if (!cnt)
 799             {
 800                 // plain ASCII char
 801                 if (buf)
 802                     *buf++ = (char) cc;
 803                 len++;
 804             }
 805
 806             else
 807             {
 808                 len += cnt + 1;
 809                 if (buf)
 810                 {
 811                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 812                     while (cnt--)
 813                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 814                 }
 815             }
 816         }
 817     }
 818
 819     if (buf && (len<n))
 820         *buf = 0;
 821
 822     return len;
 823 }
 824
 825 // ----------------------------------------------------------------------------
 826 // UTF-16
 827 // ----------------------------------------------------------------------------
 828
 829 #ifdef WORDS_BIGENDIAN
 830     #define wxMBConvUTF16straight wxMBConvUTF16BE
 831     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 832 #else
 833     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 834     #define wxMBConvUTF16straight wxMBConvUTF16LE
 835 #endif
 836
 837
 838 #ifdef WC_UTF16
 839
 840 // copy 16bit MB to 16bit String
 841 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 842 {
 843     size_t len=0;
 844
 845     while (*(wxUint16*)psz && (!buf || len < n))
 846     {
 847         if (buf)
 848             *buf++ = *(wxUint16*)psz;
 849         len++;
 850
 851         psz += sizeof(wxUint16);
 852     }
 853     if (buf && len<n)   *buf=0;
 854
 855     return len;
 856 }
 857
 858
 859 // copy 16bit String to 16bit MB
 860 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 861 {
 862     size_t len=0;
 863
 864     while (*psz && (!buf || len < n))
 865     {
 866         if (buf)
 867         {
 868             *(wxUint16*)buf = *psz;
 869             buf += sizeof(wxUint16);
 870         }
 871         len += sizeof(wxUint16);
 872         psz++;
 873     }
 874     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 875
 876     return len;
 877 }
 878
 879
 880 // swap 16bit MB to 16bit String
 881 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 882 {
 883     size_t len=0;
 884
 885     while (*(wxUint16*)psz && (!buf || len < n))
 886     {
 887         if (buf)
 888         {
 889             ((char *)buf)[0] = psz[1];
 890             ((char *)buf)[1] = psz[0];
 891             buf++;
 892         }
 893         len++;
 894         psz += sizeof(wxUint16);
 895     }
 896     if (buf && len<n)   *buf=0;
 897
 898     return len;
 899 }
 900
 901
// swap 16bit String to 16bit MB
 903 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 904 {
 905     size_t len=0;
 906
 907     while (*psz && (!buf || len < n))
 908     {
 909         if (buf)
 910         {
 911             *buf++ = ((char*)psz)[1];
 912             *buf++ = ((char*)psz)[0];
 913         }
 914         len += sizeof(wxUint16);
 915         psz++;
 916     }
 917     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 918
 919     return len;
 920 }
 921
 922
 923 #else // WC_UTF16
 924
 925
 926 // copy 16bit MB to 32bit String
 927 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 928 {
 929     size_t len=0;
 930
 931     while (*(wxUint16*)psz && (!buf || len < n))
 932     {
 933         wxUint32 cc;
 934         size_t pa=decode_utf16((wxUint16*)psz, cc);
 935         if (pa == (size_t)-1)
 936             return pa;
 937
 938         if (buf)
 939             *buf++ = cc;
 940         len++;
 941         psz += pa * sizeof(wxUint16);
 942     }
 943     if (buf && len<n)   *buf=0;
 944
 945     return len;
 946 }
 947
 948
 949 // copy 32bit String to 16bit MB
 950 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 951 {
 952     size_t len=0;
 953
 954     while (*psz && (!buf || len < n))
 955     {
 956         wxUint16 cc[2];
 957         size_t pa=encode_utf16(*psz, cc);
 958
 959         if (pa == (size_t)-1)
 960             return pa;
 961
 962         if (buf)
 963         {
 964             *(wxUint16*)buf = cc[0];
 965             buf += sizeof(wxUint16);
 966             if (pa > 1)
 967             {
 968                 *(wxUint16*)buf = cc[1];
 969                 buf += sizeof(wxUint16);
 970             }
 971         }
 972
 973         len += pa*sizeof(wxUint16);
 974         psz++;
 975     }
 976     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 977
 978     return len;
 979 }
 980
 981
 982 // swap 16bit MB to 32bit String
 983 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 984 {
 985     size_t len=0;
 986
 987     while (*(wxUint16*)psz && (!buf || len < n))
 988     {
 989         wxUint32 cc;
 990         char tmp[4];
 991         tmp[0]=psz[1];  tmp[1]=psz[0];
 992         tmp[2]=psz[3];  tmp[3]=psz[2];
 993
 994         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 995         if (pa == (size_t)-1)
 996             return pa;
 997
 998         if (buf)
 999             *buf++ = cc;
1000
1001         len++;
1002         psz += pa * sizeof(wxUint16);
1003     }
1004     if (buf && len<n)   *buf=0;
1005
1006     return len;
1007 }
1008
1009
1010 // swap 32bit String to 16bit MB
1011 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1012 {
1013     size_t len=0;
1014
1015     while (*psz && (!buf || len < n))
1016     {
1017         wxUint16 cc[2];
1018         size_t pa=encode_utf16(*psz, cc);
1019
1020         if (pa == (size_t)-1)
1021             return pa;
1022
1023         if (buf)
1024         {
1025             *buf++ = ((char*)cc)[1];
1026             *buf++ = ((char*)cc)[0];
1027             if (pa > 1)
1028             {
1029                 *buf++ = ((char*)cc)[3];
1030                 *buf++ = ((char*)cc)[2];
1031             }
1032         }
1033
1034         len += pa*sizeof(wxUint16);
1035         psz++;
1036     }
1037     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1038
1039     return len;
1040 }
1041
1042 #endif // WC_UTF16
1043
1044
1045 // ----------------------------------------------------------------------------
1046 // UTF-32
1047 // ----------------------------------------------------------------------------
1048
1049 #ifdef WORDS_BIGENDIAN
1050 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1051 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1052 #else
1053 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1054 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1055 #endif
1056
1057
1058 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1059 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1060
1061
1062 #ifdef WC_UTF16
1063
1064 // copy 32bit MB to 16bit String
1065 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1066 {
1067     size_t len=0;
1068
1069     while (*(wxUint32*)psz && (!buf || len < n))
1070     {
1071         wxUint16 cc[2];
1072
1073         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1074         if (pa == (size_t)-1)
1075             return pa;
1076
1077         if (buf)
1078         {
1079             *buf++ = cc[0];
1080             if (pa > 1)
1081                 *buf++ = cc[1];
1082         }
1083         len += pa;
1084         psz += sizeof(wxUint32);
1085     }
1086     if (buf && len<n)   *buf=0;
1087
1088     return len;
1089 }
1090
1091
1092 // copy 16bit String to 32bit MB
1093 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1094 {
1095     size_t len=0;
1096
1097     while (*psz && (!buf || len < n))
1098     {
1099         wxUint32 cc;
1100
1101         // cast is ok for WC_UTF16
1102         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1103         if (pa == (size_t)-1)
1104             return pa;
1105
1106         if (buf)
1107         {
1108             *(wxUint32*)buf = cc;
1109             buf += sizeof(wxUint32);
1110         }
1111         len += sizeof(wxUint32);
1112         psz += pa;
1113     }
1114
1115     if (buf && len<=n-sizeof(wxUint32))
1116         *(wxUint32*)buf=0;
1117
1118     return len;
1119 }
1120
1121
1122
1123 // swap 32bit MB to 16bit String
1124 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1125 {
1126     size_t len=0;
1127
1128     while (*(wxUint32*)psz && (!buf || len < n))
1129     {
1130         char tmp[4];
1131         tmp[0] = psz[3];   tmp[1] = psz[2];
1132         tmp[2] = psz[1];   tmp[3] = psz[0];
1133
1134
1135         wxUint16 cc[2];
1136
1137         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1138         if (pa == (size_t)-1)
1139             return pa;
1140
1141         if (buf)
1142         {
1143             *buf++ = cc[0];
1144             if (pa > 1)
1145                 *buf++ = cc[1];
1146         }
1147         len += pa;
1148         psz += sizeof(wxUint32);
1149     }
1150
1151     if (buf && len<n)
1152         *buf=0;
1153
1154     return len;
1155 }
1156
1157
1158 // swap 16bit String to 32bit MB
1159 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1160 {
1161     size_t len=0;
1162
1163     while (*psz && (!buf || len < n))
1164     {
1165         char cc[4];
1166
1167         // cast is ok for WC_UTF16
1168         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1169         if (pa == (size_t)-1)
1170             return pa;
1171
1172         if (buf)
1173         {
1174             *buf++ = cc[3];
1175             *buf++ = cc[2];
1176             *buf++ = cc[1];
1177             *buf++ = cc[0];
1178         }
1179         len += sizeof(wxUint32);
1180         psz += pa;
1181     }
1182
1183     if (buf && len<=n-sizeof(wxUint32))
1184         *(wxUint32*)buf=0;
1185
1186     return len;
1187 }
1188
1189 #else // WC_UTF16
1190
1191
1192 // copy 32bit MB to 32bit String
1193 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1194 {
1195     size_t len=0;
1196
1197     while (*(wxUint32*)psz && (!buf || len < n))
1198     {
1199         if (buf)
1200             *buf++ = *(wxUint32*)psz;
1201         len++;
1202         psz += sizeof(wxUint32);
1203     }
1204
1205     if (buf && len<n)
1206         *buf=0;
1207
1208     return len;
1209 }
1210
1211
1212 // copy 32bit String to 32bit MB
1213 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1214 {
1215     size_t len=0;
1216
1217     while (*psz && (!buf || len < n))
1218     {
1219         if (buf)
1220         {
1221             *(wxUint32*)buf = *psz;
1222             buf += sizeof(wxUint32);
1223         }
1224
1225         len += sizeof(wxUint32);
1226         psz++;
1227     }
1228
1229     if (buf && len<=n-sizeof(wxUint32))
1230         *(wxUint32*)buf=0;
1231
1232     return len;
1233 }
1234
1235
1236 // swap 32bit MB to 32bit String
1237 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1238 {
1239     size_t len=0;
1240
1241     while (*(wxUint32*)psz && (!buf || len < n))
1242     {
1243         if (buf)
1244         {
1245             ((char *)buf)[0] = psz[3];
1246             ((char *)buf)[1] = psz[2];
1247             ((char *)buf)[2] = psz[1];
1248             ((char *)buf)[3] = psz[0];
1249             buf++;
1250         }
1251         len++;
1252         psz += sizeof(wxUint32);
1253     }
1254
1255     if (buf && len<n)
1256         *buf=0;
1257
1258     return len;
1259 }
1260
1261
1262 // swap 32bit String to 32bit MB
1263 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1264 {
1265     size_t len=0;
1266
1267     while (*psz && (!buf || len < n))
1268     {
1269         if (buf)
1270         {
1271             *buf++ = ((char *)psz)[3];
1272             *buf++ = ((char *)psz)[2];
1273             *buf++ = ((char *)psz)[1];
1274             *buf++ = ((char *)psz)[0];
1275         }
1276         len += sizeof(wxUint32);
1277         psz++;
1278     }
1279
1280     if (buf && len<=n-sizeof(wxUint32))
1281         *(wxUint32*)buf=0;
1282
1283     return len;
1284 }
1285
1286
1287 #endif // WC_UTF16
1288
1289
1290 // ============================================================================
1291 // The classes doing conversion using the iconv_xxx() functions
1292 // ============================================================================
1293
1294 #ifdef HAVE_ICONV
1295
// ICONV_FAILED(cres, bufLeft) tells whether an iconv() call which returned
// cres and left bufLeft bytes unconverted in its input buffer really failed.
//
// VS: Normally this is just the check for the (size_t)-1 return value but
//     glibc 2.1.3 is broken: iconv() conversion to/from UCS4 fails with E2BIG
//     if the output buffer is _exactly_ as big as needed. This is (unless
//     there is yet another bug in glibc) the only case in which iconv()
//     returns (size_t)-1 while reporting 0 bytes left in the input buffer --
//     when a _real_ error occurs, the number of bytes left is non-zero. Hence
//     the alternative test used for the old glibc versions.
//     [This bug does not appear in glibc 2.2.]
#if !defined(__GLIBC__) || __GLIBC__ != 2 || __GLIBC_MINOR__ > 1
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#else // glibc 2.0 or 2.1
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#endif

// casts the address of the input pointer to the type of the second iconv()
// parameter, which is built from ICONV_CONST
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1312
1313 // ----------------------------------------------------------------------------
1314 // wxMBConv_iconv: encapsulates an iconv character set
1315 // ----------------------------------------------------------------------------
1316
1317 class wxMBConv_iconv : public wxMBConv
1318 {
1319 public:
1320     wxMBConv_iconv(const wxChar *name);
1321     virtual ~wxMBConv_iconv();
1322
1323     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1324     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1325
1326     bool IsOk() const
1327         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1328
1329 protected:
1330     // the iconv handlers used to translate from multibyte to wide char and in
1331     // the other direction
1332     iconv_t m2w,
1333             w2m;
1334 #if wxUSE_THREADS
1335     // guards access to m2w and w2m objects
1336     wxMutex m_iconvMutex;
1337 #endif
1338
1339 private:
1340     // the name (for iconv_open()) of a wide char charset -- if none is
1341     // available on this machine, it will remain NULL
1342     static const char *ms_wcCharsetName;
1343
1344     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1345     // different endian-ness than the native one
1346     static bool ms_wcNeedsSwap;
1347 };
1348
1349 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1350 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1351
1352 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1353 {
1354     // Do it the hard way
1355     char cname[100];
1356     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1357         cname[i] = (char) name[i];
1358
1359     // check for charset that represents wchar_t:
1360     if (ms_wcCharsetName == NULL)
1361     {
1362         ms_wcNeedsSwap = false;
1363
1364         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1365         ms_wcCharsetName = WC_NAME_BEST;
1366         m2w = iconv_open(ms_wcCharsetName, cname);
1367
1368         if (m2w == (iconv_t)-1)
1369         {
1370             // try charset w/o bytesex info (e.g. "UCS4")
1371             // and check for bytesex ourselves:
1372             ms_wcCharsetName = WC_NAME;
1373             m2w = iconv_open(ms_wcCharsetName, cname);
1374
1375             // last bet, try if it knows WCHAR_T pseudo-charset
1376             if (m2w == (iconv_t)-1)
1377             {
1378                 ms_wcCharsetName = "WCHAR_T";
1379                 m2w = iconv_open(ms_wcCharsetName, cname);
1380             }
1381
1382             if (m2w != (iconv_t)-1)
1383             {
1384                 char    buf[2], *bufPtr;
1385                 wchar_t wbuf[2], *wbufPtr;
1386                 size_t  insz, outsz;
1387                 size_t  res;
1388
1389                 buf[0] = 'A';
1390                 buf[1] = 0;
1391                 wbuf[0] = 0;
1392                 insz = 2;
1393                 outsz = SIZEOF_WCHAR_T * 2;
1394                 wbufPtr = wbuf;
1395                 bufPtr = buf;
1396
1397                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1398                             (char**)&wbufPtr, &outsz);
1399
1400                 if (ICONV_FAILED(res, insz))
1401                 {
1402                     ms_wcCharsetName = NULL;
1403                     wxLogLastError(wxT("iconv"));
1404                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1405                 }
1406                 else
1407                 {
1408                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1409                 }
1410             }
1411             else
1412             {
1413                 ms_wcCharsetName = NULL;
1414
1415                 // VS: we must not output an error here, since wxWidgets will safely
1416                 //     fall back to using wxEncodingConverter.
1417                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1418                 //wxLogError(
1419             }
1420         }
1421         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1422     }
1423     else // we already have ms_wcCharsetName
1424     {
1425         m2w = iconv_open(ms_wcCharsetName, cname);
1426     }
1427
1428     // NB: don't ever pass NULL to iconv_open(), it may crash!
1429     if ( ms_wcCharsetName )
1430     {
1431         w2m = iconv_open( cname, ms_wcCharsetName);
1432     }
1433     else
1434     {
1435         w2m = (iconv_t)-1;
1436     }
1437 }
1438
1439 wxMBConv_iconv::~wxMBConv_iconv()
1440 {
1441     if ( m2w != (iconv_t)-1 )
1442         iconv_close(m2w);
1443     if ( w2m != (iconv_t)-1 )
1444         iconv_close(w2m);
1445 }
1446
1447 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1448 {
1449 #if wxUSE_THREADS
1450     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1451     //     Unfortunately there is a couple of global wxCSConv objects such as
1452     //     wxConvLocal that are used all over wx code, so we have to make sure
1453     //     the handle is used by at most one thread at the time. Otherwise
1454     //     only a few wx classes would be safe to use from non-main threads
1455     //     as MB<->WC conversion would fail "randomly".
1456     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1457 #endif
1458
1459     size_t inbuf = strlen(psz);
1460     size_t outbuf = n * SIZEOF_WCHAR_T;
1461     size_t res, cres;
1462     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1463     wchar_t *bufPtr = buf;
1464     const char *pszPtr = psz;
1465
1466     if (buf)
1467     {
1468         // have destination buffer, convert there
1469         cres = iconv(m2w,
1470                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1471                      (char**)&bufPtr, &outbuf);
1472         res = n - (outbuf / SIZEOF_WCHAR_T);
1473
1474         if (ms_wcNeedsSwap)
1475         {
1476             // convert to native endianness
1477             WC_BSWAP(buf /* _not_ bufPtr */, res)
1478         }
1479
1480         // NB: iconv was given only strlen(psz) characters on input, and so
1481         //     it couldn't convert the trailing zero. Let's do it ourselves
1482         //     if there's some room left for it in the output buffer.
1483         if (res < n)
1484             buf[res] = 0;
1485     }
1486     else
1487     {
1488         // no destination buffer... convert using temp buffer
1489         // to calculate destination buffer requirement
1490         wchar_t tbuf[8];
1491         res = 0;
1492         do {
1493             bufPtr = tbuf;
1494             outbuf = 8*SIZEOF_WCHAR_T;
1495
1496             cres = iconv(m2w,
1497                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1498                          (char**)&bufPtr, &outbuf );
1499
1500             res += 8-(outbuf/SIZEOF_WCHAR_T);
1501         } while ((cres==(size_t)-1) && (errno==E2BIG));
1502     }
1503
1504     if (ICONV_FAILED(cres, inbuf))
1505     {
1506         //VS: it is ok if iconv fails, hence trace only
1507         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1508         return (size_t)-1;
1509     }
1510
1511     return res;
1512 }
1513
1514 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1515 {
1516 #if wxUSE_THREADS
1517     // NB: explained in MB2WC
1518     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1519 #endif
1520
1521     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1522     size_t outbuf = n;
1523     size_t res, cres;
1524
1525     wchar_t *tmpbuf = 0;
1526
1527     if (ms_wcNeedsSwap)
1528     {
1529         // need to copy to temp buffer to switch endianness
1530         // this absolutely doesn't rock!
1531         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1532         //  could be in read-only memory, or be accessed in some other thread)
1533         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1534         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1535         WC_BSWAP(tmpbuf, inbuf)
1536         psz=tmpbuf;
1537     }
1538
1539     if (buf)
1540     {
1541         // have destination buffer, convert there
1542         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1543
1544         res = n-outbuf;
1545
1546         // NB: iconv was given only wcslen(psz) characters on input, and so
1547         //     it couldn't convert the trailing zero. Let's do it ourselves
1548         //     if there's some room left for it in the output buffer.
1549         if (res < n)
1550             buf[0] = 0;
1551     }
1552     else
1553     {
1554         // no destination buffer... convert using temp buffer
1555         // to calculate destination buffer requirement
1556         char tbuf[16];
1557         res = 0;
1558         do {
1559             buf = tbuf; outbuf = 16;
1560
1561             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1562
1563             res += 16 - outbuf;
1564         } while ((cres==(size_t)-1) && (errno==E2BIG));
1565     }
1566
1567     if (ms_wcNeedsSwap)
1568     {
1569         free(tmpbuf);
1570     }
1571
1572     if (ICONV_FAILED(cres, inbuf))
1573     {
1574         //VS: it is ok if iconv fails, hence trace only
1575         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1576         return (size_t)-1;
1577     }
1578
1579     return res;
1580 }
1581
1582 #endif // HAVE_ICONV
1583
1584
1585 // ============================================================================
1586 // Win32 conversion classes
1587 // ============================================================================
1588
1589 #ifdef wxHAVE_WIN32_MB2WC
1590
// these functions are defined in utils.cpp and are only used if wxUSE_FONTMAP
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
#endif // wxUSE_FONTMAP
1596
1597 class wxMBConv_win32 : public wxMBConv
1598 {
1599 public:
1600     wxMBConv_win32()
1601     {
1602         m_CodePage = CP_ACP;
1603     }
1604
1605 #if wxUSE_FONTMAP
1606     wxMBConv_win32(const wxChar* name)
1607     {
1608         m_CodePage = wxCharsetToCodepage(name);
1609     }
1610
1611     wxMBConv_win32(wxFontEncoding encoding)
1612     {
1613         m_CodePage = wxEncodingToCodepage(encoding);
1614     }
1615 #endif
1616
1617     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1618     {
1619         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1620         // the behaviour is not compatible with the Unix version (using iconv)
1621         // and break the library itself, e.g. wxTextInputStream::NextChar()
1622         // wouldn't work if reading an incomplete MB char didn't result in an
1623         // error
1624         //
1625         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1626         // an error (tested under Windows Server 2003) and apparently it is
1627         // done on purpose, i.e. the function accepts any input in this case
1628         // and although I'd prefer to return error on ill-formed output, our
1629         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1630         // explicitly ill-formed according to RFC 2152) neither so we don't
1631         // even have any fallback here...
1632         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1633
1634         const size_t len = ::MultiByteToWideChar
1635                              (
1636                                 m_CodePage,     // code page
1637                                 flags,          // flags: fall on error
1638                                 psz,            // input string
1639                                 -1,             // its length (NUL-terminated)
1640                                 buf,            // output string
1641                                 buf ? n : 0     // size of output buffer
1642                              );
1643
1644         // note that it returns count of written chars for buf != NULL and size
1645         // of the needed buffer for buf == NULL so in either case the length of
1646         // the string (which never includes the terminating NUL) is one less
1647         return len ? len - 1 : (size_t)-1;
1648     }
1649
1650     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1651     {
1652         /*
1653             we have a problem here: by default, WideCharToMultiByte() may
1654             replace characters unrepresentable in the target code page with bad
1655             quality approximations such as turning "1/2" symbol (U+00BD) into
1656             "1" for the code pages which don't have it and we, obviously, want
1657             to avoid this at any price
1658
1659             the trouble is that this function does it _silently_, i.e. it won't
1660             even tell us whether it did or not... Win98/2000 and higher provide
1661             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1662             we have to resort to a round trip, i.e. check that converting back
1663             results in the same string -- this is, of course, expensive but
1664             otherwise we simply can't be sure to not garble the data.
1665          */
1666
1667         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1668         // it doesn't work with CJK encodings (which we test for rather roughly
1669         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1670         // supporting it
1671         BOOL usedDef wxDUMMY_INITIALIZE(false);
1672         BOOL *pUsedDef;
1673         int flags;
1674         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1675         {
1676             // it's our lucky day
1677             flags = WC_NO_BEST_FIT_CHARS;
1678             pUsedDef = &usedDef;
1679         }
1680         else // old system or unsupported encoding
1681         {
1682             flags = 0;
1683             pUsedDef = NULL;
1684         }
1685
1686         const size_t len = ::WideCharToMultiByte
1687                              (
1688                                 m_CodePage,     // code page
1689                                 flags,          // either none or no best fit
1690                                 pwz,            // input string
1691                                 -1,             // it is (wide) NUL-terminated
1692                                 buf,            // output buffer
1693                                 buf ? n : 0,    // and its size
1694                                 NULL,           // default "replacement" char
1695                                 pUsedDef        // [out] was it used?
1696                              );
1697
1698         if ( !len )
1699         {
1700             // function totally failed
1701             return (size_t)-1;
1702         }
1703
1704         // if we were really converting, check if we succeeded
1705         if ( buf )
1706         {
1707             if ( flags )
1708             {
1709                 // check if the conversion failed, i.e. if any replacements
1710                 // were done
1711                 if ( usedDef )
1712                     return (size_t)-1;
1713             }
1714             else // we must resort to double tripping...
1715             {
1716                 wxWCharBuffer wcBuf(n);
1717                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1718                         wcscmp(wcBuf, pwz) != 0 )
1719                 {
1720                     // we didn't obtain the same thing we started from, hence
1721                     // the conversion was lossy and we consider that it failed
1722                     return (size_t)-1;
1723                 }
1724             }
1725         }
1726
1727         // see the comment above for the reason of "len - 1"
1728         return len - 1;
1729     }
1730
1731     bool IsOk() const { return m_CodePage != -1; }
1732
1733 private:
1734     static bool CanUseNoBestFit()
1735     {
1736         static int s_isWin98Or2k = -1;
1737
1738         if ( s_isWin98Or2k == -1 )
1739         {
1740             int verMaj, verMin;
1741             switch ( wxGetOsVersion(&verMaj, &verMin) )
1742             {
1743                 case wxWIN95:
1744                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1745                     break;
1746
1747                 case wxWINDOWS_NT:
1748                     s_isWin98Or2k = verMaj >= 5;
1749                     break;
1750
1751                 default:
1752                     // unknown, be conseravtive by default
1753                     s_isWin98Or2k = 0;
1754             }
1755
1756             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1757         }
1758
1759         return s_isWin98Or2k == 1;
1760     }
1761
1762     long m_CodePage;
1763 };
1764
1765 #endif // wxHAVE_WIN32_MB2WC
1766
1767 // ============================================================================
1768 // Cocoa conversion classes
1769 // ============================================================================
1770
1771 #if defined(__WXCOCOA__)
1772
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1776
1777 #include <CoreFoundation/CFString.h>
1778 #include <CoreFoundation/CFStringEncodingExt.h>
1779
1780 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1781 {
1782     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1783     if ( encoding == wxFONTENCODING_DEFAULT )
1784     {
1785         enc = CFStringGetSystemEncoding();
1786     }
1787     else switch( encoding)
1788     {
1789         case wxFONTENCODING_ISO8859_1 :
1790             enc = kCFStringEncodingISOLatin1 ;
1791             break ;
1792         case wxFONTENCODING_ISO8859_2 :
1793             enc = kCFStringEncodingISOLatin2;
1794             break ;
1795         case wxFONTENCODING_ISO8859_3 :
1796             enc = kCFStringEncodingISOLatin3 ;
1797             break ;
1798         case wxFONTENCODING_ISO8859_4 :
1799             enc = kCFStringEncodingISOLatin4;
1800             break ;
1801         case wxFONTENCODING_ISO8859_5 :
1802             enc = kCFStringEncodingISOLatinCyrillic;
1803             break ;
1804         case wxFONTENCODING_ISO8859_6 :
1805             enc = kCFStringEncodingISOLatinArabic;
1806             break ;
1807         case wxFONTENCODING_ISO8859_7 :
1808             enc = kCFStringEncodingISOLatinGreek;
1809             break ;
1810         case wxFONTENCODING_ISO8859_8 :
1811             enc = kCFStringEncodingISOLatinHebrew;
1812             break ;
1813         case wxFONTENCODING_ISO8859_9 :
1814             enc = kCFStringEncodingISOLatin5;
1815             break ;
1816         case wxFONTENCODING_ISO8859_10 :
1817             enc = kCFStringEncodingISOLatin6;
1818             break ;
1819         case wxFONTENCODING_ISO8859_11 :
1820             enc = kCFStringEncodingISOLatinThai;
1821             break ;
1822         case wxFONTENCODING_ISO8859_13 :
1823             enc = kCFStringEncodingISOLatin7;
1824             break ;
1825         case wxFONTENCODING_ISO8859_14 :
1826             enc = kCFStringEncodingISOLatin8;
1827             break ;
1828         case wxFONTENCODING_ISO8859_15 :
1829             enc = kCFStringEncodingISOLatin9;
1830             break ;
1831
1832         case wxFONTENCODING_KOI8 :
1833             enc = kCFStringEncodingKOI8_R;
1834             break ;
1835         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1836             enc = kCFStringEncodingDOSRussian;
1837             break ;
1838
1839 //      case wxFONTENCODING_BULGARIAN :
1840 //          enc = ;
1841 //          break ;
1842
1843         case wxFONTENCODING_CP437 :
1844             enc =kCFStringEncodingDOSLatinUS ;
1845             break ;
1846         case wxFONTENCODING_CP850 :
1847             enc = kCFStringEncodingDOSLatin1;
1848             break ;
1849         case wxFONTENCODING_CP852 :
1850             enc = kCFStringEncodingDOSLatin2;
1851             break ;
1852         case wxFONTENCODING_CP855 :
1853             enc = kCFStringEncodingDOSCyrillic;
1854             break ;
1855         case wxFONTENCODING_CP866 :
1856             enc =kCFStringEncodingDOSRussian ;
1857             break ;
1858         case wxFONTENCODING_CP874 :
1859             enc = kCFStringEncodingDOSThai;
1860             break ;
1861         case wxFONTENCODING_CP932 :
1862             enc = kCFStringEncodingDOSJapanese;
1863             break ;
1864         case wxFONTENCODING_CP936 :
1865             enc =kCFStringEncodingDOSChineseSimplif ;
1866             break ;
1867         case wxFONTENCODING_CP949 :
1868             enc = kCFStringEncodingDOSKorean;
1869             break ;
1870         case wxFONTENCODING_CP950 :
1871             enc = kCFStringEncodingDOSChineseTrad;
1872             break ;
1873         case wxFONTENCODING_CP1250 :
1874             enc = kCFStringEncodingWindowsLatin2;
1875             break ;
1876         case wxFONTENCODING_CP1251 :
1877             enc =kCFStringEncodingWindowsCyrillic ;
1878             break ;
1879         case wxFONTENCODING_CP1252 :
1880             enc =kCFStringEncodingWindowsLatin1 ;
1881             break ;
1882         case wxFONTENCODING_CP1253 :
1883             enc = kCFStringEncodingWindowsGreek;
1884             break ;
1885         case wxFONTENCODING_CP1254 :
1886             enc = kCFStringEncodingWindowsLatin5;
1887             break ;
1888         case wxFONTENCODING_CP1255 :
1889             enc =kCFStringEncodingWindowsHebrew ;
1890             break ;
1891         case wxFONTENCODING_CP1256 :
1892             enc =kCFStringEncodingWindowsArabic ;
1893             break ;
1894         case wxFONTENCODING_CP1257 :
1895             enc = kCFStringEncodingWindowsBalticRim;
1896             break ;
1897 //   This only really encodes to UTF7 (if that) evidently
1898 //        case wxFONTENCODING_UTF7 :
1899 //            enc = kCFStringEncodingNonLossyASCII ;
1900 //            break ;
1901         case wxFONTENCODING_UTF8 :
1902             enc = kCFStringEncodingUTF8 ;
1903             break ;
1904         case wxFONTENCODING_EUC_JP :
1905             enc = kCFStringEncodingEUC_JP;
1906             break ;
1907         case wxFONTENCODING_UTF16 :
1908             enc = kCFStringEncodingUnicode ;
1909             break ;
1910         case wxFONTENCODING_MACROMAN :
1911             enc = kCFStringEncodingMacRoman ;
1912             break ;
1913         case wxFONTENCODING_MACJAPANESE :
1914             enc = kCFStringEncodingMacJapanese ;
1915             break ;
1916         case wxFONTENCODING_MACCHINESETRAD :
1917             enc = kCFStringEncodingMacChineseTrad ;
1918             break ;
1919         case wxFONTENCODING_MACKOREAN :
1920             enc = kCFStringEncodingMacKorean ;
1921             break ;
1922         case wxFONTENCODING_MACARABIC :
1923             enc = kCFStringEncodingMacArabic ;
1924             break ;
1925         case wxFONTENCODING_MACHEBREW :
1926             enc = kCFStringEncodingMacHebrew ;
1927             break ;
1928         case wxFONTENCODING_MACGREEK :
1929             enc = kCFStringEncodingMacGreek ;
1930             break ;
1931         case wxFONTENCODING_MACCYRILLIC :
1932             enc = kCFStringEncodingMacCyrillic ;
1933             break ;
1934         case wxFONTENCODING_MACDEVANAGARI :
1935             enc = kCFStringEncodingMacDevanagari ;
1936             break ;
1937         case wxFONTENCODING_MACGURMUKHI :
1938             enc = kCFStringEncodingMacGurmukhi ;
1939             break ;
1940         case wxFONTENCODING_MACGUJARATI :
1941             enc = kCFStringEncodingMacGujarati ;
1942             break ;
1943         case wxFONTENCODING_MACORIYA :
1944             enc = kCFStringEncodingMacOriya ;
1945             break ;
1946         case wxFONTENCODING_MACBENGALI :
1947             enc = kCFStringEncodingMacBengali ;
1948             break ;
1949         case wxFONTENCODING_MACTAMIL :
1950             enc = kCFStringEncodingMacTamil ;
1951             break ;
1952         case wxFONTENCODING_MACTELUGU :
1953             enc = kCFStringEncodingMacTelugu ;
1954             break ;
1955         case wxFONTENCODING_MACKANNADA :
1956             enc = kCFStringEncodingMacKannada ;
1957             break ;
1958         case wxFONTENCODING_MACMALAJALAM :
1959             enc = kCFStringEncodingMacMalayalam ;
1960             break ;
1961         case wxFONTENCODING_MACSINHALESE :
1962             enc = kCFStringEncodingMacSinhalese ;
1963             break ;
1964         case wxFONTENCODING_MACBURMESE :
1965             enc = kCFStringEncodingMacBurmese ;
1966             break ;
1967         case wxFONTENCODING_MACKHMER :
1968             enc = kCFStringEncodingMacKhmer ;
1969             break ;
1970         case wxFONTENCODING_MACTHAI :
1971             enc = kCFStringEncodingMacThai ;
1972             break ;
1973         case wxFONTENCODING_MACLAOTIAN :
1974             enc = kCFStringEncodingMacLaotian ;
1975             break ;
1976         case wxFONTENCODING_MACGEORGIAN :
1977             enc = kCFStringEncodingMacGeorgian ;
1978             break ;
1979         case wxFONTENCODING_MACARMENIAN :
1980             enc = kCFStringEncodingMacArmenian ;
1981             break ;
1982         case wxFONTENCODING_MACCHINESESIMP :
1983             enc = kCFStringEncodingMacChineseSimp ;
1984             break ;
1985         case wxFONTENCODING_MACTIBETAN :
1986             enc = kCFStringEncodingMacTibetan ;
1987             break ;
1988         case wxFONTENCODING_MACMONGOLIAN :
1989             enc = kCFStringEncodingMacMongolian ;
1990             break ;
1991         case wxFONTENCODING_MACETHIOPIC :
1992             enc = kCFStringEncodingMacEthiopic ;
1993             break ;
1994         case wxFONTENCODING_MACCENTRALEUR :
1995             enc = kCFStringEncodingMacCentralEurRoman ;
1996             break ;
1997         case wxFONTENCODING_MACVIATNAMESE :
1998             enc = kCFStringEncodingMacVietnamese ;
1999             break ;
2000         case wxFONTENCODING_MACARABICEXT :
2001             enc = kCFStringEncodingMacExtArabic ;
2002             break ;
2003         case wxFONTENCODING_MACSYMBOL :
2004             enc = kCFStringEncodingMacSymbol ;
2005             break ;
2006         case wxFONTENCODING_MACDINGBATS :
2007             enc = kCFStringEncodingMacDingbats ;
2008             break ;
2009         case wxFONTENCODING_MACTURKISH :
2010             enc = kCFStringEncodingMacTurkish ;
2011             break ;
2012         case wxFONTENCODING_MACCROATIAN :
2013             enc = kCFStringEncodingMacCroatian ;
2014             break ;
2015         case wxFONTENCODING_MACICELANDIC :
2016             enc = kCFStringEncodingMacIcelandic ;
2017             break ;
2018         case wxFONTENCODING_MACROMANIAN :
2019             enc = kCFStringEncodingMacRomanian ;
2020             break ;
2021         case wxFONTENCODING_MACCELTIC :
2022             enc = kCFStringEncodingMacCeltic ;
2023             break ;
2024         case wxFONTENCODING_MACGAELIC :
2025             enc = kCFStringEncodingMacGaelic ;
2026             break ;
2027 //      case wxFONTENCODING_MACKEYBOARD :
2028 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2029 //          break ;
2030         default :
2031             // because gcc is picky
2032             break ;
2033     } ;
2034     return enc ;
2035 }
2036
2037 class wxMBConv_cocoa : public wxMBConv
2038 {
2039 public:
2040     wxMBConv_cocoa()
2041     {
2042         Init(CFStringGetSystemEncoding()) ;
2043     }
2044
2045 #if wxUSE_FONTMAP
2046     wxMBConv_cocoa(const wxChar* name)
2047     {
2048         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2049     }
2050 #endif
2051
2052     wxMBConv_cocoa(wxFontEncoding encoding)
2053     {
2054         Init( wxCFStringEncFromFontEnc(encoding) );
2055     }
2056
2057     ~wxMBConv_cocoa()
2058     {
2059     }
2060
2061     void Init( CFStringEncoding encoding)
2062     {
2063         m_encoding = encoding ;
2064     }
2065
2066     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2067     {
2068         wxASSERT(szUnConv);
2069
2070         CFStringRef theString = CFStringCreateWithBytes (
2071                                                 NULL, //the allocator
2072                                                 (const UInt8*)szUnConv,
2073                                                 strlen(szUnConv),
2074                                                 m_encoding,
2075                                                 false //no BOM/external representation
2076                                                 );
2077
2078         wxASSERT(theString);
2079
2080         size_t nOutLength = CFStringGetLength(theString);
2081
2082         if (szOut == NULL)
2083         {
2084             CFRelease(theString);
2085             return nOutLength;
2086         }
2087
2088         CFRange theRange = { 0, nOutSize };
2089
2090 #if SIZEOF_WCHAR_T == 4
2091         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2092 #endif
2093
2094         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2095
2096         CFRelease(theString);
2097
2098         szUniCharBuffer[nOutLength] = '\0' ;
2099
2100 #if SIZEOF_WCHAR_T == 4
2101         wxMBConvUTF16 converter ;
2102         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2103         delete[] szUniCharBuffer;
2104 #endif
2105
2106         return nOutLength;
2107     }
2108
2109     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2110     {
2111         wxASSERT(szUnConv);
2112
2113         size_t nRealOutSize;
2114         size_t nBufSize = wxWcslen(szUnConv);
2115         UniChar* szUniBuffer = (UniChar*) szUnConv;
2116
2117 #if SIZEOF_WCHAR_T == 4
2118         wxMBConvUTF16 converter ;
2119         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2120         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2121         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2122         nBufSize /= sizeof(UniChar);
2123 #endif
2124
2125         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2126                                 NULL, //allocator
2127                                 szUniBuffer,
2128                                 nBufSize,
2129                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2130                             );
2131
2132         wxASSERT(theString);
2133
2134         //Note that CER puts a BOM when converting to unicode
2135         //so we  check and use getchars instead in that case
2136         if (m_encoding == kCFStringEncodingUnicode)
2137         {
2138             if (szOut != NULL)
2139                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2140
2141             nRealOutSize = CFStringGetLength(theString) + 1;
2142         }
2143         else
2144         {
2145             CFStringGetBytes(
2146                 theString,
2147                 CFRangeMake(0, CFStringGetLength(theString)),
2148                 m_encoding,
2149                 0, //what to put in characters that can't be converted -
2150                     //0 tells CFString to return NULL if it meets such a character
2151                 false, //not an external representation
2152                 (UInt8*) szOut,
2153                 nOutSize,
2154                 (CFIndex*) &nRealOutSize
2155                         );
2156         }
2157
2158         CFRelease(theString);
2159
2160 #if SIZEOF_WCHAR_T == 4
2161         delete[] szUniBuffer;
2162 #endif
2163
2164         return  nRealOutSize - 1;
2165     }
2166
2167     bool IsOk() const
2168     {
2169         return m_encoding != kCFStringEncodingInvalidId &&
2170               CFStringIsEncodingAvailable(m_encoding);
2171     }
2172
2173 private:
2174     CFStringEncoding m_encoding ;
2175 };
2176
2177 #endif // defined(__WXCOCOA__)
2178
2179 // ============================================================================
2180 // Mac conversion classes
2181 // ============================================================================
2182
2183 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2184
2185 class wxMBConv_mac : public wxMBConv
2186 {
2187 public:
2188     wxMBConv_mac()
2189     {
2190         Init(CFStringGetSystemEncoding()) ;
2191     }
2192
2193 #if wxUSE_FONTMAP
2194     wxMBConv_mac(const wxChar* name)
2195     {
2196         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2197     }
2198 #endif
2199
2200     wxMBConv_mac(wxFontEncoding encoding)
2201     {
2202         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2203     }
2204
2205     ~wxMBConv_mac()
2206     {
2207         OSStatus status = noErr ;
2208         status = TECDisposeConverter(m_MB2WC_converter);
2209         status = TECDisposeConverter(m_WC2MB_converter);
2210     }
2211
2212
2213     void Init( TextEncodingBase encoding)
2214     {
2215         OSStatus status = noErr ;
2216         m_char_encoding = encoding ;
2217         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2218
2219         status = TECCreateConverter(&m_MB2WC_converter,
2220                                     m_char_encoding,
2221                                     m_unicode_encoding);
2222         status = TECCreateConverter(&m_WC2MB_converter,
2223                                     m_unicode_encoding,
2224                                     m_char_encoding);
2225     }
2226
2227     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2228     {
2229         OSStatus status = noErr ;
2230         ByteCount byteOutLen ;
2231         ByteCount byteInLen = strlen(psz) ;
2232         wchar_t *tbuf = NULL ;
2233         UniChar* ubuf = NULL ;
2234         size_t res = 0 ;
2235
2236         if (buf == NULL)
2237         {
2238             //apple specs say at least 32
2239             n = wxMax( 32 , byteInLen ) ;
2240             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2241         }
2242         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2243 #if SIZEOF_WCHAR_T == 4
2244         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2245 #else
2246         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2247 #endif
2248         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2249           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2250 #if SIZEOF_WCHAR_T == 4
2251         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2252         // is not properly terminated we get random characters at the end
2253         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2254         wxMBConvUTF16 converter ;
2255         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2256         free( ubuf ) ;
2257 #else
2258         res = byteOutLen / sizeof( UniChar ) ;
2259 #endif
2260         if ( buf == NULL )
2261              free(tbuf) ;
2262
2263         if ( buf  && res < n)
2264             buf[res] = 0;
2265
2266         return res ;
2267     }
2268
2269     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2270     {
2271         OSStatus status = noErr ;
2272         ByteCount byteOutLen ;
2273         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2274
2275         char *tbuf = NULL ;
2276
2277         if (buf == NULL)
2278         {
2279             //apple specs say at least 32
2280             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2281             tbuf = (char*) malloc( n ) ;
2282         }
2283
2284         ByteCount byteBufferLen = n ;
2285         UniChar* ubuf = NULL ;
2286 #if SIZEOF_WCHAR_T == 4
2287         wxMBConvUTF16 converter ;
2288         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2289         byteInLen = unicharlen ;
2290         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2291         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2292 #else
2293         ubuf = (UniChar*) psz ;
2294 #endif
2295         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2296             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2297 #if SIZEOF_WCHAR_T == 4
2298         free( ubuf ) ;
2299 #endif
2300         if ( buf == NULL )
2301             free(tbuf) ;
2302
2303         size_t res = byteOutLen ;
2304         if ( buf  && res < n)
2305         {
2306             buf[res] = 0;
2307
2308             //we need to double-trip to verify it didn't insert any ? in place
2309             //of bogus characters
2310             wxWCharBuffer wcBuf(n);
2311             size_t pszlen = wxWcslen(psz);
2312             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2313                         wxWcslen(wcBuf) != pszlen ||
2314                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2315             {
2316                 // we didn't obtain the same thing we started from, hence
2317                 // the conversion was lossy and we consider that it failed
2318                 return (size_t)-1;
2319             }
2320         }
2321
2322         return res ;
2323     }
2324
2325     bool IsOk() const
2326         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2327
2328 private:
2329     TECObjectRef m_MB2WC_converter ;
2330     TECObjectRef m_WC2MB_converter ;
2331
2332     TextEncodingBase m_char_encoding ;
2333     TextEncodingBase m_unicode_encoding ;
2334 };
2335
2336 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2337
2338 // ============================================================================
2339 // wxEncodingConverter based conversion classes
2340 // ============================================================================
2341
2342 #if wxUSE_FONTMAP
2343
2344 class wxMBConv_wxwin : public wxMBConv
2345 {
2346 private:
2347     void Init()
2348     {
2349         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2350                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2351     }
2352
2353 public:
2354     // temporarily just use wxEncodingConverter stuff,
2355     // so that it works while a better implementation is built
2356     wxMBConv_wxwin(const wxChar* name)
2357     {
2358         if (name)
2359             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2360         else
2361             m_enc = wxFONTENCODING_SYSTEM;
2362
2363         Init();
2364     }
2365
2366     wxMBConv_wxwin(wxFontEncoding enc)
2367     {
2368         m_enc = enc;
2369
2370         Init();
2371     }
2372
2373     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2374     {
2375         size_t inbuf = strlen(psz);
2376         if (buf)
2377         {
2378             if (!m2w.Convert(psz,buf))
2379                 return (size_t)-1;
2380         }
2381         return inbuf;
2382     }
2383
2384     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2385     {
2386         const size_t inbuf = wxWcslen(psz);
2387         if (buf)
2388         {
2389             if (!w2m.Convert(psz,buf))
2390                 return (size_t)-1;
2391         }
2392
2393         return inbuf;
2394     }
2395
2396     bool IsOk() const { return m_ok; }
2397
2398 public:
2399     wxFontEncoding m_enc;
2400     wxEncodingConverter m2w, w2m;
2401
2402     // were we initialized successfully?
2403     bool m_ok;
2404
2405     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2406 };
2407
2408 #endif // wxUSE_FONTMAP
2409
2410 // ============================================================================
2411 // wxCSConv implementation
2412 // ============================================================================
2413
2414 void wxCSConv::Init()
2415 {
2416     m_name = NULL;
2417     m_convReal =  NULL;
2418     m_deferred = true;
2419 }
2420
2421 wxCSConv::wxCSConv(const wxChar *charset)
2422 {
2423     Init();
2424
2425     if ( charset )
2426     {
2427         SetName(charset);
2428     }
2429
2430     m_encoding = wxFONTENCODING_SYSTEM;
2431 }
2432
2433 wxCSConv::wxCSConv(wxFontEncoding encoding)
2434 {
2435     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2436     {
2437         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2438
2439         encoding = wxFONTENCODING_SYSTEM;
2440     }
2441
2442     Init();
2443
2444     m_encoding = encoding;
2445 }
2446
2447 wxCSConv::~wxCSConv()
2448 {
2449     Clear();
2450 }
2451
2452 wxCSConv::wxCSConv(const wxCSConv& conv)
2453         : wxMBConv()
2454 {
2455     Init();
2456
2457     SetName(conv.m_name);
2458     m_encoding = conv.m_encoding;
2459 }
2460
2461 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2462 {
2463     Clear();
2464
2465     SetName(conv.m_name);
2466     m_encoding = conv.m_encoding;
2467
2468     return *this;
2469 }
2470
2471 void wxCSConv::Clear()
2472 {
2473     free(m_name);
2474     delete m_convReal;
2475
2476     m_name = NULL;
2477     m_convReal = NULL;
2478 }
2479
2480 void wxCSConv::SetName(const wxChar *charset)
2481 {
2482     if (charset)
2483     {
2484         m_name = wxStrdup(charset);
2485         m_deferred = true;
2486     }
2487 }
2488
2489 wxMBConv *wxCSConv::DoCreate() const
2490 {
2491     // check for the special case of ASCII or ISO8859-1 charset: as we have
2492     // special knowledge of it anyhow, we don't need to create a special
2493     // conversion object
2494     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2495     {
2496         // don't convert at all
2497         return NULL;
2498     }
2499
2500     // we trust OS to do conversion better than we can so try external
2501     // conversion methods first
2502     //
2503     // the full order is:
2504     //      1. OS conversion (iconv() under Unix or Win32 API)
2505     //      2. hard coded conversions for UTF
2506     //      3. wxEncodingConverter as fall back
2507
2508     // step (1)
2509 #ifdef HAVE_ICONV
2510 #if !wxUSE_FONTMAP
2511     if ( m_name )
2512 #endif // !wxUSE_FONTMAP
2513     {
2514         wxString name(m_name);
2515
2516 #if wxUSE_FONTMAP
2517         if ( name.empty() )
2518             name = wxFontMapperBase::GetEncodingName(m_encoding);
2519 #endif // wxUSE_FONTMAP
2520
2521         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2522         if ( conv->IsOk() )
2523             return conv;
2524
2525         delete conv;
2526     }
2527 #endif // HAVE_ICONV
2528
2529 #ifdef wxHAVE_WIN32_MB2WC
2530     {
2531 #if wxUSE_FONTMAP
2532         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2533                                       : new wxMBConv_win32(m_encoding);
2534         if ( conv->IsOk() )
2535             return conv;
2536
2537         delete conv;
2538 #else
2539         return NULL;
2540 #endif
2541     }
2542 #endif // wxHAVE_WIN32_MB2WC
2543 #if defined(__WXMAC__)
2544     {
2545         // leave UTF16 and UTF32 to the built-ins of wx
2546         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2547             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2548         {
2549
2550 #if wxUSE_FONTMAP
2551             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2552                                         : new wxMBConv_mac(m_encoding);
2553 #else
2554             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2555 #endif
2556             if ( conv->IsOk() )
2557                  return conv;
2558
2559             delete conv;
2560         }
2561     }
2562 #endif
2563 #if defined(__WXCOCOA__)
2564     {
2565         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2566         {
2567
2568 #if wxUSE_FONTMAP
2569             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2570                                           : new wxMBConv_cocoa(m_encoding);
2571 #else
2572             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2573 #endif
2574             if ( conv->IsOk() )
2575                  return conv;
2576
2577             delete conv;
2578         }
2579     }
2580 #endif
2581     // step (2)
2582     wxFontEncoding enc = m_encoding;
2583 #if wxUSE_FONTMAP
2584     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2585     {
2586         // use "false" to suppress interactive dialogs -- we can be called from
2587         // anywhere and popping up a dialog from here is the last thing we want to
2588         // do
2589         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2590     }
2591 #endif // wxUSE_FONTMAP
2592
2593     switch ( enc )
2594     {
2595         case wxFONTENCODING_UTF7:
2596              return new wxMBConvUTF7;
2597
2598         case wxFONTENCODING_UTF8:
2599              return new wxMBConvUTF8;
2600
2601         case wxFONTENCODING_UTF16BE:
2602              return new wxMBConvUTF16BE;
2603
2604         case wxFONTENCODING_UTF16LE:
2605              return new wxMBConvUTF16LE;
2606
2607         case wxFONTENCODING_UTF32BE:
2608              return new wxMBConvUTF32BE;
2609
2610         case wxFONTENCODING_UTF32LE:
2611              return new wxMBConvUTF32LE;
2612
2613         default:
2614              // nothing to do but put here to suppress gcc warnings
2615              ;
2616     }
2617
2618     // step (3)
2619 #if wxUSE_FONTMAP
2620     {
2621         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2622                                       : new wxMBConv_wxwin(m_encoding);
2623         if ( conv->IsOk() )
2624             return conv;
2625
2626         delete conv;
2627     }
2628 #endif // wxUSE_FONTMAP
2629
2630     // NB: This is a hack to prevent deadlock. What could otherwise happen
2631     //     in Unicode build: wxConvLocal creation ends up being here
2632     //     because of some failure and logs the error. But wxLog will try to
2633     //     attach timestamp, for which it will need wxConvLocal (to convert
2634     //     time to char* and then wchar_t*), but that fails, tries to log
2635     //     error, but wxLog has a (already locked) critical section that
2636     //     guards static buffer.
2637     static bool alreadyLoggingError = false;
2638     if (!alreadyLoggingError)
2639     {
2640         alreadyLoggingError = true;
2641         wxLogError(_("Cannot convert from the charset '%s'!"),
2642                    m_name ? m_name
2643                       :
2644 #if wxUSE_FONTMAP
2645                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2646 #else // !wxUSE_FONTMAP
2647                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2648 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2649               );
2650         alreadyLoggingError = false;
2651     }
2652
2653     return NULL;
2654 }
2655
2656 void wxCSConv::CreateConvIfNeeded() const
2657 {
2658     if ( m_deferred )
2659     {
2660         wxCSConv *self = (wxCSConv *)this; // const_cast
2661
2662 #if wxUSE_INTL
2663         // if we don't have neither the name nor the encoding, use the default
2664         // encoding for this system
2665         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2666         {
2667             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2668         }
2669 #endif // wxUSE_INTL
2670
2671         self->m_convReal = DoCreate();
2672         self->m_deferred = false;
2673     }
2674 }
2675
2676 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2677 {
2678     CreateConvIfNeeded();
2679
2680     if (m_convReal)
2681         return m_convReal->MB2WC(buf, psz, n);
2682
2683     // latin-1 (direct)
2684     size_t len = strlen(psz);
2685
2686     if (buf)
2687     {
2688         for (size_t c = 0; c <= len; c++)
2689             buf[c] = (unsigned char)(psz[c]);
2690     }
2691
2692     return len;
2693 }
2694
2695 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2696 {
2697     CreateConvIfNeeded();
2698
2699     if (m_convReal)
2700         return m_convReal->WC2MB(buf, psz, n);
2701
2702     // latin-1 (direct)
2703     const size_t len = wxWcslen(psz);
2704     if (buf)
2705     {
2706         for (size_t c = 0; c <= len; c++)
2707         {
2708             if (psz[c] > 0xFF)
2709                 return (size_t)-1;
2710             buf[c] = (char)psz[c];
2711         }
2712     }
2713     else
2714     {
2715         for (size_t c = 0; c <= len; c++)
2716         {
2717             if (psz[c] > 0xFF)
2718                 return (size_t)-1;
2719         }
2720     }
2721
2722     return len;
2723 }
2724
2725 // ----------------------------------------------------------------------------
2726 // globals
2727 // ----------------------------------------------------------------------------
2728
2729 #ifdef __WINDOWS__
2730     static wxMBConv_win32 wxConvLibcObj;
2731 #elif defined(__WXMAC__) && !defined(__MACH__)
2732     static wxMBConv_mac wxConvLibcObj ;
2733 #else
2734     static wxMBConvLibc wxConvLibcObj;
2735 #endif
2736
2737 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2738 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2739 static wxMBConvUTF7 wxConvUTF7Obj;
2740 static wxMBConvUTF8 wxConvUTF8Obj;
2741
2742 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2743 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2744 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2745 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2746 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2747 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2748 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2749 #ifdef __WXOSX__
2750                                     wxConvUTF8Obj;
2751 #else
2752                                     wxConvLibcObj;
2753 #endif
2754
2755
2756 #else // !wxUSE_WCHAR_T
2757
2758 // stand-ins in absence of wchar_t
2759 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2760                                 wxConvISO8859_1,
2761                                 wxConvLocal,
2762                                 wxConvUTF8;
2763
2764 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2765
2766