src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/private.h"
  45     #include "wx/msw/missing.h"
  46 #endif
  47
  48 #ifndef __WXWINCE__
  49 #include <errno.h>
  50 #endif
  51
  52 #include <ctype.h>
  53 #include <string.h>
  54 #include <stdlib.h>
  55
  56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  57     #define wxHAVE_WIN32_MB2WC
  58 #endif // __WIN32__ but !__WXMICROWIN__
  59
  60 // ----------------------------------------------------------------------------
  61 // headers
  62 // ----------------------------------------------------------------------------
  63
  64 #ifdef __SALFORDC__
  65     #include <clib.h>
  66 #endif
  67
  68 #ifdef HAVE_ICONV
  69     #include <iconv.h>
  70     #include "wx/thread.h"
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #ifndef __DARWIN__
  79 #include <ATSUnicode.h>
  80 #include <TextCommon.h>
  81 #include <TextEncodingConverter.h>
  82 #endif
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86 // ----------------------------------------------------------------------------
  87 // macros
  88 // ----------------------------------------------------------------------------
  89
// In-place swap helpers: replace each of the first `len` elements of `str`
// with the result of wxUINT32_SWAP_ALWAYS / wxUINT16_SWAP_ALWAYS applied to
// it. Both expand to a braced block declaring a local counter `_c`; the
// arguments are pasted unparenthesized, so pass simple expressions only.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }

// Select, from SIZEOF_WCHAR_T and WORDS_BIGENDIAN, the encoding names used
// for wchar_t (WC_NAME, WC_NAME_BEST) and the matching swap macro (WC_BSWAP).
// WC_UTF16 is defined only when wchar_t is 2 bytes wide.
#if SIZEOF_WCHAR_T == 4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#elif SIZEOF_WCHAR_T == 2
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
 114
 115 // ============================================================================
 116 // implementation
 117 // ============================================================================
 118
 119 // ----------------------------------------------------------------------------
 120 // UTF-16 en/decoding to/from UCS-4
 121 // ----------------------------------------------------------------------------
 122
 123
 124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 125 {
 126     if (input<=0xffff)
 127     {
 128         if (output)
 129             *output = (wxUint16) input;
 130         return 1;
 131     }
 132     else if (input>=0x110000)
 133     {
 134         return (size_t)-1;
 135     }
 136     else
 137     {
 138         if (output)
 139         {
 140             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 141             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 142         }
 143         return 2;
 144     }
 145 }
 146
 147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 148 {
 149     if ((*input<0xd800) || (*input>0xdfff))
 150     {
 151         output = *input;
 152         return 1;
 153     }
 154     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 155     {
 156         output = *input;
 157         return (size_t)-1;
 158     }
 159     else
 160     {
 161         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 162         return 2;
 163     }
 164 }
 165
 166
 167 // ----------------------------------------------------------------------------
 168 // wxMBConv
 169 // ----------------------------------------------------------------------------
 170
// Destructor defined out of line; its body is intentionally empty.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 175
 176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 177 {
 178     if ( psz )
 179     {
 180         // calculate the length of the buffer needed first
 181         size_t nLen = MB2WC(NULL, psz, 0);
 182         if ( nLen != (size_t)-1 )
 183         {
 184             // now do the actual conversion
 185             wxWCharBuffer buf(nLen);
 186             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 187             if ( nLen != (size_t)-1 )
 188             {
 189                 return buf;
 190             }
 191         }
 192     }
 193
 194     wxWCharBuffer buf((wchar_t *)NULL);
 195
 196     return buf;
 197 }
 198
 199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 200 {
 201     if ( pwz )
 202     {
 203         size_t nLen = WC2MB(NULL, pwz, 0);
 204         if ( nLen != (size_t)-1 )
 205         {
 206             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 207             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 208             if ( nLen != (size_t)-1 )
 209             {
 210                 return buf;
 211             }
 212         }
 213     }
 214
 215     wxCharBuffer buf((char *)NULL);
 216
 217     return buf;
 218 }
 219
 220 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 221 {
 222     wxASSERT(pOutSize != NULL);
 223
 224     const char* szEnd = szString + nStringLen + 1;
 225     const char* szPos = szString;
 226     const char* szStart = szPos;
 227
 228     size_t nActualLength = 0;
 229     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 230
 231     wxWCharBuffer theBuffer(nCurrentSize);
 232
 233     //Convert the string until the length() is reached, continuing the
 234     //loop every time a null character is reached
 235     while(szPos != szEnd)
 236     {
 237         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 238
 239         //Get the length of the current (sub)string
 240         size_t nLen = MB2WC(NULL, szPos, 0);
 241
 242         //Invalid conversion?
 243         if( nLen == (size_t)-1 )
 244         {
 245             *pOutSize = 0;
 246             theBuffer.data()[0u] = wxT('\0');
 247             return theBuffer;
 248         }
 249
 250
 251         //Increase the actual length (+1 for current null character)
 252         nActualLength += nLen + 1;
 253
 254         //if buffer too big, realloc the buffer
 255         if (nActualLength > (nCurrentSize+1))
 256         {
 257             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 258             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 259             theBuffer = theNewBuffer;
 260             nCurrentSize <<= 1;
 261         }
 262
 263         //Convert the current (sub)string
 264         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 265         {
 266             *pOutSize = 0;
 267             theBuffer.data()[0u] = wxT('\0');
 268             return theBuffer;
 269         }
 270
 271         //Increment to next (sub)string
 272         //Note that we have to use strlen instead of nLen here
 273         //because XX2XX gives us the size of the output buffer,
 274         //which is not necessarily the length of the string
 275         szPos += strlen(szPos) + 1;
 276     }
 277
 278     //success - return actual length and the buffer
 279     *pOutSize = nActualLength;
 280     return theBuffer;
 281 }
 282
 283 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 284 {
 285     wxASSERT(pOutSize != NULL);
 286
 287     const wchar_t* szEnd = szString + nStringLen + 1;
 288     const wchar_t* szPos = szString;
 289     const wchar_t* szStart = szPos;
 290
 291     size_t nActualLength = 0;
 292     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 293
 294     wxCharBuffer theBuffer(nCurrentSize);
 295
 296     //Convert the string until the length() is reached, continuing the
 297     //loop every time a null character is reached
 298     while(szPos != szEnd)
 299     {
 300         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 301
 302         //Get the length of the current (sub)string
 303         size_t nLen = WC2MB(NULL, szPos, 0);
 304
 305         //Invalid conversion?
 306         if( nLen == (size_t)-1 )
 307         {
 308             *pOutSize = 0;
 309             theBuffer.data()[0u] = wxT('\0');
 310             return theBuffer;
 311         }
 312
 313         //Increase the actual length (+1 for current null character)
 314         nActualLength += nLen + 1;
 315
 316         //if buffer too big, realloc the buffer
 317         if (nActualLength > (nCurrentSize+1))
 318         {
 319             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 320             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 321             theBuffer = theNewBuffer;
 322             nCurrentSize <<= 1;
 323         }
 324
 325         //Convert the current (sub)string
 326         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 327         {
 328             *pOutSize = 0;
 329             theBuffer.data()[0u] = wxT('\0');
 330             return theBuffer;
 331         }
 332
 333         //Increment to next (sub)string
 334         //Note that we have to use wxWcslen instead of nLen here
 335         //because XX2XX gives us the size of the output buffer,
 336         //which is not necessarily the length of the string
 337         szPos += wxWcslen(szPos) + 1;
 338     }
 339
 340     //success - return actual length and the buffer
 341     *pOutSize = nActualLength;
 342     return theBuffer;
 343 }
 344
 345 // ----------------------------------------------------------------------------
 346 // wxMBConvLibc
 347 // ----------------------------------------------------------------------------
 348
// Implements MB2WC() by passing buf, psz and n unchanged to wxMB2WC() and
// returning whatever it returns.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 353
// Implements WC2MB() by passing buf, psz and n unchanged to wxWC2MB() and
// returning whatever it returns.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 358
 359 #ifdef __UNIX__
 360
 361 // ----------------------------------------------------------------------------
 362 // wxConvBrokenFileNames
 363 // ----------------------------------------------------------------------------
 364
 365 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 366 {
 367     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 368                   || wxStricmp(charset, _T("UTF8")) == 0  )
 369         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 370     else
 371         m_conv = new wxCSConv(charset);
 372 }
 373
// Forwards the conversion to the converter chosen in the constructor
// (m_conv) and returns its result unchanged.
size_t
wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
                             const char *psz,
                             size_t outputSize) const
{
    return m_conv->MB2WC( outputBuf, psz, outputSize );
}
 381
// Forwards the conversion to the converter chosen in the constructor
// (m_conv) and returns its result unchanged.
size_t
wxConvBrokenFileNames::WC2MB(char *outputBuf,
                             const wchar_t *psz,
                             size_t outputSize) const
{
    return m_conv->WC2MB( outputBuf, psz, outputSize );
}
 389
 390 #endif
 391
 392 // ----------------------------------------------------------------------------
 393 // UTF-7
 394 // ----------------------------------------------------------------------------
 395
 396 // Implementation (C) 2004 Fredrik Roubert
 397
 398 //
 399 // BASE64 decoding table
 400 //
// Indexed by byte value (256 entries): the 6-bit value of the byte when it
// is one of the BASE64 characters 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/',
// and 0xff for every other byte. wxMBConvUTF7::MB2WC() uses 0xff as the
// "end of encoded run" marker.
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 436
// Decode the NUL-terminated UTF-7 string psz into wide characters.
//
// If buf is NULL nothing is stored and only the number of wide characters is
// computed. Otherwise the outer loop stops once n characters have been
// produced and a terminating 0 is appended only if fewer than n were stored.
// Returns the number of wide characters produced, not counting the 0.
//
// NOTE(review): the inner BASE64 loop does not re-check len against n, so a
// long encoded run can store past n elements when buf is not NULL -- confirm
// that callers always size buf from a previous NULL-buf call.
size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        unsigned char cc = *psz++;
        if (cc != '+')
        {
            // plain ASCII char: copied through unchanged
            if (buf)
                *buf++ = cc;
            len++;
        }
        else if (*psz == '-')
        {
            // encoded plus sign: "+-" stands for a single '+'
            if (buf)
                *buf++ = cc;
            len++;
            psz++;
        }
        else
        {
            // BASE64 encoded string: runs until the first byte which is not
            // a BASE64 character (table entry 0xff)
            bool lsb;           // true when the next byte is the low half
            unsigned char c;    // the byte just extracted
            unsigned int d, l;  // bit accumulator and number of pending bits
            for (lsb = false, d = 0, l = 0;
                (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
            {
                // append the 6 bits carried by this character
                d <<= 6;
                d += cc;
                // extract every complete byte now available
                for (l += 6; l >= 8; lsb = !lsb)
                {
                    c = (unsigned char)((d >> (l -= 8)) % 256);
                    if (lsb)
                    {
                        // low byte completes the 16-bit unit
                        if (buf)
                            *buf++ |= c;
                        len ++;
                    }
                    else
                        // high byte comes first: start a new unit with it
                        if (buf)
                            *buf = (wchar_t)(c << 8);
                }
            }
            // an explicit '-' terminating the run is consumed
            if (*psz == '-')
                psz++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 492
 493 //
 494 // BASE64 encoding table
 495 //
// Indexed by a 6-bit value (0..63): the BASE64 character representing it.
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 507
 508 //
 509 // UTF-7 encoding table
 510 //
 511 // 0 - Set D (directly encoded characters)
 512 // 1 - Set O (optional direct characters)
 513 // 2 - whitespace characters (optional)
 514 // 3 - special characters
 515 //
// Indexed by 7-bit character code (0..127), 16 codes per row; each entry is
// one of the classes 0..3 listed above. wxMBConvUTF7::WC2MB() copies only
// class 0 characters directly and BASE64-encodes everything else.
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 527
// Encode the NUL-terminated wide string psz as UTF-7.
//
// If buf is NULL nothing is stored and only the number of bytes is computed.
// Otherwise the outer loop stops once n bytes have been produced and a
// terminating 0 is appended only if fewer than n were stored. Returns the
// number of bytes produced, not counting the 0, or (size_t)-1 when WC_UTF16
// is not defined and a character above 0xffff is met.
//
// NOTE(review): the BASE64 loop and the '+'/'-' delimiters are emitted
// without re-checking len against n, so output can run past n when buf is
// not NULL -- confirm callers size buf from a previous NULL-buf call.
// NOTE(review): if wchar_t is signed, a negative cc satisfies cc < 0x80 and
// would index utf7encode out of range -- confirm inputs are never negative.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{


    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;
        if (cc < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char (class 0): copied through unchanged
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // everything else starts a shifted sequence
            if (buf)
                *buf++ = '+';
            len++;
            if (cc != '+')
            {
                // BASE64 encode string: the run extends until a NUL or a
                // class 0 character; '+' itself is written as "+-"
                unsigned int lsb, d, l; // byte selector, bit accumulator,
                                        // number of pending bits
                for (d = 0, l = 0;; psz++)
                {
                    // feed the high byte of cc first, then the low byte
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        // emit every complete group of 6 bits
                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            if (buf)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }
                    // peek at the next character; it is consumed by the
                    // loop increment only if it belongs to this run
                    cc = *psz;
                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
                        break;
                }
                if (l != 0)
                {
                    // flush the remaining l (2 or 4) bits, padded with
                    // zero bits on the right
                    if (buf)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }
            // close the shifted sequence
            if (buf)
                *buf++ = '-';
            len++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 595
 596 // ----------------------------------------------------------------------------
 597 // UTF-8
 598 // ----------------------------------------------------------------------------
 599
 600 static wxUint32 utf8_max[]=
 601     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 602
 603 // boundaries of the private use area we use to (temporarily) remap invalid
 604 // characters invalid in a UTF-8 encoded string
 605 const wxUint32 wxUnicodePUA = 0x100000;
 606 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 607
 608 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 609 {
 610     size_t len = 0;
 611
 612     while (*psz && ((!buf) || (len < n)))
 613     {
 614         const char *opsz = psz;
 615         bool invalid = false;
 616         unsigned char cc = *psz++, fc = cc;
 617         unsigned cnt;
 618         for (cnt = 0; fc & 0x80; cnt++)
 619             fc <<= 1;
 620         if (!cnt)
 621         {
 622             // plain ASCII char
 623             if (buf)
 624                 *buf++ = cc;
 625             len++;
 626
 627             // escape the escape character for octal escapes
 628             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 629                     && cc == '\\' && (!buf || len < n))
 630             {
 631                 if (buf)
 632                     *buf++ = cc;
 633                 len++;
 634             }
 635         }
 636         else
 637         {
 638             cnt--;
 639             if (!cnt)
 640             {
 641                 // invalid UTF-8 sequence
 642                 invalid = true;
 643             }
 644             else
 645             {
 646                 unsigned ocnt = cnt - 1;
 647                 wxUint32 res = cc & (0x3f >> cnt);
 648                 while (cnt--)
 649                 {
 650                     cc = *psz;
 651                     if ((cc & 0xC0) != 0x80)
 652                     {
 653                         // invalid UTF-8 sequence
 654                         invalid = true;
 655                         break;
 656                     }
 657                     psz++;
 658                     res = (res << 6) | (cc & 0x3f);
 659                 }
 660                 if (invalid || res <= utf8_max[ocnt])
 661                 {
 662                     // illegal UTF-8 encoding
 663                     invalid = true;
 664                 }
 665                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 666                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 667                 {
 668                     // if one of our PUA characters turns up externally
 669                     // it must also be treated as an illegal sequence
 670                     // (a bit like you have to escape an escape character)
 671                     invalid = true;
 672                 }
 673                 else
 674                 {
 675 #ifdef WC_UTF16
 676                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 677                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 678                     if (pa == (size_t)-1)
 679                     {
 680                         invalid = true;
 681                     }
 682                     else
 683                     {
 684                         if (buf)
 685                             buf += pa;
 686                         len += pa;
 687                     }
 688 #else // !WC_UTF16
 689                     if (buf)
 690                         *buf++ = res;
 691                     len++;
 692 #endif // WC_UTF16/!WC_UTF16
 693                 }
 694             }
 695             if (invalid)
 696             {
 697                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 698                 {
 699                     while (opsz < psz && (!buf || len < n))
 700                     {
 701 #ifdef WC_UTF16
 702                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 703                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 704                         wxASSERT(pa != (size_t)-1);
 705                         if (buf)
 706                             buf += pa;
 707                         opsz++;
 708                         len += pa;
 709 #else
 710                         if (buf)
 711                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 712                         opsz++;
 713                         len++;
 714 #endif
 715                     }
 716                 }
 717                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 718                 {
 719                     while (opsz < psz && (!buf || len < n))
 720                     {
 721                         if ( buf && len + 3 < n )
 722                         {
 723                             unsigned char n = *opsz;
 724                             *buf++ = L'\\';
 725                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 726                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 727                             *buf++ = (wchar_t)( L'0' + n % 010 );
 728                         }
 729                         opsz++;
 730                         len += 4;
 731                     }
 732                 }
 733                 else // MAP_INVALID_UTF8_NOT
 734                 {
 735                     return (size_t)-1;
 736                 }
 737             }
 738         }
 739     }
 740     if (buf && (len < n))
 741         *buf = 0;
 742     return len;
 743 }
 744
 745 static inline bool isoctal(wchar_t wch)
 746 {
 747     return L'0' <= wch && wch <= L'7';
 748 }
 749
// Encode the NUL-terminated wide string psz as UTF-8.
//
// If buf is NULL nothing is stored and only the number of bytes is computed.
// Otherwise the loop stops once n bytes have been produced and a terminating
// 0 is appended only if fewer than n were stored. Returns the number of
// bytes produced, not counting the 0.
//
// Depending on m_options, the remappings applied by MB2WC() are undone:
// characters in [wxUnicodePUA, wxUnicodePUAEnd) become a single byte again
// (MAP_INVALID_UTF8_TO_PUA), a doubled backslash becomes one backslash and a
// backslash followed by three octal digits becomes the byte they denote
// (MAP_INVALID_UTF8_TO_OCTAL).
//
// NOTE(review): a multi-byte sequence is stored without checking that all of
// its bytes fit below n -- confirm callers size buf from a NULL-buf call.
size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wxUint32 cc;
#ifdef WC_UTF16
        // cast is ok for WC_UTF16
        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
        // on a malformed pair skip just one unit; cc then holds that unit
        psz += (pa == (size_t)-1) ? 1 : pa;
#else
        cc=(*psz++) & 0x7fffffff;
#endif

        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
        {
            // remapped invalid byte: restore the original byte value
            if (buf)
                *buf++ = (char)(cc - wxUnicodePUA);
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == L'\\' && psz[0] == L'\\' )
        {
            // escaped backslash: two in, one out
            if (buf)
                *buf++ = (char)cc;
            psz++;
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
                    cc == L'\\' &&
                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
        {
            // octal escape: rebuild the byte from its three digits
            if (buf)
            {
                *buf++ = (char) ((psz[0] - L'0')*0100 +
                                 (psz[1] - L'0')*010 +
                                 (psz[2] - L'0'));
            }

            psz += 3;
            len++;
        }
        else
        {
            // cnt becomes the number of trailing bytes needed for cc
            unsigned cnt;
            for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
            if (!cnt)
            {
                // plain ASCII char
                if (buf)
                    *buf++ = (char) cc;
                len++;
            }

            else
            {
                len += cnt + 1;
                if (buf)
                {
                    // lead byte: cnt + 1 leading 1 bits followed by the top
                    // bits of cc, then cnt continuation bytes of 6 bits each
                    *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
                    while (cnt--)
                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
                }
            }
        }
    }

    if (buf && (len<n))
        *buf = 0;

    return len;
}
 824
 825 // ----------------------------------------------------------------------------
 826 // UTF-16
 827 // ----------------------------------------------------------------------------
 828
// The implementations below are written once as "straight" (bytes copied in
// order) and once as "swap" (the two bytes of every unit exchanged). These
// macros bind the two names to wxMBConvUTF16BE and wxMBConvUTF16LE: when
// WORDS_BIGENDIAN is defined "straight" is the BE class, otherwise the LE one.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 836
 837
 838 #ifdef WC_UTF16
 839
 840 // copy 16bit MB to 16bit String
 841 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 842 {
 843     size_t len=0;
 844
 845     while (*(wxUint16*)psz && (!buf || len < n))
 846     {
 847         if (buf)
 848             *buf++ = *(wxUint16*)psz;
 849         len++;
 850
 851         psz += sizeof(wxUint16);
 852     }
 853     if (buf && len<n)   *buf=0;
 854
 855     return len;
 856 }
 857
 858
 859 // copy 16bit String to 16bit MB
 860 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 861 {
 862     size_t len=0;
 863
 864     while (*psz && (!buf || len < n))
 865     {
 866         if (buf)
 867         {
 868             *(wxUint16*)buf = *psz;
 869             buf += sizeof(wxUint16);
 870         }
 871         len += sizeof(wxUint16);
 872         psz++;
 873     }
 874     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 875
 876     return len;
 877 }
 878
 879
 880 // swap 16bit MB to 16bit String
 881 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 882 {
 883     size_t len=0;
 884
 885     while (*(wxUint16*)psz && (!buf || len < n))
 886     {
 887         if (buf)
 888         {
 889             ((char *)buf)[0] = psz[1];
 890             ((char *)buf)[1] = psz[0];
 891             buf++;
 892         }
 893         len++;
 894         psz += sizeof(wxUint16);
 895     }
 896     if (buf && len<n)   *buf=0;
 897
 898     return len;
 899 }
 900
 901
 902 // swap 16bit MB to 16bit String
 903 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 904 {
 905     size_t len=0;
 906
 907     while (*psz && (!buf || len < n))
 908     {
 909         if (buf)
 910         {
 911             *buf++ = ((char*)psz)[1];
 912             *buf++ = ((char*)psz)[0];
 913         }
 914         len += sizeof(wxUint16);
 915         psz++;
 916     }
 917     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 918
 919     return len;
 920 }
 921
 922
 923 #else // WC_UTF16
 924
 925
 926 // copy 16bit MB to 32bit String
 927 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 928 {
 929     size_t len=0;
 930
 931     while (*(wxUint16*)psz && (!buf || len < n))
 932     {
 933         wxUint32 cc;
 934         size_t pa=decode_utf16((wxUint16*)psz, cc);
 935         if (pa == (size_t)-1)
 936             return pa;
 937
 938         if (buf)
 939             *buf++ = cc;
 940         len++;
 941         psz += pa * sizeof(wxUint16);
 942     }
 943     if (buf && len<n)   *buf=0;
 944
 945     return len;
 946 }
 947
 948
 949 // copy 32bit String to 16bit MB
 950 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 951 {
 952     size_t len=0;
 953
 954     while (*psz && (!buf || len < n))
 955     {
 956         wxUint16 cc[2];
 957         size_t pa=encode_utf16(*psz, cc);
 958
 959         if (pa == (size_t)-1)
 960             return pa;
 961
 962         if (buf)
 963         {
 964             *(wxUint16*)buf = cc[0];
 965             buf += sizeof(wxUint16);
 966             if (pa > 1)
 967             {
 968                 *(wxUint16*)buf = cc[1];
 969                 buf += sizeof(wxUint16);
 970             }
 971         }
 972
 973         len += pa*sizeof(wxUint16);
 974         psz++;
 975     }
 976     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 977
 978     return len;
 979 }
 980
 981
 982 // swap 16bit MB to 32bit String
 983 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 984 {
 985     size_t len=0;
 986
 987     while (*(wxUint16*)psz && (!buf || len < n))
 988     {
 989         wxUint32 cc;
 990         char tmp[4];
 991         tmp[0]=psz[1];  tmp[1]=psz[0];
 992         tmp[2]=psz[3];  tmp[3]=psz[2];
 993
 994         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 995         if (pa == (size_t)-1)
 996             return pa;
 997
 998         if (buf)
 999             *buf++ = cc;
1000
1001         len++;
1002         psz += pa * sizeof(wxUint16);
1003     }
1004     if (buf && len<n)   *buf=0;
1005
1006     return len;
1007 }
1008
1009
1010 // swap 32bit String to 16bit MB
1011 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1012 {
1013     size_t len=0;
1014
1015     while (*psz && (!buf || len < n))
1016     {
1017         wxUint16 cc[2];
1018         size_t pa=encode_utf16(*psz, cc);
1019
1020         if (pa == (size_t)-1)
1021             return pa;
1022
1023         if (buf)
1024         {
1025             *buf++ = ((char*)cc)[1];
1026             *buf++ = ((char*)cc)[0];
1027             if (pa > 1)
1028             {
1029                 *buf++ = ((char*)cc)[3];
1030                 *buf++ = ((char*)cc)[2];
1031             }
1032         }
1033
1034         len += pa*sizeof(wxUint16);
1035         psz++;
1036     }
1037     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1038
1039     return len;
1040 }
1041
1042 #endif // WC_UTF16
1043
1044
1045 // ----------------------------------------------------------------------------
1046 // UTF-32
1047 // ----------------------------------------------------------------------------
1048
// the "straight" name is mapped to the UTF-32 class of the byte order
// selected by WORDS_BIGENDIAN (BE if it is defined, LE otherwise) and the
// "swap" one to the class of the opposite byte order
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global UTF-32 converter objects, one for each byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1060
1061
1062 #ifdef WC_UTF16
1063
1064 // copy 32bit MB to 16bit String
1065 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1066 {
1067     size_t len=0;
1068
1069     while (*(wxUint32*)psz && (!buf || len < n))
1070     {
1071         wxUint16 cc[2];
1072
1073         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1074         if (pa == (size_t)-1)
1075             return pa;
1076
1077         if (buf)
1078         {
1079             *buf++ = cc[0];
1080             if (pa > 1)
1081                 *buf++ = cc[1];
1082         }
1083         len += pa;
1084         psz += sizeof(wxUint32);
1085     }
1086     if (buf && len<n)   *buf=0;
1087
1088     return len;
1089 }
1090
1091
1092 // copy 16bit String to 32bit MB
1093 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1094 {
1095     size_t len=0;
1096
1097     while (*psz && (!buf || len < n))
1098     {
1099         wxUint32 cc;
1100
1101         // cast is ok for WC_UTF16
1102         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1103         if (pa == (size_t)-1)
1104             return pa;
1105
1106         if (buf)
1107         {
1108             *(wxUint32*)buf = cc;
1109             buf += sizeof(wxUint32);
1110         }
1111         len += sizeof(wxUint32);
1112         psz += pa;
1113     }
1114
1115     if (buf && len<=n-sizeof(wxUint32))
1116         *(wxUint32*)buf=0;
1117
1118     return len;
1119 }
1120
1121
1122
1123 // swap 32bit MB to 16bit String
1124 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1125 {
1126     size_t len=0;
1127
1128     while (*(wxUint32*)psz && (!buf || len < n))
1129     {
1130         char tmp[4];
1131         tmp[0] = psz[3];   tmp[1] = psz[2];
1132         tmp[2] = psz[1];   tmp[3] = psz[0];
1133
1134
1135         wxUint16 cc[2];
1136
1137         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1138         if (pa == (size_t)-1)
1139             return pa;
1140
1141         if (buf)
1142         {
1143             *buf++ = cc[0];
1144             if (pa > 1)
1145                 *buf++ = cc[1];
1146         }
1147         len += pa;
1148         psz += sizeof(wxUint32);
1149     }
1150
1151     if (buf && len<n)
1152         *buf=0;
1153
1154     return len;
1155 }
1156
1157
1158 // swap 16bit String to 32bit MB
1159 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1160 {
1161     size_t len=0;
1162
1163     while (*psz && (!buf || len < n))
1164     {
1165         char cc[4];
1166
1167         // cast is ok for WC_UTF16
1168         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1169         if (pa == (size_t)-1)
1170             return pa;
1171
1172         if (buf)
1173         {
1174             *buf++ = cc[3];
1175             *buf++ = cc[2];
1176             *buf++ = cc[1];
1177             *buf++ = cc[0];
1178         }
1179         len += sizeof(wxUint32);
1180         psz += pa;
1181     }
1182
1183     if (buf && len<=n-sizeof(wxUint32))
1184         *(wxUint32*)buf=0;
1185
1186     return len;
1187 }
1188
1189 #else // WC_UTF16
1190
1191
1192 // copy 32bit MB to 32bit String
1193 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1194 {
1195     size_t len=0;
1196
1197     while (*(wxUint32*)psz && (!buf || len < n))
1198     {
1199         if (buf)
1200             *buf++ = *(wxUint32*)psz;
1201         len++;
1202         psz += sizeof(wxUint32);
1203     }
1204
1205     if (buf && len<n)
1206         *buf=0;
1207
1208     return len;
1209 }
1210
1211
1212 // copy 32bit String to 32bit MB
1213 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1214 {
1215     size_t len=0;
1216
1217     while (*psz && (!buf || len < n))
1218     {
1219         if (buf)
1220         {
1221             *(wxUint32*)buf = *psz;
1222             buf += sizeof(wxUint32);
1223         }
1224
1225         len += sizeof(wxUint32);
1226         psz++;
1227     }
1228
1229     if (buf && len<=n-sizeof(wxUint32))
1230         *(wxUint32*)buf=0;
1231
1232     return len;
1233 }
1234
1235
1236 // swap 32bit MB to 32bit String
1237 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1238 {
1239     size_t len=0;
1240
1241     while (*(wxUint32*)psz && (!buf || len < n))
1242     {
1243         if (buf)
1244         {
1245             ((char *)buf)[0] = psz[3];
1246             ((char *)buf)[1] = psz[2];
1247             ((char *)buf)[2] = psz[1];
1248             ((char *)buf)[3] = psz[0];
1249             buf++;
1250         }
1251         len++;
1252         psz += sizeof(wxUint32);
1253     }
1254
1255     if (buf && len<n)
1256         *buf=0;
1257
1258     return len;
1259 }
1260
1261
1262 // swap 32bit String to 32bit MB
1263 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1264 {
1265     size_t len=0;
1266
1267     while (*psz && (!buf || len < n))
1268     {
1269         if (buf)
1270         {
1271             *buf++ = ((char *)psz)[3];
1272             *buf++ = ((char *)psz)[2];
1273             *buf++ = ((char *)psz)[1];
1274             *buf++ = ((char *)psz)[0];
1275         }
1276         len += sizeof(wxUint32);
1277         psz++;
1278     }
1279
1280     if (buf && len<=n-sizeof(wxUint32))
1281         *(wxUint32*)buf=0;
1282
1283     return len;
1284 }
1285
1286
1287 #endif // WC_UTF16
1288
1289
1290 // ============================================================================
1291 // The classes doing conversion using the iconv_xxx() functions
1292 // ============================================================================
1293
1294 #ifdef HAVE_ICONV
1295
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) is true if the iconv() call which returned cres
// and left bufLeft bytes of input unconverted must be considered as failed.
//
// NB: the macro arguments are parenthesized so that expressions and not only
//     plain variables can be safely passed to it
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast a pointer to the input buffer pointer to the type used by iconv() for
// its second argument (ICONV_CONST is not defined in this file)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1312
1313 // ----------------------------------------------------------------------------
1314 // wxMBConv_iconv: encapsulates an iconv character set
1315 // ----------------------------------------------------------------------------
1316
// wxMBConv implementation using a pair of iconv conversion descriptors, one
// for each direction, to convert between the charset with the name given to
// the constructor and wchar_t
class wxMBConv_iconv : public wxMBConv
{
public:
    // opens the conversion descriptors for the given charset name, use
    // IsOk() to check if this succeeded
    wxMBConv_iconv(const wxChar *name);

    // closes the descriptors which were successfully opened
    virtual ~wxMBConv_iconv();

    // multibyte -> wide char and wide char -> multibyte conversions: n is the
    // size of buf (in wchar_t for MB2WC(), in bytes for WC2MB()), buf may be
    // NULL to only compute the length of the result and (size_t)-1 is
    // returned if iconv() fails
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // true only if both conversion descriptors were opened successfully
    bool IsOk() const
        { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction
    iconv_t m2w,
            w2m;
#if wxUSE_THREADS
    // guards access to m2w and w2m objects
    wxMutex m_iconvMutex;
#endif

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain NULL
    static const char *ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;
};
1348
1349 // make the constructor available for unit testing
1350 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1351 {
1352     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1353     if ( !result->IsOk() )
1354     {
1355         delete result;
1356         return 0;
1357     }
1358     return result;
1359 }
1360
// both of these are determined by the constructor the first time it runs
// while ms_wcCharsetName is still NULL
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1363
1364 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1365 {
1366     // Do it the hard way
1367     char cname[100];
1368     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1369         cname[i] = (char) name[i];
1370
1371     // check for charset that represents wchar_t:
1372     if (ms_wcCharsetName == NULL)
1373     {
1374         ms_wcNeedsSwap = false;
1375
1376         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1377         ms_wcCharsetName = WC_NAME_BEST;
1378         m2w = iconv_open(ms_wcCharsetName, cname);
1379
1380         if (m2w == (iconv_t)-1)
1381         {
1382             // try charset w/o bytesex info (e.g. "UCS4")
1383             // and check for bytesex ourselves:
1384             ms_wcCharsetName = WC_NAME;
1385             m2w = iconv_open(ms_wcCharsetName, cname);
1386
1387             // last bet, try if it knows WCHAR_T pseudo-charset
1388             if (m2w == (iconv_t)-1)
1389             {
1390                 ms_wcCharsetName = "WCHAR_T";
1391                 m2w = iconv_open(ms_wcCharsetName, cname);
1392             }
1393
1394             if (m2w != (iconv_t)-1)
1395             {
1396                 char    buf[2], *bufPtr;
1397                 wchar_t wbuf[2], *wbufPtr;
1398                 size_t  insz, outsz;
1399                 size_t  res;
1400
1401                 buf[0] = 'A';
1402                 buf[1] = 0;
1403                 wbuf[0] = 0;
1404                 insz = 2;
1405                 outsz = SIZEOF_WCHAR_T * 2;
1406                 wbufPtr = wbuf;
1407                 bufPtr = buf;
1408
1409                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1410                             (char**)&wbufPtr, &outsz);
1411
1412                 if (ICONV_FAILED(res, insz))
1413                 {
1414                     ms_wcCharsetName = NULL;
1415                     wxLogLastError(wxT("iconv"));
1416                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1417                 }
1418                 else
1419                 {
1420                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1421                 }
1422             }
1423             else
1424             {
1425                 ms_wcCharsetName = NULL;
1426
1427                 // VS: we must not output an error here, since wxWidgets will safely
1428                 //     fall back to using wxEncodingConverter.
1429                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1430                 //wxLogError(
1431             }
1432         }
1433         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1434     }
1435     else // we already have ms_wcCharsetName
1436     {
1437         m2w = iconv_open(ms_wcCharsetName, cname);
1438     }
1439
1440     // NB: don't ever pass NULL to iconv_open(), it may crash!
1441     if ( ms_wcCharsetName )
1442     {
1443         w2m = iconv_open( cname, ms_wcCharsetName);
1444     }
1445     else
1446     {
1447         w2m = (iconv_t)-1;
1448     }
1449 }
1450
1451 wxMBConv_iconv::~wxMBConv_iconv()
1452 {
1453     if ( m2w != (iconv_t)-1 )
1454         iconv_close(m2w);
1455     if ( w2m != (iconv_t)-1 )
1456         iconv_close(w2m);
1457 }
1458
1459 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1460 {
1461 #if wxUSE_THREADS
1462     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1463     //     Unfortunately there is a couple of global wxCSConv objects such as
1464     //     wxConvLocal that are used all over wx code, so we have to make sure
1465     //     the handle is used by at most one thread at the time. Otherwise
1466     //     only a few wx classes would be safe to use from non-main threads
1467     //     as MB<->WC conversion would fail "randomly".
1468     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1469 #endif
1470
1471     size_t inbuf = strlen(psz);
1472     size_t outbuf = n * SIZEOF_WCHAR_T;
1473     size_t res, cres;
1474     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1475     wchar_t *bufPtr = buf;
1476     const char *pszPtr = psz;
1477
1478     if (buf)
1479     {
1480         // have destination buffer, convert there
1481         cres = iconv(m2w,
1482                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1483                      (char**)&bufPtr, &outbuf);
1484         res = n - (outbuf / SIZEOF_WCHAR_T);
1485
1486         if (ms_wcNeedsSwap)
1487         {
1488             // convert to native endianness
1489             WC_BSWAP(buf /* _not_ bufPtr */, res)
1490         }
1491
1492         // NB: iconv was given only strlen(psz) characters on input, and so
1493         //     it couldn't convert the trailing zero. Let's do it ourselves
1494         //     if there's some room left for it in the output buffer.
1495         if (res < n)
1496             buf[res] = 0;
1497     }
1498     else
1499     {
1500         // no destination buffer... convert using temp buffer
1501         // to calculate destination buffer requirement
1502         wchar_t tbuf[8];
1503         res = 0;
1504         do {
1505             bufPtr = tbuf;
1506             outbuf = 8*SIZEOF_WCHAR_T;
1507
1508             cres = iconv(m2w,
1509                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1510                          (char**)&bufPtr, &outbuf );
1511
1512             res += 8-(outbuf/SIZEOF_WCHAR_T);
1513         } while ((cres==(size_t)-1) && (errno==E2BIG));
1514     }
1515
1516     if (ICONV_FAILED(cres, inbuf))
1517     {
1518         //VS: it is ok if iconv fails, hence trace only
1519         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1520         return (size_t)-1;
1521     }
1522
1523     return res;
1524 }
1525
1526 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1527 {
1528 #if wxUSE_THREADS
1529     // NB: explained in MB2WC
1530     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1531 #endif
1532
1533     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1534     size_t outbuf = n;
1535     size_t res, cres;
1536
1537     wchar_t *tmpbuf = 0;
1538
1539     if (ms_wcNeedsSwap)
1540     {
1541         // need to copy to temp buffer to switch endianness
1542         // this absolutely doesn't rock!
1543         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1544         //  could be in read-only memory, or be accessed in some other thread)
1545         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1546         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1547         WC_BSWAP(tmpbuf, inbuf)
1548         psz=tmpbuf;
1549     }
1550
1551     if (buf)
1552     {
1553         // have destination buffer, convert there
1554         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1555
1556         res = n-outbuf;
1557
1558         // NB: iconv was given only wcslen(psz) characters on input, and so
1559         //     it couldn't convert the trailing zero. Let's do it ourselves
1560         //     if there's some room left for it in the output buffer.
1561         if (res < n)
1562             buf[0] = 0;
1563     }
1564     else
1565     {
1566         // no destination buffer... convert using temp buffer
1567         // to calculate destination buffer requirement
1568         char tbuf[16];
1569         res = 0;
1570         do {
1571             buf = tbuf; outbuf = 16;
1572
1573             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1574
1575             res += 16 - outbuf;
1576         } while ((cres==(size_t)-1) && (errno==E2BIG));
1577     }
1578
1579     if (ms_wcNeedsSwap)
1580     {
1581         free(tmpbuf);
1582     }
1583
1584     if (ICONV_FAILED(cres, inbuf))
1585     {
1586         //VS: it is ok if iconv fails, hence trace only
1587         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1588         return (size_t)-1;
1589     }
1590
1591     return res;
1592 }
1593
1594 #endif // HAVE_ICONV
1595
1596
1597 // ============================================================================
1598 // Win32 conversion classes
1599 // ============================================================================
1600
1601 #ifdef wxHAVE_WIN32_MB2WC
1602
// from utils.cpp: these functions return the code page to use for the given
// charset name or font encoding, they are only declared if wxUSE_FONTMAP
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1608
// wxMBConv implementation using the Win32 MultiByteToWideChar() and
// WideCharToMultiByte() functions with the code page stored in m_CodePage
class wxMBConv_win32 : public wxMBConv
{
public:
    // default ctor uses CP_ACP as code page
    wxMBConv_win32()
    {
        m_CodePage = CP_ACP;
    }

#if wxUSE_FONTMAP
    // use the code page returned by wxCharsetToCodepage() for this charset
    wxMBConv_win32(const wxChar* name)
    {
        m_CodePage = wxCharsetToCodepage(name);
    }

    // use the code page returned by wxEncodingToCodepage() for this encoding
    wxMBConv_win32(wxFontEncoding encoding)
    {
        m_CodePage = wxEncodingToCodepage(encoding);
    }
#endif

    // convert the NUL-terminated multibyte string psz to wide chars
    //
    // buf may be NULL to only compute the length needed, otherwise n is its
    // size in wchar_t
    //
    // returns the length of the result without the terminating NUL or
    // (size_t)-1 if MultiByteToWideChar() failed (returned 0)
    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
    {
        // note that we have to use MB_ERR_INVALID_CHARS flag as without it
        // the behaviour is not compatible with the Unix version (using iconv)
        // and breaks the library itself, e.g. wxTextInputStream::NextChar()
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
        // an error (tested under Windows Server 2003) and apparently it is
        // done on purpose, i.e. the function accepts any input in this case
        // and although I'd prefer to return error on ill-formed output, our
        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
        // explicitly ill-formed according to RFC 2152) either so we don't
        // even have any fallback here...
        int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;

        const size_t len = ::MultiByteToWideChar
                             (
                                m_CodePage,     // code page
                                flags,          // flags: fail on error
                                psz,            // input string
                                -1,             // its length (NUL-terminated)
                                buf,            // output string
                                buf ? n : 0     // size of output buffer
                             );

        // note that it returns count of written chars for buf != NULL and size
        // of the needed buffer for buf == NULL so in either case the length of
        // the string (which never includes the terminating NUL) is one less
        return len ? len - 1 : (size_t)-1;
    }

    // convert the NUL-terminated wide string pwz to multibyte
    //
    // buf may be NULL to only compute the size needed, otherwise n is its
    // size in bytes
    //
    // returns the length of the result without the terminating NUL or
    // (size_t)-1 if WideCharToMultiByte() failed or if the conversion into
    // buf was detected to be lossy (see below)
    size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
    {
        /*
            we have a problem here: by default, WideCharToMultiByte() may
            replace characters unrepresentable in the target code page with bad
            quality approximations such as turning "1/2" symbol (U+00BD) into
            "1" for the code pages which don't have it and we, obviously, want
            to avoid this at any price

            the trouble is that this function does it _silently_, i.e. it won't
            even tell us whether it did or not... Win98/2000 and higher provide
            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
            we have to resort to a round trip, i.e. check that converting back
            results in the same string -- this is, of course, expensive but
            otherwise we simply can't be sure to not garble the data.
         */

        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
        // it doesn't work with CJK encodings (which we test for rather roughly
        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
        // supporting it
        BOOL usedDef wxDUMMY_INITIALIZE(false);
        BOOL *pUsedDef;
        int flags;
        if ( CanUseNoBestFit() && m_CodePage < 50000 )
        {
            // it's our lucky day
            flags = WC_NO_BEST_FIT_CHARS;
            pUsedDef = &usedDef;
        }
        else // old system or unsupported encoding
        {
            flags = 0;
            pUsedDef = NULL;
        }

        const size_t len = ::WideCharToMultiByte
                             (
                                m_CodePage,     // code page
                                flags,          // either none or no best fit
                                pwz,            // input string
                                -1,             // it is (wide) NUL-terminated
                                buf,            // output buffer
                                buf ? n : 0,    // and its size
                                NULL,           // default "replacement" char
                                pUsedDef        // [out] was it used?
                             );

        if ( !len )
        {
            // function totally failed
            return (size_t)-1;
        }

        // if we were really converting, check if we succeeded
        if ( buf )
        {
            if ( flags )
            {
                // check if the conversion failed, i.e. if any replacements
                // were done
                if ( usedDef )
                    return (size_t)-1;
            }
            else // we must resort to double tripping...
            {
                // convert the result back and compare it with the original
                wxWCharBuffer wcBuf(n);
                if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
                        wcscmp(wcBuf, pwz) != 0 )
                {
                    // we didn't obtain the same thing we started from, hence
                    // the conversion was lossy and we consider that it failed
                    return (size_t)-1;
                }
            }
        }

        // see the comment above for the reason of "len - 1"
        return len - 1;
    }

    // false only if the code page is -1
    // NOTE(review): presumably -1 is what the code page lookup functions
    // return for an unknown charset or encoding -- confirm in utils.cpp
    bool IsOk() const { return m_CodePage != -1; }

private:
    // returns true if the OS version reported by wxGetOsVersion() is one for
    // which we rely on WC_NO_BEST_FIT_CHARS; the check is only done on the
    // first call and its result is cached in a static variable
    static bool CanUseNoBestFit()
    {
        // -1 means "not checked yet", otherwise 0 or 1
        static int s_isWin98Or2k = -1;

        if ( s_isWin98Or2k == -1 )
        {
            int verMaj, verMin;
            switch ( wxGetOsVersion(&verMaj, &verMin) )
            {
                case wxWIN95:
                    // version 4.10 or later
                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
                    break;

                case wxWINDOWS_NT:
                    // version 5 or later
                    s_isWin98Or2k = verMaj >= 5;
                    break;

                default:
                    // unknown, be conservative by default
                    s_isWin98Or2k = 0;
            }

            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
        }

        return s_isWin98Or2k == 1;
    }

    // the code page passed to the Win32 conversion functions
    long m_CodePage;
};
1776
1777 #endif // wxHAVE_WIN32_MB2WC
1778
1779 // ============================================================================
1780 // Cocoa conversion classes
1781 // ============================================================================
1782
1783 #if defined(__WXCOCOA__)
1784
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1788
1789 #include <CoreFoundation/CFString.h>
1790 #include <CoreFoundation/CFStringEncodingExt.h>
1791
1792 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1793 {
1794     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1795     if ( encoding == wxFONTENCODING_DEFAULT )
1796     {
1797         enc = CFStringGetSystemEncoding();
1798     }
1799     else switch( encoding)
1800     {
1801         case wxFONTENCODING_ISO8859_1 :
1802             enc = kCFStringEncodingISOLatin1 ;
1803             break ;
1804         case wxFONTENCODING_ISO8859_2 :
1805             enc = kCFStringEncodingISOLatin2;
1806             break ;
1807         case wxFONTENCODING_ISO8859_3 :
1808             enc = kCFStringEncodingISOLatin3 ;
1809             break ;
1810         case wxFONTENCODING_ISO8859_4 :
1811             enc = kCFStringEncodingISOLatin4;
1812             break ;
1813         case wxFONTENCODING_ISO8859_5 :
1814             enc = kCFStringEncodingISOLatinCyrillic;
1815             break ;
1816         case wxFONTENCODING_ISO8859_6 :
1817             enc = kCFStringEncodingISOLatinArabic;
1818             break ;
1819         case wxFONTENCODING_ISO8859_7 :
1820             enc = kCFStringEncodingISOLatinGreek;
1821             break ;
1822         case wxFONTENCODING_ISO8859_8 :
1823             enc = kCFStringEncodingISOLatinHebrew;
1824             break ;
1825         case wxFONTENCODING_ISO8859_9 :
1826             enc = kCFStringEncodingISOLatin5;
1827             break ;
1828         case wxFONTENCODING_ISO8859_10 :
1829             enc = kCFStringEncodingISOLatin6;
1830             break ;
1831         case wxFONTENCODING_ISO8859_11 :
1832             enc = kCFStringEncodingISOLatinThai;
1833             break ;
1834         case wxFONTENCODING_ISO8859_13 :
1835             enc = kCFStringEncodingISOLatin7;
1836             break ;
1837         case wxFONTENCODING_ISO8859_14 :
1838             enc = kCFStringEncodingISOLatin8;
1839             break ;
1840         case wxFONTENCODING_ISO8859_15 :
1841             enc = kCFStringEncodingISOLatin9;
1842             break ;
1843
1844         case wxFONTENCODING_KOI8 :
1845             enc = kCFStringEncodingKOI8_R;
1846             break ;
1847         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1848             enc = kCFStringEncodingDOSRussian;
1849             break ;
1850
1851 //      case wxFONTENCODING_BULGARIAN :
1852 //          enc = ;
1853 //          break ;
1854
1855         case wxFONTENCODING_CP437 :
1856             enc =kCFStringEncodingDOSLatinUS ;
1857             break ;
1858         case wxFONTENCODING_CP850 :
1859             enc = kCFStringEncodingDOSLatin1;
1860             break ;
1861         case wxFONTENCODING_CP852 :
1862             enc = kCFStringEncodingDOSLatin2;
1863             break ;
1864         case wxFONTENCODING_CP855 :
1865             enc = kCFStringEncodingDOSCyrillic;
1866             break ;
1867         case wxFONTENCODING_CP866 :
1868             enc =kCFStringEncodingDOSRussian ;
1869             break ;
1870         case wxFONTENCODING_CP874 :
1871             enc = kCFStringEncodingDOSThai;
1872             break ;
1873         case wxFONTENCODING_CP932 :
1874             enc = kCFStringEncodingDOSJapanese;
1875             break ;
1876         case wxFONTENCODING_CP936 :
1877             enc =kCFStringEncodingDOSChineseSimplif ;
1878             break ;
1879         case wxFONTENCODING_CP949 :
1880             enc = kCFStringEncodingDOSKorean;
1881             break ;
1882         case wxFONTENCODING_CP950 :
1883             enc = kCFStringEncodingDOSChineseTrad;
1884             break ;
1885         case wxFONTENCODING_CP1250 :
1886             enc = kCFStringEncodingWindowsLatin2;
1887             break ;
1888         case wxFONTENCODING_CP1251 :
1889             enc =kCFStringEncodingWindowsCyrillic ;
1890             break ;
1891         case wxFONTENCODING_CP1252 :
1892             enc =kCFStringEncodingWindowsLatin1 ;
1893             break ;
1894         case wxFONTENCODING_CP1253 :
1895             enc = kCFStringEncodingWindowsGreek;
1896             break ;
1897         case wxFONTENCODING_CP1254 :
1898             enc = kCFStringEncodingWindowsLatin5;
1899             break ;
1900         case wxFONTENCODING_CP1255 :
1901             enc =kCFStringEncodingWindowsHebrew ;
1902             break ;
1903         case wxFONTENCODING_CP1256 :
1904             enc =kCFStringEncodingWindowsArabic ;
1905             break ;
1906         case wxFONTENCODING_CP1257 :
1907             enc = kCFStringEncodingWindowsBalticRim;
1908             break ;
1909 //   This only really encodes to UTF7 (if that) evidently
1910 //        case wxFONTENCODING_UTF7 :
1911 //            enc = kCFStringEncodingNonLossyASCII ;
1912 //            break ;
1913         case wxFONTENCODING_UTF8 :
1914             enc = kCFStringEncodingUTF8 ;
1915             break ;
1916         case wxFONTENCODING_EUC_JP :
1917             enc = kCFStringEncodingEUC_JP;
1918             break ;
1919         case wxFONTENCODING_UTF16 :
1920             enc = kCFStringEncodingUnicode ;
1921             break ;
1922         case wxFONTENCODING_MACROMAN :
1923             enc = kCFStringEncodingMacRoman ;
1924             break ;
1925         case wxFONTENCODING_MACJAPANESE :
1926             enc = kCFStringEncodingMacJapanese ;
1927             break ;
1928         case wxFONTENCODING_MACCHINESETRAD :
1929             enc = kCFStringEncodingMacChineseTrad ;
1930             break ;
1931         case wxFONTENCODING_MACKOREAN :
1932             enc = kCFStringEncodingMacKorean ;
1933             break ;
1934         case wxFONTENCODING_MACARABIC :
1935             enc = kCFStringEncodingMacArabic ;
1936             break ;
1937         case wxFONTENCODING_MACHEBREW :
1938             enc = kCFStringEncodingMacHebrew ;
1939             break ;
1940         case wxFONTENCODING_MACGREEK :
1941             enc = kCFStringEncodingMacGreek ;
1942             break ;
1943         case wxFONTENCODING_MACCYRILLIC :
1944             enc = kCFStringEncodingMacCyrillic ;
1945             break ;
1946         case wxFONTENCODING_MACDEVANAGARI :
1947             enc = kCFStringEncodingMacDevanagari ;
1948             break ;
1949         case wxFONTENCODING_MACGURMUKHI :
1950             enc = kCFStringEncodingMacGurmukhi ;
1951             break ;
1952         case wxFONTENCODING_MACGUJARATI :
1953             enc = kCFStringEncodingMacGujarati ;
1954             break ;
1955         case wxFONTENCODING_MACORIYA :
1956             enc = kCFStringEncodingMacOriya ;
1957             break ;
1958         case wxFONTENCODING_MACBENGALI :
1959             enc = kCFStringEncodingMacBengali ;
1960             break ;
1961         case wxFONTENCODING_MACTAMIL :
1962             enc = kCFStringEncodingMacTamil ;
1963             break ;
1964         case wxFONTENCODING_MACTELUGU :
1965             enc = kCFStringEncodingMacTelugu ;
1966             break ;
1967         case wxFONTENCODING_MACKANNADA :
1968             enc = kCFStringEncodingMacKannada ;
1969             break ;
1970         case wxFONTENCODING_MACMALAJALAM :
1971             enc = kCFStringEncodingMacMalayalam ;
1972             break ;
1973         case wxFONTENCODING_MACSINHALESE :
1974             enc = kCFStringEncodingMacSinhalese ;
1975             break ;
1976         case wxFONTENCODING_MACBURMESE :
1977             enc = kCFStringEncodingMacBurmese ;
1978             break ;
1979         case wxFONTENCODING_MACKHMER :
1980             enc = kCFStringEncodingMacKhmer ;
1981             break ;
1982         case wxFONTENCODING_MACTHAI :
1983             enc = kCFStringEncodingMacThai ;
1984             break ;
1985         case wxFONTENCODING_MACLAOTIAN :
1986             enc = kCFStringEncodingMacLaotian ;
1987             break ;
1988         case wxFONTENCODING_MACGEORGIAN :
1989             enc = kCFStringEncodingMacGeorgian ;
1990             break ;
1991         case wxFONTENCODING_MACARMENIAN :
1992             enc = kCFStringEncodingMacArmenian ;
1993             break ;
1994         case wxFONTENCODING_MACCHINESESIMP :
1995             enc = kCFStringEncodingMacChineseSimp ;
1996             break ;
1997         case wxFONTENCODING_MACTIBETAN :
1998             enc = kCFStringEncodingMacTibetan ;
1999             break ;
2000         case wxFONTENCODING_MACMONGOLIAN :
2001             enc = kCFStringEncodingMacMongolian ;
2002             break ;
2003         case wxFONTENCODING_MACETHIOPIC :
2004             enc = kCFStringEncodingMacEthiopic ;
2005             break ;
2006         case wxFONTENCODING_MACCENTRALEUR :
2007             enc = kCFStringEncodingMacCentralEurRoman ;
2008             break ;
2009         case wxFONTENCODING_MACVIATNAMESE :
2010             enc = kCFStringEncodingMacVietnamese ;
2011             break ;
2012         case wxFONTENCODING_MACARABICEXT :
2013             enc = kCFStringEncodingMacExtArabic ;
2014             break ;
2015         case wxFONTENCODING_MACSYMBOL :
2016             enc = kCFStringEncodingMacSymbol ;
2017             break ;
2018         case wxFONTENCODING_MACDINGBATS :
2019             enc = kCFStringEncodingMacDingbats ;
2020             break ;
2021         case wxFONTENCODING_MACTURKISH :
2022             enc = kCFStringEncodingMacTurkish ;
2023             break ;
2024         case wxFONTENCODING_MACCROATIAN :
2025             enc = kCFStringEncodingMacCroatian ;
2026             break ;
2027         case wxFONTENCODING_MACICELANDIC :
2028             enc = kCFStringEncodingMacIcelandic ;
2029             break ;
2030         case wxFONTENCODING_MACROMANIAN :
2031             enc = kCFStringEncodingMacRomanian ;
2032             break ;
2033         case wxFONTENCODING_MACCELTIC :
2034             enc = kCFStringEncodingMacCeltic ;
2035             break ;
2036         case wxFONTENCODING_MACGAELIC :
2037             enc = kCFStringEncodingMacGaelic ;
2038             break ;
2039 //      case wxFONTENCODING_MACKEYBOARD :
2040 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2041 //          break ;
2042         default :
2043             // because gcc is picky
2044             break ;
2045     } ;
2046     return enc ;
2047 }
2048
2049 class wxMBConv_cocoa : public wxMBConv
2050 {
2051 public:
2052     wxMBConv_cocoa()
2053     {
2054         Init(CFStringGetSystemEncoding()) ;
2055     }
2056
2057 #if wxUSE_FONTMAP
2058     wxMBConv_cocoa(const wxChar* name)
2059     {
2060         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2061     }
2062 #endif
2063
2064     wxMBConv_cocoa(wxFontEncoding encoding)
2065     {
2066         Init( wxCFStringEncFromFontEnc(encoding) );
2067     }
2068
2069     ~wxMBConv_cocoa()
2070     {
2071     }
2072
2073     void Init( CFStringEncoding encoding)
2074     {
2075         m_encoding = encoding ;
2076     }
2077
2078     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2079     {
2080         wxASSERT(szUnConv);
2081
2082         CFStringRef theString = CFStringCreateWithBytes (
2083                                                 NULL, //the allocator
2084                                                 (const UInt8*)szUnConv,
2085                                                 strlen(szUnConv),
2086                                                 m_encoding,
2087                                                 false //no BOM/external representation
2088                                                 );
2089
2090         wxASSERT(theString);
2091
2092         size_t nOutLength = CFStringGetLength(theString);
2093
2094         if (szOut == NULL)
2095         {
2096             CFRelease(theString);
2097             return nOutLength;
2098         }
2099
2100         CFRange theRange = { 0, nOutSize };
2101
2102 #if SIZEOF_WCHAR_T == 4
2103         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2104 #endif
2105
2106         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2107
2108         CFRelease(theString);
2109
2110         szUniCharBuffer[nOutLength] = '\0' ;
2111
2112 #if SIZEOF_WCHAR_T == 4
2113         wxMBConvUTF16 converter ;
2114         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2115         delete[] szUniCharBuffer;
2116 #endif
2117
2118         return nOutLength;
2119     }
2120
2121     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2122     {
2123         wxASSERT(szUnConv);
2124
2125         size_t nRealOutSize;
2126         size_t nBufSize = wxWcslen(szUnConv);
2127         UniChar* szUniBuffer = (UniChar*) szUnConv;
2128
2129 #if SIZEOF_WCHAR_T == 4
2130         wxMBConvUTF16 converter ;
2131         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2132         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2133         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2134         nBufSize /= sizeof(UniChar);
2135 #endif
2136
2137         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2138                                 NULL, //allocator
2139                                 szUniBuffer,
2140                                 nBufSize,
2141                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2142                             );
2143
2144         wxASSERT(theString);
2145
2146         //Note that CER puts a BOM when converting to unicode
2147         //so we  check and use getchars instead in that case
2148         if (m_encoding == kCFStringEncodingUnicode)
2149         {
2150             if (szOut != NULL)
2151                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2152
2153             nRealOutSize = CFStringGetLength(theString) + 1;
2154         }
2155         else
2156         {
2157             CFStringGetBytes(
2158                 theString,
2159                 CFRangeMake(0, CFStringGetLength(theString)),
2160                 m_encoding,
2161                 0, //what to put in characters that can't be converted -
2162                     //0 tells CFString to return NULL if it meets such a character
2163                 false, //not an external representation
2164                 (UInt8*) szOut,
2165                 nOutSize,
2166                 (CFIndex*) &nRealOutSize
2167                         );
2168         }
2169
2170         CFRelease(theString);
2171
2172 #if SIZEOF_WCHAR_T == 4
2173         delete[] szUniBuffer;
2174 #endif
2175
2176         return  nRealOutSize - 1;
2177     }
2178
2179     bool IsOk() const
2180     {
2181         return m_encoding != kCFStringEncodingInvalidId &&
2182               CFStringIsEncodingAvailable(m_encoding);
2183     }
2184
2185 private:
2186     CFStringEncoding m_encoding ;
2187 };
2188
2189 #endif // defined(__WXCOCOA__)
2190
2191 // ============================================================================
2192 // Mac conversion classes
2193 // ============================================================================
2194
2195 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2196
2197 class wxMBConv_mac : public wxMBConv
2198 {
     // Converter between a multibyte encoding and wchar_t that uses two
     // TECObjectRef converters created with TECCreateConverter(): one from the
     // character encoding to 16 bit Unicode and one for the opposite direction.
2199 public:
2200     wxMBConv_mac()
2201     {
2202         Init(CFStringGetSystemEncoding()) ;
2203     }
2204
2205 #if wxUSE_FONTMAP
2206     wxMBConv_mac(const wxChar* name)
2207     {
2208         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2209     }
2210 #endif
2211
2212     wxMBConv_mac(wxFontEncoding encoding)
2213     {
2214         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2215     }
2216
2217     ~wxMBConv_mac()
2218     {
             // dispose of both converters; the returned status is not inspected
2219         OSStatus status = noErr ;
2220         status = TECDisposeConverter(m_MB2WC_converter);
2221         status = TECDisposeConverter(m_WC2MB_converter);
2222     }
2223
2224
2225     void Init( TextEncodingBase encoding)
2226     {
             // Remembers the encoding and creates one converter per direction.
             // NOTE(review): the status is ignored and the converter members are
             // not initialised beforehand, so IsOk() relies on
             // TECCreateConverter() storing NULL on failure -- confirm.
2227         OSStatus status = noErr ;
2228         m_char_encoding = encoding ;
2229         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2230
2231         status = TECCreateConverter(&m_MB2WC_converter,
2232                                     m_char_encoding,
2233                                     m_unicode_encoding);
2234         status = TECCreateConverter(&m_WC2MB_converter,
2235                                     m_unicode_encoding,
2236                                     m_char_encoding);
2237     }
2238
2239     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2240     {
             // Converts the NUL-terminated string psz to wide characters.
             // With buf == NULL a temporary buffer is used and only the resulting
             // length is returned; otherwise buf receives the text and is
             // NUL-terminated when the result is shorter than n.
             // NOTE(review): the TECConvertText() status is not checked before
             // byteOutLen is used.
2241         OSStatus status = noErr ;
2242         ByteCount byteOutLen ;
2243         ByteCount byteInLen = strlen(psz) ;
2244         wchar_t *tbuf = NULL ;
2245         UniChar* ubuf = NULL ;
2246         size_t res = 0 ;
2247
2248         if (buf == NULL)
2249         {
2250             //apple specs say at least 32
2251             n = wxMax( 32 , byteInLen ) ;
2252             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2253         }
2254         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2255 #if SIZEOF_WCHAR_T == 4
             // separate UTF-16 buffer, 2 extra bytes for the terminator set below
2256         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2257 #else
             // wchar_t is 16 bit: convert directly into the destination
2258         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2259 #endif
2260         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2261           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2262 #if SIZEOF_WCHAR_T == 4
2263         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2264         // is not properly terminated we get random characters at the end
2265         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
             // expand the UTF-16 data into the 4 byte wchar_t destination
2266         wxMBConvUTF16 converter ;
2267         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2268         free( ubuf ) ;
2269 #else
2270         res = byteOutLen / sizeof( UniChar ) ;
2271 #endif
2272         if ( buf == NULL )
2273              free(tbuf) ;
2274
2275         if ( buf  && res < n)
2276             buf[res] = 0;
2277
2278         return res ;
2279     }
2280
2281     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2282     {
             // Converts the NUL-terminated wide string psz to the multibyte
             // encoding.  With buf == NULL a temporary buffer is used and only
             // the byte count is returned.  When writing into buf the result is
             // converted back with MB2WC() and (size_t)-1 is returned if it does
             // not match the input.
2283         OSStatus status = noErr ;
2284         ByteCount byteOutLen ;
2285         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2286
2287         char *tbuf = NULL ;
2288
2289         if (buf == NULL)
2290         {
2291             //apple specs say at least 32
2292             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2293             tbuf = (char*) malloc( n ) ;
2294         }
2295
2296         ByteCount byteBufferLen = n ;
2297         UniChar* ubuf = NULL ;
2298 #if SIZEOF_WCHAR_T == 4
             // the TEC converter takes 16 bit Unicode: build a UTF-16 copy first
2299         wxMBConvUTF16 converter ;
2300         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2301         byteInLen = unicharlen ;
2302         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2303         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2304 #else
2305         ubuf = (UniChar*) psz ;
2306 #endif
2307         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2308             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2309 #if SIZEOF_WCHAR_T == 4
2310         free( ubuf ) ;
2311 #endif
2312         if ( buf == NULL )
2313             free(tbuf) ;
2314
2315         size_t res = byteOutLen ;
2316         if ( buf  && res < n)
2317         {
2318             buf[res] = 0;
2319
2320             //we need to double-trip to verify it didn't insert any ? in place
2321             //of bogus characters
2322             wxWCharBuffer wcBuf(n);
2323             size_t pszlen = wxWcslen(psz);
2324             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2325                         wxWcslen(wcBuf) != pszlen ||
2326                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2327             {
2328                 // we didn't obtain the same thing we started from, hence
2329                 // the conversion was lossy and we consider that it failed
2330                 return (size_t)-1;
2331             }
2332         }
2333
2334         return res ;
2335     }
2336
         // usable only if both converter handles are non-NULL
2337     bool IsOk() const
2338         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2339
2340 private:
         // converters for the two directions, created in Init()
2341     TECObjectRef m_MB2WC_converter ;
2342     TECObjectRef m_WC2MB_converter ;
2343
         // the multibyte encoding and the 16 bit Unicode encoding used with it
2344     TextEncodingBase m_char_encoding ;
2345     TextEncodingBase m_unicode_encoding ;
2346 };
2347
2348 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2349
2350 // ============================================================================
2351 // wxEncodingConverter based conversion classes
2352 // ============================================================================
2353
2354 #if wxUSE_FONTMAP
2355
2356 class wxMBConv_wxwin : public wxMBConv
2357 {
     // Converter implemented with a pair of wxEncodingConverter objects, one
     // for each direction between m_enc and wxFONTENCODING_UNICODE.
2358 private:
2359     void Init()
2360     {
             // the reverse converter is only set up if the forward one succeeded
2361         const bool toWideOk = m2w.Init(m_enc, wxFONTENCODING_UNICODE);
2362         m_ok = toWideOk && w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2363     }
2364
2365 public:
2366     // build from a charset name resolved through the font mapper;
2367     // a NULL name selects wxFONTENCODING_SYSTEM
2368     wxMBConv_wxwin(const wxChar* name)
2369     {
2370         m_enc = name ? wxFontMapperBase::Get()->CharsetToEncoding(name, false)
2371                      : wxFONTENCODING_SYSTEM;
2374
2375         Init();
2376     }
2377
         // build from an already known font encoding
2378     wxMBConv_wxwin(wxFontEncoding enc)
2379     {
2380         m_enc = enc;
2382         Init();
2383     }
2384
         // returns strlen(psz), or (size_t)-1 if buf is given and the
         // conversion into it fails; the size argument is not used
2385     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2386     {
2387         const size_t srcLen = strlen(psz);
2388         if ( buf && !m2w.Convert(psz, buf) )
2389             return (size_t)-1;
2392
2393         return srcLen;
2394     }
2395
         // returns wxWcslen(psz), or (size_t)-1 if buf is given and the
         // conversion into it fails; the size argument is not used
2396     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2397     {
2398         const size_t srcLen = wxWcslen(psz);
2399         if ( buf && !w2m.Convert(psz, buf) )
2400             return (size_t)-1;
2404
2405         return srcLen;
2406     }
2407
2408     bool IsOk() const { return m_ok; }
2409
2410 public:
2411     wxFontEncoding m_enc;
2412     wxEncodingConverter m2w, w2m;
2413
2414     // set by Init(): true if both converters could be initialised
2415     bool m_ok;
2416
2417     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2418 };
2419
2420 // make the constructors available for unit testing
2421 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2422 {
         // hand the converter to the caller only if it initialised correctly,
         // otherwise destroy it and return a null pointer
2423     wxMBConv_wxwin* conv = new wxMBConv_wxwin( name );
2424     if ( conv->IsOk() )
2425         return conv;
2426
2427     delete conv;
2429     return 0;
2430 }
2431
2432 #endif // wxUSE_FONTMAP
2433
2434 // ============================================================================
2435 // wxCSConv implementation
2436 // ============================================================================
2437
2438 void wxCSConv::Init()
2439 {
         // no name and no real converter yet; creation is deferred until first use
2440     m_convReal = NULL;
2441     m_name = NULL;
2442     m_deferred = true;
2443 }
2444
2445 wxCSConv::wxCSConv(const wxChar *charset)
2446 {
         // construct from a charset name (may be NULL); the encoding stays at
         // wxFONTENCODING_SYSTEM
2447     Init();
2448     m_encoding = wxFONTENCODING_SYSTEM;
2449
2450     if ( charset != NULL )
2451         SetName(charset);
2455 }
2456
2457 wxCSConv::wxCSConv(wxFontEncoding encoding)
2458 {
         // wxFONTENCODING_MAX and wxFONTENCODING_DEFAULT are rejected: complain
         // and fall back to the system encoding
2459     const bool isValid = encoding != wxFONTENCODING_MAX &&
2460                          encoding != wxFONTENCODING_DEFAULT;
2461     if ( !isValid )
2462     {
2463         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2464         encoding = wxFONTENCODING_SYSTEM;
2465     }
2466
2467     Init();
2468     m_encoding = encoding;
2469 }
2470
2471 wxCSConv::~wxCSConv()
2472 {
         // releases the name and the real converter
2473     this->Clear();
2474 }
2475
2476 wxCSConv::wxCSConv(const wxCSConv& conv)
2477         : wxMBConv()
2478 {
         // copy name and encoding only; the real converter is not copied and
         // gets created on demand
2479     Init();
2480
2481     m_encoding = conv.m_encoding;
2482     SetName(conv.m_name);
2483 }
2484
2485 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2486 {
         // Takes over the name and encoding of conv.  The real converter is
         // never copied: the current one is destroyed and a new one is created
         // lazily on next use.
         //
         // Self-assignment must be a no-op: Clear() would free m_name before
         // it could be copied.
         if ( this != &conv )
         {
2487         Clear();
2488
2489         SetName(conv.m_name);
2490         m_encoding = conv.m_encoding;
2491
             // Clear() leaves m_deferred alone and SetName() only resets it for
             // a non-NULL name, so request re-creation explicitly; otherwise an
             // already used object would keep a NULL converter
             m_deferred = true;
         }

2492     return *this;
2493 }
2494
2495 void wxCSConv::Clear()
2496 {
         // release the copied name, then the real converter, resetting each
         // pointer right after it has been freed
2497     free(m_name);
2498     m_name = NULL;
2499
2500     delete m_convReal;
2501     m_convReal = NULL;
2502 }
2503
2504 void wxCSConv::SetName(const wxChar *charset)
2505 {
         // a NULL charset leaves the object untouched
2506     if ( !charset )
2507         return;
2508
         // keep a private copy and make sure the converter is (re)created
2509     m_name = wxStrdup(charset);
2510     m_deferred = true;
2511 }
2512
2513 wxMBConv *wxCSConv::DoCreate() const
2514 {
         // Creates the object doing the real work for m_name / m_encoding.
         // Returns a new converter, or NULL either when no conversion object is
         // needed (ISO8859-1) or when no suitable converter could be created,
         // in which case an error is logged.
         //
2515     // check for the special case of ASCII or ISO8859-1 charset: as we have
2516     // special knowledge of it anyhow, we don't need to create a special
2517     // conversion object
2518     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2519     {
2520         // don't convert at all
2521         return NULL;
2522     }
2523
2524     // we trust OS to do conversion better than we can so try external
2525     // conversion methods first
2526     //
2527     // the full order is:
2528     //      1. OS conversion (iconv() under Unix or Win32 API)
2529     //      2. hard coded conversions for UTF
2530     //      3. wxEncodingConverter as fall back
2531
2532     // step (1)
2533 #ifdef HAVE_ICONV
2534 #if !wxUSE_FONTMAP
2535     if ( m_name )
2536 #endif // !wxUSE_FONTMAP
2537     {
2538         wxString name(m_name);
2539
2540 #if wxUSE_FONTMAP
2541         if ( name.empty() )
2542             name = wxFontMapperBase::GetEncodingName(m_encoding);
2543 #endif // wxUSE_FONTMAP
2544
2545         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2546         if ( conv->IsOk() )
2547             return conv;
2548
2549         delete conv;
2550     }
2551 #endif // HAVE_ICONV
2552
2553 #ifdef wxHAVE_WIN32_MB2WC
2554     {
2555 #if wxUSE_FONTMAP
2556         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2557                                       : new wxMBConv_win32(m_encoding);
2558         if ( conv->IsOk() )
2559             return conv;
2560
2561         delete conv;
2562 #else
2563         return NULL;
2564 #endif
2565     }
2566 #endif // wxHAVE_WIN32_MB2WC
2567 #if defined(__WXMAC__)
2568     {
2569         // leave UTF16 and UTF32 to the built-ins of wx
2570         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2571             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2572         {
2573
2574 #if wxUSE_FONTMAP
2575             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2576                                         : new wxMBConv_mac(m_encoding);
2577 #else
2578             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2579 #endif
2580             if ( conv->IsOk() )
2581                  return conv;
2582
2583             delete conv;
2584         }
2585     }
2586 #endif
2587 #if defined(__WXCOCOA__)
2588     {
2589         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2590         {
2591
2592 #if wxUSE_FONTMAP
2593             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2594                                           : new wxMBConv_cocoa(m_encoding);
2595 #else
2596             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2597 #endif
2598             if ( conv->IsOk() )
2599                  return conv;
2600
2601             delete conv;
2602         }
2603     }
2604 #endif
2605     // step (2)
2606     wxFontEncoding enc = m_encoding;
2607 #if wxUSE_FONTMAP
2608     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2609     {
2610         // use "false" to suppress interactive dialogs -- we can be called from
2611         // anywhere and popping up a dialog from here is the last thing we want to
2612         // do
2613         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2614     }
2615 #endif // wxUSE_FONTMAP
2616
2617     switch ( enc )
2618     {
2619         case wxFONTENCODING_UTF7:
2620              return new wxMBConvUTF7;
2621
2622         case wxFONTENCODING_UTF8:
2623              return new wxMBConvUTF8;
2624
2625         case wxFONTENCODING_UTF16BE:
2626              return new wxMBConvUTF16BE;
2627
2628         case wxFONTENCODING_UTF16LE:
2629              return new wxMBConvUTF16LE;
2630
2631         case wxFONTENCODING_UTF32BE:
2632              return new wxMBConvUTF32BE;
2633
2634         case wxFONTENCODING_UTF32LE:
2635              return new wxMBConvUTF32LE;
2636
2637         default:
2638              // nothing to do but put here to suppress gcc warnings
2639              ;
2640     }
2641
2642     // step (3)
2643 #if wxUSE_FONTMAP
2644     {
2645         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2646                                       : new wxMBConv_wxwin(m_encoding);
2647         if ( conv->IsOk() )
2648             return conv;
2649
2650         delete conv;
2651     }
2652 #endif // wxUSE_FONTMAP
2653
2654     // NB: This is a hack to prevent deadlock. What could otherwise happen
2655     //     in Unicode build: wxConvLocal creation ends up being here
2656     //     because of some failure and logs the error. But wxLog will try to
2657     //     attach timestamp, for which it will need wxConvLocal (to convert
2658     //     time to char* and then wchar_t*), but that fails, tries to log
2659     //     error, but wxLog has a (already locked) critical section that
2660     //     guards static buffer.
2661     static bool alreadyLoggingError = false;
2662     if (!alreadyLoggingError)
2663     {
2664         alreadyLoggingError = true;
2665         wxLogError(_("Cannot convert from the charset '%s'!"),
2666                    m_name ? m_name
2667                       :
2668 #if wxUSE_FONTMAP
2669                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2670 #else // !wxUSE_FONTMAP
                              // m_encoding is an enum value, not a string: it
                              // has to be formatted as a number ("%s" would make
                              // the formatter treat it as a pointer)
2671                          wxString::Format(_("encoding %d"), m_encoding).c_str()
2672 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2673               );
2674         alreadyLoggingError = false;
2675     }
2676
2677     return NULL;
2678 }
2679
2680 void wxCSConv::CreateConvIfNeeded() const
2681 {
         // nothing to do once the real converter has been looked up
2682     if ( !m_deferred )
2683         return;
2684
         // creation is lazy, so the members are updated from a const method
2685     wxCSConv *self = (wxCSConv *)this; // const_cast
2686
2687 #if wxUSE_INTL
2688     // with neither a name nor an explicit encoding, fall back to the
2689     // name of the system encoding
2690     if ( m_encoding == wxFONTENCODING_SYSTEM && !m_name )
2691     {
2692         self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2693     }
2694 #endif // wxUSE_INTL
2695
2696     self->m_convReal = DoCreate();
2697     self->m_deferred = false;
2698 }
2699
2700 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2701 {
         // Delegates to the real converter if there is one; otherwise each
         // byte is widened directly (latin-1) and strlen(psz) is returned.
2702     CreateConvIfNeeded();
2703
2704     if ( m_convReal != NULL )
2705         return m_convReal->MB2WC(buf, psz, n);
2706
2707     // latin-1 (direct)
2708     const size_t srcLen = strlen(psz);
2709     if ( buf == NULL )
2710         return srcLen;
2711
         // copy srcLen + 1 elements so that the terminating NUL is included
2712     const size_t count = srcLen + 1;
2713     for ( size_t i = 0; i < count; ++i )
2714         buf[i] = (unsigned char)(psz[i]);
2715
2716     return srcLen;
2717 }
2718
2719 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2720 {
         // Delegates to the real converter if there is one; otherwise the
         // characters are narrowed directly (latin-1).  Returns wxWcslen(psz),
         // or (size_t)-1 as soon as a character above 0xFF is met.
2721     CreateConvIfNeeded();
2722
2723     if ( m_convReal != NULL )
2724         return m_convReal->WC2MB(buf, psz, n);
2725
2726     // latin-1 (direct)
2727     const size_t srcLen = wxWcslen(psz);
2728
         // a single pass serves both modes: every character (including the
         // terminating NUL) is checked, and stored only if buf was given
2729     const size_t count = srcLen + 1;
2730     for ( size_t i = 0; i < count; ++i )
2731     {
2732         if ( psz[i] > 0xFF )
2733             return (size_t)-1;
2734
2735         if ( buf )
2736             buf[i] = (char)psz[i];
2737     }
2745
2746     return srcLen;
2747 }
2748
2749 // ----------------------------------------------------------------------------
2750 // globals
2751 // ----------------------------------------------------------------------------
2752
2753 #ifdef __WINDOWS__
     // the object behind wxConvLibc: its type is chosen per platform
2754     static wxMBConv_win32 wxConvLibcObj;
2755 #elif defined(__WXMAC__) && !defined(__MACH__)
2756     static wxMBConv_mac wxConvLibcObj ;
2757 #else
2758     static wxMBConvLibc wxConvLibcObj;
2759 #endif
2760
     // the objects behind the remaining exported converters
2761 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2762 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2763 static wxMBConvUTF7 wxConvUTF7Obj;
2764 static wxMBConvUTF8 wxConvUTF8Obj;
2765
     // exported references and pointers bound to the static objects above
2766 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2767 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2768 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2769 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2770 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
     // wxConvCurrent initially points to the libc converter
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
     // wxConvFileName points to the UTF-8 converter when __WXOSX__ is defined
     // and to the libc converter otherwise
2772 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2773 #ifdef __WXOSX__
2774                                     wxConvUTF8Obj;
2775 #else
2776                                     wxConvLibcObj;
2777 #endif
2778
2779
2780 #else // !wxUSE_WCHAR_T
2781
2782 // stand-ins in absence of wchar_t: plain wxMBConv objects are defined under
     // the names of the global converters when wxUSE_WCHAR_T is off
2783 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2784                                 wxConvISO8859_1,
2785                                 wxConvLocal,
2786                                 wxConvUTF8;
2787
2788 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2789
2790