src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/private.h"
  45     #include "wx/msw/missing.h"
  46 #endif
  47
  48 #ifndef __WXWINCE__
  49 #include <errno.h>
  50 #endif
  51
  52 #include <ctype.h>
  53 #include <string.h>
  54 #include <stdlib.h>
  55
  56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  57     #define wxHAVE_WIN32_MB2WC
  58 #endif // __WIN32__ but !__WXMICROWIN__
  59
  60 // ----------------------------------------------------------------------------
  61 // headers
  62 // ----------------------------------------------------------------------------
  63
  64 #ifdef __SALFORDC__
  65     #include <clib.h>
  66 #endif
  67
  68 #ifdef HAVE_ICONV
  69     #include <iconv.h>
  70     #include "wx/thread.h"
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #ifndef __DARWIN__
  79 #include <ATSUnicode.h>
  80 #include <TextCommon.h>
  81 #include <TextEncodingConverter.h>
  82 #endif
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86 // ----------------------------------------------------------------------------
  87 // macros
  88 // ----------------------------------------------------------------------------
  89
  90 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  91 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  92
  93 #if SIZEOF_WCHAR_T == 4
  94     #define WC_NAME         "UCS4"
  95     #define WC_BSWAP         BSWAP_UCS4
  96     #ifdef WORDS_BIGENDIAN
  97       #define WC_NAME_BEST  "UCS-4BE"
  98     #else
  99       #define WC_NAME_BEST  "UCS-4LE"
 100     #endif
 101 #elif SIZEOF_WCHAR_T == 2
 102     #define WC_NAME         "UTF16"
 103     #define WC_BSWAP         BSWAP_UTF16
 104     #define WC_UTF16
 105     #ifdef WORDS_BIGENDIAN
 106       #define WC_NAME_BEST  "UTF-16BE"
 107     #else
 108       #define WC_NAME_BEST  "UTF-16LE"
 109     #endif
 110 #else // sizeof(wchar_t) != 2 nor 4
 111     // does this ever happen?
 112     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 113 #endif
 114
 115 // ============================================================================
 116 // implementation
 117 // ============================================================================
 118
 119 // ----------------------------------------------------------------------------
 120 // UTF-16 en/decoding to/from UCS-4
 121 // ----------------------------------------------------------------------------
 122
 123
 124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 125 {
 126     if (input<=0xffff)
 127     {
 128         if (output)
 129             *output = (wxUint16) input;
 130         return 1;
 131     }
 132     else if (input>=0x110000)
 133     {
 134         return (size_t)-1;
 135     }
 136     else
 137     {
 138         if (output)
 139         {
 140             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 141             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 142         }
 143         return 2;
 144     }
 145 }
 146
 147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 148 {
 149     if ((*input<0xd800) || (*input>0xdfff))
 150     {
 151         output = *input;
 152         return 1;
 153     }
 154     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 155     {
 156         output = *input;
 157         return (size_t)-1;
 158     }
 159     else
 160     {
 161         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 162         return 2;
 163     }
 164 }
 165
 166
 167 // ----------------------------------------------------------------------------
 168 // wxMBConv
 169 // ----------------------------------------------------------------------------
 170
 171 wxMBConv::~wxMBConv()
 172 {
 173     // nothing to do here (necessary for Darwin linking probably)
 174 }
 175
 176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 177 {
 178     if ( psz )
 179     {
 180         // calculate the length of the buffer needed first
 181         size_t nLen = MB2WC(NULL, psz, 0);
 182         if ( nLen != (size_t)-1 )
 183         {
 184             // now do the actual conversion
 185             wxWCharBuffer buf(nLen);
 186             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 187             if ( nLen != (size_t)-1 )
 188             {
 189                 return buf;
 190             }
 191         }
 192     }
 193
 194     wxWCharBuffer buf((wchar_t *)NULL);
 195
 196     return buf;
 197 }
 198
 199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 200 {
 201     if ( pwz )
 202     {
 203         size_t nLen = WC2MB(NULL, pwz, 0);
 204         if ( nLen != (size_t)-1 )
 205         {
 206             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 207             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 208             if ( nLen != (size_t)-1 )
 209             {
 210                 return buf;
 211             }
 212         }
 213     }
 214
 215     wxCharBuffer buf((char *)NULL);
 216
 217     return buf;
 218 }
 219
 220 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 221 {
 222     wxASSERT(pOutSize != NULL);
 223
 224     const char* szEnd = szString + nStringLen + 1;
 225     const char* szPos = szString;
 226     const char* szStart = szPos;
 227
 228     size_t nActualLength = 0;
 229     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 230
 231     wxWCharBuffer theBuffer(nCurrentSize);
 232
 233     //Convert the string until the length() is reached, continuing the
 234     //loop every time a null character is reached
 235     while(szPos != szEnd)
 236     {
 237         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 238
 239         //Get the length of the current (sub)string
 240         size_t nLen = MB2WC(NULL, szPos, 0);
 241
 242         //Invalid conversion?
 243         if( nLen == (size_t)-1 )
 244         {
 245             *pOutSize = 0;
 246             theBuffer.data()[0u] = wxT('\0');
 247             return theBuffer;
 248         }
 249
 250
 251         //Increase the actual length (+1 for current null character)
 252         nActualLength += nLen + 1;
 253
 254         //if buffer too big, realloc the buffer
 255         if (nActualLength > (nCurrentSize+1))
 256         {
 257             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 258             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 259             theBuffer = theNewBuffer;
 260             nCurrentSize <<= 1;
 261         }
 262
 263         //Convert the current (sub)string
 264         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 265         {
 266             *pOutSize = 0;
 267             theBuffer.data()[0u] = wxT('\0');
 268             return theBuffer;
 269         }
 270
 271         //Increment to next (sub)string
 272         //Note that we have to use strlen instead of nLen here
 273         //because XX2XX gives us the size of the output buffer,
 274         //which is not necessarily the length of the string
 275         szPos += strlen(szPos) + 1;
 276     }
 277
 278     //success - return actual length and the buffer
 279     *pOutSize = nActualLength;
 280     return theBuffer;
 281 }
 282
 283 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 284 {
 285     wxASSERT(pOutSize != NULL);
 286
 287     const wchar_t* szEnd = szString + nStringLen + 1;
 288     const wchar_t* szPos = szString;
 289     const wchar_t* szStart = szPos;
 290
 291     size_t nActualLength = 0;
 292     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 293
 294     wxCharBuffer theBuffer(nCurrentSize);
 295
 296     //Convert the string until the length() is reached, continuing the
 297     //loop every time a null character is reached
 298     while(szPos != szEnd)
 299     {
 300         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 301
 302         //Get the length of the current (sub)string
 303         size_t nLen = WC2MB(NULL, szPos, 0);
 304
 305         //Invalid conversion?
 306         if( nLen == (size_t)-1 )
 307         {
 308             *pOutSize = 0;
 309             theBuffer.data()[0u] = wxT('\0');
 310             return theBuffer;
 311         }
 312
 313         //Increase the actual length (+1 for current null character)
 314         nActualLength += nLen + 1;
 315
 316         //if buffer too big, realloc the buffer
 317         if (nActualLength > (nCurrentSize+1))
 318         {
 319             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 320             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 321             theBuffer = theNewBuffer;
 322             nCurrentSize <<= 1;
 323         }
 324
 325         //Convert the current (sub)string
 326         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 327         {
 328             *pOutSize = 0;
 329             theBuffer.data()[0u] = wxT('\0');
 330             return theBuffer;
 331         }
 332
 333         //Increment to next (sub)string
 334         //Note that we have to use wxWcslen instead of nLen here
 335         //because XX2XX gives us the size of the output buffer,
 336         //which is not necessarily the length of the string
 337         szPos += wxWcslen(szPos) + 1;
 338     }
 339
 340     //success - return actual length and the buffer
 341     *pOutSize = nActualLength;
 342     return theBuffer;
 343 }
 344
 345 // ----------------------------------------------------------------------------
 346 // wxMBConvLibc
 347 // ----------------------------------------------------------------------------
 348
 349 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 350 {
 351     return wxMB2WC(buf, psz, n);
 352 }
 353
 354 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 355 {
 356     return wxWC2MB(buf, psz, n);
 357 }
 358
 359 #ifdef __UNIX__
 360
 361 // ----------------------------------------------------------------------------
 362 // wxConvBrokenFileNames
 363 // ----------------------------------------------------------------------------
 364
 365 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 366 {
 367     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 368                   || wxStricmp(charset, _T("UTF8")) == 0  )
 369         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 370     else
 371         m_conv = new wxCSConv(charset);
 372 }
 373
 374 size_t
 375 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 376                              const char *psz,
 377                              size_t outputSize) const
 378 {
 379     return m_conv->MB2WC( outputBuf, psz, outputSize );
 380 }
 381
 382 size_t
 383 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 384                              const wchar_t *psz,
 385                              size_t outputSize) const
 386 {
 387     return m_conv->WC2MB( outputBuf, psz, outputSize );
 388 }
 389
 390 #endif
 391
 392 // ----------------------------------------------------------------------------
 393 // UTF-7
 394 // ----------------------------------------------------------------------------
 395
 396 // Implementation (C) 2004 Fredrik Roubert
 397
 398 //
 399 // BASE64 decoding table
 400 //
 401 static const unsigned char utf7unb64[] =
 402 {
 403     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 404     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 405     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 406     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 407     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 408     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 409     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 410     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 411     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 412     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 413     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 414     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 415     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 416     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 417     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 418     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 419     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 420     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 421     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 422     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 423     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 424     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 425     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 426     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 432     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 433     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 434     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 435 };
 436
 437 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 438 {
 439     size_t len = 0;
 440
 441     while (*psz && ((!buf) || (len < n)))
 442     {
 443         unsigned char cc = *psz++;
 444         if (cc != '+')
 445         {
 446             // plain ASCII char
 447             if (buf)
 448                 *buf++ = cc;
 449             len++;
 450         }
 451         else if (*psz == '-')
 452         {
 453             // encoded plus sign
 454             if (buf)
 455                 *buf++ = cc;
 456             len++;
 457             psz++;
 458         }
 459         else
 460         {
 461             // BASE64 encoded string
 462             bool lsb;
 463             unsigned char c;
 464             unsigned int d, l;
 465             for (lsb = false, d = 0, l = 0;
 466                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 467             {
 468                 d <<= 6;
 469                 d += cc;
 470                 for (l += 6; l >= 8; lsb = !lsb)
 471                 {
 472                     c = (unsigned char)((d >> (l -= 8)) % 256);
 473                     if (lsb)
 474                     {
 475                         if (buf)
 476                             *buf++ |= c;
 477                         len ++;
 478                     }
 479                     else
 480                         if (buf)
 481                             *buf = (wchar_t)(c << 8);
 482                 }
 483             }
 484             if (*psz == '-')
 485                 psz++;
 486         }
 487     }
 488     if (buf && (len < n))
 489         *buf = 0;
 490     return len;
 491 }
 492
 493 //
 494 // BASE64 encoding table
 495 //
 496 static const unsigned char utf7enb64[] =
 497 {
 498     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 499     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 500     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 501     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 502     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 503     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 504     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 505     '4', '5', '6', '7', '8', '9', '+', '/'
 506 };
 507
 508 //
 509 // UTF-7 encoding table
 510 //
 511 // 0 - Set D (directly encoded characters)
 512 // 1 - Set O (optional direct characters)
 513 // 2 - whitespace characters (optional)
 514 // 3 - special characters
 515 //
 516 static const unsigned char utf7encode[128] =
 517 {
 518     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 519     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 520     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 521     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 522     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 523     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 524     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 525     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 526 };
 527
 528 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 529 {
 530
 531
 532     size_t len = 0;
 533
 534     while (*psz && ((!buf) || (len < n)))
 535     {
 536         wchar_t cc = *psz++;
 537         if (cc < 0x80 && utf7encode[cc] < 1)
 538         {
 539             // plain ASCII char
 540             if (buf)
 541                 *buf++ = (char)cc;
 542             len++;
 543         }
 544 #ifndef WC_UTF16
 545         else if (((wxUint32)cc) > 0xffff)
 546         {
 547             // no surrogate pair generation (yet?)
 548             return (size_t)-1;
 549         }
 550 #endif
 551         else
 552         {
 553             if (buf)
 554                 *buf++ = '+';
 555             len++;
 556             if (cc != '+')
 557             {
 558                 // BASE64 encode string
 559                 unsigned int lsb, d, l;
 560                 for (d = 0, l = 0;; psz++)
 561                 {
 562                     for (lsb = 0; lsb < 2; lsb ++)
 563                     {
 564                         d <<= 8;
 565                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 566
 567                         for (l += 8; l >= 6; )
 568                         {
 569                             l -= 6;
 570                             if (buf)
 571                                 *buf++ = utf7enb64[(d >> l) % 64];
 572                             len++;
 573                         }
 574                     }
 575                     cc = *psz;
 576                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 577                         break;
 578                 }
 579                 if (l != 0)
 580                 {
 581                     if (buf)
 582                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 583                     len++;
 584                 }
 585             }
 586             if (buf)
 587                 *buf++ = '-';
 588             len++;
 589         }
 590     }
 591     if (buf && (len < n))
 592         *buf = 0;
 593     return len;
 594 }
 595
 596 // ----------------------------------------------------------------------------
 597 // UTF-8
 598 // ----------------------------------------------------------------------------
 599
 600 static wxUint32 utf8_max[]=
 601     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 602
 603 // boundaries of the private use area we use to (temporarily) remap invalid
 604 // characters invalid in a UTF-8 encoded string
 605 const wxUint32 wxUnicodePUA = 0x100000;
 606 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 607
 608 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 609 {
 610     size_t len = 0;
 611
 612     while (*psz && ((!buf) || (len < n)))
 613     {
 614         const char *opsz = psz;
 615         bool invalid = false;
 616         unsigned char cc = *psz++, fc = cc;
 617         unsigned cnt;
 618         for (cnt = 0; fc & 0x80; cnt++)
 619             fc <<= 1;
 620         if (!cnt)
 621         {
 622             // plain ASCII char
 623             if (buf)
 624                 *buf++ = cc;
 625             len++;
 626
 627             // escape the escape character for octal escapes
 628             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 629                     && cc == '\\' && (!buf || len < n))
 630             {
 631                 if (buf)
 632                     *buf++ = cc;
 633                 len++;
 634             }
 635         }
 636         else
 637         {
 638             cnt--;
 639             if (!cnt)
 640             {
 641                 // invalid UTF-8 sequence
 642                 invalid = true;
 643             }
 644             else
 645             {
 646                 unsigned ocnt = cnt - 1;
 647                 wxUint32 res = cc & (0x3f >> cnt);
 648                 while (cnt--)
 649                 {
 650                     cc = *psz;
 651                     if ((cc & 0xC0) != 0x80)
 652                     {
 653                         // invalid UTF-8 sequence
 654                         invalid = true;
 655                         break;
 656                     }
 657                     psz++;
 658                     res = (res << 6) | (cc & 0x3f);
 659                 }
 660                 if (invalid || res <= utf8_max[ocnt])
 661                 {
 662                     // illegal UTF-8 encoding
 663                     invalid = true;
 664                 }
 665                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 666                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 667                 {
 668                     // if one of our PUA characters turns up externally
 669                     // it must also be treated as an illegal sequence
 670                     // (a bit like you have to escape an escape character)
 671                     invalid = true;
 672                 }
 673                 else
 674                 {
 675 #ifdef WC_UTF16
 676                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 677                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 678                     if (pa == (size_t)-1)
 679                     {
 680                         invalid = true;
 681                     }
 682                     else
 683                     {
 684                         if (buf)
 685                             buf += pa;
 686                         len += pa;
 687                     }
 688 #else // !WC_UTF16
 689                     if (buf)
 690                         *buf++ = res;
 691                     len++;
 692 #endif // WC_UTF16/!WC_UTF16
 693                 }
 694             }
 695             if (invalid)
 696             {
 697                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 698                 {
 699                     while (opsz < psz && (!buf || len < n))
 700                     {
 701 #ifdef WC_UTF16
 702                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 703                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 704                         wxASSERT(pa != (size_t)-1);
 705                         if (buf)
 706                             buf += pa;
 707                         opsz++;
 708                         len += pa;
 709 #else
 710                         if (buf)
 711                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 712                         opsz++;
 713                         len++;
 714 #endif
 715                     }
 716                 }
 717                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 718                 {
 719                     while (opsz < psz && (!buf || len < n))
 720                     {
 721                         if ( buf && len + 3 < n )
 722                         {
 723                             unsigned char n = *opsz;
 724                             *buf++ = L'\\';
 725                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 726                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 727                             *buf++ = (wchar_t)( L'0' + n % 010 );
 728                         }
 729                         opsz++;
 730                         len += 4;
 731                     }
 732                 }
 733                 else // MAP_INVALID_UTF8_NOT
 734                 {
 735                     return (size_t)-1;
 736                 }
 737             }
 738         }
 739     }
 740     if (buf && (len < n))
 741         *buf = 0;
 742     return len;
 743 }
 744
 745 static inline bool isoctal(wchar_t wch)
 746 {
 747     return L'0' <= wch && wch <= L'7';
 748 }
 749
 750 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 751 {
 752     size_t len = 0;
 753
 754     while (*psz && ((!buf) || (len < n)))
 755     {
 756         wxUint32 cc;
 757 #ifdef WC_UTF16
 758         // cast is ok for WC_UTF16
 759         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 760         psz += (pa == (size_t)-1) ? 1 : pa;
 761 #else
 762         cc=(*psz++) & 0x7fffffff;
 763 #endif
 764
 765         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 766                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 767         {
 768             if (buf)
 769                 *buf++ = (char)(cc - wxUnicodePUA);
 770             len++;
 771         }
 772         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 773                     && cc == L'\\' && psz[0] == L'\\' )
 774         {
 775             if (buf)
 776                 *buf++ = (char)cc;
 777             psz++;
 778             len++;
 779         }
 780         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 781                     cc == L'\\' &&
 782                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 783         {
 784             if (buf)
 785             {
 786                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 787                                  (psz[1] - L'0')*010 +
 788                                  (psz[2] - L'0'));
 789             }
 790
 791             psz += 3;
 792             len++;
 793         }
 794         else
 795         {
 796             unsigned cnt;
 797             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 798             if (!cnt)
 799             {
 800                 // plain ASCII char
 801                 if (buf)
 802                     *buf++ = (char) cc;
 803                 len++;
 804             }
 805
 806             else
 807             {
 808                 len += cnt + 1;
 809                 if (buf)
 810                 {
 811                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 812                     while (cnt--)
 813                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 814                 }
 815             }
 816         }
 817     }
 818
 819     if (buf && (len<n))
 820         *buf = 0;
 821
 822     return len;
 823 }
 824
 825 // ----------------------------------------------------------------------------
 826 // UTF-16
 827 // ----------------------------------------------------------------------------
 828
 829 #ifdef WORDS_BIGENDIAN
 830     #define wxMBConvUTF16straight wxMBConvUTF16BE
 831     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 832 #else
 833     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 834     #define wxMBConvUTF16straight wxMBConvUTF16LE
 835 #endif
 836
 837
 838 #ifdef WC_UTF16
 839
 840 // copy 16bit MB to 16bit String
 841 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 842 {
 843     size_t len=0;
 844
 845     while (*(wxUint16*)psz && (!buf || len < n))
 846     {
 847         if (buf)
 848             *buf++ = *(wxUint16*)psz;
 849         len++;
 850
 851         psz += sizeof(wxUint16);
 852     }
 853     if (buf && len<n)   *buf=0;
 854
 855     return len;
 856 }
 857
 858
 859 // copy 16bit String to 16bit MB
 860 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 861 {
 862     size_t len=0;
 863
 864     while (*psz && (!buf || len < n))
 865     {
 866         if (buf)
 867         {
 868             *(wxUint16*)buf = *psz;
 869             buf += sizeof(wxUint16);
 870         }
 871         len += sizeof(wxUint16);
 872         psz++;
 873     }
 874     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 875
 876     return len;
 877 }
 878
 879
 880 // swap 16bit MB to 16bit String
 881 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 882 {
 883     size_t len=0;
 884
 885     while (*(wxUint16*)psz && (!buf || len < n))
 886     {
 887         if (buf)
 888         {
 889             ((char *)buf)[0] = psz[1];
 890             ((char *)buf)[1] = psz[0];
 891             buf++;
 892         }
 893         len++;
 894         psz += sizeof(wxUint16);
 895     }
 896     if (buf && len<n)   *buf=0;
 897
 898     return len;
 899 }
 900
 901
 902 // swap 16bit MB to 16bit String
 903 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 904 {
 905     size_t len=0;
 906
 907     while (*psz && (!buf || len < n))
 908     {
 909         if (buf)
 910         {
 911             *buf++ = ((char*)psz)[1];
 912             *buf++ = ((char*)psz)[0];
 913         }
 914         len += sizeof(wxUint16);
 915         psz++;
 916     }
 917     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 918
 919     return len;
 920 }
 921
 922
 923 #else // WC_UTF16
 924
 925
 926 // copy 16bit MB to 32bit String
 927 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 928 {
 929     size_t len=0;
 930
 931     while (*(wxUint16*)psz && (!buf || len < n))
 932     {
 933         wxUint32 cc;
 934         size_t pa=decode_utf16((wxUint16*)psz, cc);
 935         if (pa == (size_t)-1)
 936             return pa;
 937
 938         if (buf)
 939             *buf++ = cc;
 940         len++;
 941         psz += pa * sizeof(wxUint16);
 942     }
 943     if (buf && len<n)   *buf=0;
 944
 945     return len;
 946 }
 947
 948
 949 // copy 32bit String to 16bit MB
 950 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 951 {
 952     size_t len=0;
 953
 954     while (*psz && (!buf || len < n))
 955     {
 956         wxUint16 cc[2];
 957         size_t pa=encode_utf16(*psz, cc);
 958
 959         if (pa == (size_t)-1)
 960             return pa;
 961
 962         if (buf)
 963         {
 964             *(wxUint16*)buf = cc[0];
 965             buf += sizeof(wxUint16);
 966             if (pa > 1)
 967             {
 968                 *(wxUint16*)buf = cc[1];
 969                 buf += sizeof(wxUint16);
 970             }
 971         }
 972
 973         len += pa*sizeof(wxUint16);
 974         psz++;
 975     }
 976     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 977
 978     return len;
 979 }
 980
 981
 982 // swap 16bit MB to 32bit String
 983 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 984 {
 985     size_t len=0;
 986
 987     while (*(wxUint16*)psz && (!buf || len < n))
 988     {
 989         wxUint32 cc;
 990         char tmp[4];
 991         tmp[0]=psz[1];  tmp[1]=psz[0];
 992         tmp[2]=psz[3];  tmp[3]=psz[2];
 993
 994         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 995         if (pa == (size_t)-1)
 996             return pa;
 997
 998         if (buf)
 999             *buf++ = cc;
1000
1001         len++;
1002         psz += pa * sizeof(wxUint16);
1003     }
1004     if (buf && len<n)   *buf=0;
1005
1006     return len;
1007 }
1008
1009
1010 // swap 32bit String to 16bit MB
1011 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1012 {
1013     size_t len=0;
1014
1015     while (*psz && (!buf || len < n))
1016     {
1017         wxUint16 cc[2];
1018         size_t pa=encode_utf16(*psz, cc);
1019
1020         if (pa == (size_t)-1)
1021             return pa;
1022
1023         if (buf)
1024         {
1025             *buf++ = ((char*)cc)[1];
1026             *buf++ = ((char*)cc)[0];
1027             if (pa > 1)
1028             {
1029                 *buf++ = ((char*)cc)[3];
1030                 *buf++ = ((char*)cc)[2];
1031             }
1032         }
1033
1034         len += pa*sizeof(wxUint16);
1035         psz++;
1036     }
1037     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1038
1039     return len;
1040 }
1041
1042 #endif // WC_UTF16
1043
1044
1045 // ----------------------------------------------------------------------------
1046 // UTF-32
1047 // ----------------------------------------------------------------------------
1048
1049 #ifdef WORDS_BIGENDIAN
1050 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1051 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1052 #else
1053 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1054 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1055 #endif
1056
1057
1058 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1059 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1060
1061
1062 #ifdef WC_UTF16
1063
1064 // copy 32bit MB to 16bit String
1065 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1066 {
1067     size_t len=0;
1068
1069     while (*(wxUint32*)psz && (!buf || len < n))
1070     {
1071         wxUint16 cc[2];
1072
1073         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1074         if (pa == (size_t)-1)
1075             return pa;
1076
1077         if (buf)
1078         {
1079             *buf++ = cc[0];
1080             if (pa > 1)
1081                 *buf++ = cc[1];
1082         }
1083         len += pa;
1084         psz += sizeof(wxUint32);
1085     }
1086     if (buf && len<n)   *buf=0;
1087
1088     return len;
1089 }
1090
1091
1092 // copy 16bit String to 32bit MB
1093 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1094 {
1095     size_t len=0;
1096
1097     while (*psz && (!buf || len < n))
1098     {
1099         wxUint32 cc;
1100
1101         // cast is ok for WC_UTF16
1102         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1103         if (pa == (size_t)-1)
1104             return pa;
1105
1106         if (buf)
1107         {
1108             *(wxUint32*)buf = cc;
1109             buf += sizeof(wxUint32);
1110         }
1111         len += sizeof(wxUint32);
1112         psz += pa;
1113     }
1114
1115     if (buf && len<=n-sizeof(wxUint32))
1116         *(wxUint32*)buf=0;
1117
1118     return len;
1119 }
1120
1121
1122
1123 // swap 32bit MB to 16bit String
1124 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1125 {
1126     size_t len=0;
1127
1128     while (*(wxUint32*)psz && (!buf || len < n))
1129     {
1130         char tmp[4];
1131         tmp[0] = psz[3];   tmp[1] = psz[2];
1132         tmp[2] = psz[1];   tmp[3] = psz[0];
1133
1134
1135         wxUint16 cc[2];
1136
1137         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1138         if (pa == (size_t)-1)
1139             return pa;
1140
1141         if (buf)
1142         {
1143             *buf++ = cc[0];
1144             if (pa > 1)
1145                 *buf++ = cc[1];
1146         }
1147         len += pa;
1148         psz += sizeof(wxUint32);
1149     }
1150
1151     if (buf && len<n)
1152         *buf=0;
1153
1154     return len;
1155 }
1156
1157
1158 // swap 16bit String to 32bit MB
1159 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1160 {
1161     size_t len=0;
1162
1163     while (*psz && (!buf || len < n))
1164     {
1165         char cc[4];
1166
1167         // cast is ok for WC_UTF16
1168         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1169         if (pa == (size_t)-1)
1170             return pa;
1171
1172         if (buf)
1173         {
1174             *buf++ = cc[3];
1175             *buf++ = cc[2];
1176             *buf++ = cc[1];
1177             *buf++ = cc[0];
1178         }
1179         len += sizeof(wxUint32);
1180         psz += pa;
1181     }
1182
1183     if (buf && len<=n-sizeof(wxUint32))
1184         *(wxUint32*)buf=0;
1185
1186     return len;
1187 }
1188
1189 #else // WC_UTF16
1190
1191
1192 // copy 32bit MB to 32bit String
1193 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1194 {
1195     size_t len=0;
1196
1197     while (*(wxUint32*)psz && (!buf || len < n))
1198     {
1199         if (buf)
1200             *buf++ = *(wxUint32*)psz;
1201         len++;
1202         psz += sizeof(wxUint32);
1203     }
1204
1205     if (buf && len<n)
1206         *buf=0;
1207
1208     return len;
1209 }
1210
1211
1212 // copy 32bit String to 32bit MB
1213 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1214 {
1215     size_t len=0;
1216
1217     while (*psz && (!buf || len < n))
1218     {
1219         if (buf)
1220         {
1221             *(wxUint32*)buf = *psz;
1222             buf += sizeof(wxUint32);
1223         }
1224
1225         len += sizeof(wxUint32);
1226         psz++;
1227     }
1228
1229     if (buf && len<=n-sizeof(wxUint32))
1230         *(wxUint32*)buf=0;
1231
1232     return len;
1233 }
1234
1235
1236 // swap 32bit MB to 32bit String
1237 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1238 {
1239     size_t len=0;
1240
1241     while (*(wxUint32*)psz && (!buf || len < n))
1242     {
1243         if (buf)
1244         {
1245             ((char *)buf)[0] = psz[3];
1246             ((char *)buf)[1] = psz[2];
1247             ((char *)buf)[2] = psz[1];
1248             ((char *)buf)[3] = psz[0];
1249             buf++;
1250         }
1251         len++;
1252         psz += sizeof(wxUint32);
1253     }
1254
1255     if (buf && len<n)
1256         *buf=0;
1257
1258     return len;
1259 }
1260
1261
1262 // swap 32bit String to 32bit MB
1263 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1264 {
1265     size_t len=0;
1266
1267     while (*psz && (!buf || len < n))
1268     {
1269         if (buf)
1270         {
1271             *buf++ = ((char *)psz)[3];
1272             *buf++ = ((char *)psz)[2];
1273             *buf++ = ((char *)psz)[1];
1274             *buf++ = ((char *)psz)[0];
1275         }
1276         len += sizeof(wxUint32);
1277         psz++;
1278     }
1279
1280     if (buf && len<=n-sizeof(wxUint32))
1281         *(wxUint32*)buf=0;
1282
1283     return len;
1284 }
1285
1286
1287 #endif // WC_UTF16
1288
1289
1290 // ============================================================================
1291 // The classes doing conversion using the iconv_xxx() functions
1292 // ============================================================================
1293
1294 #ifdef HAVE_ICONV
1295
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if the output buffer is _exactly_ as big as needed. Such a case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when a _real_ error occurs, the number of
//     bytes left in the input buffer is non-zero. Hence this alternative test
//     for iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) takes the value returned by iconv() and the
// number of input bytes it left unconverted. The macro parameters are
// parenthesised so that arbitrary expressions can be passed to it.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// casts a pointer to the input pointer to the type used by the second
// parameter of iconv(), ICONV_CONST accounts for its optional constness
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1312
1313 // ----------------------------------------------------------------------------
1314 // wxMBConv_iconv: encapsulates an iconv character set
1315 // ----------------------------------------------------------------------------
1316
1317 class wxMBConv_iconv : public wxMBConv
1318 {
1319 public:
1320     wxMBConv_iconv(const wxChar *name);
1321     virtual ~wxMBConv_iconv();
1322
1323     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1324     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1325
1326     bool IsOk() const
1327         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1328
1329 protected:
1330     // the iconv handlers used to translate from multibyte to wide char and in
1331     // the other direction
1332     iconv_t m2w,
1333             w2m;
1334 #if wxUSE_THREADS
1335     // guards access to m2w and w2m objects
1336     wxMutex m_iconvMutex;
1337 #endif
1338
1339 private:
1340     // the name (for iconv_open()) of a wide char charset -- if none is
1341     // available on this machine, it will remain NULL
1342     static const char *ms_wcCharsetName;
1343
1344     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1345     // different endian-ness than the native one
1346     static bool ms_wcNeedsSwap;
1347 };
1348
1349 // make the constructor available for unit testing
1350 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1351 {
1352     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1353     if ( !result->IsOk() )
1354     {
1355         delete result;
1356         return 0;
1357     }
1358     return result;
1359 }
1360
1361 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1362 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1363
1364 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1365 {
1366     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1367     // names for the charsets
1368     const wxCharBuffer cname(wxString::ToAscii(name));
1369
1370     // check for charset that represents wchar_t:
1371     if (ms_wcCharsetName == NULL)
1372     {
1373         ms_wcNeedsSwap = false;
1374
1375         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1376         ms_wcCharsetName = WC_NAME_BEST;
1377         m2w = iconv_open(ms_wcCharsetName, cname);
1378
1379         if (m2w == (iconv_t)-1)
1380         {
1381             // try charset w/o bytesex info (e.g. "UCS4")
1382             // and check for bytesex ourselves:
1383             ms_wcCharsetName = WC_NAME;
1384             m2w = iconv_open(ms_wcCharsetName, cname);
1385
1386             // last bet, try if it knows WCHAR_T pseudo-charset
1387             if (m2w == (iconv_t)-1)
1388             {
1389                 ms_wcCharsetName = "WCHAR_T";
1390                 m2w = iconv_open(ms_wcCharsetName, cname);
1391             }
1392
1393             if (m2w != (iconv_t)-1)
1394             {
1395                 char    buf[2], *bufPtr;
1396                 wchar_t wbuf[2], *wbufPtr;
1397                 size_t  insz, outsz;
1398                 size_t  res;
1399
1400                 buf[0] = 'A';
1401                 buf[1] = 0;
1402                 wbuf[0] = 0;
1403                 insz = 2;
1404                 outsz = SIZEOF_WCHAR_T * 2;
1405                 wbufPtr = wbuf;
1406                 bufPtr = buf;
1407
1408                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1409                             (char**)&wbufPtr, &outsz);
1410
1411                 if (ICONV_FAILED(res, insz))
1412                 {
1413                     ms_wcCharsetName = NULL;
1414                     wxLogLastError(wxT("iconv"));
1415                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1416                 }
1417                 else
1418                 {
1419                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1420                 }
1421             }
1422             else
1423             {
1424                 ms_wcCharsetName = NULL;
1425
1426                 // VS: we must not output an error here, since wxWidgets will safely
1427                 //     fall back to using wxEncodingConverter.
1428                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1429                 //wxLogError(
1430             }
1431         }
1432         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1433     }
1434     else // we already have ms_wcCharsetName
1435     {
1436         m2w = iconv_open(ms_wcCharsetName, cname);
1437     }
1438
1439     // NB: don't ever pass NULL to iconv_open(), it may crash!
1440     if ( ms_wcCharsetName )
1441     {
1442         w2m = iconv_open( cname, ms_wcCharsetName);
1443     }
1444     else
1445     {
1446         w2m = (iconv_t)-1;
1447     }
1448 }
1449
1450 wxMBConv_iconv::~wxMBConv_iconv()
1451 {
1452     if ( m2w != (iconv_t)-1 )
1453         iconv_close(m2w);
1454     if ( w2m != (iconv_t)-1 )
1455         iconv_close(w2m);
1456 }
1457
1458 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1459 {
1460 #if wxUSE_THREADS
1461     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1462     //     Unfortunately there is a couple of global wxCSConv objects such as
1463     //     wxConvLocal that are used all over wx code, so we have to make sure
1464     //     the handle is used by at most one thread at the time. Otherwise
1465     //     only a few wx classes would be safe to use from non-main threads
1466     //     as MB<->WC conversion would fail "randomly".
1467     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1468 #endif
1469
1470     size_t inbuf = strlen(psz);
1471     size_t outbuf = n * SIZEOF_WCHAR_T;
1472     size_t res, cres;
1473     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1474     wchar_t *bufPtr = buf;
1475     const char *pszPtr = psz;
1476
1477     if (buf)
1478     {
1479         // have destination buffer, convert there
1480         cres = iconv(m2w,
1481                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1482                      (char**)&bufPtr, &outbuf);
1483         res = n - (outbuf / SIZEOF_WCHAR_T);
1484
1485         if (ms_wcNeedsSwap)
1486         {
1487             // convert to native endianness
1488             WC_BSWAP(buf /* _not_ bufPtr */, res)
1489         }
1490
1491         // NB: iconv was given only strlen(psz) characters on input, and so
1492         //     it couldn't convert the trailing zero. Let's do it ourselves
1493         //     if there's some room left for it in the output buffer.
1494         if (res < n)
1495             buf[res] = 0;
1496     }
1497     else
1498     {
1499         // no destination buffer... convert using temp buffer
1500         // to calculate destination buffer requirement
1501         wchar_t tbuf[8];
1502         res = 0;
1503         do {
1504             bufPtr = tbuf;
1505             outbuf = 8*SIZEOF_WCHAR_T;
1506
1507             cres = iconv(m2w,
1508                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1509                          (char**)&bufPtr, &outbuf );
1510
1511             res += 8-(outbuf/SIZEOF_WCHAR_T);
1512         } while ((cres==(size_t)-1) && (errno==E2BIG));
1513     }
1514
1515     if (ICONV_FAILED(cres, inbuf))
1516     {
1517         //VS: it is ok if iconv fails, hence trace only
1518         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1519         return (size_t)-1;
1520     }
1521
1522     return res;
1523 }
1524
1525 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1526 {
1527 #if wxUSE_THREADS
1528     // NB: explained in MB2WC
1529     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1530 #endif
1531
1532     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1533     size_t outbuf = n;
1534     size_t res, cres;
1535
1536     wchar_t *tmpbuf = 0;
1537
1538     if (ms_wcNeedsSwap)
1539     {
1540         // need to copy to temp buffer to switch endianness
1541         // this absolutely doesn't rock!
1542         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1543         //  could be in read-only memory, or be accessed in some other thread)
1544         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1545         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1546         WC_BSWAP(tmpbuf, inbuf)
1547         psz=tmpbuf;
1548     }
1549
1550     if (buf)
1551     {
1552         // have destination buffer, convert there
1553         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1554
1555         res = n-outbuf;
1556
1557         // NB: iconv was given only wcslen(psz) characters on input, and so
1558         //     it couldn't convert the trailing zero. Let's do it ourselves
1559         //     if there's some room left for it in the output buffer.
1560         if (res < n)
1561             buf[0] = 0;
1562     }
1563     else
1564     {
1565         // no destination buffer... convert using temp buffer
1566         // to calculate destination buffer requirement
1567         char tbuf[16];
1568         res = 0;
1569         do {
1570             buf = tbuf; outbuf = 16;
1571
1572             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1573
1574             res += 16 - outbuf;
1575         } while ((cres==(size_t)-1) && (errno==E2BIG));
1576     }
1577
1578     if (ms_wcNeedsSwap)
1579     {
1580         free(tmpbuf);
1581     }
1582
1583     if (ICONV_FAILED(cres, inbuf))
1584     {
1585         //VS: it is ok if iconv fails, hence trace only
1586         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1587         return (size_t)-1;
1588     }
1589
1590     return res;
1591 }
1592
1593 #endif // HAVE_ICONV
1594
1595
1596 // ============================================================================
1597 // Win32 conversion classes
1598 // ============================================================================
1599
1600 #ifdef wxHAVE_WIN32_MB2WC
1601
1602 // from utils.cpp
1603 #if wxUSE_FONTMAP
1604 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1605 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1606 #endif
1607
1608 class wxMBConv_win32 : public wxMBConv
1609 {
1610 public:
1611     wxMBConv_win32()
1612     {
1613         m_CodePage = CP_ACP;
1614     }
1615
1616 #if wxUSE_FONTMAP
1617     wxMBConv_win32(const wxChar* name)
1618     {
1619         m_CodePage = wxCharsetToCodepage(name);
1620     }
1621
1622     wxMBConv_win32(wxFontEncoding encoding)
1623     {
1624         m_CodePage = wxEncodingToCodepage(encoding);
1625     }
1626 #endif
1627
1628     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1629     {
1630         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1631         // the behaviour is not compatible with the Unix version (using iconv)
1632         // and break the library itself, e.g. wxTextInputStream::NextChar()
1633         // wouldn't work if reading an incomplete MB char didn't result in an
1634         // error
1635         //
1636         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1637         // an error (tested under Windows Server 2003) and apparently it is
1638         // done on purpose, i.e. the function accepts any input in this case
1639         // and although I'd prefer to return error on ill-formed output, our
1640         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1641         // explicitly ill-formed according to RFC 2152) neither so we don't
1642         // even have any fallback here...
1643         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1644
1645         const size_t len = ::MultiByteToWideChar
1646                              (
1647                                 m_CodePage,     // code page
1648                                 flags,          // flags: fall on error
1649                                 psz,            // input string
1650                                 -1,             // its length (NUL-terminated)
1651                                 buf,            // output string
1652                                 buf ? n : 0     // size of output buffer
1653                              );
1654
1655         // note that it returns count of written chars for buf != NULL and size
1656         // of the needed buffer for buf == NULL so in either case the length of
1657         // the string (which never includes the terminating NUL) is one less
1658         return len ? len - 1 : (size_t)-1;
1659     }
1660
1661     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1662     {
1663         /*
1664             we have a problem here: by default, WideCharToMultiByte() may
1665             replace characters unrepresentable in the target code page with bad
1666             quality approximations such as turning "1/2" symbol (U+00BD) into
1667             "1" for the code pages which don't have it and we, obviously, want
1668             to avoid this at any price
1669
1670             the trouble is that this function does it _silently_, i.e. it won't
1671             even tell us whether it did or not... Win98/2000 and higher provide
1672             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1673             we have to resort to a round trip, i.e. check that converting back
1674             results in the same string -- this is, of course, expensive but
1675             otherwise we simply can't be sure to not garble the data.
1676          */
1677
1678         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1679         // it doesn't work with CJK encodings (which we test for rather roughly
1680         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1681         // supporting it
1682         BOOL usedDef wxDUMMY_INITIALIZE(false);
1683         BOOL *pUsedDef;
1684         int flags;
1685         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1686         {
1687             // it's our lucky day
1688             flags = WC_NO_BEST_FIT_CHARS;
1689             pUsedDef = &usedDef;
1690         }
1691         else // old system or unsupported encoding
1692         {
1693             flags = 0;
1694             pUsedDef = NULL;
1695         }
1696
1697         const size_t len = ::WideCharToMultiByte
1698                              (
1699                                 m_CodePage,     // code page
1700                                 flags,          // either none or no best fit
1701                                 pwz,            // input string
1702                                 -1,             // it is (wide) NUL-terminated
1703                                 buf,            // output buffer
1704                                 buf ? n : 0,    // and its size
1705                                 NULL,           // default "replacement" char
1706                                 pUsedDef        // [out] was it used?
1707                              );
1708
1709         if ( !len )
1710         {
1711             // function totally failed
1712             return (size_t)-1;
1713         }
1714
1715         // if we were really converting, check if we succeeded
1716         if ( buf )
1717         {
1718             if ( flags )
1719             {
1720                 // check if the conversion failed, i.e. if any replacements
1721                 // were done
1722                 if ( usedDef )
1723                     return (size_t)-1;
1724             }
1725             else // we must resort to double tripping...
1726             {
1727                 wxWCharBuffer wcBuf(n);
1728                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1729                         wcscmp(wcBuf, pwz) != 0 )
1730                 {
1731                     // we didn't obtain the same thing we started from, hence
1732                     // the conversion was lossy and we consider that it failed
1733                     return (size_t)-1;
1734                 }
1735             }
1736         }
1737
1738         // see the comment above for the reason of "len - 1"
1739         return len - 1;
1740     }
1741
1742     bool IsOk() const { return m_CodePage != -1; }
1743
1744 private:
1745     static bool CanUseNoBestFit()
1746     {
1747         static int s_isWin98Or2k = -1;
1748
1749         if ( s_isWin98Or2k == -1 )
1750         {
1751             int verMaj, verMin;
1752             switch ( wxGetOsVersion(&verMaj, &verMin) )
1753             {
1754                 case wxWIN95:
1755                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1756                     break;
1757
1758                 case wxWINDOWS_NT:
1759                     s_isWin98Or2k = verMaj >= 5;
1760                     break;
1761
1762                 default:
1763                     // unknown, be conseravtive by default
1764                     s_isWin98Or2k = 0;
1765             }
1766
1767             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1768         }
1769
1770         return s_isWin98Or2k == 1;
1771     }
1772
1773     long m_CodePage;
1774 };
1775
1776 #endif // wxHAVE_WIN32_MB2WC
1777
1778 // ============================================================================
1779 // Cocoa conversion classes
1780 // ============================================================================
1781
1782 #if defined(__WXCOCOA__)
1783
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1787
1788 #include <CoreFoundation/CFString.h>
1789 #include <CoreFoundation/CFStringEncodingExt.h>
1790
1791 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1792 {
1793     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1794     if ( encoding == wxFONTENCODING_DEFAULT )
1795     {
1796         enc = CFStringGetSystemEncoding();
1797     }
1798     else switch( encoding)
1799     {
1800         case wxFONTENCODING_ISO8859_1 :
1801             enc = kCFStringEncodingISOLatin1 ;
1802             break ;
1803         case wxFONTENCODING_ISO8859_2 :
1804             enc = kCFStringEncodingISOLatin2;
1805             break ;
1806         case wxFONTENCODING_ISO8859_3 :
1807             enc = kCFStringEncodingISOLatin3 ;
1808             break ;
1809         case wxFONTENCODING_ISO8859_4 :
1810             enc = kCFStringEncodingISOLatin4;
1811             break ;
1812         case wxFONTENCODING_ISO8859_5 :
1813             enc = kCFStringEncodingISOLatinCyrillic;
1814             break ;
1815         case wxFONTENCODING_ISO8859_6 :
1816             enc = kCFStringEncodingISOLatinArabic;
1817             break ;
1818         case wxFONTENCODING_ISO8859_7 :
1819             enc = kCFStringEncodingISOLatinGreek;
1820             break ;
1821         case wxFONTENCODING_ISO8859_8 :
1822             enc = kCFStringEncodingISOLatinHebrew;
1823             break ;
1824         case wxFONTENCODING_ISO8859_9 :
1825             enc = kCFStringEncodingISOLatin5;
1826             break ;
1827         case wxFONTENCODING_ISO8859_10 :
1828             enc = kCFStringEncodingISOLatin6;
1829             break ;
1830         case wxFONTENCODING_ISO8859_11 :
1831             enc = kCFStringEncodingISOLatinThai;
1832             break ;
1833         case wxFONTENCODING_ISO8859_13 :
1834             enc = kCFStringEncodingISOLatin7;
1835             break ;
1836         case wxFONTENCODING_ISO8859_14 :
1837             enc = kCFStringEncodingISOLatin8;
1838             break ;
1839         case wxFONTENCODING_ISO8859_15 :
1840             enc = kCFStringEncodingISOLatin9;
1841             break ;
1842
1843         case wxFONTENCODING_KOI8 :
1844             enc = kCFStringEncodingKOI8_R;
1845             break ;
1846         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1847             enc = kCFStringEncodingDOSRussian;
1848             break ;
1849
1850 //      case wxFONTENCODING_BULGARIAN :
1851 //          enc = ;
1852 //          break ;
1853
1854         case wxFONTENCODING_CP437 :
1855             enc =kCFStringEncodingDOSLatinUS ;
1856             break ;
1857         case wxFONTENCODING_CP850 :
1858             enc = kCFStringEncodingDOSLatin1;
1859             break ;
1860         case wxFONTENCODING_CP852 :
1861             enc = kCFStringEncodingDOSLatin2;
1862             break ;
1863         case wxFONTENCODING_CP855 :
1864             enc = kCFStringEncodingDOSCyrillic;
1865             break ;
1866         case wxFONTENCODING_CP866 :
1867             enc =kCFStringEncodingDOSRussian ;
1868             break ;
1869         case wxFONTENCODING_CP874 :
1870             enc = kCFStringEncodingDOSThai;
1871             break ;
1872         case wxFONTENCODING_CP932 :
1873             enc = kCFStringEncodingDOSJapanese;
1874             break ;
1875         case wxFONTENCODING_CP936 :
1876             enc =kCFStringEncodingDOSChineseSimplif ;
1877             break ;
1878         case wxFONTENCODING_CP949 :
1879             enc = kCFStringEncodingDOSKorean;
1880             break ;
1881         case wxFONTENCODING_CP950 :
1882             enc = kCFStringEncodingDOSChineseTrad;
1883             break ;
1884         case wxFONTENCODING_CP1250 :
1885             enc = kCFStringEncodingWindowsLatin2;
1886             break ;
1887         case wxFONTENCODING_CP1251 :
1888             enc =kCFStringEncodingWindowsCyrillic ;
1889             break ;
1890         case wxFONTENCODING_CP1252 :
1891             enc =kCFStringEncodingWindowsLatin1 ;
1892             break ;
1893         case wxFONTENCODING_CP1253 :
1894             enc = kCFStringEncodingWindowsGreek;
1895             break ;
1896         case wxFONTENCODING_CP1254 :
1897             enc = kCFStringEncodingWindowsLatin5;
1898             break ;
1899         case wxFONTENCODING_CP1255 :
1900             enc =kCFStringEncodingWindowsHebrew ;
1901             break ;
1902         case wxFONTENCODING_CP1256 :
1903             enc =kCFStringEncodingWindowsArabic ;
1904             break ;
1905         case wxFONTENCODING_CP1257 :
1906             enc = kCFStringEncodingWindowsBalticRim;
1907             break ;
1908 //   This only really encodes to UTF7 (if that) evidently
1909 //        case wxFONTENCODING_UTF7 :
1910 //            enc = kCFStringEncodingNonLossyASCII ;
1911 //            break ;
1912         case wxFONTENCODING_UTF8 :
1913             enc = kCFStringEncodingUTF8 ;
1914             break ;
1915         case wxFONTENCODING_EUC_JP :
1916             enc = kCFStringEncodingEUC_JP;
1917             break ;
1918         case wxFONTENCODING_UTF16 :
1919             enc = kCFStringEncodingUnicode ;
1920             break ;
1921         case wxFONTENCODING_MACROMAN :
1922             enc = kCFStringEncodingMacRoman ;
1923             break ;
1924         case wxFONTENCODING_MACJAPANESE :
1925             enc = kCFStringEncodingMacJapanese ;
1926             break ;
1927         case wxFONTENCODING_MACCHINESETRAD :
1928             enc = kCFStringEncodingMacChineseTrad ;
1929             break ;
1930         case wxFONTENCODING_MACKOREAN :
1931             enc = kCFStringEncodingMacKorean ;
1932             break ;
1933         case wxFONTENCODING_MACARABIC :
1934             enc = kCFStringEncodingMacArabic ;
1935             break ;
1936         case wxFONTENCODING_MACHEBREW :
1937             enc = kCFStringEncodingMacHebrew ;
1938             break ;
1939         case wxFONTENCODING_MACGREEK :
1940             enc = kCFStringEncodingMacGreek ;
1941             break ;
1942         case wxFONTENCODING_MACCYRILLIC :
1943             enc = kCFStringEncodingMacCyrillic ;
1944             break ;
1945         case wxFONTENCODING_MACDEVANAGARI :
1946             enc = kCFStringEncodingMacDevanagari ;
1947             break ;
1948         case wxFONTENCODING_MACGURMUKHI :
1949             enc = kCFStringEncodingMacGurmukhi ;
1950             break ;
1951         case wxFONTENCODING_MACGUJARATI :
1952             enc = kCFStringEncodingMacGujarati ;
1953             break ;
1954         case wxFONTENCODING_MACORIYA :
1955             enc = kCFStringEncodingMacOriya ;
1956             break ;
1957         case wxFONTENCODING_MACBENGALI :
1958             enc = kCFStringEncodingMacBengali ;
1959             break ;
1960         case wxFONTENCODING_MACTAMIL :
1961             enc = kCFStringEncodingMacTamil ;
1962             break ;
1963         case wxFONTENCODING_MACTELUGU :
1964             enc = kCFStringEncodingMacTelugu ;
1965             break ;
1966         case wxFONTENCODING_MACKANNADA :
1967             enc = kCFStringEncodingMacKannada ;
1968             break ;
1969         case wxFONTENCODING_MACMALAJALAM :
1970             enc = kCFStringEncodingMacMalayalam ;
1971             break ;
1972         case wxFONTENCODING_MACSINHALESE :
1973             enc = kCFStringEncodingMacSinhalese ;
1974             break ;
1975         case wxFONTENCODING_MACBURMESE :
1976             enc = kCFStringEncodingMacBurmese ;
1977             break ;
1978         case wxFONTENCODING_MACKHMER :
1979             enc = kCFStringEncodingMacKhmer ;
1980             break ;
1981         case wxFONTENCODING_MACTHAI :
1982             enc = kCFStringEncodingMacThai ;
1983             break ;
1984         case wxFONTENCODING_MACLAOTIAN :
1985             enc = kCFStringEncodingMacLaotian ;
1986             break ;
1987         case wxFONTENCODING_MACGEORGIAN :
1988             enc = kCFStringEncodingMacGeorgian ;
1989             break ;
1990         case wxFONTENCODING_MACARMENIAN :
1991             enc = kCFStringEncodingMacArmenian ;
1992             break ;
1993         case wxFONTENCODING_MACCHINESESIMP :
1994             enc = kCFStringEncodingMacChineseSimp ;
1995             break ;
1996         case wxFONTENCODING_MACTIBETAN :
1997             enc = kCFStringEncodingMacTibetan ;
1998             break ;
1999         case wxFONTENCODING_MACMONGOLIAN :
2000             enc = kCFStringEncodingMacMongolian ;
2001             break ;
2002         case wxFONTENCODING_MACETHIOPIC :
2003             enc = kCFStringEncodingMacEthiopic ;
2004             break ;
2005         case wxFONTENCODING_MACCENTRALEUR :
2006             enc = kCFStringEncodingMacCentralEurRoman ;
2007             break ;
2008         case wxFONTENCODING_MACVIATNAMESE :
2009             enc = kCFStringEncodingMacVietnamese ;
2010             break ;
2011         case wxFONTENCODING_MACARABICEXT :
2012             enc = kCFStringEncodingMacExtArabic ;
2013             break ;
2014         case wxFONTENCODING_MACSYMBOL :
2015             enc = kCFStringEncodingMacSymbol ;
2016             break ;
2017         case wxFONTENCODING_MACDINGBATS :
2018             enc = kCFStringEncodingMacDingbats ;
2019             break ;
2020         case wxFONTENCODING_MACTURKISH :
2021             enc = kCFStringEncodingMacTurkish ;
2022             break ;
2023         case wxFONTENCODING_MACCROATIAN :
2024             enc = kCFStringEncodingMacCroatian ;
2025             break ;
2026         case wxFONTENCODING_MACICELANDIC :
2027             enc = kCFStringEncodingMacIcelandic ;
2028             break ;
2029         case wxFONTENCODING_MACROMANIAN :
2030             enc = kCFStringEncodingMacRomanian ;
2031             break ;
2032         case wxFONTENCODING_MACCELTIC :
2033             enc = kCFStringEncodingMacCeltic ;
2034             break ;
2035         case wxFONTENCODING_MACGAELIC :
2036             enc = kCFStringEncodingMacGaelic ;
2037             break ;
2038 //      case wxFONTENCODING_MACKEYBOARD :
2039 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2040 //          break ;
2041         default :
2042             // because gcc is picky
2043             break ;
2044     } ;
2045     return enc ;
2046 }
2047
2048 class wxMBConv_cocoa : public wxMBConv
2049 {
2050 public:
2051     wxMBConv_cocoa()
2052     {
2053         Init(CFStringGetSystemEncoding()) ;
2054     }
2055
2056 #if wxUSE_FONTMAP
2057     wxMBConv_cocoa(const wxChar* name)
2058     {
2059         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2060     }
2061 #endif
2062
2063     wxMBConv_cocoa(wxFontEncoding encoding)
2064     {
2065         Init( wxCFStringEncFromFontEnc(encoding) );
2066     }
2067
2068     ~wxMBConv_cocoa()
2069     {
2070     }
2071
2072     void Init( CFStringEncoding encoding)
2073     {
2074         m_encoding = encoding ;
2075     }
2076
2077     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2078     {
2079         wxASSERT(szUnConv);
2080
2081         CFStringRef theString = CFStringCreateWithBytes (
2082                                                 NULL, //the allocator
2083                                                 (const UInt8*)szUnConv,
2084                                                 strlen(szUnConv),
2085                                                 m_encoding,
2086                                                 false //no BOM/external representation
2087                                                 );
2088
2089         wxASSERT(theString);
2090
2091         size_t nOutLength = CFStringGetLength(theString);
2092
2093         if (szOut == NULL)
2094         {
2095             CFRelease(theString);
2096             return nOutLength;
2097         }
2098
2099         CFRange theRange = { 0, nOutSize };
2100
2101 #if SIZEOF_WCHAR_T == 4
2102         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2103 #endif
2104
2105         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2106
2107         CFRelease(theString);
2108
2109         szUniCharBuffer[nOutLength] = '\0' ;
2110
2111 #if SIZEOF_WCHAR_T == 4
2112         wxMBConvUTF16 converter ;
2113         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2114         delete[] szUniCharBuffer;
2115 #endif
2116
2117         return nOutLength;
2118     }
2119
2120     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2121     {
2122         wxASSERT(szUnConv);
2123
2124         size_t nRealOutSize;
2125         size_t nBufSize = wxWcslen(szUnConv);
2126         UniChar* szUniBuffer = (UniChar*) szUnConv;
2127
2128 #if SIZEOF_WCHAR_T == 4
2129         wxMBConvUTF16 converter ;
2130         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2131         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2132         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2133         nBufSize /= sizeof(UniChar);
2134 #endif
2135
2136         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2137                                 NULL, //allocator
2138                                 szUniBuffer,
2139                                 nBufSize,
2140                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2141                             );
2142
2143         wxASSERT(theString);
2144
2145         //Note that CER puts a BOM when converting to unicode
2146         //so we  check and use getchars instead in that case
2147         if (m_encoding == kCFStringEncodingUnicode)
2148         {
2149             if (szOut != NULL)
2150                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2151
2152             nRealOutSize = CFStringGetLength(theString) + 1;
2153         }
2154         else
2155         {
2156             CFStringGetBytes(
2157                 theString,
2158                 CFRangeMake(0, CFStringGetLength(theString)),
2159                 m_encoding,
2160                 0, //what to put in characters that can't be converted -
2161                     //0 tells CFString to return NULL if it meets such a character
2162                 false, //not an external representation
2163                 (UInt8*) szOut,
2164                 nOutSize,
2165                 (CFIndex*) &nRealOutSize
2166                         );
2167         }
2168
2169         CFRelease(theString);
2170
2171 #if SIZEOF_WCHAR_T == 4
2172         delete[] szUniBuffer;
2173 #endif
2174
2175         return  nRealOutSize - 1;
2176     }
2177
2178     bool IsOk() const
2179     {
2180         return m_encoding != kCFStringEncodingInvalidId &&
2181               CFStringIsEncodingAvailable(m_encoding);
2182     }
2183
2184 private:
2185     CFStringEncoding m_encoding ;
2186 };
2187
2188 #endif // defined(__WXCOCOA__)
2189
2190 // ============================================================================
2191 // Mac conversion classes
2192 // ============================================================================
2193
2194 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2195
2196 class wxMBConv_mac : public wxMBConv
2197 {
2198 public:
2199     wxMBConv_mac()
2200     {
2201         Init(CFStringGetSystemEncoding()) ;
2202     }
2203
2204 #if wxUSE_FONTMAP
2205     wxMBConv_mac(const wxChar* name)
2206     {
2207         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2208     }
2209 #endif
2210
2211     wxMBConv_mac(wxFontEncoding encoding)
2212     {
2213         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2214     }
2215
2216     ~wxMBConv_mac()
2217     {
2218         OSStatus status = noErr ;
2219         status = TECDisposeConverter(m_MB2WC_converter);
2220         status = TECDisposeConverter(m_WC2MB_converter);
2221     }
2222
2223
2224     void Init( TextEncodingBase encoding)
2225     {
2226         OSStatus status = noErr ;
2227         m_char_encoding = encoding ;
2228         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2229
2230         status = TECCreateConverter(&m_MB2WC_converter,
2231                                     m_char_encoding,
2232                                     m_unicode_encoding);
2233         status = TECCreateConverter(&m_WC2MB_converter,
2234                                     m_unicode_encoding,
2235                                     m_char_encoding);
2236     }
2237
2238     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2239     {
2240         OSStatus status = noErr ;
2241         ByteCount byteOutLen ;
2242         ByteCount byteInLen = strlen(psz) ;
2243         wchar_t *tbuf = NULL ;
2244         UniChar* ubuf = NULL ;
2245         size_t res = 0 ;
2246
2247         if (buf == NULL)
2248         {
2249             //apple specs say at least 32
2250             n = wxMax( 32 , byteInLen ) ;
2251             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2252         }
2253         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2254 #if SIZEOF_WCHAR_T == 4
2255         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2256 #else
2257         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2258 #endif
2259         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2260           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2261 #if SIZEOF_WCHAR_T == 4
2262         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2263         // is not properly terminated we get random characters at the end
2264         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2265         wxMBConvUTF16 converter ;
2266         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2267         free( ubuf ) ;
2268 #else
2269         res = byteOutLen / sizeof( UniChar ) ;
2270 #endif
2271         if ( buf == NULL )
2272              free(tbuf) ;
2273
2274         if ( buf  && res < n)
2275             buf[res] = 0;
2276
2277         return res ;
2278     }
2279
2280     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2281     {
2282         OSStatus status = noErr ;
2283         ByteCount byteOutLen ;
2284         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2285
2286         char *tbuf = NULL ;
2287
2288         if (buf == NULL)
2289         {
2290             //apple specs say at least 32
2291             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2292             tbuf = (char*) malloc( n ) ;
2293         }
2294
2295         ByteCount byteBufferLen = n ;
2296         UniChar* ubuf = NULL ;
2297 #if SIZEOF_WCHAR_T == 4
2298         wxMBConvUTF16 converter ;
2299         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2300         byteInLen = unicharlen ;
2301         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2302         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2303 #else
2304         ubuf = (UniChar*) psz ;
2305 #endif
2306         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2307             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2308 #if SIZEOF_WCHAR_T == 4
2309         free( ubuf ) ;
2310 #endif
2311         if ( buf == NULL )
2312             free(tbuf) ;
2313
2314         size_t res = byteOutLen ;
2315         if ( buf  && res < n)
2316         {
2317             buf[res] = 0;
2318
2319             //we need to double-trip to verify it didn't insert any ? in place
2320             //of bogus characters
2321             wxWCharBuffer wcBuf(n);
2322             size_t pszlen = wxWcslen(psz);
2323             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2324                         wxWcslen(wcBuf) != pszlen ||
2325                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2326             {
2327                 // we didn't obtain the same thing we started from, hence
2328                 // the conversion was lossy and we consider that it failed
2329                 return (size_t)-1;
2330             }
2331         }
2332
2333         return res ;
2334     }
2335
2336     bool IsOk() const
2337         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2338
2339 private:
2340     TECObjectRef m_MB2WC_converter ;
2341     TECObjectRef m_WC2MB_converter ;
2342
2343     TextEncodingBase m_char_encoding ;
2344     TextEncodingBase m_unicode_encoding ;
2345 };
2346
2347 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2348
2349 // ============================================================================
2350 // wxEncodingConverter based conversion classes
2351 // ============================================================================
2352
2353 #if wxUSE_FONTMAP
2354
2355 class wxMBConv_wxwin : public wxMBConv
2356 {
2357 private:
2358     void Init()
2359     {
2360         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2361                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2362     }
2363
2364 public:
2365     // temporarily just use wxEncodingConverter stuff,
2366     // so that it works while a better implementation is built
2367     wxMBConv_wxwin(const wxChar* name)
2368     {
2369         if (name)
2370             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2371         else
2372             m_enc = wxFONTENCODING_SYSTEM;
2373
2374         Init();
2375     }
2376
2377     wxMBConv_wxwin(wxFontEncoding enc)
2378     {
2379         m_enc = enc;
2380
2381         Init();
2382     }
2383
2384     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2385     {
2386         size_t inbuf = strlen(psz);
2387         if (buf)
2388         {
2389             if (!m2w.Convert(psz,buf))
2390                 return (size_t)-1;
2391         }
2392         return inbuf;
2393     }
2394
2395     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2396     {
2397         const size_t inbuf = wxWcslen(psz);
2398         if (buf)
2399         {
2400             if (!w2m.Convert(psz,buf))
2401                 return (size_t)-1;
2402         }
2403
2404         return inbuf;
2405     }
2406
2407     bool IsOk() const { return m_ok; }
2408
2409 public:
2410     wxFontEncoding m_enc;
2411     wxEncodingConverter m2w, w2m;
2412
2413     // were we initialized successfully?
2414     bool m_ok;
2415
2416     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2417 };
2418
2419 // make the constructors available for unit testing
2420 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2421 {
2422     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2423     if ( !result->IsOk() )
2424     {
2425         delete result;
2426         return 0;
2427     }
2428     return result;
2429 }
2430
2431 #endif // wxUSE_FONTMAP
2432
2433 // ============================================================================
2434 // wxCSConv implementation
2435 // ============================================================================
2436
2437 void wxCSConv::Init()
2438 {
2439     m_name = NULL;
2440     m_convReal =  NULL;
2441     m_deferred = true;
2442 }
2443
2444 wxCSConv::wxCSConv(const wxChar *charset)
2445 {
2446     Init();
2447
2448     if ( charset )
2449     {
2450         SetName(charset);
2451     }
2452
2453     m_encoding = wxFONTENCODING_SYSTEM;
2454 }
2455
2456 wxCSConv::wxCSConv(wxFontEncoding encoding)
2457 {
2458     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2459     {
2460         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2461
2462         encoding = wxFONTENCODING_SYSTEM;
2463     }
2464
2465     Init();
2466
2467     m_encoding = encoding;
2468 }
2469
2470 wxCSConv::~wxCSConv()
2471 {
2472     Clear();
2473 }
2474
2475 wxCSConv::wxCSConv(const wxCSConv& conv)
2476         : wxMBConv()
2477 {
2478     Init();
2479
2480     SetName(conv.m_name);
2481     m_encoding = conv.m_encoding;
2482 }
2483
2484 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2485 {
2486     Clear();
2487
2488     SetName(conv.m_name);
2489     m_encoding = conv.m_encoding;
2490
2491     return *this;
2492 }
2493
2494 void wxCSConv::Clear()
2495 {
2496     free(m_name);
2497     delete m_convReal;
2498
2499     m_name = NULL;
2500     m_convReal = NULL;
2501 }
2502
2503 void wxCSConv::SetName(const wxChar *charset)
2504 {
2505     if (charset)
2506     {
2507         m_name = wxStrdup(charset);
2508         m_deferred = true;
2509     }
2510 }
2511
2512 wxMBConv *wxCSConv::DoCreate() const
2513 {
2514     // check for the special case of ASCII or ISO8859-1 charset: as we have
2515     // special knowledge of it anyhow, we don't need to create a special
2516     // conversion object
2517     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2518     {
2519         // don't convert at all
2520         return NULL;
2521     }
2522
2523     // we trust OS to do conversion better than we can so try external
2524     // conversion methods first
2525     //
2526     // the full order is:
2527     //      1. OS conversion (iconv() under Unix or Win32 API)
2528     //      2. hard coded conversions for UTF
2529     //      3. wxEncodingConverter as fall back
2530
2531     // step (1)
2532 #ifdef HAVE_ICONV
2533 #if !wxUSE_FONTMAP
2534     if ( m_name )
2535 #endif // !wxUSE_FONTMAP
2536     {
2537         wxString name(m_name);
2538
2539 #if wxUSE_FONTMAP
2540         if ( name.empty() )
2541             name = wxFontMapperBase::GetEncodingName(m_encoding);
2542 #endif // wxUSE_FONTMAP
2543
2544         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2545         if ( conv->IsOk() )
2546             return conv;
2547
2548         delete conv;
2549     }
2550 #endif // HAVE_ICONV
2551
2552 #ifdef wxHAVE_WIN32_MB2WC
2553     {
2554 #if wxUSE_FONTMAP
2555         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2556                                       : new wxMBConv_win32(m_encoding);
2557         if ( conv->IsOk() )
2558             return conv;
2559
2560         delete conv;
2561 #else
2562         return NULL;
2563 #endif
2564     }
2565 #endif // wxHAVE_WIN32_MB2WC
2566 #if defined(__WXMAC__)
2567     {
2568         // leave UTF16 and UTF32 to the built-ins of wx
2569         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2570             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2571         {
2572
2573 #if wxUSE_FONTMAP
2574             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2575                                         : new wxMBConv_mac(m_encoding);
2576 #else
2577             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2578 #endif
2579             if ( conv->IsOk() )
2580                  return conv;
2581
2582             delete conv;
2583         }
2584     }
2585 #endif
2586 #if defined(__WXCOCOA__)
2587     {
2588         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2589         {
2590
2591 #if wxUSE_FONTMAP
2592             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2593                                           : new wxMBConv_cocoa(m_encoding);
2594 #else
2595             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2596 #endif
2597             if ( conv->IsOk() )
2598                  return conv;
2599
2600             delete conv;
2601         }
2602     }
2603 #endif
2604     // step (2)
2605     wxFontEncoding enc = m_encoding;
2606 #if wxUSE_FONTMAP
2607     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2608     {
2609         // use "false" to suppress interactive dialogs -- we can be called from
2610         // anywhere and popping up a dialog from here is the last thing we want to
2611         // do
2612         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2613     }
2614 #endif // wxUSE_FONTMAP
2615
2616     switch ( enc )
2617     {
2618         case wxFONTENCODING_UTF7:
2619              return new wxMBConvUTF7;
2620
2621         case wxFONTENCODING_UTF8:
2622              return new wxMBConvUTF8;
2623
2624         case wxFONTENCODING_UTF16BE:
2625              return new wxMBConvUTF16BE;
2626
2627         case wxFONTENCODING_UTF16LE:
2628              return new wxMBConvUTF16LE;
2629
2630         case wxFONTENCODING_UTF32BE:
2631              return new wxMBConvUTF32BE;
2632
2633         case wxFONTENCODING_UTF32LE:
2634              return new wxMBConvUTF32LE;
2635
2636         default:
2637              // nothing to do but put here to suppress gcc warnings
2638              ;
2639     }
2640
2641     // step (3)
2642 #if wxUSE_FONTMAP
2643     {
2644         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2645                                       : new wxMBConv_wxwin(m_encoding);
2646         if ( conv->IsOk() )
2647             return conv;
2648
2649         delete conv;
2650     }
2651 #endif // wxUSE_FONTMAP
2652
2653     // NB: This is a hack to prevent deadlock. What could otherwise happen
2654     //     in Unicode build: wxConvLocal creation ends up being here
2655     //     because of some failure and logs the error. But wxLog will try to
2656     //     attach timestamp, for which it will need wxConvLocal (to convert
2657     //     time to char* and then wchar_t*), but that fails, tries to log
2658     //     error, but wxLog has a (already locked) critical section that
2659     //     guards static buffer.
2660     static bool alreadyLoggingError = false;
2661     if (!alreadyLoggingError)
2662     {
2663         alreadyLoggingError = true;
2664         wxLogError(_("Cannot convert from the charset '%s'!"),
2665                    m_name ? m_name
2666                       :
2667 #if wxUSE_FONTMAP
2668                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2669 #else // !wxUSE_FONTMAP
2670                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2671 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2672               );
2673         alreadyLoggingError = false;
2674     }
2675
2676     return NULL;
2677 }
2678
2679 void wxCSConv::CreateConvIfNeeded() const
2680 {
2681     if ( m_deferred )
2682     {
2683         wxCSConv *self = (wxCSConv *)this; // const_cast
2684
2685 #if wxUSE_INTL
2686         // if we don't have neither the name nor the encoding, use the default
2687         // encoding for this system
2688         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2689         {
2690             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2691         }
2692 #endif // wxUSE_INTL
2693
2694         self->m_convReal = DoCreate();
2695         self->m_deferred = false;
2696     }
2697 }
2698
2699 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2700 {
2701     CreateConvIfNeeded();
2702
2703     if (m_convReal)
2704         return m_convReal->MB2WC(buf, psz, n);
2705
2706     // latin-1 (direct)
2707     size_t len = strlen(psz);
2708
2709     if (buf)
2710     {
2711         for (size_t c = 0; c <= len; c++)
2712             buf[c] = (unsigned char)(psz[c]);
2713     }
2714
2715     return len;
2716 }
2717
2718 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2719 {
2720     CreateConvIfNeeded();
2721
2722     if (m_convReal)
2723         return m_convReal->WC2MB(buf, psz, n);
2724
2725     // latin-1 (direct)
2726     const size_t len = wxWcslen(psz);
2727     if (buf)
2728     {
2729         for (size_t c = 0; c <= len; c++)
2730         {
2731             if (psz[c] > 0xFF)
2732                 return (size_t)-1;
2733             buf[c] = (char)psz[c];
2734         }
2735     }
2736     else
2737     {
2738         for (size_t c = 0; c <= len; c++)
2739         {
2740             if (psz[c] > 0xFF)
2741                 return (size_t)-1;
2742         }
2743     }
2744
2745     return len;
2746 }
2747
2748 // ----------------------------------------------------------------------------
2749 // globals
2750 // ----------------------------------------------------------------------------
2751
2752 #ifdef __WINDOWS__
2753     static wxMBConv_win32 wxConvLibcObj;
2754 #elif defined(__WXMAC__) && !defined(__MACH__)
2755     static wxMBConv_mac wxConvLibcObj ;
2756 #else
2757     static wxMBConvLibc wxConvLibcObj;
2758 #endif
2759
2760 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2761 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2762 static wxMBConvUTF7 wxConvUTF7Obj;
2763 static wxMBConvUTF8 wxConvUTF8Obj;
2764
2765 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2766 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2767 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2768 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2769 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2770 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2771 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2772 #ifdef __WXOSX__
2773                                     wxConvUTF8Obj;
2774 #else
2775                                     wxConvLibcObj;
2776 #endif
2777
2778
2779 #else // !wxUSE_WCHAR_T
2780
2781 // stand-ins in absence of wchar_t
2782 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2783                                 wxConvISO8859_1,
2784                                 wxConvLocal,
2785                                 wxConvUTF8;
2786
2787 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2788
2789