src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/private.h"
  45     #include "wx/msw/missing.h"
  46 #endif
  47
  48 #ifndef __WXWINCE__
  49 #include <errno.h>
  50 #endif
  51
  52 #include <ctype.h>
  53 #include <string.h>
  54 #include <stdlib.h>
  55
  56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  57     #define wxHAVE_WIN32_MB2WC
  58 #endif // __WIN32__ but !__WXMICROWIN__
  59
  60 // ----------------------------------------------------------------------------
  61 // headers
  62 // ----------------------------------------------------------------------------
  63
  64 #ifdef __SALFORDC__
  65     #include <clib.h>
  66 #endif
  67
  68 #ifdef HAVE_ICONV
  69     #include <iconv.h>
  70     #include "wx/thread.h"
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #ifndef __DARWIN__
  79 #include <ATSUnicode.h>
  80 #include <TextCommon.h>
  81 #include <TextEncodingConverter.h>
  82 #endif
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86
  87 #define TRACE_STRCONV _T("strconv")
  88
  89 // ----------------------------------------------------------------------------
  90 // macros
  91 // ----------------------------------------------------------------------------
  92
  93 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  94 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  95
// Select, depending on the configured size of wchar_t:
//  - WC_NAME:      generic name of the encoding matching wchar_t
//  - WC_NAME_BEST: the same encoding with an explicit byte order matching
//                  WORDS_BIGENDIAN
//  - WC_BSWAP:     the byte swapping macro for strings of wchar_t
//  - WC_UTF16:     defined only when wchar_t is 16 bits wide
// NOTE(review): the names look like charset names meant for iconv -- confirm
// against the code using them.
#if SIZEOF_WCHAR_T == 4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#elif SIZEOF_WCHAR_T == 2
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    // enables the surrogate-aware code paths guarded by #ifdef WC_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
 117
 118 // ============================================================================
 119 // implementation
 120 // ============================================================================
 121
 122 // ----------------------------------------------------------------------------
 123 // UTF-16 en/decoding to/from UCS-4
 124 // ----------------------------------------------------------------------------
 125
 126
 127 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 128 {
 129     if (input<=0xffff)
 130     {
 131         if (output)
 132             *output = (wxUint16) input;
 133         return 1;
 134     }
 135     else if (input>=0x110000)
 136     {
 137         return (size_t)-1;
 138     }
 139     else
 140     {
 141         if (output)
 142         {
 143             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 144             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 145         }
 146         return 2;
 147     }
 148 }
 149
 150 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 151 {
 152     if ((*input<0xd800) || (*input>0xdfff))
 153     {
 154         output = *input;
 155         return 1;
 156     }
 157     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 158     {
 159         output = *input;
 160         return (size_t)-1;
 161     }
 162     else
 163     {
 164         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 165         return 2;
 166     }
 167 }
 168
 169
 170 // ----------------------------------------------------------------------------
 171 // wxMBConv
 172 // ----------------------------------------------------------------------------
 173
 174 wxMBConv::~wxMBConv()
 175 {
 176     // nothing to do here (necessary for Darwin linking probably)
 177 }
 178
 179 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 180 {
 181     if ( psz )
 182     {
 183         // calculate the length of the buffer needed first
 184         size_t nLen = MB2WC(NULL, psz, 0);
 185         if ( nLen != (size_t)-1 )
 186         {
 187             // now do the actual conversion
 188             wxWCharBuffer buf(nLen);
 189             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 190             if ( nLen != (size_t)-1 )
 191             {
 192                 return buf;
 193             }
 194         }
 195     }
 196
 197     wxWCharBuffer buf((wchar_t *)NULL);
 198
 199     return buf;
 200 }
 201
 202 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 203 {
 204     if ( pwz )
 205     {
 206         size_t nLen = WC2MB(NULL, pwz, 0);
 207         if ( nLen != (size_t)-1 )
 208         {
 209             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 210             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 211             if ( nLen != (size_t)-1 )
 212             {
 213                 return buf;
 214             }
 215         }
 216     }
 217
 218     wxCharBuffer buf((char *)NULL);
 219
 220     return buf;
 221 }
 222
 223 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 224 {
 225     wxASSERT(pOutSize != NULL);
 226
 227     const char* szEnd = szString + nStringLen + 1;
 228     const char* szPos = szString;
 229     const char* szStart = szPos;
 230
 231     size_t nActualLength = 0;
 232     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 233
 234     wxWCharBuffer theBuffer(nCurrentSize);
 235
 236     //Convert the string until the length() is reached, continuing the
 237     //loop every time a null character is reached
 238     while(szPos != szEnd)
 239     {
 240         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 241
 242         //Get the length of the current (sub)string
 243         size_t nLen = MB2WC(NULL, szPos, 0);
 244
 245         //Invalid conversion?
 246         if( nLen == (size_t)-1 )
 247         {
 248             *pOutSize = 0;
 249             theBuffer.data()[0u] = wxT('\0');
 250             return theBuffer;
 251         }
 252
 253
 254         //Increase the actual length (+1 for current null character)
 255         nActualLength += nLen + 1;
 256
 257         //if buffer too big, realloc the buffer
 258         if (nActualLength > (nCurrentSize+1))
 259         {
 260             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 261             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 262             theBuffer = theNewBuffer;
 263             nCurrentSize <<= 1;
 264         }
 265
 266         //Convert the current (sub)string
 267         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 268         {
 269             *pOutSize = 0;
 270             theBuffer.data()[0u] = wxT('\0');
 271             return theBuffer;
 272         }
 273
 274         //Increment to next (sub)string
 275         //Note that we have to use strlen instead of nLen here
 276         //because XX2XX gives us the size of the output buffer,
 277         //which is not necessarily the length of the string
 278         szPos += strlen(szPos) + 1;
 279     }
 280
 281     //success - return actual length and the buffer
 282     *pOutSize = nActualLength;
 283     return theBuffer;
 284 }
 285
 286 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 287 {
 288     wxASSERT(pOutSize != NULL);
 289
 290     const wchar_t* szEnd = szString + nStringLen + 1;
 291     const wchar_t* szPos = szString;
 292     const wchar_t* szStart = szPos;
 293
 294     size_t nActualLength = 0;
 295     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 296
 297     wxCharBuffer theBuffer(nCurrentSize);
 298
 299     //Convert the string until the length() is reached, continuing the
 300     //loop every time a null character is reached
 301     while(szPos != szEnd)
 302     {
 303         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 304
 305         //Get the length of the current (sub)string
 306         size_t nLen = WC2MB(NULL, szPos, 0);
 307
 308         //Invalid conversion?
 309         if( nLen == (size_t)-1 )
 310         {
 311             *pOutSize = 0;
 312             theBuffer.data()[0u] = wxT('\0');
 313             return theBuffer;
 314         }
 315
 316         //Increase the actual length (+1 for current null character)
 317         nActualLength += nLen + 1;
 318
 319         //if buffer too big, realloc the buffer
 320         if (nActualLength > (nCurrentSize+1))
 321         {
 322             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 323             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 324             theBuffer = theNewBuffer;
 325             nCurrentSize <<= 1;
 326         }
 327
 328         //Convert the current (sub)string
 329         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 330         {
 331             *pOutSize = 0;
 332             theBuffer.data()[0u] = wxT('\0');
 333             return theBuffer;
 334         }
 335
 336         //Increment to next (sub)string
 337         //Note that we have to use wxWcslen instead of nLen here
 338         //because XX2XX gives us the size of the output buffer,
 339         //which is not necessarily the length of the string
 340         szPos += wxWcslen(szPos) + 1;
 341     }
 342
 343     //success - return actual length and the buffer
 344     *pOutSize = nActualLength;
 345     return theBuffer;
 346 }
 347
 348 // ----------------------------------------------------------------------------
 349 // wxMBConvLibc
 350 // ----------------------------------------------------------------------------
 351
 352 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 353 {
 354     return wxMB2WC(buf, psz, n);
 355 }
 356
 357 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 358 {
 359     return wxWC2MB(buf, psz, n);
 360 }
 361
 362 #ifdef __UNIX__
 363
 364 // ----------------------------------------------------------------------------
 365 // wxConvBrokenFileNames
 366 // ----------------------------------------------------------------------------
 367
 368 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 369 {
 370     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 371                   || wxStricmp(charset, _T("UTF8")) == 0  )
 372         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 373     else
 374         m_conv = new wxCSConv(charset);
 375 }
 376
 377 size_t
 378 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 379                              const char *psz,
 380                              size_t outputSize) const
 381 {
 382     return m_conv->MB2WC( outputBuf, psz, outputSize );
 383 }
 384
 385 size_t
 386 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 387                              const wchar_t *psz,
 388                              size_t outputSize) const
 389 {
 390     return m_conv->WC2MB( outputBuf, psz, outputSize );
 391 }
 392
 393 #endif
 394
 395 // ----------------------------------------------------------------------------
 396 // UTF-7
 397 // ----------------------------------------------------------------------------
 398
 399 // Implementation (C) 2004 Fredrik Roubert
 400
 401 //
 402 // BASE64 decoding table
 403 //
 404 static const unsigned char utf7unb64[] =
 405 {
 406     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 407     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 408     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 409     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 410     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 411     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 412     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 413     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 414     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 415     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 416     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 417     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 418     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 419     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 420     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 421     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 422     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 423     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 424     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 425     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 426     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 432     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 433     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 434     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 435     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 436     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 437     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 438 };
 439
 440 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 441 {
 442     size_t len = 0;
 443
 444     while (*psz && ((!buf) || (len < n)))
 445     {
 446         unsigned char cc = *psz++;
 447         if (cc != '+')
 448         {
 449             // plain ASCII char
 450             if (buf)
 451                 *buf++ = cc;
 452             len++;
 453         }
 454         else if (*psz == '-')
 455         {
 456             // encoded plus sign
 457             if (buf)
 458                 *buf++ = cc;
 459             len++;
 460             psz++;
 461         }
 462         else
 463         {
 464             // BASE64 encoded string
 465             bool lsb;
 466             unsigned char c;
 467             unsigned int d, l;
 468             for (lsb = false, d = 0, l = 0;
 469                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 470             {
 471                 d <<= 6;
 472                 d += cc;
 473                 for (l += 6; l >= 8; lsb = !lsb)
 474                 {
 475                     c = (unsigned char)((d >> (l -= 8)) % 256);
 476                     if (lsb)
 477                     {
 478                         if (buf)
 479                             *buf++ |= c;
 480                         len ++;
 481                     }
 482                     else
 483                         if (buf)
 484                             *buf = (wchar_t)(c << 8);
 485                 }
 486             }
 487             if (*psz == '-')
 488                 psz++;
 489         }
 490     }
 491     if (buf && (len < n))
 492         *buf = 0;
 493     return len;
 494 }
 495
 496 //
 497 // BASE64 encoding table
 498 //
 499 static const unsigned char utf7enb64[] =
 500 {
 501     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 502     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 503     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 504     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 505     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 506     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 507     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 508     '4', '5', '6', '7', '8', '9', '+', '/'
 509 };
 510
 511 //
 512 // UTF-7 encoding table
 513 //
 514 // 0 - Set D (directly encoded characters)
 515 // 1 - Set O (optional direct characters)
 516 // 2 - whitespace characters (optional)
 517 // 3 - special characters
 518 //
 519 static const unsigned char utf7encode[128] =
 520 {
 521     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 522     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 523     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 524     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 525     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 526     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 527     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 528     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 529 };
 530
 531 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 532 {
 533
 534
 535     size_t len = 0;
 536
 537     while (*psz && ((!buf) || (len < n)))
 538     {
 539         wchar_t cc = *psz++;
 540         if (cc < 0x80 && utf7encode[cc] < 1)
 541         {
 542             // plain ASCII char
 543             if (buf)
 544                 *buf++ = (char)cc;
 545             len++;
 546         }
 547 #ifndef WC_UTF16
 548         else if (((wxUint32)cc) > 0xffff)
 549         {
 550             // no surrogate pair generation (yet?)
 551             return (size_t)-1;
 552         }
 553 #endif
 554         else
 555         {
 556             if (buf)
 557                 *buf++ = '+';
 558             len++;
 559             if (cc != '+')
 560             {
 561                 // BASE64 encode string
 562                 unsigned int lsb, d, l;
 563                 for (d = 0, l = 0;; psz++)
 564                 {
 565                     for (lsb = 0; lsb < 2; lsb ++)
 566                     {
 567                         d <<= 8;
 568                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 569
 570                         for (l += 8; l >= 6; )
 571                         {
 572                             l -= 6;
 573                             if (buf)
 574                                 *buf++ = utf7enb64[(d >> l) % 64];
 575                             len++;
 576                         }
 577                     }
 578                     cc = *psz;
 579                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 580                         break;
 581                 }
 582                 if (l != 0)
 583                 {
 584                     if (buf)
 585                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 586                     len++;
 587                 }
 588             }
 589             if (buf)
 590                 *buf++ = '-';
 591             len++;
 592         }
 593     }
 594     if (buf && (len < n))
 595         *buf = 0;
 596     return len;
 597 }
 598
 599 // ----------------------------------------------------------------------------
 600 // UTF-8
 601 // ----------------------------------------------------------------------------
 602
 603 static wxUint32 utf8_max[]=
 604     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 605
 606 // boundaries of the private use area we use to (temporarily) remap invalid
 607 // characters invalid in a UTF-8 encoded string
 608 const wxUint32 wxUnicodePUA = 0x100000;
 609 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 610
 611 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 612 {
 613     size_t len = 0;
 614
 615     while (*psz && ((!buf) || (len < n)))
 616     {
 617         const char *opsz = psz;
 618         bool invalid = false;
 619         unsigned char cc = *psz++, fc = cc;
 620         unsigned cnt;
 621         for (cnt = 0; fc & 0x80; cnt++)
 622             fc <<= 1;
 623         if (!cnt)
 624         {
 625             // plain ASCII char
 626             if (buf)
 627                 *buf++ = cc;
 628             len++;
 629
 630             // escape the escape character for octal escapes
 631             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 632                     && cc == '\\' && (!buf || len < n))
 633             {
 634                 if (buf)
 635                     *buf++ = cc;
 636                 len++;
 637             }
 638         }
 639         else
 640         {
 641             cnt--;
 642             if (!cnt)
 643             {
 644                 // invalid UTF-8 sequence
 645                 invalid = true;
 646             }
 647             else
 648             {
 649                 unsigned ocnt = cnt - 1;
 650                 wxUint32 res = cc & (0x3f >> cnt);
 651                 while (cnt--)
 652                 {
 653                     cc = *psz;
 654                     if ((cc & 0xC0) != 0x80)
 655                     {
 656                         // invalid UTF-8 sequence
 657                         invalid = true;
 658                         break;
 659                     }
 660                     psz++;
 661                     res = (res << 6) | (cc & 0x3f);
 662                 }
 663                 if (invalid || res <= utf8_max[ocnt])
 664                 {
 665                     // illegal UTF-8 encoding
 666                     invalid = true;
 667                 }
 668                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 669                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 670                 {
 671                     // if one of our PUA characters turns up externally
 672                     // it must also be treated as an illegal sequence
 673                     // (a bit like you have to escape an escape character)
 674                     invalid = true;
 675                 }
 676                 else
 677                 {
 678 #ifdef WC_UTF16
 679                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 680                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 681                     if (pa == (size_t)-1)
 682                     {
 683                         invalid = true;
 684                     }
 685                     else
 686                     {
 687                         if (buf)
 688                             buf += pa;
 689                         len += pa;
 690                     }
 691 #else // !WC_UTF16
 692                     if (buf)
 693                         *buf++ = res;
 694                     len++;
 695 #endif // WC_UTF16/!WC_UTF16
 696                 }
 697             }
 698             if (invalid)
 699             {
 700                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 701                 {
 702                     while (opsz < psz && (!buf || len < n))
 703                     {
 704 #ifdef WC_UTF16
 705                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 706                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 707                         wxASSERT(pa != (size_t)-1);
 708                         if (buf)
 709                             buf += pa;
 710                         opsz++;
 711                         len += pa;
 712 #else
 713                         if (buf)
 714                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 715                         opsz++;
 716                         len++;
 717 #endif
 718                     }
 719                 }
 720                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 721                 {
 722                     while (opsz < psz && (!buf || len < n))
 723                     {
 724                         if ( buf && len + 3 < n )
 725                         {
 726                             unsigned char n = *opsz;
 727                             *buf++ = L'\\';
 728                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 729                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 730                             *buf++ = (wchar_t)( L'0' + n % 010 );
 731                         }
 732                         opsz++;
 733                         len += 4;
 734                     }
 735                 }
 736                 else // MAP_INVALID_UTF8_NOT
 737                 {
 738                     return (size_t)-1;
 739                 }
 740             }
 741         }
 742     }
 743     if (buf && (len < n))
 744         *buf = 0;
 745     return len;
 746 }
 747
 748 static inline bool isoctal(wchar_t wch)
 749 {
 750     return L'0' <= wch && wch <= L'7';
 751 }
 752
 753 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 754 {
 755     size_t len = 0;
 756
 757     while (*psz && ((!buf) || (len < n)))
 758     {
 759         wxUint32 cc;
 760 #ifdef WC_UTF16
 761         // cast is ok for WC_UTF16
 762         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 763         psz += (pa == (size_t)-1) ? 1 : pa;
 764 #else
 765         cc=(*psz++) & 0x7fffffff;
 766 #endif
 767
 768         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 769                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 770         {
 771             if (buf)
 772                 *buf++ = (char)(cc - wxUnicodePUA);
 773             len++;
 774         }
 775         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 776                     && cc == L'\\' && psz[0] == L'\\' )
 777         {
 778             if (buf)
 779                 *buf++ = (char)cc;
 780             psz++;
 781             len++;
 782         }
 783         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 784                     cc == L'\\' &&
 785                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 786         {
 787             if (buf)
 788             {
 789                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 790                                  (psz[1] - L'0')*010 +
 791                                  (psz[2] - L'0'));
 792             }
 793
 794             psz += 3;
 795             len++;
 796         }
 797         else
 798         {
 799             unsigned cnt;
 800             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 801             if (!cnt)
 802             {
 803                 // plain ASCII char
 804                 if (buf)
 805                     *buf++ = (char) cc;
 806                 len++;
 807             }
 808
 809             else
 810             {
 811                 len += cnt + 1;
 812                 if (buf)
 813                 {
 814                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 815                     while (cnt--)
 816                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 817                 }
 818             }
 819         }
 820     }
 821
 822     if (buf && (len<n))
 823         *buf = 0;
 824
 825     return len;
 826 }
 827
 828 // ----------------------------------------------------------------------------
 829 // UTF-16
 830 // ----------------------------------------------------------------------------
 831
// The methods below are implemented once for the "straight" case, in which
// the 16 bit units are copied without byte swapping, and once for the "swap"
// case, in which their bytes are exchanged: map these names to the BE and LE
// classes depending on WORDS_BIGENDIAN.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 839
 840
 841 #ifdef WC_UTF16
 842
 843 // copy 16bit MB to 16bit String
 844 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 845 {
 846     size_t len=0;
 847
 848     while (*(wxUint16*)psz && (!buf || len < n))
 849     {
 850         if (buf)
 851             *buf++ = *(wxUint16*)psz;
 852         len++;
 853
 854         psz += sizeof(wxUint16);
 855     }
 856     if (buf && len<n)   *buf=0;
 857
 858     return len;
 859 }
 860
 861
 862 // copy 16bit String to 16bit MB
 863 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 864 {
 865     size_t len=0;
 866
 867     while (*psz && (!buf || len < n))
 868     {
 869         if (buf)
 870         {
 871             *(wxUint16*)buf = *psz;
 872             buf += sizeof(wxUint16);
 873         }
 874         len += sizeof(wxUint16);
 875         psz++;
 876     }
 877     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 878
 879     return len;
 880 }
 881
 882
 883 // swap 16bit MB to 16bit String
 884 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 885 {
 886     size_t len=0;
 887
 888     while (*(wxUint16*)psz && (!buf || len < n))
 889     {
 890         if (buf)
 891         {
 892             ((char *)buf)[0] = psz[1];
 893             ((char *)buf)[1] = psz[0];
 894             buf++;
 895         }
 896         len++;
 897         psz += sizeof(wxUint16);
 898     }
 899     if (buf && len<n)   *buf=0;
 900
 901     return len;
 902 }
 903
 904
 905 // swap 16bit MB to 16bit String
 906 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 907 {
 908     size_t len=0;
 909
 910     while (*psz && (!buf || len < n))
 911     {
 912         if (buf)
 913         {
 914             *buf++ = ((char*)psz)[1];
 915             *buf++ = ((char*)psz)[0];
 916         }
 917         len += sizeof(wxUint16);
 918         psz++;
 919     }
 920     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 921
 922     return len;
 923 }
 924
 925
 926 #else // WC_UTF16
 927
 928
 929 // copy 16bit MB to 32bit String
 930 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 931 {
 932     size_t len=0;
 933
 934     while (*(wxUint16*)psz && (!buf || len < n))
 935     {
 936         wxUint32 cc;
 937         size_t pa=decode_utf16((wxUint16*)psz, cc);
 938         if (pa == (size_t)-1)
 939             return pa;
 940
 941         if (buf)
 942             *buf++ = cc;
 943         len++;
 944         psz += pa * sizeof(wxUint16);
 945     }
 946     if (buf && len<n)   *buf=0;
 947
 948     return len;
 949 }
 950
 951
 952 // copy 32bit String to 16bit MB
 953 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 954 {
 955     size_t len=0;
 956
 957     while (*psz && (!buf || len < n))
 958     {
 959         wxUint16 cc[2];
 960         size_t pa=encode_utf16(*psz, cc);
 961
 962         if (pa == (size_t)-1)
 963             return pa;
 964
 965         if (buf)
 966         {
 967             *(wxUint16*)buf = cc[0];
 968             buf += sizeof(wxUint16);
 969             if (pa > 1)
 970             {
 971                 *(wxUint16*)buf = cc[1];
 972                 buf += sizeof(wxUint16);
 973             }
 974         }
 975
 976         len += pa*sizeof(wxUint16);
 977         psz++;
 978     }
 979     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 980
 981     return len;
 982 }
 983
 984
 985 // swap 16bit MB to 32bit String
 986 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 987 {
 988     size_t len=0;
 989
 990     while (*(wxUint16*)psz && (!buf || len < n))
 991     {
 992         wxUint32 cc;
 993         char tmp[4];
 994         tmp[0]=psz[1];  tmp[1]=psz[0];
 995         tmp[2]=psz[3];  tmp[3]=psz[2];
 996
 997         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 998         if (pa == (size_t)-1)
 999             return pa;
1000
1001         if (buf)
1002             *buf++ = cc;
1003
1004         len++;
1005         psz += pa * sizeof(wxUint16);
1006     }
1007     if (buf && len<n)   *buf=0;
1008
1009     return len;
1010 }
1011
1012
1013 // swap 32bit String to 16bit MB
1014 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1015 {
1016     size_t len=0;
1017
1018     while (*psz && (!buf || len < n))
1019     {
1020         wxUint16 cc[2];
1021         size_t pa=encode_utf16(*psz, cc);
1022
1023         if (pa == (size_t)-1)
1024             return pa;
1025
1026         if (buf)
1027         {
1028             *buf++ = ((char*)cc)[1];
1029             *buf++ = ((char*)cc)[0];
1030             if (pa > 1)
1031             {
1032                 *buf++ = ((char*)cc)[3];
1033                 *buf++ = ((char*)cc)[2];
1034             }
1035         }
1036
1037         len += pa*sizeof(wxUint16);
1038         psz++;
1039     }
1040     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1041
1042     return len;
1043 }
1044
1045 #endif // WC_UTF16
1046
1047
1048 // ----------------------------------------------------------------------------
1049 // UTF-32
1050 // ----------------------------------------------------------------------------
1051
// "straight" names the converter class whose byte order is the one selected
// by WORDS_BIGENDIAN (BE if it is defined, LE otherwise) and "swap" names the
// other one; the member functions below are defined under these aliases
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif


// the global converter objects, one per UTF-32 byte order
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1063
1064
1065 #ifdef WC_UTF16
1066
1067 // copy 32bit MB to 16bit String
1068 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1069 {
1070     size_t len=0;
1071
1072     while (*(wxUint32*)psz && (!buf || len < n))
1073     {
1074         wxUint16 cc[2];
1075
1076         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1077         if (pa == (size_t)-1)
1078             return pa;
1079
1080         if (buf)
1081         {
1082             *buf++ = cc[0];
1083             if (pa > 1)
1084                 *buf++ = cc[1];
1085         }
1086         len += pa;
1087         psz += sizeof(wxUint32);
1088     }
1089     if (buf && len<n)   *buf=0;
1090
1091     return len;
1092 }
1093
1094
1095 // copy 16bit String to 32bit MB
1096 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1097 {
1098     size_t len=0;
1099
1100     while (*psz && (!buf || len < n))
1101     {
1102         wxUint32 cc;
1103
1104         // cast is ok for WC_UTF16
1105         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1106         if (pa == (size_t)-1)
1107             return pa;
1108
1109         if (buf)
1110         {
1111             *(wxUint32*)buf = cc;
1112             buf += sizeof(wxUint32);
1113         }
1114         len += sizeof(wxUint32);
1115         psz += pa;
1116     }
1117
1118     if (buf && len<=n-sizeof(wxUint32))
1119         *(wxUint32*)buf=0;
1120
1121     return len;
1122 }
1123
1124
1125
1126 // swap 32bit MB to 16bit String
1127 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1128 {
1129     size_t len=0;
1130
1131     while (*(wxUint32*)psz && (!buf || len < n))
1132     {
1133         char tmp[4];
1134         tmp[0] = psz[3];   tmp[1] = psz[2];
1135         tmp[2] = psz[1];   tmp[3] = psz[0];
1136
1137
1138         wxUint16 cc[2];
1139
1140         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1141         if (pa == (size_t)-1)
1142             return pa;
1143
1144         if (buf)
1145         {
1146             *buf++ = cc[0];
1147             if (pa > 1)
1148                 *buf++ = cc[1];
1149         }
1150         len += pa;
1151         psz += sizeof(wxUint32);
1152     }
1153
1154     if (buf && len<n)
1155         *buf=0;
1156
1157     return len;
1158 }
1159
1160
1161 // swap 16bit String to 32bit MB
1162 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1163 {
1164     size_t len=0;
1165
1166     while (*psz && (!buf || len < n))
1167     {
1168         char cc[4];
1169
1170         // cast is ok for WC_UTF16
1171         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1172         if (pa == (size_t)-1)
1173             return pa;
1174
1175         if (buf)
1176         {
1177             *buf++ = cc[3];
1178             *buf++ = cc[2];
1179             *buf++ = cc[1];
1180             *buf++ = cc[0];
1181         }
1182         len += sizeof(wxUint32);
1183         psz += pa;
1184     }
1185
1186     if (buf && len<=n-sizeof(wxUint32))
1187         *(wxUint32*)buf=0;
1188
1189     return len;
1190 }
1191
1192 #else // WC_UTF16
1193
1194
1195 // copy 32bit MB to 32bit String
1196 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1197 {
1198     size_t len=0;
1199
1200     while (*(wxUint32*)psz && (!buf || len < n))
1201     {
1202         if (buf)
1203             *buf++ = *(wxUint32*)psz;
1204         len++;
1205         psz += sizeof(wxUint32);
1206     }
1207
1208     if (buf && len<n)
1209         *buf=0;
1210
1211     return len;
1212 }
1213
1214
1215 // copy 32bit String to 32bit MB
1216 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1217 {
1218     size_t len=0;
1219
1220     while (*psz && (!buf || len < n))
1221     {
1222         if (buf)
1223         {
1224             *(wxUint32*)buf = *psz;
1225             buf += sizeof(wxUint32);
1226         }
1227
1228         len += sizeof(wxUint32);
1229         psz++;
1230     }
1231
1232     if (buf && len<=n-sizeof(wxUint32))
1233         *(wxUint32*)buf=0;
1234
1235     return len;
1236 }
1237
1238
1239 // swap 32bit MB to 32bit String
1240 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1241 {
1242     size_t len=0;
1243
1244     while (*(wxUint32*)psz && (!buf || len < n))
1245     {
1246         if (buf)
1247         {
1248             ((char *)buf)[0] = psz[3];
1249             ((char *)buf)[1] = psz[2];
1250             ((char *)buf)[2] = psz[1];
1251             ((char *)buf)[3] = psz[0];
1252             buf++;
1253         }
1254         len++;
1255         psz += sizeof(wxUint32);
1256     }
1257
1258     if (buf && len<n)
1259         *buf=0;
1260
1261     return len;
1262 }
1263
1264
1265 // swap 32bit String to 32bit MB
1266 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1267 {
1268     size_t len=0;
1269
1270     while (*psz && (!buf || len < n))
1271     {
1272         if (buf)
1273         {
1274             *buf++ = ((char *)psz)[3];
1275             *buf++ = ((char *)psz)[2];
1276             *buf++ = ((char *)psz)[1];
1277             *buf++ = ((char *)psz)[0];
1278         }
1279         len += sizeof(wxUint32);
1280         psz++;
1281     }
1282
1283     if (buf && len<=n-sizeof(wxUint32))
1284         *(wxUint32*)buf=0;
1285
1286     return len;
1287 }
1288
1289
1290 #endif // WC_UTF16
1291
1292
1293 // ============================================================================
1294 // The classes doing conversion using the iconv_xxx() functions
1295 // ============================================================================
1296
1297 #ifdef HAVE_ICONV
1298
1299 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1300 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1301 //     (unless there's yet another bug in glibc) the only case when iconv()
1302 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1303 //     left in the input buffer -- when _real_ error occurs,
1304 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1305 //     iconv() failure.
1306 //     [This bug does not appear in glibc 2.2.]
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
// glibc 2.0/2.1: a (size_t)-1 result only counts as a failure if it is not
// E2BIG with no input left
#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
                                     (errno != E2BIG || bufLeft != 0))
#else
// everywhere else any (size_t)-1 result is a failure; bufLeft is not used
#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
#endif

// cast x to "ICONV_CONST char **" (ICONV_CONST is defined outside of this
// part of the file); used below for the input buffer argument of iconv()
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1315
1316 // ----------------------------------------------------------------------------
1317 // wxMBConv_iconv: encapsulates an iconv character set
1318 // ----------------------------------------------------------------------------
1319
1320 class wxMBConv_iconv : public wxMBConv
1321 {
1322 public:
1323     wxMBConv_iconv(const wxChar *name);
1324     virtual ~wxMBConv_iconv();
1325
1326     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1327     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1328
1329     bool IsOk() const
1330         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1331
1332 protected:
1333     // the iconv handlers used to translate from multibyte to wide char and in
1334     // the other direction
1335     iconv_t m2w,
1336             w2m;
1337 #if wxUSE_THREADS
1338     // guards access to m2w and w2m objects
1339     wxMutex m_iconvMutex;
1340 #endif
1341
1342 private:
1343     // the name (for iconv_open()) of a wide char charset -- if none is
1344     // available on this machine, it will remain NULL
1345     static const char *ms_wcCharsetName;
1346
1347     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1348     // different endian-ness than the native one
1349     static bool ms_wcNeedsSwap;
1350 };
1351
1352 // make the constructor available for unit testing
1353 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1354 {
1355     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1356     if ( !result->IsOk() )
1357     {
1358         delete result;
1359         return 0;
1360     }
1361     return result;
1362 }
1363
// definitions of the static members: initially no wide char charset name is
// known (the constructor looks for one while it is NULL) and no byte swapping
// is assumed
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1366
1367 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1368 {
1369     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1370     // names for the charsets
1371     const wxCharBuffer cname(wxString(name).ToAscii());
1372
1373     // check for charset that represents wchar_t:
1374     if (ms_wcCharsetName == NULL)
1375     {
1376         ms_wcNeedsSwap = false;
1377
1378         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1379         ms_wcCharsetName = WC_NAME_BEST;
1380         m2w = iconv_open(ms_wcCharsetName, cname);
1381
1382         if (m2w == (iconv_t)-1)
1383         {
1384             // try charset w/o bytesex info (e.g. "UCS4")
1385             // and check for bytesex ourselves:
1386             ms_wcCharsetName = WC_NAME;
1387             m2w = iconv_open(ms_wcCharsetName, cname);
1388
1389             // last bet, try if it knows WCHAR_T pseudo-charset
1390             if (m2w == (iconv_t)-1)
1391             {
1392                 ms_wcCharsetName = "WCHAR_T";
1393                 m2w = iconv_open(ms_wcCharsetName, cname);
1394             }
1395
1396             if (m2w != (iconv_t)-1)
1397             {
1398                 char    buf[2], *bufPtr;
1399                 wchar_t wbuf[2], *wbufPtr;
1400                 size_t  insz, outsz;
1401                 size_t  res;
1402
1403                 buf[0] = 'A';
1404                 buf[1] = 0;
1405                 wbuf[0] = 0;
1406                 insz = 2;
1407                 outsz = SIZEOF_WCHAR_T * 2;
1408                 wbufPtr = wbuf;
1409                 bufPtr = buf;
1410
1411                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1412                             (char**)&wbufPtr, &outsz);
1413
1414                 if (ICONV_FAILED(res, insz))
1415                 {
1416                     ms_wcCharsetName = NULL;
1417                     wxLogLastError(wxT("iconv"));
1418                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1419                 }
1420                 else
1421                 {
1422                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1423                 }
1424             }
1425             else
1426             {
1427                 ms_wcCharsetName = NULL;
1428
1429                 // VS: we must not output an error here, since wxWidgets will safely
1430                 //     fall back to using wxEncodingConverter.
1431                 wxLogTrace(TRACE_STRCONV, wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1432             }
1433         }
1434         wxLogTrace(TRACE_STRCONV,
1435                    wxT("wchar_t charset is '%s', needs swap: %i"),
1436                    ms_wcCharsetName ? ms_wcCharsetName : "<none>", ms_wcNeedsSwap);
1437     }
1438     else // we already have ms_wcCharsetName
1439     {
1440         m2w = iconv_open(ms_wcCharsetName, cname);
1441     }
1442
1443     // NB: don't ever pass NULL to iconv_open(), it may crash!
1444     if ( ms_wcCharsetName )
1445     {
1446         w2m = iconv_open( cname, ms_wcCharsetName);
1447     }
1448     else
1449     {
1450         w2m = (iconv_t)-1;
1451     }
1452 }
1453
1454 wxMBConv_iconv::~wxMBConv_iconv()
1455 {
1456     if ( m2w != (iconv_t)-1 )
1457         iconv_close(m2w);
1458     if ( w2m != (iconv_t)-1 )
1459         iconv_close(w2m);
1460 }
1461
1462 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1463 {
1464 #if wxUSE_THREADS
1465     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1466     //     Unfortunately there is a couple of global wxCSConv objects such as
1467     //     wxConvLocal that are used all over wx code, so we have to make sure
1468     //     the handle is used by at most one thread at the time. Otherwise
1469     //     only a few wx classes would be safe to use from non-main threads
1470     //     as MB<->WC conversion would fail "randomly".
1471     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1472 #endif
1473
1474     size_t inbuf = strlen(psz);
1475     size_t outbuf = n * SIZEOF_WCHAR_T;
1476     size_t res, cres;
1477     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1478     wchar_t *bufPtr = buf;
1479     const char *pszPtr = psz;
1480
1481     if (buf)
1482     {
1483         // have destination buffer, convert there
1484         cres = iconv(m2w,
1485                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1486                      (char**)&bufPtr, &outbuf);
1487         res = n - (outbuf / SIZEOF_WCHAR_T);
1488
1489         if (ms_wcNeedsSwap)
1490         {
1491             // convert to native endianness
1492             WC_BSWAP(buf /* _not_ bufPtr */, res)
1493         }
1494
1495         // NB: iconv was given only strlen(psz) characters on input, and so
1496         //     it couldn't convert the trailing zero. Let's do it ourselves
1497         //     if there's some room left for it in the output buffer.
1498         if (res < n)
1499             buf[res] = 0;
1500     }
1501     else
1502     {
1503         // no destination buffer... convert using temp buffer
1504         // to calculate destination buffer requirement
1505         wchar_t tbuf[8];
1506         res = 0;
1507         do {
1508             bufPtr = tbuf;
1509             outbuf = 8*SIZEOF_WCHAR_T;
1510
1511             cres = iconv(m2w,
1512                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1513                          (char**)&bufPtr, &outbuf );
1514
1515             res += 8-(outbuf/SIZEOF_WCHAR_T);
1516         } while ((cres==(size_t)-1) && (errno==E2BIG));
1517     }
1518
1519     if (ICONV_FAILED(cres, inbuf))
1520     {
1521         //VS: it is ok if iconv fails, hence trace only
1522         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1523         return (size_t)-1;
1524     }
1525
1526     return res;
1527 }
1528
1529 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1530 {
1531 #if wxUSE_THREADS
1532     // NB: explained in MB2WC
1533     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1534 #endif
1535
1536     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1537     size_t outbuf = n;
1538     size_t res, cres;
1539
1540     wchar_t *tmpbuf = 0;
1541
1542     if (ms_wcNeedsSwap)
1543     {
1544         // need to copy to temp buffer to switch endianness
1545         // this absolutely doesn't rock!
1546         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1547         //  could be in read-only memory, or be accessed in some other thread)
1548         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1549         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1550         WC_BSWAP(tmpbuf, inbuf)
1551         psz=tmpbuf;
1552     }
1553
1554     if (buf)
1555     {
1556         // have destination buffer, convert there
1557         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1558
1559         res = n-outbuf;
1560
1561         // NB: iconv was given only wcslen(psz) characters on input, and so
1562         //     it couldn't convert the trailing zero. Let's do it ourselves
1563         //     if there's some room left for it in the output buffer.
1564         if (res < n)
1565             buf[0] = 0;
1566     }
1567     else
1568     {
1569         // no destination buffer... convert using temp buffer
1570         // to calculate destination buffer requirement
1571         char tbuf[16];
1572         res = 0;
1573         do {
1574             buf = tbuf; outbuf = 16;
1575
1576             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1577
1578             res += 16 - outbuf;
1579         } while ((cres==(size_t)-1) && (errno==E2BIG));
1580     }
1581
1582     if (ms_wcNeedsSwap)
1583     {
1584         free(tmpbuf);
1585     }
1586
1587     if (ICONV_FAILED(cres, inbuf))
1588     {
1589         //VS: it is ok if iconv fails, hence trace only
1590         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1591         return (size_t)-1;
1592     }
1593
1594     return res;
1595 }
1596
1597 #endif // HAVE_ICONV
1598
1599
1600 // ============================================================================
1601 // Win32 conversion classes
1602 // ============================================================================
1603
1604 #ifdef wxHAVE_WIN32_MB2WC
1605
1606 // from utils.cpp
1607 #if wxUSE_FONTMAP
1608 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1609 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1610 #endif
1611
1612 class wxMBConv_win32 : public wxMBConv
1613 {
1614 public:
1615     wxMBConv_win32()
1616     {
1617         m_CodePage = CP_ACP;
1618     }
1619
1620 #if wxUSE_FONTMAP
1621     wxMBConv_win32(const wxChar* name)
1622     {
1623         m_CodePage = wxCharsetToCodepage(name);
1624     }
1625
1626     wxMBConv_win32(wxFontEncoding encoding)
1627     {
1628         m_CodePage = wxEncodingToCodepage(encoding);
1629     }
1630 #endif
1631
1632     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1633     {
1634         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1635         // the behaviour is not compatible with the Unix version (using iconv)
1636         // and break the library itself, e.g. wxTextInputStream::NextChar()
1637         // wouldn't work if reading an incomplete MB char didn't result in an
1638         // error
1639         //
1640         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1641         // an error (tested under Windows Server 2003) and apparently it is
1642         // done on purpose, i.e. the function accepts any input in this case
1643         // and although I'd prefer to return error on ill-formed output, our
1644         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1645         // explicitly ill-formed according to RFC 2152) neither so we don't
1646         // even have any fallback here...
1647         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1648
1649         const size_t len = ::MultiByteToWideChar
1650                              (
1651                                 m_CodePage,     // code page
1652                                 flags,          // flags: fall on error
1653                                 psz,            // input string
1654                                 -1,             // its length (NUL-terminated)
1655                                 buf,            // output string
1656                                 buf ? n : 0     // size of output buffer
1657                              );
1658
1659         // note that it returns count of written chars for buf != NULL and size
1660         // of the needed buffer for buf == NULL so in either case the length of
1661         // the string (which never includes the terminating NUL) is one less
1662         return len ? len - 1 : (size_t)-1;
1663     }
1664
1665     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1666     {
1667         /*
1668             we have a problem here: by default, WideCharToMultiByte() may
1669             replace characters unrepresentable in the target code page with bad
1670             quality approximations such as turning "1/2" symbol (U+00BD) into
1671             "1" for the code pages which don't have it and we, obviously, want
1672             to avoid this at any price
1673
1674             the trouble is that this function does it _silently_, i.e. it won't
1675             even tell us whether it did or not... Win98/2000 and higher provide
1676             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1677             we have to resort to a round trip, i.e. check that converting back
1678             results in the same string -- this is, of course, expensive but
1679             otherwise we simply can't be sure to not garble the data.
1680          */
1681
1682         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1683         // it doesn't work with CJK encodings (which we test for rather roughly
1684         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1685         // supporting it
1686         BOOL usedDef wxDUMMY_INITIALIZE(false);
1687         BOOL *pUsedDef;
1688         int flags;
1689         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1690         {
1691             // it's our lucky day
1692             flags = WC_NO_BEST_FIT_CHARS;
1693             pUsedDef = &usedDef;
1694         }
1695         else // old system or unsupported encoding
1696         {
1697             flags = 0;
1698             pUsedDef = NULL;
1699         }
1700
1701         const size_t len = ::WideCharToMultiByte
1702                              (
1703                                 m_CodePage,     // code page
1704                                 flags,          // either none or no best fit
1705                                 pwz,            // input string
1706                                 -1,             // it is (wide) NUL-terminated
1707                                 buf,            // output buffer
1708                                 buf ? n : 0,    // and its size
1709                                 NULL,           // default "replacement" char
1710                                 pUsedDef        // [out] was it used?
1711                              );
1712
1713         if ( !len )
1714         {
1715             // function totally failed
1716             return (size_t)-1;
1717         }
1718
1719         // if we were really converting, check if we succeeded
1720         if ( buf )
1721         {
1722             if ( flags )
1723             {
1724                 // check if the conversion failed, i.e. if any replacements
1725                 // were done
1726                 if ( usedDef )
1727                     return (size_t)-1;
1728             }
1729             else // we must resort to double tripping...
1730             {
1731                 wxWCharBuffer wcBuf(n);
1732                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1733                         wcscmp(wcBuf, pwz) != 0 )
1734                 {
1735                     // we didn't obtain the same thing we started from, hence
1736                     // the conversion was lossy and we consider that it failed
1737                     return (size_t)-1;
1738                 }
1739             }
1740         }
1741
1742         // see the comment above for the reason of "len - 1"
1743         return len - 1;
1744     }
1745
1746     bool IsOk() const { return m_CodePage != -1; }
1747
1748 private:
1749     static bool CanUseNoBestFit()
1750     {
1751         static int s_isWin98Or2k = -1;
1752
1753         if ( s_isWin98Or2k == -1 )
1754         {
1755             int verMaj, verMin;
1756             switch ( wxGetOsVersion(&verMaj, &verMin) )
1757             {
1758                 case wxWIN95:
1759                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1760                     break;
1761
1762                 case wxWINDOWS_NT:
1763                     s_isWin98Or2k = verMaj >= 5;
1764                     break;
1765
1766                 default:
1767                     // unknown, be conseravtive by default
1768                     s_isWin98Or2k = 0;
1769             }
1770
1771             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1772         }
1773
1774         return s_isWin98Or2k == 1;
1775     }
1776
1777     long m_CodePage;
1778 };
1779
1780 #endif // wxHAVE_WIN32_MB2WC
1781
1782 // ============================================================================
1783 // Cocoa conversion classes
1784 // ============================================================================
1785
1786 #if defined(__WXCOCOA__)
1787
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1791
1792 #include <CoreFoundation/CFString.h>
1793 #include <CoreFoundation/CFStringEncodingExt.h>
1794
1795 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1796 {
1797     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1798     if ( encoding == wxFONTENCODING_DEFAULT )
1799     {
1800         enc = CFStringGetSystemEncoding();
1801     }
1802     else switch( encoding)
1803     {
1804         case wxFONTENCODING_ISO8859_1 :
1805             enc = kCFStringEncodingISOLatin1 ;
1806             break ;
1807         case wxFONTENCODING_ISO8859_2 :
1808             enc = kCFStringEncodingISOLatin2;
1809             break ;
1810         case wxFONTENCODING_ISO8859_3 :
1811             enc = kCFStringEncodingISOLatin3 ;
1812             break ;
1813         case wxFONTENCODING_ISO8859_4 :
1814             enc = kCFStringEncodingISOLatin4;
1815             break ;
1816         case wxFONTENCODING_ISO8859_5 :
1817             enc = kCFStringEncodingISOLatinCyrillic;
1818             break ;
1819         case wxFONTENCODING_ISO8859_6 :
1820             enc = kCFStringEncodingISOLatinArabic;
1821             break ;
1822         case wxFONTENCODING_ISO8859_7 :
1823             enc = kCFStringEncodingISOLatinGreek;
1824             break ;
1825         case wxFONTENCODING_ISO8859_8 :
1826             enc = kCFStringEncodingISOLatinHebrew;
1827             break ;
1828         case wxFONTENCODING_ISO8859_9 :
1829             enc = kCFStringEncodingISOLatin5;
1830             break ;
1831         case wxFONTENCODING_ISO8859_10 :
1832             enc = kCFStringEncodingISOLatin6;
1833             break ;
1834         case wxFONTENCODING_ISO8859_11 :
1835             enc = kCFStringEncodingISOLatinThai;
1836             break ;
1837         case wxFONTENCODING_ISO8859_13 :
1838             enc = kCFStringEncodingISOLatin7;
1839             break ;
1840         case wxFONTENCODING_ISO8859_14 :
1841             enc = kCFStringEncodingISOLatin8;
1842             break ;
1843         case wxFONTENCODING_ISO8859_15 :
1844             enc = kCFStringEncodingISOLatin9;
1845             break ;
1846
1847         case wxFONTENCODING_KOI8 :
1848             enc = kCFStringEncodingKOI8_R;
1849             break ;
1850         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1851             enc = kCFStringEncodingDOSRussian;
1852             break ;
1853
1854 //      case wxFONTENCODING_BULGARIAN :
1855 //          enc = ;
1856 //          break ;
1857
1858         case wxFONTENCODING_CP437 :
1859             enc =kCFStringEncodingDOSLatinUS ;
1860             break ;
1861         case wxFONTENCODING_CP850 :
1862             enc = kCFStringEncodingDOSLatin1;
1863             break ;
1864         case wxFONTENCODING_CP852 :
1865             enc = kCFStringEncodingDOSLatin2;
1866             break ;
1867         case wxFONTENCODING_CP855 :
1868             enc = kCFStringEncodingDOSCyrillic;
1869             break ;
1870         case wxFONTENCODING_CP866 :
1871             enc =kCFStringEncodingDOSRussian ;
1872             break ;
1873         case wxFONTENCODING_CP874 :
1874             enc = kCFStringEncodingDOSThai;
1875             break ;
1876         case wxFONTENCODING_CP932 :
1877             enc = kCFStringEncodingDOSJapanese;
1878             break ;
1879         case wxFONTENCODING_CP936 :
1880             enc =kCFStringEncodingDOSChineseSimplif ;
1881             break ;
1882         case wxFONTENCODING_CP949 :
1883             enc = kCFStringEncodingDOSKorean;
1884             break ;
1885         case wxFONTENCODING_CP950 :
1886             enc = kCFStringEncodingDOSChineseTrad;
1887             break ;
1888         case wxFONTENCODING_CP1250 :
1889             enc = kCFStringEncodingWindowsLatin2;
1890             break ;
1891         case wxFONTENCODING_CP1251 :
1892             enc =kCFStringEncodingWindowsCyrillic ;
1893             break ;
1894         case wxFONTENCODING_CP1252 :
1895             enc =kCFStringEncodingWindowsLatin1 ;
1896             break ;
1897         case wxFONTENCODING_CP1253 :
1898             enc = kCFStringEncodingWindowsGreek;
1899             break ;
1900         case wxFONTENCODING_CP1254 :
1901             enc = kCFStringEncodingWindowsLatin5;
1902             break ;
1903         case wxFONTENCODING_CP1255 :
1904             enc =kCFStringEncodingWindowsHebrew ;
1905             break ;
1906         case wxFONTENCODING_CP1256 :
1907             enc =kCFStringEncodingWindowsArabic ;
1908             break ;
1909         case wxFONTENCODING_CP1257 :
1910             enc = kCFStringEncodingWindowsBalticRim;
1911             break ;
1912 //   This only really encodes to UTF7 (if that) evidently
1913 //        case wxFONTENCODING_UTF7 :
1914 //            enc = kCFStringEncodingNonLossyASCII ;
1915 //            break ;
1916         case wxFONTENCODING_UTF8 :
1917             enc = kCFStringEncodingUTF8 ;
1918             break ;
1919         case wxFONTENCODING_EUC_JP :
1920             enc = kCFStringEncodingEUC_JP;
1921             break ;
1922         case wxFONTENCODING_UTF16 :
1923             enc = kCFStringEncodingUnicode ;
1924             break ;
1925         case wxFONTENCODING_MACROMAN :
1926             enc = kCFStringEncodingMacRoman ;
1927             break ;
1928         case wxFONTENCODING_MACJAPANESE :
1929             enc = kCFStringEncodingMacJapanese ;
1930             break ;
1931         case wxFONTENCODING_MACCHINESETRAD :
1932             enc = kCFStringEncodingMacChineseTrad ;
1933             break ;
1934         case wxFONTENCODING_MACKOREAN :
1935             enc = kCFStringEncodingMacKorean ;
1936             break ;
1937         case wxFONTENCODING_MACARABIC :
1938             enc = kCFStringEncodingMacArabic ;
1939             break ;
1940         case wxFONTENCODING_MACHEBREW :
1941             enc = kCFStringEncodingMacHebrew ;
1942             break ;
1943         case wxFONTENCODING_MACGREEK :
1944             enc = kCFStringEncodingMacGreek ;
1945             break ;
1946         case wxFONTENCODING_MACCYRILLIC :
1947             enc = kCFStringEncodingMacCyrillic ;
1948             break ;
1949         case wxFONTENCODING_MACDEVANAGARI :
1950             enc = kCFStringEncodingMacDevanagari ;
1951             break ;
1952         case wxFONTENCODING_MACGURMUKHI :
1953             enc = kCFStringEncodingMacGurmukhi ;
1954             break ;
1955         case wxFONTENCODING_MACGUJARATI :
1956             enc = kCFStringEncodingMacGujarati ;
1957             break ;
1958         case wxFONTENCODING_MACORIYA :
1959             enc = kCFStringEncodingMacOriya ;
1960             break ;
1961         case wxFONTENCODING_MACBENGALI :
1962             enc = kCFStringEncodingMacBengali ;
1963             break ;
1964         case wxFONTENCODING_MACTAMIL :
1965             enc = kCFStringEncodingMacTamil ;
1966             break ;
1967         case wxFONTENCODING_MACTELUGU :
1968             enc = kCFStringEncodingMacTelugu ;
1969             break ;
1970         case wxFONTENCODING_MACKANNADA :
1971             enc = kCFStringEncodingMacKannada ;
1972             break ;
1973         case wxFONTENCODING_MACMALAJALAM :
1974             enc = kCFStringEncodingMacMalayalam ;
1975             break ;
1976         case wxFONTENCODING_MACSINHALESE :
1977             enc = kCFStringEncodingMacSinhalese ;
1978             break ;
1979         case wxFONTENCODING_MACBURMESE :
1980             enc = kCFStringEncodingMacBurmese ;
1981             break ;
1982         case wxFONTENCODING_MACKHMER :
1983             enc = kCFStringEncodingMacKhmer ;
1984             break ;
1985         case wxFONTENCODING_MACTHAI :
1986             enc = kCFStringEncodingMacThai ;
1987             break ;
1988         case wxFONTENCODING_MACLAOTIAN :
1989             enc = kCFStringEncodingMacLaotian ;
1990             break ;
1991         case wxFONTENCODING_MACGEORGIAN :
1992             enc = kCFStringEncodingMacGeorgian ;
1993             break ;
1994         case wxFONTENCODING_MACARMENIAN :
1995             enc = kCFStringEncodingMacArmenian ;
1996             break ;
1997         case wxFONTENCODING_MACCHINESESIMP :
1998             enc = kCFStringEncodingMacChineseSimp ;
1999             break ;
2000         case wxFONTENCODING_MACTIBETAN :
2001             enc = kCFStringEncodingMacTibetan ;
2002             break ;
2003         case wxFONTENCODING_MACMONGOLIAN :
2004             enc = kCFStringEncodingMacMongolian ;
2005             break ;
2006         case wxFONTENCODING_MACETHIOPIC :
2007             enc = kCFStringEncodingMacEthiopic ;
2008             break ;
2009         case wxFONTENCODING_MACCENTRALEUR :
2010             enc = kCFStringEncodingMacCentralEurRoman ;
2011             break ;
2012         case wxFONTENCODING_MACVIATNAMESE :
2013             enc = kCFStringEncodingMacVietnamese ;
2014             break ;
2015         case wxFONTENCODING_MACARABICEXT :
2016             enc = kCFStringEncodingMacExtArabic ;
2017             break ;
2018         case wxFONTENCODING_MACSYMBOL :
2019             enc = kCFStringEncodingMacSymbol ;
2020             break ;
2021         case wxFONTENCODING_MACDINGBATS :
2022             enc = kCFStringEncodingMacDingbats ;
2023             break ;
2024         case wxFONTENCODING_MACTURKISH :
2025             enc = kCFStringEncodingMacTurkish ;
2026             break ;
2027         case wxFONTENCODING_MACCROATIAN :
2028             enc = kCFStringEncodingMacCroatian ;
2029             break ;
2030         case wxFONTENCODING_MACICELANDIC :
2031             enc = kCFStringEncodingMacIcelandic ;
2032             break ;
2033         case wxFONTENCODING_MACROMANIAN :
2034             enc = kCFStringEncodingMacRomanian ;
2035             break ;
2036         case wxFONTENCODING_MACCELTIC :
2037             enc = kCFStringEncodingMacCeltic ;
2038             break ;
2039         case wxFONTENCODING_MACGAELIC :
2040             enc = kCFStringEncodingMacGaelic ;
2041             break ;
2042 //      case wxFONTENCODING_MACKEYBOARD :
2043 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2044 //          break ;
2045         default :
2046             // because gcc is picky
2047             break ;
2048     } ;
2049     return enc ;
2050 }
2051
2052 class wxMBConv_cocoa : public wxMBConv
2053 {
2054 public:
2055     wxMBConv_cocoa()
2056     {
2057         Init(CFStringGetSystemEncoding()) ;
2058     }
2059
2060 #if wxUSE_FONTMAP
2061     wxMBConv_cocoa(const wxChar* name)
2062     {
2063         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2064     }
2065 #endif
2066
2067     wxMBConv_cocoa(wxFontEncoding encoding)
2068     {
2069         Init( wxCFStringEncFromFontEnc(encoding) );
2070     }
2071
2072     ~wxMBConv_cocoa()
2073     {
2074     }
2075
2076     void Init( CFStringEncoding encoding)
2077     {
2078         m_encoding = encoding ;
2079     }
2080
2081     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2082     {
2083         wxASSERT(szUnConv);
2084
2085         CFStringRef theString = CFStringCreateWithBytes (
2086                                                 NULL, //the allocator
2087                                                 (const UInt8*)szUnConv,
2088                                                 strlen(szUnConv),
2089                                                 m_encoding,
2090                                                 false //no BOM/external representation
2091                                                 );
2092
2093         wxASSERT(theString);
2094
2095         size_t nOutLength = CFStringGetLength(theString);
2096
2097         if (szOut == NULL)
2098         {
2099             CFRelease(theString);
2100             return nOutLength;
2101         }
2102
2103         CFRange theRange = { 0, nOutSize };
2104
2105 #if SIZEOF_WCHAR_T == 4
2106         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2107 #endif
2108
2109         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2110
2111         CFRelease(theString);
2112
2113         szUniCharBuffer[nOutLength] = '\0' ;
2114
2115 #if SIZEOF_WCHAR_T == 4
2116         wxMBConvUTF16 converter ;
2117         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2118         delete[] szUniCharBuffer;
2119 #endif
2120
2121         return nOutLength;
2122     }
2123
2124     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2125     {
2126         wxASSERT(szUnConv);
2127
2128         size_t nRealOutSize;
2129         size_t nBufSize = wxWcslen(szUnConv);
2130         UniChar* szUniBuffer = (UniChar*) szUnConv;
2131
2132 #if SIZEOF_WCHAR_T == 4
2133         wxMBConvUTF16 converter ;
2134         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2135         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2136         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2137         nBufSize /= sizeof(UniChar);
2138 #endif
2139
2140         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2141                                 NULL, //allocator
2142                                 szUniBuffer,
2143                                 nBufSize,
2144                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2145                             );
2146
2147         wxASSERT(theString);
2148
2149         //Note that CER puts a BOM when converting to unicode
2150         //so we  check and use getchars instead in that case
2151         if (m_encoding == kCFStringEncodingUnicode)
2152         {
2153             if (szOut != NULL)
2154                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2155
2156             nRealOutSize = CFStringGetLength(theString) + 1;
2157         }
2158         else
2159         {
2160             CFStringGetBytes(
2161                 theString,
2162                 CFRangeMake(0, CFStringGetLength(theString)),
2163                 m_encoding,
2164                 0, //what to put in characters that can't be converted -
2165                     //0 tells CFString to return NULL if it meets such a character
2166                 false, //not an external representation
2167                 (UInt8*) szOut,
2168                 nOutSize,
2169                 (CFIndex*) &nRealOutSize
2170                         );
2171         }
2172
2173         CFRelease(theString);
2174
2175 #if SIZEOF_WCHAR_T == 4
2176         delete[] szUniBuffer;
2177 #endif
2178
2179         return  nRealOutSize - 1;
2180     }
2181
2182     bool IsOk() const
2183     {
2184         return m_encoding != kCFStringEncodingInvalidId &&
2185               CFStringIsEncodingAvailable(m_encoding);
2186     }
2187
2188 private:
2189     CFStringEncoding m_encoding ;
2190 };
2191
2192 #endif // defined(__WXCOCOA__)
2193
2194 // ============================================================================
2195 // Mac conversion classes
2196 // ============================================================================
2197
2198 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2199
2200 class wxMBConv_mac : public wxMBConv
2201 {
2202 public:
2203     wxMBConv_mac()
2204     {
2205         Init(CFStringGetSystemEncoding()) ;
2206     }
2207
2208 #if wxUSE_FONTMAP
2209     wxMBConv_mac(const wxChar* name)
2210     {
2211         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2212     }
2213 #endif
2214
2215     wxMBConv_mac(wxFontEncoding encoding)
2216     {
2217         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2218     }
2219
2220     ~wxMBConv_mac()
2221     {
2222         OSStatus status = noErr ;
2223         status = TECDisposeConverter(m_MB2WC_converter);
2224         status = TECDisposeConverter(m_WC2MB_converter);
2225     }
2226
2227
2228     void Init( TextEncodingBase encoding)
2229     {
2230         OSStatus status = noErr ;
2231         m_char_encoding = encoding ;
2232         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2233
2234         status = TECCreateConverter(&m_MB2WC_converter,
2235                                     m_char_encoding,
2236                                     m_unicode_encoding);
2237         status = TECCreateConverter(&m_WC2MB_converter,
2238                                     m_unicode_encoding,
2239                                     m_char_encoding);
2240     }
2241
2242     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2243     {
2244         OSStatus status = noErr ;
2245         ByteCount byteOutLen ;
2246         ByteCount byteInLen = strlen(psz) ;
2247         wchar_t *tbuf = NULL ;
2248         UniChar* ubuf = NULL ;
2249         size_t res = 0 ;
2250
2251         if (buf == NULL)
2252         {
2253             //apple specs say at least 32
2254             n = wxMax( 32 , byteInLen ) ;
2255             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2256         }
2257         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2258 #if SIZEOF_WCHAR_T == 4
2259         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2260 #else
2261         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2262 #endif
2263         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2264           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2265 #if SIZEOF_WCHAR_T == 4
2266         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2267         // is not properly terminated we get random characters at the end
2268         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2269         wxMBConvUTF16 converter ;
2270         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2271         free( ubuf ) ;
2272 #else
2273         res = byteOutLen / sizeof( UniChar ) ;
2274 #endif
2275         if ( buf == NULL )
2276              free(tbuf) ;
2277
2278         if ( buf  && res < n)
2279             buf[res] = 0;
2280
2281         return res ;
2282     }
2283
2284     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2285     {
2286         OSStatus status = noErr ;
2287         ByteCount byteOutLen ;
2288         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2289
2290         char *tbuf = NULL ;
2291
2292         if (buf == NULL)
2293         {
2294             //apple specs say at least 32
2295             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2296             tbuf = (char*) malloc( n ) ;
2297         }
2298
2299         ByteCount byteBufferLen = n ;
2300         UniChar* ubuf = NULL ;
2301 #if SIZEOF_WCHAR_T == 4
2302         wxMBConvUTF16 converter ;
2303         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2304         byteInLen = unicharlen ;
2305         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2306         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2307 #else
2308         ubuf = (UniChar*) psz ;
2309 #endif
2310         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2311             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2312 #if SIZEOF_WCHAR_T == 4
2313         free( ubuf ) ;
2314 #endif
2315         if ( buf == NULL )
2316             free(tbuf) ;
2317
2318         size_t res = byteOutLen ;
2319         if ( buf  && res < n)
2320         {
2321             buf[res] = 0;
2322
2323             //we need to double-trip to verify it didn't insert any ? in place
2324             //of bogus characters
2325             wxWCharBuffer wcBuf(n);
2326             size_t pszlen = wxWcslen(psz);
2327             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2328                         wxWcslen(wcBuf) != pszlen ||
2329                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2330             {
2331                 // we didn't obtain the same thing we started from, hence
2332                 // the conversion was lossy and we consider that it failed
2333                 return (size_t)-1;
2334             }
2335         }
2336
2337         return res ;
2338     }
2339
2340     bool IsOk() const
2341         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2342
2343 private:
2344     TECObjectRef m_MB2WC_converter ;
2345     TECObjectRef m_WC2MB_converter ;
2346
2347     TextEncodingBase m_char_encoding ;
2348     TextEncodingBase m_unicode_encoding ;
2349 };
2350
2351 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2352
2353 // ============================================================================
2354 // wxEncodingConverter based conversion classes
2355 // ============================================================================
2356
2357 #if wxUSE_FONTMAP
2358
2359 class wxMBConv_wxwin : public wxMBConv
2360 {
2361 private:
2362     void Init()
2363     {
2364         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2365                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2366     }
2367
2368 public:
2369     // temporarily just use wxEncodingConverter stuff,
2370     // so that it works while a better implementation is built
2371     wxMBConv_wxwin(const wxChar* name)
2372     {
2373         if (name)
2374             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2375         else
2376             m_enc = wxFONTENCODING_SYSTEM;
2377
2378         Init();
2379     }
2380
2381     wxMBConv_wxwin(wxFontEncoding enc)
2382     {
2383         m_enc = enc;
2384
2385         Init();
2386     }
2387
2388     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2389     {
2390         size_t inbuf = strlen(psz);
2391         if (buf)
2392         {
2393             if (!m2w.Convert(psz,buf))
2394                 return (size_t)-1;
2395         }
2396         return inbuf;
2397     }
2398
2399     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2400     {
2401         const size_t inbuf = wxWcslen(psz);
2402         if (buf)
2403         {
2404             if (!w2m.Convert(psz,buf))
2405                 return (size_t)-1;
2406         }
2407
2408         return inbuf;
2409     }
2410
2411     bool IsOk() const { return m_ok; }
2412
2413 public:
2414     wxFontEncoding m_enc;
2415     wxEncodingConverter m2w, w2m;
2416
2417     // were we initialized successfully?
2418     bool m_ok;
2419
2420     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2421 };
2422
2423 // make the constructors available for unit testing
2424 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2425 {
2426     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2427     if ( !result->IsOk() )
2428     {
2429         delete result;
2430         return 0;
2431     }
2432     return result;
2433 }
2434
2435 #endif // wxUSE_FONTMAP
2436
2437 // ============================================================================
2438 // wxCSConv implementation
2439 // ============================================================================
2440
2441 void wxCSConv::Init()
2442 {
2443     m_name = NULL;
2444     m_convReal =  NULL;
2445     m_deferred = true;
2446 }
2447
2448 wxCSConv::wxCSConv(const wxChar *charset)
2449 {
2450     Init();
2451
2452     if ( charset )
2453     {
2454         SetName(charset);
2455     }
2456
2457     m_encoding = wxFONTENCODING_SYSTEM;
2458 }
2459
2460 wxCSConv::wxCSConv(wxFontEncoding encoding)
2461 {
2462     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2463     {
2464         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2465
2466         encoding = wxFONTENCODING_SYSTEM;
2467     }
2468
2469     Init();
2470
2471     m_encoding = encoding;
2472 }
2473
2474 wxCSConv::~wxCSConv()
2475 {
2476     Clear();
2477 }
2478
2479 wxCSConv::wxCSConv(const wxCSConv& conv)
2480         : wxMBConv()
2481 {
2482     Init();
2483
2484     SetName(conv.m_name);
2485     m_encoding = conv.m_encoding;
2486 }
2487
2488 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2489 {
2490     Clear();
2491
2492     SetName(conv.m_name);
2493     m_encoding = conv.m_encoding;
2494
2495     return *this;
2496 }
2497
2498 void wxCSConv::Clear()
2499 {
2500     free(m_name);
2501     delete m_convReal;
2502
2503     m_name = NULL;
2504     m_convReal = NULL;
2505 }
2506
2507 void wxCSConv::SetName(const wxChar *charset)
2508 {
2509     if (charset)
2510     {
2511         m_name = wxStrdup(charset);
2512         m_deferred = true;
2513     }
2514 }
2515
2516 #if wxUSE_FONTMAP
2517 #include "wx/hashmap.h"
2518
2519 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
2520                      wxEncodingNameCache );
2521
2522 static wxEncodingNameCache gs_nameCache;
2523 #endif
2524
2525 wxMBConv *wxCSConv::DoCreate() const
2526 {
2527 #if wxUSE_FONTMAP
2528     wxLogTrace(TRACE_STRCONV,
2529                wxT("creating conversion for %s"),
2530                (m_name ? m_name
2531                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2532 #endif // wxUSE_FONTMAP
2533
2534     // check for the special case of ASCII or ISO8859-1 charset: as we have
2535     // special knowledge of it anyhow, we don't need to create a special
2536     // conversion object
2537     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2538     {
2539         // don't convert at all
2540         return NULL;
2541     }
2542
2543     // we trust OS to do conversion better than we can so try external
2544     // conversion methods first
2545     //
2546     // the full order is:
2547     //      1. OS conversion (iconv() under Unix or Win32 API)
2548     //      2. hard coded conversions for UTF
2549     //      3. wxEncodingConverter as fall back
2550
2551     // step (1)
2552 #ifdef HAVE_ICONV
2553 #if !wxUSE_FONTMAP
2554     if ( m_name )
2555 #endif // !wxUSE_FONTMAP
2556     {
2557         wxString name(m_name);
2558         wxFontEncoding encoding(m_encoding);
2559
2560         if ( !name.empty() )
2561         {
2562             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2563             if ( conv->IsOk() )
2564                 return conv;
2565
2566             delete conv;
2567
2568 #if wxUSE_FONTMAP
2569             encoding =
2570                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2571 #endif // wxUSE_FONTMAP
2572         }
2573 #if wxUSE_FONTMAP
2574         {
2575             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2576             if ( it != gs_nameCache.end() )
2577             {
2578                 if ( it->second.empty() )
2579                     return NULL;
2580
2581                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2582                 if ( conv->IsOk() )
2583                     return conv;
2584
2585                 delete conv;
2586             }
2587
2588             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2589
2590             for ( ; *names; ++names )
2591             {
2592                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2593                 if ( conv->IsOk() )
2594                 {
2595                     gs_nameCache[encoding] = *names;
2596                     return conv;
2597                 }
2598
2599                 delete conv;
2600             }
2601
2602             gs_nameCache[encoding] = _T(""); // cache the failure
2603         }
2604 #endif // wxUSE_FONTMAP
2605     }
2606 #endif // HAVE_ICONV
2607
2608 #ifdef wxHAVE_WIN32_MB2WC
2609     {
2610 #if wxUSE_FONTMAP
2611         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2612                                       : new wxMBConv_win32(m_encoding);
2613         if ( conv->IsOk() )
2614             return conv;
2615
2616         delete conv;
2617 #else
2618         return NULL;
2619 #endif
2620     }
2621 #endif // wxHAVE_WIN32_MB2WC
2622 #if defined(__WXMAC__)
2623     {
2624         // leave UTF16 and UTF32 to the built-ins of wx
2625         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2626             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2627         {
2628
2629 #if wxUSE_FONTMAP
2630             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2631                                         : new wxMBConv_mac(m_encoding);
2632 #else
2633             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2634 #endif
2635             if ( conv->IsOk() )
2636                  return conv;
2637
2638             delete conv;
2639         }
2640     }
2641 #endif
2642 #if defined(__WXCOCOA__)
2643     {
2644         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2645         {
2646
2647 #if wxUSE_FONTMAP
2648             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2649                                           : new wxMBConv_cocoa(m_encoding);
2650 #else
2651             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2652 #endif
2653             if ( conv->IsOk() )
2654                  return conv;
2655
2656             delete conv;
2657         }
2658     }
2659 #endif
2660     // step (2)
2661     wxFontEncoding enc = m_encoding;
2662 #if wxUSE_FONTMAP
2663     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2664     {
2665         // use "false" to suppress interactive dialogs -- we can be called from
2666         // anywhere and popping up a dialog from here is the last thing we want to
2667         // do
2668         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2669     }
2670 #endif // wxUSE_FONTMAP
2671
2672     switch ( enc )
2673     {
2674         case wxFONTENCODING_UTF7:
2675              return new wxMBConvUTF7;
2676
2677         case wxFONTENCODING_UTF8:
2678              return new wxMBConvUTF8;
2679
2680         case wxFONTENCODING_UTF16BE:
2681              return new wxMBConvUTF16BE;
2682
2683         case wxFONTENCODING_UTF16LE:
2684              return new wxMBConvUTF16LE;
2685
2686         case wxFONTENCODING_UTF32BE:
2687              return new wxMBConvUTF32BE;
2688
2689         case wxFONTENCODING_UTF32LE:
2690              return new wxMBConvUTF32LE;
2691
2692         default:
2693              // nothing to do but put here to suppress gcc warnings
2694              ;
2695     }
2696
2697     // step (3)
2698 #if wxUSE_FONTMAP
2699     {
2700         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2701                                       : new wxMBConv_wxwin(m_encoding);
2702         if ( conv->IsOk() )
2703             return conv;
2704
2705         delete conv;
2706     }
2707 #endif // wxUSE_FONTMAP
2708
2709     // NB: This is a hack to prevent deadlock. What could otherwise happen
2710     //     in Unicode build: wxConvLocal creation ends up being here
2711     //     because of some failure and logs the error. But wxLog will try to
2712     //     attach timestamp, for which it will need wxConvLocal (to convert
2713     //     time to char* and then wchar_t*), but that fails, tries to log
2714     //     error, but wxLog has a (already locked) critical section that
2715     //     guards static buffer.
2716     static bool alreadyLoggingError = false;
2717     if (!alreadyLoggingError)
2718     {
2719         alreadyLoggingError = true;
2720         wxLogError(_("Cannot convert from the charset '%s'!"),
2721                    m_name ? m_name
2722                       :
2723 #if wxUSE_FONTMAP
2724                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2725 #else // !wxUSE_FONTMAP
2726                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2727 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2728               );
2729         alreadyLoggingError = false;
2730     }
2731
2732     return NULL;
2733 }
2734
2735 void wxCSConv::CreateConvIfNeeded() const
2736 {
2737     if ( m_deferred )
2738     {
2739         wxCSConv *self = (wxCSConv *)this; // const_cast
2740
2741 #if wxUSE_INTL
2742         // if we don't have neither the name nor the encoding, use the default
2743         // encoding for this system
2744         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2745         {
2746             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2747         }
2748 #endif // wxUSE_INTL
2749
2750         self->m_convReal = DoCreate();
2751         self->m_deferred = false;
2752     }
2753 }
2754
2755 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2756 {
2757     CreateConvIfNeeded();
2758
2759     if (m_convReal)
2760         return m_convReal->MB2WC(buf, psz, n);
2761
2762     // latin-1 (direct)
2763     size_t len = strlen(psz);
2764
2765     if (buf)
2766     {
2767         for (size_t c = 0; c <= len; c++)
2768             buf[c] = (unsigned char)(psz[c]);
2769     }
2770
2771     return len;
2772 }
2773
2774 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2775 {
2776     CreateConvIfNeeded();
2777
2778     if (m_convReal)
2779         return m_convReal->WC2MB(buf, psz, n);
2780
2781     // latin-1 (direct)
2782     const size_t len = wxWcslen(psz);
2783     if (buf)
2784     {
2785         for (size_t c = 0; c <= len; c++)
2786         {
2787             if (psz[c] > 0xFF)
2788                 return (size_t)-1;
2789             buf[c] = (char)psz[c];
2790         }
2791     }
2792     else
2793     {
2794         for (size_t c = 0; c <= len; c++)
2795         {
2796             if (psz[c] > 0xFF)
2797                 return (size_t)-1;
2798         }
2799     }
2800
2801     return len;
2802 }
2803
2804 // ----------------------------------------------------------------------------
2805 // globals
2806 // ----------------------------------------------------------------------------
2807
2808 #ifdef __WINDOWS__
2809     static wxMBConv_win32 wxConvLibcObj;
2810 #elif defined(__WXMAC__) && !defined(__MACH__)
2811     static wxMBConv_mac wxConvLibcObj ;
2812 #else
2813     static wxMBConvLibc wxConvLibcObj;
2814 #endif
2815
2816 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2817 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2818 static wxMBConvUTF7 wxConvUTF7Obj;
2819 static wxMBConvUTF8 wxConvUTF8Obj;
2820
2821 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2822 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2823 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2824 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2825 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2826 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2827 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
2828 #ifdef __WXOSX__
2829                                     wxConvUTF8Obj;
2830 #else
2831                                     wxConvLibcObj;
2832 #endif
2833
2834
2835 #else // !wxUSE_WCHAR_T
2836
2837 // stand-ins in absence of wchar_t
2838 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2839                                 wxConvISO8859_1,
2840                                 wxConvLocal,
2841                                 wxConvUTF8;
2842
2843 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2844
2845