src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/private.h"
  45     #include "wx/msw/missing.h"
  46 #endif
  47
  48 #ifndef __WXWINCE__
  49 #include <errno.h>
  50 #endif
  51
  52 #include <ctype.h>
  53 #include <string.h>
  54 #include <stdlib.h>
  55
  56 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  57     #define wxHAVE_WIN32_MB2WC
  58 #endif // __WIN32__ but !__WXMICROWIN__
  59
  60 // ----------------------------------------------------------------------------
  61 // headers
  62 // ----------------------------------------------------------------------------
  63
  64 #ifdef __SALFORDC__
  65     #include <clib.h>
  66 #endif
  67
  68 #ifdef HAVE_ICONV
  69     #include <iconv.h>
  70     #include "wx/thread.h"
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #ifndef __DARWIN__
  79 #include <ATSUnicode.h>
  80 #include <TextCommon.h>
  81 #include <TextEncodingConverter.h>
  82 #endif
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86
  87 #define TRACE_STRCONV _T("strconv")
  88
  89 // ----------------------------------------------------------------------------
  90 // macros
  91 // ----------------------------------------------------------------------------
  92
  93 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  94 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  95
  96 #if SIZEOF_WCHAR_T == 4
  97     #define WC_NAME         "UCS4"
  98     #define WC_BSWAP         BSWAP_UCS4
  99     #ifdef WORDS_BIGENDIAN
 100       #define WC_NAME_BEST  "UCS-4BE"
 101     #else
 102       #define WC_NAME_BEST  "UCS-4LE"
 103     #endif
 104 #elif SIZEOF_WCHAR_T == 2
 105     #define WC_NAME         "UTF16"
 106     #define WC_BSWAP         BSWAP_UTF16
 107     #define WC_UTF16
 108     #ifdef WORDS_BIGENDIAN
 109       #define WC_NAME_BEST  "UTF-16BE"
 110     #else
 111       #define WC_NAME_BEST  "UTF-16LE"
 112     #endif
 113 #else // sizeof(wchar_t) != 2 nor 4
 114     // does this ever happen?
 115     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 116 #endif
 117
 118 // ============================================================================
 119 // implementation
 120 // ============================================================================
 121
 122 // ----------------------------------------------------------------------------
 123 // UTF-16 en/decoding to/from UCS-4
 124 // ----------------------------------------------------------------------------
 125
 126
 127 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 128 {
 129     if (input<=0xffff)
 130     {
 131         if (output)
 132             *output = (wxUint16) input;
 133         return 1;
 134     }
 135     else if (input>=0x110000)
 136     {
 137         return (size_t)-1;
 138     }
 139     else
 140     {
 141         if (output)
 142         {
 143             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 144             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 145         }
 146         return 2;
 147     }
 148 }
 149
 150 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 151 {
 152     if ((*input<0xd800) || (*input>0xdfff))
 153     {
 154         output = *input;
 155         return 1;
 156     }
 157     else if ((input[1]<0xdc00) || (input[1]>0xdfff))
 158     {
 159         output = *input;
 160         return (size_t)-1;
 161     }
 162     else
 163     {
 164         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 165         return 2;
 166     }
 167 }
 168
 169
 170 // ----------------------------------------------------------------------------
 171 // wxMBConv
 172 // ----------------------------------------------------------------------------
 173
 174 wxMBConv::~wxMBConv()
 175 {
 176     // nothing to do here (necessary for Darwin linking probably)
 177 }
 178
 179 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 180 {
 181     if ( psz )
 182     {
 183         // calculate the length of the buffer needed first
 184         size_t nLen = MB2WC(NULL, psz, 0);
 185         if ( nLen != (size_t)-1 )
 186         {
 187             // now do the actual conversion
 188             wxWCharBuffer buf(nLen);
 189             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 190             if ( nLen != (size_t)-1 )
 191             {
 192                 return buf;
 193             }
 194         }
 195     }
 196
 197     wxWCharBuffer buf((wchar_t *)NULL);
 198
 199     return buf;
 200 }
 201
 202 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 203 {
 204     if ( pwz )
 205     {
 206         size_t nLen = WC2MB(NULL, pwz, 0);
 207         if ( nLen != (size_t)-1 )
 208         {
 209             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 210             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 211             if ( nLen != (size_t)-1 )
 212             {
 213                 return buf;
 214             }
 215         }
 216     }
 217
 218     wxCharBuffer buf((char *)NULL);
 219
 220     return buf;
 221 }
 222
 223 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 224 {
 225     wxASSERT(pOutSize != NULL);
 226
 227     const char* szEnd = szString + nStringLen + 1;
 228     const char* szPos = szString;
 229     const char* szStart = szPos;
 230
 231     size_t nActualLength = 0;
 232     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 233
 234     wxWCharBuffer theBuffer(nCurrentSize);
 235
 236     //Convert the string until the length() is reached, continuing the
 237     //loop every time a null character is reached
 238     while(szPos != szEnd)
 239     {
 240         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 241
 242         //Get the length of the current (sub)string
 243         size_t nLen = MB2WC(NULL, szPos, 0);
 244
 245         //Invalid conversion?
 246         if( nLen == (size_t)-1 )
 247         {
 248             *pOutSize = 0;
 249             theBuffer.data()[0u] = wxT('\0');
 250             return theBuffer;
 251         }
 252
 253
 254         //Increase the actual length (+1 for current null character)
 255         nActualLength += nLen + 1;
 256
 257         //if buffer too big, realloc the buffer
 258         if (nActualLength > (nCurrentSize+1))
 259         {
 260             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 261             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 262             theBuffer = theNewBuffer;
 263             nCurrentSize <<= 1;
 264         }
 265
 266         //Convert the current (sub)string
 267         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 268         {
 269             *pOutSize = 0;
 270             theBuffer.data()[0u] = wxT('\0');
 271             return theBuffer;
 272         }
 273
 274         //Increment to next (sub)string
 275         //Note that we have to use strlen instead of nLen here
 276         //because XX2XX gives us the size of the output buffer,
 277         //which is not necessarily the length of the string
 278         szPos += strlen(szPos) + 1;
 279     }
 280
 281     //success - return actual length and the buffer
 282     *pOutSize = nActualLength;
 283     return theBuffer;
 284 }
 285
 286 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 287 {
 288     wxASSERT(pOutSize != NULL);
 289
 290     const wchar_t* szEnd = szString + nStringLen + 1;
 291     const wchar_t* szPos = szString;
 292     const wchar_t* szStart = szPos;
 293
 294     size_t nActualLength = 0;
 295     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 296
 297     wxCharBuffer theBuffer(nCurrentSize);
 298
 299     //Convert the string until the length() is reached, continuing the
 300     //loop every time a null character is reached
 301     while(szPos != szEnd)
 302     {
 303         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 304
 305         //Get the length of the current (sub)string
 306         size_t nLen = WC2MB(NULL, szPos, 0);
 307
 308         //Invalid conversion?
 309         if( nLen == (size_t)-1 )
 310         {
 311             *pOutSize = 0;
 312             theBuffer.data()[0u] = wxT('\0');
 313             return theBuffer;
 314         }
 315
 316         //Increase the actual length (+1 for current null character)
 317         nActualLength += nLen + 1;
 318
 319         //if buffer too big, realloc the buffer
 320         if (nActualLength > (nCurrentSize+1))
 321         {
 322             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 323             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 324             theBuffer = theNewBuffer;
 325             nCurrentSize <<= 1;
 326         }
 327
 328         //Convert the current (sub)string
 329         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 330         {
 331             *pOutSize = 0;
 332             theBuffer.data()[0u] = wxT('\0');
 333             return theBuffer;
 334         }
 335
 336         //Increment to next (sub)string
 337         //Note that we have to use wxWcslen instead of nLen here
 338         //because XX2XX gives us the size of the output buffer,
 339         //which is not necessarily the length of the string
 340         szPos += wxWcslen(szPos) + 1;
 341     }
 342
 343     //success - return actual length and the buffer
 344     *pOutSize = nActualLength;
 345     return theBuffer;
 346 }
 347
 348 // ----------------------------------------------------------------------------
 349 // wxMBConvLibc
 350 // ----------------------------------------------------------------------------
 351
 352 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 353 {
 354     return wxMB2WC(buf, psz, n);
 355 }
 356
 357 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 358 {
 359     return wxWC2MB(buf, psz, n);
 360 }
 361
 362 #ifdef __UNIX__
 363
 364 // ----------------------------------------------------------------------------
 365 // wxConvBrokenFileNames
 366 // ----------------------------------------------------------------------------
 367
 368 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
 369 {
 370     if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
 371                   || wxStricmp(charset, _T("UTF8")) == 0  )
 372         m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
 373     else
 374         m_conv = new wxCSConv(charset);
 375 }
 376
 377 size_t
 378 wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
 379                              const char *psz,
 380                              size_t outputSize) const
 381 {
 382     return m_conv->MB2WC( outputBuf, psz, outputSize );
 383 }
 384
 385 size_t
 386 wxConvBrokenFileNames::WC2MB(char *outputBuf,
 387                              const wchar_t *psz,
 388                              size_t outputSize) const
 389 {
 390     return m_conv->WC2MB( outputBuf, psz, outputSize );
 391 }
 392
 393 #endif
 394
 395 // ----------------------------------------------------------------------------
 396 // UTF-7
 397 // ----------------------------------------------------------------------------
 398
 399 // Implementation (C) 2004 Fredrik Roubert
 400
 401 //
 402 // BASE64 decoding table
 403 //
 404 static const unsigned char utf7unb64[] =
 405 {
 406     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 407     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 408     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 409     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 410     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 411     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 412     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 413     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 414     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 415     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 416     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 417     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 418     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 419     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 420     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 421     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 422     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 423     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 424     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 425     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 426     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 427     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 428     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 429     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 430     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 431     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 432     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 433     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 434     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 435     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 436     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 437     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 438 };
 439
 440 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 441 {
 442     size_t len = 0;
 443
 444     while (*psz && ((!buf) || (len < n)))
 445     {
 446         unsigned char cc = *psz++;
 447         if (cc != '+')
 448         {
 449             // plain ASCII char
 450             if (buf)
 451                 *buf++ = cc;
 452             len++;
 453         }
 454         else if (*psz == '-')
 455         {
 456             // encoded plus sign
 457             if (buf)
 458                 *buf++ = cc;
 459             len++;
 460             psz++;
 461         }
 462         else
 463         {
 464             // BASE64 encoded string
 465             bool lsb;
 466             unsigned char c;
 467             unsigned int d, l;
 468             for (lsb = false, d = 0, l = 0;
 469                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 470             {
 471                 d <<= 6;
 472                 d += cc;
 473                 for (l += 6; l >= 8; lsb = !lsb)
 474                 {
 475                     c = (unsigned char)((d >> (l -= 8)) % 256);
 476                     if (lsb)
 477                     {
 478                         if (buf)
 479                             *buf++ |= c;
 480                         len ++;
 481                     }
 482                     else
 483                         if (buf)
 484                             *buf = (wchar_t)(c << 8);
 485                 }
 486             }
 487             if (*psz == '-')
 488                 psz++;
 489         }
 490     }
 491     if (buf && (len < n))
 492         *buf = 0;
 493     return len;
 494 }
 495
 496 //
 497 // BASE64 encoding table
 498 //
 499 static const unsigned char utf7enb64[] =
 500 {
 501     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 502     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 503     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 504     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 505     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 506     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 507     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 508     '4', '5', '6', '7', '8', '9', '+', '/'
 509 };
 510
 511 //
 512 // UTF-7 encoding table
 513 //
 514 // 0 - Set D (directly encoded characters)
 515 // 1 - Set O (optional direct characters)
 516 // 2 - whitespace characters (optional)
 517 // 3 - special characters
 518 //
 519 static const unsigned char utf7encode[128] =
 520 {
 521     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 522     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 523     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 524     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 525     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 526     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 527     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 528     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 529 };
 530
 531 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 532 {
 533
 534
 535     size_t len = 0;
 536
 537     while (*psz && ((!buf) || (len < n)))
 538     {
 539         wchar_t cc = *psz++;
 540         if (cc < 0x80 && utf7encode[cc] < 1)
 541         {
 542             // plain ASCII char
 543             if (buf)
 544                 *buf++ = (char)cc;
 545             len++;
 546         }
 547 #ifndef WC_UTF16
 548         else if (((wxUint32)cc) > 0xffff)
 549         {
 550             // no surrogate pair generation (yet?)
 551             return (size_t)-1;
 552         }
 553 #endif
 554         else
 555         {
 556             if (buf)
 557                 *buf++ = '+';
 558             len++;
 559             if (cc != '+')
 560             {
 561                 // BASE64 encode string
 562                 unsigned int lsb, d, l;
 563                 for (d = 0, l = 0;; psz++)
 564                 {
 565                     for (lsb = 0; lsb < 2; lsb ++)
 566                     {
 567                         d <<= 8;
 568                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 569
 570                         for (l += 8; l >= 6; )
 571                         {
 572                             l -= 6;
 573                             if (buf)
 574                                 *buf++ = utf7enb64[(d >> l) % 64];
 575                             len++;
 576                         }
 577                     }
 578                     cc = *psz;
 579                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 580                         break;
 581                 }
 582                 if (l != 0)
 583                 {
 584                     if (buf)
 585                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 586                     len++;
 587                 }
 588             }
 589             if (buf)
 590                 *buf++ = '-';
 591             len++;
 592         }
 593     }
 594     if (buf && (len < n))
 595         *buf = 0;
 596     return len;
 597 }
 598
 599 // ----------------------------------------------------------------------------
 600 // UTF-8
 601 // ----------------------------------------------------------------------------
 602
 603 static wxUint32 utf8_max[]=
 604     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 605
 606 // boundaries of the private use area we use to (temporarily) remap invalid
 607 // characters invalid in a UTF-8 encoded string
 608 const wxUint32 wxUnicodePUA = 0x100000;
 609 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
 610
 611 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 612 {
 613     size_t len = 0;
 614
 615     while (*psz && ((!buf) || (len < n)))
 616     {
 617         const char *opsz = psz;
 618         bool invalid = false;
 619         unsigned char cc = *psz++, fc = cc;
 620         unsigned cnt;
 621         for (cnt = 0; fc & 0x80; cnt++)
 622             fc <<= 1;
 623         if (!cnt)
 624         {
 625             // plain ASCII char
 626             if (buf)
 627                 *buf++ = cc;
 628             len++;
 629
 630             // escape the escape character for octal escapes
 631             if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
 632                     && cc == '\\' && (!buf || len < n))
 633             {
 634                 if (buf)
 635                     *buf++ = cc;
 636                 len++;
 637             }
 638         }
 639         else
 640         {
 641             cnt--;
 642             if (!cnt)
 643             {
 644                 // invalid UTF-8 sequence
 645                 invalid = true;
 646             }
 647             else
 648             {
 649                 unsigned ocnt = cnt - 1;
 650                 wxUint32 res = cc & (0x3f >> cnt);
 651                 while (cnt--)
 652                 {
 653                     cc = *psz;
 654                     if ((cc & 0xC0) != 0x80)
 655                     {
 656                         // invalid UTF-8 sequence
 657                         invalid = true;
 658                         break;
 659                     }
 660                     psz++;
 661                     res = (res << 6) | (cc & 0x3f);
 662                 }
 663                 if (invalid || res <= utf8_max[ocnt])
 664                 {
 665                     // illegal UTF-8 encoding
 666                     invalid = true;
 667                 }
 668                 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
 669                         res >= wxUnicodePUA && res < wxUnicodePUAEnd)
 670                 {
 671                     // if one of our PUA characters turns up externally
 672                     // it must also be treated as an illegal sequence
 673                     // (a bit like you have to escape an escape character)
 674                     invalid = true;
 675                 }
 676                 else
 677                 {
 678 #ifdef WC_UTF16
 679                     // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 680                     size_t pa = encode_utf16(res, (wxUint16 *)buf);
 681                     if (pa == (size_t)-1)
 682                     {
 683                         invalid = true;
 684                     }
 685                     else
 686                     {
 687                         if (buf)
 688                             buf += pa;
 689                         len += pa;
 690                     }
 691 #else // !WC_UTF16
 692                     if (buf)
 693                         *buf++ = res;
 694                     len++;
 695 #endif // WC_UTF16/!WC_UTF16
 696                 }
 697             }
 698             if (invalid)
 699             {
 700                 if (m_options & MAP_INVALID_UTF8_TO_PUA)
 701                 {
 702                     while (opsz < psz && (!buf || len < n))
 703                     {
 704 #ifdef WC_UTF16
 705                         // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 706                         size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
 707                         wxASSERT(pa != (size_t)-1);
 708                         if (buf)
 709                             buf += pa;
 710                         opsz++;
 711                         len += pa;
 712 #else
 713                         if (buf)
 714                             *buf++ = wxUnicodePUA + (unsigned char)*opsz;
 715                         opsz++;
 716                         len++;
 717 #endif
 718                     }
 719                 }
 720                 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 721                 {
 722                     while (opsz < psz && (!buf || len < n))
 723                     {
 724                         if ( buf && len + 3 < n )
 725                         {
 726                             unsigned char n = *opsz;
 727                             *buf++ = L'\\';
 728                             *buf++ = (wchar_t)( L'0' + n / 0100 );
 729                             *buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
 730                             *buf++ = (wchar_t)( L'0' + n % 010 );
 731                         }
 732                         opsz++;
 733                         len += 4;
 734                     }
 735                 }
 736                 else // MAP_INVALID_UTF8_NOT
 737                 {
 738                     return (size_t)-1;
 739                 }
 740             }
 741         }
 742     }
 743     if (buf && (len < n))
 744         *buf = 0;
 745     return len;
 746 }
 747
 748 static inline bool isoctal(wchar_t wch)
 749 {
 750     return L'0' <= wch && wch <= L'7';
 751 }
 752
 753 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 754 {
 755     size_t len = 0;
 756
 757     while (*psz && ((!buf) || (len < n)))
 758     {
 759         wxUint32 cc;
 760 #ifdef WC_UTF16
 761         // cast is ok for WC_UTF16
 762         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 763         psz += (pa == (size_t)-1) ? 1 : pa;
 764 #else
 765         cc=(*psz++) & 0x7fffffff;
 766 #endif
 767
 768         if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
 769                 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
 770         {
 771             if (buf)
 772                 *buf++ = (char)(cc - wxUnicodePUA);
 773             len++;
 774         }
 775         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
 776                     && cc == L'\\' && psz[0] == L'\\' )
 777         {
 778             if (buf)
 779                 *buf++ = (char)cc;
 780             psz++;
 781             len++;
 782         }
 783         else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
 784                     cc == L'\\' &&
 785                         isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
 786         {
 787             if (buf)
 788             {
 789                 *buf++ = (char) ((psz[0] - L'0')*0100 +
 790                                  (psz[1] - L'0')*010 +
 791                                  (psz[2] - L'0'));
 792             }
 793
 794             psz += 3;
 795             len++;
 796         }
 797         else
 798         {
 799             unsigned cnt;
 800             for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 801             if (!cnt)
 802             {
 803                 // plain ASCII char
 804                 if (buf)
 805                     *buf++ = (char) cc;
 806                 len++;
 807             }
 808
 809             else
 810             {
 811                 len += cnt + 1;
 812                 if (buf)
 813                 {
 814                     *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 815                     while (cnt--)
 816                         *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 817                 }
 818             }
 819         }
 820     }
 821
 822     if (buf && (len<n))
 823         *buf = 0;
 824
 825     return len;
 826 }
 827
 828 // ----------------------------------------------------------------------------
 829 // UTF-16
 830 // ----------------------------------------------------------------------------
 831
 832 #ifdef WORDS_BIGENDIAN
 833     #define wxMBConvUTF16straight wxMBConvUTF16BE
 834     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 835 #else
 836     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 837     #define wxMBConvUTF16straight wxMBConvUTF16LE
 838 #endif
 839
 840
 841 #ifdef WC_UTF16
 842
 843 // copy 16bit MB to 16bit String
 844 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 845 {
 846     size_t len=0;
 847
 848     while (*(wxUint16*)psz && (!buf || len < n))
 849     {
 850         if (buf)
 851             *buf++ = *(wxUint16*)psz;
 852         len++;
 853
 854         psz += sizeof(wxUint16);
 855     }
 856     if (buf && len<n)   *buf=0;
 857
 858     return len;
 859 }
 860
 861
 862 // copy 16bit String to 16bit MB
 863 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 864 {
 865     size_t len=0;
 866
 867     while (*psz && (!buf || len < n))
 868     {
 869         if (buf)
 870         {
 871             *(wxUint16*)buf = *psz;
 872             buf += sizeof(wxUint16);
 873         }
 874         len += sizeof(wxUint16);
 875         psz++;
 876     }
 877     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 878
 879     return len;
 880 }
 881
 882
 883 // swap 16bit MB to 16bit String
 884 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 885 {
 886     size_t len=0;
 887
 888     while (*(wxUint16*)psz && (!buf || len < n))
 889     {
 890         if (buf)
 891         {
 892             ((char *)buf)[0] = psz[1];
 893             ((char *)buf)[1] = psz[0];
 894             buf++;
 895         }
 896         len++;
 897         psz += sizeof(wxUint16);
 898     }
 899     if (buf && len<n)   *buf=0;
 900
 901     return len;
 902 }
 903
 904
// swap 16bit String to 16bit MB
 906 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 907 {
 908     size_t len=0;
 909
 910     while (*psz && (!buf || len < n))
 911     {
 912         if (buf)
 913         {
 914             *buf++ = ((char*)psz)[1];
 915             *buf++ = ((char*)psz)[0];
 916         }
 917         len += sizeof(wxUint16);
 918         psz++;
 919     }
 920     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 921
 922     return len;
 923 }
 924
 925
 926 #else // WC_UTF16
 927
 928
 929 // copy 16bit MB to 32bit String
 930 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 931 {
 932     size_t len=0;
 933
 934     while (*(wxUint16*)psz && (!buf || len < n))
 935     {
 936         wxUint32 cc;
 937         size_t pa=decode_utf16((wxUint16*)psz, cc);
 938         if (pa == (size_t)-1)
 939             return pa;
 940
 941         if (buf)
 942             *buf++ = cc;
 943         len++;
 944         psz += pa * sizeof(wxUint16);
 945     }
 946     if (buf && len<n)   *buf=0;
 947
 948     return len;
 949 }
 950
 951
 952 // copy 32bit String to 16bit MB
 953 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 954 {
 955     size_t len=0;
 956
 957     while (*psz && (!buf || len < n))
 958     {
 959         wxUint16 cc[2];
 960         size_t pa=encode_utf16(*psz, cc);
 961
 962         if (pa == (size_t)-1)
 963             return pa;
 964
 965         if (buf)
 966         {
 967             *(wxUint16*)buf = cc[0];
 968             buf += sizeof(wxUint16);
 969             if (pa > 1)
 970             {
 971                 *(wxUint16*)buf = cc[1];
 972                 buf += sizeof(wxUint16);
 973             }
 974         }
 975
 976         len += pa*sizeof(wxUint16);
 977         psz++;
 978     }
 979     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 980
 981     return len;
 982 }
 983
 984
 985 // swap 16bit MB to 32bit String
 986 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 987 {
 988     size_t len=0;
 989
 990     while (*(wxUint16*)psz && (!buf || len < n))
 991     {
 992         wxUint32 cc;
 993         char tmp[4];
 994         tmp[0]=psz[1];  tmp[1]=psz[0];
 995         tmp[2]=psz[3];  tmp[3]=psz[2];
 996
 997         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 998         if (pa == (size_t)-1)
 999             return pa;
1000
1001         if (buf)
1002             *buf++ = cc;
1003
1004         len++;
1005         psz += pa * sizeof(wxUint16);
1006     }
1007     if (buf && len<n)   *buf=0;
1008
1009     return len;
1010 }
1011
1012
1013 // swap 32bit String to 16bit MB
1014 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1015 {
1016     size_t len=0;
1017
1018     while (*psz && (!buf || len < n))
1019     {
1020         wxUint16 cc[2];
1021         size_t pa=encode_utf16(*psz, cc);
1022
1023         if (pa == (size_t)-1)
1024             return pa;
1025
1026         if (buf)
1027         {
1028             *buf++ = ((char*)cc)[1];
1029             *buf++ = ((char*)cc)[0];
1030             if (pa > 1)
1031             {
1032                 *buf++ = ((char*)cc)[3];
1033                 *buf++ = ((char*)cc)[2];
1034             }
1035         }
1036
1037         len += pa*sizeof(wxUint16);
1038         psz++;
1039     }
1040     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
1041
1042     return len;
1043 }
1044
1045 #endif // WC_UTF16
1046
1047
1048 // ----------------------------------------------------------------------------
1049 // UTF-32
1050 // ----------------------------------------------------------------------------
1051
1052 #ifdef WORDS_BIGENDIAN
     // WORDS_BIGENDIAN defined: the non-swapping ("straight") implementation
     // below is the BE class and the byte-swapping one is the LE class
1053 #define wxMBConvUTF32straight  wxMBConvUTF32BE
1054 #define wxMBConvUTF32swap      wxMBConvUTF32LE
1055 #else
     // otherwise the roles of the two classes are exchanged
1056 #define wxMBConvUTF32swap      wxMBConvUTF32BE
1057 #define wxMBConvUTF32straight  wxMBConvUTF32LE
1058 #endif
1059
1060
     // definitions of the wxConvUTF32LE and wxConvUTF32BE objects
1061 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1062 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1063
1064
1065 #ifdef WC_UTF16
1066
1067 // copy 32bit MB to 16bit String
1068 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1069 {
1070     size_t len=0;
1071
1072     while (*(wxUint32*)psz && (!buf || len < n))
1073     {
1074         wxUint16 cc[2];
1075
1076         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
1077         if (pa == (size_t)-1)
1078             return pa;
1079
1080         if (buf)
1081         {
1082             *buf++ = cc[0];
1083             if (pa > 1)
1084                 *buf++ = cc[1];
1085         }
1086         len += pa;
1087         psz += sizeof(wxUint32);
1088     }
1089     if (buf && len<n)   *buf=0;
1090
1091     return len;
1092 }
1093
1094
1095 // copy 16bit String to 32bit MB
1096 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1097 {
1098     size_t len=0;
1099
1100     while (*psz && (!buf || len < n))
1101     {
1102         wxUint32 cc;
1103
1104         // cast is ok for WC_UTF16
1105         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
1106         if (pa == (size_t)-1)
1107             return pa;
1108
1109         if (buf)
1110         {
1111             *(wxUint32*)buf = cc;
1112             buf += sizeof(wxUint32);
1113         }
1114         len += sizeof(wxUint32);
1115         psz += pa;
1116     }
1117
1118     if (buf && len<=n-sizeof(wxUint32))
1119         *(wxUint32*)buf=0;
1120
1121     return len;
1122 }
1123
1124
1125
1126 // swap 32bit MB to 16bit String
1127 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1128 {
1129     size_t len=0;
1130
1131     while (*(wxUint32*)psz && (!buf || len < n))
1132     {
1133         char tmp[4];
1134         tmp[0] = psz[3];   tmp[1] = psz[2];
1135         tmp[2] = psz[1];   tmp[3] = psz[0];
1136
1137
1138         wxUint16 cc[2];
1139
1140         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
1141         if (pa == (size_t)-1)
1142             return pa;
1143
1144         if (buf)
1145         {
1146             *buf++ = cc[0];
1147             if (pa > 1)
1148                 *buf++ = cc[1];
1149         }
1150         len += pa;
1151         psz += sizeof(wxUint32);
1152     }
1153
1154     if (buf && len<n)
1155         *buf=0;
1156
1157     return len;
1158 }
1159
1160
1161 // swap 16bit String to 32bit MB
1162 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1163 {
1164     size_t len=0;
1165
1166     while (*psz && (!buf || len < n))
1167     {
1168         char cc[4];
1169
1170         // cast is ok for WC_UTF16
1171         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1172         if (pa == (size_t)-1)
1173             return pa;
1174
1175         if (buf)
1176         {
1177             *buf++ = cc[3];
1178             *buf++ = cc[2];
1179             *buf++ = cc[1];
1180             *buf++ = cc[0];
1181         }
1182         len += sizeof(wxUint32);
1183         psz += pa;
1184     }
1185
1186     if (buf && len<=n-sizeof(wxUint32))
1187         *(wxUint32*)buf=0;
1188
1189     return len;
1190 }
1191
1192 #else // WC_UTF16
1193
1194
1195 // copy 32bit MB to 32bit String
1196 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1197 {
1198     size_t len=0;
1199
1200     while (*(wxUint32*)psz && (!buf || len < n))
1201     {
1202         if (buf)
1203             *buf++ = *(wxUint32*)psz;
1204         len++;
1205         psz += sizeof(wxUint32);
1206     }
1207
1208     if (buf && len<n)
1209         *buf=0;
1210
1211     return len;
1212 }
1213
1214
1215 // copy 32bit String to 32bit MB
1216 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1217 {
1218     size_t len=0;
1219
1220     while (*psz && (!buf || len < n))
1221     {
1222         if (buf)
1223         {
1224             *(wxUint32*)buf = *psz;
1225             buf += sizeof(wxUint32);
1226         }
1227
1228         len += sizeof(wxUint32);
1229         psz++;
1230     }
1231
1232     if (buf && len<=n-sizeof(wxUint32))
1233         *(wxUint32*)buf=0;
1234
1235     return len;
1236 }
1237
1238
1239 // swap 32bit MB to 32bit String
1240 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1241 {
1242     size_t len=0;
1243
1244     while (*(wxUint32*)psz && (!buf || len < n))
1245     {
1246         if (buf)
1247         {
1248             ((char *)buf)[0] = psz[3];
1249             ((char *)buf)[1] = psz[2];
1250             ((char *)buf)[2] = psz[1];
1251             ((char *)buf)[3] = psz[0];
1252             buf++;
1253         }
1254         len++;
1255         psz += sizeof(wxUint32);
1256     }
1257
1258     if (buf && len<n)
1259         *buf=0;
1260
1261     return len;
1262 }
1263
1264
1265 // swap 32bit String to 32bit MB
1266 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1267 {
1268     size_t len=0;
1269
1270     while (*psz && (!buf || len < n))
1271     {
1272         if (buf)
1273         {
1274             *buf++ = ((char *)psz)[3];
1275             *buf++ = ((char *)psz)[2];
1276             *buf++ = ((char *)psz)[1];
1277             *buf++ = ((char *)psz)[0];
1278         }
1279         len += sizeof(wxUint32);
1280         psz++;
1281     }
1282
1283     if (buf && len<=n-sizeof(wxUint32))
1284         *(wxUint32*)buf=0;
1285
1286     return len;
1287 }
1288
1289
1290 #endif // WC_UTF16
1291
1292
1293 // ============================================================================
1294 // The classes doing conversion using the iconv_xxx() functions
1295 // ============================================================================
1296
1297 #ifdef HAVE_ICONV
1298
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
//     (unless there's yet another bug in glibc) the only case when iconv()
//     returns with (size_t)-1 (which means error) and says there are 0 bytes
//     left in the input buffer -- when _real_ error occurs,
//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft) tests the result of an iconv() call: cres is
// its return value and bufLeft the number of input bytes left unconverted
// (only examined for the broken glibc versions). The arguments are
// parenthesized so that arbitrary expressions can be passed safely.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type of the second iconv()
// parameter, using ICONV_CONST for its constness
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1315
1316 // ----------------------------------------------------------------------------
1317 // wxMBConv_iconv: encapsulates an iconv character set
1318 // ----------------------------------------------------------------------------
1319
1320 class wxMBConv_iconv : public wxMBConv
1321 {
1322 public:
1323     wxMBConv_iconv(const wxChar *name);
1324     virtual ~wxMBConv_iconv();
1325
1326     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1327     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1328
1329     bool IsOk() const
1330         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1331
1332 protected:
1333     // the iconv handlers used to translate from multibyte to wide char and in
1334     // the other direction
1335     iconv_t m2w,
1336             w2m;
1337 #if wxUSE_THREADS
1338     // guards access to m2w and w2m objects
1339     wxMutex m_iconvMutex;
1340 #endif
1341
1342 private:
1343     // the name (for iconv_open()) of a wide char charset -- if none is
1344     // available on this machine, it will remain NULL
1345     static const char *ms_wcCharsetName;
1346
1347     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1348     // different endian-ness than the native one
1349     static bool ms_wcNeedsSwap;
1350 };
1351
1352 // make the constructor available for unit testing
1353 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1354 {
1355     wxMBConv_iconv* result = new wxMBConv_iconv( name );
1356     if ( !result->IsOk() )
1357     {
1358         delete result;
1359         return 0;
1360     }
1361     return result;
1362 }
1363
1364 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1365 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1366
1367 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1368 {
1369     // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1370     // names for the charsets
1371     const wxCharBuffer cname(wxString(name).ToAscii());
1372
1373     // check for charset that represents wchar_t:
1374     if (ms_wcCharsetName == NULL)
1375     {
1376         ms_wcNeedsSwap = false;
1377
1378         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1379         ms_wcCharsetName = WC_NAME_BEST;
1380         m2w = iconv_open(ms_wcCharsetName, cname);
1381
1382         if (m2w == (iconv_t)-1)
1383         {
1384             // try charset w/o bytesex info (e.g. "UCS4")
1385             // and check for bytesex ourselves:
1386             ms_wcCharsetName = WC_NAME;
1387             m2w = iconv_open(ms_wcCharsetName, cname);
1388
1389             // last bet, try if it knows WCHAR_T pseudo-charset
1390             if (m2w == (iconv_t)-1)
1391             {
1392                 ms_wcCharsetName = "WCHAR_T";
1393                 m2w = iconv_open(ms_wcCharsetName, cname);
1394             }
1395
1396             if (m2w != (iconv_t)-1)
1397             {
1398                 char    buf[2], *bufPtr;
1399                 wchar_t wbuf[2], *wbufPtr;
1400                 size_t  insz, outsz;
1401                 size_t  res;
1402
1403                 buf[0] = 'A';
1404                 buf[1] = 0;
1405                 wbuf[0] = 0;
1406                 insz = 2;
1407                 outsz = SIZEOF_WCHAR_T * 2;
1408                 wbufPtr = wbuf;
1409                 bufPtr = buf;
1410
1411                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1412                             (char**)&wbufPtr, &outsz);
1413
1414                 if (ICONV_FAILED(res, insz))
1415                 {
1416                     ms_wcCharsetName = NULL;
1417                     wxLogLastError(wxT("iconv"));
1418                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1419                 }
1420                 else
1421                 {
1422                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1423                 }
1424             }
1425             else
1426             {
1427                 ms_wcCharsetName = NULL;
1428
1429                 // VS: we must not output an error here, since wxWidgets will safely
1430                 //     fall back to using wxEncodingConverter.
1431                 wxLogTrace(TRACE_STRCONV, wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1432                 //wxLogError(
1433             }
1434         }
1435         wxLogTrace(TRACE_STRCONV, wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1436     }
1437     else // we already have ms_wcCharsetName
1438     {
1439         m2w = iconv_open(ms_wcCharsetName, cname);
1440     }
1441
1442     // NB: don't ever pass NULL to iconv_open(), it may crash!
1443     if ( ms_wcCharsetName )
1444     {
1445         w2m = iconv_open( cname, ms_wcCharsetName);
1446     }
1447     else
1448     {
1449         w2m = (iconv_t)-1;
1450     }
1451 }
1452
1453 wxMBConv_iconv::~wxMBConv_iconv()
1454 {
1455     if ( m2w != (iconv_t)-1 )
1456         iconv_close(m2w);
1457     if ( w2m != (iconv_t)-1 )
1458         iconv_close(w2m);
1459 }
1460
1461 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1462 {
1463 #if wxUSE_THREADS
1464     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1465     //     Unfortunately there is a couple of global wxCSConv objects such as
1466     //     wxConvLocal that are used all over wx code, so we have to make sure
1467     //     the handle is used by at most one thread at the time. Otherwise
1468     //     only a few wx classes would be safe to use from non-main threads
1469     //     as MB<->WC conversion would fail "randomly".
1470     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1471 #endif
1472
1473     size_t inbuf = strlen(psz);
1474     size_t outbuf = n * SIZEOF_WCHAR_T;
1475     size_t res, cres;
1476     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1477     wchar_t *bufPtr = buf;
1478     const char *pszPtr = psz;
1479
1480     if (buf)
1481     {
1482         // have destination buffer, convert there
1483         cres = iconv(m2w,
1484                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1485                      (char**)&bufPtr, &outbuf);
1486         res = n - (outbuf / SIZEOF_WCHAR_T);
1487
1488         if (ms_wcNeedsSwap)
1489         {
1490             // convert to native endianness
1491             WC_BSWAP(buf /* _not_ bufPtr */, res)
1492         }
1493
1494         // NB: iconv was given only strlen(psz) characters on input, and so
1495         //     it couldn't convert the trailing zero. Let's do it ourselves
1496         //     if there's some room left for it in the output buffer.
1497         if (res < n)
1498             buf[res] = 0;
1499     }
1500     else
1501     {
1502         // no destination buffer... convert using temp buffer
1503         // to calculate destination buffer requirement
1504         wchar_t tbuf[8];
1505         res = 0;
1506         do {
1507             bufPtr = tbuf;
1508             outbuf = 8*SIZEOF_WCHAR_T;
1509
1510             cres = iconv(m2w,
1511                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1512                          (char**)&bufPtr, &outbuf );
1513
1514             res += 8-(outbuf/SIZEOF_WCHAR_T);
1515         } while ((cres==(size_t)-1) && (errno==E2BIG));
1516     }
1517
1518     if (ICONV_FAILED(cres, inbuf))
1519     {
1520         //VS: it is ok if iconv fails, hence trace only
1521         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1522         return (size_t)-1;
1523     }
1524
1525     return res;
1526 }
1527
1528 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1529 {
1530 #if wxUSE_THREADS
1531     // NB: explained in MB2WC
1532     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1533 #endif
1534
1535     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1536     size_t outbuf = n;
1537     size_t res, cres;
1538
1539     wchar_t *tmpbuf = 0;
1540
1541     if (ms_wcNeedsSwap)
1542     {
1543         // need to copy to temp buffer to switch endianness
1544         // this absolutely doesn't rock!
1545         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1546         //  could be in read-only memory, or be accessed in some other thread)
1547         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1548         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1549         WC_BSWAP(tmpbuf, inbuf)
1550         psz=tmpbuf;
1551     }
1552
1553     if (buf)
1554     {
1555         // have destination buffer, convert there
1556         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1557
1558         res = n-outbuf;
1559
1560         // NB: iconv was given only wcslen(psz) characters on input, and so
1561         //     it couldn't convert the trailing zero. Let's do it ourselves
1562         //     if there's some room left for it in the output buffer.
1563         if (res < n)
1564             buf[0] = 0;
1565     }
1566     else
1567     {
1568         // no destination buffer... convert using temp buffer
1569         // to calculate destination buffer requirement
1570         char tbuf[16];
1571         res = 0;
1572         do {
1573             buf = tbuf; outbuf = 16;
1574
1575             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1576
1577             res += 16 - outbuf;
1578         } while ((cres==(size_t)-1) && (errno==E2BIG));
1579     }
1580
1581     if (ms_wcNeedsSwap)
1582     {
1583         free(tmpbuf);
1584     }
1585
1586     if (ICONV_FAILED(cres, inbuf))
1587     {
1588         //VS: it is ok if iconv fails, hence trace only
1589         wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1590         return (size_t)-1;
1591     }
1592
1593     return res;
1594 }
1595
1596 #endif // HAVE_ICONV
1597
1598
1599 // ============================================================================
1600 // Win32 conversion classes
1601 // ============================================================================
1602
1603 #ifdef wxHAVE_WIN32_MB2WC
1604
1605 // code page lookup functions from utils.cpp; they are only declared when
     // wxUSE_FONTMAP is enabled and map a charset name or a wxFontEncoding
     // value to the long stored in wxMBConv_win32::m_CodePage below
1606 #if wxUSE_FONTMAP
1607 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1608 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1609 #endif
1610
1611 class wxMBConv_win32 : public wxMBConv
1612 {
1613 public:
1614     wxMBConv_win32()
1615     {
1616         m_CodePage = CP_ACP;
1617     }
1618
1619 #if wxUSE_FONTMAP
1620     wxMBConv_win32(const wxChar* name)
1621     {
1622         m_CodePage = wxCharsetToCodepage(name);
1623     }
1624
1625     wxMBConv_win32(wxFontEncoding encoding)
1626     {
1627         m_CodePage = wxEncodingToCodepage(encoding);
1628     }
1629 #endif
1630
1631     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1632     {
1633         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1634         // the behaviour is not compatible with the Unix version (using iconv)
1635         // and break the library itself, e.g. wxTextInputStream::NextChar()
1636         // wouldn't work if reading an incomplete MB char didn't result in an
1637         // error
1638         //
1639         // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
1640         // an error (tested under Windows Server 2003) and apparently it is
1641         // done on purpose, i.e. the function accepts any input in this case
1642         // and although I'd prefer to return error on ill-formed output, our
1643         // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
1644         // explicitly ill-formed according to RFC 2152) neither so we don't
1645         // even have any fallback here...
1646         int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;
1647
1648         const size_t len = ::MultiByteToWideChar
1649                              (
1650                                 m_CodePage,     // code page
1651                                 flags,          // flags: fall on error
1652                                 psz,            // input string
1653                                 -1,             // its length (NUL-terminated)
1654                                 buf,            // output string
1655                                 buf ? n : 0     // size of output buffer
1656                              );
1657
1658         // note that it returns count of written chars for buf != NULL and size
1659         // of the needed buffer for buf == NULL so in either case the length of
1660         // the string (which never includes the terminating NUL) is one less
1661         return len ? len - 1 : (size_t)-1;
1662     }
1663
1664     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1665     {
1666         /*
1667             we have a problem here: by default, WideCharToMultiByte() may
1668             replace characters unrepresentable in the target code page with bad
1669             quality approximations such as turning "1/2" symbol (U+00BD) into
1670             "1" for the code pages which don't have it and we, obviously, want
1671             to avoid this at any price
1672
1673             the trouble is that this function does it _silently_, i.e. it won't
1674             even tell us whether it did or not... Win98/2000 and higher provide
1675             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1676             we have to resort to a round trip, i.e. check that converting back
1677             results in the same string -- this is, of course, expensive but
1678             otherwise we simply can't be sure to not garble the data.
1679          */
1680
1681         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1682         // it doesn't work with CJK encodings (which we test for rather roughly
1683         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1684         // supporting it
1685         BOOL usedDef wxDUMMY_INITIALIZE(false);
1686         BOOL *pUsedDef;
1687         int flags;
1688         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1689         {
1690             // it's our lucky day
1691             flags = WC_NO_BEST_FIT_CHARS;
1692             pUsedDef = &usedDef;
1693         }
1694         else // old system or unsupported encoding
1695         {
1696             flags = 0;
1697             pUsedDef = NULL;
1698         }
1699
1700         const size_t len = ::WideCharToMultiByte
1701                              (
1702                                 m_CodePage,     // code page
1703                                 flags,          // either none or no best fit
1704                                 pwz,            // input string
1705                                 -1,             // it is (wide) NUL-terminated
1706                                 buf,            // output buffer
1707                                 buf ? n : 0,    // and its size
1708                                 NULL,           // default "replacement" char
1709                                 pUsedDef        // [out] was it used?
1710                              );
1711
1712         if ( !len )
1713         {
1714             // function totally failed
1715             return (size_t)-1;
1716         }
1717
1718         // if we were really converting, check if we succeeded
1719         if ( buf )
1720         {
1721             if ( flags )
1722             {
1723                 // check if the conversion failed, i.e. if any replacements
1724                 // were done
1725                 if ( usedDef )
1726                     return (size_t)-1;
1727             }
1728             else // we must resort to double tripping...
1729             {
1730                 wxWCharBuffer wcBuf(n);
1731                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1732                         wcscmp(wcBuf, pwz) != 0 )
1733                 {
1734                     // we didn't obtain the same thing we started from, hence
1735                     // the conversion was lossy and we consider that it failed
1736                     return (size_t)-1;
1737                 }
1738             }
1739         }
1740
1741         // see the comment above for the reason of "len - 1"
1742         return len - 1;
1743     }
1744
1745     bool IsOk() const { return m_CodePage != -1; }
1746
1747 private:
1748     static bool CanUseNoBestFit()
1749     {
1750         static int s_isWin98Or2k = -1;
1751
1752         if ( s_isWin98Or2k == -1 )
1753         {
1754             int verMaj, verMin;
1755             switch ( wxGetOsVersion(&verMaj, &verMin) )
1756             {
1757                 case wxWIN95:
1758                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1759                     break;
1760
1761                 case wxWINDOWS_NT:
1762                     s_isWin98Or2k = verMaj >= 5;
1763                     break;
1764
1765                 default:
1766                     // unknown, be conseravtive by default
1767                     s_isWin98Or2k = 0;
1768             }
1769
1770             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1771         }
1772
1773         return s_isWin98Or2k == 1;
1774     }
1775
1776     long m_CodePage;
1777 };
1778
1779 #endif // wxHAVE_WIN32_MB2WC
1780
1781 // ============================================================================
1782 // Cocoa conversion classes
1783 // ============================================================================
1784
1785 #if defined(__WXCOCOA__)
1786
1787 // RN:  There is no UTF-32 support in either Core Foundation or
1788 // Cocoa.  Strangely enough, internally Core Foundation uses
1789 // UTF 32 internally quite a bit - its just not public (yet).
1790
1791 #include <CoreFoundation/CFString.h>
1792 #include <CoreFoundation/CFStringEncodingExt.h>
1793
1794 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1795 {
1796     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1797     if ( encoding == wxFONTENCODING_DEFAULT )
1798     {
1799         enc = CFStringGetSystemEncoding();
1800     }
1801     else switch( encoding)
1802     {
1803         case wxFONTENCODING_ISO8859_1 :
1804             enc = kCFStringEncodingISOLatin1 ;
1805             break ;
1806         case wxFONTENCODING_ISO8859_2 :
1807             enc = kCFStringEncodingISOLatin2;
1808             break ;
1809         case wxFONTENCODING_ISO8859_3 :
1810             enc = kCFStringEncodingISOLatin3 ;
1811             break ;
1812         case wxFONTENCODING_ISO8859_4 :
1813             enc = kCFStringEncodingISOLatin4;
1814             break ;
1815         case wxFONTENCODING_ISO8859_5 :
1816             enc = kCFStringEncodingISOLatinCyrillic;
1817             break ;
1818         case wxFONTENCODING_ISO8859_6 :
1819             enc = kCFStringEncodingISOLatinArabic;
1820             break ;
1821         case wxFONTENCODING_ISO8859_7 :
1822             enc = kCFStringEncodingISOLatinGreek;
1823             break ;
1824         case wxFONTENCODING_ISO8859_8 :
1825             enc = kCFStringEncodingISOLatinHebrew;
1826             break ;
1827         case wxFONTENCODING_ISO8859_9 :
1828             enc = kCFStringEncodingISOLatin5;
1829             break ;
1830         case wxFONTENCODING_ISO8859_10 :
1831             enc = kCFStringEncodingISOLatin6;
1832             break ;
1833         case wxFONTENCODING_ISO8859_11 :
1834             enc = kCFStringEncodingISOLatinThai;
1835             break ;
1836         case wxFONTENCODING_ISO8859_13 :
1837             enc = kCFStringEncodingISOLatin7;
1838             break ;
1839         case wxFONTENCODING_ISO8859_14 :
1840             enc = kCFStringEncodingISOLatin8;
1841             break ;
1842         case wxFONTENCODING_ISO8859_15 :
1843             enc = kCFStringEncodingISOLatin9;
1844             break ;
1845
1846         case wxFONTENCODING_KOI8 :
1847             enc = kCFStringEncodingKOI8_R;
1848             break ;
1849         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1850             enc = kCFStringEncodingDOSRussian;
1851             break ;
1852
1853 //      case wxFONTENCODING_BULGARIAN :
1854 //          enc = ;
1855 //          break ;
1856
1857         case wxFONTENCODING_CP437 :
1858             enc =kCFStringEncodingDOSLatinUS ;
1859             break ;
1860         case wxFONTENCODING_CP850 :
1861             enc = kCFStringEncodingDOSLatin1;
1862             break ;
1863         case wxFONTENCODING_CP852 :
1864             enc = kCFStringEncodingDOSLatin2;
1865             break ;
1866         case wxFONTENCODING_CP855 :
1867             enc = kCFStringEncodingDOSCyrillic;
1868             break ;
1869         case wxFONTENCODING_CP866 :
1870             enc =kCFStringEncodingDOSRussian ;
1871             break ;
1872         case wxFONTENCODING_CP874 :
1873             enc = kCFStringEncodingDOSThai;
1874             break ;
1875         case wxFONTENCODING_CP932 :
1876             enc = kCFStringEncodingDOSJapanese;
1877             break ;
1878         case wxFONTENCODING_CP936 :
1879             enc =kCFStringEncodingDOSChineseSimplif ;
1880             break ;
1881         case wxFONTENCODING_CP949 :
1882             enc = kCFStringEncodingDOSKorean;
1883             break ;
1884         case wxFONTENCODING_CP950 :
1885             enc = kCFStringEncodingDOSChineseTrad;
1886             break ;
1887         case wxFONTENCODING_CP1250 :
1888             enc = kCFStringEncodingWindowsLatin2;
1889             break ;
1890         case wxFONTENCODING_CP1251 :
1891             enc =kCFStringEncodingWindowsCyrillic ;
1892             break ;
1893         case wxFONTENCODING_CP1252 :
1894             enc =kCFStringEncodingWindowsLatin1 ;
1895             break ;
1896         case wxFONTENCODING_CP1253 :
1897             enc = kCFStringEncodingWindowsGreek;
1898             break ;
1899         case wxFONTENCODING_CP1254 :
1900             enc = kCFStringEncodingWindowsLatin5;
1901             break ;
1902         case wxFONTENCODING_CP1255 :
1903             enc =kCFStringEncodingWindowsHebrew ;
1904             break ;
1905         case wxFONTENCODING_CP1256 :
1906             enc =kCFStringEncodingWindowsArabic ;
1907             break ;
1908         case wxFONTENCODING_CP1257 :
1909             enc = kCFStringEncodingWindowsBalticRim;
1910             break ;
1911 //   This only really encodes to UTF7 (if that) evidently
1912 //        case wxFONTENCODING_UTF7 :
1913 //            enc = kCFStringEncodingNonLossyASCII ;
1914 //            break ;
1915         case wxFONTENCODING_UTF8 :
1916             enc = kCFStringEncodingUTF8 ;
1917             break ;
1918         case wxFONTENCODING_EUC_JP :
1919             enc = kCFStringEncodingEUC_JP;
1920             break ;
1921         case wxFONTENCODING_UTF16 :
1922             enc = kCFStringEncodingUnicode ;
1923             break ;
1924         case wxFONTENCODING_MACROMAN :
1925             enc = kCFStringEncodingMacRoman ;
1926             break ;
1927         case wxFONTENCODING_MACJAPANESE :
1928             enc = kCFStringEncodingMacJapanese ;
1929             break ;
1930         case wxFONTENCODING_MACCHINESETRAD :
1931             enc = kCFStringEncodingMacChineseTrad ;
1932             break ;
1933         case wxFONTENCODING_MACKOREAN :
1934             enc = kCFStringEncodingMacKorean ;
1935             break ;
1936         case wxFONTENCODING_MACARABIC :
1937             enc = kCFStringEncodingMacArabic ;
1938             break ;
1939         case wxFONTENCODING_MACHEBREW :
1940             enc = kCFStringEncodingMacHebrew ;
1941             break ;
1942         case wxFONTENCODING_MACGREEK :
1943             enc = kCFStringEncodingMacGreek ;
1944             break ;
1945         case wxFONTENCODING_MACCYRILLIC :
1946             enc = kCFStringEncodingMacCyrillic ;
1947             break ;
1948         case wxFONTENCODING_MACDEVANAGARI :
1949             enc = kCFStringEncodingMacDevanagari ;
1950             break ;
1951         case wxFONTENCODING_MACGURMUKHI :
1952             enc = kCFStringEncodingMacGurmukhi ;
1953             break ;
1954         case wxFONTENCODING_MACGUJARATI :
1955             enc = kCFStringEncodingMacGujarati ;
1956             break ;
1957         case wxFONTENCODING_MACORIYA :
1958             enc = kCFStringEncodingMacOriya ;
1959             break ;
1960         case wxFONTENCODING_MACBENGALI :
1961             enc = kCFStringEncodingMacBengali ;
1962             break ;
1963         case wxFONTENCODING_MACTAMIL :
1964             enc = kCFStringEncodingMacTamil ;
1965             break ;
1966         case wxFONTENCODING_MACTELUGU :
1967             enc = kCFStringEncodingMacTelugu ;
1968             break ;
1969         case wxFONTENCODING_MACKANNADA :
1970             enc = kCFStringEncodingMacKannada ;
1971             break ;
1972         case wxFONTENCODING_MACMALAJALAM :
1973             enc = kCFStringEncodingMacMalayalam ;
1974             break ;
1975         case wxFONTENCODING_MACSINHALESE :
1976             enc = kCFStringEncodingMacSinhalese ;
1977             break ;
1978         case wxFONTENCODING_MACBURMESE :
1979             enc = kCFStringEncodingMacBurmese ;
1980             break ;
1981         case wxFONTENCODING_MACKHMER :
1982             enc = kCFStringEncodingMacKhmer ;
1983             break ;
1984         case wxFONTENCODING_MACTHAI :
1985             enc = kCFStringEncodingMacThai ;
1986             break ;
1987         case wxFONTENCODING_MACLAOTIAN :
1988             enc = kCFStringEncodingMacLaotian ;
1989             break ;
1990         case wxFONTENCODING_MACGEORGIAN :
1991             enc = kCFStringEncodingMacGeorgian ;
1992             break ;
1993         case wxFONTENCODING_MACARMENIAN :
1994             enc = kCFStringEncodingMacArmenian ;
1995             break ;
1996         case wxFONTENCODING_MACCHINESESIMP :
1997             enc = kCFStringEncodingMacChineseSimp ;
1998             break ;
1999         case wxFONTENCODING_MACTIBETAN :
2000             enc = kCFStringEncodingMacTibetan ;
2001             break ;
2002         case wxFONTENCODING_MACMONGOLIAN :
2003             enc = kCFStringEncodingMacMongolian ;
2004             break ;
2005         case wxFONTENCODING_MACETHIOPIC :
2006             enc = kCFStringEncodingMacEthiopic ;
2007             break ;
2008         case wxFONTENCODING_MACCENTRALEUR :
2009             enc = kCFStringEncodingMacCentralEurRoman ;
2010             break ;
2011         case wxFONTENCODING_MACVIATNAMESE :
2012             enc = kCFStringEncodingMacVietnamese ;
2013             break ;
2014         case wxFONTENCODING_MACARABICEXT :
2015             enc = kCFStringEncodingMacExtArabic ;
2016             break ;
2017         case wxFONTENCODING_MACSYMBOL :
2018             enc = kCFStringEncodingMacSymbol ;
2019             break ;
2020         case wxFONTENCODING_MACDINGBATS :
2021             enc = kCFStringEncodingMacDingbats ;
2022             break ;
2023         case wxFONTENCODING_MACTURKISH :
2024             enc = kCFStringEncodingMacTurkish ;
2025             break ;
2026         case wxFONTENCODING_MACCROATIAN :
2027             enc = kCFStringEncodingMacCroatian ;
2028             break ;
2029         case wxFONTENCODING_MACICELANDIC :
2030             enc = kCFStringEncodingMacIcelandic ;
2031             break ;
2032         case wxFONTENCODING_MACROMANIAN :
2033             enc = kCFStringEncodingMacRomanian ;
2034             break ;
2035         case wxFONTENCODING_MACCELTIC :
2036             enc = kCFStringEncodingMacCeltic ;
2037             break ;
2038         case wxFONTENCODING_MACGAELIC :
2039             enc = kCFStringEncodingMacGaelic ;
2040             break ;
2041 //      case wxFONTENCODING_MACKEYBOARD :
2042 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
2043 //          break ;
2044         default :
2045             // because gcc is picky
2046             break ;
2047     } ;
2048     return enc ;
2049 }
2050
2051 class wxMBConv_cocoa : public wxMBConv
2052 {
2053 public:
2054     wxMBConv_cocoa()
2055     {
2056         Init(CFStringGetSystemEncoding()) ;
2057     }
2058
2059 #if wxUSE_FONTMAP
2060     wxMBConv_cocoa(const wxChar* name)
2061     {
2062         Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2063     }
2064 #endif
2065
2066     wxMBConv_cocoa(wxFontEncoding encoding)
2067     {
2068         Init( wxCFStringEncFromFontEnc(encoding) );
2069     }
2070
2071     ~wxMBConv_cocoa()
2072     {
2073     }
2074
2075     void Init( CFStringEncoding encoding)
2076     {
2077         m_encoding = encoding ;
2078     }
2079
2080     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2081     {
2082         wxASSERT(szUnConv);
2083
2084         CFStringRef theString = CFStringCreateWithBytes (
2085                                                 NULL, //the allocator
2086                                                 (const UInt8*)szUnConv,
2087                                                 strlen(szUnConv),
2088                                                 m_encoding,
2089                                                 false //no BOM/external representation
2090                                                 );
2091
2092         wxASSERT(theString);
2093
2094         size_t nOutLength = CFStringGetLength(theString);
2095
2096         if (szOut == NULL)
2097         {
2098             CFRelease(theString);
2099             return nOutLength;
2100         }
2101
2102         CFRange theRange = { 0, nOutSize };
2103
2104 #if SIZEOF_WCHAR_T == 4
2105         UniChar* szUniCharBuffer = new UniChar[nOutSize];
2106 #endif
2107
2108         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2109
2110         CFRelease(theString);
2111
2112         szUniCharBuffer[nOutLength] = '\0' ;
2113
2114 #if SIZEOF_WCHAR_T == 4
2115         wxMBConvUTF16 converter ;
2116         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
2117         delete[] szUniCharBuffer;
2118 #endif
2119
2120         return nOutLength;
2121     }
2122
2123     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2124     {
2125         wxASSERT(szUnConv);
2126
2127         size_t nRealOutSize;
2128         size_t nBufSize = wxWcslen(szUnConv);
2129         UniChar* szUniBuffer = (UniChar*) szUnConv;
2130
2131 #if SIZEOF_WCHAR_T == 4
2132         wxMBConvUTF16 converter ;
2133         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2134         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2135         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2136         nBufSize /= sizeof(UniChar);
2137 #endif
2138
2139         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2140                                 NULL, //allocator
2141                                 szUniBuffer,
2142                                 nBufSize,
2143                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2144                             );
2145
2146         wxASSERT(theString);
2147
2148         //Note that CER puts a BOM when converting to unicode
2149         //so we  check and use getchars instead in that case
2150         if (m_encoding == kCFStringEncodingUnicode)
2151         {
2152             if (szOut != NULL)
2153                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2154
2155             nRealOutSize = CFStringGetLength(theString) + 1;
2156         }
2157         else
2158         {
2159             CFStringGetBytes(
2160                 theString,
2161                 CFRangeMake(0, CFStringGetLength(theString)),
2162                 m_encoding,
2163                 0, //what to put in characters that can't be converted -
2164                     //0 tells CFString to return NULL if it meets such a character
2165                 false, //not an external representation
2166                 (UInt8*) szOut,
2167                 nOutSize,
2168                 (CFIndex*) &nRealOutSize
2169                         );
2170         }
2171
2172         CFRelease(theString);
2173
2174 #if SIZEOF_WCHAR_T == 4
2175         delete[] szUniBuffer;
2176 #endif
2177
2178         return  nRealOutSize - 1;
2179     }
2180
2181     bool IsOk() const
2182     {
2183         return m_encoding != kCFStringEncodingInvalidId &&
2184               CFStringIsEncodingAvailable(m_encoding);
2185     }
2186
2187 private:
2188     CFStringEncoding m_encoding ;
2189 };
2190
2191 #endif // defined(__WXCOCOA__)
2192
2193 // ============================================================================
2194 // Mac conversion classes
2195 // ============================================================================
2196
2197 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2198
2199 class wxMBConv_mac : public wxMBConv
2200 {
2201 public:
2202     wxMBConv_mac()
2203     {
2204         Init(CFStringGetSystemEncoding()) ;
2205     }
2206
2207 #if wxUSE_FONTMAP
2208     wxMBConv_mac(const wxChar* name)
2209     {
2210         Init( wxMacGetSystemEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2211     }
2212 #endif
2213
2214     wxMBConv_mac(wxFontEncoding encoding)
2215     {
2216         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2217     }
2218
2219     ~wxMBConv_mac()
2220     {
2221         OSStatus status = noErr ;
2222         status = TECDisposeConverter(m_MB2WC_converter);
2223         status = TECDisposeConverter(m_WC2MB_converter);
2224     }
2225
2226
2227     void Init( TextEncodingBase encoding)
2228     {
2229         OSStatus status = noErr ;
2230         m_char_encoding = encoding ;
2231         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2232
2233         status = TECCreateConverter(&m_MB2WC_converter,
2234                                     m_char_encoding,
2235                                     m_unicode_encoding);
2236         status = TECCreateConverter(&m_WC2MB_converter,
2237                                     m_unicode_encoding,
2238                                     m_char_encoding);
2239     }
2240
2241     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2242     {
2243         OSStatus status = noErr ;
2244         ByteCount byteOutLen ;
2245         ByteCount byteInLen = strlen(psz) ;
2246         wchar_t *tbuf = NULL ;
2247         UniChar* ubuf = NULL ;
2248         size_t res = 0 ;
2249
2250         if (buf == NULL)
2251         {
2252             //apple specs say at least 32
2253             n = wxMax( 32 , byteInLen ) ;
2254             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2255         }
2256         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2257 #if SIZEOF_WCHAR_T == 4
2258         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2259 #else
2260         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2261 #endif
2262         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2263           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2264 #if SIZEOF_WCHAR_T == 4
2265         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2266         // is not properly terminated we get random characters at the end
2267         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2268         wxMBConvUTF16 converter ;
2269         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2270         free( ubuf ) ;
2271 #else
2272         res = byteOutLen / sizeof( UniChar ) ;
2273 #endif
2274         if ( buf == NULL )
2275              free(tbuf) ;
2276
2277         if ( buf  && res < n)
2278             buf[res] = 0;
2279
2280         return res ;
2281     }
2282
2283     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2284     {
2285         OSStatus status = noErr ;
2286         ByteCount byteOutLen ;
2287         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2288
2289         char *tbuf = NULL ;
2290
2291         if (buf == NULL)
2292         {
2293             //apple specs say at least 32
2294             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2295             tbuf = (char*) malloc( n ) ;
2296         }
2297
2298         ByteCount byteBufferLen = n ;
2299         UniChar* ubuf = NULL ;
2300 #if SIZEOF_WCHAR_T == 4
2301         wxMBConvUTF16 converter ;
2302         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2303         byteInLen = unicharlen ;
2304         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2305         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2306 #else
2307         ubuf = (UniChar*) psz ;
2308 #endif
2309         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2310             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2311 #if SIZEOF_WCHAR_T == 4
2312         free( ubuf ) ;
2313 #endif
2314         if ( buf == NULL )
2315             free(tbuf) ;
2316
2317         size_t res = byteOutLen ;
2318         if ( buf  && res < n)
2319         {
2320             buf[res] = 0;
2321
2322             //we need to double-trip to verify it didn't insert any ? in place
2323             //of bogus characters
2324             wxWCharBuffer wcBuf(n);
2325             size_t pszlen = wxWcslen(psz);
2326             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2327                         wxWcslen(wcBuf) != pszlen ||
2328                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2329             {
2330                 // we didn't obtain the same thing we started from, hence
2331                 // the conversion was lossy and we consider that it failed
2332                 return (size_t)-1;
2333             }
2334         }
2335
2336         return res ;
2337     }
2338
2339     bool IsOk() const
2340         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2341
2342 private:
2343     TECObjectRef m_MB2WC_converter ;
2344     TECObjectRef m_WC2MB_converter ;
2345
2346     TextEncodingBase m_char_encoding ;
2347     TextEncodingBase m_unicode_encoding ;
2348 };
2349
2350 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2351
2352 // ============================================================================
2353 // wxEncodingConverter based conversion classes
2354 // ============================================================================
2355
2356 #if wxUSE_FONTMAP
2357
2358 class wxMBConv_wxwin : public wxMBConv
2359 {
2360 private:
2361     void Init()
2362     {
2363         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2364                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2365     }
2366
2367 public:
2368     // temporarily just use wxEncodingConverter stuff,
2369     // so that it works while a better implementation is built
2370     wxMBConv_wxwin(const wxChar* name)
2371     {
2372         if (name)
2373             m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2374         else
2375             m_enc = wxFONTENCODING_SYSTEM;
2376
2377         Init();
2378     }
2379
2380     wxMBConv_wxwin(wxFontEncoding enc)
2381     {
2382         m_enc = enc;
2383
2384         Init();
2385     }
2386
2387     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2388     {
2389         size_t inbuf = strlen(psz);
2390         if (buf)
2391         {
2392             if (!m2w.Convert(psz,buf))
2393                 return (size_t)-1;
2394         }
2395         return inbuf;
2396     }
2397
2398     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2399     {
2400         const size_t inbuf = wxWcslen(psz);
2401         if (buf)
2402         {
2403             if (!w2m.Convert(psz,buf))
2404                 return (size_t)-1;
2405         }
2406
2407         return inbuf;
2408     }
2409
2410     bool IsOk() const { return m_ok; }
2411
2412 public:
2413     wxFontEncoding m_enc;
2414     wxEncodingConverter m2w, w2m;
2415
2416     // were we initialized successfully?
2417     bool m_ok;
2418
2419     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2420 };
2421
2422 // make the constructors available for unit testing
2423 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
2424 {
2425     wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2426     if ( !result->IsOk() )
2427     {
2428         delete result;
2429         return 0;
2430     }
2431     return result;
2432 }
2433
2434 #endif // wxUSE_FONTMAP
2435
2436 // ============================================================================
2437 // wxCSConv implementation
2438 // ============================================================================
2439
2440 void wxCSConv::Init()
2441 {
2442     m_name = NULL;
2443     m_convReal =  NULL;
2444     m_deferred = true;
2445 }
2446
2447 wxCSConv::wxCSConv(const wxChar *charset)
2448 {
2449     Init();
2450
2451     if ( charset )
2452     {
2453         SetName(charset);
2454     }
2455
2456     m_encoding = wxFONTENCODING_SYSTEM;
2457 }
2458
2459 wxCSConv::wxCSConv(wxFontEncoding encoding)
2460 {
2461     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2462     {
2463         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2464
2465         encoding = wxFONTENCODING_SYSTEM;
2466     }
2467
2468     Init();
2469
2470     m_encoding = encoding;
2471 }
2472
2473 wxCSConv::~wxCSConv()
2474 {
2475     Clear();
2476 }
2477
2478 wxCSConv::wxCSConv(const wxCSConv& conv)
2479         : wxMBConv()
2480 {
2481     Init();
2482
2483     SetName(conv.m_name);
2484     m_encoding = conv.m_encoding;
2485 }
2486
2487 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2488 {
2489     Clear();
2490
2491     SetName(conv.m_name);
2492     m_encoding = conv.m_encoding;
2493
2494     return *this;
2495 }
2496
2497 void wxCSConv::Clear()
2498 {
2499     free(m_name);
2500     delete m_convReal;
2501
2502     m_name = NULL;
2503     m_convReal = NULL;
2504 }
2505
2506 void wxCSConv::SetName(const wxChar *charset)
2507 {
2508     if (charset)
2509     {
2510         m_name = wxStrdup(charset);
2511         m_deferred = true;
2512     }
2513 }
2514
#if wxUSE_FONTMAP
#include "wx/hashmap.h"

// hash map type associating a charset name (wxString) with a wxFontEncoding
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// cache used by wxCSConv::DoCreate() when trying the iconv-based conversion:
// for each encoding it remembers the name which worked or an empty string
// if none of the names did
static wxEncodingNameCache gs_nameCache;
#endif
2523
2524 wxMBConv *wxCSConv::DoCreate() const
2525 {
2526 #if wxUSE_FONTMAP
2527     wxLogTrace(TRACE_STRCONV,
2528                wxT("creating conversion for %s"),
2529                (m_name ? m_name
2530                        : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
2531 #endif // wxUSE_FONTMAP
2532
2533     // check for the special case of ASCII or ISO8859-1 charset: as we have
2534     // special knowledge of it anyhow, we don't need to create a special
2535     // conversion object
2536     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2537     {
2538         // don't convert at all
2539         return NULL;
2540     }
2541
2542     // we trust OS to do conversion better than we can so try external
2543     // conversion methods first
2544     //
2545     // the full order is:
2546     //      1. OS conversion (iconv() under Unix or Win32 API)
2547     //      2. hard coded conversions for UTF
2548     //      3. wxEncodingConverter as fall back
2549
2550     // step (1)
2551 #ifdef HAVE_ICONV
2552 #if !wxUSE_FONTMAP
2553     if ( m_name )
2554 #endif // !wxUSE_FONTMAP
2555     {
2556         wxString name(m_name);
2557         wxFontEncoding encoding(m_encoding);
2558
2559         if ( !name.empty() )
2560         {
2561             wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2562             if ( conv->IsOk() )
2563                 return conv;
2564
2565             delete conv;
2566
2567 #if wxUSE_FONTMAP
2568             encoding =
2569                 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2570 #endif // wxUSE_FONTMAP
2571         }
2572 #if wxUSE_FONTMAP
2573         {
2574             const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2575             if ( it != gs_nameCache.end() )
2576             {
2577                 if ( it->second.empty() )
2578                     return NULL;
2579
2580                 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
2581                 if ( conv->IsOk() )
2582                     return conv;
2583
2584                 delete conv;
2585             }
2586
2587             const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2588
2589             for ( ; *names; ++names )
2590             {
2591                 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
2592                 if ( conv->IsOk() )
2593                 {
2594                     gs_nameCache[encoding] = *names;
2595                     return conv;
2596                 }
2597
2598                 delete conv;
2599             }
2600
2601             gs_nameCache[encoding] = ""; // cache the failure
2602         }
2603 #endif // wxUSE_FONTMAP
2604     }
2605 #endif // HAVE_ICONV
2606
2607 #ifdef wxHAVE_WIN32_MB2WC
2608     {
2609 #if wxUSE_FONTMAP
2610         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2611                                       : new wxMBConv_win32(m_encoding);
2612         if ( conv->IsOk() )
2613             return conv;
2614
2615         delete conv;
2616 #else
2617         return NULL;
2618 #endif
2619     }
2620 #endif // wxHAVE_WIN32_MB2WC
2621 #if defined(__WXMAC__)
2622     {
2623         // leave UTF16 and UTF32 to the built-ins of wx
2624         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2625             ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2626         {
2627
2628 #if wxUSE_FONTMAP
2629             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2630                                         : new wxMBConv_mac(m_encoding);
2631 #else
2632             wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
2633 #endif
2634             if ( conv->IsOk() )
2635                  return conv;
2636
2637             delete conv;
2638         }
2639     }
2640 #endif
2641 #if defined(__WXCOCOA__)
2642     {
2643         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2644         {
2645
2646 #if wxUSE_FONTMAP
2647             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2648                                           : new wxMBConv_cocoa(m_encoding);
2649 #else
2650             wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
2651 #endif
2652             if ( conv->IsOk() )
2653                  return conv;
2654
2655             delete conv;
2656         }
2657     }
2658 #endif
2659     // step (2)
2660     wxFontEncoding enc = m_encoding;
2661 #if wxUSE_FONTMAP
2662     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2663     {
2664         // use "false" to suppress interactive dialogs -- we can be called from
2665         // anywhere and popping up a dialog from here is the last thing we want to
2666         // do
2667         enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2668     }
2669 #endif // wxUSE_FONTMAP
2670
2671     switch ( enc )
2672     {
2673         case wxFONTENCODING_UTF7:
2674              return new wxMBConvUTF7;
2675
2676         case wxFONTENCODING_UTF8:
2677              return new wxMBConvUTF8;
2678
2679         case wxFONTENCODING_UTF16BE:
2680              return new wxMBConvUTF16BE;
2681
2682         case wxFONTENCODING_UTF16LE:
2683              return new wxMBConvUTF16LE;
2684
2685         case wxFONTENCODING_UTF32BE:
2686              return new wxMBConvUTF32BE;
2687
2688         case wxFONTENCODING_UTF32LE:
2689              return new wxMBConvUTF32LE;
2690
2691         default:
2692              // nothing to do but put here to suppress gcc warnings
2693              ;
2694     }
2695
2696     // step (3)
2697 #if wxUSE_FONTMAP
2698     {
2699         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2700                                       : new wxMBConv_wxwin(m_encoding);
2701         if ( conv->IsOk() )
2702             return conv;
2703
2704         delete conv;
2705     }
2706 #endif // wxUSE_FONTMAP
2707
2708     // NB: This is a hack to prevent deadlock. What could otherwise happen
2709     //     in Unicode build: wxConvLocal creation ends up being here
2710     //     because of some failure and logs the error. But wxLog will try to
2711     //     attach timestamp, for which it will need wxConvLocal (to convert
2712     //     time to char* and then wchar_t*), but that fails, tries to log
2713     //     error, but wxLog has a (already locked) critical section that
2714     //     guards static buffer.
2715     static bool alreadyLoggingError = false;
2716     if (!alreadyLoggingError)
2717     {
2718         alreadyLoggingError = true;
2719         wxLogError(_("Cannot convert from the charset '%s'!"),
2720                    m_name ? m_name
2721                       :
2722 #if wxUSE_FONTMAP
2723                          wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
2724 #else // !wxUSE_FONTMAP
2725                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2726 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2727               );
2728         alreadyLoggingError = false;
2729     }
2730
2731     return NULL;
2732 }
2733
2734 void wxCSConv::CreateConvIfNeeded() const
2735 {
2736     if ( m_deferred )
2737     {
2738         wxCSConv *self = (wxCSConv *)this; // const_cast
2739
2740 #if wxUSE_INTL
2741         // if we don't have neither the name nor the encoding, use the default
2742         // encoding for this system
2743         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2744         {
2745             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2746         }
2747 #endif // wxUSE_INTL
2748
2749         self->m_convReal = DoCreate();
2750         self->m_deferred = false;
2751     }
2752 }
2753
2754 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2755 {
2756     CreateConvIfNeeded();
2757
2758     if (m_convReal)
2759         return m_convReal->MB2WC(buf, psz, n);
2760
2761     // latin-1 (direct)
2762     size_t len = strlen(psz);
2763
2764     if (buf)
2765     {
2766         for (size_t c = 0; c <= len; c++)
2767             buf[c] = (unsigned char)(psz[c]);
2768     }
2769
2770     return len;
2771 }
2772
2773 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2774 {
2775     CreateConvIfNeeded();
2776
2777     if (m_convReal)
2778         return m_convReal->WC2MB(buf, psz, n);
2779
2780     // latin-1 (direct)
2781     const size_t len = wxWcslen(psz);
2782     if (buf)
2783     {
2784         for (size_t c = 0; c <= len; c++)
2785         {
2786             if (psz[c] > 0xFF)
2787                 return (size_t)-1;
2788             buf[c] = (char)psz[c];
2789         }
2790     }
2791     else
2792     {
2793         for (size_t c = 0; c <= len; c++)
2794         {
2795             if (psz[c] > 0xFF)
2796                 return (size_t)-1;
2797         }
2798     }
2799
2800     return len;
2801 }
2802
2803 // ----------------------------------------------------------------------------
2804 // globals
2805 // ----------------------------------------------------------------------------
2806
// the object behind wxConvLibc: its class is selected at compile time
// depending on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;

// the exported references to the objects above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent initially points to the libc converter
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
// wxConvFileName points to the UTF-8 converter if __WXOSX__ is defined and
// to the libc one otherwise
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
                                    wxConvUTF8Obj;
#else
                                    wxConvLibcObj;
#endif
2832
2833
2834 #else // !wxUSE_WCHAR_T
2835
// stand-ins in absence of wchar_t: when wxUSE_WCHAR_T is 0 the global
// converters are just plain wxMBConv objects
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2841
2842 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2843
2844