src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73     #include "wx/thread.h"
  74 #endif
  75
  76 #include "wx/encconv.h"
  77 #include "wx/fontmap.h"
  78 #include "wx/utils.h"
  79
  80 #ifdef __WXMAC__
  81 #include <ATSUnicode.h>
  82 #include <TextCommon.h>
  83 #include <TextEncodingConverter.h>
  84
  85 #include  "wx/mac/private.h"  // includes mac headers
  86 #endif
  87 // ----------------------------------------------------------------------------
  88 // macros
  89 // ----------------------------------------------------------------------------
  90
  91 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  92 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  93
// Select, according to SIZEOF_WCHAR_T, the name of the encoding used for
// wchar_t strings (WC_NAME), the matching byte swapping macro (WC_BSWAP) and
// the name of the same encoding with explicit endianness (WC_NAME_BEST, chosen
// by WORDS_BIGENDIAN).
#if SIZEOF_WCHAR_T == 4
    // 4 byte wchar_t: treated as UCS-4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#elif SIZEOF_WCHAR_T == 2
    // 2 byte wchar_t: treated as UTF-16; WC_UTF16 additionally enables the
    // code paths below which go through encode_utf16()/decode_utf16()
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
 115
 116 // ============================================================================
 117 // implementation
 118 // ============================================================================
 119
 120 // ----------------------------------------------------------------------------
 121 // UTF-16 en/decoding to/from UCS-4
 122 // ----------------------------------------------------------------------------
 123
 124
 125 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 126 {
 127     if (input<=0xffff)
 128     {
 129         if (output)
 130             *output = (wxUint16) input;
 131         return 1;
 132     }
 133     else if (input>=0x110000)
 134     {
 135         return (size_t)-1;
 136     }
 137     else
 138     {
 139         if (output)
 140         {
 141             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 142             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 143         }
 144         return 2;
 145     }
 146 }
 147
 148 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 149 {
 150     if ((*input<0xd800) || (*input>0xdfff))
 151     {
 152         output = *input;
 153         return 1;
 154     }
 155     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 156     {
 157         output = *input;
 158         return (size_t)-1;
 159     }
 160     else
 161     {
 162         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 163         return 2;
 164     }
 165 }
 166
 167
 168 // ----------------------------------------------------------------------------
 169 // wxMBConv
 170 // ----------------------------------------------------------------------------
 171
// Empty destructor, defined out of line in this file.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 176
 177 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 178 {
 179     if ( psz )
 180     {
 181         // calculate the length of the buffer needed first
 182         size_t nLen = MB2WC(NULL, psz, 0);
 183         if ( nLen != (size_t)-1 )
 184         {
 185             // now do the actual conversion
 186             wxWCharBuffer buf(nLen);
 187             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 188             if ( nLen != (size_t)-1 )
 189             {
 190                 return buf;
 191             }
 192         }
 193     }
 194
 195     wxWCharBuffer buf((wchar_t *)NULL);
 196
 197     return buf;
 198 }
 199
 200 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 201 {
 202     if ( pwz )
 203     {
 204         size_t nLen = WC2MB(NULL, pwz, 0);
 205         if ( nLen != (size_t)-1 )
 206         {
 207             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 208             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 209             if ( nLen != (size_t)-1 )
 210             {
 211                 return buf;
 212             }
 213         }
 214     }
 215
 216     wxCharBuffer buf((char *)NULL);
 217
 218     return buf;
 219 }
 220
 221 const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
 222 {
 223     wxASSERT(pOutSize != NULL);
 224
 225     const char* szEnd = szString + nStringLen + 1;
 226     const char* szPos = szString;
 227     const char* szStart = szPos;
 228
 229     size_t nActualLength = 0;
 230     size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
 231
 232     wxWCharBuffer theBuffer(nCurrentSize);
 233
 234     //Convert the string until the length() is reached, continuing the
 235     //loop every time a null character is reached
 236     while(szPos != szEnd)
 237     {
 238         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 239
 240         //Get the length of the current (sub)string
 241         size_t nLen = MB2WC(NULL, szPos, 0);
 242
 243         //Invalid conversion?
 244         if( nLen == (size_t)-1 )
 245         {
 246             *pOutSize = 0;
 247             theBuffer.data()[0u] = wxT('\0');
 248             return theBuffer;
 249         }
 250
 251
 252         //Increase the actual length (+1 for current null character)
 253         nActualLength += nLen + 1;
 254
 255         //if buffer too big, realloc the buffer
 256         if (nActualLength > (nCurrentSize+1))
 257         {
 258             wxWCharBuffer theNewBuffer(nCurrentSize << 1);
 259             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
 260             theBuffer = theNewBuffer;
 261             nCurrentSize <<= 1;
 262         }
 263
 264         //Convert the current (sub)string
 265         if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 266         {
 267             *pOutSize = 0;
 268             theBuffer.data()[0u] = wxT('\0');
 269             return theBuffer;
 270         }
 271
 272         //Increment to next (sub)string
 273         //Note that we have to use strlen here instead of nLen
 274         //here because XX2XX gives us the size of the output buffer,
 275         //not neccessarly the length of the string
 276         szPos += strlen(szPos) + 1;
 277     }
 278
 279     //success - return actual length and the buffer
 280     *pOutSize = nActualLength;
 281     return theBuffer;
 282 }
 283
 284 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
 285 {
 286     wxASSERT(pOutSize != NULL);
 287
 288     const wchar_t* szEnd = szString + nStringLen + 1;
 289     const wchar_t* szPos = szString;
 290     const wchar_t* szStart = szPos;
 291
 292     size_t nActualLength = 0;
 293     size_t nCurrentSize = nStringLen << 2; //try * 4 first
 294
 295     wxCharBuffer theBuffer(nCurrentSize);
 296
 297     //Convert the string until the length() is reached, continuing the
 298     //loop every time a null character is reached
 299     while(szPos != szEnd)
 300     {
 301         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 302
 303         //Get the length of the current (sub)string
 304         size_t nLen = WC2MB(NULL, szPos, 0);
 305
 306         //Invalid conversion?
 307         if( nLen == (size_t)-1 )
 308         {
 309             *pOutSize = 0;
 310             theBuffer.data()[0u] = wxT('\0');
 311             return theBuffer;
 312         }
 313
 314         //Increase the actual length (+1 for current null character)
 315         nActualLength += nLen + 1;
 316
 317         //if buffer too big, realloc the buffer
 318         if (nActualLength > (nCurrentSize+1))
 319         {
 320             wxCharBuffer theNewBuffer(nCurrentSize << 1);
 321             memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
 322             theBuffer = theNewBuffer;
 323             nCurrentSize <<= 1;
 324         }
 325
 326         //Convert the current (sub)string
 327         if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 328         {
 329             *pOutSize = 0;
 330             theBuffer.data()[0u] = wxT('\0');
 331             return theBuffer;
 332         }
 333
 334         //Increment to next (sub)string
 335         //Note that we have to use wxWcslen here instead of nLen
 336         //here because XX2XX gives us the size of the output buffer,
 337         //not neccessarly the length of the string
 338         szPos += wxWcslen(szPos) + 1;
 339     }
 340
 341     //success - return actual length and the buffer
 342     *pOutSize = nActualLength;
 343     return theBuffer;
 344 }
 345
 346 // ----------------------------------------------------------------------------
 347 // wxMBConvLibc
 348 // ----------------------------------------------------------------------------
 349
// Multibyte to wide character conversion: simply forwards all the arguments
// to wxMB2WC() and returns whatever it returns.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 354
// Wide character to multibyte conversion: simply forwards all the arguments
// to wxWC2MB() and returns whatever it returns.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 359 // ----------------------------------------------------------------------------
 360 // UTF-7
 361 // ----------------------------------------------------------------------------
 362
 363 // Implementation (C) 2004 Fredrik Roubert
 364
 365 //
 366 // BASE64 decoding table
 367 //
 368 static const unsigned char utf7unb64[] =
 369 {
 370     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 371     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 372     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 373     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 374     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 375     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 376     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 377     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 378     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 379     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 380     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 381     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 382     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 383     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 384     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 385     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 386     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 387     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 388     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 389     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 390     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 391     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 392     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 393     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 394     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 395     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 396     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 397     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 398     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 399     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 400     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 401     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 402 };
 403
 404 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 405 {
 406
 407     size_t len = 0;
 408
 409     while (*psz && ((!buf) || (len < n)))
 410     {
 411         unsigned char cc = *psz++;
 412         if (cc != '+')
 413         {
 414             // plain ASCII char
 415             if (buf)
 416                 *buf++ = cc;
 417             len++;
 418         }
 419         else if (*psz == '-')
 420         {
 421             // encoded plus sign
 422             if (buf)
 423                 *buf++ = cc;
 424             len++;
 425             psz++;
 426         }
 427         else
 428         {
 429             // BASE64 encoded string
 430             bool lsb;
 431             unsigned char c;
 432             unsigned int d, l;
 433             for (lsb = false, d = 0, l = 0;
 434                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 435             {
 436                 d <<= 6;
 437                 d += cc;
 438                 for (l += 6; l >= 8; lsb = !lsb)
 439                 {
 440                     c = (unsigned char)((d >> (l -= 8)) % 256);
 441                     if (lsb)
 442                     {
 443                         if (buf)
 444                             *buf++ |= c;
 445                         len ++;
 446                     }
 447                     else
 448                         if (buf)
 449                             *buf = (wchar_t)(c << 8);
 450                 }
 451             }
 452             if (*psz == '-')
 453                 psz++;
 454         }
 455     }
 456     if (buf && (len < n))
 457         *buf = 0;
 458     return len;
 459 }
 460
 461 //
 462 // BASE64 encoding table
 463 //
 464 static const unsigned char utf7enb64[] =
 465 {
 466     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 467     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 468     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 469     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 470     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 471     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 472     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 473     '4', '5', '6', '7', '8', '9', '+', '/'
 474 };
 475
 476 //
 477 // UTF-7 encoding table
 478 //
 479 // 0 - Set D (directly encoded characters)
 480 // 1 - Set O (optional direct characters)
 481 // 2 - whitespace characters (optional)
 482 // 3 - special characters
 483 //
 484 static const unsigned char utf7encode[128] =
 485 {
 486     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 487     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 488     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 489     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 490     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 491     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 492     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 493     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 494 };
 495
 496 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
 497 *psz, size_t n) const
 498 {
 499
 500
 501     size_t len = 0;
 502
 503     while (*psz && ((!buf) || (len < n)))
 504     {
 505         wchar_t cc = *psz++;
 506         if (cc < 0x80 && utf7encode[cc] < 1)
 507         {
 508             // plain ASCII char
 509             if (buf)
 510                 *buf++ = (char)cc;
 511             len++;
 512         }
 513 #ifndef WC_UTF16
 514         else if (((wxUint32)cc) > 0xffff)
 515             {
 516             // no surrogate pair generation (yet?)
 517             return (size_t)-1;
 518         }
 519 #endif
 520         else
 521         {
 522             if (buf)
 523                 *buf++ = '+';
 524             len++;
 525             if (cc != '+')
 526             {
 527                 // BASE64 encode string
 528                 unsigned int lsb, d, l;
 529                 for (d = 0, l = 0;; psz++)
 530                 {
 531                     for (lsb = 0; lsb < 2; lsb ++)
 532                     {
 533                         d <<= 8;
 534                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 535
 536                         for (l += 8; l >= 6; )
 537                         {
 538                             l -= 6;
 539                             if (buf)
 540                                 *buf++ = utf7enb64[(d >> l) % 64];
 541                             len++;
 542                         }
 543                     }
 544                     cc = *psz;
 545                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 546                         break;
 547                 }
 548                 if (l != 0)
 549                 {
 550                     if (buf)
 551                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 552                     len++;
 553                 }
 554             }
 555             if (buf)
 556                 *buf++ = '-';
 557             len++;
 558         }
 559     }
 560     if (buf && (len < n))
 561         *buf = 0;
 562     return len;
 563 }
 564
 565 // ----------------------------------------------------------------------------
 566 // UTF-8
 567 // ----------------------------------------------------------------------------
 568
 569 static wxUint32 utf8_max[]=
 570     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 571
 572 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 573 {
 574     size_t len = 0;
 575
 576     while (*psz && ((!buf) || (len < n)))
 577     {
 578         unsigned char cc = *psz++, fc = cc;
 579         unsigned cnt;
 580         for (cnt = 0; fc & 0x80; cnt++)
 581             fc <<= 1;
 582         if (!cnt)
 583         {
 584             // plain ASCII char
 585             if (buf)
 586                 *buf++ = cc;
 587             len++;
 588         }
 589         else
 590         {
 591             cnt--;
 592             if (!cnt)
 593             {
 594                 // invalid UTF-8 sequence
 595                 return (size_t)-1;
 596             }
 597             else
 598             {
 599                 unsigned ocnt = cnt - 1;
 600                 wxUint32 res = cc & (0x3f >> cnt);
 601                 while (cnt--)
 602                 {
 603                     cc = *psz++;
 604                     if ((cc & 0xC0) != 0x80)
 605                     {
 606                         // invalid UTF-8 sequence
 607                         return (size_t)-1;
 608                     }
 609                     res = (res << 6) | (cc & 0x3f);
 610                 }
 611                 if (res <= utf8_max[ocnt])
 612                 {
 613                     // illegal UTF-8 encoding
 614                     return (size_t)-1;
 615                 }
 616 #ifdef WC_UTF16
 617                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 618                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 619                 if (pa == (size_t)-1)
 620                   return (size_t)-1;
 621                 if (buf)
 622                     buf += pa;
 623                 len += pa;
 624 #else // !WC_UTF16
 625                 if (buf)
 626                     *buf++ = res;
 627                 len++;
 628 #endif // WC_UTF16/!WC_UTF16
 629             }
 630         }
 631     }
 632     if (buf && (len < n))
 633         *buf = 0;
 634     return len;
 635 }
 636
 637 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 638 {
 639     size_t len = 0;
 640
 641     while (*psz && ((!buf) || (len < n)))
 642     {
 643         wxUint32 cc;
 644 #ifdef WC_UTF16
 645         // cast is ok for WC_UTF16
 646         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 647         psz += (pa == (size_t)-1) ? 1 : pa;
 648 #else
 649         cc=(*psz++) & 0x7fffffff;
 650 #endif
 651         unsigned cnt;
 652         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 653         if (!cnt)
 654         {
 655             // plain ASCII char
 656             if (buf)
 657                 *buf++ = (char) cc;
 658             len++;
 659         }
 660
 661         else
 662         {
 663             len += cnt + 1;
 664             if (buf)
 665             {
 666                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 667                 while (cnt--)
 668                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 669             }
 670         }
 671     }
 672
 673     if (buf && (len<n)) *buf = 0;
 674
 675     return len;
 676 }
 677
 678
 679
 680
 681 // ----------------------------------------------------------------------------
 682 // UTF-16
 683 // ----------------------------------------------------------------------------
 684
// The functions below are written in terms of a "straight" and a "swap"
// converter. These names are mapped onto the real classes here: with
// WORDS_BIGENDIAN defined "straight" is the big endian converter and "swap"
// the little endian one, otherwise it is the other way round.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 692
 693
 694 #ifdef WC_UTF16
 695
 696 // copy 16bit MB to 16bit String
 697 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 698 {
 699     size_t len=0;
 700
 701     while (*(wxUint16*)psz && (!buf || len < n))
 702     {
 703         if (buf)
 704             *buf++ = *(wxUint16*)psz;
 705         len++;
 706
 707         psz += sizeof(wxUint16);
 708     }
 709     if (buf && len<n)   *buf=0;
 710
 711     return len;
 712 }
 713
 714
 715 // copy 16bit String to 16bit MB
 716 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 717 {
 718     size_t len=0;
 719
 720     while (*psz && (!buf || len < n))
 721     {
 722         if (buf)
 723         {
 724             *(wxUint16*)buf = *psz;
 725             buf += sizeof(wxUint16);
 726         }
 727         len += sizeof(wxUint16);
 728         psz++;
 729     }
 730     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 731
 732     return len;
 733 }
 734
 735
 736 // swap 16bit MB to 16bit String
 737 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 738 {
 739     size_t len=0;
 740
 741     while (*(wxUint16*)psz && (!buf || len < n))
 742     {
 743         if (buf)
 744         {
 745             ((char *)buf)[0] = psz[1];
 746             ((char *)buf)[1] = psz[0];
 747             buf++;
 748         }
 749         len++;
 750         psz += sizeof(wxUint16);
 751     }
 752     if (buf && len<n)   *buf=0;
 753
 754     return len;
 755 }
 756
 757
 758 // swap 16bit MB to 16bit String
 759 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 760 {
 761     size_t len=0;
 762
 763     while (*psz && (!buf || len < n))
 764     {
 765         if (buf)
 766         {
 767             *buf++ = ((char*)psz)[1];
 768             *buf++ = ((char*)psz)[0];
 769         }
 770         len += sizeof(wxUint16);
 771         psz++;
 772     }
 773     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 774
 775     return len;
 776 }
 777
 778
 779 #else // WC_UTF16
 780
 781
 782 // copy 16bit MB to 32bit String
 783 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 784 {
 785     size_t len=0;
 786
 787     while (*(wxUint16*)psz && (!buf || len < n))
 788     {
 789         wxUint32 cc;
 790         size_t pa=decode_utf16((wxUint16*)psz, cc);
 791         if (pa == (size_t)-1)
 792             return pa;
 793
 794         if (buf)
 795             *buf++ = cc;
 796         len++;
 797         psz += pa * sizeof(wxUint16);
 798     }
 799     if (buf && len<n)   *buf=0;
 800
 801     return len;
 802 }
 803
 804
 805 // copy 32bit String to 16bit MB
 806 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 807 {
 808     size_t len=0;
 809
 810     while (*psz && (!buf || len < n))
 811     {
 812         wxUint16 cc[2];
 813         size_t pa=encode_utf16(*psz, cc);
 814
 815         if (pa == (size_t)-1)
 816             return pa;
 817
 818         if (buf)
 819         {
 820             *(wxUint16*)buf = cc[0];
 821             buf += sizeof(wxUint16);
 822             if (pa > 1)
 823             {
 824                 *(wxUint16*)buf = cc[1];
 825                 buf += sizeof(wxUint16);
 826             }
 827         }
 828
 829         len += pa*sizeof(wxUint16);
 830         psz++;
 831     }
 832     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 833
 834     return len;
 835 }
 836
 837
 838 // swap 16bit MB to 32bit String
 839 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 840 {
 841     size_t len=0;
 842
 843     while (*(wxUint16*)psz && (!buf || len < n))
 844     {
 845         wxUint32 cc;
 846         char tmp[4];
 847         tmp[0]=psz[1];  tmp[1]=psz[0];
 848         tmp[2]=psz[3];  tmp[3]=psz[2];
 849
 850         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 851         if (pa == (size_t)-1)
 852             return pa;
 853
 854         if (buf)
 855             *buf++ = cc;
 856
 857         len++;
 858         psz += pa * sizeof(wxUint16);
 859     }
 860     if (buf && len<n)   *buf=0;
 861
 862     return len;
 863 }
 864
 865
 866 // swap 32bit String to 16bit MB
 867 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 868 {
 869     size_t len=0;
 870
 871     while (*psz && (!buf || len < n))
 872     {
 873         wxUint16 cc[2];
 874         size_t pa=encode_utf16(*psz, cc);
 875
 876         if (pa == (size_t)-1)
 877             return pa;
 878
 879         if (buf)
 880         {
 881             *buf++ = ((char*)cc)[1];
 882             *buf++ = ((char*)cc)[0];
 883             if (pa > 1)
 884             {
 885                 *buf++ = ((char*)cc)[3];
 886                 *buf++ = ((char*)cc)[2];
 887             }
 888         }
 889
 890         len += pa*sizeof(wxUint16);
 891         psz++;
 892     }
 893     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 894
 895     return len;
 896 }
 897
 898 #endif // WC_UTF16
 899
 900
 901 // ----------------------------------------------------------------------------
 902 // UTF-32
 903 // ----------------------------------------------------------------------------
 904
// As for UTF-16 above, the functions below are written in terms of a
// "straight" and a "swap" converter: with WORDS_BIGENDIAN defined "straight"
// is the big endian converter and "swap" the little endian one, otherwise it
// is the other way round.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif
 912
 913
// the little and big endian UTF-32 converter objects
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 916
 917
 918 #ifdef WC_UTF16
 919
 920 // copy 32bit MB to 16bit String
 921 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 922 {
 923     size_t len=0;
 924
 925     while (*(wxUint32*)psz && (!buf || len < n))
 926     {
 927         wxUint16 cc[2];
 928
 929         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 930         if (pa == (size_t)-1)
 931             return pa;
 932
 933         if (buf)
 934         {
 935             *buf++ = cc[0];
 936             if (pa > 1)
 937                 *buf++ = cc[1];
 938         }
 939         len += pa;
 940         psz += sizeof(wxUint32);
 941     }
 942     if (buf && len<n)   *buf=0;
 943
 944     return len;
 945 }
 946
 947
 948 // copy 16bit String to 32bit MB
 949 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 950 {
 951     size_t len=0;
 952
 953     while (*psz && (!buf || len < n))
 954     {
 955         wxUint32 cc;
 956
 957         // cast is ok for WC_UTF16
 958         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 959         if (pa == (size_t)-1)
 960             return pa;
 961
 962         if (buf)
 963         {
 964             *(wxUint32*)buf = cc;
 965             buf += sizeof(wxUint32);
 966         }
 967         len += sizeof(wxUint32);
 968         psz += pa;
 969     }
 970
 971     if (buf && len<=n-sizeof(wxUint32))
 972         *(wxUint32*)buf=0;
 973
 974     return len;
 975 }
 976
 977
 978
 979 // swap 32bit MB to 16bit String
 980 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 981 {
 982     size_t len=0;
 983
 984     while (*(wxUint32*)psz && (!buf || len < n))
 985     {
 986         char tmp[4];
 987         tmp[0] = psz[3];   tmp[1] = psz[2];
 988         tmp[2] = psz[1];   tmp[3] = psz[0];
 989
 990
 991         wxUint16 cc[2];
 992
 993         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 994         if (pa == (size_t)-1)
 995             return pa;
 996
 997         if (buf)
 998         {
 999             *buf++ = cc[0];
1000             if (pa > 1)
1001                 *buf++ = cc[1];
1002         }
1003         len += pa;
1004         psz += sizeof(wxUint32);
1005     }
1006
1007     if (buf && len<n)
1008         *buf=0;
1009
1010     return len;
1011 }
1012
1013
1014 // swap 16bit String to 32bit MB
1015 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1016 {
1017     size_t len=0;
1018
1019     while (*psz && (!buf || len < n))
1020     {
1021         char cc[4];
1022
1023         // cast is ok for WC_UTF16
1024         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
1025         if (pa == (size_t)-1)
1026             return pa;
1027
1028         if (buf)
1029         {
1030             *buf++ = cc[3];
1031             *buf++ = cc[2];
1032             *buf++ = cc[1];
1033             *buf++ = cc[0];
1034         }
1035         len += sizeof(wxUint32);
1036         psz += pa;
1037     }
1038
1039     if (buf && len<=n-sizeof(wxUint32))
1040         *(wxUint32*)buf=0;
1041
1042     return len;
1043 }
1044
1045 #else // WC_UTF16
1046
1047
1048 // copy 32bit MB to 32bit String
1049 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1050 {
1051     size_t len=0;
1052
1053     while (*(wxUint32*)psz && (!buf || len < n))
1054     {
1055         if (buf)
1056             *buf++ = *(wxUint32*)psz;
1057         len++;
1058         psz += sizeof(wxUint32);
1059     }
1060
1061     if (buf && len<n)
1062         *buf=0;
1063
1064     return len;
1065 }
1066
1067
1068 // copy 32bit String to 32bit MB
1069 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1070 {
1071     size_t len=0;
1072
1073     while (*psz && (!buf || len < n))
1074     {
1075         if (buf)
1076         {
1077             *(wxUint32*)buf = *psz;
1078             buf += sizeof(wxUint32);
1079         }
1080
1081         len += sizeof(wxUint32);
1082         psz++;
1083     }
1084
1085     if (buf && len<=n-sizeof(wxUint32))
1086         *(wxUint32*)buf=0;
1087
1088     return len;
1089 }
1090
1091
1092 // swap 32bit MB to 32bit String
1093 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1094 {
1095     size_t len=0;
1096
1097     while (*(wxUint32*)psz && (!buf || len < n))
1098     {
1099         if (buf)
1100         {
1101             ((char *)buf)[0] = psz[3];
1102             ((char *)buf)[1] = psz[2];
1103             ((char *)buf)[2] = psz[1];
1104             ((char *)buf)[3] = psz[0];
1105             buf++;
1106         }
1107         len++;
1108         psz += sizeof(wxUint32);
1109     }
1110
1111     if (buf && len<n)
1112         *buf=0;
1113
1114     return len;
1115 }
1116
1117
1118 // swap 32bit String to 32bit MB
1119 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1120 {
1121     size_t len=0;
1122
1123     while (*psz && (!buf || len < n))
1124     {
1125         if (buf)
1126         {
1127             *buf++ = ((char *)psz)[3];
1128             *buf++ = ((char *)psz)[2];
1129             *buf++ = ((char *)psz)[1];
1130             *buf++ = ((char *)psz)[0];
1131         }
1132         len += sizeof(wxUint32);
1133         psz++;
1134     }
1135
1136     if (buf && len<=n-sizeof(wxUint32))
1137         *(wxUint32*)buf=0;
1138
1139     return len;
1140 }
1141
1142
1143 #endif // WC_UTF16
1144
1145
1146 // ============================================================================
1147 // The classes doing conversion using the iconv_xxx() functions
1148 // ============================================================================
1149
1150 #ifdef HAVE_ICONV
1151
1152 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1153 //     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1154 //     (unless there's yet another bug in glibc) the only case when iconv()
1155 //     returns with (size_t)-1 (which means error) and says there are 0 bytes
1156 //     left in the input buffer -- when _real_ error occurs,
1157 //     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1158 //     iconv() failure.
1159 //     [This bug does not appear in glibc 2.2.]
// NB: the macro arguments are parenthesised so that passing an expression
//     (e.g. "a | b") is not regrouped by the comparison operators
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// casts the address of an input pointer to the type used for the input buffer
// argument in the iconv() calls below
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1168
1169 // ----------------------------------------------------------------------------
1170 // wxMBConv_iconv: encapsulates an iconv character set
1171 // ----------------------------------------------------------------------------
1172
1173 class wxMBConv_iconv : public wxMBConv
1174 {
1175 public:
1176     wxMBConv_iconv(const wxChar *name);
1177     virtual ~wxMBConv_iconv();
1178
1179     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1180     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1181
1182     bool IsOk() const
1183         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1184
1185 protected:
1186     // the iconv handlers used to translate from multibyte to wide char and in
1187     // the other direction
1188     iconv_t m2w,
1189             w2m;
1190 #if wxUSE_THREADS
1191     // guards access to m2w and w2m objects
1192     wxMutex m_iconvMutex;
1193 #endif
1194
1195 private:
1196     // the name (for iconv_open()) of a wide char charset -- if none is
1197     // available on this machine, it will remain NULL
1198     static const char *ms_wcCharsetName;
1199
1200     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1201     // different endian-ness than the native one
1202     static bool ms_wcNeedsSwap;
1203 };
1204
1205 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1206 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1207
1208 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1209 {
1210     // Do it the hard way
1211     char cname[100];
1212     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1213         cname[i] = (char) name[i];
1214
1215     // check for charset that represents wchar_t:
1216     if (ms_wcCharsetName == NULL)
1217     {
1218         ms_wcNeedsSwap = false;
1219
1220         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1221         ms_wcCharsetName = WC_NAME_BEST;
1222         m2w = iconv_open(ms_wcCharsetName, cname);
1223
1224         if (m2w == (iconv_t)-1)
1225         {
1226             // try charset w/o bytesex info (e.g. "UCS4")
1227             // and check for bytesex ourselves:
1228             ms_wcCharsetName = WC_NAME;
1229             m2w = iconv_open(ms_wcCharsetName, cname);
1230
1231             // last bet, try if it knows WCHAR_T pseudo-charset
1232             if (m2w == (iconv_t)-1)
1233             {
1234                 ms_wcCharsetName = "WCHAR_T";
1235                 m2w = iconv_open(ms_wcCharsetName, cname);
1236             }
1237
1238             if (m2w != (iconv_t)-1)
1239             {
1240                 char    buf[2], *bufPtr;
1241                 wchar_t wbuf[2], *wbufPtr;
1242                 size_t  insz, outsz;
1243                 size_t  res;
1244
1245                 buf[0] = 'A';
1246                 buf[1] = 0;
1247                 wbuf[0] = 0;
1248                 insz = 2;
1249                 outsz = SIZEOF_WCHAR_T * 2;
1250                 wbufPtr = wbuf;
1251                 bufPtr = buf;
1252
1253                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1254                             (char**)&wbufPtr, &outsz);
1255
1256                 if (ICONV_FAILED(res, insz))
1257                 {
1258                     ms_wcCharsetName = NULL;
1259                     wxLogLastError(wxT("iconv"));
1260                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1261                 }
1262                 else
1263                 {
1264                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1265                 }
1266             }
1267             else
1268             {
1269                 ms_wcCharsetName = NULL;
1270
1271                 // VS: we must not output an error here, since wxWidgets will safely
1272                 //     fall back to using wxEncodingConverter.
1273                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1274                 //wxLogError(
1275             }
1276         }
1277         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1278     }
1279     else // we already have ms_wcCharsetName
1280     {
1281         m2w = iconv_open(ms_wcCharsetName, cname);
1282     }
1283
1284     // NB: don't ever pass NULL to iconv_open(), it may crash!
1285     if ( ms_wcCharsetName )
1286     {
1287         w2m = iconv_open( cname, ms_wcCharsetName);
1288     }
1289     else
1290     {
1291         w2m = (iconv_t)-1;
1292     }
1293 }
1294
1295 wxMBConv_iconv::~wxMBConv_iconv()
1296 {
1297     if ( m2w != (iconv_t)-1 )
1298         iconv_close(m2w);
1299     if ( w2m != (iconv_t)-1 )
1300         iconv_close(w2m);
1301 }
1302
1303 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1304 {
1305 #if wxUSE_THREADS
1306     // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1307     //     Unfortunately there is a couple of global wxCSConv objects such as
1308     //     wxConvLocal that are used all over wx code, so we have to make sure
1309     //     the handle is used by at most one thread at the time. Otherwise
1310     //     only a few wx classes would be safe to use from non-main threads
1311     //     as MB<->WC conversion would fail "randomly".
1312     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1313 #endif
1314
1315     size_t inbuf = strlen(psz);
1316     size_t outbuf = n * SIZEOF_WCHAR_T;
1317     size_t res, cres;
1318     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1319     wchar_t *bufPtr = buf;
1320     const char *pszPtr = psz;
1321
1322     if (buf)
1323     {
1324         // have destination buffer, convert there
1325         cres = iconv(m2w,
1326                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1327                      (char**)&bufPtr, &outbuf);
1328         res = n - (outbuf / SIZEOF_WCHAR_T);
1329
1330         if (ms_wcNeedsSwap)
1331         {
1332             // convert to native endianness
1333             WC_BSWAP(buf /* _not_ bufPtr */, res)
1334         }
1335
1336         // NB: iconv was given only strlen(psz) characters on input, and so
1337         //     it couldn't convert the trailing zero. Let's do it ourselves
1338         //     if there's some room left for it in the output buffer.
1339         if (res < n)
1340             buf[res] = 0;
1341     }
1342     else
1343     {
1344         // no destination buffer... convert using temp buffer
1345         // to calculate destination buffer requirement
1346         wchar_t tbuf[8];
1347         res = 0;
1348         do {
1349             bufPtr = tbuf;
1350             outbuf = 8*SIZEOF_WCHAR_T;
1351
1352             cres = iconv(m2w,
1353                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1354                          (char**)&bufPtr, &outbuf );
1355
1356             res += 8-(outbuf/SIZEOF_WCHAR_T);
1357         } while ((cres==(size_t)-1) && (errno==E2BIG));
1358     }
1359
1360     if (ICONV_FAILED(cres, inbuf))
1361     {
1362         //VS: it is ok if iconv fails, hence trace only
1363         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1364         return (size_t)-1;
1365     }
1366
1367     return res;
1368 }
1369
1370 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1371 {
1372 #if wxUSE_THREADS
1373     // NB: explained in MB2WC
1374     wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1375 #endif
1376
1377     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1378     size_t outbuf = n;
1379     size_t res, cres;
1380
1381     wchar_t *tmpbuf = 0;
1382
1383     if (ms_wcNeedsSwap)
1384     {
1385         // need to copy to temp buffer to switch endianness
1386         // this absolutely doesn't rock!
1387         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1388         //  could be in read-only memory, or be accessed in some other thread)
1389         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1390         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1391         WC_BSWAP(tmpbuf, inbuf)
1392         psz=tmpbuf;
1393     }
1394
1395     if (buf)
1396     {
1397         // have destination buffer, convert there
1398         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1399
1400         res = n-outbuf;
1401
1402         // NB: iconv was given only wcslen(psz) characters on input, and so
1403         //     it couldn't convert the trailing zero. Let's do it ourselves
1404         //     if there's some room left for it in the output buffer.
1405         if (res < n)
1406             buf[0] = 0;
1407     }
1408     else
1409     {
1410         // no destination buffer... convert using temp buffer
1411         // to calculate destination buffer requirement
1412         char tbuf[16];
1413         res = 0;
1414         do {
1415             buf = tbuf; outbuf = 16;
1416
1417             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1418
1419             res += 16 - outbuf;
1420         } while ((cres==(size_t)-1) && (errno==E2BIG));
1421     }
1422
1423     if (ms_wcNeedsSwap)
1424     {
1425         free(tmpbuf);
1426     }
1427
1428     if (ICONV_FAILED(cres, inbuf))
1429     {
1430         //VS: it is ok if iconv fails, hence trace only
1431         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1432         return (size_t)-1;
1433     }
1434
1435     return res;
1436 }
1437
1438 #endif // HAVE_ICONV
1439
1440
1441 // ============================================================================
1442 // Win32 conversion classes
1443 // ============================================================================
1444
1445 #ifdef wxHAVE_WIN32_MB2WC
1446
1447 // from utils.cpp
1448 #if wxUSE_FONTMAP
1449 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1450 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1451 #endif
1452
1453 class wxMBConv_win32 : public wxMBConv
1454 {
1455 public:
1456     wxMBConv_win32()
1457     {
1458         m_CodePage = CP_ACP;
1459     }
1460
1461 #if wxUSE_FONTMAP
1462     wxMBConv_win32(const wxChar* name)
1463     {
1464         m_CodePage = wxCharsetToCodepage(name);
1465     }
1466
1467     wxMBConv_win32(wxFontEncoding encoding)
1468     {
1469         m_CodePage = wxEncodingToCodepage(encoding);
1470     }
1471 #endif
1472
1473     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1474     {
1475         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1476         // the behaviour is not compatible with the Unix version (using iconv)
1477         // and break the library itself, e.g. wxTextInputStream::NextChar()
1478         // wouldn't work if reading an incomplete MB char didn't result in an
1479         // error
1480         const size_t len = ::MultiByteToWideChar
1481                              (
1482                                 m_CodePage,     // code page
1483                                 MB_ERR_INVALID_CHARS, // flags: fall on error
1484                                 psz,            // input string
1485                                 -1,             // its length (NUL-terminated)
1486                                 buf,            // output string
1487                                 buf ? n : 0     // size of output buffer
1488                              );
1489
1490         // note that it returns count of written chars for buf != NULL and size
1491         // of the needed buffer for buf == NULL so in either case the length of
1492         // the string (which never includes the terminating NUL) is one less
1493         return len ? len - 1 : (size_t)-1;
1494     }
1495
1496     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1497     {
1498         /*
1499             we have a problem here: by default, WideCharToMultiByte() may
1500             replace characters unrepresentable in the target code page with bad
1501             quality approximations such as turning "1/2" symbol (U+00BD) into
1502             "1" for the code pages which don't have it and we, obviously, want
1503             to avoid this at any price
1504
1505             the trouble is that this function does it _silently_, i.e. it won't
1506             even tell us whether it did or not... Win98/2000 and higher provide
1507             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1508             we have to resort to a round trip, i.e. check that converting back
1509             results in the same string -- this is, of course, expensive but
1510             otherwise we simply can't be sure to not garble the data.
1511          */
1512
1513         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1514         // it doesn't work with CJK encodings (which we test for rather roughly
1515         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1516         // supporting it
1517         BOOL usedDef wxDUMMY_INITIALIZE(false);
1518         BOOL *pUsedDef;
1519         int flags;
1520         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1521         {
1522             // it's our lucky day
1523             flags = WC_NO_BEST_FIT_CHARS;
1524             pUsedDef = &usedDef;
1525         }
1526         else // old system or unsupported encoding
1527         {
1528             flags = 0;
1529             pUsedDef = NULL;
1530         }
1531
1532         const size_t len = ::WideCharToMultiByte
1533                              (
1534                                 m_CodePage,     // code page
1535                                 flags,          // either none or no best fit
1536                                 pwz,            // input string
1537                                 -1,             // it is (wide) NUL-terminated
1538                                 buf,            // output buffer
1539                                 buf ? n : 0,    // and its size
1540                                 NULL,           // default "replacement" char
1541                                 pUsedDef        // [out] was it used?
1542                              );
1543
1544         if ( !len )
1545         {
1546             // function totally failed
1547             return (size_t)-1;
1548         }
1549
1550         // if we were really converting, check if we succeeded
1551         if ( buf )
1552         {
1553             if ( flags )
1554             {
1555                 // check if the conversion failed, i.e. if any replacements
1556                 // were done
1557                 if ( usedDef )
1558                     return (size_t)-1;
1559             }
1560             else // we must resort to double tripping...
1561             {
1562                 wxWCharBuffer wcBuf(n);
1563                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1564                         wcscmp(wcBuf, pwz) != 0 )
1565                 {
1566                     // we didn't obtain the same thing we started from, hence
1567                     // the conversion was lossy and we consider that it failed
1568                     return (size_t)-1;
1569                 }
1570             }
1571         }
1572
1573         // see the comment above for the reason of "len - 1"
1574         return len - 1;
1575     }
1576
1577     bool IsOk() const { return m_CodePage != -1; }
1578
1579 private:
1580     static bool CanUseNoBestFit()
1581     {
1582         static int s_isWin98Or2k = -1;
1583
1584         if ( s_isWin98Or2k == -1 )
1585         {
1586             int verMaj, verMin;
1587             switch ( wxGetOsVersion(&verMaj, &verMin) )
1588             {
1589                 case wxWIN95:
1590                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1591                     break;
1592
1593                 case wxWINDOWS_NT:
1594                     s_isWin98Or2k = verMaj >= 5;
1595                     break;
1596
1597                 default:
1598                     // unknown, be conseravtive by default
1599                     s_isWin98Or2k = 0;
1600             }
1601
1602             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1603         }
1604
1605         return s_isWin98Or2k == 1;
1606     }
1607
1608     long m_CodePage;
1609 };
1610
1611 #endif // wxHAVE_WIN32_MB2WC
1612
1613 // ============================================================================
1614 // Cocoa conversion classes
1615 // ============================================================================
1616
1617 #if defined(__WXCOCOA__)
1618
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1622
1623 #include <CoreFoundation/CFString.h>
1624 #include <CoreFoundation/CFStringEncodingExt.h>
1625
1626 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1627 {
1628     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1629     if ( encoding == wxFONTENCODING_DEFAULT )
1630     {
1631         enc = CFStringGetSystemEncoding();
1632     }
1633     else switch( encoding)
1634     {
1635         case wxFONTENCODING_ISO8859_1 :
1636             enc = kCFStringEncodingISOLatin1 ;
1637             break ;
1638         case wxFONTENCODING_ISO8859_2 :
1639             enc = kCFStringEncodingISOLatin2;
1640             break ;
1641         case wxFONTENCODING_ISO8859_3 :
1642             enc = kCFStringEncodingISOLatin3 ;
1643             break ;
1644         case wxFONTENCODING_ISO8859_4 :
1645             enc = kCFStringEncodingISOLatin4;
1646             break ;
1647         case wxFONTENCODING_ISO8859_5 :
1648             enc = kCFStringEncodingISOLatinCyrillic;
1649             break ;
1650         case wxFONTENCODING_ISO8859_6 :
1651             enc = kCFStringEncodingISOLatinArabic;
1652             break ;
1653         case wxFONTENCODING_ISO8859_7 :
1654             enc = kCFStringEncodingISOLatinGreek;
1655             break ;
1656         case wxFONTENCODING_ISO8859_8 :
1657             enc = kCFStringEncodingISOLatinHebrew;
1658             break ;
1659         case wxFONTENCODING_ISO8859_9 :
1660             enc = kCFStringEncodingISOLatin5;
1661             break ;
1662         case wxFONTENCODING_ISO8859_10 :
1663             enc = kCFStringEncodingISOLatin6;
1664             break ;
1665         case wxFONTENCODING_ISO8859_11 :
1666             enc = kCFStringEncodingISOLatinThai;
1667             break ;
1668         case wxFONTENCODING_ISO8859_13 :
1669             enc = kCFStringEncodingISOLatin7;
1670             break ;
1671         case wxFONTENCODING_ISO8859_14 :
1672             enc = kCFStringEncodingISOLatin8;
1673             break ;
1674         case wxFONTENCODING_ISO8859_15 :
1675             enc = kCFStringEncodingISOLatin9;
1676             break ;
1677
1678         case wxFONTENCODING_KOI8 :
1679             enc = kCFStringEncodingKOI8_R;
1680             break ;
1681         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1682             enc = kCFStringEncodingDOSRussian;
1683             break ;
1684
1685 //      case wxFONTENCODING_BULGARIAN :
1686 //          enc = ;
1687 //          break ;
1688
1689         case wxFONTENCODING_CP437 :
1690             enc =kCFStringEncodingDOSLatinUS ;
1691             break ;
1692         case wxFONTENCODING_CP850 :
1693             enc = kCFStringEncodingDOSLatin1;
1694             break ;
1695         case wxFONTENCODING_CP852 :
1696             enc = kCFStringEncodingDOSLatin2;
1697             break ;
1698         case wxFONTENCODING_CP855 :
1699             enc = kCFStringEncodingDOSCyrillic;
1700             break ;
1701         case wxFONTENCODING_CP866 :
1702             enc =kCFStringEncodingDOSRussian ;
1703             break ;
1704         case wxFONTENCODING_CP874 :
1705             enc = kCFStringEncodingDOSThai;
1706             break ;
1707         case wxFONTENCODING_CP932 :
1708             enc = kCFStringEncodingDOSJapanese;
1709             break ;
1710         case wxFONTENCODING_CP936 :
1711             enc =kCFStringEncodingDOSChineseSimplif ;
1712             break ;
1713         case wxFONTENCODING_CP949 :
1714             enc = kCFStringEncodingDOSKorean;
1715             break ;
1716         case wxFONTENCODING_CP950 :
1717             enc = kCFStringEncodingDOSChineseTrad;
1718             break ;
1719         case wxFONTENCODING_CP1250 :
1720             enc = kCFStringEncodingWindowsLatin2;
1721             break ;
1722         case wxFONTENCODING_CP1251 :
1723             enc =kCFStringEncodingWindowsCyrillic ;
1724             break ;
1725         case wxFONTENCODING_CP1252 :
1726             enc =kCFStringEncodingWindowsLatin1 ;
1727             break ;
1728         case wxFONTENCODING_CP1253 :
1729             enc = kCFStringEncodingWindowsGreek;
1730             break ;
1731         case wxFONTENCODING_CP1254 :
1732             enc = kCFStringEncodingWindowsLatin5;
1733             break ;
1734         case wxFONTENCODING_CP1255 :
1735             enc =kCFStringEncodingWindowsHebrew ;
1736             break ;
1737         case wxFONTENCODING_CP1256 :
1738             enc =kCFStringEncodingWindowsArabic ;
1739             break ;
1740         case wxFONTENCODING_CP1257 :
1741             enc = kCFStringEncodingWindowsBalticRim;
1742             break ;
1743 //   This only really encodes to UTF7 (if that) evidently
1744 //        case wxFONTENCODING_UTF7 :
1745 //            enc = kCFStringEncodingNonLossyASCII ;
1746 //            break ;
1747         case wxFONTENCODING_UTF8 :
1748             enc = kCFStringEncodingUTF8 ;
1749             break ;
1750         case wxFONTENCODING_EUC_JP :
1751             enc = kCFStringEncodingEUC_JP;
1752             break ;
1753         case wxFONTENCODING_UTF16 :
1754             enc = kCFStringEncodingUnicode ;
1755             break ;
1756         case wxFONTENCODING_MACROMAN :
1757             enc = kCFStringEncodingMacRoman ;
1758             break ;
1759         case wxFONTENCODING_MACJAPANESE :
1760             enc = kCFStringEncodingMacJapanese ;
1761             break ;
1762         case wxFONTENCODING_MACCHINESETRAD :
1763             enc = kCFStringEncodingMacChineseTrad ;
1764             break ;
1765         case wxFONTENCODING_MACKOREAN :
1766             enc = kCFStringEncodingMacKorean ;
1767             break ;
1768         case wxFONTENCODING_MACARABIC :
1769             enc = kCFStringEncodingMacArabic ;
1770             break ;
1771         case wxFONTENCODING_MACHEBREW :
1772             enc = kCFStringEncodingMacHebrew ;
1773             break ;
1774         case wxFONTENCODING_MACGREEK :
1775             enc = kCFStringEncodingMacGreek ;
1776             break ;
1777         case wxFONTENCODING_MACCYRILLIC :
1778             enc = kCFStringEncodingMacCyrillic ;
1779             break ;
1780         case wxFONTENCODING_MACDEVANAGARI :
1781             enc = kCFStringEncodingMacDevanagari ;
1782             break ;
1783         case wxFONTENCODING_MACGURMUKHI :
1784             enc = kCFStringEncodingMacGurmukhi ;
1785             break ;
1786         case wxFONTENCODING_MACGUJARATI :
1787             enc = kCFStringEncodingMacGujarati ;
1788             break ;
1789         case wxFONTENCODING_MACORIYA :
1790             enc = kCFStringEncodingMacOriya ;
1791             break ;
1792         case wxFONTENCODING_MACBENGALI :
1793             enc = kCFStringEncodingMacBengali ;
1794             break ;
1795         case wxFONTENCODING_MACTAMIL :
1796             enc = kCFStringEncodingMacTamil ;
1797             break ;
1798         case wxFONTENCODING_MACTELUGU :
1799             enc = kCFStringEncodingMacTelugu ;
1800             break ;
1801         case wxFONTENCODING_MACKANNADA :
1802             enc = kCFStringEncodingMacKannada ;
1803             break ;
1804         case wxFONTENCODING_MACMALAJALAM :
1805             enc = kCFStringEncodingMacMalayalam ;
1806             break ;
1807         case wxFONTENCODING_MACSINHALESE :
1808             enc = kCFStringEncodingMacSinhalese ;
1809             break ;
1810         case wxFONTENCODING_MACBURMESE :
1811             enc = kCFStringEncodingMacBurmese ;
1812             break ;
1813         case wxFONTENCODING_MACKHMER :
1814             enc = kCFStringEncodingMacKhmer ;
1815             break ;
1816         case wxFONTENCODING_MACTHAI :
1817             enc = kCFStringEncodingMacThai ;
1818             break ;
1819         case wxFONTENCODING_MACLAOTIAN :
1820             enc = kCFStringEncodingMacLaotian ;
1821             break ;
1822         case wxFONTENCODING_MACGEORGIAN :
1823             enc = kCFStringEncodingMacGeorgian ;
1824             break ;
1825         case wxFONTENCODING_MACARMENIAN :
1826             enc = kCFStringEncodingMacArmenian ;
1827             break ;
1828         case wxFONTENCODING_MACCHINESESIMP :
1829             enc = kCFStringEncodingMacChineseSimp ;
1830             break ;
1831         case wxFONTENCODING_MACTIBETAN :
1832             enc = kCFStringEncodingMacTibetan ;
1833             break ;
1834         case wxFONTENCODING_MACMONGOLIAN :
1835             enc = kCFStringEncodingMacMongolian ;
1836             break ;
1837         case wxFONTENCODING_MACETHIOPIC :
1838             enc = kCFStringEncodingMacEthiopic ;
1839             break ;
1840         case wxFONTENCODING_MACCENTRALEUR :
1841             enc = kCFStringEncodingMacCentralEurRoman ;
1842             break ;
1843         case wxFONTENCODING_MACVIATNAMESE :
1844             enc = kCFStringEncodingMacVietnamese ;
1845             break ;
1846         case wxFONTENCODING_MACARABICEXT :
1847             enc = kCFStringEncodingMacExtArabic ;
1848             break ;
1849         case wxFONTENCODING_MACSYMBOL :
1850             enc = kCFStringEncodingMacSymbol ;
1851             break ;
1852         case wxFONTENCODING_MACDINGBATS :
1853             enc = kCFStringEncodingMacDingbats ;
1854             break ;
1855         case wxFONTENCODING_MACTURKISH :
1856             enc = kCFStringEncodingMacTurkish ;
1857             break ;
1858         case wxFONTENCODING_MACCROATIAN :
1859             enc = kCFStringEncodingMacCroatian ;
1860             break ;
1861         case wxFONTENCODING_MACICELANDIC :
1862             enc = kCFStringEncodingMacIcelandic ;
1863             break ;
1864         case wxFONTENCODING_MACROMANIAN :
1865             enc = kCFStringEncodingMacRomanian ;
1866             break ;
1867         case wxFONTENCODING_MACCELTIC :
1868             enc = kCFStringEncodingMacCeltic ;
1869             break ;
1870         case wxFONTENCODING_MACGAELIC :
1871             enc = kCFStringEncodingMacGaelic ;
1872             break ;
1873 //      case wxFONTENCODING_MACKEYBOARD :
1874 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
1875 //          break ;
1876         default :
1877             // because gcc is picky
1878             break ;
1879     } ;
1880     return enc ;
1881 }
1882
1883 class wxMBConv_cocoa : public wxMBConv
1884 {
1885 public:
1886     wxMBConv_cocoa()
1887     {
1888         Init(CFStringGetSystemEncoding()) ;
1889     }
1890
1891     wxMBConv_cocoa(const wxChar* name)
1892     {
1893         Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1894     }
1895
1896     wxMBConv_cocoa(wxFontEncoding encoding)
1897     {
1898         Init( wxCFStringEncFromFontEnc(encoding) );
1899     }
1900
1901     ~wxMBConv_cocoa()
1902     {
1903     }
1904
1905     void Init( CFStringEncoding encoding)
1906     {
1907         m_encoding = encoding ;
1908     }
1909
1910     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1911     {
1912         wxASSERT(szUnConv);
1913
1914         CFStringRef theString = CFStringCreateWithBytes (
1915                                                 NULL, //the allocator
1916                                                 (const UInt8*)szUnConv,
1917                                                 strlen(szUnConv),
1918                                                 m_encoding,
1919                                                 false //no BOM/external representation
1920                                                 );
1921
1922         wxASSERT(theString);
1923
1924         size_t nOutLength = CFStringGetLength(theString);
1925
1926         if (szOut == NULL)
1927         {
1928             CFRelease(theString);
1929             return nOutLength;
1930         }
1931
1932         CFRange theRange = { 0, nOutSize };
1933
1934 #if SIZEOF_WCHAR_T == 4
1935         UniChar* szUniCharBuffer = new UniChar[nOutSize];
1936 #endif
1937
1938         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
1939
1940         CFRelease(theString);
1941
1942         szUniCharBuffer[nOutLength] = '\0' ;
1943
1944 #if SIZEOF_WCHAR_T == 4
1945         wxMBConvUTF16 converter ;
1946         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
1947         delete[] szUniCharBuffer;
1948 #endif
1949
1950         return nOutLength;
1951     }
1952
1953     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1954     {
1955         wxASSERT(szUnConv);
1956
1957         size_t nRealOutSize;
1958         size_t nBufSize = wxWcslen(szUnConv);
1959         UniChar* szUniBuffer = (UniChar*) szUnConv;
1960
1961 #if SIZEOF_WCHAR_T == 4
1962         wxMBConvUTF16BE converter ;
1963         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1964         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1965         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1966         nBufSize /= sizeof(UniChar);
1967 #endif
1968
1969         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1970                                 NULL, //allocator
1971                                 szUniBuffer,
1972                                 nBufSize,
1973                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
1974                             );
1975
1976         wxASSERT(theString);
1977
1978         //Note that CER puts a BOM when converting to unicode
1979         //so we  check and use getchars instead in that case
1980         if (m_encoding == kCFStringEncodingUnicode)
1981         {
1982             if (szOut != NULL)
1983                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1984
1985             nRealOutSize = CFStringGetLength(theString) + 1;
1986         }
1987         else
1988         {
1989             CFStringGetBytes(
1990                 theString,
1991                 CFRangeMake(0, CFStringGetLength(theString)),
1992                 m_encoding,
1993                 0, //what to put in characters that can't be converted -
1994                     //0 tells CFString to return NULL if it meets such a character
1995                 false, //not an external representation
1996                 (UInt8*) szOut,
1997                 nOutSize,
1998                 (CFIndex*) &nRealOutSize
1999                         );
2000         }
2001
2002         CFRelease(theString);
2003
2004 #if SIZEOF_WCHAR_T == 4
2005         delete[] szUniBuffer;
2006 #endif
2007
2008         return  nRealOutSize - 1;
2009     }
2010
2011     bool IsOk() const
2012     {
2013         return m_encoding != kCFStringEncodingInvalidId &&
2014               CFStringIsEncodingAvailable(m_encoding);
2015     }
2016
2017 private:
2018     CFStringEncoding m_encoding ;
2019 };
2020
2021 #endif // defined(__WXCOCOA__)
2022
2023 // ============================================================================
2024 // Mac conversion classes
2025 // ============================================================================
2026
2027 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2028
2029 class wxMBConv_mac : public wxMBConv
2030 {
2031 public:
2032     wxMBConv_mac()
2033     {
2034         Init(CFStringGetSystemEncoding()) ;
2035     }
2036
2037     wxMBConv_mac(const wxChar* name)
2038     {
2039         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
2040     }
2041
2042     wxMBConv_mac(wxFontEncoding encoding)
2043     {
2044         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2045     }
2046
2047     ~wxMBConv_mac()
2048     {
2049         OSStatus status = noErr ;
2050         status = TECDisposeConverter(m_MB2WC_converter);
2051         status = TECDisposeConverter(m_WC2MB_converter);
2052     }
2053
2054
2055     void Init( TextEncodingBase encoding)
2056     {
2057         OSStatus status = noErr ;
2058         m_char_encoding = encoding ;
2059         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2060
2061         status = TECCreateConverter(&m_MB2WC_converter,
2062                                     m_char_encoding,
2063                                     m_unicode_encoding);
2064         status = TECCreateConverter(&m_WC2MB_converter,
2065                                     m_unicode_encoding,
2066                                     m_char_encoding);
2067     }
2068
2069     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2070     {
2071         OSStatus status = noErr ;
2072         ByteCount byteOutLen ;
2073         ByteCount byteInLen = strlen(psz) ;
2074         wchar_t *tbuf = NULL ;
2075         UniChar* ubuf = NULL ;
2076         size_t res = 0 ;
2077
2078         if (buf == NULL)
2079         {
2080             //apple specs say at least 32
2081             n = wxMax( 32 , byteInLen ) ;
2082             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2083         }
2084         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2085 #if SIZEOF_WCHAR_T == 4
2086         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2087 #else
2088         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2089 #endif
2090         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2091           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2092 #if SIZEOF_WCHAR_T == 4
2093         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2094         // is not properly terminated we get random characters at the end
2095         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2096         wxMBConvUTF16BE converter ;
2097         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2098         free( ubuf ) ;
2099 #else
2100         res = byteOutLen / sizeof( UniChar ) ;
2101 #endif
2102         if ( buf == NULL )
2103              free(tbuf) ;
2104
2105         if ( buf  && res < n)
2106             buf[res] = 0;
2107
2108         return res ;
2109     }
2110
2111     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2112     {
2113         OSStatus status = noErr ;
2114         ByteCount byteOutLen ;
2115         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2116
2117         char *tbuf = NULL ;
2118
2119         if (buf == NULL)
2120         {
2121             //apple specs say at least 32
2122             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2123             tbuf = (char*) malloc( n ) ;
2124         }
2125
2126         ByteCount byteBufferLen = n ;
2127         UniChar* ubuf = NULL ;
2128 #if SIZEOF_WCHAR_T == 4
2129         wxMBConvUTF16BE converter ;
2130         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2131         byteInLen = unicharlen ;
2132         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2133         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2134 #else
2135         ubuf = (UniChar*) psz ;
2136 #endif
2137         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2138             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2139 #if SIZEOF_WCHAR_T == 4
2140         free( ubuf ) ;
2141 #endif
2142         if ( buf == NULL )
2143             free(tbuf) ;
2144
2145         size_t res = byteOutLen ;
2146         if ( buf  && res < n)
2147         {
2148             buf[res] = 0;
2149
2150             //we need to double-trip to verify it didn't insert any ? in place
2151             //of bogus characters
2152             wxWCharBuffer wcBuf(n);
2153             size_t pszlen = wxWcslen(psz);
2154             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2155                         wxWcslen(wcBuf) != pszlen ||
2156                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2157             {
2158                 // we didn't obtain the same thing we started from, hence
2159                 // the conversion was lossy and we consider that it failed
2160                 return (size_t)-1;
2161             }
2162         }
2163
2164         return res ;
2165     }
2166
2167     bool IsOk() const
2168         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2169
2170 private:
2171     TECObjectRef m_MB2WC_converter ;
2172     TECObjectRef m_WC2MB_converter ;
2173
2174     TextEncodingBase m_char_encoding ;
2175     TextEncodingBase m_unicode_encoding ;
2176 };
2177
2178 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2179
2180 // ============================================================================
2181 // wxEncodingConverter based conversion classes
2182 // ============================================================================
2183
2184 #if wxUSE_FONTMAP
2185
2186 class wxMBConv_wxwin : public wxMBConv
2187 {
2188 private:
2189     void Init()
2190     {
2191         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2192                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2193     }
2194
2195 public:
2196     // temporarily just use wxEncodingConverter stuff,
2197     // so that it works while a better implementation is built
2198     wxMBConv_wxwin(const wxChar* name)
2199     {
2200         if (name)
2201             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2202         else
2203             m_enc = wxFONTENCODING_SYSTEM;
2204
2205         Init();
2206     }
2207
2208     wxMBConv_wxwin(wxFontEncoding enc)
2209     {
2210         m_enc = enc;
2211
2212         Init();
2213     }
2214
2215     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2216     {
2217         size_t inbuf = strlen(psz);
2218         if (buf)
2219         {
2220             if (!m2w.Convert(psz,buf))
2221                 return (size_t)-1;
2222         }
2223         return inbuf;
2224     }
2225
2226     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2227     {
2228         const size_t inbuf = wxWcslen(psz);
2229         if (buf)
2230         {
2231             if (!w2m.Convert(psz,buf))
2232                 return (size_t)-1;
2233         }
2234
2235         return inbuf;
2236     }
2237
2238     bool IsOk() const { return m_ok; }
2239
2240 public:
2241     wxFontEncoding m_enc;
2242     wxEncodingConverter m2w, w2m;
2243
2244     // were we initialized successfully?
2245     bool m_ok;
2246
2247     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2248 };
2249
2250 #endif // wxUSE_FONTMAP
2251
2252 // ============================================================================
2253 // wxCSConv implementation
2254 // ============================================================================
2255
2256 void wxCSConv::Init()
2257 {
2258     m_name = NULL;
2259     m_convReal =  NULL;
2260     m_deferred = true;
2261 }
2262
2263 wxCSConv::wxCSConv(const wxChar *charset)
2264 {
2265     Init();
2266
2267     if ( charset )
2268     {
2269         SetName(charset);
2270     }
2271
2272     m_encoding = wxFONTENCODING_SYSTEM;
2273 }
2274
2275 wxCSConv::wxCSConv(wxFontEncoding encoding)
2276 {
2277     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2278     {
2279         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2280
2281         encoding = wxFONTENCODING_SYSTEM;
2282     }
2283
2284     Init();
2285
2286     m_encoding = encoding;
2287 }
2288
2289 wxCSConv::~wxCSConv()
2290 {
2291     Clear();
2292 }
2293
2294 wxCSConv::wxCSConv(const wxCSConv& conv)
2295         : wxMBConv()
2296 {
2297     Init();
2298
2299     SetName(conv.m_name);
2300     m_encoding = conv.m_encoding;
2301 }
2302
2303 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2304 {
2305     Clear();
2306
2307     SetName(conv.m_name);
2308     m_encoding = conv.m_encoding;
2309
2310     return *this;
2311 }
2312
2313 void wxCSConv::Clear()
2314 {
2315     free(m_name);
2316     delete m_convReal;
2317
2318     m_name = NULL;
2319     m_convReal = NULL;
2320 }
2321
2322 void wxCSConv::SetName(const wxChar *charset)
2323 {
2324     if (charset)
2325     {
2326         m_name = wxStrdup(charset);
2327         m_deferred = true;
2328     }
2329 }
2330
2331 wxMBConv *wxCSConv::DoCreate() const
2332 {
2333     // check for the special case of ASCII or ISO8859-1 charset: as we have
2334     // special knowledge of it anyhow, we don't need to create a special
2335     // conversion object
2336     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2337     {
2338         // don't convert at all
2339         return NULL;
2340     }
2341
2342     // we trust OS to do conversion better than we can so try external
2343     // conversion methods first
2344     //
2345     // the full order is:
2346     //      1. OS conversion (iconv() under Unix or Win32 API)
2347     //      2. hard coded conversions for UTF
2348     //      3. wxEncodingConverter as fall back
2349
2350     // step (1)
2351 #ifdef HAVE_ICONV
2352 #if !wxUSE_FONTMAP
2353     if ( m_name )
2354 #endif // !wxUSE_FONTMAP
2355     {
2356         wxString name(m_name);
2357
2358 #if wxUSE_FONTMAP
2359         if ( name.empty() )
2360             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2361 #endif // wxUSE_FONTMAP
2362
2363         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2364         if ( conv->IsOk() )
2365             return conv;
2366
2367         delete conv;
2368     }
2369 #endif // HAVE_ICONV
2370
2371 #ifdef wxHAVE_WIN32_MB2WC
2372     {
2373 #if wxUSE_FONTMAP
2374         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2375                                       : new wxMBConv_win32(m_encoding);
2376         if ( conv->IsOk() )
2377             return conv;
2378
2379         delete conv;
2380 #else
2381         return NULL;
2382 #endif
2383     }
2384 #endif // wxHAVE_WIN32_MB2WC
2385 #if defined(__WXMAC__)
2386     {
2387         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2388         {
2389
2390             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2391                                         : new wxMBConv_mac(m_encoding);
2392             if ( conv->IsOk() )
2393                  return conv;
2394
2395             delete conv;
2396         }
2397     }
2398 #endif
2399 #if defined(__WXCOCOA__)
2400     {
2401         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2402         {
2403
2404             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2405                                           : new wxMBConv_cocoa(m_encoding);
2406             if ( conv->IsOk() )
2407                  return conv;
2408
2409             delete conv;
2410         }
2411     }
2412 #endif
2413     // step (2)
2414     wxFontEncoding enc = m_encoding;
2415 #if wxUSE_FONTMAP
2416     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2417     {
2418         // use "false" to suppress interactive dialogs -- we can be called from
2419         // anywhere and popping up a dialog from here is the last thing we want to
2420         // do
2421         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2422     }
2423 #endif // wxUSE_FONTMAP
2424
2425     switch ( enc )
2426     {
2427         case wxFONTENCODING_UTF7:
2428              return new wxMBConvUTF7;
2429
2430         case wxFONTENCODING_UTF8:
2431              return new wxMBConvUTF8;
2432
2433         case wxFONTENCODING_UTF16BE:
2434              return new wxMBConvUTF16BE;
2435
2436         case wxFONTENCODING_UTF16LE:
2437              return new wxMBConvUTF16LE;
2438
2439         case wxFONTENCODING_UTF32BE:
2440              return new wxMBConvUTF32BE;
2441
2442         case wxFONTENCODING_UTF32LE:
2443              return new wxMBConvUTF32LE;
2444
2445         default:
2446              // nothing to do but put here to suppress gcc warnings
2447              ;
2448     }
2449
2450     // step (3)
2451 #if wxUSE_FONTMAP
2452     {
2453         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2454                                       : new wxMBConv_wxwin(m_encoding);
2455         if ( conv->IsOk() )
2456             return conv;
2457
2458         delete conv;
2459     }
2460 #endif // wxUSE_FONTMAP
2461
2462     // NB: This is a hack to prevent deadlock. What could otherwise happen
2463     //     in Unicode build: wxConvLocal creation ends up being here
2464     //     because of some failure and logs the error. But wxLog will try to
2465     //     attach timestamp, for which it will need wxConvLocal (to convert
2466     //     time to char* and then wchar_t*), but that fails, tries to log
2467     //     error, but wxLog has a (already locked) critical section that
2468     //     guards static buffer.
2469     static bool alreadyLoggingError = false;
2470     if (!alreadyLoggingError)
2471     {
2472         alreadyLoggingError = true;
2473         wxLogError(_("Cannot convert from the charset '%s'!"),
2474                    m_name ? m_name
2475                       :
2476 #if wxUSE_FONTMAP
2477                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2478 #else // !wxUSE_FONTMAP
2479                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2480 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2481               );
2482         alreadyLoggingError = false;
2483     }
2484
2485     return NULL;
2486 }
2487
2488 void wxCSConv::CreateConvIfNeeded() const
2489 {
2490     if ( m_deferred )
2491     {
2492         wxCSConv *self = (wxCSConv *)this; // const_cast
2493
2494 #if wxUSE_INTL
2495         // if we don't have neither the name nor the encoding, use the default
2496         // encoding for this system
2497         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2498         {
2499             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2500         }
2501 #endif // wxUSE_INTL
2502
2503         self->m_convReal = DoCreate();
2504         self->m_deferred = false;
2505     }
2506 }
2507
2508 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2509 {
2510     CreateConvIfNeeded();
2511
2512     if (m_convReal)
2513         return m_convReal->MB2WC(buf, psz, n);
2514
2515     // latin-1 (direct)
2516     size_t len = strlen(psz);
2517
2518     if (buf)
2519     {
2520         for (size_t c = 0; c <= len; c++)
2521             buf[c] = (unsigned char)(psz[c]);
2522     }
2523
2524     return len;
2525 }
2526
2527 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2528 {
2529     CreateConvIfNeeded();
2530
2531     if (m_convReal)
2532         return m_convReal->WC2MB(buf, psz, n);
2533
2534     // latin-1 (direct)
2535     const size_t len = wxWcslen(psz);
2536     if (buf)
2537     {
2538         for (size_t c = 0; c <= len; c++)
2539         {
2540             if (psz[c] > 0xFF)
2541                 return (size_t)-1;
2542             buf[c] = (char)psz[c];
2543         }
2544     }
2545     else
2546     {
2547         for (size_t c = 0; c <= len; c++)
2548         {
2549             if (psz[c] > 0xFF)
2550                 return (size_t)-1;
2551         }
2552     }
2553
2554     return len;
2555 }
2556
2557 // ----------------------------------------------------------------------------
2558 // globals
2559 // ----------------------------------------------------------------------------
2560
2561 #ifdef __WINDOWS__
2562     static wxMBConv_win32 wxConvLibcObj;
2563 #elif defined(__WXMAC__) && !defined(__MACH__)
2564     static wxMBConv_mac wxConvLibcObj ;
2565 #else
2566     static wxMBConvLibc wxConvLibcObj;
2567 #endif
2568
2569 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
2570 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
2571 static wxMBConvUTF7 wxConvUTF7Obj;
2572 static wxMBConvUTF8 wxConvUTF8Obj;
2573
2574
2575 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
2576 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
2577 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
2578 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
2579 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
2580 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2581
2582 #else // !wxUSE_WCHAR_T
2583
2584 // stand-ins in absence of wchar_t
2585 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
2586                                 wxConvISO8859_1,
2587                                 wxConvLocal,
2588                                 wxConvUTF8;
2589
2590 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2591
2592