src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73 #endif
  74
  75 #include "wx/encconv.h"
  76 #include "wx/fontmap.h"
  77 #include "wx/utils.h"
  78
  79 #ifdef __WXMAC__
  80 #include <ATSUnicode.h>
  81 #include <TextCommon.h>
  82 #include <TextEncodingConverter.h>
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86 // ----------------------------------------------------------------------------
  87 // macros
  88 // ----------------------------------------------------------------------------
  89
  90 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  91 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  92
  93 #if SIZEOF_WCHAR_T == 4
  94     #define WC_NAME         "UCS4"
  95     #define WC_BSWAP         BSWAP_UCS4
  96     #ifdef WORDS_BIGENDIAN
  97       #define WC_NAME_BEST  "UCS-4BE"
  98     #else
  99       #define WC_NAME_BEST  "UCS-4LE"
 100     #endif
 101 #elif SIZEOF_WCHAR_T == 2
 102     #define WC_NAME         "UTF16"
 103     #define WC_BSWAP         BSWAP_UTF16
 104     #define WC_UTF16
 105     #ifdef WORDS_BIGENDIAN
 106       #define WC_NAME_BEST  "UTF-16BE"
 107     #else
 108       #define WC_NAME_BEST  "UTF-16LE"
 109     #endif
 110 #else // sizeof(wchar_t) != 2 nor 4
 111     // does this ever happen?
 112     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 113 #endif
 114
 115 // ============================================================================
 116 // implementation
 117 // ============================================================================
 118
 119 // ----------------------------------------------------------------------------
 120 // UTF-16 en/decoding to/from UCS-4
 121 // ----------------------------------------------------------------------------
 122
 123
 124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 125 {
 126     if (input<=0xffff)
 127     {
 128         if (output)
 129             *output = (wxUint16) input;
 130         return 1;
 131     }
 132     else if (input>=0x110000)
 133     {
 134         return (size_t)-1;
 135     }
 136     else
 137     {
 138         if (output)
 139         {
 140             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 141             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 142         }
 143         return 2;
 144     }
 145 }
 146
 147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 148 {
 149     if ((*input<0xd800) || (*input>0xdfff))
 150     {
 151         output = *input;
 152         return 1;
 153     }
 154     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 155     {
 156         output = *input;
 157         return (size_t)-1;
 158     }
 159     else
 160     {
 161         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 162         return 2;
 163     }
 164 }
 165
 166
 167 // ----------------------------------------------------------------------------
 168 // wxMBConv
 169 // ----------------------------------------------------------------------------
 170
// Out-of-line destructor with an intentionally empty body.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 175
 176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 177 {
 178     if ( psz )
 179     {
 180         // calculate the length of the buffer needed first
 181         size_t nLen = MB2WC(NULL, psz, 0);
 182         if ( nLen != (size_t)-1 )
 183         {
 184             // now do the actual conversion
 185             wxWCharBuffer buf(nLen);
 186             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 187             if ( nLen != (size_t)-1 )
 188             {
 189                 return buf;
 190             }
 191         }
 192     }
 193
 194     wxWCharBuffer buf((wchar_t *)NULL);
 195
 196     return buf;
 197 }
 198
 199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 200 {
 201     if ( pwz )
 202     {
 203         size_t nLen = WC2MB(NULL, pwz, 0);
 204         if ( nLen != (size_t)-1 )
 205         {
 206             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 207             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 208             if ( nLen != (size_t)-1 )
 209             {
 210                 return buf;
 211             }
 212         }
 213     }
 214
 215     wxCharBuffer buf((char *)NULL);
 216
 217     return buf;
 218 }
 219
 220 size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
 221                        size_t outsize, size_t nStringLen) const
 222 {
 223     const char* szEnd = szString + nStringLen + 1;
 224     const char* szPos = szString;
 225     const char* szStart = szPos;
 226
 227     size_t nActualLength = 0;
 228
 229     //Convert the string until the length() is reached, continuing the
 230     //loop every time a null character is reached
 231     while(szPos != szEnd)
 232     {
 233         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 234
 235         //Get the length of the current (sub)string
 236         size_t nLen = MB2WC(NULL, szPos, 0);
 237
 238         //Invalid conversion?
 239         if( nLen == (size_t)-1 )
 240             return nLen;
 241
 242         //Increase the actual length (+1 for current null character)
 243         nActualLength += nLen + 1;
 244
 245         //Only copy data in if buffer size is big enough
 246         if (szBuffer != NULL &&
 247             nActualLength <= outsize)
 248         {
 249             //Convert the current (sub)string
 250             if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 251                 return (size_t)-1;
 252         }
 253
 254         //Increment to next (sub)string
 255         //Note that we have to use strlen here instead of nLen
 256         //here because XX2XX gives us the size of the output buffer,
 257         //not neccessarly the length of the string
 258         szPos += strlen(szPos) + 1;
 259     }
 260
 261     return nActualLength - 1; //success - return actual length
 262 }
 263
 264 size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
 265                        size_t outsize, size_t nStringLen) const
 266 {
 267     const wchar_t* szEnd = szString + nStringLen + 1;
 268     const wchar_t* szPos = szString;
 269     const wchar_t* szStart = szPos;
 270
 271     size_t nActualLength = 0;
 272
 273     //Convert the string until the length() is reached, continuing the
 274     //loop every time a null character is reached
 275     while(szPos != szEnd)
 276     {
 277         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 278
 279         //Get the length of the current (sub)string
 280         size_t nLen = WC2MB(NULL, szPos, 0);
 281
 282         //Invalid conversion?
 283         if( nLen == (size_t)-1 )
 284             return nLen;
 285
 286         //Increase the actual length (+1 for current null character)
 287         nActualLength += nLen + 1;
 288
 289         //Only copy data in if buffer size is big enough
 290         if (szBuffer != NULL &&
 291             nActualLength <= outsize)
 292         {
 293             //Convert the current (sub)string
 294             if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 295                 return (size_t)-1;
 296         }
 297
 298         //Increment to next (sub)string
 299         //Note that we have to use wxWcslen here instead of nLen
 300         //here because XX2XX gives us the size of the output buffer,
 301         //not neccessarly the length of the string
 302         szPos += wxWcslen(szPos) + 1;
 303     }
 304
 305     return nActualLength - 1;  //success - return actual length
 306 }
 307
 308 // ----------------------------------------------------------------------------
 309 // wxMBConvLibc
 310 // ----------------------------------------------------------------------------
 311
// Forwards to wxMB2WC() (defined outside this file), passing all three
// arguments through unchanged and returning its result.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 316
// Forwards to wxWC2MB() (defined outside this file), passing all three
// arguments through unchanged and returning its result.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 321 // ----------------------------------------------------------------------------
 322 // UTF-7
 323 // ----------------------------------------------------------------------------
 324
 325 // Implementation (C) 2004 Fredrik Roubert
 326
//
// BASE64 decoding table
//
// Indexed by the value of an input byte (0..255). The entries for 'A'-'Z',
// 'a'-'z', '0'-'9', '+' and '/' hold the corresponding 6 bit value
// (0x00..0x3f); all the other entries are 0xff, which wxMBConvUTF7::MB2WC()
// uses as the "not a BASE64 character" marker ending an encoded run.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 365
 366 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 367 {
 368
 369     size_t len = 0;
 370
 371     while (*psz && ((!buf) || (len < n)))
 372     {
 373         unsigned char cc = *psz++;
 374         if (cc != '+')
 375         {
 376             // plain ASCII char
 377             if (buf)
 378                 *buf++ = cc;
 379             len++;
 380         }
 381         else if (*psz == '-')
 382         {
 383             // encoded plus sign
 384             if (buf)
 385                 *buf++ = cc;
 386             len++;
 387             psz++;
 388         }
 389         else
 390         {
 391             // BASE64 encoded string
 392             bool lsb;
 393             unsigned char c;
 394             unsigned int d, l;
 395             for (lsb = false, d = 0, l = 0;
 396                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 397             {
 398                 d <<= 6;
 399                 d += cc;
 400                 for (l += 6; l >= 8; lsb = !lsb)
 401                 {
 402                     c = (d >> (l -= 8)) % 256;
 403                     if (lsb)
 404                     {
 405                         if (buf)
 406                             *buf++ |= c;
 407                         len ++;
 408                     }
 409                     else
 410                         if (buf)
 411                             *buf = c << 8;
 412                 }
 413             }
 414             if (*psz == '-')
 415                 psz++;
 416         }
 417     }
 418     if (buf && (len < n))
 419         *buf = 0;
 420     return len;
 421 }
 422
//
// BASE64 encoding table
//
// Maps a 6 bit value (0..63) to the character representing it; used by
// wxMBConvUTF7::WC2MB() below.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 437
//
// UTF-7 encoding table
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
// Indexed by the character code (0..127). wxMBConvUTF7::WC2MB() copies only
// the characters of class 0 directly to the output; characters of all the
// other classes are BASE64 encoded.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 457
 458 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
 459 *psz, size_t n) const
 460 {
 461
 462
 463     size_t len = 0;
 464
 465     while (*psz && ((!buf) || (len < n)))
 466     {
 467         wchar_t cc = *psz++;
 468         if (cc < 0x80 && utf7encode[cc] < 1)
 469         {
 470             // plain ASCII char
 471             if (buf)
 472                 *buf++ = (char)cc;
 473             len++;
 474         }
 475 #ifndef WC_UTF16
 476 #ifdef __VMS
 477        else if (cc > 0xffff)
 478 #else
 479        else if (cc > ((const wchar_t)0xffff))
 480 #endif
 481          {
 482             // no surrogate pair generation (yet?)
 483             return (size_t)-1;
 484         }
 485 #endif
 486         else
 487         {
 488             if (buf)
 489                 *buf++ = '+';
 490             len++;
 491             if (cc != '+')
 492             {
 493                 // BASE64 encode string
 494                 unsigned int lsb, d, l;
 495                 for (d = 0, l = 0;; psz++)
 496                 {
 497                     for (lsb = 0; lsb < 2; lsb ++)
 498                     {
 499                         d <<= 8;
 500                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 501
 502                         for (l += 8; l >= 6; )
 503                         {
 504                             l -= 6;
 505                             if (buf)
 506                                 *buf++ = utf7enb64[(d >> l) % 64];
 507                             len++;
 508                         }
 509                     }
 510                     cc = *psz;
 511                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 512                         break;
 513                 }
 514                 if (l != 0)
 515                 {
 516                     if (buf)
 517                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 518                     len++;
 519                 }
 520             }
 521             if (buf)
 522                 *buf++ = '-';
 523             len++;
 524         }
 525     }
 526     if (buf && (len < n))
 527         *buf = 0;
 528     return len;
 529 }
 530
 531 // ----------------------------------------------------------------------------
 532 // UTF-8
 533 // ----------------------------------------------------------------------------
 534
// utf8_max[i] is the largest value representable by a UTF-8 sequence with i
// continuation bytes (i.e. i + 1 bytes in total). The last entry, 0xffffffff,
// acts as a stop value: no wxUint32 can exceed it, so the search loop in
// wxMBConvUTF8::WC2MB() always terminates inside the array.
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 537
 538 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 539 {
 540     size_t len = 0;
 541
 542     while (*psz && ((!buf) || (len < n)))
 543     {
 544         unsigned char cc = *psz++, fc = cc;
 545         unsigned cnt;
 546         for (cnt = 0; fc & 0x80; cnt++)
 547             fc <<= 1;
 548         if (!cnt)
 549         {
 550             // plain ASCII char
 551             if (buf)
 552                 *buf++ = cc;
 553             len++;
 554         }
 555         else
 556         {
 557             cnt--;
 558             if (!cnt)
 559             {
 560                 // invalid UTF-8 sequence
 561                 return (size_t)-1;
 562             }
 563             else
 564             {
 565                 unsigned ocnt = cnt - 1;
 566                 wxUint32 res = cc & (0x3f >> cnt);
 567                 while (cnt--)
 568                 {
 569                     cc = *psz++;
 570                     if ((cc & 0xC0) != 0x80)
 571                     {
 572                         // invalid UTF-8 sequence
 573                         return (size_t)-1;
 574                     }
 575                     res = (res << 6) | (cc & 0x3f);
 576                 }
 577                 if (res <= utf8_max[ocnt])
 578                 {
 579                     // illegal UTF-8 encoding
 580                     return (size_t)-1;
 581                 }
 582 #ifdef WC_UTF16
 583                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 584                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 585                 if (pa == (size_t)-1)
 586                   return (size_t)-1;
 587                 if (buf)
 588                     buf += pa;
 589                 len += pa;
 590 #else // !WC_UTF16
 591                 if (buf)
 592                     *buf++ = res;
 593                 len++;
 594 #endif // WC_UTF16/!WC_UTF16
 595             }
 596         }
 597     }
 598     if (buf && (len < n))
 599         *buf = 0;
 600     return len;
 601 }
 602
 603 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 604 {
 605     size_t len = 0;
 606
 607     while (*psz && ((!buf) || (len < n)))
 608     {
 609         wxUint32 cc;
 610 #ifdef WC_UTF16
 611         // cast is ok for WC_UTF16
 612         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 613         psz += (pa == (size_t)-1) ? 1 : pa;
 614 #else
 615         cc=(*psz++) & 0x7fffffff;
 616 #endif
 617         unsigned cnt;
 618         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 619         if (!cnt)
 620         {
 621             // plain ASCII char
 622             if (buf)
 623                 *buf++ = (char) cc;
 624             len++;
 625         }
 626
 627         else
 628         {
 629             len += cnt + 1;
 630             if (buf)
 631             {
 632                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 633                 while (cnt--)
 634                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 635             }
 636         }
 637     }
 638
 639     if (buf && (len<n)) *buf = 0;
 640
 641     return len;
 642 }
 643
 644
 645
 646
 647 // ----------------------------------------------------------------------------
 648 // UTF-16
 649 // ----------------------------------------------------------------------------
 650
 651 #ifdef WORDS_BIGENDIAN
 652     #define wxMBConvUTF16straight wxMBConvUTF16BE
 653     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 654 #else
 655     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 656     #define wxMBConvUTF16straight wxMBConvUTF16LE
 657 #endif
 658
 659
 660 #ifdef WC_UTF16
 661
 662 // copy 16bit MB to 16bit String
 663 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 664 {
 665     size_t len=0;
 666
 667     while (*(wxUint16*)psz && (!buf || len < n))
 668     {
 669         if (buf)
 670             *buf++ = *(wxUint16*)psz;
 671         len++;
 672
 673         psz += sizeof(wxUint16);
 674     }
 675     if (buf && len<n)   *buf=0;
 676
 677     return len;
 678 }
 679
 680
 681 // copy 16bit String to 16bit MB
 682 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 683 {
 684     size_t len=0;
 685
 686     while (*psz && (!buf || len < n))
 687     {
 688         if (buf)
 689         {
 690             *(wxUint16*)buf = *psz;
 691             buf += sizeof(wxUint16);
 692         }
 693         len += sizeof(wxUint16);
 694         psz++;
 695     }
 696     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 697
 698     return len;
 699 }
 700
 701
 702 // swap 16bit MB to 16bit String
 703 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 704 {
 705     size_t len=0;
 706
 707     while (*(wxUint16*)psz && (!buf || len < n))
 708     {
 709         if (buf)
 710         {
 711             ((char *)buf)[0] = psz[1];
 712             ((char *)buf)[1] = psz[0];
 713             buf++;
 714         }
 715         len++;
 716         psz += sizeof(wxUint16);
 717     }
 718     if (buf && len<n)   *buf=0;
 719
 720     return len;
 721 }
 722
 723
 724 // swap 16bit MB to 16bit String
 725 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 726 {
 727     size_t len=0;
 728
 729     while (*psz && (!buf || len < n))
 730     {
 731         if (buf)
 732         {
 733             *buf++ = ((char*)psz)[1];
 734             *buf++ = ((char*)psz)[0];
 735         }
 736         len += sizeof(wxUint16);
 737         psz++;
 738     }
 739     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 740
 741     return len;
 742 }
 743
 744
 745 #else // WC_UTF16
 746
 747
 748 // copy 16bit MB to 32bit String
 749 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 750 {
 751     size_t len=0;
 752
 753     while (*(wxUint16*)psz && (!buf || len < n))
 754     {
 755         wxUint32 cc;
 756         size_t pa=decode_utf16((wxUint16*)psz, cc);
 757         if (pa == (size_t)-1)
 758             return pa;
 759
 760         if (buf)
 761             *buf++ = cc;
 762         len++;
 763         psz += pa * sizeof(wxUint16);
 764     }
 765     if (buf && len<n)   *buf=0;
 766
 767     return len;
 768 }
 769
 770
 771 // copy 32bit String to 16bit MB
 772 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 773 {
 774     size_t len=0;
 775
 776     while (*psz && (!buf || len < n))
 777     {
 778         wxUint16 cc[2];
 779         size_t pa=encode_utf16(*psz, cc);
 780
 781         if (pa == (size_t)-1)
 782             return pa;
 783
 784         if (buf)
 785         {
 786             *(wxUint16*)buf = cc[0];
 787             buf += sizeof(wxUint16);
 788             if (pa > 1)
 789             {
 790                 *(wxUint16*)buf = cc[1];
 791                 buf += sizeof(wxUint16);
 792             }
 793         }
 794
 795         len += pa*sizeof(wxUint16);
 796         psz++;
 797     }
 798     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 799
 800     return len;
 801 }
 802
 803
 804 // swap 16bit MB to 32bit String
 805 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 806 {
 807     size_t len=0;
 808
 809     while (*(wxUint16*)psz && (!buf || len < n))
 810     {
 811         wxUint32 cc;
 812         char tmp[4];
 813         tmp[0]=psz[1];  tmp[1]=psz[0];
 814         tmp[2]=psz[3];  tmp[3]=psz[2];
 815
 816         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 817         if (pa == (size_t)-1)
 818             return pa;
 819
 820         if (buf)
 821             *buf++ = cc;
 822
 823         len++;
 824         psz += pa * sizeof(wxUint16);
 825     }
 826     if (buf && len<n)   *buf=0;
 827
 828     return len;
 829 }
 830
 831
 832 // swap 32bit String to 16bit MB
 833 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 834 {
 835     size_t len=0;
 836
 837     while (*psz && (!buf || len < n))
 838     {
 839         wxUint16 cc[2];
 840         size_t pa=encode_utf16(*psz, cc);
 841
 842         if (pa == (size_t)-1)
 843             return pa;
 844
 845         if (buf)
 846         {
 847             *buf++ = ((char*)cc)[1];
 848             *buf++ = ((char*)cc)[0];
 849             if (pa > 1)
 850             {
 851                 *buf++ = ((char*)cc)[3];
 852                 *buf++ = ((char*)cc)[2];
 853             }
 854         }
 855
 856         len += pa*sizeof(wxUint16);
 857         psz++;
 858     }
 859     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 860
 861     return len;
 862 }
 863
 864 #endif // WC_UTF16
 865
 866
 867 // ----------------------------------------------------------------------------
 868 // UTF-32
 869 // ----------------------------------------------------------------------------
 870
 871 #ifdef WORDS_BIGENDIAN
 872 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 873 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 874 #else
 875 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 876 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 877 #endif
 878
 879
// The global converter objects for the two UTF-32 byte orders, declared
// through the WXDLLIMPEXP_DATA_BASE() macro (defined outside this file).
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 882
 883
 884 #ifdef WC_UTF16
 885
 886 // copy 32bit MB to 16bit String
 887 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 888 {
 889     size_t len=0;
 890
 891     while (*(wxUint32*)psz && (!buf || len < n))
 892     {
 893         wxUint16 cc[2];
 894
 895         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 896         if (pa == (size_t)-1)
 897             return pa;
 898
 899         if (buf)
 900         {
 901             *buf++ = cc[0];
 902             if (pa > 1)
 903                 *buf++ = cc[1];
 904         }
 905         len += pa;
 906         psz += sizeof(wxUint32);
 907     }
 908     if (buf && len<n)   *buf=0;
 909
 910     return len;
 911 }
 912
 913
 914 // copy 16bit String to 32bit MB
 915 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 916 {
 917     size_t len=0;
 918
 919     while (*psz && (!buf || len < n))
 920     {
 921         wxUint32 cc;
 922
 923         // cast is ok for WC_UTF16
 924         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 925         if (pa == (size_t)-1)
 926             return pa;
 927
 928         if (buf)
 929         {
 930             *(wxUint32*)buf = cc;
 931             buf += sizeof(wxUint32);
 932         }
 933         len += sizeof(wxUint32);
 934         psz += pa;
 935     }
 936
 937     if (buf && len<=n-sizeof(wxUint32))
 938         *(wxUint32*)buf=0;
 939
 940     return len;
 941 }
 942
 943
 944
 945 // swap 32bit MB to 16bit String
 946 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 947 {
 948     size_t len=0;
 949
 950     while (*(wxUint32*)psz && (!buf || len < n))
 951     {
 952         char tmp[4];
 953         tmp[0] = psz[3];   tmp[1] = psz[2];
 954         tmp[2] = psz[1];   tmp[3] = psz[0];
 955
 956
 957         wxUint16 cc[2];
 958
 959         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 960         if (pa == (size_t)-1)
 961             return pa;
 962
 963         if (buf)
 964         {
 965             *buf++ = cc[0];
 966             if (pa > 1)
 967                 *buf++ = cc[1];
 968         }
 969         len += pa;
 970         psz += sizeof(wxUint32);
 971     }
 972
 973     if (buf && len<n)
 974         *buf=0;
 975
 976     return len;
 977 }
 978
 979
 980 // swap 16bit String to 32bit MB
 981 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 982 {
 983     size_t len=0;
 984
 985     while (*psz && (!buf || len < n))
 986     {
 987         char cc[4];
 988
 989         // cast is ok for WC_UTF16
 990         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 991         if (pa == (size_t)-1)
 992             return pa;
 993
 994         if (buf)
 995         {
 996             *buf++ = cc[3];
 997             *buf++ = cc[2];
 998             *buf++ = cc[1];
 999             *buf++ = cc[0];
1000         }
1001         len += sizeof(wxUint32);
1002         psz += pa;
1003     }
1004
1005     if (buf && len<=n-sizeof(wxUint32))
1006         *(wxUint32*)buf=0;
1007
1008     return len;
1009 }
1010
1011 #else // WC_UTF16
1012
1013
1014 // copy 32bit MB to 32bit String
1015 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1016 {
1017     size_t len=0;
1018
1019     while (*(wxUint32*)psz && (!buf || len < n))
1020     {
1021         if (buf)
1022             *buf++ = *(wxUint32*)psz;
1023         len++;
1024         psz += sizeof(wxUint32);
1025     }
1026
1027     if (buf && len<n)
1028         *buf=0;
1029
1030     return len;
1031 }
1032
1033
1034 // copy 32bit String to 32bit MB
1035 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1036 {
1037     size_t len=0;
1038
1039     while (*psz && (!buf || len < n))
1040     {
1041         if (buf)
1042         {
1043             *(wxUint32*)buf = *psz;
1044             buf += sizeof(wxUint32);
1045         }
1046
1047         len += sizeof(wxUint32);
1048         psz++;
1049     }
1050
1051     if (buf && len<=n-sizeof(wxUint32))
1052         *(wxUint32*)buf=0;
1053
1054     return len;
1055 }
1056
1057
1058 // swap 32bit MB to 32bit String
1059 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1060 {
1061     size_t len=0;
1062
1063     while (*(wxUint32*)psz && (!buf || len < n))
1064     {
1065         if (buf)
1066         {
1067             ((char *)buf)[0] = psz[3];
1068             ((char *)buf)[1] = psz[2];
1069             ((char *)buf)[2] = psz[1];
1070             ((char *)buf)[3] = psz[0];
1071             buf++;
1072         }
1073         len++;
1074         psz += sizeof(wxUint32);
1075     }
1076
1077     if (buf && len<n)
1078         *buf=0;
1079
1080     return len;
1081 }
1082
1083
1084 // swap 32bit String to 32bit MB
1085 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1086 {
1087     size_t len=0;
1088
1089     while (*psz && (!buf || len < n))
1090     {
1091         if (buf)
1092         {
1093             *buf++ = ((char *)psz)[3];
1094             *buf++ = ((char *)psz)[2];
1095             *buf++ = ((char *)psz)[1];
1096             *buf++ = ((char *)psz)[0];
1097         }
1098         len += sizeof(wxUint32);
1099         psz++;
1100     }
1101
1102     if (buf && len<=n-sizeof(wxUint32))
1103         *(wxUint32*)buf=0;
1104
1105     return len;
1106 }
1107
1108
1109 #endif // WC_UTF16
1110
1111
1112 // ============================================================================
1113 // The classes doing conversion using the iconv_xxx() functions
1114 // ============================================================================
1115
1116 #ifdef HAVE_ICONV
1117
1118 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1119 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
1120 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1121 //     (which means error) and says there are 0 bytes left in the input buffer --
1122 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1123 //     this alternative test for iconv() failure.
1124 //     [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned cres
// and left bufLeft bytes in its input buffer really failed.
//
// NB: the macro arguments are parenthesized so that compound expressions
//     (e.g. using "?:") can be safely passed to it
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the type used here for the second
// argument of iconv()
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1133
1134 // ----------------------------------------------------------------------------
1135 // wxMBConv_iconv: encapsulates an iconv character set
1136 // ----------------------------------------------------------------------------
1137
1138 class wxMBConv_iconv : public wxMBConv
1139 {
1140 public:
1141     wxMBConv_iconv(const wxChar *name);
1142     virtual ~wxMBConv_iconv();
1143
1144     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1145     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1146
1147     bool IsOk() const
1148         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1149
1150 protected:
1151     // the iconv handlers used to translate from multibyte to wide char and in
1152     // the other direction
1153     iconv_t m2w,
1154             w2m;
1155
1156 private:
1157     // the name (for iconv_open()) of a wide char charset -- if none is
1158     // available on this machine, it will remain NULL
1159     static const char *ms_wcCharsetName;
1160
1161     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1162     // different endian-ness than the native one
1163     static bool ms_wcNeedsSwap;
1164 };
1165
1166 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1167 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1168
1169 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1170 {
1171     // Do it the hard way
1172     char cname[100];
1173     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1174         cname[i] = (char) name[i];
1175
1176     // check for charset that represents wchar_t:
1177     if (ms_wcCharsetName == NULL)
1178     {
1179         ms_wcNeedsSwap = false;
1180
1181         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1182         ms_wcCharsetName = WC_NAME_BEST;
1183         m2w = iconv_open(ms_wcCharsetName, cname);
1184
1185         if (m2w == (iconv_t)-1)
1186         {
1187             // try charset w/o bytesex info (e.g. "UCS4")
1188             // and check for bytesex ourselves:
1189             ms_wcCharsetName = WC_NAME;
1190             m2w = iconv_open(ms_wcCharsetName, cname);
1191
1192             // last bet, try if it knows WCHAR_T pseudo-charset
1193             if (m2w == (iconv_t)-1)
1194             {
1195                 ms_wcCharsetName = "WCHAR_T";
1196                 m2w = iconv_open(ms_wcCharsetName, cname);
1197             }
1198
1199             if (m2w != (iconv_t)-1)
1200             {
1201                 char    buf[2], *bufPtr;
1202                 wchar_t wbuf[2], *wbufPtr;
1203                 size_t  insz, outsz;
1204                 size_t  res;
1205
1206                 buf[0] = 'A';
1207                 buf[1] = 0;
1208                 wbuf[0] = 0;
1209                 insz = 2;
1210                 outsz = SIZEOF_WCHAR_T * 2;
1211                 wbufPtr = wbuf;
1212                 bufPtr = buf;
1213
1214                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1215                             (char**)&wbufPtr, &outsz);
1216
1217                 if (ICONV_FAILED(res, insz))
1218                 {
1219                     ms_wcCharsetName = NULL;
1220                     wxLogLastError(wxT("iconv"));
1221                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1222                 }
1223                 else
1224                 {
1225                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1226                 }
1227             }
1228             else
1229             {
1230                 ms_wcCharsetName = NULL;
1231
1232                 // VS: we must not output an error here, since wxWidgets will safely
1233                 //     fall back to using wxEncodingConverter.
1234                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1235                 //wxLogError(
1236             }
1237         }
1238         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1239     }
1240     else // we already have ms_wcCharsetName
1241     {
1242         m2w = iconv_open(ms_wcCharsetName, cname);
1243     }
1244
1245     // NB: don't ever pass NULL to iconv_open(), it may crash!
1246     if ( ms_wcCharsetName )
1247     {
1248         w2m = iconv_open( cname, ms_wcCharsetName);
1249     }
1250     else
1251     {
1252         w2m = (iconv_t)-1;
1253     }
1254 }
1255
1256 wxMBConv_iconv::~wxMBConv_iconv()
1257 {
1258     if ( m2w != (iconv_t)-1 )
1259         iconv_close(m2w);
1260     if ( w2m != (iconv_t)-1 )
1261         iconv_close(w2m);
1262 }
1263
1264 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1265 {
1266     size_t inbuf = strlen(psz);
1267     size_t outbuf = n * SIZEOF_WCHAR_T;
1268     size_t res, cres;
1269     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1270     wchar_t *bufPtr = buf;
1271     const char *pszPtr = psz;
1272
1273     if (buf)
1274     {
1275         // have destination buffer, convert there
1276         cres = iconv(m2w,
1277                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1278                      (char**)&bufPtr, &outbuf);
1279         res = n - (outbuf / SIZEOF_WCHAR_T);
1280
1281         if (ms_wcNeedsSwap)
1282         {
1283             // convert to native endianness
1284             WC_BSWAP(buf /* _not_ bufPtr */, res)
1285         }
1286
1287         // NB: iconv was given only strlen(psz) characters on input, and so
1288         //     it couldn't convert the trailing zero. Let's do it ourselves
1289         //     if there's some room left for it in the output buffer.
1290         if (res < n)
1291             buf[res] = 0;
1292     }
1293     else
1294     {
1295         // no destination buffer... convert using temp buffer
1296         // to calculate destination buffer requirement
1297         wchar_t tbuf[8];
1298         res = 0;
1299         do {
1300             bufPtr = tbuf;
1301             outbuf = 8*SIZEOF_WCHAR_T;
1302
1303             cres = iconv(m2w,
1304                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1305                          (char**)&bufPtr, &outbuf );
1306
1307             res += 8-(outbuf/SIZEOF_WCHAR_T);
1308         } while ((cres==(size_t)-1) && (errno==E2BIG));
1309     }
1310
1311     if (ICONV_FAILED(cres, inbuf))
1312     {
1313         //VS: it is ok if iconv fails, hence trace only
1314         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1315         return (size_t)-1;
1316     }
1317
1318     return res;
1319 }
1320
1321 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1322 {
1323     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1324     size_t outbuf = n;
1325     size_t res, cres;
1326
1327     wchar_t *tmpbuf = 0;
1328
1329     if (ms_wcNeedsSwap)
1330     {
1331         // need to copy to temp buffer to switch endianness
1332         // this absolutely doesn't rock!
1333         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1334         //  could be in read-only memory, or be accessed in some other thread)
1335         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1336         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1337         WC_BSWAP(tmpbuf, inbuf)
1338         psz=tmpbuf;
1339     }
1340
1341     if (buf)
1342     {
1343         // have destination buffer, convert there
1344         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1345
1346         res = n-outbuf;
1347
1348         // NB: iconv was given only wcslen(psz) characters on input, and so
1349         //     it couldn't convert the trailing zero. Let's do it ourselves
1350         //     if there's some room left for it in the output buffer.
1351         if (res < n)
1352             buf[0] = 0;
1353     }
1354     else
1355     {
1356         // no destination buffer... convert using temp buffer
1357         // to calculate destination buffer requirement
1358         char tbuf[16];
1359         res = 0;
1360         do {
1361             buf = tbuf; outbuf = 16;
1362
1363             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1364
1365             res += 16 - outbuf;
1366         } while ((cres==(size_t)-1) && (errno==E2BIG));
1367     }
1368
1369     if (ms_wcNeedsSwap)
1370     {
1371         free(tmpbuf);
1372     }
1373
1374     if (ICONV_FAILED(cres, inbuf))
1375     {
1376         //VS: it is ok if iconv fails, hence trace only
1377         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1378         return (size_t)-1;
1379     }
1380
1381     return res;
1382 }
1383
1384 #endif // HAVE_ICONV
1385
1386
1387 // ============================================================================
1388 // Win32 conversion classes
1389 // ============================================================================
1390
1391 #ifdef wxHAVE_WIN32_MB2WC
1392
1393 // from utils.cpp
1394 #if wxUSE_FONTMAP
1395 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1396 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1397 #endif
1398
1399 class wxMBConv_win32 : public wxMBConv
1400 {
1401 public:
1402     wxMBConv_win32()
1403     {
1404         m_CodePage = CP_ACP;
1405     }
1406
1407 #if wxUSE_FONTMAP
1408     wxMBConv_win32(const wxChar* name)
1409     {
1410         m_CodePage = wxCharsetToCodepage(name);
1411     }
1412
1413     wxMBConv_win32(wxFontEncoding encoding)
1414     {
1415         m_CodePage = wxEncodingToCodepage(encoding);
1416     }
1417 #endif
1418
1419     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1420     {
1421         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1422         // the behaviour is not compatible with the Unix version (using iconv)
1423         // and break the library itself, e.g. wxTextInputStream::NextChar()
1424         // wouldn't work if reading an incomplete MB char didn't result in an
1425         // error
1426         const size_t len = ::MultiByteToWideChar
1427                              (
1428                                 m_CodePage,     // code page
1429                                 MB_ERR_INVALID_CHARS, // flags: fall on error
1430                                 psz,            // input string
1431                                 -1,             // its length (NUL-terminated)
1432                                 buf,            // output string
1433                                 buf ? n : 0     // size of output buffer
1434                              );
1435
1436         // note that it returns count of written chars for buf != NULL and size
1437         // of the needed buffer for buf == NULL so in either case the length of
1438         // the string (which never includes the terminating NUL) is one less
1439         return len ? len - 1 : (size_t)-1;
1440     }
1441
1442     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1443     {
1444         /*
1445             we have a problem here: by default, WideCharToMultiByte() may
1446             replace characters unrepresentable in the target code page with bad
1447             quality approximations such as turning "1/2" symbol (U+00BD) into
1448             "1" for the code pages which don't have it and we, obviously, want
1449             to avoid this at any price
1450
1451             the trouble is that this function does it _silently_, i.e. it won't
1452             even tell us whether it did or not... Win98/2000 and higher provide
1453             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1454             we have to resort to a round trip, i.e. check that converting back
1455             results in the same string -- this is, of course, expensive but
1456             otherwise we simply can't be sure to not garble the data.
1457          */
1458
1459         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1460         // it doesn't work with CJK encodings (which we test for rather roughly
1461         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1462         // supporting it
1463         BOOL usedDef wxDUMMY_INITIALIZE(false);
1464         BOOL *pUsedDef;
1465         int flags;
1466         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1467         {
1468             // it's our lucky day
1469             flags = WC_NO_BEST_FIT_CHARS;
1470             pUsedDef = &usedDef;
1471         }
1472         else // old system or unsupported encoding
1473         {
1474             flags = 0;
1475             pUsedDef = NULL;
1476         }
1477
1478         const size_t len = ::WideCharToMultiByte
1479                              (
1480                                 m_CodePage,     // code page
1481                                 flags,          // either none or no best fit
1482                                 pwz,            // input string
1483                                 -1,             // it is (wide) NUL-terminated
1484                                 buf,            // output buffer
1485                                 buf ? n : 0,    // and its size
1486                                 NULL,           // default "replacement" char
1487                                 pUsedDef        // [out] was it used?
1488                              );
1489
1490         if ( !len )
1491         {
1492             // function totally failed
1493             return (size_t)-1;
1494         }
1495
1496         // if we were really converting, check if we succeeded
1497         if ( buf )
1498         {
1499             if ( flags )
1500             {
1501                 // check if the conversion failed, i.e. if any replacements
1502                 // were done
1503                 if ( usedDef )
1504                     return (size_t)-1;
1505             }
1506             else // we must resort to double tripping...
1507             {
1508                 wxWCharBuffer wcBuf(n);
1509                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1510                         wcscmp(wcBuf, pwz) != 0 )
1511                 {
1512                     // we didn't obtain the same thing we started from, hence
1513                     // the conversion was lossy and we consider that it failed
1514                     return (size_t)-1;
1515                 }
1516             }
1517         }
1518
1519         // see the comment above for the reason of "len - 1"
1520         return len - 1;
1521     }
1522
1523     bool IsOk() const { return m_CodePage != -1; }
1524
1525 private:
1526     static bool CanUseNoBestFit()
1527     {
1528         static int s_isWin98Or2k = -1;
1529
1530         if ( s_isWin98Or2k == -1 )
1531         {
1532             int verMaj, verMin;
1533             switch ( wxGetOsVersion(&verMaj, &verMin) )
1534             {
1535                 case wxWIN95:
1536                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1537                     break;
1538
1539                 case wxWINDOWS_NT:
1540                     s_isWin98Or2k = verMaj >= 5;
1541                     break;
1542
1543                 default:
1544                     // unknown, be conseravtive by default
1545                     s_isWin98Or2k = 0;
1546             }
1547
1548             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1549         }
1550
1551         return s_isWin98Or2k == 1;
1552     }
1553
1554     long m_CodePage;
1555 };
1556
1557 #endif // wxHAVE_WIN32_MB2WC
1558
1559 // ============================================================================
1560 // Cocoa conversion classes
1561 // ============================================================================
1562
1563 #if defined(__WXCOCOA__)
1564
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1568
1569 #include <CoreFoundation/CFString.h>
1570 #include <CoreFoundation/CFStringEncodingExt.h>
1571
1572 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1573 {
1574     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1575     if ( encoding == wxFONTENCODING_DEFAULT )
1576     {
1577         enc = CFStringGetSystemEncoding();
1578     }
1579     else switch( encoding)
1580     {
1581         case wxFONTENCODING_ISO8859_1 :
1582             enc = kCFStringEncodingISOLatin1 ;
1583             break ;
1584         case wxFONTENCODING_ISO8859_2 :
1585             enc = kCFStringEncodingISOLatin2;
1586             break ;
1587         case wxFONTENCODING_ISO8859_3 :
1588             enc = kCFStringEncodingISOLatin3 ;
1589             break ;
1590         case wxFONTENCODING_ISO8859_4 :
1591             enc = kCFStringEncodingISOLatin4;
1592             break ;
1593         case wxFONTENCODING_ISO8859_5 :
1594             enc = kCFStringEncodingISOLatinCyrillic;
1595             break ;
1596         case wxFONTENCODING_ISO8859_6 :
1597             enc = kCFStringEncodingISOLatinArabic;
1598             break ;
1599         case wxFONTENCODING_ISO8859_7 :
1600             enc = kCFStringEncodingISOLatinGreek;
1601             break ;
1602         case wxFONTENCODING_ISO8859_8 :
1603             enc = kCFStringEncodingISOLatinHebrew;
1604             break ;
1605         case wxFONTENCODING_ISO8859_9 :
1606             enc = kCFStringEncodingISOLatin5;
1607             break ;
1608         case wxFONTENCODING_ISO8859_10 :
1609             enc = kCFStringEncodingISOLatin6;
1610             break ;
1611         case wxFONTENCODING_ISO8859_11 :
1612             enc = kCFStringEncodingISOLatinThai;
1613             break ;
1614         case wxFONTENCODING_ISO8859_13 :
1615             enc = kCFStringEncodingISOLatin7;
1616             break ;
1617         case wxFONTENCODING_ISO8859_14 :
1618             enc = kCFStringEncodingISOLatin8;
1619             break ;
1620         case wxFONTENCODING_ISO8859_15 :
1621             enc = kCFStringEncodingISOLatin9;
1622             break ;
1623
1624         case wxFONTENCODING_KOI8 :
1625             enc = kCFStringEncodingKOI8_R;
1626             break ;
1627         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1628             enc = kCFStringEncodingDOSRussian;
1629             break ;
1630
1631 //      case wxFONTENCODING_BULGARIAN :
1632 //          enc = ;
1633 //          break ;
1634
1635         case wxFONTENCODING_CP437 :
1636             enc =kCFStringEncodingDOSLatinUS ;
1637             break ;
1638         case wxFONTENCODING_CP850 :
1639             enc = kCFStringEncodingDOSLatin1;
1640             break ;
1641         case wxFONTENCODING_CP852 :
1642             enc = kCFStringEncodingDOSLatin2;
1643             break ;
1644         case wxFONTENCODING_CP855 :
1645             enc = kCFStringEncodingDOSCyrillic;
1646             break ;
1647         case wxFONTENCODING_CP866 :
1648             enc =kCFStringEncodingDOSRussian ;
1649             break ;
1650         case wxFONTENCODING_CP874 :
1651             enc = kCFStringEncodingDOSThai;
1652             break ;
1653         case wxFONTENCODING_CP932 :
1654             enc = kCFStringEncodingDOSJapanese;
1655             break ;
1656         case wxFONTENCODING_CP936 :
1657             enc =kCFStringEncodingDOSChineseSimplif ;
1658             break ;
1659         case wxFONTENCODING_CP949 :
1660             enc = kCFStringEncodingDOSKorean;
1661             break ;
1662         case wxFONTENCODING_CP950 :
1663             enc = kCFStringEncodingDOSChineseTrad;
1664             break ;
1665         case wxFONTENCODING_CP1250 :
1666             enc = kCFStringEncodingWindowsLatin2;
1667             break ;
1668         case wxFONTENCODING_CP1251 :
1669             enc =kCFStringEncodingWindowsCyrillic ;
1670             break ;
1671         case wxFONTENCODING_CP1252 :
1672             enc =kCFStringEncodingWindowsLatin1 ;
1673             break ;
1674         case wxFONTENCODING_CP1253 :
1675             enc = kCFStringEncodingWindowsGreek;
1676             break ;
1677         case wxFONTENCODING_CP1254 :
1678             enc = kCFStringEncodingWindowsLatin5;
1679             break ;
1680         case wxFONTENCODING_CP1255 :
1681             enc =kCFStringEncodingWindowsHebrew ;
1682             break ;
1683         case wxFONTENCODING_CP1256 :
1684             enc =kCFStringEncodingWindowsArabic ;
1685             break ;
1686         case wxFONTENCODING_CP1257 :
1687             enc = kCFStringEncodingWindowsBalticRim;
1688             break ;
1689 //   This only really encodes to UTF7 (if that) evidently
1690 //        case wxFONTENCODING_UTF7 :
1691 //            enc = kCFStringEncodingNonLossyASCII ;
1692 //            break ;
1693         case wxFONTENCODING_UTF8 :
1694             enc = kCFStringEncodingUTF8 ;
1695             break ;
1696         case wxFONTENCODING_EUC_JP :
1697             enc = kCFStringEncodingEUC_JP;
1698             break ;
1699         case wxFONTENCODING_UTF16 :
1700             enc = kCFStringEncodingUnicode ;
1701             break ;
1702         case wxFONTENCODING_MACROMAN :
1703             enc = kCFStringEncodingMacRoman ;
1704             break ;
1705         case wxFONTENCODING_MACJAPANESE :
1706             enc = kCFStringEncodingMacJapanese ;
1707             break ;
1708         case wxFONTENCODING_MACCHINESETRAD :
1709             enc = kCFStringEncodingMacChineseTrad ;
1710             break ;
1711         case wxFONTENCODING_MACKOREAN :
1712             enc = kCFStringEncodingMacKorean ;
1713             break ;
1714         case wxFONTENCODING_MACARABIC :
1715             enc = kCFStringEncodingMacArabic ;
1716             break ;
1717         case wxFONTENCODING_MACHEBREW :
1718             enc = kCFStringEncodingMacHebrew ;
1719             break ;
1720         case wxFONTENCODING_MACGREEK :
1721             enc = kCFStringEncodingMacGreek ;
1722             break ;
1723         case wxFONTENCODING_MACCYRILLIC :
1724             enc = kCFStringEncodingMacCyrillic ;
1725             break ;
1726         case wxFONTENCODING_MACDEVANAGARI :
1727             enc = kCFStringEncodingMacDevanagari ;
1728             break ;
1729         case wxFONTENCODING_MACGURMUKHI :
1730             enc = kCFStringEncodingMacGurmukhi ;
1731             break ;
1732         case wxFONTENCODING_MACGUJARATI :
1733             enc = kCFStringEncodingMacGujarati ;
1734             break ;
1735         case wxFONTENCODING_MACORIYA :
1736             enc = kCFStringEncodingMacOriya ;
1737             break ;
1738         case wxFONTENCODING_MACBENGALI :
1739             enc = kCFStringEncodingMacBengali ;
1740             break ;
1741         case wxFONTENCODING_MACTAMIL :
1742             enc = kCFStringEncodingMacTamil ;
1743             break ;
1744         case wxFONTENCODING_MACTELUGU :
1745             enc = kCFStringEncodingMacTelugu ;
1746             break ;
1747         case wxFONTENCODING_MACKANNADA :
1748             enc = kCFStringEncodingMacKannada ;
1749             break ;
1750         case wxFONTENCODING_MACMALAJALAM :
1751             enc = kCFStringEncodingMacMalayalam ;
1752             break ;
1753         case wxFONTENCODING_MACSINHALESE :
1754             enc = kCFStringEncodingMacSinhalese ;
1755             break ;
1756         case wxFONTENCODING_MACBURMESE :
1757             enc = kCFStringEncodingMacBurmese ;
1758             break ;
1759         case wxFONTENCODING_MACKHMER :
1760             enc = kCFStringEncodingMacKhmer ;
1761             break ;
1762         case wxFONTENCODING_MACTHAI :
1763             enc = kCFStringEncodingMacThai ;
1764             break ;
1765         case wxFONTENCODING_MACLAOTIAN :
1766             enc = kCFStringEncodingMacLaotian ;
1767             break ;
1768         case wxFONTENCODING_MACGEORGIAN :
1769             enc = kCFStringEncodingMacGeorgian ;
1770             break ;
1771         case wxFONTENCODING_MACARMENIAN :
1772             enc = kCFStringEncodingMacArmenian ;
1773             break ;
1774         case wxFONTENCODING_MACCHINESESIMP :
1775             enc = kCFStringEncodingMacChineseSimp ;
1776             break ;
1777         case wxFONTENCODING_MACTIBETAN :
1778             enc = kCFStringEncodingMacTibetan ;
1779             break ;
1780         case wxFONTENCODING_MACMONGOLIAN :
1781             enc = kCFStringEncodingMacMongolian ;
1782             break ;
1783         case wxFONTENCODING_MACETHIOPIC :
1784             enc = kCFStringEncodingMacEthiopic ;
1785             break ;
1786         case wxFONTENCODING_MACCENTRALEUR :
1787             enc = kCFStringEncodingMacCentralEurRoman ;
1788             break ;
1789         case wxFONTENCODING_MACVIATNAMESE :
1790             enc = kCFStringEncodingMacVietnamese ;
1791             break ;
1792         case wxFONTENCODING_MACARABICEXT :
1793             enc = kCFStringEncodingMacExtArabic ;
1794             break ;
1795         case wxFONTENCODING_MACSYMBOL :
1796             enc = kCFStringEncodingMacSymbol ;
1797             break ;
1798         case wxFONTENCODING_MACDINGBATS :
1799             enc = kCFStringEncodingMacDingbats ;
1800             break ;
1801         case wxFONTENCODING_MACTURKISH :
1802             enc = kCFStringEncodingMacTurkish ;
1803             break ;
1804         case wxFONTENCODING_MACCROATIAN :
1805             enc = kCFStringEncodingMacCroatian ;
1806             break ;
1807         case wxFONTENCODING_MACICELANDIC :
1808             enc = kCFStringEncodingMacIcelandic ;
1809             break ;
1810         case wxFONTENCODING_MACROMANIAN :
1811             enc = kCFStringEncodingMacRomanian ;
1812             break ;
1813         case wxFONTENCODING_MACCELTIC :
1814             enc = kCFStringEncodingMacCeltic ;
1815             break ;
1816         case wxFONTENCODING_MACGAELIC :
1817             enc = kCFStringEncodingMacGaelic ;
1818             break ;
1819 //      case wxFONTENCODING_MACKEYBOARD :
1820 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
1821 //          break ;
1822         default :
1823             // because gcc is picky
1824             break ;
1825     } ;
1826     return enc ;
1827 }
1828
1829 class wxMBConv_cocoa : public wxMBConv
1830 {
1831 public:
1832     wxMBConv_cocoa()
1833     {
1834         Init(CFStringGetSystemEncoding()) ;
1835     }
1836
1837     wxMBConv_cocoa(const wxChar* name)
1838     {
1839         Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1840     }
1841
1842     wxMBConv_cocoa(wxFontEncoding encoding)
1843     {
1844         Init( wxCFStringEncFromFontEnc(encoding) );
1845     }
1846
1847     ~wxMBConv_cocoa()
1848     {
1849     }
1850
1851     void Init( CFStringEncoding encoding)
1852     {
1853         m_encoding = encoding ;
1854     }
1855
1856     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1857     {
1858         wxASSERT(szUnConv);
1859
1860         CFStringRef theString = CFStringCreateWithBytes (
1861                                                 NULL, //the allocator
1862                                                 (const UInt8*)szUnConv,
1863                                                 strlen(szUnConv),
1864                                                 m_encoding,
1865                                                 false //no BOM/external representation
1866                                                 );
1867
1868         wxASSERT(theString);
1869
1870         size_t nOutLength = CFStringGetLength(theString);
1871
1872         if (szOut == NULL)
1873         {
1874             CFRelease(theString);
1875             return nOutLength;
1876         }
1877
1878         CFRange theRange = { 0, nOutSize };
1879
1880 #if SIZEOF_WCHAR_T == 4
1881         UniChar* szUniCharBuffer = new UniChar[nOutSize];
1882 #endif
1883
1884         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
1885
1886         CFRelease(theString);
1887
1888         szUniCharBuffer[nOutLength] = '\0' ;
1889
1890 #if SIZEOF_WCHAR_T == 4
1891         wxMBConvUTF16 converter ;
1892         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
1893         delete[] szUniCharBuffer;
1894 #endif
1895
1896         return nOutLength;
1897     }
1898
1899     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1900     {
1901         wxASSERT(szUnConv);
1902
1903         size_t nRealOutSize;
1904         size_t nBufSize = wxWcslen(szUnConv);
1905         UniChar* szUniBuffer = (UniChar*) szUnConv;
1906
1907 #if SIZEOF_WCHAR_T == 4
1908         wxMBConvUTF16BE converter ;
1909         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1910         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1911         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1912         nBufSize /= sizeof(UniChar);
1913 #endif
1914
1915         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1916                                 NULL, //allocator
1917                                 szUniBuffer,
1918                                 nBufSize,
1919                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
1920                             );
1921
1922         wxASSERT(theString);
1923
1924         //Note that CER puts a BOM when converting to unicode
1925         //so we  check and use getchars instead in that case
1926         if (m_encoding == kCFStringEncodingUnicode)
1927         {
1928             if (szOut != NULL)
1929                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1930
1931             nRealOutSize = CFStringGetLength(theString) + 1;
1932         }
1933         else
1934         {
1935             CFStringGetBytes(
1936                 theString,
1937                 CFRangeMake(0, CFStringGetLength(theString)),
1938                 m_encoding,
1939                 0, //what to put in characters that can't be converted -
1940                     //0 tells CFString to return NULL if it meets such a character
1941                 false, //not an external representation
1942                 (UInt8*) szOut,
1943                 nOutSize,
1944                 (CFIndex*) &nRealOutSize
1945                         );
1946         }
1947
1948         CFRelease(theString);
1949
1950 #if SIZEOF_WCHAR_T == 4
1951         delete[] szUniBuffer;
1952 #endif
1953
1954         return  nRealOutSize - 1;
1955     }
1956
1957     bool IsOk() const
1958     {
1959         return m_encoding != kCFStringEncodingInvalidId &&
1960               CFStringIsEncodingAvailable(m_encoding);
1961     }
1962
1963 private:
1964     CFStringEncoding m_encoding ;
1965 };
1966
1967 #endif // defined(__WXCOCOA__)
1968
1969 // ============================================================================
1970 // Mac conversion classes
1971 // ============================================================================
1972
1973 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1974
1975 class wxMBConv_mac : public wxMBConv
1976 {
1977 public:
1978     wxMBConv_mac()
1979     {
1980         Init(CFStringGetSystemEncoding()) ;
1981     }
1982
1983     wxMBConv_mac(const wxChar* name)
1984     {
1985         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1986     }
1987
1988     wxMBConv_mac(wxFontEncoding encoding)
1989     {
1990         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1991     }
1992
1993     ~wxMBConv_mac()
1994     {
1995         OSStatus status = noErr ;
1996         status = TECDisposeConverter(m_MB2WC_converter);
1997         status = TECDisposeConverter(m_WC2MB_converter);
1998     }
1999
2000
2001     void Init( TextEncodingBase encoding)
2002     {
2003         OSStatus status = noErr ;
2004         m_char_encoding = encoding ;
2005         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2006
2007         status = TECCreateConverter(&m_MB2WC_converter,
2008                                     m_char_encoding,
2009                                     m_unicode_encoding);
2010         status = TECCreateConverter(&m_WC2MB_converter,
2011                                     m_unicode_encoding,
2012                                     m_char_encoding);
2013     }
2014
2015     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2016     {
2017         OSStatus status = noErr ;
2018         ByteCount byteOutLen ;
2019         ByteCount byteInLen = strlen(psz) ;
2020         wchar_t *tbuf = NULL ;
2021         UniChar* ubuf = NULL ;
2022         size_t res = 0 ;
2023
2024         if (buf == NULL)
2025         {
2026             //apple specs say at least 32
2027             n = wxMax( 32 , byteInLen ) ;
2028             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2029         }
2030         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2031 #if SIZEOF_WCHAR_T == 4
2032         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2033 #else
2034         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2035 #endif
2036         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2037           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2038 #if SIZEOF_WCHAR_T == 4
2039         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2040         // is not properly terminated we get random characters at the end
2041         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2042         wxMBConvUTF16BE converter ;
2043         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2044         free( ubuf ) ;
2045 #else
2046         res = byteOutLen / sizeof( UniChar ) ;
2047 #endif
2048         if ( buf == NULL )
2049              free(tbuf) ;
2050
2051         if ( buf  && res < n)
2052             buf[res] = 0;
2053
2054         return res ;
2055     }
2056
2057     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2058     {
2059         OSStatus status = noErr ;
2060         ByteCount byteOutLen ;
2061         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2062
2063         char *tbuf = NULL ;
2064
2065         if (buf == NULL)
2066         {
2067             //apple specs say at least 32
2068             n = wxMax( 32 , ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2069             tbuf = (char*) malloc( n ) ;
2070         }
2071
2072         ByteCount byteBufferLen = n ;
2073         UniChar* ubuf = NULL ;
2074 #if SIZEOF_WCHAR_T == 4
2075         wxMBConvUTF16BE converter ;
2076         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2077         byteInLen = unicharlen ;
2078         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2079         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2080 #else
2081         ubuf = (UniChar*) psz ;
2082 #endif
2083         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2084             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2085 #if SIZEOF_WCHAR_T == 4
2086         free( ubuf ) ;
2087 #endif
2088         if ( buf == NULL )
2089             free(tbuf) ;
2090
2091         size_t res = byteOutLen ;
2092         if ( buf  && res < n)
2093         {
2094             buf[res] = 0;
2095
2096             //we need to double-trip to verify it didn't insert any ? in place
2097             //of bogus characters
2098             wxWCharBuffer wcBuf(n);
2099             size_t pszlen = wxWcslen(psz);
2100             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2101                         wxWcslen(wcBuf) != pszlen ||
2102                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2103             {
2104                 // we didn't obtain the same thing we started from, hence
2105                 // the conversion was lossy and we consider that it failed
2106                 return (size_t)-1;
2107             }
2108         }
2109
2110         return res ;
2111     }
2112
2113     bool IsOk() const
2114         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2115
2116 private:
2117     TECObjectRef m_MB2WC_converter ;
2118     TECObjectRef m_WC2MB_converter ;
2119
2120     TextEncodingBase m_char_encoding ;
2121     TextEncodingBase m_unicode_encoding ;
2122 };
2123
2124 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2125
2126 // ============================================================================
2127 // wxEncodingConverter based conversion classes
2128 // ============================================================================
2129
2130 #if wxUSE_FONTMAP
2131
2132 class wxMBConv_wxwin : public wxMBConv
2133 {
2134 private:
2135     void Init()
2136     {
2137         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2138                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2139     }
2140
2141 public:
2142     // temporarily just use wxEncodingConverter stuff,
2143     // so that it works while a better implementation is built
2144     wxMBConv_wxwin(const wxChar* name)
2145     {
2146         if (name)
2147             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2148         else
2149             m_enc = wxFONTENCODING_SYSTEM;
2150
2151         Init();
2152     }
2153
2154     wxMBConv_wxwin(wxFontEncoding enc)
2155     {
2156         m_enc = enc;
2157
2158         Init();
2159     }
2160
2161     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2162     {
2163         size_t inbuf = strlen(psz);
2164         if (buf)
2165             m2w.Convert(psz,buf);
2166         return inbuf;
2167     }
2168
2169     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2170     {
2171         const size_t inbuf = wxWcslen(psz);
2172         if (buf)
2173             w2m.Convert(psz,buf);
2174
2175         return inbuf;
2176     }
2177
2178     bool IsOk() const { return m_ok; }
2179
2180 public:
2181     wxFontEncoding m_enc;
2182     wxEncodingConverter m2w, w2m;
2183
2184     // were we initialized successfully?
2185     bool m_ok;
2186
2187     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2188 };
2189
2190 #endif // wxUSE_FONTMAP
2191
2192 // ============================================================================
2193 // wxCSConv implementation
2194 // ============================================================================
2195
2196 void wxCSConv::Init()
2197 {
2198     m_name = NULL;
2199     m_convReal =  NULL;
2200     m_deferred = true;
2201 }
2202
2203 wxCSConv::wxCSConv(const wxChar *charset)
2204 {
2205     Init();
2206
2207     if ( charset )
2208     {
2209         SetName(charset);
2210     }
2211
2212     m_encoding = wxFONTENCODING_SYSTEM;
2213 }
2214
2215 wxCSConv::wxCSConv(wxFontEncoding encoding)
2216 {
2217     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2218     {
2219         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2220
2221         encoding = wxFONTENCODING_SYSTEM;
2222     }
2223
2224     Init();
2225
2226     m_encoding = encoding;
2227 }
2228
2229 wxCSConv::~wxCSConv()
2230 {
2231     Clear();
2232 }
2233
2234 wxCSConv::wxCSConv(const wxCSConv& conv)
2235         : wxMBConv()
2236 {
2237     Init();
2238
2239     SetName(conv.m_name);
2240     m_encoding = conv.m_encoding;
2241 }
2242
2243 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2244 {
2245     Clear();
2246
2247     SetName(conv.m_name);
2248     m_encoding = conv.m_encoding;
2249
2250     return *this;
2251 }
2252
2253 void wxCSConv::Clear()
2254 {
2255     free(m_name);
2256     delete m_convReal;
2257
2258     m_name = NULL;
2259     m_convReal = NULL;
2260 }
2261
2262 void wxCSConv::SetName(const wxChar *charset)
2263 {
2264     if (charset)
2265     {
2266         m_name = wxStrdup(charset);
2267         m_deferred = true;
2268     }
2269 }
2270
2271 wxMBConv *wxCSConv::DoCreate() const
2272 {
2273     // check for the special case of ASCII or ISO8859-1 charset: as we have
2274     // special knowledge of it anyhow, we don't need to create a special
2275     // conversion object
2276     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2277     {
2278         // don't convert at all
2279         return NULL;
2280     }
2281
2282     // we trust OS to do conversion better than we can so try external
2283     // conversion methods first
2284     //
2285     // the full order is:
2286     //      1. OS conversion (iconv() under Unix or Win32 API)
2287     //      2. hard coded conversions for UTF
2288     //      3. wxEncodingConverter as fall back
2289
2290     // step (1)
2291 #ifdef HAVE_ICONV
2292 #if !wxUSE_FONTMAP
2293     if ( m_name )
2294 #endif // !wxUSE_FONTMAP
2295     {
2296         wxString name(m_name);
2297
2298 #if wxUSE_FONTMAP
2299         if ( name.empty() )
2300             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2301 #endif // wxUSE_FONTMAP
2302
2303         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2304         if ( conv->IsOk() )
2305             return conv;
2306
2307         delete conv;
2308     }
2309 #endif // HAVE_ICONV
2310
2311 #ifdef wxHAVE_WIN32_MB2WC
2312     {
2313 #if wxUSE_FONTMAP
2314         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2315                                       : new wxMBConv_win32(m_encoding);
2316         if ( conv->IsOk() )
2317             return conv;
2318
2319         delete conv;
2320 #else
2321         return NULL;
2322 #endif
2323     }
2324 #endif // wxHAVE_WIN32_MB2WC
2325 #if defined(__WXMAC__)
2326     {
2327         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2328         {
2329
2330             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2331                                         : new wxMBConv_mac(m_encoding);
2332             if ( conv->IsOk() )
2333                  return conv;
2334
2335             delete conv;
2336         }
2337     }
2338 #endif
2339 #if defined(__WXCOCOA__)
2340     {
2341         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2342         {
2343
2344             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2345                                           : new wxMBConv_cocoa(m_encoding);
2346             if ( conv->IsOk() )
2347                  return conv;
2348
2349             delete conv;
2350         }
2351     }
2352 #endif
2353     // step (2)
2354     wxFontEncoding enc = m_encoding;
2355 #if wxUSE_FONTMAP
2356     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2357     {
2358         // use "false" to suppress interactive dialogs -- we can be called from
2359         // anywhere and popping up a dialog from here is the last thing we want to
2360         // do
2361         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2362     }
2363 #endif // wxUSE_FONTMAP
2364
2365     switch ( enc )
2366     {
2367         case wxFONTENCODING_UTF7:
2368              return new wxMBConvUTF7;
2369
2370         case wxFONTENCODING_UTF8:
2371              return new wxMBConvUTF8;
2372
2373         case wxFONTENCODING_UTF16BE:
2374              return new wxMBConvUTF16BE;
2375
2376         case wxFONTENCODING_UTF16LE:
2377              return new wxMBConvUTF16LE;
2378
2379         case wxFONTENCODING_UTF32BE:
2380              return new wxMBConvUTF32BE;
2381
2382         case wxFONTENCODING_UTF32LE:
2383              return new wxMBConvUTF32LE;
2384
2385         default:
2386              // nothing to do but put here to suppress gcc warnings
2387              ;
2388     }
2389
2390     // step (3)
2391 #if wxUSE_FONTMAP
2392     {
2393         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2394                                       : new wxMBConv_wxwin(m_encoding);
2395         if ( conv->IsOk() )
2396             return conv;
2397
2398         delete conv;
2399     }
2400 #endif // wxUSE_FONTMAP
2401
2402     // NB: This is a hack to prevent deadlock. What could otherwise happen
2403     //     in Unicode build: wxConvLocal creation ends up being here
2404     //     because of some failure and logs the error. But wxLog will try to
2405     //     attach timestamp, for which it will need wxConvLocal (to convert
2406     //     time to char* and then wchar_t*), but that fails, tries to log
2407     //     error, but wxLog has a (already locked) critical section that
2408     //     guards static buffer.
2409     static bool alreadyLoggingError = false;
2410     if (!alreadyLoggingError)
2411     {
2412         alreadyLoggingError = true;
2413         wxLogError(_("Cannot convert from the charset '%s'!"),
2414                    m_name ? m_name
2415                       :
2416 #if wxUSE_FONTMAP
2417                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2418 #else // !wxUSE_FONTMAP
2419                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2420 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2421               );
2422         alreadyLoggingError = false;
2423     }
2424
2425     return NULL;
2426 }
2427
2428 void wxCSConv::CreateConvIfNeeded() const
2429 {
2430     if ( m_deferred )
2431     {
2432         wxCSConv *self = (wxCSConv *)this; // const_cast
2433
2434 #if wxUSE_INTL
2435         // if we don't have neither the name nor the encoding, use the default
2436         // encoding for this system
2437         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2438         {
2439             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2440         }
2441 #endif // wxUSE_INTL
2442
2443         self->m_convReal = DoCreate();
2444         self->m_deferred = false;
2445     }
2446 }
2447
2448 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2449 {
2450     CreateConvIfNeeded();
2451
2452     if (m_convReal)
2453         return m_convReal->MB2WC(buf, psz, n);
2454
2455     // latin-1 (direct)
2456     size_t len = strlen(psz);
2457
2458     if (buf)
2459     {
2460         for (size_t c = 0; c <= len; c++)
2461             buf[c] = (unsigned char)(psz[c]);
2462     }
2463
2464     return len;
2465 }
2466
2467 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2468 {
2469     CreateConvIfNeeded();
2470
2471     if (m_convReal)
2472         return m_convReal->WC2MB(buf, psz, n);
2473
2474     // latin-1 (direct)
2475     const size_t len = wxWcslen(psz);
2476     if (buf)
2477     {
2478         for (size_t c = 0; c <= len; c++)
2479         {
2480             if (psz[c] > 0xFF)
2481                 return (size_t)-1;
2482             buf[c] = (char)psz[c];
2483         }
2484     }
2485     else
2486     {
2487         for (size_t c = 0; c <= len; c++)
2488         {
2489             if (psz[c] > 0xFF)
2490                 return (size_t)-1;
2491         }
2492     }
2493
2494     return len;
2495 }
2496
2497 // ----------------------------------------------------------------------------
2498 // globals
2499 // ----------------------------------------------------------------------------
2500
// The object behind wxConvLibc: a default constructed wxMBConv_win32 under
// Windows, a default constructed wxMBConv_mac under non-Mach Mac builds and
// wxMBConvLibc everywhere else.
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// The objects behind the other global converters.
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;


// The exported globals are references bound to the file-static objects
// defined above, so these definitions must stay after them.
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent initially points to the same object as wxConvLibc
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2521
2522 #else // !wxUSE_WCHAR_T
2523
// stand-ins in absence of wchar_t: the globals are defined as plain wxMBConv
// objects when wxUSE_WCHAR_T is 0
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2529
2530 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2531
2532