src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73 #endif
  74
  75 #include "wx/encconv.h"
  76 #include "wx/fontmap.h"
  77 #include "wx/utils.h"
  78
  79 #ifdef __WXMAC__
  80 #include <ATSUnicode.h>
  81 #include <TextCommon.h>
  82 #include <TextEncodingConverter.h>
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86 // ----------------------------------------------------------------------------
  87 // macros
  88 // ----------------------------------------------------------------------------
  89
  90 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  91 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  92
  93 #if SIZEOF_WCHAR_T == 4
  94     #define WC_NAME         "UCS4"
  95     #define WC_BSWAP         BSWAP_UCS4
  96     #ifdef WORDS_BIGENDIAN
  97       #define WC_NAME_BEST  "UCS-4BE"
  98     #else
  99       #define WC_NAME_BEST  "UCS-4LE"
 100     #endif
 101 #elif SIZEOF_WCHAR_T == 2
 102     #define WC_NAME         "UTF16"
 103     #define WC_BSWAP         BSWAP_UTF16
 104     #define WC_UTF16
 105     #ifdef WORDS_BIGENDIAN
 106       #define WC_NAME_BEST  "UTF-16BE"
 107     #else
 108       #define WC_NAME_BEST  "UTF-16LE"
 109     #endif
 110 #else // sizeof(wchar_t) != 2 nor 4
 111     // does this ever happen?
 112     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 113 #endif
 114
 115 // ============================================================================
 116 // implementation
 117 // ============================================================================
 118
 119 // ----------------------------------------------------------------------------
 120 // UTF-16 en/decoding to/from UCS-4
 121 // ----------------------------------------------------------------------------
 122
 123
 124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 125 {
 126     if (input<=0xffff)
 127     {
 128         if (output)
 129             *output = (wxUint16) input;
 130         return 1;
 131     }
 132     else if (input>=0x110000)
 133     {
 134         return (size_t)-1;
 135     }
 136     else
 137     {
 138         if (output)
 139         {
 140             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 141             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 142         }
 143         return 2;
 144     }
 145 }
 146
 147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 148 {
 149     if ((*input<0xd800) || (*input>0xdfff))
 150     {
 151         output = *input;
 152         return 1;
 153     }
 154     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 155     {
 156         output = *input;
 157         return (size_t)-1;
 158     }
 159     else
 160     {
 161         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 162         return 2;
 163     }
 164 }
 165
 166
 167 // ----------------------------------------------------------------------------
 168 // wxMBConv
 169 // ----------------------------------------------------------------------------
 170
 171 wxMBConv::~wxMBConv()
 172 {
 173     // nothing to do here (necessary for Darwin linking probably)
 174 }
 175
 176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 177 {
 178     if ( psz )
 179     {
 180         // calculate the length of the buffer needed first
 181         size_t nLen = MB2WC(NULL, psz, 0);
 182         if ( nLen != (size_t)-1 )
 183         {
 184             // now do the actual conversion
 185             wxWCharBuffer buf(nLen);
 186             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 187             if ( nLen != (size_t)-1 )
 188             {
 189                 return buf;
 190             }
 191         }
 192     }
 193
 194     wxWCharBuffer buf((wchar_t *)NULL);
 195
 196     return buf;
 197 }
 198
 199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 200 {
 201     if ( pwz )
 202     {
 203         size_t nLen = WC2MB(NULL, pwz, 0);
 204         if ( nLen != (size_t)-1 )
 205         {
 206             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 207             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 208             if ( nLen != (size_t)-1 )
 209             {
 210                 return buf;
 211             }
 212         }
 213     }
 214
 215     wxCharBuffer buf((char *)NULL);
 216
 217     return buf;
 218 }
 219
 220 size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
 221                        size_t outsize, size_t nStringLen) const
 222 {
 223     const char* szEnd = szString + nStringLen + 1;
 224     const char* szPos = szString;
 225     const char* szStart = szPos;
 226
 227     size_t nActualLength = 0;
 228
 229     //Convert the string until the length() is reached, continuing the
 230     //loop every time a null character is reached
 231     while(szPos != szEnd)
 232     {
 233         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 234
 235         //Get the length of the current (sub)string
 236         size_t nLen = MB2WC(NULL, szPos, 0);
 237
 238         //Invalid conversion?
 239         if( nLen == (size_t)-1 )
 240             return nLen;
 241
 242         //Increase the actual length (+1 for current null character)
 243         nActualLength += nLen + 1;
 244
 245         //Only copy data in if buffer size is big enough
 246         if (szBuffer != NULL &&
 247             nActualLength <= outsize)
 248         {
 249             //Convert the current (sub)string
 250             if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 251                 return (size_t)-1;
 252         }
 253
 254         //Increment to next (sub)string
 255         //Note that we have to use strlen here instead of nLen
 256         //here because XX2XX gives us the size of the output buffer,
 257         //not neccessarly the length of the string
 258         szPos += strlen(szPos) + 1;
 259     }
 260
 261     return nActualLength - 1; //success - return actual length
 262 }
 263
 264 size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
 265                        size_t outsize, size_t nStringLen) const
 266 {
 267     const wchar_t* szEnd = szString + nStringLen + 1;
 268     const wchar_t* szPos = szString;
 269     const wchar_t* szStart = szPos;
 270
 271     size_t nActualLength = 0;
 272
 273     //Convert the string until the length() is reached, continuing the
 274     //loop every time a null character is reached
 275     while(szPos != szEnd)
 276     {
 277         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 278
 279         //Get the length of the current (sub)string
 280         size_t nLen = WC2MB(NULL, szPos, 0);
 281
 282         //Invalid conversion?
 283         if( nLen == (size_t)-1 )
 284             return nLen;
 285
 286         //Increase the actual length (+1 for current null character)
 287         nActualLength += nLen + 1;
 288
 289         //Only copy data in if buffer size is big enough
 290         if (szBuffer != NULL &&
 291             nActualLength <= outsize)
 292         {
 293             //Convert the current (sub)string
 294             if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 295                 return (size_t)-1;
 296         }
 297
 298         //Increment to next (sub)string
 299         //Note that we have to use wxWcslen here instead of nLen
 300         //here because XX2XX gives us the size of the output buffer,
 301         //not neccessarly the length of the string
 302         szPos += wxWcslen(szPos) + 1;
 303     }
 304
 305     return nActualLength - 1;  //success - return actual length
 306 }
 307
 308 // ----------------------------------------------------------------------------
 309 // wxMBConvLibc
 310 // ----------------------------------------------------------------------------
 311
 312 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 313 {
 314     return wxMB2WC(buf, psz, n);
 315 }
 316
 317 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 318 {
 319     return wxWC2MB(buf, psz, n);
 320 }
 321 // ----------------------------------------------------------------------------
 322 // UTF-7
 323 // ----------------------------------------------------------------------------
 324
 325 // Implementation (C) 2004 Fredrik Roubert
 326
 327 //
 328 // BASE64 decoding table
 329 //
 330 static const unsigned char utf7unb64[] =
 331 {
 332     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 333     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 334     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 335     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 336     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 337     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 338     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 339     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 340     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 341     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 342     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 343     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 344     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 345     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 346     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 347     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 348     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 349     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 350     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 351     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 352     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 353     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 354     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 355     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 356     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 357     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 358     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 359     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 360     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 361     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 362     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 363     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 364 };
 365
 366 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 367 {
 368
 369     size_t len = 0;
 370
 371     while (*psz && ((!buf) || (len < n)))
 372     {
 373         unsigned char cc = *psz++;
 374         if (cc != '+')
 375         {
 376             // plain ASCII char
 377             if (buf)
 378                 *buf++ = cc;
 379             len++;
 380         }
 381         else if (*psz == '-')
 382         {
 383             // encoded plus sign
 384             if (buf)
 385                 *buf++ = cc;
 386             len++;
 387             psz++;
 388         }
 389         else
 390         {
 391             // BASE64 encoded string
 392             bool lsb;
 393             unsigned char c;
 394             unsigned int d, l;
 395             for (lsb = false, d = 0, l = 0;
 396                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 397             {
 398                 d <<= 6;
 399                 d += cc;
 400                 for (l += 6; l >= 8; lsb = !lsb)
 401                 {
 402                     c = (d >> (l -= 8)) % 256;
 403                     if (lsb)
 404                     {
 405                         if (buf)
 406                             *buf++ |= c;
 407                         len ++;
 408                     }
 409                     else
 410                         if (buf)
 411                             *buf = c << 8;
 412                 }
 413             }
 414             if (*psz == '-')
 415                 psz++;
 416         }
 417     }
 418     if (buf && (len < n))
 419         *buf = 0;
 420     return len;
 421 }
 422
 423 //
 424 // BASE64 encoding table
 425 //
 426 static const unsigned char utf7enb64[] =
 427 {
 428     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 429     'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 430     'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 431     'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 432     'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 433     'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 434     'w', 'x', 'y', 'z', '0', '1', '2', '3',
 435     '4', '5', '6', '7', '8', '9', '+', '/'
 436 };
 437
 438 //
 439 // UTF-7 encoding table
 440 //
 441 // 0 - Set D (directly encoded characters)
 442 // 1 - Set O (optional direct characters)
 443 // 2 - whitespace characters (optional)
 444 // 3 - special characters
 445 //
 446 static const unsigned char utf7encode[128] =
 447 {
 448     3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
 449     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 450     2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
 451     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 452     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 453     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
 454     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 455     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
 456 };
 457
 458 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
 459 *psz, size_t n) const
 460 {
 461
 462
 463     size_t len = 0;
 464
 465     while (*psz && ((!buf) || (len < n)))
 466     {
 467         wchar_t cc = *psz++;
 468         if (cc < 0x80 && utf7encode[cc] < 1)
 469         {
 470             // plain ASCII char
 471             if (buf)
 472                 *buf++ = (char)cc;
 473             len++;
 474         }
 475 #ifndef WC_UTF16
 476         else if (cc > ((const wchar_t)0xffff))
 477         {
 478             // no surrogate pair generation (yet?)
 479             return (size_t)-1;
 480         }
 481 #endif
 482         else
 483         {
 484             if (buf)
 485                 *buf++ = '+';
 486             len++;
 487             if (cc != '+')
 488             {
 489                 // BASE64 encode string
 490                 unsigned int lsb, d, l;
 491                 for (d = 0, l = 0;; psz++)
 492                 {
 493                     for (lsb = 0; lsb < 2; lsb ++)
 494                     {
 495                         d <<= 8;
 496                         d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
 497
 498                         for (l += 8; l >= 6; )
 499                         {
 500                             l -= 6;
 501                             if (buf)
 502                                 *buf++ = utf7enb64[(d >> l) % 64];
 503                             len++;
 504                         }
 505                     }
 506                     cc = *psz;
 507                     if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
 508                         break;
 509                 }
 510                 if (l != 0)
 511                 {
 512                     if (buf)
 513                         *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
 514                     len++;
 515                 }
 516             }
 517             if (buf)
 518                 *buf++ = '-';
 519             len++;
 520         }
 521     }
 522     if (buf && (len < n))
 523         *buf = 0;
 524     return len;
 525 }
 526
 527 // ----------------------------------------------------------------------------
 528 // UTF-8
 529 // ----------------------------------------------------------------------------
 530
 531 static wxUint32 utf8_max[]=
 532     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 533
 534 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 535 {
 536     size_t len = 0;
 537
 538     while (*psz && ((!buf) || (len < n)))
 539     {
 540         unsigned char cc = *psz++, fc = cc;
 541         unsigned cnt;
 542         for (cnt = 0; fc & 0x80; cnt++)
 543             fc <<= 1;
 544         if (!cnt)
 545         {
 546             // plain ASCII char
 547             if (buf)
 548                 *buf++ = cc;
 549             len++;
 550         }
 551         else
 552         {
 553             cnt--;
 554             if (!cnt)
 555             {
 556                 // invalid UTF-8 sequence
 557                 return (size_t)-1;
 558             }
 559             else
 560             {
 561                 unsigned ocnt = cnt - 1;
 562                 wxUint32 res = cc & (0x3f >> cnt);
 563                 while (cnt--)
 564                 {
 565                     cc = *psz++;
 566                     if ((cc & 0xC0) != 0x80)
 567                     {
 568                         // invalid UTF-8 sequence
 569                         return (size_t)-1;
 570                     }
 571                     res = (res << 6) | (cc & 0x3f);
 572                 }
 573                 if (res <= utf8_max[ocnt])
 574                 {
 575                     // illegal UTF-8 encoding
 576                     return (size_t)-1;
 577                 }
 578 #ifdef WC_UTF16
 579                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 580                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 581                 if (pa == (size_t)-1)
 582                   return (size_t)-1;
 583                 if (buf)
 584                     buf += pa;
 585                 len += pa;
 586 #else // !WC_UTF16
 587                 if (buf)
 588                     *buf++ = res;
 589                 len++;
 590 #endif // WC_UTF16/!WC_UTF16
 591             }
 592         }
 593     }
 594     if (buf && (len < n))
 595         *buf = 0;
 596     return len;
 597 }
 598
 599 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 600 {
 601     size_t len = 0;
 602
 603     while (*psz && ((!buf) || (len < n)))
 604     {
 605         wxUint32 cc;
 606 #ifdef WC_UTF16
 607         // cast is ok for WC_UTF16
 608         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 609         psz += (pa == (size_t)-1) ? 1 : pa;
 610 #else
 611         cc=(*psz++) & 0x7fffffff;
 612 #endif
 613         unsigned cnt;
 614         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 615         if (!cnt)
 616         {
 617             // plain ASCII char
 618             if (buf)
 619                 *buf++ = (char) cc;
 620             len++;
 621         }
 622
 623         else
 624         {
 625             len += cnt + 1;
 626             if (buf)
 627             {
 628                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 629                 while (cnt--)
 630                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 631             }
 632         }
 633     }
 634
 635     if (buf && (len<n)) *buf = 0;
 636
 637     return len;
 638 }
 639
 640
 641
 642
 643 // ----------------------------------------------------------------------------
 644 // UTF-16
 645 // ----------------------------------------------------------------------------
 646
 647 #ifdef WORDS_BIGENDIAN
 648     #define wxMBConvUTF16straight wxMBConvUTF16BE
 649     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 650 #else
 651     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 652     #define wxMBConvUTF16straight wxMBConvUTF16LE
 653 #endif
 654
 655
 656 #ifdef WC_UTF16
 657
 658 // copy 16bit MB to 16bit String
 659 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 660 {
 661     size_t len=0;
 662
 663     while (*(wxUint16*)psz && (!buf || len < n))
 664     {
 665         if (buf)
 666             *buf++ = *(wxUint16*)psz;
 667         len++;
 668
 669         psz += sizeof(wxUint16);
 670     }
 671     if (buf && len<n)   *buf=0;
 672
 673     return len;
 674 }
 675
 676
 677 // copy 16bit String to 16bit MB
 678 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 679 {
 680     size_t len=0;
 681
 682     while (*psz && (!buf || len < n))
 683     {
 684         if (buf)
 685         {
 686             *(wxUint16*)buf = *psz;
 687             buf += sizeof(wxUint16);
 688         }
 689         len += sizeof(wxUint16);
 690         psz++;
 691     }
 692     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 693
 694     return len;
 695 }
 696
 697
 698 // swap 16bit MB to 16bit String
 699 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 700 {
 701     size_t len=0;
 702
 703     while (*(wxUint16*)psz && (!buf || len < n))
 704     {
 705         if (buf)
 706         {
 707             ((char *)buf)[0] = psz[1];
 708             ((char *)buf)[1] = psz[0];
 709             buf++;
 710         }
 711         len++;
 712         psz += sizeof(wxUint16);
 713     }
 714     if (buf && len<n)   *buf=0;
 715
 716     return len;
 717 }
 718
 719
 720 // swap 16bit MB to 16bit String
 721 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 722 {
 723     size_t len=0;
 724
 725     while (*psz && (!buf || len < n))
 726     {
 727         if (buf)
 728         {
 729             *buf++ = ((char*)psz)[1];
 730             *buf++ = ((char*)psz)[0];
 731         }
 732         len += sizeof(wxUint16);
 733         psz++;
 734     }
 735     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 736
 737     return len;
 738 }
 739
 740
 741 #else // WC_UTF16
 742
 743
 744 // copy 16bit MB to 32bit String
 745 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 746 {
 747     size_t len=0;
 748
 749     while (*(wxUint16*)psz && (!buf || len < n))
 750     {
 751         wxUint32 cc;
 752         size_t pa=decode_utf16((wxUint16*)psz, cc);
 753         if (pa == (size_t)-1)
 754             return pa;
 755
 756         if (buf)
 757             *buf++ = cc;
 758         len++;
 759         psz += pa * sizeof(wxUint16);
 760     }
 761     if (buf && len<n)   *buf=0;
 762
 763     return len;
 764 }
 765
 766
 767 // copy 32bit String to 16bit MB
 768 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 769 {
 770     size_t len=0;
 771
 772     while (*psz && (!buf || len < n))
 773     {
 774         wxUint16 cc[2];
 775         size_t pa=encode_utf16(*psz, cc);
 776
 777         if (pa == (size_t)-1)
 778             return pa;
 779
 780         if (buf)
 781         {
 782             *(wxUint16*)buf = cc[0];
 783             buf += sizeof(wxUint16);
 784             if (pa > 1)
 785             {
 786                 *(wxUint16*)buf = cc[1];
 787                 buf += sizeof(wxUint16);
 788             }
 789         }
 790
 791         len += pa*sizeof(wxUint16);
 792         psz++;
 793     }
 794     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 795
 796     return len;
 797 }
 798
 799
 800 // swap 16bit MB to 32bit String
 801 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 802 {
 803     size_t len=0;
 804
 805     while (*(wxUint16*)psz && (!buf || len < n))
 806     {
 807         wxUint32 cc;
 808         char tmp[4];
 809         tmp[0]=psz[1];  tmp[1]=psz[0];
 810         tmp[2]=psz[3];  tmp[3]=psz[2];
 811
 812         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 813         if (pa == (size_t)-1)
 814             return pa;
 815
 816         if (buf)
 817             *buf++ = cc;
 818
 819         len++;
 820         psz += pa * sizeof(wxUint16);
 821     }
 822     if (buf && len<n)   *buf=0;
 823
 824     return len;
 825 }
 826
 827
 828 // swap 32bit String to 16bit MB
 829 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 830 {
 831     size_t len=0;
 832
 833     while (*psz && (!buf || len < n))
 834     {
 835         wxUint16 cc[2];
 836         size_t pa=encode_utf16(*psz, cc);
 837
 838         if (pa == (size_t)-1)
 839             return pa;
 840
 841         if (buf)
 842         {
 843             *buf++ = ((char*)cc)[1];
 844             *buf++ = ((char*)cc)[0];
 845             if (pa > 1)
 846             {
 847                 *buf++ = ((char*)cc)[3];
 848                 *buf++ = ((char*)cc)[2];
 849             }
 850         }
 851
 852         len += pa*sizeof(wxUint16);
 853         psz++;
 854     }
 855     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 856
 857     return len;
 858 }
 859
 860 #endif // WC_UTF16
 861
 862
 863 // ----------------------------------------------------------------------------
 864 // UTF-32
 865 // ----------------------------------------------------------------------------
 866
 867 #ifdef WORDS_BIGENDIAN
 868 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 869 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 870 #else
 871 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 872 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 873 #endif
 874
 875
 876 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
 877 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 878
 879
 880 #ifdef WC_UTF16
 881
 882 // copy 32bit MB to 16bit String
 883 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 884 {
 885     size_t len=0;
 886
 887     while (*(wxUint32*)psz && (!buf || len < n))
 888     {
 889         wxUint16 cc[2];
 890
 891         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 892         if (pa == (size_t)-1)
 893             return pa;
 894
 895         if (buf)
 896         {
 897             *buf++ = cc[0];
 898             if (pa > 1)
 899                 *buf++ = cc[1];
 900         }
 901         len += pa;
 902         psz += sizeof(wxUint32);
 903     }
 904     if (buf && len<n)   *buf=0;
 905
 906     return len;
 907 }
 908
 909
 910 // copy 16bit String to 32bit MB
 911 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 912 {
 913     size_t len=0;
 914
 915     while (*psz && (!buf || len < n))
 916     {
 917         wxUint32 cc;
 918
 919         // cast is ok for WC_UTF16
 920         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 921         if (pa == (size_t)-1)
 922             return pa;
 923
 924         if (buf)
 925         {
 926             *(wxUint32*)buf = cc;
 927             buf += sizeof(wxUint32);
 928         }
 929         len += sizeof(wxUint32);
 930         psz += pa;
 931     }
 932
 933     if (buf && len<=n-sizeof(wxUint32))
 934         *(wxUint32*)buf=0;
 935
 936     return len;
 937 }
 938
 939
 940
 941 // swap 32bit MB to 16bit String
 942 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 943 {
 944     size_t len=0;
 945
 946     while (*(wxUint32*)psz && (!buf || len < n))
 947     {
 948         char tmp[4];
 949         tmp[0] = psz[3];   tmp[1] = psz[2];
 950         tmp[2] = psz[1];   tmp[3] = psz[0];
 951
 952
 953         wxUint16 cc[2];
 954
 955         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 956         if (pa == (size_t)-1)
 957             return pa;
 958
 959         if (buf)
 960         {
 961             *buf++ = cc[0];
 962             if (pa > 1)
 963                 *buf++ = cc[1];
 964         }
 965         len += pa;
 966         psz += sizeof(wxUint32);
 967     }
 968
 969     if (buf && len<n)
 970         *buf=0;
 971
 972     return len;
 973 }
 974
 975
 976 // swap 16bit String to 32bit MB
 977 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 978 {
 979     size_t len=0;
 980
 981     while (*psz && (!buf || len < n))
 982     {
 983         char cc[4];
 984
 985         // cast is ok for WC_UTF16
 986         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 987         if (pa == (size_t)-1)
 988             return pa;
 989
 990         if (buf)
 991         {
 992             *buf++ = cc[3];
 993             *buf++ = cc[2];
 994             *buf++ = cc[1];
 995             *buf++ = cc[0];
 996         }
 997         len += sizeof(wxUint32);
 998         psz += pa;
 999     }
1000
1001     if (buf && len<=n-sizeof(wxUint32))
1002         *(wxUint32*)buf=0;
1003
1004     return len;
1005 }
1006
1007 #else // WC_UTF16
1008
1009
1010 // copy 32bit MB to 32bit String
1011 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1012 {
1013     size_t len=0;
1014
1015     while (*(wxUint32*)psz && (!buf || len < n))
1016     {
1017         if (buf)
1018             *buf++ = *(wxUint32*)psz;
1019         len++;
1020         psz += sizeof(wxUint32);
1021     }
1022
1023     if (buf && len<n)
1024         *buf=0;
1025
1026     return len;
1027 }
1028
1029
1030 // copy 32bit String to 32bit MB
1031 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1032 {
1033     size_t len=0;
1034
1035     while (*psz && (!buf || len < n))
1036     {
1037         if (buf)
1038         {
1039             *(wxUint32*)buf = *psz;
1040             buf += sizeof(wxUint32);
1041         }
1042
1043         len += sizeof(wxUint32);
1044         psz++;
1045     }
1046
1047     if (buf && len<=n-sizeof(wxUint32))
1048         *(wxUint32*)buf=0;
1049
1050     return len;
1051 }
1052
1053
1054 // swap 32bit MB to 32bit String
1055 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1056 {
1057     size_t len=0;
1058
1059     while (*(wxUint32*)psz && (!buf || len < n))
1060     {
1061         if (buf)
1062         {
1063             ((char *)buf)[0] = psz[3];
1064             ((char *)buf)[1] = psz[2];
1065             ((char *)buf)[2] = psz[1];
1066             ((char *)buf)[3] = psz[0];
1067             buf++;
1068         }
1069         len++;
1070         psz += sizeof(wxUint32);
1071     }
1072
1073     if (buf && len<n)
1074         *buf=0;
1075
1076     return len;
1077 }
1078
1079
1080 // swap 32bit String to 32bit MB
1081 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1082 {
1083     size_t len=0;
1084
1085     while (*psz && (!buf || len < n))
1086     {
1087         if (buf)
1088         {
1089             *buf++ = ((char *)psz)[3];
1090             *buf++ = ((char *)psz)[2];
1091             *buf++ = ((char *)psz)[1];
1092             *buf++ = ((char *)psz)[0];
1093         }
1094         len += sizeof(wxUint32);
1095         psz++;
1096     }
1097
1098     if (buf && len<=n-sizeof(wxUint32))
1099         *(wxUint32*)buf=0;
1100
1101     return len;
1102 }
1103
1104
1105 #endif // WC_UTF16
1106
1107
1108 // ============================================================================
1109 // The classes doing conversion using the iconv_xxx() functions
1110 // ============================================================================
1111
1112 #ifdef HAVE_ICONV
1113
1114 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
1115 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
1116 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
1117 //     (which means error) and says there are 0 bytes left in the input buffer --
1118 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
1119 //     this alternative test for iconv() failure.
1120 //     [This bug does not appear in glibc 2.2.]
// NB: the macro arguments are parenthesized so that arbitrary expressions
//     (and not just plain variables) can be passed safely
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of the input pointer to the type used for the second
// iconv() parameter (ICONV_CONST is not defined in this part of the file,
// presumably it comes from the build configuration)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1129
1130 // ----------------------------------------------------------------------------
1131 // wxMBConv_iconv: encapsulates an iconv character set
1132 // ----------------------------------------------------------------------------
1133
1134 class wxMBConv_iconv : public wxMBConv
1135 {
1136 public:
1137     wxMBConv_iconv(const wxChar *name);
1138     virtual ~wxMBConv_iconv();
1139
1140     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1141     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1142
1143     bool IsOk() const
1144         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1145
1146 protected:
1147     // the iconv handlers used to translate from multibyte to wide char and in
1148     // the other direction
1149     iconv_t m2w,
1150             w2m;
1151
1152 private:
1153     // the name (for iconv_open()) of a wide char charset -- if none is
1154     // available on this machine, it will remain NULL
1155     static const char *ms_wcCharsetName;
1156
1157     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1158     // different endian-ness than the native one
1159     static bool ms_wcNeedsSwap;
1160 };
1161
1162 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1163 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1164
1165 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1166 {
1167     // Do it the hard way
1168     char cname[100];
1169     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1170         cname[i] = (char) name[i];
1171
1172     // check for charset that represents wchar_t:
1173     if (ms_wcCharsetName == NULL)
1174     {
1175         ms_wcNeedsSwap = false;
1176
1177         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1178         ms_wcCharsetName = WC_NAME_BEST;
1179         m2w = iconv_open(ms_wcCharsetName, cname);
1180
1181         if (m2w == (iconv_t)-1)
1182         {
1183             // try charset w/o bytesex info (e.g. "UCS4")
1184             // and check for bytesex ourselves:
1185             ms_wcCharsetName = WC_NAME;
1186             m2w = iconv_open(ms_wcCharsetName, cname);
1187
1188             // last bet, try if it knows WCHAR_T pseudo-charset
1189             if (m2w == (iconv_t)-1)
1190             {
1191                 ms_wcCharsetName = "WCHAR_T";
1192                 m2w = iconv_open(ms_wcCharsetName, cname);
1193             }
1194
1195             if (m2w != (iconv_t)-1)
1196             {
1197                 char    buf[2], *bufPtr;
1198                 wchar_t wbuf[2], *wbufPtr;
1199                 size_t  insz, outsz;
1200                 size_t  res;
1201
1202                 buf[0] = 'A';
1203                 buf[1] = 0;
1204                 wbuf[0] = 0;
1205                 insz = 2;
1206                 outsz = SIZEOF_WCHAR_T * 2;
1207                 wbufPtr = wbuf;
1208                 bufPtr = buf;
1209
1210                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1211                             (char**)&wbufPtr, &outsz);
1212
1213                 if (ICONV_FAILED(res, insz))
1214                 {
1215                     ms_wcCharsetName = NULL;
1216                     wxLogLastError(wxT("iconv"));
1217                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1218                 }
1219                 else
1220                 {
1221                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1222                 }
1223             }
1224             else
1225             {
1226                 ms_wcCharsetName = NULL;
1227
1228                 // VS: we must not output an error here, since wxWidgets will safely
1229                 //     fall back to using wxEncodingConverter.
1230                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1231                 //wxLogError(
1232             }
1233         }
1234         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1235     }
1236     else // we already have ms_wcCharsetName
1237     {
1238         m2w = iconv_open(ms_wcCharsetName, cname);
1239     }
1240
1241     // NB: don't ever pass NULL to iconv_open(), it may crash!
1242     if ( ms_wcCharsetName )
1243     {
1244         w2m = iconv_open( cname, ms_wcCharsetName);
1245     }
1246     else
1247     {
1248         w2m = (iconv_t)-1;
1249     }
1250 }
1251
1252 wxMBConv_iconv::~wxMBConv_iconv()
1253 {
1254     if ( m2w != (iconv_t)-1 )
1255         iconv_close(m2w);
1256     if ( w2m != (iconv_t)-1 )
1257         iconv_close(w2m);
1258 }
1259
1260 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1261 {
1262     size_t inbuf = strlen(psz);
1263     size_t outbuf = n * SIZEOF_WCHAR_T;
1264     size_t res, cres;
1265     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1266     wchar_t *bufPtr = buf;
1267     const char *pszPtr = psz;
1268
1269     if (buf)
1270     {
1271         // have destination buffer, convert there
1272         cres = iconv(m2w,
1273                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1274                      (char**)&bufPtr, &outbuf);
1275         res = n - (outbuf / SIZEOF_WCHAR_T);
1276
1277         if (ms_wcNeedsSwap)
1278         {
1279             // convert to native endianness
1280             WC_BSWAP(buf /* _not_ bufPtr */, res)
1281         }
1282
1283         // NB: iconv was given only strlen(psz) characters on input, and so
1284         //     it couldn't convert the trailing zero. Let's do it ourselves
1285         //     if there's some room left for it in the output buffer.
1286         if (res < n)
1287             buf[res] = 0;
1288     }
1289     else
1290     {
1291         // no destination buffer... convert using temp buffer
1292         // to calculate destination buffer requirement
1293         wchar_t tbuf[8];
1294         res = 0;
1295         do {
1296             bufPtr = tbuf;
1297             outbuf = 8*SIZEOF_WCHAR_T;
1298
1299             cres = iconv(m2w,
1300                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1301                          (char**)&bufPtr, &outbuf );
1302
1303             res += 8-(outbuf/SIZEOF_WCHAR_T);
1304         } while ((cres==(size_t)-1) && (errno==E2BIG));
1305     }
1306
1307     if (ICONV_FAILED(cres, inbuf))
1308     {
1309         //VS: it is ok if iconv fails, hence trace only
1310         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1311         return (size_t)-1;
1312     }
1313
1314     return res;
1315 }
1316
1317 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1318 {
1319     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1320     size_t outbuf = n;
1321     size_t res, cres;
1322
1323     wchar_t *tmpbuf = 0;
1324
1325     if (ms_wcNeedsSwap)
1326     {
1327         // need to copy to temp buffer to switch endianness
1328         // this absolutely doesn't rock!
1329         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1330         //  could be in read-only memory, or be accessed in some other thread)
1331         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1332         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1333         WC_BSWAP(tmpbuf, inbuf)
1334         psz=tmpbuf;
1335     }
1336
1337     if (buf)
1338     {
1339         // have destination buffer, convert there
1340         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1341
1342         res = n-outbuf;
1343
1344         // NB: iconv was given only wcslen(psz) characters on input, and so
1345         //     it couldn't convert the trailing zero. Let's do it ourselves
1346         //     if there's some room left for it in the output buffer.
1347         if (res < n)
1348             buf[0] = 0;
1349     }
1350     else
1351     {
1352         // no destination buffer... convert using temp buffer
1353         // to calculate destination buffer requirement
1354         char tbuf[16];
1355         res = 0;
1356         do {
1357             buf = tbuf; outbuf = 16;
1358
1359             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1360
1361             res += 16 - outbuf;
1362         } while ((cres==(size_t)-1) && (errno==E2BIG));
1363     }
1364
1365     if (ms_wcNeedsSwap)
1366     {
1367         free(tmpbuf);
1368     }
1369
1370     if (ICONV_FAILED(cres, inbuf))
1371     {
1372         //VS: it is ok if iconv fails, hence trace only
1373         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1374         return (size_t)-1;
1375     }
1376
1377     return res;
1378 }
1379
1380 #endif // HAVE_ICONV
1381
1382
1383 // ============================================================================
1384 // Win32 conversion classes
1385 // ============================================================================
1386
1387 #ifdef wxHAVE_WIN32_MB2WC
1388
1389 // from utils.cpp
1390 #if wxUSE_FONTMAP
1391 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1392 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1393 #endif
1394
1395 class wxMBConv_win32 : public wxMBConv
1396 {
1397 public:
1398     wxMBConv_win32()
1399     {
1400         m_CodePage = CP_ACP;
1401     }
1402
1403 #if wxUSE_FONTMAP
1404     wxMBConv_win32(const wxChar* name)
1405     {
1406         m_CodePage = wxCharsetToCodepage(name);
1407     }
1408
1409     wxMBConv_win32(wxFontEncoding encoding)
1410     {
1411         m_CodePage = wxEncodingToCodepage(encoding);
1412     }
1413 #endif
1414
1415     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1416     {
1417         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1418         // the behaviour is not compatible with the Unix version (using iconv)
1419         // and break the library itself, e.g. wxTextInputStream::NextChar()
1420         // wouldn't work if reading an incomplete MB char didn't result in an
1421         // error
1422         const size_t len = ::MultiByteToWideChar
1423                              (
1424                                 m_CodePage,     // code page
1425                                 MB_ERR_INVALID_CHARS, // flags: fall on error
1426                                 psz,            // input string
1427                                 -1,             // its length (NUL-terminated)
1428                                 buf,            // output string
1429                                 buf ? n : 0     // size of output buffer
1430                              );
1431
1432         // note that it returns count of written chars for buf != NULL and size
1433         // of the needed buffer for buf == NULL so in either case the length of
1434         // the string (which never includes the terminating NUL) is one less
1435         return len ? len - 1 : (size_t)-1;
1436     }
1437
1438     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1439     {
1440         /*
1441             we have a problem here: by default, WideCharToMultiByte() may
1442             replace characters unrepresentable in the target code page with bad
1443             quality approximations such as turning "1/2" symbol (U+00BD) into
1444             "1" for the code pages which don't have it and we, obviously, want
1445             to avoid this at any price
1446
1447             the trouble is that this function does it _silently_, i.e. it won't
1448             even tell us whether it did or not... Win98/2000 and higher provide
1449             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1450             we have to resort to a round trip, i.e. check that converting back
1451             results in the same string -- this is, of course, expensive but
1452             otherwise we simply can't be sure to not garble the data.
1453          */
1454
1455         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1456         // it doesn't work with CJK encodings (which we test for rather roughly
1457         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1458         // supporting it
1459         BOOL usedDef wxDUMMY_INITIALIZE(false);
1460         BOOL *pUsedDef;
1461         int flags;
1462         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1463         {
1464             // it's our lucky day
1465             flags = WC_NO_BEST_FIT_CHARS;
1466             pUsedDef = &usedDef;
1467         }
1468         else // old system or unsupported encoding
1469         {
1470             flags = 0;
1471             pUsedDef = NULL;
1472         }
1473
1474         const size_t len = ::WideCharToMultiByte
1475                              (
1476                                 m_CodePage,     // code page
1477                                 flags,          // either none or no best fit
1478                                 pwz,            // input string
1479                                 -1,             // it is (wide) NUL-terminated
1480                                 buf,            // output buffer
1481                                 buf ? n : 0,    // and its size
1482                                 NULL,           // default "replacement" char
1483                                 pUsedDef        // [out] was it used?
1484                              );
1485
1486         if ( !len )
1487         {
1488             // function totally failed
1489             return (size_t)-1;
1490         }
1491
1492         // if we were really converting, check if we succeeded
1493         if ( buf )
1494         {
1495             if ( flags )
1496             {
1497                 // check if the conversion failed, i.e. if any replacements
1498                 // were done
1499                 if ( usedDef )
1500                     return (size_t)-1;
1501             }
1502             else // we must resort to double tripping...
1503             {
1504                 wxWCharBuffer wcBuf(n);
1505                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1506                         wcscmp(wcBuf, pwz) != 0 )
1507                 {
1508                     // we didn't obtain the same thing we started from, hence
1509                     // the conversion was lossy and we consider that it failed
1510                     return (size_t)-1;
1511                 }
1512             }
1513         }
1514
1515         // see the comment above for the reason of "len - 1"
1516         return len - 1;
1517     }
1518
1519     bool IsOk() const { return m_CodePage != -1; }
1520
1521 private:
1522     static bool CanUseNoBestFit()
1523     {
1524         static int s_isWin98Or2k = -1;
1525
1526         if ( s_isWin98Or2k == -1 )
1527         {
1528             int verMaj, verMin;
1529             switch ( wxGetOsVersion(&verMaj, &verMin) )
1530             {
1531                 case wxWIN95:
1532                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1533                     break;
1534
1535                 case wxWINDOWS_NT:
1536                     s_isWin98Or2k = verMaj >= 5;
1537                     break;
1538
1539                 default:
1540                     // unknown, be conseravtive by default
1541                     s_isWin98Or2k = 0;
1542             }
1543
1544             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1545         }
1546
1547         return s_isWin98Or2k == 1;
1548     }
1549
1550     long m_CodePage;
1551 };
1552
1553 #endif // wxHAVE_WIN32_MB2WC
1554
1555 // ============================================================================
1556 // Cocoa conversion classes
1557 // ============================================================================
1558
1559 #if defined(__WXCOCOA__)
1560
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32
// internally quite a bit - it's just not public (yet).
1564
1565 #include <CoreFoundation/CFString.h>
1566 #include <CoreFoundation/CFStringEncodingExt.h>
1567
1568 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1569 {
1570     CFStringEncoding enc = kCFStringEncodingInvalidId ;
1571     if ( encoding == wxFONTENCODING_DEFAULT )
1572     {
1573         enc = CFStringGetSystemEncoding();
1574     }
1575     else switch( encoding)
1576     {
1577         case wxFONTENCODING_ISO8859_1 :
1578             enc = kCFStringEncodingISOLatin1 ;
1579             break ;
1580         case wxFONTENCODING_ISO8859_2 :
1581             enc = kCFStringEncodingISOLatin2;
1582             break ;
1583         case wxFONTENCODING_ISO8859_3 :
1584             enc = kCFStringEncodingISOLatin3 ;
1585             break ;
1586         case wxFONTENCODING_ISO8859_4 :
1587             enc = kCFStringEncodingISOLatin4;
1588             break ;
1589         case wxFONTENCODING_ISO8859_5 :
1590             enc = kCFStringEncodingISOLatinCyrillic;
1591             break ;
1592         case wxFONTENCODING_ISO8859_6 :
1593             enc = kCFStringEncodingISOLatinArabic;
1594             break ;
1595         case wxFONTENCODING_ISO8859_7 :
1596             enc = kCFStringEncodingISOLatinGreek;
1597             break ;
1598         case wxFONTENCODING_ISO8859_8 :
1599             enc = kCFStringEncodingISOLatinHebrew;
1600             break ;
1601         case wxFONTENCODING_ISO8859_9 :
1602             enc = kCFStringEncodingISOLatin5;
1603             break ;
1604         case wxFONTENCODING_ISO8859_10 :
1605             enc = kCFStringEncodingISOLatin6;
1606             break ;
1607         case wxFONTENCODING_ISO8859_11 :
1608             enc = kCFStringEncodingISOLatinThai;
1609             break ;
1610         case wxFONTENCODING_ISO8859_13 :
1611             enc = kCFStringEncodingISOLatin7;
1612             break ;
1613         case wxFONTENCODING_ISO8859_14 :
1614             enc = kCFStringEncodingISOLatin8;
1615             break ;
1616         case wxFONTENCODING_ISO8859_15 :
1617             enc = kCFStringEncodingISOLatin9;
1618             break ;
1619
1620         case wxFONTENCODING_KOI8 :
1621             enc = kCFStringEncodingKOI8_R;
1622             break ;
1623         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1624             enc = kCFStringEncodingDOSRussian;
1625             break ;
1626
1627 //      case wxFONTENCODING_BULGARIAN :
1628 //          enc = ;
1629 //          break ;
1630
1631         case wxFONTENCODING_CP437 :
1632             enc =kCFStringEncodingDOSLatinUS ;
1633             break ;
1634         case wxFONTENCODING_CP850 :
1635             enc = kCFStringEncodingDOSLatin1;
1636             break ;
1637         case wxFONTENCODING_CP852 :
1638             enc = kCFStringEncodingDOSLatin2;
1639             break ;
1640         case wxFONTENCODING_CP855 :
1641             enc = kCFStringEncodingDOSCyrillic;
1642             break ;
1643         case wxFONTENCODING_CP866 :
1644             enc =kCFStringEncodingDOSRussian ;
1645             break ;
1646         case wxFONTENCODING_CP874 :
1647             enc = kCFStringEncodingDOSThai;
1648             break ;
1649         case wxFONTENCODING_CP932 :
1650             enc = kCFStringEncodingDOSJapanese;
1651             break ;
1652         case wxFONTENCODING_CP936 :
1653             enc =kCFStringEncodingDOSChineseSimplif ;
1654             break ;
1655         case wxFONTENCODING_CP949 :
1656             enc = kCFStringEncodingDOSKorean;
1657             break ;
1658         case wxFONTENCODING_CP950 :
1659             enc = kCFStringEncodingDOSChineseTrad;
1660             break ;
1661         case wxFONTENCODING_CP1250 :
1662             enc = kCFStringEncodingWindowsLatin2;
1663             break ;
1664         case wxFONTENCODING_CP1251 :
1665             enc =kCFStringEncodingWindowsCyrillic ;
1666             break ;
1667         case wxFONTENCODING_CP1252 :
1668             enc =kCFStringEncodingWindowsLatin1 ;
1669             break ;
1670         case wxFONTENCODING_CP1253 :
1671             enc = kCFStringEncodingWindowsGreek;
1672             break ;
1673         case wxFONTENCODING_CP1254 :
1674             enc = kCFStringEncodingWindowsLatin5;
1675             break ;
1676         case wxFONTENCODING_CP1255 :
1677             enc =kCFStringEncodingWindowsHebrew ;
1678             break ;
1679         case wxFONTENCODING_CP1256 :
1680             enc =kCFStringEncodingWindowsArabic ;
1681             break ;
1682         case wxFONTENCODING_CP1257 :
1683             enc = kCFStringEncodingWindowsBalticRim;
1684             break ;
1685 //   This only really encodes to UTF7 (if that) evidently
1686 //        case wxFONTENCODING_UTF7 :
1687 //            enc = kCFStringEncodingNonLossyASCII ;
1688 //            break ;
1689         case wxFONTENCODING_UTF8 :
1690             enc = kCFStringEncodingUTF8 ;
1691             break ;
1692         case wxFONTENCODING_EUC_JP :
1693             enc = kCFStringEncodingEUC_JP;
1694             break ;
1695         case wxFONTENCODING_UTF16 :
1696             enc = kCFStringEncodingUnicode ;
1697             break ;
1698         case wxFONTENCODING_MACROMAN :
1699             enc = kCFStringEncodingMacRoman ;
1700             break ;
1701         case wxFONTENCODING_MACJAPANESE :
1702             enc = kCFStringEncodingMacJapanese ;
1703             break ;
1704         case wxFONTENCODING_MACCHINESETRAD :
1705             enc = kCFStringEncodingMacChineseTrad ;
1706             break ;
1707         case wxFONTENCODING_MACKOREAN :
1708             enc = kCFStringEncodingMacKorean ;
1709             break ;
1710         case wxFONTENCODING_MACARABIC :
1711             enc = kCFStringEncodingMacArabic ;
1712             break ;
1713         case wxFONTENCODING_MACHEBREW :
1714             enc = kCFStringEncodingMacHebrew ;
1715             break ;
1716         case wxFONTENCODING_MACGREEK :
1717             enc = kCFStringEncodingMacGreek ;
1718             break ;
1719         case wxFONTENCODING_MACCYRILLIC :
1720             enc = kCFStringEncodingMacCyrillic ;
1721             break ;
1722         case wxFONTENCODING_MACDEVANAGARI :
1723             enc = kCFStringEncodingMacDevanagari ;
1724             break ;
1725         case wxFONTENCODING_MACGURMUKHI :
1726             enc = kCFStringEncodingMacGurmukhi ;
1727             break ;
1728         case wxFONTENCODING_MACGUJARATI :
1729             enc = kCFStringEncodingMacGujarati ;
1730             break ;
1731         case wxFONTENCODING_MACORIYA :
1732             enc = kCFStringEncodingMacOriya ;
1733             break ;
1734         case wxFONTENCODING_MACBENGALI :
1735             enc = kCFStringEncodingMacBengali ;
1736             break ;
1737         case wxFONTENCODING_MACTAMIL :
1738             enc = kCFStringEncodingMacTamil ;
1739             break ;
1740         case wxFONTENCODING_MACTELUGU :
1741             enc = kCFStringEncodingMacTelugu ;
1742             break ;
1743         case wxFONTENCODING_MACKANNADA :
1744             enc = kCFStringEncodingMacKannada ;
1745             break ;
1746         case wxFONTENCODING_MACMALAJALAM :
1747             enc = kCFStringEncodingMacMalayalam ;
1748             break ;
1749         case wxFONTENCODING_MACSINHALESE :
1750             enc = kCFStringEncodingMacSinhalese ;
1751             break ;
1752         case wxFONTENCODING_MACBURMESE :
1753             enc = kCFStringEncodingMacBurmese ;
1754             break ;
1755         case wxFONTENCODING_MACKHMER :
1756             enc = kCFStringEncodingMacKhmer ;
1757             break ;
1758         case wxFONTENCODING_MACTHAI :
1759             enc = kCFStringEncodingMacThai ;
1760             break ;
1761         case wxFONTENCODING_MACLAOTIAN :
1762             enc = kCFStringEncodingMacLaotian ;
1763             break ;
1764         case wxFONTENCODING_MACGEORGIAN :
1765             enc = kCFStringEncodingMacGeorgian ;
1766             break ;
1767         case wxFONTENCODING_MACARMENIAN :
1768             enc = kCFStringEncodingMacArmenian ;
1769             break ;
1770         case wxFONTENCODING_MACCHINESESIMP :
1771             enc = kCFStringEncodingMacChineseSimp ;
1772             break ;
1773         case wxFONTENCODING_MACTIBETAN :
1774             enc = kCFStringEncodingMacTibetan ;
1775             break ;
1776         case wxFONTENCODING_MACMONGOLIAN :
1777             enc = kCFStringEncodingMacMongolian ;
1778             break ;
1779         case wxFONTENCODING_MACETHIOPIC :
1780             enc = kCFStringEncodingMacEthiopic ;
1781             break ;
1782         case wxFONTENCODING_MACCENTRALEUR :
1783             enc = kCFStringEncodingMacCentralEurRoman ;
1784             break ;
1785         case wxFONTENCODING_MACVIATNAMESE :
1786             enc = kCFStringEncodingMacVietnamese ;
1787             break ;
1788         case wxFONTENCODING_MACARABICEXT :
1789             enc = kCFStringEncodingMacExtArabic ;
1790             break ;
1791         case wxFONTENCODING_MACSYMBOL :
1792             enc = kCFStringEncodingMacSymbol ;
1793             break ;
1794         case wxFONTENCODING_MACDINGBATS :
1795             enc = kCFStringEncodingMacDingbats ;
1796             break ;
1797         case wxFONTENCODING_MACTURKISH :
1798             enc = kCFStringEncodingMacTurkish ;
1799             break ;
1800         case wxFONTENCODING_MACCROATIAN :
1801             enc = kCFStringEncodingMacCroatian ;
1802             break ;
1803         case wxFONTENCODING_MACICELANDIC :
1804             enc = kCFStringEncodingMacIcelandic ;
1805             break ;
1806         case wxFONTENCODING_MACROMANIAN :
1807             enc = kCFStringEncodingMacRomanian ;
1808             break ;
1809         case wxFONTENCODING_MACCELTIC :
1810             enc = kCFStringEncodingMacCeltic ;
1811             break ;
1812         case wxFONTENCODING_MACGAELIC :
1813             enc = kCFStringEncodingMacGaelic ;
1814             break ;
1815 //      case wxFONTENCODING_MACKEYBOARD :
1816 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
1817 //          break ;
1818         default :
1819             // because gcc is picky
1820             break ;
1821     } ;
1822     return enc ;
1823 }
1824
1825 class wxMBConv_cocoa : public wxMBConv
1826 {
1827 public:
1828     wxMBConv_cocoa()
1829     {
1830         Init(CFStringGetSystemEncoding()) ;
1831     }
1832
1833     wxMBConv_cocoa(const wxChar* name)
1834     {
1835         Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1836     }
1837
1838     wxMBConv_cocoa(wxFontEncoding encoding)
1839     {
1840         Init( wxCFStringEncFromFontEnc(encoding) );
1841     }
1842
1843     ~wxMBConv_cocoa()
1844     {
1845     }
1846
1847     void Init( CFStringEncoding encoding)
1848     {
1849         m_encoding = encoding ;
1850     }
1851
1852     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
1853     {
1854         wxASSERT(szUnConv);
1855
1856         CFStringRef theString = CFStringCreateWithBytes (
1857                                                 NULL, //the allocator
1858                                                 (const UInt8*)szUnConv,
1859                                                 strlen(szUnConv),
1860                                                 m_encoding,
1861                                                 false //no BOM/external representation
1862                                                 );
1863
1864         wxASSERT(theString);
1865
1866         size_t nOutLength = CFStringGetLength(theString);
1867
1868         if (szOut == NULL)
1869         {
1870             CFRelease(theString);
1871             return nOutLength;
1872         }
1873
1874         CFRange theRange = { 0, nOutSize };
1875
1876 #if SIZEOF_WCHAR_T == 4
1877         UniChar* szUniCharBuffer = new UniChar[nOutSize];
1878 #endif
1879
1880         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
1881
1882         CFRelease(theString);
1883
1884         szUniCharBuffer[nOutLength] = '\0' ;
1885
1886 #if SIZEOF_WCHAR_T == 4
1887         wxMBConvUTF16 converter ;
1888         converter.MB2WC(szOut, (const char*)szUniCharBuffer , nOutSize ) ;
1889         delete[] szUniCharBuffer;
1890 #endif
1891
1892         return nOutLength;
1893     }
1894
1895     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
1896     {
1897         wxASSERT(szUnConv);
1898
1899         size_t nRealOutSize;
1900         size_t nBufSize = wxWcslen(szUnConv);
1901         UniChar* szUniBuffer = (UniChar*) szUnConv;
1902
1903 #if SIZEOF_WCHAR_T == 4
1904         wxMBConvUTF16BE converter ;
1905         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
1906         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
1907         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
1908         nBufSize /= sizeof(UniChar);
1909 #endif
1910
1911         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
1912                                 NULL, //allocator
1913                                 szUniBuffer,
1914                                 nBufSize,
1915                                 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
1916                             );
1917
1918         wxASSERT(theString);
1919
1920         //Note that CER puts a BOM when converting to unicode
1921         //so we  check and use getchars instead in that case
1922         if (m_encoding == kCFStringEncodingUnicode)
1923         {
1924             if (szOut != NULL)
1925                 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
1926
1927             nRealOutSize = CFStringGetLength(theString) + 1;
1928         }
1929         else
1930         {
1931             CFStringGetBytes(
1932                 theString,
1933                 CFRangeMake(0, CFStringGetLength(theString)),
1934                 m_encoding,
1935                 0, //what to put in characters that can't be converted -
1936                     //0 tells CFString to return NULL if it meets such a character
1937                 false, //not an external representation
1938                 (UInt8*) szOut,
1939                 nOutSize,
1940                 (CFIndex*) &nRealOutSize
1941                         );
1942         }
1943
1944         CFRelease(theString);
1945
1946 #if SIZEOF_WCHAR_T == 4
1947         delete[] szUniBuffer;
1948 #endif
1949
1950         return  nRealOutSize - 1;
1951     }
1952
1953     bool IsOk() const
1954     {
1955         return m_encoding != kCFStringEncodingInvalidId &&
1956               CFStringIsEncodingAvailable(m_encoding);
1957     }
1958
1959 private:
1960     CFStringEncoding m_encoding ;
1961 };
1962
1963 #endif // defined(__WXCOCOA__)
1964
1965 // ============================================================================
1966 // Mac conversion classes
1967 // ============================================================================
1968
1969 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1970
1971 class wxMBConv_mac : public wxMBConv
1972 {
1973 public:
1974     wxMBConv_mac()
1975     {
1976         Init(CFStringGetSystemEncoding()) ;
1977     }
1978
1979     wxMBConv_mac(const wxChar* name)
1980     {
1981         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1982     }
1983
1984     wxMBConv_mac(wxFontEncoding encoding)
1985     {
1986         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1987     }
1988
1989     ~wxMBConv_mac()
1990     {
1991         OSStatus status = noErr ;
1992         status = TECDisposeConverter(m_MB2WC_converter);
1993         status = TECDisposeConverter(m_WC2MB_converter);
1994     }
1995
1996
1997     void Init( TextEncodingBase encoding)
1998     {
1999         OSStatus status = noErr ;
2000         m_char_encoding = encoding ;
2001         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2002
2003         status = TECCreateConverter(&m_MB2WC_converter,
2004                                     m_char_encoding,
2005                                     m_unicode_encoding);
2006         status = TECCreateConverter(&m_WC2MB_converter,
2007                                     m_unicode_encoding,
2008                                     m_char_encoding);
2009     }
2010
2011     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2012     {
2013         OSStatus status = noErr ;
2014         ByteCount byteOutLen ;
2015         ByteCount byteInLen = strlen(psz) ;
2016         wchar_t *tbuf = NULL ;
2017         UniChar* ubuf = NULL ;
2018         size_t res = 0 ;
2019
2020         if (buf == NULL)
2021         {
2022             //apple specs say at least 32
2023             n = 32 ;
2024             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2025         }
2026         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2027 #if SIZEOF_WCHAR_T == 4
2028         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2029 #else
2030         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2031 #endif
2032         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2033           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2034 #if SIZEOF_WCHAR_T == 4
2035         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2036         // is not properly terminated we get random characters at the end
2037         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2038         wxMBConvUTF16BE converter ;
2039         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2040         free( ubuf ) ;
2041 #else
2042         res = byteOutLen / sizeof( UniChar ) ;
2043 #endif
2044         if ( buf == NULL )
2045              free(tbuf) ;
2046
2047         if ( buf  && res < n)
2048             buf[res] = 0;
2049
2050         return res ;
2051     }
2052
2053     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2054     {
2055         OSStatus status = noErr ;
2056         ByteCount byteOutLen ;
2057         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2058
2059         char *tbuf = NULL ;
2060
2061         if (buf == NULL)
2062         {
2063             //apple specs say at least 32
2064             n = 32;
2065             tbuf = (char*) malloc( n ) ;
2066         }
2067
2068         ByteCount byteBufferLen = n ;
2069         UniChar* ubuf = NULL ;
2070 #if SIZEOF_WCHAR_T == 4
2071         wxMBConvUTF16BE converter ;
2072         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2073         byteInLen = unicharlen ;
2074         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2075         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2076 #else
2077         ubuf = (UniChar*) psz ;
2078 #endif
2079         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2080             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2081 #if SIZEOF_WCHAR_T == 4
2082         free( ubuf ) ;
2083 #endif
2084         if ( buf == NULL )
2085             free(tbuf) ;
2086
2087         size_t res = byteOutLen ;
2088         if ( buf  && res < n)
2089         {
2090             buf[res] = 0;
2091
2092             //we need to double-trip to verify it didn't insert any ? in place
2093             //of bogus characters
2094             wxWCharBuffer wcBuf(n);
2095             size_t pszlen = wxWcslen(psz);
2096             if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
2097                         wxWcslen(wcBuf) != pszlen ||
2098                         memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2099             {
2100                 // we didn't obtain the same thing we started from, hence
2101                 // the conversion was lossy and we consider that it failed
2102                 return (size_t)-1;
2103             }
2104         }
2105
2106         return res ;
2107     }
2108
2109     bool IsOk() const
2110         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2111
2112 private:
2113     TECObjectRef m_MB2WC_converter ;
2114     TECObjectRef m_WC2MB_converter ;
2115
2116     TextEncodingBase m_char_encoding ;
2117     TextEncodingBase m_unicode_encoding ;
2118 };
2119
2120 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2121
2122 // ============================================================================
2123 // wxEncodingConverter based conversion classes
2124 // ============================================================================
2125
2126 #if wxUSE_FONTMAP
2127
2128 class wxMBConv_wxwin : public wxMBConv
2129 {
2130 private:
2131     void Init()
2132     {
2133         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2134                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2135     }
2136
2137 public:
2138     // temporarily just use wxEncodingConverter stuff,
2139     // so that it works while a better implementation is built
2140     wxMBConv_wxwin(const wxChar* name)
2141     {
2142         if (name)
2143             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2144         else
2145             m_enc = wxFONTENCODING_SYSTEM;
2146
2147         Init();
2148     }
2149
2150     wxMBConv_wxwin(wxFontEncoding enc)
2151     {
2152         m_enc = enc;
2153
2154         Init();
2155     }
2156
2157     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2158     {
2159         size_t inbuf = strlen(psz);
2160         if (buf)
2161             m2w.Convert(psz,buf);
2162         return inbuf;
2163     }
2164
2165     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2166     {
2167         const size_t inbuf = wxWcslen(psz);
2168         if (buf)
2169             w2m.Convert(psz,buf);
2170
2171         return inbuf;
2172     }
2173
2174     bool IsOk() const { return m_ok; }
2175
2176 public:
2177     wxFontEncoding m_enc;
2178     wxEncodingConverter m2w, w2m;
2179
2180     // were we initialized successfully?
2181     bool m_ok;
2182
2183     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2184 };
2185
2186 #endif // wxUSE_FONTMAP
2187
2188 // ============================================================================
2189 // wxCSConv implementation
2190 // ============================================================================
2191
2192 void wxCSConv::Init()
2193 {
2194     m_name = NULL;
2195     m_convReal =  NULL;
2196     m_deferred = true;
2197 }
2198
2199 wxCSConv::wxCSConv(const wxChar *charset)
2200 {
2201     Init();
2202
2203     if ( charset )
2204     {
2205         SetName(charset);
2206     }
2207
2208     m_encoding = wxFONTENCODING_SYSTEM;
2209 }
2210
2211 wxCSConv::wxCSConv(wxFontEncoding encoding)
2212 {
2213     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2214     {
2215         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2216
2217         encoding = wxFONTENCODING_SYSTEM;
2218     }
2219
2220     Init();
2221
2222     m_encoding = encoding;
2223 }
2224
2225 wxCSConv::~wxCSConv()
2226 {
2227     Clear();
2228 }
2229
2230 wxCSConv::wxCSConv(const wxCSConv& conv)
2231         : wxMBConv()
2232 {
2233     Init();
2234
2235     SetName(conv.m_name);
2236     m_encoding = conv.m_encoding;
2237 }
2238
2239 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2240 {
2241     Clear();
2242
2243     SetName(conv.m_name);
2244     m_encoding = conv.m_encoding;
2245
2246     return *this;
2247 }
2248
2249 void wxCSConv::Clear()
2250 {
2251     free(m_name);
2252     delete m_convReal;
2253
2254     m_name = NULL;
2255     m_convReal = NULL;
2256 }
2257
2258 void wxCSConv::SetName(const wxChar *charset)
2259 {
2260     if (charset)
2261     {
2262         m_name = wxStrdup(charset);
2263         m_deferred = true;
2264     }
2265 }
2266
2267 wxMBConv *wxCSConv::DoCreate() const
2268 {
2269     // check for the special case of ASCII or ISO8859-1 charset: as we have
2270     // special knowledge of it anyhow, we don't need to create a special
2271     // conversion object
2272     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2273     {
2274         // don't convert at all
2275         return NULL;
2276     }
2277
2278     // we trust OS to do conversion better than we can so try external
2279     // conversion methods first
2280     //
2281     // the full order is:
2282     //      1. OS conversion (iconv() under Unix or Win32 API)
2283     //      2. hard coded conversions for UTF
2284     //      3. wxEncodingConverter as fall back
2285
2286     // step (1)
2287 #ifdef HAVE_ICONV
2288 #if !wxUSE_FONTMAP
2289     if ( m_name )
2290 #endif // !wxUSE_FONTMAP
2291     {
2292         wxString name(m_name);
2293
2294 #if wxUSE_FONTMAP
2295         if ( name.empty() )
2296             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2297 #endif // wxUSE_FONTMAP
2298
2299         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2300         if ( conv->IsOk() )
2301             return conv;
2302
2303         delete conv;
2304     }
2305 #endif // HAVE_ICONV
2306
2307 #ifdef wxHAVE_WIN32_MB2WC
2308     {
2309 #if wxUSE_FONTMAP
2310         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2311                                       : new wxMBConv_win32(m_encoding);
2312         if ( conv->IsOk() )
2313             return conv;
2314
2315         delete conv;
2316 #else
2317         return NULL;
2318 #endif
2319     }
2320 #endif // wxHAVE_WIN32_MB2WC
2321 #if defined(__WXMAC__)
2322     {
2323         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2324         {
2325
2326             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2327                                         : new wxMBConv_mac(m_encoding);
2328             if ( conv->IsOk() )
2329                  return conv;
2330
2331             delete conv;
2332         }
2333     }
2334 #endif
2335 #if defined(__WXCOCOA__)
2336     {
2337         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2338         {
2339
2340             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2341                                           : new wxMBConv_cocoa(m_encoding);
2342             if ( conv->IsOk() )
2343                  return conv;
2344
2345             delete conv;
2346         }
2347     }
2348 #endif
2349     // step (2)
2350     wxFontEncoding enc = m_encoding;
2351 #if wxUSE_FONTMAP
2352     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2353     {
2354         // use "false" to suppress interactive dialogs -- we can be called from
2355         // anywhere and popping up a dialog from here is the last thing we want to
2356         // do
2357         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2358     }
2359 #endif // wxUSE_FONTMAP
2360
2361     switch ( enc )
2362     {
2363         case wxFONTENCODING_UTF7:
2364              return new wxMBConvUTF7;
2365
2366         case wxFONTENCODING_UTF8:
2367              return new wxMBConvUTF8;
2368
2369         case wxFONTENCODING_UTF16BE:
2370              return new wxMBConvUTF16BE;
2371
2372         case wxFONTENCODING_UTF16LE:
2373              return new wxMBConvUTF16LE;
2374
2375         case wxFONTENCODING_UTF32BE:
2376              return new wxMBConvUTF32BE;
2377
2378         case wxFONTENCODING_UTF32LE:
2379              return new wxMBConvUTF32LE;
2380
2381         default:
2382              // nothing to do but put here to suppress gcc warnings
2383              ;
2384     }
2385
2386     // step (3)
2387 #if wxUSE_FONTMAP
2388     {
2389         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2390                                       : new wxMBConv_wxwin(m_encoding);
2391         if ( conv->IsOk() )
2392             return conv;
2393
2394         delete conv;
2395     }
2396 #endif // wxUSE_FONTMAP
2397
2398     // NB: This is a hack to prevent deadlock. What could otherwise happen
2399     //     in Unicode build: wxConvLocal creation ends up being here
2400     //     because of some failure and logs the error. But wxLog will try to
2401     //     attach timestamp, for which it will need wxConvLocal (to convert
2402     //     time to char* and then wchar_t*), but that fails, tries to log
2403     //     error, but wxLog has a (already locked) critical section that
2404     //     guards static buffer.
2405     static bool alreadyLoggingError = false;
2406     if (!alreadyLoggingError)
2407     {
2408         alreadyLoggingError = true;
2409         wxLogError(_("Cannot convert from the charset '%s'!"),
2410                    m_name ? m_name
2411                       :
2412 #if wxUSE_FONTMAP
2413                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2414 #else // !wxUSE_FONTMAP
2415                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2416 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2417               );
2418         alreadyLoggingError = false;
2419     }
2420
2421     return NULL;
2422 }
2423
2424 void wxCSConv::CreateConvIfNeeded() const
2425 {
2426     if ( m_deferred )
2427     {
2428         wxCSConv *self = (wxCSConv *)this; // const_cast
2429
2430 #if wxUSE_INTL
2431         // if we don't have neither the name nor the encoding, use the default
2432         // encoding for this system
2433         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2434         {
2435             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2436         }
2437 #endif // wxUSE_INTL
2438
2439         self->m_convReal = DoCreate();
2440         self->m_deferred = false;
2441     }
2442 }
2443
2444 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2445 {
2446     CreateConvIfNeeded();
2447
2448     if (m_convReal)
2449         return m_convReal->MB2WC(buf, psz, n);
2450
2451     // latin-1 (direct)
2452     size_t len = strlen(psz);
2453
2454     if (buf)
2455     {
2456         for (size_t c = 0; c <= len; c++)
2457             buf[c] = (unsigned char)(psz[c]);
2458     }
2459
2460     return len;
2461 }
2462
2463 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2464 {
2465     CreateConvIfNeeded();
2466
2467     if (m_convReal)
2468         return m_convReal->WC2MB(buf, psz, n);
2469
2470     // latin-1 (direct)
2471     const size_t len = wxWcslen(psz);
2472     if (buf)
2473     {
2474         for (size_t c = 0; c <= len; c++)
2475         {
2476             if (psz[c] > 0xFF)
2477                 return (size_t)-1;
2478             buf[c] = (char)psz[c];
2479         }
2480     }
2481     else
2482     {
2483         for (size_t c = 0; c <= len; c++)
2484         {
2485             if (psz[c] > 0xFF)
2486                 return (size_t)-1;
2487         }
2488     }
2489
2490     return len;
2491 }
2492
2493 // ----------------------------------------------------------------------------
2494 // globals
2495 // ----------------------------------------------------------------------------
2496
// the object behind wxConvLibc (and the initial wxConvCurrent): its class is
// chosen at compile time depending on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;


// the exported names are references bound to the file-local objects above;
// wxConvCurrent is a pointer which initially designates the libc converter
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2517
2518 #else // !wxUSE_WCHAR_T
2519
// stand-ins in absence of wchar_t: plain wxMBConv objects defined under the
// names of the predefined converters
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2525
2526 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2527
2528