src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
   5 //              Ryan Norton, Fredrik Roubert (UTF7)
   6 // Modified by:
   7 // Created:     29/01/98
   8 // RCS-ID:      $Id$
   9 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
  10 //              (c) 2000-2003 Vadim Zeitlin
  11 //              (c) 2004 Ryan Norton, Fredrik Roubert
  12 // Licence:     wxWindows licence
  13 /////////////////////////////////////////////////////////////////////////////
  14
  15 // ============================================================================
  16 // declarations
  17 // ============================================================================
  18
  19 // ----------------------------------------------------------------------------
  20 // headers
  21 // ----------------------------------------------------------------------------
  22
  23 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  24   #pragma implementation "strconv.h"
  25 #endif
  26
  27 // For compilers that support precompilation, includes "wx.h".
  28 #include "wx/wxprec.h"
  29
  30 #ifdef __BORLANDC__
  31   #pragma hdrstop
  32 #endif
  33
  34 #ifndef WX_PRECOMP
  35     #include "wx/intl.h"
  36     #include "wx/log.h"
  37 #endif // WX_PRECOMP
  38
  39 #include "wx/strconv.h"
  40
  41 #if wxUSE_WCHAR_T
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/private.h"
  45 #endif
  46
  47 #ifdef __WINDOWS__
  48     #include "wx/msw/missing.h"
  49 #endif
  50
  51 #ifndef __WXWINCE__
  52 #include <errno.h>
  53 #endif
  54
  55 #include <ctype.h>
  56 #include <string.h>
  57 #include <stdlib.h>
  58
  59 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  60     #define wxHAVE_WIN32_MB2WC
  61 #endif // __WIN32__ but !__WXMICROWIN__
  62
  63 // ----------------------------------------------------------------------------
  64 // headers
  65 // ----------------------------------------------------------------------------
  66
  67 #ifdef __SALFORDC__
  68     #include <clib.h>
  69 #endif
  70
  71 #ifdef HAVE_ICONV
  72     #include <iconv.h>
  73 #endif
  74
  75 #include "wx/encconv.h"
  76 #include "wx/fontmap.h"
  77 #include "wx/utils.h"
  78
  79 #ifdef __WXMAC__
  80 #include <ATSUnicode.h>
  81 #include <TextCommon.h>
  82 #include <TextEncodingConverter.h>
  83
  84 #include  "wx/mac/private.h"  // includes mac headers
  85 #endif
  86 // ----------------------------------------------------------------------------
  87 // macros
  88 // ----------------------------------------------------------------------------
  89
// In-place element swapping helpers: each expands to a braced block that
// applies wxUINT32_SWAP_ALWAYS (resp. wxUINT16_SWAP_ALWAYS) to the first len
// elements of str, using its own local counter _c.
// NOTE(review): the macro arguments are not parenthesised in the expansion,
// so only simple expressions should be passed for str and len.
#define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
#define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  92
// Select the wchar_t-related names according to the size of wchar_t:
//  - WC_NAME:      generic encoding name for wchar_t data
//  - WC_NAME_BEST: the same encoding with the host byte order spelled out
//  - WC_BSWAP:     the swapping macro matching the wchar_t width
//  - WC_UTF16:     defined only when wchar_t is 16 bits wide; the converters
//                  below use it to choose between the UTF-16 and UCS-4
//                  flavours of their code
// NOTE(review): the names are presumably meant to be passed to iconv_open();
// the code using them is not visible here -- confirm.
#if SIZEOF_WCHAR_T == 4
    #define WC_NAME         "UCS4"
    #define WC_BSWAP         BSWAP_UCS4
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UCS-4BE"
    #else
      #define WC_NAME_BEST  "UCS-4LE"
    #endif
#elif SIZEOF_WCHAR_T == 2
    #define WC_NAME         "UTF16"
    #define WC_BSWAP         BSWAP_UTF16
    #define WC_UTF16
    #ifdef WORDS_BIGENDIAN
      #define WC_NAME_BEST  "UTF-16BE"
    #else
      #define WC_NAME_BEST  "UTF-16LE"
    #endif
#else // sizeof(wchar_t) != 2 nor 4
    // no other wchar_t size is supported: fail the build loudly
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
 114
 115 // ============================================================================
 116 // implementation
 117 // ============================================================================
 118
 119 // ----------------------------------------------------------------------------
 120 // UTF-16 en/decoding to/from UCS-4
 121 // ----------------------------------------------------------------------------
 122
 123
 124 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 125 {
 126     if (input<=0xffff)
 127     {
 128         if (output)
 129             *output = (wxUint16) input;
 130         return 1;
 131     }
 132     else if (input>=0x110000)
 133     {
 134         return (size_t)-1;
 135     }
 136     else
 137     {
 138         if (output)
 139         {
 140             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 141             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 142         }
 143         return 2;
 144     }
 145 }
 146
 147 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 148 {
 149     if ((*input<0xd800) || (*input>0xdfff))
 150     {
 151         output = *input;
 152         return 1;
 153     }
 154     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 155     {
 156         output = *input;
 157         return (size_t)-1;
 158     }
 159     else
 160     {
 161         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 162         return 2;
 163     }
 164 }
 165
 166
 167 // ----------------------------------------------------------------------------
 168 // wxMBConv
 169 // ----------------------------------------------------------------------------
 170
// Destructor: intentionally empty, it is defined out of line here rather
// than in the header.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
 175
 176 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 177 {
 178     if ( psz )
 179     {
 180         // calculate the length of the buffer needed first
 181         size_t nLen = MB2WC(NULL, psz, 0);
 182         if ( nLen != (size_t)-1 )
 183         {
 184             // now do the actual conversion
 185             wxWCharBuffer buf(nLen);
 186             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 187             if ( nLen != (size_t)-1 )
 188             {
 189                 return buf;
 190             }
 191         }
 192     }
 193
 194     wxWCharBuffer buf((wchar_t *)NULL);
 195
 196     return buf;
 197 }
 198
 199 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 200 {
 201     if ( pwz )
 202     {
 203         size_t nLen = WC2MB(NULL, pwz, 0);
 204         if ( nLen != (size_t)-1 )
 205         {
 206             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 207             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 208             if ( nLen != (size_t)-1 )
 209             {
 210                 return buf;
 211             }
 212         }
 213     }
 214
 215     wxCharBuffer buf((char *)NULL);
 216
 217     return buf;
 218 }
 219
 220 size_t wxMBConv::MB2WC(wchar_t* szBuffer, const char* szString,
 221                        size_t outsize, size_t nStringLen) const
 222 {
 223     const char* szEnd = szString + nStringLen + 1;
 224     const char* szPos = szString;
 225     const char* szStart = szPos;
 226
 227     size_t nActualLength = 0;
 228
 229     //Convert the string until the length() is reached, continuing the
 230     //loop every time a null character is reached
 231     while(szPos != szEnd)
 232     {
 233         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 234
 235         //Get the length of the current (sub)string
 236         size_t nLen = MB2WC(NULL, szPos, 0);
 237
 238         //Invalid conversion?
 239         if( nLen == (size_t)-1 )
 240             return nLen;
 241
 242         //Increase the actual length (+1 for current null character)
 243         nActualLength += nLen + 1;
 244
 245         //Only copy data in if buffer size is big enough
 246         if (szBuffer != NULL &&
 247             nActualLength <= outsize)
 248         {
 249             //Convert the current (sub)string
 250             if ( MB2WC(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 251                 return (size_t)-1;
 252         }
 253
 254         //Increment to next (sub)string
 255         //Note that we have to use strlen here instead of nLen
 256         //here because XX2XX gives us the size of the output buffer,
 257         //not neccessarly the length of the string
 258         szPos += strlen(szPos) + 1;
 259     }
 260
 261     return nActualLength - 1; //success - return actual length
 262 }
 263
 264 size_t wxMBConv::WC2MB(char* szBuffer, const wchar_t* szString,
 265                        size_t outsize, size_t nStringLen) const
 266 {
 267     const wchar_t* szEnd = szString + nStringLen + 1;
 268     const wchar_t* szPos = szString;
 269     const wchar_t* szStart = szPos;
 270
 271     size_t nActualLength = 0;
 272
 273     //Convert the string until the length() is reached, continuing the
 274     //loop every time a null character is reached
 275     while(szPos != szEnd)
 276     {
 277         wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
 278
 279         //Get the length of the current (sub)string
 280         size_t nLen = WC2MB(NULL, szPos, 0);
 281
 282         //Invalid conversion?
 283         if( nLen == (size_t)-1 )
 284             return nLen;
 285
 286         //Increase the actual length (+1 for current null character)
 287         nActualLength += nLen + 1;
 288
 289         //Only copy data in if buffer size is big enough
 290         if (szBuffer != NULL &&
 291             nActualLength <= outsize)
 292         {
 293             //Convert the current (sub)string
 294             if(WC2MB(&szBuffer[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
 295                 return (size_t)-1;
 296         }
 297
 298         //Increment to next (sub)string
 299         //Note that we have to use wxWcslen here instead of nLen
 300         //here because XX2XX gives us the size of the output buffer,
 301         //not neccessarly the length of the string
 302         szPos += wxWcslen(szPos) + 1;
 303     }
 304
 305     return nActualLength - 1;  //success - return actual length
 306 }
 307
 308 // ----------------------------------------------------------------------------
 309 // wxMBConvLibc
 310 // ----------------------------------------------------------------------------
 311
// Multibyte to wide character conversion: simply forwards buf, psz and n to
// wxMB2WC() and returns its result unchanged.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
 316
// Wide character to multibyte conversion: simply forwards buf, psz and n to
// wxWC2MB() and returns its result unchanged.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
 321 // ----------------------------------------------------------------------------
 322 // UTF-7
 323 // ----------------------------------------------------------------------------
 324
 325 // Implementation (C) 2004 Fredrik Roubert
 326
//
// BASE64 decoding table
//
// Indexed by the byte value (256 entries): gives the 6 bit value of a BASE64
// digit ('A'-'Z' -> 0x00-0x19, 'a'-'z' -> 0x1a-0x33, '0'-'9' -> 0x34-0x3d,
// '+' -> 0x3e, '/' -> 0x3f) and 0xff for any byte which is not a BASE64
// digit.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
 365
 366 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 367 {
 368
 369     size_t len = 0;
 370
 371     while (*psz && ((!buf) || (len < n)))
 372     {
 373         unsigned char cc = *psz++;
 374         if (cc != '+')
 375         {
 376             // plain ASCII char
 377             if (buf)
 378                 *buf++ = cc;
 379             len++;
 380         }
 381         else if (*psz == '-')
 382         {
 383             // encoded plus sign
 384             if (buf)
 385                 *buf++ = cc;
 386             len++;
 387             psz++;
 388         }
 389         else
 390         {
 391             // BASE64 encoded string
 392             bool lsb;
 393             unsigned char c;
 394             unsigned int d, l;
 395             for (lsb = false, d = 0, l = 0;
 396                 (cc = utf7unb64[(unsigned char)*psz]) != 0xff; psz++)
 397             {
 398                 d <<= 6;
 399                 d += cc;
 400                 for (l += 6; l >= 8; lsb = !lsb)
 401                 {
 402                     c = (d >> (l -= 8)) % 256;
 403                     if (lsb)
 404                     {
 405                         if (buf)
 406                             *buf++ |= c;
 407                         len ++;
 408                     }
 409                     else
 410                         if (buf)
 411                             *buf = c << 8;
 412                 }
 413             }
 414             if (*psz == '-')
 415                 psz++;
 416         }
 417     }
 418     if (buf && (len < n))
 419         *buf = 0;
 420     return len;
 421 }
 422
//
// BASE64 encoding table
//
// Maps a 6 bit value (0-63) to the corresponding BASE64 digit: 'A'-'Z',
// 'a'-'z', '0'-'9', '+' and '/' in this order.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
 437
//
// UTF-7 encoding table
//
// Indexed by the character code (0-127), each entry is the class of the
// character:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
// wxMBConvUTF7::WC2MB() only writes the characters of class 0 directly, all
// the other ones are BASE64-encoded.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
 457
// Encode the 0-terminated wide character string psz as UTF-7.
//
// buf: output buffer or NULL to only compute the length
// psz: the input text
// n:   capacity of buf in bytes (not used if buf is NULL)
//
// Characters of class 0 in utf7encode are written as is, '+' is written as
// "+-" and runs of all the other characters are written as '+', the BASE64
// encoding of their 16 bit values (most significant byte first) and '-'.
//
// Returns the number of bytes produced, not counting the terminating NUL
// which is only stored if len < n, or (size_t)-1 if a character above
// 0xffff starts a run when wchar_t is not UTF-16.
//
// NOTE(review): n is only checked at the top of the outer loop, the writes
// inside the BASE64 branch are not bounds-checked -- callers presumably size
// the buffer using a first call with NULL buf; confirm.
// NOTE(review): cc is used as index into utf7encode after only checking
// cc < 0x80, this looks unsafe if wchar_t is signed and negative -- confirm.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t
*psz, size_t n) const
{


    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;
        if (cc < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (cc > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // start of an encoded run (or of the "+-" escape)
            if (buf)
                *buf++ = '+';
            len++;
            if (cc != '+')
            {
                // BASE64 encode string
                //
                // d accumulates the bits to output and l counts how many of
                // them have not been written yet
                unsigned int lsb, d, l;
                for (d = 0, l = 0;; psz++)
                {
                    // feed the high byte of cc first, then the low one
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        // output all the complete groups of 6 bits
                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            if (buf)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }
                    // peek at the next character: the run ends at the end
                    // of the string or at a directly encoded character,
                    // otherwise the loop increment consumes it
                    cc = *psz;
                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
                        break;
                }
                if (l != 0)
                {
                    // flush the remaining bits, padded with zeroes on the
                    // right to a full BASE64 digit
                    if (buf)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }
            // end of the run
            if (buf)
                *buf++ = '-';
            len++;
        }
    }
    if (buf && (len < n))
        *buf = 0;
    return len;
}
 526
 527 // ----------------------------------------------------------------------------
 528 // UTF-8
 529 // ----------------------------------------------------------------------------
 530
// utf8_max[i] is the largest value which fits in an UTF-8 sequence using i
// continuation bytes (i.e. i + 1 bytes in total); the last element is a
// sentinel which stops the search loop in wxMBConvUTF8::WC2MB() for any
// 32 bit value.
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 533
 534 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 535 {
 536     size_t len = 0;
 537
 538     while (*psz && ((!buf) || (len < n)))
 539     {
 540         unsigned char cc = *psz++, fc = cc;
 541         unsigned cnt;
 542         for (cnt = 0; fc & 0x80; cnt++)
 543             fc <<= 1;
 544         if (!cnt)
 545         {
 546             // plain ASCII char
 547             if (buf)
 548                 *buf++ = cc;
 549             len++;
 550         }
 551         else
 552         {
 553             cnt--;
 554             if (!cnt)
 555             {
 556                 // invalid UTF-8 sequence
 557                 return (size_t)-1;
 558             }
 559             else
 560             {
 561                 unsigned ocnt = cnt - 1;
 562                 wxUint32 res = cc & (0x3f >> cnt);
 563                 while (cnt--)
 564                 {
 565                     cc = *psz++;
 566                     if ((cc & 0xC0) != 0x80)
 567                     {
 568                         // invalid UTF-8 sequence
 569                         return (size_t)-1;
 570                     }
 571                     res = (res << 6) | (cc & 0x3f);
 572                 }
 573                 if (res <= utf8_max[ocnt])
 574                 {
 575                     // illegal UTF-8 encoding
 576                     return (size_t)-1;
 577                 }
 578 #ifdef WC_UTF16
 579                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 580                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 581                 if (pa == (size_t)-1)
 582                   return (size_t)-1;
 583                 if (buf)
 584                     buf += pa;
 585                 len += pa;
 586 #else // !WC_UTF16
 587                 if (buf)
 588                     *buf++ = res;
 589                 len++;
 590 #endif // WC_UTF16/!WC_UTF16
 591             }
 592         }
 593     }
 594     if (buf && (len < n))
 595         *buf = 0;
 596     return len;
 597 }
 598
 599 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 600 {
 601     size_t len = 0;
 602
 603     while (*psz && ((!buf) || (len < n)))
 604     {
 605         wxUint32 cc;
 606 #ifdef WC_UTF16
 607         // cast is ok for WC_UTF16
 608         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 609         psz += (pa == (size_t)-1) ? 1 : pa;
 610 #else
 611         cc=(*psz++) & 0x7fffffff;
 612 #endif
 613         unsigned cnt;
 614         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 615         if (!cnt)
 616         {
 617             // plain ASCII char
 618             if (buf)
 619                 *buf++ = (char) cc;
 620             len++;
 621         }
 622
 623         else
 624         {
 625             len += cnt + 1;
 626             if (buf)
 627             {
 628                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 629                 while (cnt--)
 630                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 631             }
 632         }
 633     }
 634
 635     if (buf && (len<n)) *buf = 0;
 636
 637     return len;
 638 }
 639
 640
 641
 642
 643 // ----------------------------------------------------------------------------
 644 // UTF-16
 645 // ----------------------------------------------------------------------------
 646
// The UTF-16 converters are implemented below under the names "straight"
// (the units are copied as is) and "swap" (the bytes of each unit are
// exchanged); map these names to the BE and LE classes depending on the
// byte order of the host.
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
 654
 655
 656 #ifdef WC_UTF16
 657
 658 // copy 16bit MB to 16bit String
 659 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 660 {
 661     size_t len=0;
 662
 663     while (*(wxUint16*)psz && (!buf || len < n))
 664     {
 665         if (buf)
 666             *buf++ = *(wxUint16*)psz;
 667         len++;
 668
 669         psz += sizeof(wxUint16);
 670     }
 671     if (buf && len<n)   *buf=0;
 672
 673     return len;
 674 }
 675
 676
 677 // copy 16bit String to 16bit MB
 678 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 679 {
 680     size_t len=0;
 681
 682     while (*psz && (!buf || len < n))
 683     {
 684         if (buf)
 685         {
 686             *(wxUint16*)buf = *psz;
 687             buf += sizeof(wxUint16);
 688         }
 689         len += sizeof(wxUint16);
 690         psz++;
 691     }
 692     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 693
 694     return len;
 695 }
 696
 697
 698 // swap 16bit MB to 16bit String
 699 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 700 {
 701     size_t len=0;
 702
 703     while (*(wxUint16*)psz && (!buf || len < n))
 704     {
 705         if (buf)
 706         {
 707             ((char *)buf)[0] = psz[1];
 708             ((char *)buf)[1] = psz[0];
 709             buf++;
 710         }
 711         len++;
 712         psz += sizeof(wxUint16);
 713     }
 714     if (buf && len<n)   *buf=0;
 715
 716     return len;
 717 }
 718
 719
 720 // swap 16bit MB to 16bit String
 721 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 722 {
 723     size_t len=0;
 724
 725     while (*psz && (!buf || len < n))
 726     {
 727         if (buf)
 728         {
 729             *buf++ = ((char*)psz)[1];
 730             *buf++ = ((char*)psz)[0];
 731         }
 732         len += sizeof(wxUint16);
 733         psz++;
 734     }
 735     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 736
 737     return len;
 738 }
 739
 740
 741 #else // WC_UTF16
 742
 743
 744 // copy 16bit MB to 32bit String
 745 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 746 {
 747     size_t len=0;
 748
 749     while (*(wxUint16*)psz && (!buf || len < n))
 750     {
 751         wxUint32 cc;
 752         size_t pa=decode_utf16((wxUint16*)psz, cc);
 753         if (pa == (size_t)-1)
 754             return pa;
 755
 756         if (buf)
 757             *buf++ = cc;
 758         len++;
 759         psz += pa * sizeof(wxUint16);
 760     }
 761     if (buf && len<n)   *buf=0;
 762
 763     return len;
 764 }
 765
 766
 767 // copy 32bit String to 16bit MB
 768 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 769 {
 770     size_t len=0;
 771
 772     while (*psz && (!buf || len < n))
 773     {
 774         wxUint16 cc[2];
 775         size_t pa=encode_utf16(*psz, cc);
 776
 777         if (pa == (size_t)-1)
 778             return pa;
 779
 780         if (buf)
 781         {
 782             *(wxUint16*)buf = cc[0];
 783             buf += sizeof(wxUint16);
 784             if (pa > 1)
 785             {
 786                 *(wxUint16*)buf = cc[1];
 787                 buf += sizeof(wxUint16);
 788             }
 789         }
 790
 791         len += pa*sizeof(wxUint16);
 792         psz++;
 793     }
 794     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 795
 796     return len;
 797 }
 798
 799
 800 // swap 16bit MB to 32bit String
 801 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 802 {
 803     size_t len=0;
 804
 805     while (*(wxUint16*)psz && (!buf || len < n))
 806     {
 807         wxUint32 cc;
 808         char tmp[4];
 809         tmp[0]=psz[1];  tmp[1]=psz[0];
 810         tmp[2]=psz[3];  tmp[3]=psz[2];
 811
 812         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 813         if (pa == (size_t)-1)
 814             return pa;
 815
 816         if (buf)
 817             *buf++ = cc;
 818
 819         len++;
 820         psz += pa * sizeof(wxUint16);
 821     }
 822     if (buf && len<n)   *buf=0;
 823
 824     return len;
 825 }
 826
 827
 828 // swap 32bit String to 16bit MB
 829 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 830 {
 831     size_t len=0;
 832
 833     while (*psz && (!buf || len < n))
 834     {
 835         wxUint16 cc[2];
 836         size_t pa=encode_utf16(*psz, cc);
 837
 838         if (pa == (size_t)-1)
 839             return pa;
 840
 841         if (buf)
 842         {
 843             *buf++ = ((char*)cc)[1];
 844             *buf++ = ((char*)cc)[0];
 845             if (pa > 1)
 846             {
 847                 *buf++ = ((char*)cc)[3];
 848                 *buf++ = ((char*)cc)[2];
 849             }
 850         }
 851
 852         len += pa*sizeof(wxUint16);
 853         psz++;
 854     }
 855     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 856
 857     return len;
 858 }
 859
 860 #endif // WC_UTF16
 861
 862
 863 // ----------------------------------------------------------------------------
 864 // UTF-32
 865 // ----------------------------------------------------------------------------
 866
// The UTF-32 converters are implemented below under the names "straight"
// (the values are copied as is) and "swap" (the bytes of each value are
// reversed); map these names to the BE and LE classes depending on the byte
// order of the host.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight  wxMBConvUTF32BE
#define wxMBConvUTF32swap      wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap      wxMBConvUTF32BE
#define wxMBConvUTF32straight  wxMBConvUTF32LE
#endif
 874
 875
// global instances of the little and big endian UTF-32 converters
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 878
 879
 880 #ifdef WC_UTF16
 881
 882 // copy 32bit MB to 16bit String
 883 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 884 {
 885     size_t len=0;
 886
 887     while (*(wxUint32*)psz && (!buf || len < n))
 888     {
 889         wxUint16 cc[2];
 890
 891         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 892         if (pa == (size_t)-1)
 893             return pa;
 894
 895         if (buf)
 896         {
 897             *buf++ = cc[0];
 898             if (pa > 1)
 899                 *buf++ = cc[1];
 900         }
 901         len += pa;
 902         psz += sizeof(wxUint32);
 903     }
 904     if (buf && len<n)   *buf=0;
 905
 906     return len;
 907 }
 908
 909
 910 // copy 16bit String to 32bit MB
 911 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 912 {
 913     size_t len=0;
 914
 915     while (*psz && (!buf || len < n))
 916     {
 917         wxUint32 cc;
 918
 919         // cast is ok for WC_UTF16
 920         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 921         if (pa == (size_t)-1)
 922             return pa;
 923
 924         if (buf)
 925         {
 926             *(wxUint32*)buf = cc;
 927             buf += sizeof(wxUint32);
 928         }
 929         len += sizeof(wxUint32);
 930         psz += pa;
 931     }
 932
 933     if (buf && len<=n-sizeof(wxUint32))
 934         *(wxUint32*)buf=0;
 935
 936     return len;
 937 }
 938
 939
 940
 941 // swap 32bit MB to 16bit String
 942 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 943 {
 944     size_t len=0;
 945
 946     while (*(wxUint32*)psz && (!buf || len < n))
 947     {
 948         char tmp[4];
 949         tmp[0] = psz[3];   tmp[1] = psz[2];
 950         tmp[2] = psz[1];   tmp[3] = psz[0];
 951
 952
 953         wxUint16 cc[2];
 954
 955         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 956         if (pa == (size_t)-1)
 957             return pa;
 958
 959         if (buf)
 960         {
 961             *buf++ = cc[0];
 962             if (pa > 1)
 963                 *buf++ = cc[1];
 964         }
 965         len += pa;
 966         psz += sizeof(wxUint32);
 967     }
 968
 969     if (buf && len<n)
 970         *buf=0;
 971
 972     return len;
 973 }
 974
 975
 976 // swap 16bit String to 32bit MB
 977 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 978 {
 979     size_t len=0;
 980
 981     while (*psz && (!buf || len < n))
 982     {
 983         char cc[4];
 984
 985         // cast is ok for WC_UTF16
 986         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 987         if (pa == (size_t)-1)
 988             return pa;
 989
 990         if (buf)
 991         {
 992             *buf++ = cc[3];
 993             *buf++ = cc[2];
 994             *buf++ = cc[1];
 995             *buf++ = cc[0];
 996         }
 997         len += sizeof(wxUint32);
 998         psz += pa;
 999     }
1000
1001     if (buf && len<=n-sizeof(wxUint32))
1002         *(wxUint32*)buf=0;
1003
1004     return len;
1005 }
1006
1007 #else // WC_UTF16
1008
1009
1010 // copy 32bit MB to 32bit String
1011 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1012 {
1013     size_t len=0;
1014
1015     while (*(wxUint32*)psz && (!buf || len < n))
1016     {
1017         if (buf)
1018             *buf++ = *(wxUint32*)psz;
1019         len++;
1020         psz += sizeof(wxUint32);
1021     }
1022
1023     if (buf && len<n)
1024         *buf=0;
1025
1026     return len;
1027 }
1028
1029
1030 // copy 32bit String to 32bit MB
1031 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1032 {
1033     size_t len=0;
1034
1035     while (*psz && (!buf || len < n))
1036     {
1037         if (buf)
1038         {
1039             *(wxUint32*)buf = *psz;
1040             buf += sizeof(wxUint32);
1041         }
1042
1043         len += sizeof(wxUint32);
1044         psz++;
1045     }
1046
1047     if (buf && len<=n-sizeof(wxUint32))
1048         *(wxUint32*)buf=0;
1049
1050     return len;
1051 }
1052
1053
1054 // swap 32bit MB to 32bit String
1055 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1056 {
1057     size_t len=0;
1058
1059     while (*(wxUint32*)psz && (!buf || len < n))
1060     {
1061         if (buf)
1062         {
1063             ((char *)buf)[0] = psz[3];
1064             ((char *)buf)[1] = psz[2];
1065             ((char *)buf)[2] = psz[1];
1066             ((char *)buf)[3] = psz[0];
1067             buf++;
1068         }
1069         len++;
1070         psz += sizeof(wxUint32);
1071     }
1072
1073     if (buf && len<n)
1074         *buf=0;
1075
1076     return len;
1077 }
1078
1079
1080 // swap 32bit String to 32bit MB
1081 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1082 {
1083     size_t len=0;
1084
1085     while (*psz && (!buf || len < n))
1086     {
1087         if (buf)
1088         {
1089             *buf++ = ((char *)psz)[3];
1090             *buf++ = ((char *)psz)[2];
1091             *buf++ = ((char *)psz)[1];
1092             *buf++ = ((char *)psz)[0];
1093         }
1094         len += sizeof(wxUint32);
1095         psz++;
1096     }
1097
1098     if (buf && len<=n-sizeof(wxUint32))
1099         *(wxUint32*)buf=0;
1100
1101     return len;
1102 }
1103
1104
1105 #endif // WC_UTF16
1106
1107
1108 // ============================================================================
1109 // The classes doing conversion using the iconv_xxx() functions
1110 // ============================================================================
1111
1112 #ifdef HAVE_ICONV
1113
// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
//     E2BIG if the output buffer is _exactly_ as big as needed. Such a case is
//     (unless there's yet another bug in glibc) the only one in which iconv()
//     returns (size_t)-1 (which means error) and says there are 0 bytes left
//     in the input buffer -- when a _real_ error occurs, the number of bytes
//     left in the input buffer is non-zero. Hence this alternative test for
//     iconv() failure.
//     [This bug does not appear in glibc 2.2.]
//
// ICONV_FAILED(cres, bufLeft): cres is the value returned by iconv() and
// bufLeft the number of input bytes it left unconverted. The parameters are
// parenthesised so that arbitrary expressions can be passed safely.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
#define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                     (errno != E2BIG || (bufLeft) != 0))
#else
#define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast a pointer to the input pointer to the type iconv() is declared with
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1129
1130 // ----------------------------------------------------------------------------
1131 // wxMBConv_iconv: encapsulates an iconv character set
1132 // ----------------------------------------------------------------------------
1133
1134 class wxMBConv_iconv : public wxMBConv
1135 {
1136 public:
1137     wxMBConv_iconv(const wxChar *name);
1138     virtual ~wxMBConv_iconv();
1139
1140     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1141     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1142
1143     bool IsOk() const
1144         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
1145
1146 protected:
1147     // the iconv handlers used to translate from multibyte to wide char and in
1148     // the other direction
1149     iconv_t m2w,
1150             w2m;
1151
1152 private:
1153     // the name (for iconv_open()) of a wide char charset -- if none is
1154     // available on this machine, it will remain NULL
1155     static const char *ms_wcCharsetName;
1156
1157     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1158     // different endian-ness than the native one
1159     static bool ms_wcNeedsSwap;
1160 };
1161
1162 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
1163 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1164
1165 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1166 {
1167     // Do it the hard way
1168     char cname[100];
1169     for (size_t i = 0; i < wxStrlen(name)+1; i++)
1170         cname[i] = (char) name[i];
1171
1172     // check for charset that represents wchar_t:
1173     if (ms_wcCharsetName == NULL)
1174     {
1175         ms_wcNeedsSwap = false;
1176
1177         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
1178         ms_wcCharsetName = WC_NAME_BEST;
1179         m2w = iconv_open(ms_wcCharsetName, cname);
1180
1181         if (m2w == (iconv_t)-1)
1182         {
1183             // try charset w/o bytesex info (e.g. "UCS4")
1184             // and check for bytesex ourselves:
1185             ms_wcCharsetName = WC_NAME;
1186             m2w = iconv_open(ms_wcCharsetName, cname);
1187
1188             // last bet, try if it knows WCHAR_T pseudo-charset
1189             if (m2w == (iconv_t)-1)
1190             {
1191                 ms_wcCharsetName = "WCHAR_T";
1192                 m2w = iconv_open(ms_wcCharsetName, cname);
1193             }
1194
1195             if (m2w != (iconv_t)-1)
1196             {
1197                 char    buf[2], *bufPtr;
1198                 wchar_t wbuf[2], *wbufPtr;
1199                 size_t  insz, outsz;
1200                 size_t  res;
1201
1202                 buf[0] = 'A';
1203                 buf[1] = 0;
1204                 wbuf[0] = 0;
1205                 insz = 2;
1206                 outsz = SIZEOF_WCHAR_T * 2;
1207                 wbufPtr = wbuf;
1208                 bufPtr = buf;
1209
1210                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1211                             (char**)&wbufPtr, &outsz);
1212
1213                 if (ICONV_FAILED(res, insz))
1214                 {
1215                     ms_wcCharsetName = NULL;
1216                     wxLogLastError(wxT("iconv"));
1217                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
1218                 }
1219                 else
1220                 {
1221                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1222                 }
1223             }
1224             else
1225             {
1226                 ms_wcCharsetName = NULL;
1227
1228                 // VS: we must not output an error here, since wxWidgets will safely
1229                 //     fall back to using wxEncodingConverter.
1230                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
1231                 //wxLogError(
1232             }
1233         }
1234         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
1235     }
1236     else // we already have ms_wcCharsetName
1237     {
1238         m2w = iconv_open(ms_wcCharsetName, cname);
1239     }
1240
1241     // NB: don't ever pass NULL to iconv_open(), it may crash!
1242     if ( ms_wcCharsetName )
1243     {
1244         w2m = iconv_open( cname, ms_wcCharsetName);
1245     }
1246     else
1247     {
1248         w2m = (iconv_t)-1;
1249     }
1250 }
1251
1252 wxMBConv_iconv::~wxMBConv_iconv()
1253 {
1254     if ( m2w != (iconv_t)-1 )
1255         iconv_close(m2w);
1256     if ( w2m != (iconv_t)-1 )
1257         iconv_close(w2m);
1258 }
1259
1260 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1261 {
1262     size_t inbuf = strlen(psz);
1263     size_t outbuf = n * SIZEOF_WCHAR_T;
1264     size_t res, cres;
1265     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1266     wchar_t *bufPtr = buf;
1267     const char *pszPtr = psz;
1268
1269     if (buf)
1270     {
1271         // have destination buffer, convert there
1272         cres = iconv(m2w,
1273                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1274                      (char**)&bufPtr, &outbuf);
1275         res = n - (outbuf / SIZEOF_WCHAR_T);
1276
1277         if (ms_wcNeedsSwap)
1278         {
1279             // convert to native endianness
1280             WC_BSWAP(buf /* _not_ bufPtr */, res)
1281         }
1282
1283         // NB: iconv was given only strlen(psz) characters on input, and so
1284         //     it couldn't convert the trailing zero. Let's do it ourselves
1285         //     if there's some room left for it in the output buffer.
1286         if (res < n)
1287             buf[res] = 0;
1288     }
1289     else
1290     {
1291         // no destination buffer... convert using temp buffer
1292         // to calculate destination buffer requirement
1293         wchar_t tbuf[8];
1294         res = 0;
1295         do {
1296             bufPtr = tbuf;
1297             outbuf = 8*SIZEOF_WCHAR_T;
1298
1299             cres = iconv(m2w,
1300                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1301                          (char**)&bufPtr, &outbuf );
1302
1303             res += 8-(outbuf/SIZEOF_WCHAR_T);
1304         } while ((cres==(size_t)-1) && (errno==E2BIG));
1305     }
1306
1307     if (ICONV_FAILED(cres, inbuf))
1308     {
1309         //VS: it is ok if iconv fails, hence trace only
1310         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1311         return (size_t)-1;
1312     }
1313
1314     return res;
1315 }
1316
1317 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1318 {
1319     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1320     size_t outbuf = n;
1321     size_t res, cres;
1322
1323     wchar_t *tmpbuf = 0;
1324
1325     if (ms_wcNeedsSwap)
1326     {
1327         // need to copy to temp buffer to switch endianness
1328         // this absolutely doesn't rock!
1329         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1330         //  could be in read-only memory, or be accessed in some other thread)
1331         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1332         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1333         WC_BSWAP(tmpbuf, inbuf)
1334         psz=tmpbuf;
1335     }
1336
1337     if (buf)
1338     {
1339         // have destination buffer, convert there
1340         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1341
1342         res = n-outbuf;
1343
1344         // NB: iconv was given only wcslen(psz) characters on input, and so
1345         //     it couldn't convert the trailing zero. Let's do it ourselves
1346         //     if there's some room left for it in the output buffer.
1347         if (res < n)
1348             buf[0] = 0;
1349     }
1350     else
1351     {
1352         // no destination buffer... convert using temp buffer
1353         // to calculate destination buffer requirement
1354         char tbuf[16];
1355         res = 0;
1356         do {
1357             buf = tbuf; outbuf = 16;
1358
1359             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1360
1361             res += 16 - outbuf;
1362         } while ((cres==(size_t)-1) && (errno==E2BIG));
1363     }
1364
1365     if (ms_wcNeedsSwap)
1366     {
1367         free(tmpbuf);
1368     }
1369
1370     if (ICONV_FAILED(cres, inbuf))
1371     {
1372         //VS: it is ok if iconv fails, hence trace only
1373         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1374         return (size_t)-1;
1375     }
1376
1377     return res;
1378 }
1379
1380 #endif // HAVE_ICONV
1381
1382
1383 // ============================================================================
1384 // Win32 conversion classes
1385 // ============================================================================
1386
1387 #ifdef wxHAVE_WIN32_MB2WC
1388
1389 // from utils.cpp
1390 #if wxUSE_FONTMAP
1391 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1392 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1393 #endif
1394
1395 class wxMBConv_win32 : public wxMBConv
1396 {
1397 public:
1398     wxMBConv_win32()
1399     {
1400         m_CodePage = CP_ACP;
1401     }
1402
1403 #if wxUSE_FONTMAP
1404     wxMBConv_win32(const wxChar* name)
1405     {
1406         m_CodePage = wxCharsetToCodepage(name);
1407     }
1408
1409     wxMBConv_win32(wxFontEncoding encoding)
1410     {
1411         m_CodePage = wxEncodingToCodepage(encoding);
1412     }
1413 #endif
1414
1415     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1416     {
1417         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1418         // the behaviour is not compatible with the Unix version (using iconv)
1419         // and break the library itself, e.g. wxTextInputStream::NextChar()
1420         // wouldn't work if reading an incomplete MB char didn't result in an
1421         // error
1422         const size_t len = ::MultiByteToWideChar
1423                              (
1424                                 m_CodePage,     // code page
1425                                 MB_ERR_INVALID_CHARS, // flags: fall on error
1426                                 psz,            // input string
1427                                 -1,             // its length (NUL-terminated)
1428                                 buf,            // output string
1429                                 buf ? n : 0     // size of output buffer
1430                              );
1431
1432         // note that it returns count of written chars for buf != NULL and size
1433         // of the needed buffer for buf == NULL so in either case the length of
1434         // the string (which never includes the terminating NUL) is one less
1435         return len ? len - 1 : (size_t)-1;
1436     }
1437
1438     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1439     {
1440         /*
1441             we have a problem here: by default, WideCharToMultiByte() may
1442             replace characters unrepresentable in the target code page with bad
1443             quality approximations such as turning "1/2" symbol (U+00BD) into
1444             "1" for the code pages which don't have it and we, obviously, want
1445             to avoid this at any price
1446
1447             the trouble is that this function does it _silently_, i.e. it won't
1448             even tell us whether it did or not... Win98/2000 and higher provide
1449             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1450             we have to resort to a round trip, i.e. check that converting back
1451             results in the same string -- this is, of course, expensive but
1452             otherwise we simply can't be sure to not garble the data.
1453          */
1454
1455         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1456         // it doesn't work with CJK encodings (which we test for rather roughly
1457         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1458         // supporting it
1459         BOOL usedDef wxDUMMY_INITIALIZE(false);
1460         BOOL *pUsedDef;
1461         int flags;
1462         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1463         {
1464             // it's our lucky day
1465             flags = WC_NO_BEST_FIT_CHARS;
1466             pUsedDef = &usedDef;
1467         }
1468         else // old system or unsupported encoding
1469         {
1470             flags = 0;
1471             pUsedDef = NULL;
1472         }
1473
1474         const size_t len = ::WideCharToMultiByte
1475                              (
1476                                 m_CodePage,     // code page
1477                                 flags,          // either none or no best fit
1478                                 pwz,            // input string
1479                                 -1,             // it is (wide) NUL-terminated
1480                                 buf,            // output buffer
1481                                 buf ? n : 0,    // and its size
1482                                 NULL,           // default "replacement" char
1483                                 pUsedDef        // [out] was it used?
1484                              );
1485
1486         if ( !len )
1487         {
1488             // function totally failed
1489             return (size_t)-1;
1490         }
1491
1492         // if we were really converting, check if we succeeded
1493         if ( buf )
1494         {
1495             if ( flags )
1496             {
1497                 // check if the conversion failed, i.e. if any replacements
1498                 // were done
1499                 if ( usedDef )
1500                     return (size_t)-1;
1501             }
1502             else // we must resort to double tripping...
1503             {
1504                 wxWCharBuffer wcBuf(n);
1505                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1506                         wcscmp(wcBuf, pwz) != 0 )
1507                 {
1508                     // we didn't obtain the same thing we started from, hence
1509                     // the conversion was lossy and we consider that it failed
1510                     return (size_t)-1;
1511                 }
1512             }
1513         }
1514
1515         // see the comment above for the reason of "len - 1"
1516         return len - 1;
1517     }
1518
1519     bool IsOk() const { return m_CodePage != -1; }
1520
1521 private:
1522     static bool CanUseNoBestFit()
1523     {
1524         static int s_isWin98Or2k = -1;
1525
1526         if ( s_isWin98Or2k == -1 )
1527         {
1528             int verMaj, verMin;
1529             switch ( wxGetOsVersion(&verMaj, &verMin) )
1530             {
1531                 case wxWIN95:
1532                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1533                     break;
1534
1535                 case wxWINDOWS_NT:
1536                     s_isWin98Or2k = verMaj >= 5;
1537                     break;
1538
1539                 default:
1540                     // unknown, be conseravtive by default
1541                     s_isWin98Or2k = 0;
1542             }
1543
1544             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1545         }
1546
1547         return s_isWin98Or2k == 1;
1548     }
1549
1550     long m_CodePage;
1551 };
1552
1553 #endif // wxHAVE_WIN32_MB2WC
1554
1555 // ============================================================================
1556 // Cocoa conversion classes
1557 // ============================================================================
1558
1559 #if defined(__WXCOCOA__)
1560
// RN:  There is no UTF-32 support in either Core Foundation or
// Cocoa.  Strangely enough, Core Foundation uses UTF-32 internally
// quite a bit - it's just not public (yet).
1564
1565 #include <CoreFoundation/CFString.h>
1566 #include <CoreFoundation/CFStringEncodingExt.h>
1567
1568 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
1569 {
1570     CFStringEncoding enc = 0 ;
1571     if ( encoding == wxFONTENCODING_DEFAULT )
1572     {
1573 #if wxUSE_GUI
1574         encoding = wxFont::GetDefaultEncoding() ;
1575 #else
1576         encoding = wxLocale::GetSystemEncoding() ;
1577 #endif
1578     }
1579     else switch( encoding)
1580     {
1581         case wxFONTENCODING_ISO8859_1 :
1582             enc = kCFStringEncodingISOLatin1 ;
1583             break ;
1584         case wxFONTENCODING_ISO8859_2 :
1585             enc = kCFStringEncodingISOLatin2;
1586             break ;
1587         case wxFONTENCODING_ISO8859_3 :
1588             enc = kCFStringEncodingISOLatin3 ;
1589             break ;
1590         case wxFONTENCODING_ISO8859_4 :
1591             enc = kCFStringEncodingISOLatin4;
1592             break ;
1593         case wxFONTENCODING_ISO8859_5 :
1594             enc = kCFStringEncodingISOLatinCyrillic;
1595             break ;
1596         case wxFONTENCODING_ISO8859_6 :
1597             enc = kCFStringEncodingISOLatinArabic;
1598             break ;
1599         case wxFONTENCODING_ISO8859_7 :
1600             enc = kCFStringEncodingISOLatinGreek;
1601             break ;
1602         case wxFONTENCODING_ISO8859_8 :
1603             enc = kCFStringEncodingISOLatinHebrew;
1604             break ;
1605         case wxFONTENCODING_ISO8859_9 :
1606             enc = kCFStringEncodingISOLatin5;
1607             break ;
1608         case wxFONTENCODING_ISO8859_10 :
1609             enc = kCFStringEncodingISOLatin6;
1610             break ;
1611         case wxFONTENCODING_ISO8859_11 :
1612             enc = kCFStringEncodingISOLatinThai;
1613             break ;
1614         case wxFONTENCODING_ISO8859_13 :
1615             enc = kCFStringEncodingISOLatin7;
1616             break ;
1617         case wxFONTENCODING_ISO8859_14 :
1618             enc = kCFStringEncodingISOLatin8;
1619             break ;
1620         case wxFONTENCODING_ISO8859_15 :
1621             enc = kCFStringEncodingISOLatin9;
1622             break ;
1623
1624         case wxFONTENCODING_KOI8 :
1625             enc = kCFStringEncodingKOI8_R;
1626             break ;
1627         case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
1628             enc = kCFStringEncodingDOSRussian;
1629             break ;
1630
1631 //      case wxFONTENCODING_BULGARIAN :
1632 //          enc = ;
1633 //          break ;
1634
1635         case wxFONTENCODING_CP437 :
1636             enc =kCFStringEncodingDOSLatinUS ;
1637             break ;
1638         case wxFONTENCODING_CP850 :
1639             enc = kCFStringEncodingDOSLatin1;
1640             break ;
1641         case wxFONTENCODING_CP852 :
1642             enc = kCFStringEncodingDOSLatin2;
1643             break ;
1644         case wxFONTENCODING_CP855 :
1645             enc = kCFStringEncodingDOSCyrillic;
1646             break ;
1647         case wxFONTENCODING_CP866 :
1648             enc =kCFStringEncodingDOSRussian ;
1649             break ;
1650         case wxFONTENCODING_CP874 :
1651             enc = kCFStringEncodingDOSThai;
1652             break ;
1653         case wxFONTENCODING_CP932 :
1654             enc = kCFStringEncodingDOSJapanese;
1655             break ;
1656         case wxFONTENCODING_CP936 :
1657             enc =kCFStringEncodingDOSChineseSimplif ;
1658             break ;
1659         case wxFONTENCODING_CP949 :
1660             enc = kCFStringEncodingDOSKorean;
1661             break ;
1662         case wxFONTENCODING_CP950 :
1663             enc = kCFStringEncodingDOSChineseTrad;
1664             break ;
1665
1666         case wxFONTENCODING_CP1250 :
1667             enc = kCFStringEncodingWindowsLatin2;
1668             break ;
1669         case wxFONTENCODING_CP1251 :
1670             enc =kCFStringEncodingWindowsCyrillic ;
1671             break ;
1672         case wxFONTENCODING_CP1252 :
1673             enc =kCFStringEncodingWindowsLatin1 ;
1674             break ;
1675         case wxFONTENCODING_CP1253 :
1676             enc = kCFStringEncodingWindowsGreek;
1677             break ;
1678         case wxFONTENCODING_CP1254 :
1679             enc = kCFStringEncodingWindowsLatin5;
1680             break ;
1681         case wxFONTENCODING_CP1255 :
1682             enc =kCFStringEncodingWindowsHebrew ;
1683             break ;
1684         case wxFONTENCODING_CP1256 :
1685             enc =kCFStringEncodingWindowsArabic ;
1686             break ;
1687         case wxFONTENCODING_CP1257 :
1688             enc = kCFStringEncodingWindowsBalticRim;
1689             break ;
1690         case wxFONTENCODING_UTF7 :
1691             enc = kCFStringEncodingNonLossyASCII ;
1692             break ;
1693         case wxFONTENCODING_UTF8 :
1694             enc = kCFStringEncodingUTF8 ;
1695             break ;
1696         case wxFONTENCODING_EUC_JP :
1697             enc = kCFStringEncodingEUC_JP;
1698             break ;
1699         case wxFONTENCODING_UTF16 :
1700             enc = kCFStringEncodingUnicode ;
1701             break ;
1702         case wxFONTENCODING_MACROMAN :
1703             enc = kCFStringEncodingMacRoman ;
1704             break ;
1705         case wxFONTENCODING_MACJAPANESE :
1706             enc = kCFStringEncodingMacJapanese ;
1707             break ;
1708         case wxFONTENCODING_MACCHINESETRAD :
1709             enc = kCFStringEncodingMacChineseTrad ;
1710             break ;
1711         case wxFONTENCODING_MACKOREAN :
1712             enc = kCFStringEncodingMacKorean ;
1713             break ;
1714         case wxFONTENCODING_MACARABIC :
1715             enc = kCFStringEncodingMacArabic ;
1716             break ;
1717         case wxFONTENCODING_MACHEBREW :
1718             enc = kCFStringEncodingMacHebrew ;
1719             break ;
1720         case wxFONTENCODING_MACGREEK :
1721             enc = kCFStringEncodingMacGreek ;
1722             break ;
1723         case wxFONTENCODING_MACCYRILLIC :
1724             enc = kCFStringEncodingMacCyrillic ;
1725             break ;
1726         case wxFONTENCODING_MACDEVANAGARI :
1727             enc = kCFStringEncodingMacDevanagari ;
1728             break ;
1729         case wxFONTENCODING_MACGURMUKHI :
1730             enc = kCFStringEncodingMacGurmukhi ;
1731             break ;
1732         case wxFONTENCODING_MACGUJARATI :
1733             enc = kCFStringEncodingMacGujarati ;
1734             break ;
1735         case wxFONTENCODING_MACORIYA :
1736             enc = kCFStringEncodingMacOriya ;
1737             break ;
1738         case wxFONTENCODING_MACBENGALI :
1739             enc = kCFStringEncodingMacBengali ;
1740             break ;
1741         case wxFONTENCODING_MACTAMIL :
1742             enc = kCFStringEncodingMacTamil ;
1743             break ;
1744         case wxFONTENCODING_MACTELUGU :
1745             enc = kCFStringEncodingMacTelugu ;
1746             break ;
1747         case wxFONTENCODING_MACKANNADA :
1748             enc = kCFStringEncodingMacKannada ;
1749             break ;
1750         case wxFONTENCODING_MACMALAJALAM :
1751             enc = kCFStringEncodingMacMalayalam ;
1752             break ;
1753         case wxFONTENCODING_MACSINHALESE :
1754             enc = kCFStringEncodingMacSinhalese ;
1755             break ;
1756         case wxFONTENCODING_MACBURMESE :
1757             enc = kCFStringEncodingMacBurmese ;
1758             break ;
1759         case wxFONTENCODING_MACKHMER :
1760             enc = kCFStringEncodingMacKhmer ;
1761             break ;
1762         case wxFONTENCODING_MACTHAI :
1763             enc = kCFStringEncodingMacThai ;
1764             break ;
1765         case wxFONTENCODING_MACLAOTIAN :
1766             enc = kCFStringEncodingMacLaotian ;
1767             break ;
1768         case wxFONTENCODING_MACGEORGIAN :
1769             enc = kCFStringEncodingMacGeorgian ;
1770             break ;
1771         case wxFONTENCODING_MACARMENIAN :
1772             enc = kCFStringEncodingMacArmenian ;
1773             break ;
1774         case wxFONTENCODING_MACCHINESESIMP :
1775             enc = kCFStringEncodingMacChineseSimp ;
1776             break ;
1777         case wxFONTENCODING_MACTIBETAN :
1778             enc = kCFStringEncodingMacTibetan ;
1779             break ;
1780         case wxFONTENCODING_MACMONGOLIAN :
1781             enc = kCFStringEncodingMacMongolian ;
1782             break ;
1783         case wxFONTENCODING_MACETHIOPIC :
1784             enc = kCFStringEncodingMacEthiopic ;
1785             break ;
1786         case wxFONTENCODING_MACCENTRALEUR :
1787             enc = kCFStringEncodingMacCentralEurRoman ;
1788             break ;
1789         case wxFONTENCODING_MACVIATNAMESE :
1790             enc = kCFStringEncodingMacVietnamese ;
1791             break ;
1792         case wxFONTENCODING_MACARABICEXT :
1793             enc = kCFStringEncodingMacExtArabic ;
1794             break ;
1795         case wxFONTENCODING_MACSYMBOL :
1796             enc = kCFStringEncodingMacSymbol ;
1797             break ;
1798         case wxFONTENCODING_MACDINGBATS :
1799             enc = kCFStringEncodingMacDingbats ;
1800             break ;
1801         case wxFONTENCODING_MACTURKISH :
1802             enc = kCFStringEncodingMacTurkish ;
1803             break ;
1804         case wxFONTENCODING_MACCROATIAN :
1805             enc = kCFStringEncodingMacCroatian ;
1806             break ;
1807         case wxFONTENCODING_MACICELANDIC :
1808             enc = kCFStringEncodingMacIcelandic ;
1809             break ;
1810         case wxFONTENCODING_MACROMANIAN :
1811             enc = kCFStringEncodingMacRomanian ;
1812             break ;
1813         case wxFONTENCODING_MACCELTIC :
1814             enc = kCFStringEncodingMacCeltic ;
1815             break ;
1816         case wxFONTENCODING_MACGAELIC :
1817             enc = kCFStringEncodingMacGaelic ;
1818             break ;
1819 //      case wxFONTENCODING_MACKEYBOARD :
1820 //          enc = kCFStringEncodingMacKeyboardGlyphs ;
1821 //          break ;
1822         default :
1823             // because gcc is picky
1824             break ;
1825     } ;
1826     return enc ;
1827 }
1828
1829 wxFontEncoding wxFontEncFromCFStringEnc(CFStringEncoding encoding)
1830 {
1831     wxFontEncoding enc = wxFONTENCODING_DEFAULT ;
1832
1833     switch( encoding)
1834     {
1835         case kCFStringEncodingISOLatin1  :
1836             enc = wxFONTENCODING_ISO8859_1 ;
1837             break ;
1838         case kCFStringEncodingISOLatin2 :
1839             enc = wxFONTENCODING_ISO8859_2;
1840             break ;
1841         case kCFStringEncodingISOLatin3 :
1842             enc = wxFONTENCODING_ISO8859_3 ;
1843             break ;
1844         case kCFStringEncodingISOLatin4 :
1845             enc = wxFONTENCODING_ISO8859_4;
1846             break ;
1847         case kCFStringEncodingISOLatinCyrillic :
1848             enc = wxFONTENCODING_ISO8859_5;
1849             break ;
1850         case kCFStringEncodingISOLatinArabic :
1851             enc = wxFONTENCODING_ISO8859_6;
1852             break ;
1853         case kCFStringEncodingISOLatinGreek :
1854             enc = wxFONTENCODING_ISO8859_7;
1855             break ;
1856         case kCFStringEncodingISOLatinHebrew :
1857             enc = wxFONTENCODING_ISO8859_8;
1858             break ;
1859         case kCFStringEncodingISOLatin5 :
1860             enc = wxFONTENCODING_ISO8859_9;
1861             break ;
1862         case kCFStringEncodingISOLatin6 :
1863             enc = wxFONTENCODING_ISO8859_10;
1864             break ;
1865         case kCFStringEncodingISOLatin7 :
1866             enc = wxFONTENCODING_ISO8859_13;
1867             break ;
1868         case kCFStringEncodingISOLatin8 :
1869             enc = wxFONTENCODING_ISO8859_14;
1870             break ;
1871         case kCFStringEncodingISOLatin9 :
1872             enc =wxFONTENCODING_ISO8859_15 ;
1873             break ;
1874
1875         case kCFStringEncodingKOI8_R :
1876             enc = wxFONTENCODING_KOI8;
1877             break ;
1878
1879 //      case  :
1880 //          enc = wxFONTENCODING_BULGARIAN;
1881 //          break ;
1882
1883         case kCFStringEncodingDOSLatinUS :
1884             enc = wxFONTENCODING_CP437;
1885             break ;
1886         case kCFStringEncodingDOSLatin1 :
1887             enc = wxFONTENCODING_CP850;
1888             break ;
1889         case kCFStringEncodingDOSLatin2 :
1890             enc =wxFONTENCODING_CP852 ;
1891             break ;
1892         case kCFStringEncodingDOSCyrillic :
1893             enc = wxFONTENCODING_CP855;
1894             break ;
1895         case kCFStringEncodingDOSRussian :
1896             enc = wxFONTENCODING_CP866;
1897             break ;
1898         case kCFStringEncodingDOSThai :
1899             enc =wxFONTENCODING_CP874 ;
1900             break ;
1901         case kCFStringEncodingDOSJapanese :
1902             enc = wxFONTENCODING_CP932;
1903             break ;
1904         case kCFStringEncodingDOSChineseSimplif :
1905             enc = wxFONTENCODING_CP936;
1906             break ;
1907         case kCFStringEncodingDOSKorean :
1908             enc = wxFONTENCODING_CP949;
1909             break ;
1910         case kCFStringEncodingDOSChineseTrad :
1911             enc = wxFONTENCODING_CP950;
1912             break ;
1913
1914         case kCFStringEncodingWindowsLatin2 :
1915             enc = wxFONTENCODING_CP1250;
1916             break ;
1917         case kCFStringEncodingWindowsCyrillic :
1918             enc = wxFONTENCODING_CP1251;
1919             break ;
1920         case kCFStringEncodingWindowsLatin1 :
1921             enc = wxFONTENCODING_CP1252;
1922             break ;
1923         case kCFStringEncodingWindowsGreek :
1924             enc = wxFONTENCODING_CP1253;
1925             break ;
1926         case kCFStringEncodingWindowsLatin5 :
1927             enc = wxFONTENCODING_CP1254;
1928             break ;
1929         case kCFStringEncodingWindowsHebrew :
1930             enc = wxFONTENCODING_CP1255;
1931             break ;
1932         case kCFStringEncodingWindowsArabic :
1933             enc = wxFONTENCODING_CP1256;
1934             break ;
1935         case kCFStringEncodingWindowsBalticRim :
1936             enc =wxFONTENCODING_CP1257 ;
1937             break ;
1938         case kCFStringEncodingEUC_JP :
1939             enc = wxFONTENCODING_EUC_JP;
1940             break ;
1941         case kCFStringEncodingUnicode :
1942             enc = wxFONTENCODING_UTF16;
1943             break;
1944         case kCFStringEncodingMacRoman :
1945             enc = wxFONTENCODING_MACROMAN ;
1946             break ;
1947         case kCFStringEncodingMacJapanese :
1948             enc = wxFONTENCODING_MACJAPANESE ;
1949             break ;
1950         case kCFStringEncodingMacChineseTrad :
1951             enc = wxFONTENCODING_MACCHINESETRAD ;
1952             break ;
1953         case kCFStringEncodingMacKorean :
1954             enc = wxFONTENCODING_MACKOREAN ;
1955             break ;
1956         case kCFStringEncodingMacArabic :
1957             enc =wxFONTENCODING_MACARABIC ;
1958             break ;
1959         case kCFStringEncodingMacHebrew :
1960             enc = wxFONTENCODING_MACHEBREW ;
1961             break ;
1962         case kCFStringEncodingMacGreek :
1963             enc = wxFONTENCODING_MACGREEK ;
1964             break ;
1965         case kCFStringEncodingMacCyrillic :
1966             enc = wxFONTENCODING_MACCYRILLIC ;
1967             break ;
1968         case kCFStringEncodingMacDevanagari :
1969             enc = wxFONTENCODING_MACDEVANAGARI ;
1970             break ;
1971         case kCFStringEncodingMacGurmukhi :
1972             enc = wxFONTENCODING_MACGURMUKHI ;
1973             break ;
1974         case kCFStringEncodingMacGujarati :
1975             enc = wxFONTENCODING_MACGUJARATI ;
1976             break ;
1977         case kCFStringEncodingMacOriya :
1978             enc =wxFONTENCODING_MACORIYA ;
1979             break ;
1980         case kCFStringEncodingMacBengali :
1981             enc =wxFONTENCODING_MACBENGALI ;
1982             break ;
1983         case kCFStringEncodingMacTamil :
1984             enc = wxFONTENCODING_MACTAMIL ;
1985             break ;
1986         case kCFStringEncodingMacTelugu :
1987             enc = wxFONTENCODING_MACTELUGU ;
1988             break ;
1989         case kCFStringEncodingMacKannada :
1990             enc = wxFONTENCODING_MACKANNADA ;
1991             break ;
1992         case kCFStringEncodingMacMalayalam :
1993             enc = wxFONTENCODING_MACMALAJALAM ;
1994             break ;
1995         case kCFStringEncodingMacSinhalese :
1996             enc = wxFONTENCODING_MACSINHALESE ;
1997             break ;
1998         case kCFStringEncodingMacBurmese :
1999             enc = wxFONTENCODING_MACBURMESE ;
2000             break ;
2001         case kCFStringEncodingMacKhmer :
2002             enc = wxFONTENCODING_MACKHMER ;
2003             break ;
2004         case kCFStringEncodingMacThai :
2005             enc = wxFONTENCODING_MACTHAI ;
2006             break ;
2007         case kCFStringEncodingMacLaotian :
2008             enc = wxFONTENCODING_MACLAOTIAN ;
2009             break ;
2010         case kCFStringEncodingMacGeorgian :
2011             enc = wxFONTENCODING_MACGEORGIAN ;
2012             break ;
2013         case kCFStringEncodingMacArmenian :
2014             enc = wxFONTENCODING_MACARMENIAN ;
2015             break ;
2016         case kCFStringEncodingMacChineseSimp :
2017             enc = wxFONTENCODING_MACCHINESESIMP ;
2018             break ;
2019         case kCFStringEncodingMacTibetan :
2020             enc = wxFONTENCODING_MACTIBETAN ;
2021             break ;
2022         case kCFStringEncodingMacMongolian :
2023             enc = wxFONTENCODING_MACMONGOLIAN ;
2024             break ;
2025         case kCFStringEncodingMacEthiopic :
2026             enc = wxFONTENCODING_MACETHIOPIC ;
2027             break ;
2028         case kCFStringEncodingMacCentralEurRoman:
2029             enc = wxFONTENCODING_MACCENTRALEUR  ;
2030             break ;
2031         case kCFStringEncodingMacVietnamese:
2032             enc = wxFONTENCODING_MACVIATNAMESE  ;
2033             break ;
2034         case kCFStringEncodingMacExtArabic :
2035             enc = wxFONTENCODING_MACARABICEXT ;
2036             break ;
2037         case kCFStringEncodingMacSymbol :
2038             enc = wxFONTENCODING_MACSYMBOL ;
2039             break ;
2040         case kCFStringEncodingMacDingbats :
2041             enc = wxFONTENCODING_MACDINGBATS ;
2042             break ;
2043         case kCFStringEncodingMacTurkish :
2044             enc = wxFONTENCODING_MACTURKISH ;
2045             break ;
2046         case kCFStringEncodingMacCroatian :
2047             enc = wxFONTENCODING_MACCROATIAN ;
2048             break ;
2049         case kCFStringEncodingMacIcelandic :
2050             enc = wxFONTENCODING_MACICELANDIC ;
2051             break ;
2052         case kCFStringEncodingMacRomanian :
2053             enc = wxFONTENCODING_MACROMANIAN ;
2054             break ;
2055         case kCFStringEncodingMacCeltic :
2056             enc = wxFONTENCODING_MACCELTIC ;
2057             break ;
2058         case kCFStringEncodingMacGaelic :
2059             enc = wxFONTENCODING_MACGAELIC ;
2060             break ;
2061 //        case kCFStringEncodingMacKeyboardGlyphs :
2062 //            enc = wxFONTENCODING_MACKEYBOARD ;
2063 //            break ;
2064     } ;
2065     return enc ;
2066 }
2067
2068 class wxMBConv_cocoa : public wxMBConv
2069 {
2070 public:
2071     wxMBConv_cocoa()
2072     {
2073         Init(CFStringGetSystemEncoding()) ;
2074     }
2075
2076     wxMBConv_cocoa(const wxChar* name)
2077     {
2078         Init( wxCFStringEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
2079     }
2080
2081     wxMBConv_cocoa(wxFontEncoding encoding)
2082     {
2083         Init( wxCFStringEncFromFontEnc(encoding) );
2084     }
2085
2086     ~wxMBConv_cocoa()
2087     {
2088     }
2089
2090     void Init( CFStringEncoding encoding)
2091     {
2092         m_char_encoding = encoding ;
2093         m_unicode_encoding = kCFStringEncodingUnicode;
2094     }
2095
2096     size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2097     {
2098         wxASSERT(szUnConv);
2099
2100         size_t nBufSize = strlen(szUnConv) + 1;
2101         size_t nRealOutSize;
2102
2103         UniChar* szUniCharBuffer    = (UniChar*) szOut;
2104         wchar_t* szConvBuffer       = szOut;
2105
2106         if (szConvBuffer == NULL && nOutSize != 0)
2107         {
2108             szConvBuffer = new wchar_t[nOutSize] ;
2109         }
2110
2111 #if SIZEOF_WCHAR_T == 4
2112         szUniCharBuffer = new UniChar[nOutSize];
2113 #endif
2114
2115         CFDataRef theData = CFDataCreateWithBytesNoCopy (
2116                                             NULL,     //allocator
2117                                             (const UInt8*)szUnConv,
2118                                             nBufSize - 1,
2119                                             NULL      //deallocator
2120                                             );
2121
2122         wxASSERT(theData);
2123
2124         CFStringRef theString = CFStringCreateFromExternalRepresentation (
2125                                                 NULL,
2126                                                 theData,
2127                                                 m_char_encoding
2128                                                 );
2129
2130         wxASSERT(theString);
2131
2132         if (nOutSize == 0)
2133         {
2134             nRealOutSize = CFStringGetLength(theString) + 1;
2135             CFRelease(theString);
2136             return nRealOutSize - 1;
2137         }
2138
2139         CFRange theRange = { 0, CFStringGetLength(theString) };
2140
2141         CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2142
2143
2144         nRealOutSize = (CFStringGetLength(theString) + 1);
2145
2146         CFRelease(theString);
2147
2148         szUniCharBuffer[nRealOutSize-1] = '\0' ;
2149
2150 #if SIZEOF_WCHAR_T == 4
2151         wxMBConvUTF16 converter ;
2152         converter.MB2WC(szConvBuffer  , (const char*)szUniCharBuffer , nRealOutSize ) ;
2153         delete[] szUniCharBuffer;
2154 #endif
2155         if ( szOut == NULL )
2156             delete [] szConvBuffer;
2157
2158         return nRealOutSize ;
2159     }
2160
2161     size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2162     {
2163         size_t nBufSize = wxWcslen(szUnConv) + 1;
2164         size_t nRealOutSize;
2165         char* szBuffer = szOut;
2166         UniChar* szUniBuffer = (UniChar*) szUnConv;
2167
2168         if (szOut == NULL)
2169         {
2170             // worst case
2171             nRealOutSize = ((nBufSize - 1) * 8) +1 ;
2172             szBuffer = new char[ nRealOutSize ] ;
2173         }
2174         else
2175             nRealOutSize = nOutSize;
2176
2177 #if SIZEOF_WCHAR_T == 4
2178         wxMBConvUTF16BE converter ;
2179         nBufSize = converter.WC2MB( NULL , szUnConv , 0 );
2180         szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1] ;
2181         converter.WC2MB( (char*) szUniBuffer , szUnConv, nBufSize + sizeof(UniChar)) ;
2182         nBufSize /= sizeof(UniChar);
2183         ++nBufSize;
2184 #endif
2185
2186         CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2187                                 NULL, //allocator
2188                                 szUniBuffer,
2189                                 nBufSize,
2190                                 NULL //deallocator
2191                             );
2192
2193         wxASSERT(theString);
2194
2195         //Note that CER puts a BOM when converting to unicode
2196         //so we may want to check and use getchars instead in that case
2197         CFDataRef theData = CFStringCreateExternalRepresentation(
2198                                 NULL, //allocator
2199                                 theString,
2200                                 m_char_encoding,
2201                                 0 //what to put in characters that can't be converted -
2202                                     //0 tells CFString to return NULL if it meets such a character
2203                         );
2204
2205         if(!theData)
2206             return (size_t)-1;
2207
2208         CFRelease(theString);
2209
2210         nRealOutSize = CFDataGetLength(theData);
2211
2212         if ( szOut == NULL )
2213             delete[] szBuffer;
2214
2215         if(nOutSize == 0)
2216         {
2217 //TODO: This gets flagged as a non-malloced address by the debugger...
2218 //#if SIZEOF_WCHAR_T == 4
2219 //        delete[] szUniBuffer;
2220 //#endif
2221             CFRelease(theData);
2222             return nRealOutSize - 1;
2223         }
2224
2225         CFRange theRange = {0, CFDataGetLength(theData) };
2226         CFDataGetBytes(theData, theRange, (UInt8*) szBuffer);
2227
2228         CFRelease(theData);
2229
2230 //TODO: This gets flagged as a non-malloced address by the debugger...
2231 //#if SIZEOF_WCHAR_T == 4
2232 //        delete[] szUniBuffer;
2233 //#endif
2234         return  nRealOutSize - 1;
2235     }
2236
2237     bool IsOk() const
2238     {
2239         //TODO: check for invalid en/de/coding
2240         return true;
2241     }
2242
2243 private:
2244     CFStringEncoding m_char_encoding ;
2245     CFStringEncoding m_unicode_encoding ;
2246 };
2247
2248 #endif // defined(__WXCOCOA__)
2249
2250 // ============================================================================
2251 // Mac conversion classes
2252 // ============================================================================
2253
2254 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2255
2256 class wxMBConv_mac : public wxMBConv
2257 {
2258 public:
2259     wxMBConv_mac()
2260     {
2261         Init(CFStringGetSystemEncoding()) ;
2262     }
2263
2264     wxMBConv_mac(const wxChar* name)
2265     {
2266         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
2267     }
2268
2269     wxMBConv_mac(wxFontEncoding encoding)
2270     {
2271         Init( wxMacGetSystemEncFromFontEnc(encoding) );
2272     }
2273
2274     ~wxMBConv_mac()
2275     {
2276         OSStatus status = noErr ;
2277         status = TECDisposeConverter(m_MB2WC_converter);
2278         status = TECDisposeConverter(m_WC2MB_converter);
2279     }
2280
2281
2282     void Init( TextEncodingBase encoding)
2283     {
2284         OSStatus status = noErr ;
2285         m_char_encoding = encoding ;
2286         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
2287
2288         status = TECCreateConverter(&m_MB2WC_converter,
2289                                     m_char_encoding,
2290                                     m_unicode_encoding);
2291         status = TECCreateConverter(&m_WC2MB_converter,
2292                                     m_unicode_encoding,
2293                                     m_char_encoding);
2294     }
2295
2296     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2297     {
2298         OSStatus status = noErr ;
2299         ByteCount byteOutLen ;
2300         ByteCount byteInLen = strlen(psz) ;
2301         wchar_t *tbuf = NULL ;
2302         UniChar* ubuf = NULL ;
2303         size_t res = 0 ;
2304
2305         if (buf == NULL)
2306         {
2307             n = byteInLen ;
2308             tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
2309         }
2310         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2311 #if SIZEOF_WCHAR_T == 4
2312         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2313 #else
2314         ubuf = (UniChar*) (buf ? buf : tbuf) ;
2315 #endif
2316         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
2317           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
2318 #if SIZEOF_WCHAR_T == 4
2319         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2320         // is not properly terminated we get random characters at the end
2321         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2322         wxMBConvUTF16BE converter ;
2323         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
2324         free( ubuf ) ;
2325 #else
2326         res = byteOutLen / sizeof( UniChar ) ;
2327 #endif
2328         if ( buf == NULL )
2329              free(tbuf) ;
2330
2331         if ( buf  && res < n)
2332             buf[res] = 0;
2333
2334         return res ;
2335     }
2336
2337     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2338     {
2339         OSStatus status = noErr ;
2340         ByteCount byteOutLen ;
2341         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2342
2343         char *tbuf = NULL ;
2344
2345         if (buf == NULL)
2346         {
2347             // worst case
2348             n = ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T;
2349             tbuf = (char*) malloc( n ) ;
2350         }
2351
2352         ByteCount byteBufferLen = n ;
2353         UniChar* ubuf = NULL ;
2354 #if SIZEOF_WCHAR_T == 4
2355         wxMBConvUTF16BE converter ;
2356         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
2357         byteInLen = unicharlen ;
2358         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2359         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
2360 #else
2361         ubuf = (UniChar*) psz ;
2362 #endif
2363         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
2364             (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
2365 #if SIZEOF_WCHAR_T == 4
2366         free( ubuf ) ;
2367 #endif
2368         if ( buf == NULL )
2369             free(tbuf) ;
2370
2371         size_t res = byteOutLen ;
2372         if ( buf  && res < n)
2373             buf[res] = 0;
2374
2375         return res ;
2376     }
2377
2378     bool IsOk() const
2379         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
2380
2381 private:
2382     TECObjectRef m_MB2WC_converter ;
2383     TECObjectRef m_WC2MB_converter ;
2384
2385     TextEncodingBase m_char_encoding ;
2386     TextEncodingBase m_unicode_encoding ;
2387 };
2388
2389 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
2390
2391 // ============================================================================
2392 // wxEncodingConverter based conversion classes
2393 // ============================================================================
2394
2395 #if wxUSE_FONTMAP
2396
2397 class wxMBConv_wxwin : public wxMBConv
2398 {
2399 private:
2400     void Init()
2401     {
2402         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2403                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2404     }
2405
2406 public:
2407     // temporarily just use wxEncodingConverter stuff,
2408     // so that it works while a better implementation is built
2409     wxMBConv_wxwin(const wxChar* name)
2410     {
2411         if (name)
2412             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
2413         else
2414             m_enc = wxFONTENCODING_SYSTEM;
2415
2416         Init();
2417     }
2418
2419     wxMBConv_wxwin(wxFontEncoding enc)
2420     {
2421         m_enc = enc;
2422
2423         Init();
2424     }
2425
2426     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2427     {
2428         size_t inbuf = strlen(psz);
2429         if (buf)
2430             m2w.Convert(psz,buf);
2431         return inbuf;
2432     }
2433
2434     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2435     {
2436         const size_t inbuf = wxWcslen(psz);
2437         if (buf)
2438             w2m.Convert(psz,buf);
2439
2440         return inbuf;
2441     }
2442
2443     bool IsOk() const { return m_ok; }
2444
2445 public:
2446     wxFontEncoding m_enc;
2447     wxEncodingConverter m2w, w2m;
2448
2449     // were we initialized successfully?
2450     bool m_ok;
2451
2452     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2453 };
2454
2455 #endif // wxUSE_FONTMAP
2456
2457 // ============================================================================
2458 // wxCSConv implementation
2459 // ============================================================================
2460
2461 void wxCSConv::Init()
2462 {
2463     m_name = NULL;
2464     m_convReal =  NULL;
2465     m_deferred = true;
2466 }
2467
2468 wxCSConv::wxCSConv(const wxChar *charset)
2469 {
2470     Init();
2471
2472     if ( charset )
2473     {
2474         SetName(charset);
2475     }
2476
2477     m_encoding = wxFONTENCODING_SYSTEM;
2478 }
2479
2480 wxCSConv::wxCSConv(wxFontEncoding encoding)
2481 {
2482     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2483     {
2484         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2485
2486         encoding = wxFONTENCODING_SYSTEM;
2487     }
2488
2489     Init();
2490
2491     m_encoding = encoding;
2492 }
2493
// Release the charset name and the real converter, if any.
wxCSConv::~wxCSConv()
{
    Clear();
}
2498
2499 wxCSConv::wxCSConv(const wxCSConv& conv)
2500         : wxMBConv()
2501 {
2502     Init();
2503
2504     SetName(conv.m_name);
2505     m_encoding = conv.m_encoding;
2506 }
2507
2508 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2509 {
2510     Clear();
2511
2512     SetName(conv.m_name);
2513     m_encoding = conv.m_encoding;
2514
2515     return *this;
2516 }
2517
2518 void wxCSConv::Clear()
2519 {
2520     free(m_name);
2521     delete m_convReal;
2522
2523     m_name = NULL;
2524     m_convReal = NULL;
2525 }
2526
2527 void wxCSConv::SetName(const wxChar *charset)
2528 {
2529     if (charset)
2530     {
2531         m_name = wxStrdup(charset);
2532         m_deferred = true;
2533     }
2534 }
2535
2536 wxMBConv *wxCSConv::DoCreate() const
2537 {
2538     // check for the special case of ASCII or ISO8859-1 charset: as we have
2539     // special knowledge of it anyhow, we don't need to create a special
2540     // conversion object
2541     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2542     {
2543         // don't convert at all
2544         return NULL;
2545     }
2546
2547     // we trust OS to do conversion better than we can so try external
2548     // conversion methods first
2549     //
2550     // the full order is:
2551     //      1. OS conversion (iconv() under Unix or Win32 API)
2552     //      2. hard coded conversions for UTF
2553     //      3. wxEncodingConverter as fall back
2554
2555     // step (1)
2556 #ifdef HAVE_ICONV
2557 #if !wxUSE_FONTMAP
2558     if ( m_name )
2559 #endif // !wxUSE_FONTMAP
2560     {
2561         wxString name(m_name);
2562
2563 #if wxUSE_FONTMAP
2564         if ( name.empty() )
2565             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
2566 #endif // wxUSE_FONTMAP
2567
2568         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
2569         if ( conv->IsOk() )
2570             return conv;
2571
2572         delete conv;
2573     }
2574 #endif // HAVE_ICONV
2575
2576 #ifdef wxHAVE_WIN32_MB2WC
2577     {
2578 #if wxUSE_FONTMAP
2579         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2580                                       : new wxMBConv_win32(m_encoding);
2581         if ( conv->IsOk() )
2582             return conv;
2583
2584         delete conv;
2585 #else
2586         return NULL;
2587 #endif
2588     }
2589 #endif // wxHAVE_WIN32_MB2WC
2590 #if defined(__WXMAC__)
2591     {
2592         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
2593         {
2594
2595             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
2596                                         : new wxMBConv_mac(m_encoding);
2597             if ( conv->IsOk() )
2598                  return conv;
2599
2600             delete conv;
2601         }
2602     }
2603 #endif
2604 #if defined(__WXCOCOA__)
2605     {
2606         if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
2607         {
2608
2609             wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
2610                                           : new wxMBConv_cocoa(m_encoding);
2611             if ( conv->IsOk() )
2612                  return conv;
2613
2614             delete conv;
2615         }
2616     }
2617 #endif
2618     // step (2)
2619     wxFontEncoding enc = m_encoding;
2620 #if wxUSE_FONTMAP
2621     if ( enc == wxFONTENCODING_SYSTEM && m_name )
2622     {
2623         // use "false" to suppress interactive dialogs -- we can be called from
2624         // anywhere and popping up a dialog from here is the last thing we want to
2625         // do
2626         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
2627     }
2628 #endif // wxUSE_FONTMAP
2629
2630     switch ( enc )
2631     {
2632         case wxFONTENCODING_UTF7:
2633              return new wxMBConvUTF7;
2634
2635         case wxFONTENCODING_UTF8:
2636              return new wxMBConvUTF8;
2637
2638         case wxFONTENCODING_UTF16BE:
2639              return new wxMBConvUTF16BE;
2640
2641         case wxFONTENCODING_UTF16LE:
2642              return new wxMBConvUTF16LE;
2643
2644         case wxFONTENCODING_UTF32BE:
2645              return new wxMBConvUTF32BE;
2646
2647         case wxFONTENCODING_UTF32LE:
2648              return new wxMBConvUTF32LE;
2649
2650         default:
2651              // nothing to do but put here to suppress gcc warnings
2652              ;
2653     }
2654
2655     // step (3)
2656 #if wxUSE_FONTMAP
2657     {
2658         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2659                                       : new wxMBConv_wxwin(m_encoding);
2660         if ( conv->IsOk() )
2661             return conv;
2662
2663         delete conv;
2664     }
2665 #endif // wxUSE_FONTMAP
2666
2667     // NB: This is a hack to prevent deadlock. What could otherwise happen
2668     //     in Unicode build: wxConvLocal creation ends up being here
2669     //     because of some failure and logs the error. But wxLog will try to
2670     //     attach timestamp, for which it will need wxConvLocal (to convert
2671     //     time to char* and then wchar_t*), but that fails, tries to log
2672     //     error, but wxLog has a (already locked) critical section that
2673     //     guards static buffer.
2674     static bool alreadyLoggingError = false;
2675     if (!alreadyLoggingError)
2676     {
2677         alreadyLoggingError = true;
2678         wxLogError(_("Cannot convert from the charset '%s'!"),
2679                    m_name ? m_name
2680                       :
2681 #if wxUSE_FONTMAP
2682                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
2683 #else // !wxUSE_FONTMAP
2684                          wxString::Format(_("encoding %s"), m_encoding).c_str()
2685 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2686               );
2687         alreadyLoggingError = false;
2688     }
2689
2690     return NULL;
2691 }
2692
2693 void wxCSConv::CreateConvIfNeeded() const
2694 {
2695     if ( m_deferred )
2696     {
2697         wxCSConv *self = (wxCSConv *)this; // const_cast
2698
2699 #if wxUSE_INTL
2700         // if we don't have neither the name nor the encoding, use the default
2701         // encoding for this system
2702         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2703         {
2704             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
2705         }
2706 #endif // wxUSE_INTL
2707
2708         self->m_convReal = DoCreate();
2709         self->m_deferred = false;
2710     }
2711 }
2712
2713 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2714 {
2715     CreateConvIfNeeded();
2716
2717     if (m_convReal)
2718         return m_convReal->MB2WC(buf, psz, n);
2719
2720     // latin-1 (direct)
2721     size_t len = strlen(psz);
2722
2723     if (buf)
2724     {
2725         for (size_t c = 0; c <= len; c++)
2726             buf[c] = (unsigned char)(psz[c]);
2727     }
2728
2729     return len;
2730 }
2731
2732 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2733 {
2734     CreateConvIfNeeded();
2735
2736     if (m_convReal)
2737         return m_convReal->WC2MB(buf, psz, n);
2738
2739     // latin-1 (direct)
2740     const size_t len = wxWcslen(psz);
2741     if (buf)
2742     {
2743         for (size_t c = 0; c <= len; c++)
2744         {
2745             if (psz[c] > 0xFF)
2746                 return (size_t)-1;
2747             buf[c] = (char)psz[c];
2748         }
2749     }
2750     else
2751     {
2752         for (size_t c = 0; c <= len; c++)
2753         {
2754             if (psz[c] > 0xFF)
2755                 return (size_t)-1;
2756         }
2757     }
2758
2759     return len;
2760 }
2761
2762 // ----------------------------------------------------------------------------
2763 // globals
2764 // ----------------------------------------------------------------------------
2765
// The object behind wxConvLibc: its class depends on the platform.
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// The objects behind the other predefined converters.
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;


// The exported converters are references bound to the file-static objects
// above; wxConvCurrent initially points to the libc converter.
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
2786
2787 #else // !wxUSE_WCHAR_T
2788
// stand-ins in absence of wchar_t: the converters are then plain wxMBConv
// objects and not references to objects of more specific classes
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2794
2795 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
2796
2797