src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
   9 //              (c) 2000-2003 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ============================================================================
  14 // declarations
  15 // ============================================================================
  16
  17 // ----------------------------------------------------------------------------
  18 // headers
  19 // ----------------------------------------------------------------------------
  20
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  22   #pragma implementation "strconv.h"
  23 #endif
  24
  25 // For compilers that support precompilation, includes "wx.h".
  26 #include "wx/wxprec.h"
  27
  28 #ifdef __BORLANDC__
  29   #pragma hdrstop
  30 #endif
  31
  32 #ifndef WX_PRECOMP
  33     #include "wx/intl.h"
  34     #include "wx/log.h"
  35 #endif // WX_PRECOMP
  36
  37 #include "wx/strconv.h"
  38
  39 #if wxUSE_WCHAR_T
  40
  41 #ifdef __WXMSW__
  42     #include "wx/msw/private.h"
  43 #endif
  44
  45 #ifdef __WINDOWS__
  46     #include "wx/msw/missing.h"
  47 #endif
  48
  49 #ifndef __WXWINCE__
  50 #include <errno.h>
  51 #endif
  52
  53 #include <ctype.h>
  54 #include <string.h>
  55 #include <stdlib.h>
  56
  57 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  58     #define wxHAVE_WIN32_MB2WC
  59 #endif // __WIN32__ but !__WXMICROWIN__
  60
  61 // ----------------------------------------------------------------------------
  62 // headers
  63 // ----------------------------------------------------------------------------
  64
  65 #ifdef __SALFORDC__
  66     #include <clib.h>
  67 #endif
  68
  69 #ifdef HAVE_ICONV
  70     #include <iconv.h>
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #include <ATSUnicode.h>
  79 #include <TextCommon.h>
  80 #include <TextEncodingConverter.h>
  81
  82 #include  "wx/mac/private.h"  // includes mac headers
  83 #endif
  84 // ----------------------------------------------------------------------------
  85 // macros
  86 // ----------------------------------------------------------------------------
  87
  // Byte-swap, in place, the first `len` elements of the array `str`
  // (32 bit elements for BSWAP_UCS4, 16 bit elements for BSWAP_UTF16).
  //
  // NB: both macros expand to a complete braced block, which is why they are
  //     used without a trailing semicolon. The arguments are parenthesized so
  //     that expressions (e.g. "ptr + offset") may be passed safely.
  #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT32_SWAP_ALWAYS((str)[_c]); }
  #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<(len); _c++) (str)[_c]=wxUINT16_SWAP_ALWAYS((str)[_c]); }
  90
  // Select, from the size of wchar_t, the names and helpers used for wide
  // character data in the rest of this file:
  //  - WC_NAME       iconv charset name without byte order information
  //  - WC_NAME_BEST  iconv charset name with an explicit byte order, chosen
  //                  according to WORDS_BIGENDIAN
  //  - WC_BSWAP      the BSWAP_xxx macro matching the size of wchar_t
  //  - WC_UTF16      defined only when wchar_t is 2 bytes wide; the converters
  //                  test it to select their UTF-16 (surrogate) code paths
  #if SIZEOF_WCHAR_T == 4
      #define WC_NAME         "UCS4"
      #define WC_BSWAP         BSWAP_UCS4
      #ifdef WORDS_BIGENDIAN
        #define WC_NAME_BEST  "UCS-4BE"
      #else
        #define WC_NAME_BEST  "UCS-4LE"
      #endif
  #elif SIZEOF_WCHAR_T == 2
      #define WC_NAME         "UTF16"
      #define WC_BSWAP         BSWAP_UTF16
      #define WC_UTF16
      #ifdef WORDS_BIGENDIAN
        #define WC_NAME_BEST  "UTF-16BE"
      #else
        #define WC_NAME_BEST  "UTF-16LE"
      #endif
  #else // sizeof(wchar_t) != 2 nor 4
      // does this ever happen?
      #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
  #endif
 112
 113 // ============================================================================
 114 // implementation
 115 // ============================================================================
 116
 117 // ----------------------------------------------------------------------------
 118 // UTF-16 en/decoding to/from UCS-4
 119 // ----------------------------------------------------------------------------
 120
 121
 122 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 123 {
 124     if (input<=0xffff)
 125     {
 126         if (output)
 127             *output = (wxUint16) input;
 128         return 1;
 129     }
 130     else if (input>=0x110000)
 131     {
 132         return (size_t)-1;
 133     }
 134     else
 135     {
 136         if (output)
 137         {
 138             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 139             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 140         }
 141         return 2;
 142     }
 143 }
 144
 145 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 146 {
 147     if ((*input<0xd800) || (*input>0xdfff))
 148     {
 149         output = *input;
 150         return 1;
 151     }
 152     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 153     {
 154         output = *input;
 155         return (size_t)-1;
 156     }
 157     else
 158     {
 159         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 160         return 2;
 161     }
 162 }
 163
 164
 165 // ----------------------------------------------------------------------------
 166 // wxMBConv
 167 // ----------------------------------------------------------------------------
 168
 // Destructor, defined out of line; there is nothing to clean up.
 wxMBConv::~wxMBConv()
 {
     // nothing to do here
 }
 173
 174 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 175 {
 176     if ( psz )
 177     {
 178         // calculate the length of the buffer needed first
 179         size_t nLen = MB2WC(NULL, psz, 0);
 180         if ( nLen != (size_t)-1 )
 181         {
 182             // now do the actual conversion
 183             wxWCharBuffer buf(nLen);
 184             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 185             if ( nLen != (size_t)-1 )
 186             {
 187                 return buf;
 188             }
 189         }
 190     }
 191
 192     wxWCharBuffer buf((wchar_t *)NULL);
 193
 194     return buf;
 195 }
 196
 197 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 198 {
 199     if ( pwz )
 200     {
 201         size_t nLen = WC2MB(NULL, pwz, 0);
 202         if ( nLen != (size_t)-1 )
 203         {
 204             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 205             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 206             if ( nLen != (size_t)-1 )
 207             {
 208                 return buf;
 209             }
 210         }
 211     }
 212
 213     wxCharBuffer buf((char *)NULL);
 214
 215     return buf;
 216 }
 217
 218 // ----------------------------------------------------------------------------
 219 // wxMBConvLibc
 220 // ----------------------------------------------------------------------------
 221
 // Multibyte to wide character conversion: forwards all arguments unchanged
 // to wxMB2WC() and returns its result.
 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 {
     return wxMB2WC(buf, psz, n);
 }
 226
 // Wide character to multibyte conversion: forwards all arguments unchanged
 // to wxWC2MB() and returns its result.
 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 {
     return wxWC2MB(buf, psz, n);
 }
 231
 232 // ----------------------------------------------------------------------------
 233 // UTF-7
 234 // ----------------------------------------------------------------------------
 235
 // Character tables kept for a future UTF-7 implementation; they are compiled
 // out because nothing uses them yet.
 // NOTE(review): the names suggest the "Set D", "Set O" and "Set B" character
 // classes of the UTF-7 specification -- confirm before relying on this.
 #if 0
 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                         "abcdefghijklmnopqrstuvwxyz"
                         "0123456789'(),-./:?";
 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                         "abcdefghijklmnopqrstuvwxyz"
                         "0123456789+/";
 #endif
 245
 // TODO: write actual implementations of UTF-7 here

 // Placeholder: UTF-7 decoding is not implemented. All arguments are ignored,
 // nothing is written and 0 is always returned.
 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
                            const char * WXUNUSED(psz),
                            size_t WXUNUSED(n)) const
 {
   return 0;
 }
 253
 // Placeholder: UTF-7 encoding is not implemented. All arguments are ignored,
 // nothing is written and 0 is always returned.
 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
                            const wchar_t * WXUNUSED(psz),
                            size_t WXUNUSED(n)) const
 {
   return 0;
 }
 260
 261 // ----------------------------------------------------------------------------
 262 // UTF-8
 263 // ----------------------------------------------------------------------------
 264
 // utf8_max[i] is the largest value that fits in a UTF-8 sequence of i + 1
 // bytes. wxMBConvUTF8::WC2MB() scans it to find how many bytes a character
 // needs and wxMBConvUTF8::MB2WC() uses it to reject values encoded with more
 // bytes than necessary. The last entry (0xffffffff) makes sure the scan in
 // WC2MB() always stops inside the table.
 static wxUint32 utf8_max[]=
     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 267
 268 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 269 {
 270     size_t len = 0;
 271
 272     while (*psz && ((!buf) || (len < n)))
 273     {
 274         unsigned char cc = *psz++, fc = cc;
 275         unsigned cnt;
 276         for (cnt = 0; fc & 0x80; cnt++)
 277             fc <<= 1;
 278         if (!cnt)
 279         {
 280             // plain ASCII char
 281             if (buf)
 282                 *buf++ = cc;
 283             len++;
 284         }
 285         else
 286         {
 287             cnt--;
 288             if (!cnt)
 289             {
 290                 // invalid UTF-8 sequence
 291                 return (size_t)-1;
 292             }
 293             else
 294             {
 295                 unsigned ocnt = cnt - 1;
 296                 wxUint32 res = cc & (0x3f >> cnt);
 297                 while (cnt--)
 298                 {
 299                     cc = *psz++;
 300                     if ((cc & 0xC0) != 0x80)
 301                     {
 302                         // invalid UTF-8 sequence
 303                         return (size_t)-1;
 304                     }
 305                     res = (res << 6) | (cc & 0x3f);
 306                 }
 307                 if (res <= utf8_max[ocnt])
 308                 {
 309                     // illegal UTF-8 encoding
 310                     return (size_t)-1;
 311                 }
 312 #ifdef WC_UTF16
 313                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 314                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 315                 if (pa == (size_t)-1)
 316                   return (size_t)-1;
 317                 if (buf)
 318                     buf += pa;
 319                 len += pa;
 320 #else // !WC_UTF16
 321                 if (buf)
 322                     *buf++ = res;
 323                 len++;
 324 #endif // WC_UTF16/!WC_UTF16
 325             }
 326         }
 327     }
 328     if (buf && (len < n))
 329         *buf = 0;
 330     return len;
 331 }
 332
 333 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 334 {
 335     size_t len = 0;
 336
 337     while (*psz && ((!buf) || (len < n)))
 338     {
 339         wxUint32 cc;
 340 #ifdef WC_UTF16
 341         // cast is ok for WC_UTF16
 342         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 343         psz += (pa == (size_t)-1) ? 1 : pa;
 344 #else
 345         cc=(*psz++) & 0x7fffffff;
 346 #endif
 347         unsigned cnt;
 348         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 349         if (!cnt)
 350         {
 351             // plain ASCII char
 352             if (buf)
 353                 *buf++ = (char) cc;
 354             len++;
 355         }
 356
 357         else
 358         {
 359             len += cnt + 1;
 360             if (buf)
 361             {
 362                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 363                 while (cnt--)
 364                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 365             }
 366         }
 367     }
 368
 369     if (buf && (len<n)) *buf = 0;
 370
 371     return len;
 372 }
 373
 374
 375
 376
 377 // ----------------------------------------------------------------------------
 378 // UTF-16
 379 // ----------------------------------------------------------------------------
 380
 // The UTF-16 converters are implemented below under two aliases:
 // wxMBConvUTF16straight, whose methods copy the 16 bit units unchanged, and
 // wxMBConvUTF16swap, whose methods exchange the two bytes of each unit.
 // WORDS_BIGENDIAN decides which of wxMBConvUTF16BE/LE each alias stands for.
 #ifdef WORDS_BIGENDIAN
     #define wxMBConvUTF16straight wxMBConvUTF16BE
     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 #else
     #define wxMBConvUTF16swap     wxMBConvUTF16BE
     #define wxMBConvUTF16straight wxMBConvUTF16LE
 #endif
 388
 389
 390 #ifdef WC_UTF16
 391
 392 // copy 16bit MB to 16bit String
 393 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 394 {
 395     size_t len=0;
 396
 397     while (*(wxUint16*)psz && (!buf || len < n))
 398     {
 399         if (buf)
 400             *buf++ = *(wxUint16*)psz;
 401         len++;
 402
 403         psz += sizeof(wxUint16);
 404     }
 405     if (buf && len<n)   *buf=0;
 406
 407     return len;
 408 }
 409
 410
 411 // copy 16bit String to 16bit MB
 412 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 413 {
 414     size_t len=0;
 415
 416     while (*psz && (!buf || len < n))
 417     {
 418         if (buf)
 419         {
 420             *(wxUint16*)buf = *psz;
 421             buf += sizeof(wxUint16);
 422         }
 423         len += sizeof(wxUint16);
 424         psz++;
 425     }
 426     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 427
 428     return len;
 429 }
 430
 431
 432 // swap 16bit MB to 16bit String
 433 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 434 {
 435     size_t len=0;
 436
 437     while (*(wxUint16*)psz && (!buf || len < n))
 438     {
 439         if (buf)
 440         {
 441             ((char *)buf)[0] = psz[1];
 442             ((char *)buf)[1] = psz[0];
 443             buf++;
 444         }
 445         len++;
 446         psz += sizeof(wxUint16);
 447     }
 448     if (buf && len<n)   *buf=0;
 449
 450     return len;
 451 }
 452
 453
 454 // swap 16bit MB to 16bit String
 455 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 456 {
 457     size_t len=0;
 458
 459     while (*psz && (!buf || len < n))
 460     {
 461         if (buf)
 462         {
 463             *buf++ = ((char*)psz)[1];
 464             *buf++ = ((char*)psz)[0];
 465         }
 466         len += sizeof(wxUint16);
 467         psz++;
 468     }
 469     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 470
 471     return len;
 472 }
 473
 474
 475 #else // WC_UTF16
 476
 477
 478 // copy 16bit MB to 32bit String
 479 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 480 {
 481     size_t len=0;
 482
 483     while (*(wxUint16*)psz && (!buf || len < n))
 484     {
 485         wxUint32 cc;
 486         size_t pa=decode_utf16((wxUint16*)psz, cc);
 487         if (pa == (size_t)-1)
 488             return pa;
 489
 490         if (buf)
 491             *buf++ = cc;
 492         len++;
 493         psz += pa * sizeof(wxUint16);
 494     }
 495     if (buf && len<n)   *buf=0;
 496
 497     return len;
 498 }
 499
 500
 501 // copy 32bit String to 16bit MB
 502 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 503 {
 504     size_t len=0;
 505
 506     while (*psz && (!buf || len < n))
 507     {
 508         wxUint16 cc[2];
 509         size_t pa=encode_utf16(*psz, cc);
 510
 511         if (pa == (size_t)-1)
 512             return pa;
 513
 514         if (buf)
 515         {
 516             *(wxUint16*)buf = cc[0];
 517             buf += sizeof(wxUint16);
 518             if (pa > 1)
 519             {
 520                 *(wxUint16*)buf = cc[1];
 521                 buf += sizeof(wxUint16);
 522             }
 523         }
 524
 525         len += pa*sizeof(wxUint16);
 526         psz++;
 527     }
 528     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 529
 530     return len;
 531 }
 532
 533
 534 // swap 16bit MB to 32bit String
 535 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 536 {
 537     size_t len=0;
 538
 539     while (*(wxUint16*)psz && (!buf || len < n))
 540     {
 541         wxUint32 cc;
 542         char tmp[4];
 543         tmp[0]=psz[1];  tmp[1]=psz[0];
 544         tmp[2]=psz[3];  tmp[3]=psz[2];
 545
 546         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 547         if (pa == (size_t)-1)
 548             return pa;
 549
 550         if (buf)
 551             *buf++ = cc;
 552
 553         len++;
 554         psz += pa * sizeof(wxUint16);
 555     }
 556     if (buf && len<n)   *buf=0;
 557
 558     return len;
 559 }
 560
 561
 562 // swap 32bit String to 16bit MB
 563 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 564 {
 565     size_t len=0;
 566
 567     while (*psz && (!buf || len < n))
 568     {
 569         wxUint16 cc[2];
 570         size_t pa=encode_utf16(*psz, cc);
 571
 572         if (pa == (size_t)-1)
 573             return pa;
 574
 575         if (buf)
 576         {
 577             *buf++ = ((char*)cc)[1];
 578             *buf++ = ((char*)cc)[0];
 579             if (pa > 1)
 580             {
 581                 *buf++ = ((char*)cc)[3];
 582                 *buf++ = ((char*)cc)[2];
 583             }
 584         }
 585
 586         len += pa*sizeof(wxUint16);
 587         psz++;
 588     }
 589     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 590
 591     return len;
 592 }
 593
 594 #endif // WC_UTF16
 595
 596
 597 // ----------------------------------------------------------------------------
 598 // UTF-32
 599 // ----------------------------------------------------------------------------
 600
 // The UTF-32 converters are implemented below under two aliases:
 // wxMBConvUTF32straight, whose methods copy the 32 bit units unchanged, and
 // wxMBConvUTF32swap, whose methods reverse the four bytes of each unit.
 // WORDS_BIGENDIAN decides which of wxMBConvUTF32BE/LE each alias stands for.
 #ifdef WORDS_BIGENDIAN
 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 #else
 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 #endif


 // the global UTF-32 converter objects, one per byte order
 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 612
 613
 614 #ifdef WC_UTF16
 615
 616 // copy 32bit MB to 16bit String
 617 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 618 {
 619     size_t len=0;
 620
 621     while (*(wxUint32*)psz && (!buf || len < n))
 622     {
 623         wxUint16 cc[2];
 624
 625         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 626         if (pa == (size_t)-1)
 627             return pa;
 628
 629         if (buf)
 630         {
 631             *buf++ = cc[0];
 632             if (pa > 1)
 633                 *buf++ = cc[1];
 634         }
 635         len += pa;
 636         psz += sizeof(wxUint32);
 637     }
 638     if (buf && len<n)   *buf=0;
 639
 640     return len;
 641 }
 642
 643
 644 // copy 16bit String to 32bit MB
 645 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 646 {
 647     size_t len=0;
 648
 649     while (*psz && (!buf || len < n))
 650     {
 651         wxUint32 cc;
 652
 653         // cast is ok for WC_UTF16
 654         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 655         if (pa == (size_t)-1)
 656             return pa;
 657
 658         if (buf)
 659         {
 660             *(wxUint32*)buf = cc;
 661             buf += sizeof(wxUint32);
 662         }
 663         len += sizeof(wxUint32);
 664         psz += pa;
 665     }
 666
 667     if (buf && len<=n-sizeof(wxUint32))
 668         *(wxUint32*)buf=0;
 669
 670     return len;
 671 }
 672
 673
 674
 675 // swap 32bit MB to 16bit String
 676 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 677 {
 678     size_t len=0;
 679
 680     while (*(wxUint32*)psz && (!buf || len < n))
 681     {
 682         char tmp[4];
 683         tmp[0] = psz[3];   tmp[1] = psz[2];
 684         tmp[2] = psz[1];   tmp[3] = psz[0];
 685
 686
 687         wxUint16 cc[2];
 688
 689         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 690         if (pa == (size_t)-1)
 691             return pa;
 692
 693         if (buf)
 694         {
 695             *buf++ = cc[0];
 696             if (pa > 1)
 697                 *buf++ = cc[1];
 698         }
 699         len += pa;
 700         psz += sizeof(wxUint32);
 701     }
 702
 703     if (buf && len<n)
 704         *buf=0;
 705
 706     return len;
 707 }
 708
 709
 710 // swap 16bit String to 32bit MB
 711 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 712 {
 713     size_t len=0;
 714
 715     while (*psz && (!buf || len < n))
 716     {
 717         char cc[4];
 718
 719         // cast is ok for WC_UTF16
 720         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 721         if (pa == (size_t)-1)
 722             return pa;
 723
 724         if (buf)
 725         {
 726             *buf++ = cc[3];
 727             *buf++ = cc[2];
 728             *buf++ = cc[1];
 729             *buf++ = cc[0];
 730         }
 731         len += sizeof(wxUint32);
 732         psz += pa;
 733     }
 734
 735     if (buf && len<=n-sizeof(wxUint32))
 736         *(wxUint32*)buf=0;
 737
 738     return len;
 739 }
 740
 741 #else // WC_UTF16
 742
 743
 744 // copy 32bit MB to 32bit String
 745 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 746 {
 747     size_t len=0;
 748
 749     while (*(wxUint32*)psz && (!buf || len < n))
 750     {
 751         if (buf)
 752             *buf++ = *(wxUint32*)psz;
 753         len++;
 754         psz += sizeof(wxUint32);
 755     }
 756
 757     if (buf && len<n)
 758         *buf=0;
 759
 760     return len;
 761 }
 762
 763
 764 // copy 32bit String to 32bit MB
 765 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 766 {
 767     size_t len=0;
 768
 769     while (*psz && (!buf || len < n))
 770     {
 771         if (buf)
 772         {
 773             *(wxUint32*)buf = *psz;
 774             buf += sizeof(wxUint32);
 775         }
 776
 777         len += sizeof(wxUint32);
 778         psz++;
 779     }
 780
 781     if (buf && len<=n-sizeof(wxUint32))
 782         *(wxUint32*)buf=0;
 783
 784     return len;
 785 }
 786
 787
 788 // swap 32bit MB to 32bit String
 789 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 790 {
 791     size_t len=0;
 792
 793     while (*(wxUint32*)psz && (!buf || len < n))
 794     {
 795         if (buf)
 796         {
 797             ((char *)buf)[0] = psz[3];
 798             ((char *)buf)[1] = psz[2];
 799             ((char *)buf)[2] = psz[1];
 800             ((char *)buf)[3] = psz[0];
 801             buf++;
 802         }
 803         len++;
 804         psz += sizeof(wxUint32);
 805     }
 806
 807     if (buf && len<n)
 808         *buf=0;
 809
 810     return len;
 811 }
 812
 813
 814 // swap 32bit String to 32bit MB
 815 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 816 {
 817     size_t len=0;
 818
 819     while (*psz && (!buf || len < n))
 820     {
 821         if (buf)
 822         {
 823             *buf++ = ((char *)psz)[3];
 824             *buf++ = ((char *)psz)[2];
 825             *buf++ = ((char *)psz)[1];
 826             *buf++ = ((char *)psz)[0];
 827         }
 828         len += sizeof(wxUint32);
 829         psz++;
 830     }
 831
 832     if (buf && len<=n-sizeof(wxUint32))
 833         *(wxUint32*)buf=0;
 834
 835     return len;
 836 }
 837
 838
 839 #endif // WC_UTF16
 840
 841
 842 // ============================================================================
 843 // The classes doing conversion using the iconv_xxx() functions
 844 // ============================================================================
 845
 846 #ifdef HAVE_ICONV
 847
 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 //     (which means error) and says there are 0 bytes left in the input buffer --
 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 //     this alternative test for iconv() failure.
 //     [This bug does not appear in glibc 2.2.]
 //
 // ICONV_FAILED(cres, bufLeft) is true if an iconv() call which returned cres
 // and left bufLeft bytes of input unconverted must be treated as an error.
 // Both arguments are parenthesized so that arbitrary expressions can be
 // passed without changing the meaning of the test.
 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 #define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                      (errno != E2BIG || (bufLeft) != 0))
 #else
 #define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
 #endif

 // cast the address of an input pointer to the type expected by iconv(),
 // whose constness (ICONV_CONST) differs between implementations
 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
 863
 864 // ----------------------------------------------------------------------------
 865 // wxMBConv_iconv: encapsulates an iconv character set
 866 // ----------------------------------------------------------------------------
 867
 // Converter between a named multibyte charset and wchar_t implemented with
 // iconv(). One iconv descriptor is kept per direction; IsOk() tells whether
 // both could be opened. The name of the charset used for wchar_t data and
 // whether its byte order needs swapping are determined by the constructor
 // and stored in static members shared by all objects of this class.
 class wxMBConv_iconv : public wxMBConv
 {
 public:
     // name is the charset name to hand to iconv_open()
     wxMBConv_iconv(const wxChar *name);
     virtual ~wxMBConv_iconv();

     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

     // true only if both conversion descriptors were opened successfully
     bool IsOk() const
         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }

 protected:
     // the iconv handlers used to translate from multibyte to wide char and in
     // the other direction
     iconv_t m2w,
             w2m;

 private:
     // the name (for iconv_open()) of a wide char charset -- if none is
     // available on this machine, it will remain NULL
     static const char *ms_wcCharsetName;

     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
     // different endian-ness than the native one
     static bool ms_wcNeedsSwap;
 };
 895
 // static members: no wchar_t charset name is known until a wxMBConv_iconv
 // constructor has found one
 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
 898
 899 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
 900 {
 901     // Do it the hard way
 902     char cname[100];
 903     for (size_t i = 0; i < wxStrlen(name)+1; i++)
 904         cname[i] = (char) name[i];
 905
 906     // check for charset that represents wchar_t:
 907     if (ms_wcCharsetName == NULL)
 908     {
 909         ms_wcNeedsSwap = false;
 910
 911         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
 912         ms_wcCharsetName = WC_NAME_BEST;
 913         m2w = iconv_open(ms_wcCharsetName, cname);
 914
 915         if (m2w == (iconv_t)-1)
 916         {
 917             // try charset w/o bytesex info (e.g. "UCS4")
 918             // and check for bytesex ourselves:
 919             ms_wcCharsetName = WC_NAME;
 920             m2w = iconv_open(ms_wcCharsetName, cname);
 921
 922             // last bet, try if it knows WCHAR_T pseudo-charset
 923             if (m2w == (iconv_t)-1)
 924             {
 925                 ms_wcCharsetName = "WCHAR_T";
 926                 m2w = iconv_open(ms_wcCharsetName, cname);
 927             }
 928
 929             if (m2w != (iconv_t)-1)
 930             {
 931                 char    buf[2], *bufPtr;
 932                 wchar_t wbuf[2], *wbufPtr;
 933                 size_t  insz, outsz;
 934                 size_t  res;
 935
 936                 buf[0] = 'A';
 937                 buf[1] = 0;
 938                 wbuf[0] = 0;
 939                 insz = 2;
 940                 outsz = SIZEOF_WCHAR_T * 2;
 941                 wbufPtr = wbuf;
 942                 bufPtr = buf;
 943
 944                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
 945                             (char**)&wbufPtr, &outsz);
 946
 947                 if (ICONV_FAILED(res, insz))
 948                 {
 949                     ms_wcCharsetName = NULL;
 950                     wxLogLastError(wxT("iconv"));
 951                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
 952                 }
 953                 else
 954                 {
 955                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
 956                 }
 957             }
 958             else
 959             {
 960                 ms_wcCharsetName = NULL;
 961
 962                 // VS: we must not output an error here, since wxWidgets will safely
 963                 //     fall back to using wxEncodingConverter.
 964                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
 965                 //wxLogError(
 966             }
 967         }
 968         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
 969     }
 970     else // we already have ms_wcCharsetName
 971     {
 972         m2w = iconv_open(ms_wcCharsetName, cname);
 973     }
 974
 975     // NB: don't ever pass NULL to iconv_open(), it may crash!
 976     if ( ms_wcCharsetName )
 977     {
 978         w2m = iconv_open( cname, ms_wcCharsetName);
 979     }
 980     else
 981     {
 982         w2m = (iconv_t)-1;
 983     }
 984 }
 985
 // Close whichever of the two iconv descriptors were successfully opened;
 // (iconv_t)-1 marks a descriptor which was never opened.
 wxMBConv_iconv::~wxMBConv_iconv()
 {
     if ( m2w != (iconv_t)-1 )
         iconv_close(m2w);
     if ( w2m != (iconv_t)-1 )
         iconv_close(w2m);
 }
 993
 994 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 995 {
 996     size_t inbuf = strlen(psz);
 997     size_t outbuf = n * SIZEOF_WCHAR_T;
 998     size_t res, cres;
 999     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1000     wchar_t *bufPtr = buf;
1001     const char *pszPtr = psz;
1002
1003     if (buf)
1004     {
1005         // have destination buffer, convert there
1006         cres = iconv(m2w,
1007                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1008                      (char**)&bufPtr, &outbuf);
1009         res = n - (outbuf / SIZEOF_WCHAR_T);
1010
1011         if (ms_wcNeedsSwap)
1012         {
1013             // convert to native endianness
1014             WC_BSWAP(buf /* _not_ bufPtr */, res)
1015         }
1016
1017         // NB: iconv was given only strlen(psz) characters on input, and so
1018         //     it couldn't convert the trailing zero. Let's do it ourselves
1019         //     if there's some room left for it in the output buffer.
1020         if (res < n)
1021             buf[res] = 0;
1022     }
1023     else
1024     {
1025         // no destination buffer... convert using temp buffer
1026         // to calculate destination buffer requirement
1027         wchar_t tbuf[8];
1028         res = 0;
1029         do {
1030             bufPtr = tbuf;
1031             outbuf = 8*SIZEOF_WCHAR_T;
1032
1033             cres = iconv(m2w,
1034                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1035                          (char**)&bufPtr, &outbuf );
1036
1037             res += 8-(outbuf/SIZEOF_WCHAR_T);
1038         } while ((cres==(size_t)-1) && (errno==E2BIG));
1039     }
1040
1041     if (ICONV_FAILED(cres, inbuf))
1042     {
1043         //VS: it is ok if iconv fails, hence trace only
1044         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1045         return (size_t)-1;
1046     }
1047
1048     return res;
1049 }
1050
1051 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1052 {
1053     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1054     size_t outbuf = n;
1055     size_t res, cres;
1056
1057     wchar_t *tmpbuf = 0;
1058
1059     if (ms_wcNeedsSwap)
1060     {
1061         // need to copy to temp buffer to switch endianness
1062         // this absolutely doesn't rock!
1063         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1064         //  could be in read-only memory, or be accessed in some other thread)
1065         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1066         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1067         WC_BSWAP(tmpbuf, inbuf)
1068         psz=tmpbuf;
1069     }
1070
1071     if (buf)
1072     {
1073         // have destination buffer, convert there
1074         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1075
1076         res = n-outbuf;
1077
1078         // NB: iconv was given only wcslen(psz) characters on input, and so
1079         //     it couldn't convert the trailing zero. Let's do it ourselves
1080         //     if there's some room left for it in the output buffer.
1081         if (res < n)
1082             buf[0] = 0;
1083     }
1084     else
1085     {
1086         // no destination buffer... convert using temp buffer
1087         // to calculate destination buffer requirement
1088         char tbuf[16];
1089         res = 0;
1090         do {
1091             buf = tbuf; outbuf = 16;
1092
1093             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1094
1095             res += 16 - outbuf;
1096         } while ((cres==(size_t)-1) && (errno==E2BIG));
1097     }
1098
1099     if (ms_wcNeedsSwap)
1100     {
1101         free(tmpbuf);
1102     }
1103
1104     if (ICONV_FAILED(cres, inbuf))
1105     {
1106         //VS: it is ok if iconv fails, hence trace only
1107         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1108         return (size_t)-1;
1109     }
1110
1111     return res;
1112 }
1113
1114 #endif // HAVE_ICONV
1115
1116
1117 // ============================================================================
1118 // Win32 conversion classes
1119 // ============================================================================
1120
1121 #ifdef wxHAVE_WIN32_MB2WC
1122
1123 // from utils.cpp
1124 #if wxUSE_FONTMAP
1125 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1126 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1127 #endif
1128
1129 class wxMBConv_win32 : public wxMBConv
1130 {
1131 public:
1132     wxMBConv_win32()
1133     {
1134         m_CodePage = CP_ACP;
1135     }
1136
1137 #if wxUSE_FONTMAP
1138     wxMBConv_win32(const wxChar* name)
1139     {
1140         m_CodePage = wxCharsetToCodepage(name);
1141     }
1142
1143     wxMBConv_win32(wxFontEncoding encoding)
1144     {
1145         m_CodePage = wxEncodingToCodepage(encoding);
1146     }
1147 #endif
1148
1149     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1150     {
1151         const size_t len = ::MultiByteToWideChar
1152                              (
1153                                 m_CodePage,     // code page
1154                                 0,              // flags (none)
1155                                 psz,            // input string
1156                                 -1,             // its length (NUL-terminated)
1157                                 buf,            // output string
1158                                 buf ? n : 0     // size of output buffer
1159                              );
1160
1161         // note that it returns count of written chars for buf != NULL and size
1162         // of the needed buffer for buf == NULL so in either case the length of
1163         // the string (which never includes the terminating NUL) is one less
1164         return len ? len - 1 : (size_t)-1;
1165     }
1166
1167     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1168     {
1169         /*
1170             we have a problem here: by default, WideCharToMultiByte() may
1171             replace characters unrepresentable in the target code page with bad
1172             quality approximations such as turning "1/2" symbol (U+00BD) into
1173             "1" for the code pages which don't have it and we, obviously, want
1174             to avoid this at any price
1175
1176             the trouble is that this function does it _silently_, i.e. it won't
1177             even tell us whether it did or not... Win98/2000 and higher provide
1178             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1179             we have to resort to a round trip, i.e. check that converting back
1180             results in the same string -- this is, of course, expensive but
1181             otherwise we simply can't be sure to not garble the data.
1182          */
1183
1184         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1185         // it doesn't work with CJK encodings (which we test for rather roughly
1186         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1187         // supporting it
1188         BOOL usedDef wxDUMMY_INITIALIZE(false),
1189              *pUsedDef;
1190         int flags;
1191         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1192         {
1193             // it's our lucky day
1194             flags = WC_NO_BEST_FIT_CHARS;
1195             pUsedDef = &usedDef;
1196         }
1197         else // old system or unsupported encoding
1198         {
1199             flags = 0;
1200             pUsedDef = NULL;
1201         }
1202
1203         const size_t len = ::WideCharToMultiByte
1204                              (
1205                                 m_CodePage,     // code page
1206                                 flags,          // either none or no best fit
1207                                 pwz,            // input string
1208                                 -1,             // it is (wide) NUL-terminated
1209                                 buf,            // output buffer
1210                                 buf ? n : 0,    // and its size
1211                                 NULL,           // default "replacement" char
1212                                 pUsedDef        // [out] was it used?
1213                              );
1214
1215         if ( !len )
1216         {
1217             // function totally failed
1218             return (size_t)-1;
1219         }
1220
1221         // if we were really converting, check if we succeeded
1222         if ( buf )
1223         {
1224             if ( flags )
1225             {
1226                 // check if the conversion failed, i.e. if any replacements
1227                 // were done
1228                 if ( usedDef )
1229                     return (size_t)-1;
1230             }
1231             else // we must resort to double tripping...
1232             {
1233                 wxWCharBuffer wcBuf(n);
1234                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1235                         wcscmp(wcBuf, pwz) != 0 )
1236                 {
1237                     // we didn't obtain the same thing we started from, hence
1238                     // the conversion was lossy and we consider that it failed
1239                     return (size_t)-1;
1240                 }
1241             }
1242         }
1243
1244         // see the comment above for the reason of "len - 1"
1245         return len - 1;
1246     }
1247
1248     bool IsOk() const { return m_CodePage != -1; }
1249
1250 private:
1251     static bool CanUseNoBestFit()
1252     {
1253         static int s_isWin98Or2k = -1;
1254
1255         if ( s_isWin98Or2k == -1 )
1256         {
1257             int verMaj, verMin;
1258             switch ( wxGetOsVersion(&verMaj, &verMin) )
1259             {
1260                 case wxWIN95:
1261                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1262                     break;
1263
1264                 case wxWINDOWS_NT:
1265                     s_isWin98Or2k = verMaj >= 5;
1266                     break;
1267
1268                 default:
1269                     // unknown, be conseravtive by default
1270                     s_isWin98Or2k = 0;
1271             }
1272
1273             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1274         }
1275
1276         return s_isWin98Or2k == 1;
1277     }
1278
1279     long m_CodePage;
1280 };
1281
1282 #endif // wxHAVE_WIN32_MB2WC
1283
1284 // ============================================================================
1285 // Mac conversion classes
1286 // ============================================================================
1287
1288 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1289
1290 class wxMBConv_mac : public wxMBConv
1291 {
1292 public:
1293     wxMBConv_mac()
1294     {
1295         Init(CFStringGetSystemEncoding()) ;
1296     }
1297
1298     wxMBConv_mac(const wxChar* name)
1299     {
1300         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1301     }
1302
1303     wxMBConv_mac(wxFontEncoding encoding)
1304     {
1305         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1306     }
1307
1308         ~wxMBConv_mac()
1309         {
1310             OSStatus status = noErr ;
1311             status = TECDisposeConverter(m_MB2WC_converter);
1312             status = TECDisposeConverter(m_WC2MB_converter);
1313         }
1314
1315
1316         void Init( TextEncodingBase encoding)
1317         {
1318             OSStatus status = noErr ;
1319                 m_char_encoding = encoding ;
1320                 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1321
1322             status = TECCreateConverter(&m_MB2WC_converter,
1323                                         m_char_encoding,
1324                                         m_unicode_encoding);
1325             status = TECCreateConverter(&m_WC2MB_converter,
1326                                         m_unicode_encoding,
1327                                         m_char_encoding);
1328         }
1329
1330     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1331     {
1332             OSStatus status = noErr ;
1333             ByteCount byteOutLen ;
1334             ByteCount byteInLen = strlen(psz) ;
1335                 wchar_t *tbuf = NULL ;
1336                 UniChar* ubuf = NULL ;
1337                 size_t res = 0 ;
1338
1339                 if (buf == NULL)
1340                 {
1341                         n = byteInLen ;
1342                         tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1343                 }
1344             ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1345 #if SIZEOF_WCHAR_T == 4
1346                 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1347 #else
1348                 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1349 #endif
1350             status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1351               (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1352 #if SIZEOF_WCHAR_T == 4
1353         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1354         // is not properly terminated we get random characters at the end
1355         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
1356                 wxMBConvUTF16BE converter ;
1357                 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1358                 free( ubuf ) ;
1359 #else
1360                 res = byteOutLen / sizeof( UniChar ) ;
1361 #endif
1362                 if ( buf == NULL )
1363                         free(tbuf) ;
1364
1365         if ( buf  && res < n)
1366             buf[res] = 0;
1367
1368                 return res ;
1369     }
1370
1371     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1372     {
1373             OSStatus status = noErr ;
1374             ByteCount byteOutLen ;
1375             ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1376
1377                 char *tbuf = NULL ;
1378
1379                 if (buf == NULL)
1380                 {
1381                         // worst case
1382                         n = byteInLen * 2 ;
1383                         tbuf = (char*) malloc( n ) ;
1384                 }
1385
1386             ByteCount byteBufferLen = n ;
1387                 UniChar* ubuf = NULL ;
1388 #if SIZEOF_WCHAR_T == 4
1389                 wxMBConvUTF16BE converter ;
1390                 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1391                 byteInLen = unicharlen ;
1392                 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1393                 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
1394 #else
1395                 ubuf = (UniChar*) psz ;
1396 #endif
1397             status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1398                (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1399 #if SIZEOF_WCHAR_T == 4
1400                 free( ubuf ) ;
1401 #endif
1402                 if ( buf == NULL )
1403                         free(tbuf) ;
1404
1405                 size_t res = byteOutLen ;
1406         if ( buf  && res < n)
1407             buf[res] = 0;
1408
1409                 return res ;
1410     }
1411
1412     bool IsOk() const
1413         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
1414
1415 private:
1416         TECObjectRef m_MB2WC_converter ;
1417         TECObjectRef m_WC2MB_converter ;
1418
1419         TextEncodingBase m_char_encoding ;
1420         TextEncodingBase m_unicode_encoding ;
1421 };
1422
1423 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1424
1425 // ============================================================================
1426 // wxEncodingConverter based conversion classes
1427 // ============================================================================
1428
1429 #if wxUSE_FONTMAP
1430
1431 class wxMBConv_wxwin : public wxMBConv
1432 {
1433 private:
1434     void Init()
1435     {
1436         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1437                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1438     }
1439
1440 public:
1441     // temporarily just use wxEncodingConverter stuff,
1442     // so that it works while a better implementation is built
1443     wxMBConv_wxwin(const wxChar* name)
1444     {
1445         if (name)
1446             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1447         else
1448             m_enc = wxFONTENCODING_SYSTEM;
1449
1450         Init();
1451     }
1452
1453     wxMBConv_wxwin(wxFontEncoding enc)
1454     {
1455         m_enc = enc;
1456
1457         Init();
1458     }
1459
1460     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1461     {
1462         size_t inbuf = strlen(psz);
1463         if (buf)
1464             m2w.Convert(psz,buf);
1465         return inbuf;
1466     }
1467
1468     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1469     {
1470         const size_t inbuf = wxWcslen(psz);
1471         if (buf)
1472             w2m.Convert(psz,buf);
1473
1474         return inbuf;
1475     }
1476
1477     bool IsOk() const { return m_ok; }
1478
1479 public:
1480     wxFontEncoding m_enc;
1481     wxEncodingConverter m2w, w2m;
1482
1483     // were we initialized successfully?
1484     bool m_ok;
1485
1486     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1487 };
1488
1489 #endif // wxUSE_FONTMAP
1490
1491 // ============================================================================
1492 // wxCSConv implementation
1493 // ============================================================================
1494
1495 void wxCSConv::Init()
1496 {
1497     m_name = NULL;
1498     m_convReal =  NULL;
1499     m_deferred = true;
1500 }
1501
1502 wxCSConv::wxCSConv(const wxChar *charset)
1503 {
1504     Init();
1505
1506     if ( charset )
1507     {
1508         SetName(charset);
1509     }
1510
1511     m_encoding = wxFONTENCODING_SYSTEM;
1512 }
1513
1514 wxCSConv::wxCSConv(wxFontEncoding encoding)
1515 {
1516     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1517     {
1518         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1519
1520         encoding = wxFONTENCODING_SYSTEM;
1521     }
1522
1523     Init();
1524
1525     m_encoding = encoding;
1526 }
1527
1528 wxCSConv::~wxCSConv()
1529 {
1530     Clear();
1531 }
1532
1533 wxCSConv::wxCSConv(const wxCSConv& conv)
1534         : wxMBConv()
1535 {
1536     Init();
1537
1538     SetName(conv.m_name);
1539     m_encoding = conv.m_encoding;
1540 }
1541
1542 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1543 {
1544     Clear();
1545
1546     SetName(conv.m_name);
1547     m_encoding = conv.m_encoding;
1548
1549     return *this;
1550 }
1551
1552 void wxCSConv::Clear()
1553 {
1554     free(m_name);
1555     delete m_convReal;
1556
1557     m_name = NULL;
1558     m_convReal = NULL;
1559 }
1560
1561 void wxCSConv::SetName(const wxChar *charset)
1562 {
1563     if (charset)
1564     {
1565         m_name = wxStrdup(charset);
1566         m_deferred = true;
1567     }
1568 }
1569
1570 wxMBConv *wxCSConv::DoCreate() const
1571 {
1572     // check for the special case of ASCII or ISO8859-1 charset: as we have
1573     // special knowledge of it anyhow, we don't need to create a special
1574     // conversion object
1575     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1576     {
1577         // don't convert at all
1578         return NULL;
1579     }
1580
1581     // we trust OS to do conversion better than we can so try external
1582     // conversion methods first
1583     //
1584     // the full order is:
1585     //      1. OS conversion (iconv() under Unix or Win32 API)
1586     //      2. hard coded conversions for UTF
1587     //      3. wxEncodingConverter as fall back
1588
1589     // step (1)
1590 #ifdef HAVE_ICONV
1591 #if !wxUSE_FONTMAP
1592     if ( m_name )
1593 #endif // !wxUSE_FONTMAP
1594     {
1595         wxString name(m_name);
1596
1597 #if wxUSE_FONTMAP
1598         if ( name.empty() )
1599             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1600 #endif // wxUSE_FONTMAP
1601
1602         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1603         if ( conv->IsOk() )
1604             return conv;
1605
1606         delete conv;
1607     }
1608 #endif // HAVE_ICONV
1609
1610 #ifdef wxHAVE_WIN32_MB2WC
1611     {
1612 #if wxUSE_FONTMAP
1613         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1614                                       : new wxMBConv_win32(m_encoding);
1615         if ( conv->IsOk() )
1616             return conv;
1617
1618         delete conv;
1619 #else
1620         return NULL;
1621 #endif
1622     }
1623 #endif // wxHAVE_WIN32_MB2WC
1624 #if defined(__WXMAC__)
1625     {
1626         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1627         {
1628
1629                 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1630                                             : new wxMBConv_mac(m_encoding);
1631                 if ( conv->IsOk() )
1632                     return conv;
1633
1634                 delete conv;
1635         }
1636     }
1637 #endif
1638     // step (2)
1639     wxFontEncoding enc = m_encoding;
1640 #if wxUSE_FONTMAP
1641     if ( enc == wxFONTENCODING_SYSTEM && m_name )
1642     {
1643         // use "false" to suppress interactive dialogs -- we can be called from
1644         // anywhere and popping up a dialog from here is the last thing we want to
1645         // do
1646         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1647     }
1648 #endif // wxUSE_FONTMAP
1649
1650     switch ( enc )
1651     {
1652         case wxFONTENCODING_UTF7:
1653              return new wxMBConvUTF7;
1654
1655         case wxFONTENCODING_UTF8:
1656              return new wxMBConvUTF8;
1657
1658         case wxFONTENCODING_UTF16BE:
1659              return new wxMBConvUTF16BE;
1660
1661         case wxFONTENCODING_UTF16LE:
1662              return new wxMBConvUTF16LE;
1663
1664         case wxFONTENCODING_UTF32BE:
1665              return new wxMBConvUTF32BE;
1666
1667         case wxFONTENCODING_UTF32LE:
1668              return new wxMBConvUTF32LE;
1669
1670         default:
1671              // nothing to do but put here to suppress gcc warnings
1672              ;
1673     }
1674
1675     // step (3)
1676 #if wxUSE_FONTMAP
1677     {
1678         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1679                                       : new wxMBConv_wxwin(m_encoding);
1680         if ( conv->IsOk() )
1681             return conv;
1682
1683         delete conv;
1684     }
1685 #endif // wxUSE_FONTMAP
1686
1687     // NB: This is a hack to prevent deadlock. What could otherwise happen
1688     //     in Unicode build: wxConvLocal creation ends up being here
1689     //     because of some failure and logs the error. But wxLog will try to
1690     //     attach timestamp, for which it will need wxConvLocal (to convert
1691     //     time to char* and then wchar_t*), but that fails, tries to log
1692     //     error, but wxLog has a (already locked) critical section that
1693     //     guards static buffer.
1694     static bool alreadyLoggingError = false;
1695     if (!alreadyLoggingError)
1696     {
1697         alreadyLoggingError = true;
1698         wxLogError(_("Cannot convert from the charset '%s'!"),
1699                    m_name ? m_name
1700                       :
1701 #if wxUSE_FONTMAP
1702                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1703 #else // !wxUSE_FONTMAP
1704                          wxString::Format(_("encoding %s"), m_encoding).c_str()
1705 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1706               );
1707         alreadyLoggingError = false;
1708     }
1709
1710     return NULL;
1711 }
1712
1713 void wxCSConv::CreateConvIfNeeded() const
1714 {
1715     if ( m_deferred )
1716     {
1717         wxCSConv *self = (wxCSConv *)this; // const_cast
1718
1719 #if wxUSE_INTL
1720         // if we don't have neither the name nor the encoding, use the default
1721         // encoding for this system
1722         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1723         {
1724             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1725         }
1726 #endif // wxUSE_INTL
1727
1728         self->m_convReal = DoCreate();
1729         self->m_deferred = false;
1730     }
1731 }
1732
1733 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1734 {
1735     CreateConvIfNeeded();
1736
1737     if (m_convReal)
1738         return m_convReal->MB2WC(buf, psz, n);
1739
1740     // latin-1 (direct)
1741     size_t len = strlen(psz);
1742
1743     if (buf)
1744     {
1745         for (size_t c = 0; c <= len; c++)
1746             buf[c] = (unsigned char)(psz[c]);
1747     }
1748
1749     return len;
1750 }
1751
1752 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1753 {
1754     CreateConvIfNeeded();
1755
1756     if (m_convReal)
1757         return m_convReal->WC2MB(buf, psz, n);
1758
1759     // latin-1 (direct)
1760     const size_t len = wxWcslen(psz);
1761     if (buf)
1762     {
1763         for (size_t c = 0; c <= len; c++)
1764         {
1765             if (psz[c] > 0xFF)
1766                 return (size_t)-1;
1767             buf[c] = psz[c];
1768         }
1769     }
1770     else
1771     {
1772         for (size_t c = 0; c <= len; c++)
1773         {
1774             if (psz[c] > 0xFF)
1775                 return (size_t)-1;
1776         }
1777     }
1778
1779     return len;
1780 }
1781
1782 // ----------------------------------------------------------------------------
1783 // globals
1784 // ----------------------------------------------------------------------------
1785
1786 #ifdef __WINDOWS__
1787     static wxMBConv_win32 wxConvLibcObj;
1788 #elif defined(__WXMAC__) && !defined(__MACH__)
1789     static wxMBConv_mac wxConvLibcObj ;
1790 #else
1791     static wxMBConvLibc wxConvLibcObj;
1792 #endif
1793
1794 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1795 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1796 static wxMBConvUTF7 wxConvUTF7Obj;
1797 static wxMBConvUTF8 wxConvUTF8Obj;
1798
1799
1800 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1801 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1802 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1803 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1804 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1805 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1806
1807 #else // !wxUSE_WCHAR_T
1808
1809 // stand-ins in absence of wchar_t
1810 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1811                                 wxConvISO8859_1,
1812                                 wxConvLocal,
1813                                 wxConvUTF8;
1814
1815 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1816
1817