src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
   9 //              (c) 2000-2003 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ============================================================================
  14 // declarations
  15 // ============================================================================
  16
  17 // ----------------------------------------------------------------------------
  18 // headers
  19 // ----------------------------------------------------------------------------
  20
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  22   #pragma implementation "strconv.h"
  23 #endif
  24
  25 // For compilers that support precompilation, includes "wx.h".
  26 #include "wx/wxprec.h"
  27
  28 #ifdef __BORLANDC__
  29   #pragma hdrstop
  30 #endif
  31
  32 #ifndef WX_PRECOMP
  33     #include "wx/intl.h"
  34     #include "wx/log.h"
  35 #endif // WX_PRECOMP
  36
  37 #include "wx/strconv.h"
  38
  39 #if wxUSE_WCHAR_T
  40
  41 #ifdef __WXMSW__
  42     #include "wx/msw/private.h"
  43 #endif
  44
  45 #ifdef __WINDOWS__
  46     #include "wx/msw/missing.h"
  47 #endif
  48
  49 #ifndef __WXWINCE__
  50 #include <errno.h>
  51 #endif
  52
  53 #include <ctype.h>
  54 #include <string.h>
  55 #include <stdlib.h>
  56
  57 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  58     #define wxHAVE_WIN32_MB2WC
  59 #endif // __WIN32__ but !__WXMICROWIN__
  60
  61 // ----------------------------------------------------------------------------
  62 // headers
  63 // ----------------------------------------------------------------------------
  64
  65 #ifdef __SALFORDC__
  66     #include <clib.h>
  67 #endif
  68
  69 #ifdef HAVE_ICONV
  70     #include <iconv.h>
  71 #endif
  72
  73 #include "wx/encconv.h"
  74 #include "wx/fontmap.h"
  75 #include "wx/utils.h"
  76
  77 #ifdef __WXMAC__
  78 #include <ATSUnicode.h>
  79 #include <TextCommon.h>
  80 #include <TextEncodingConverter.h>
  81
  82 #include  "wx/mac/private.h"  // includes mac headers
  83 #endif
  84 // ----------------------------------------------------------------------------
  85 // macros
  86 // ----------------------------------------------------------------------------
  87
     // Byte-swap, in place, the first len elements of str (32-bit elements for
     // BSWAP_UCS4, 16-bit ones for BSWAP_UTF16). Both macros expand to a braced
     // block, so they are used without a trailing semicolon. The arguments are
     // not parenthesised and are re-evaluated on every iteration: pass plain
     // identifiers only.
  88 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  89 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  90
     // Select the wchar_t-dependent settings used in the rest of this file:
     //  - WC_NAME:      charset name without byte order, tried by the iconv
     //                  wrapper when WC_NAME_BEST is not accepted
     //  - WC_NAME_BEST: charset name with explicit native byte order, tried first
     //  - WC_BSWAP:     the byte swapping macro matching sizeof(wchar_t)
     //  - WC_UTF16:     defined only if wchar_t is 16 bits wide, in which case
     //                  wide strings are handled as UTF-16 (surrogate pairs)
  91 #if SIZEOF_WCHAR_T == 4
  92     #define WC_NAME         "UCS4"
  93     #define WC_BSWAP         BSWAP_UCS4
  94     #ifdef WORDS_BIGENDIAN
  95       #define WC_NAME_BEST  "UCS-4BE"
  96     #else
  97       #define WC_NAME_BEST  "UCS-4LE"
  98     #endif
  99 #elif SIZEOF_WCHAR_T == 2
 100     #define WC_NAME         "UTF16"
 101     #define WC_BSWAP         BSWAP_UTF16
 102     #define WC_UTF16
 103     #ifdef WORDS_BIGENDIAN
 104       #define WC_NAME_BEST  "UTF-16BE"
 105     #else
 106       #define WC_NAME_BEST  "UTF-16LE"
 107     #endif
 108 #else // sizeof(wchar_t) != 2 nor 4
 109     // does this ever happen?
 110     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 111 #endif
 112
 113 // ============================================================================
 114 // implementation
 115 // ============================================================================
 116
 117 // ----------------------------------------------------------------------------
 118 // UTF-16 en/decoding to/from UCS-4
 119 // ----------------------------------------------------------------------------
 120
 121
 122 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 123 {
 124     if (input<=0xffff)
 125     {
 126         if (output)
 127             *output = (wxUint16) input;
 128         return 1;
 129     }
 130     else if (input>=0x110000)
 131     {
 132         return (size_t)-1;
 133     }
 134     else
 135     {
 136         if (output)
 137         {
 138             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 139             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 140         }
 141         return 2;
 142     }
 143 }
 144
 145 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 146 {
 147     if ((*input<0xd800) || (*input>0xdfff))
 148     {
 149         output = *input;
 150         return 1;
 151     }
 152     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 153     {
 154         output = *input;
 155         return (size_t)-1;
 156     }
 157     else
 158     {
 159         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 160         return 2;
 161     }
 162 }
 163
 164
 165 // ----------------------------------------------------------------------------
 166 // wxMBConv
 167 // ----------------------------------------------------------------------------
 168
     // Destructor defined out of line; it has no work to do.
 169 wxMBConv::~wxMBConv()
 170 {
 171     // nothing to do here
 172 }
 173
 174 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 175 {
 176     if ( psz )
 177     {
 178         // calculate the length of the buffer needed first
 179         size_t nLen = MB2WC(NULL, psz, 0);
 180         if ( nLen != (size_t)-1 )
 181         {
 182             // now do the actual conversion
 183             wxWCharBuffer buf(nLen);
 184             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 185             if ( nLen != (size_t)-1 )
 186             {
 187                 return buf;
 188             }
 189         }
 190     }
 191
 192     wxWCharBuffer buf((wchar_t *)NULL);
 193
 194     return buf;
 195 }
 196
 197 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 198 {
 199     if ( pwz )
 200     {
 201         size_t nLen = WC2MB(NULL, pwz, 0);
 202         if ( nLen != (size_t)-1 )
 203         {
 204             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 205             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 206             if ( nLen != (size_t)-1 )
 207             {
 208                 return buf;
 209             }
 210         }
 211     }
 212
 213     wxCharBuffer buf((char *)NULL);
 214
 215     return buf;
 216 }
 217
 218 // ----------------------------------------------------------------------------
 219 // wxMBConvLibc
 220 // ----------------------------------------------------------------------------
 221
     // Forward the conversion to wxMB2WC() with the arguments unchanged and
     // return its result as is.
 222 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 223 {
 224     return wxMB2WC(buf, psz, n);
 225 }
 226
     // Forward the conversion to wxWC2MB() with the arguments unchanged and
     // return its result as is.
 227 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 228 {
 229     return wxWC2MB(buf, psz, n);
 230 }
 231
 232 // ----------------------------------------------------------------------------
 233 // UTF-7
 234 // ----------------------------------------------------------------------------
 235
     // Character tables compiled out with #if 0; nothing in the visible code
     // uses them. Presumably they are the "direct", "optional direct" and
     // base64 character sets of UTF-7 (RFC 2152), kept for the implementation
     // that is still to be written -- TODO confirm.
 236 #if 0
 237 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 238                         "abcdefghijklmnopqrstuvwxyz"
 239                         "0123456789'(),-./:?";
 240 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 241 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 242                         "abcdefghijklmnopqrstuvwxyz"
 243                         "0123456789+/";
 244 #endif
 245
 246 // TODO: write actual implementations of UTF-7 here
     // Placeholder: all arguments are ignored, nothing is written and 0 is
     // always returned.
     // NOTE(review): 0 is also what a successful conversion of an empty string
     //               would yield, so callers cannot tell that UTF-7 is not
     //               supported -- consider returning (size_t)-1 instead.
 247 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 248                            const char * WXUNUSED(psz),
 249                            size_t WXUNUSED(n)) const
 250 {
 251   return 0;
 252 }
 253
     // Placeholder: all arguments are ignored, nothing is written and 0 is
     // always returned.
     // NOTE(review): 0 is also what a successful conversion of an empty string
     //               would yield, so callers cannot tell that UTF-7 is not
     //               supported -- consider returning (size_t)-1 instead.
 254 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 255                            const wchar_t * WXUNUSED(psz),
 256                            size_t WXUNUSED(n)) const
 257 {
 258   return 0;
 259 }
 260
 261 // ----------------------------------------------------------------------------
 262 // UTF-8
 263 // ----------------------------------------------------------------------------
 264
     // utf8_max[i] is the largest value which can be stored in a UTF-8 sequence
     // of i + 1 bytes (for i from 0 to 5). The decoder uses the table to reject
     // overlong encodings, the encoder to find the sequence length. The last
     // entry, 0xffffffff, guarantees that the encoder's search loop terminates
     // for any 32-bit value.
 265 static wxUint32 utf8_max[]=
 266     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 267
 268 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 269 {
 270     size_t len = 0;
 271
 272     while (*psz && ((!buf) || (len < n)))
 273     {
 274         unsigned char cc = *psz++, fc = cc;
 275         unsigned cnt;
 276         for (cnt = 0; fc & 0x80; cnt++)
 277             fc <<= 1;
 278         if (!cnt)
 279         {
 280             // plain ASCII char
 281             if (buf)
 282                 *buf++ = cc;
 283             len++;
 284         }
 285         else
 286         {
 287             cnt--;
 288             if (!cnt)
 289             {
 290                 // invalid UTF-8 sequence
 291                 return (size_t)-1;
 292             }
 293             else
 294             {
 295                 unsigned ocnt = cnt - 1;
 296                 wxUint32 res = cc & (0x3f >> cnt);
 297                 while (cnt--)
 298                 {
 299                     cc = *psz++;
 300                     if ((cc & 0xC0) != 0x80)
 301                     {
 302                         // invalid UTF-8 sequence
 303                         return (size_t)-1;
 304                     }
 305                     res = (res << 6) | (cc & 0x3f);
 306                 }
 307                 if (res <= utf8_max[ocnt])
 308                 {
 309                     // illegal UTF-8 encoding
 310                     return (size_t)-1;
 311                 }
 312 #ifdef WC_UTF16
 313                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 314                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 315                 if (pa == (size_t)-1)
 316                   return (size_t)-1;
 317                 if (buf)
 318                     buf += pa;
 319                 len += pa;
 320 #else // !WC_UTF16
 321                 if (buf)
 322                     *buf++ = res;
 323                 len++;
 324 #endif // WC_UTF16/!WC_UTF16
 325             }
 326         }
 327     }
 328     if (buf && (len < n))
 329         *buf = 0;
 330     return len;
 331 }
 332
 333 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 334 {
 335     size_t len = 0;
 336
 337     while (*psz && ((!buf) || (len < n)))
 338     {
 339         wxUint32 cc;
 340 #ifdef WC_UTF16
 341         // cast is ok for WC_UTF16
 342         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 343         psz += (pa == (size_t)-1) ? 1 : pa;
 344 #else
 345         cc=(*psz++) & 0x7fffffff;
 346 #endif
 347         unsigned cnt;
 348         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 349         if (!cnt)
 350         {
 351             // plain ASCII char
 352             if (buf)
 353                 *buf++ = (char) cc;
 354             len++;
 355         }
 356
 357         else
 358         {
 359             len += cnt + 1;
 360             if (buf)
 361             {
 362                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 363                 while (cnt--)
 364                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 365             }
 366         }
 367     }
 368
 369     if (buf && (len<n)) *buf = 0;
 370
 371     return len;
 372 }
 373
 374
 375
 376
 377 // ----------------------------------------------------------------------------
 378 // UTF-16
 379 // ----------------------------------------------------------------------------
 380
     // The UTF-16 converters below are written once as "straight" (the encoded
     // data has the byte order of this machine) and "swap" (opposite byte
     // order); these macros map the two names onto the LE and BE classes
     // according to the endianness of the platform.
 381 #ifdef WORDS_BIGENDIAN
 382     #define wxMBConvUTF16straight wxMBConvUTF16BE
 383     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 384 #else
 385     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 386     #define wxMBConvUTF16straight wxMBConvUTF16LE
 387 #endif
 388
 389
 390 #ifdef WC_UTF16
 391
 392 // copy 16bit MB to 16bit String
 393 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 394 {
 395     size_t len=0;
 396
 397     while (*(wxUint16*)psz && (!buf || len < n))
 398     {
 399         if (buf)
 400             *buf++ = *(wxUint16*)psz;
 401         len++;
 402
 403         psz += sizeof(wxUint16);
 404     }
 405     if (buf && len<n)   *buf=0;
 406
 407     return len;
 408 }
 409
 410
 411 // copy 16bit String to 16bit MB
 412 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 413 {
 414     size_t len=0;
 415
 416     while (*psz && (!buf || len < n))
 417     {
 418         if (buf)
 419         {
 420             *(wxUint16*)buf = *psz;
 421             buf += sizeof(wxUint16);
 422         }
 423         len += sizeof(wxUint16);
 424         psz++;
 425     }
 426     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 427
 428     return len;
 429 }
 430
 431
 432 // swap 16bit MB to 16bit String
 433 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 434 {
 435     size_t len=0;
 436
 437     while (*(wxUint16*)psz && (!buf || len < n))
 438     {
 439         if (buf)
 440         {
 441             ((char *)buf)[0] = psz[1];
 442             ((char *)buf)[1] = psz[0];
 443             buf++;
 444         }
 445         len++;
 446         psz += sizeof(wxUint16);
 447     }
 448     if (buf && len<n)   *buf=0;
 449
 450     return len;
 451 }
 452
 453
 454 // swap 16bit MB to 16bit String
 455 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 456 {
 457     size_t len=0;
 458
 459     while (*psz && (!buf || len < n))
 460     {
 461         if (buf)
 462         {
 463             *buf++ = ((char*)psz)[1];
 464             *buf++ = ((char*)psz)[0];
 465         }
 466         len += sizeof(wxUint16);
 467         psz++;
 468     }
 469     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 470
 471     return len;
 472 }
 473
 474
 475 #else // WC_UTF16
 476
 477
 478 // copy 16bit MB to 32bit String
 479 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 480 {
 481     size_t len=0;
 482
 483     while (*(wxUint16*)psz && (!buf || len < n))
 484     {
 485         wxUint32 cc;
 486         size_t pa=decode_utf16((wxUint16*)psz, cc);
 487         if (pa == (size_t)-1)
 488             return pa;
 489
 490         if (buf)
 491             *buf++ = cc;
 492         len++;
 493         psz += pa * sizeof(wxUint16);
 494     }
 495     if (buf && len<n)   *buf=0;
 496
 497     return len;
 498 }
 499
 500
 501 // copy 32bit String to 16bit MB
 502 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 503 {
 504     size_t len=0;
 505
 506     while (*psz && (!buf || len < n))
 507     {
 508         wxUint16 cc[2];
 509         size_t pa=encode_utf16(*psz, cc);
 510
 511         if (pa == (size_t)-1)
 512             return pa;
 513
 514         if (buf)
 515         {
 516             *(wxUint16*)buf = cc[0];
 517             buf += sizeof(wxUint16);
 518             if (pa > 1)
 519             {
 520                 *(wxUint16*)buf = cc[1];
 521                 buf += sizeof(wxUint16);
 522             }
 523         }
 524
 525         len += pa*sizeof(wxUint16);
 526         psz++;
 527     }
 528     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 529
 530     return len;
 531 }
 532
 533
 534 // swap 16bit MB to 32bit String
 535 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 536 {
 537     size_t len=0;
 538
 539     while (*(wxUint16*)psz && (!buf || len < n))
 540     {
 541         wxUint32 cc;
 542         char tmp[4];
 543         tmp[0]=psz[1];  tmp[1]=psz[0];
 544         tmp[2]=psz[3];  tmp[3]=psz[2];
 545
 546         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 547         if (pa == (size_t)-1)
 548             return pa;
 549
 550         if (buf)
 551             *buf++ = cc;
 552
 553         len++;
 554         psz += pa * sizeof(wxUint16);
 555     }
 556     if (buf && len<n)   *buf=0;
 557
 558     return len;
 559 }
 560
 561
 562 // swap 32bit String to 16bit MB
 563 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 564 {
 565     size_t len=0;
 566
 567     while (*psz && (!buf || len < n))
 568     {
 569         wxUint16 cc[2];
 570         size_t pa=encode_utf16(*psz, cc);
 571
 572         if (pa == (size_t)-1)
 573             return pa;
 574
 575         if (buf)
 576         {
 577             *buf++ = ((char*)cc)[1];
 578             *buf++ = ((char*)cc)[0];
 579             if (pa > 1)
 580             {
 581                 *buf++ = ((char*)cc)[3];
 582                 *buf++ = ((char*)cc)[2];
 583             }
 584         }
 585
 586         len += pa*sizeof(wxUint16);
 587         psz++;
 588     }
 589     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 590
 591     return len;
 592 }
 593
 594 #endif // WC_UTF16
 595
 596
 597 // ----------------------------------------------------------------------------
 598 // UTF-32
 599 // ----------------------------------------------------------------------------
 600
     // Same scheme as for UTF-16: the converters below are written as
     // "straight" (native byte order) and "swap" (opposite byte order) and
     // mapped here onto the LE and BE classes according to the endianness of
     // the platform.
 601 #ifdef WORDS_BIGENDIAN
 602 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 603 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 604 #else
 605 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 606 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 607 #endif
 608
 609
     // Global instances of the little and big endian UTF-32 converters.
 610 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
 611 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 612
 613
 614 #ifdef WC_UTF16
 615
 616 // copy 32bit MB to 16bit String
 617 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 618 {
 619     size_t len=0;
 620
 621     while (*(wxUint32*)psz && (!buf || len < n))
 622     {
 623         wxUint16 cc[2];
 624
 625         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 626         if (pa == (size_t)-1)
 627             return pa;
 628
 629         if (buf)
 630         {
 631             *buf++ = cc[0];
 632             if (pa > 1)
 633                 *buf++ = cc[1];
 634         }
 635         len += pa;
 636         psz += sizeof(wxUint32);
 637     }
 638     if (buf && len<n)   *buf=0;
 639
 640     return len;
 641 }
 642
 643
 644 // copy 16bit String to 32bit MB
 645 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 646 {
 647     size_t len=0;
 648
 649     while (*psz && (!buf || len < n))
 650     {
 651         wxUint32 cc;
 652
 653         // cast is ok for WC_UTF16
 654         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 655         if (pa == (size_t)-1)
 656             return pa;
 657
 658         if (buf)
 659         {
 660             *(wxUint32*)buf = cc;
 661             buf += sizeof(wxUint32);
 662         }
 663         len += sizeof(wxUint32);
 664         psz += pa;
 665     }
 666
 667     if (buf && len<=n-sizeof(wxUint32))
 668         *(wxUint32*)buf=0;
 669
 670     return len;
 671 }
 672
 673
 674
 675 // swap 32bit MB to 16bit String
 676 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 677 {
 678     size_t len=0;
 679
 680     while (*(wxUint32*)psz && (!buf || len < n))
 681     {
 682         char tmp[4];
 683         tmp[0] = psz[3];   tmp[1] = psz[2];
 684         tmp[2] = psz[1];   tmp[3] = psz[0];
 685
 686
 687         wxUint16 cc[2];
 688
 689         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 690         if (pa == (size_t)-1)
 691             return pa;
 692
 693         if (buf)
 694         {
 695             *buf++ = cc[0];
 696             if (pa > 1)
 697                 *buf++ = cc[1];
 698         }
 699         len += pa;
 700         psz += sizeof(wxUint32);
 701     }
 702
 703     if (buf && len<n)
 704         *buf=0;
 705
 706     return len;
 707 }
 708
 709
 710 // swap 16bit String to 32bit MB
 711 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 712 {
 713     size_t len=0;
 714
 715     while (*psz && (!buf || len < n))
 716     {
 717         char cc[4];
 718
 719         // cast is ok for WC_UTF16
 720         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 721         if (pa == (size_t)-1)
 722             return pa;
 723
 724         if (buf)
 725         {
 726             *buf++ = cc[3];
 727             *buf++ = cc[2];
 728             *buf++ = cc[1];
 729             *buf++ = cc[0];
 730         }
 731         len += sizeof(wxUint32);
 732         psz += pa;
 733     }
 734
 735     if (buf && len<=n-sizeof(wxUint32))
 736         *(wxUint32*)buf=0;
 737
 738     return len;
 739 }
 740
 741 #else // WC_UTF16
 742
 743
 744 // copy 32bit MB to 32bit String
 745 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 746 {
 747     size_t len=0;
 748
 749     while (*(wxUint32*)psz && (!buf || len < n))
 750     {
 751         if (buf)
 752             *buf++ = *(wxUint32*)psz;
 753         len++;
 754         psz += sizeof(wxUint32);
 755     }
 756
 757     if (buf && len<n)
 758         *buf=0;
 759
 760     return len;
 761 }
 762
 763
 764 // copy 32bit String to 32bit MB
 765 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 766 {
 767     size_t len=0;
 768
 769     while (*psz && (!buf || len < n))
 770     {
 771         if (buf)
 772         {
 773             *(wxUint32*)buf = *psz;
 774             buf += sizeof(wxUint32);
 775         }
 776
 777         len += sizeof(wxUint32);
 778         psz++;
 779     }
 780
 781     if (buf && len<=n-sizeof(wxUint32))
 782         *(wxUint32*)buf=0;
 783
 784     return len;
 785 }
 786
 787
 788 // swap 32bit MB to 32bit String
 789 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 790 {
 791     size_t len=0;
 792
 793     while (*(wxUint32*)psz && (!buf || len < n))
 794     {
 795         if (buf)
 796         {
 797             ((char *)buf)[0] = psz[3];
 798             ((char *)buf)[1] = psz[2];
 799             ((char *)buf)[2] = psz[1];
 800             ((char *)buf)[3] = psz[0];
 801             buf++;
 802         }
 803         len++;
 804         psz += sizeof(wxUint32);
 805     }
 806
 807     if (buf && len<n)
 808         *buf=0;
 809
 810     return len;
 811 }
 812
 813
 814 // swap 32bit String to 32bit MB
 815 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 816 {
 817     size_t len=0;
 818
 819     while (*psz && (!buf || len < n))
 820     {
 821         if (buf)
 822         {
 823             *buf++ = ((char *)psz)[3];
 824             *buf++ = ((char *)psz)[2];
 825             *buf++ = ((char *)psz)[1];
 826             *buf++ = ((char *)psz)[0];
 827         }
 828         len += sizeof(wxUint32);
 829         psz++;
 830     }
 831
 832     if (buf && len<=n-sizeof(wxUint32))
 833         *(wxUint32*)buf=0;
 834
 835     return len;
 836 }
 837
 838
 839 #endif // WC_UTF16
 840
 841
 842 // ============================================================================
 843 // The classes doing conversion using the iconv_xxx() functions
 844 // ============================================================================
 845
 846 #ifdef HAVE_ICONV
 847
 848 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 849 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 850 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 851 //     (which means error) and says there are 0 bytes left in the input buffer --
 852 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 853 //     this alternative test for iconv() failure.
 854 //     [This bug does not appear in glibc 2.2.]
     // ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned
     // cres and left bufLeft bytes of input unconverted really failed. With
     // glibc 2.0/2.1 a (size_t)-1 result with errno == E2BIG and no input left
     // is not counted as a failure, see above.
 855 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 856 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
 857                                      (errno != E2BIG || bufLeft != 0))
 858 #else
 859 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
 860 #endif
 861
     // Cast the address of an input pointer to the "ICONV_CONST char **" type
     // passed as the input argument of iconv(); ICONV_CONST itself is not
     // defined in this part of the file.
 862 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
 863
 864 // ----------------------------------------------------------------------------
 865 // wxMBConv_iconv: encapsulates an iconv character set
 866 // ----------------------------------------------------------------------------
 867
     // Converter between a multibyte charset known to iconv and wchar_t.
     //
     // An object holds two iconv descriptors, one per direction, opened by the
     // constructor and closed by the destructor. The name of the charset used
     // by iconv to represent wchar_t is found by the first constructor call
     // which manages to open a descriptor and is then shared by all objects.
     //
     // NOTE(review): nothing visible here prevents copying an object, which
     //               would make both copies close the same descriptors.
 868 class wxMBConv_iconv : public wxMBConv
 869 {
 870 public:
          // open the conversions between the charset called name and wchar_t,
          // check the result with IsOk()
 871     wxMBConv_iconv(const wxChar *name);
 872     virtual ~wxMBConv_iconv();
 873
          // both functions return (size_t)-1 if iconv() fails; with a NULL buf
          // they only compute the length of the result
 874     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
 875     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
 876
          // true only if both conversion descriptors were opened successfully
 877     bool IsOk() const
 878         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
 879
 880 protected:
 881     // the iconv handlers used to translate from multibyte to wide char and in
 882     // the other direction
 883     iconv_t m2w,
 884             w2m;
 885
 886 private:
 887     // the name (for iconv_open()) of a wide char charset -- if none is
 888     // available on this machine, it will remain NULL
 889     static const char *ms_wcCharsetName;
 890
 891     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
 892     // different endian-ness than the native one
 893     static bool ms_wcNeedsSwap;
 894 };
 895
     // not determined yet: this is done by the first constructor call
 896 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
 897 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
 898
 899 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
 900 {
 901     // Do it the hard way
 902     char cname[100];
 903     for (size_t i = 0; i < wxStrlen(name)+1; i++)
 904         cname[i] = (char) name[i];
 905
 906     // check for charset that represents wchar_t:
 907     if (ms_wcCharsetName == NULL)
 908     {
 909         ms_wcNeedsSwap = false;
 910
 911         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
 912         ms_wcCharsetName = WC_NAME_BEST;
 913         m2w = iconv_open(ms_wcCharsetName, cname);
 914
 915         if (m2w == (iconv_t)-1)
 916         {
 917             // try charset w/o bytesex info (e.g. "UCS4")
 918             // and check for bytesex ourselves:
 919             ms_wcCharsetName = WC_NAME;
 920             m2w = iconv_open(ms_wcCharsetName, cname);
 921
 922             // last bet, try if it knows WCHAR_T pseudo-charset
 923             if (m2w == (iconv_t)-1)
 924             {
 925                 ms_wcCharsetName = "WCHAR_T";
 926                 m2w = iconv_open(ms_wcCharsetName, cname);
 927             }
 928
 929             if (m2w != (iconv_t)-1)
 930             {
 931                 char    buf[2], *bufPtr;
 932                 wchar_t wbuf[2], *wbufPtr;
 933                 size_t  insz, outsz;
 934                 size_t  res;
 935
 936                 buf[0] = 'A';
 937                 buf[1] = 0;
 938                 wbuf[0] = 0;
 939                 insz = 2;
 940                 outsz = SIZEOF_WCHAR_T * 2;
 941                 wbufPtr = wbuf;
 942                 bufPtr = buf;
 943
 944                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
 945                             (char**)&wbufPtr, &outsz);
 946
 947                 if (ICONV_FAILED(res, insz))
 948                 {
 949                     ms_wcCharsetName = NULL;
 950                     wxLogLastError(wxT("iconv"));
 951                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
 952                 }
 953                 else
 954                 {
 955                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
 956                 }
 957             }
 958             else
 959             {
 960                 ms_wcCharsetName = NULL;
 961
 962                 // VS: we must not output an error here, since wxWidgets will safely
 963                 //     fall back to using wxEncodingConverter.
 964                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
 965                 //wxLogError(
 966             }
 967         }
 968         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
 969     }
 970     else // we already have ms_wcCharsetName
 971     {
 972         m2w = iconv_open(ms_wcCharsetName, cname);
 973     }
 974
 975     // NB: don't ever pass NULL to iconv_open(), it may crash!
 976     if ( ms_wcCharsetName )
 977     {
 978         w2m = iconv_open( cname, ms_wcCharsetName);
 979     }
 980     else
 981     {
 982         w2m = (iconv_t)-1;
 983     }
 984 }
 985
     // Close the conversion descriptors; (iconv_t)-1 marks a descriptor which
     // was never opened and is skipped.
 986 wxMBConv_iconv::~wxMBConv_iconv()
 987 {
 988     if ( m2w != (iconv_t)-1 )
 989         iconv_close(m2w);
 990     if ( w2m != (iconv_t)-1 )
 991         iconv_close(w2m);
 992 }
 993
     // Convert the NUL-terminated multibyte string psz to wide characters using
     // the m2w descriptor.
     //
     // If buf is not NULL, it has room for n wide characters: the result is
     // stored there, swapped to native byte order if needed and NUL-terminated
     // if there is room left for it. If buf is NULL, the string is converted
     // piece by piece into a small temporary buffer only to count the
     // characters. In both cases the number of wide characters produced
     // (without the NUL) is returned, or (size_t)-1 if iconv() failed.
 994 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 995 {
          // both sizes are in bytes as this is what iconv() works with
 996     size_t inbuf = strlen(psz);
 997     size_t outbuf = n * SIZEOF_WCHAR_T;
 998     size_t res, cres;
 999     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1000     wchar_t *bufPtr = buf;
1001     const char *pszPtr = psz;
1002
1003     if (buf)
1004     {
1005         // have destination buffer, convert there
1006         cres = iconv(m2w,
1007                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1008                      (char**)&bufPtr, &outbuf);
              // outbuf is now the number of bytes left unused in buf
1009         res = n - (outbuf / SIZEOF_WCHAR_T);
1010
1011         if (ms_wcNeedsSwap)
1012         {
1013             // convert to native endianness
1014             WC_BSWAP(buf /* _not_ bufPtr */, res)
1015         }
1016
1017         // NB: iconv was given only strlen(psz) characters on input, and so
1018         //     it couldn't convert the trailing zero. Let's do it ourselves
1019         //     if there's some room left for it in the output buffer.
1020         if (res < n)
1021             buf[res] = 0;
1022     }
1023     else
1024     {
1025         // no destination buffer... convert using temp buffer
1026         // to calculate destination buffer requirement
1027         wchar_t tbuf[8];
1028         res = 0;
1029         do {
1030             bufPtr = tbuf;
1031             outbuf = 8*SIZEOF_WCHAR_T;
1032
1033             cres = iconv(m2w,
1034                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1035                          (char**)&bufPtr, &outbuf );
1036
1037             res += 8-(outbuf/SIZEOF_WCHAR_T);
              // E2BIG only means that tbuf was filled, so go on with the rest
1038         } while ((cres==(size_t)-1) && (errno==E2BIG));
1039     }
1040
1041     if (ICONV_FAILED(cres, inbuf))
1042     {
1043         //VS: it is ok if iconv fails, hence trace only
1044         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1045         return (size_t)-1;
1046     }
1047
1048     return res;
1049 }
1050
1051 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1052 {
1053     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1054     size_t outbuf = n;
1055     size_t res, cres;
1056
1057     wchar_t *tmpbuf = 0;
1058
1059     if (ms_wcNeedsSwap)
1060     {
1061         // need to copy to temp buffer to switch endianness
1062         // this absolutely doesn't rock!
1063         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1064         //  could be in read-only memory, or be accessed in some other thread)
1065         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1066         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1067         WC_BSWAP(tmpbuf, inbuf)
1068         psz=tmpbuf;
1069     }
1070
1071     if (buf)
1072     {
1073         // have destination buffer, convert there
1074         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1075
1076         res = n-outbuf;
1077
1078         // NB: iconv was given only wcslen(psz) characters on input, and so
1079         //     it couldn't convert the trailing zero. Let's do it ourselves
1080         //     if there's some room left for it in the output buffer.
1081         if (res < n)
1082             buf[0] = 0;
1083     }
1084     else
1085     {
1086         // no destination buffer... convert using temp buffer
1087         // to calculate destination buffer requirement
1088         char tbuf[16];
1089         res = 0;
1090         do {
1091             buf = tbuf; outbuf = 16;
1092
1093             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1094
1095             res += 16 - outbuf;
1096         } while ((cres==(size_t)-1) && (errno==E2BIG));
1097     }
1098
1099     if (ms_wcNeedsSwap)
1100     {
1101         free(tmpbuf);
1102     }
1103
1104     if (ICONV_FAILED(cres, inbuf))
1105     {
1106         //VS: it is ok if iconv fails, hence trace only
1107         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1108         return (size_t)-1;
1109     }
1110
1111     return res;
1112 }
1113
1114 #endif // HAVE_ICONV
1115
1116
1117 // ============================================================================
1118 // Win32 conversion classes
1119 // ============================================================================
1120
1121 #ifdef wxHAVE_WIN32_MB2WC
1122
1123 // from utils.cpp
1124 #if wxUSE_FONTMAP
1125 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1126 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1127 #endif
1128
1129 class wxMBConv_win32 : public wxMBConv
1130 {
1131 public:
1132     wxMBConv_win32()
1133     {
1134         m_CodePage = CP_ACP;
1135     }
1136
1137 #if wxUSE_FONTMAP
1138     wxMBConv_win32(const wxChar* name)
1139     {
1140         m_CodePage = wxCharsetToCodepage(name);
1141     }
1142
1143     wxMBConv_win32(wxFontEncoding encoding)
1144     {
1145         m_CodePage = wxEncodingToCodepage(encoding);
1146     }
1147 #endif
1148
1149     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1150     {
1151         // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
1152         // the behaviour is not compatible with the Unix version (using iconv)
1153         // and break the library itself, e.g. wxTextInputStream::NextChar()
1154         // wouldn't work if reading an incomplete MB char didn't result in an
1155         // error
1156         const size_t len = ::MultiByteToWideChar
1157                              (
1158                                 m_CodePage,     // code page
1159                                 MB_ERR_INVALID_CHARS, // flags: fall on error
1160                                 psz,            // input string
1161                                 -1,             // its length (NUL-terminated)
1162                                 buf,            // output string
1163                                 buf ? n : 0     // size of output buffer
1164                              );
1165
1166         // note that it returns count of written chars for buf != NULL and size
1167         // of the needed buffer for buf == NULL so in either case the length of
1168         // the string (which never includes the terminating NUL) is one less
1169         return len ? len - 1 : (size_t)-1;
1170     }
1171
1172     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1173     {
1174         /*
1175             we have a problem here: by default, WideCharToMultiByte() may
1176             replace characters unrepresentable in the target code page with bad
1177             quality approximations such as turning "1/2" symbol (U+00BD) into
1178             "1" for the code pages which don't have it and we, obviously, want
1179             to avoid this at any price
1180
1181             the trouble is that this function does it _silently_, i.e. it won't
1182             even tell us whether it did or not... Win98/2000 and higher provide
1183             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1184             we have to resort to a round trip, i.e. check that converting back
1185             results in the same string -- this is, of course, expensive but
1186             otherwise we simply can't be sure to not garble the data.
1187          */
1188
1189         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1190         // it doesn't work with CJK encodings (which we test for rather roughly
1191         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1192         // supporting it
1193         BOOL usedDef wxDUMMY_INITIALIZE(false),
1194              *pUsedDef;
1195         int flags;
1196         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1197         {
1198             // it's our lucky day
1199             flags = WC_NO_BEST_FIT_CHARS;
1200             pUsedDef = &usedDef;
1201         }
1202         else // old system or unsupported encoding
1203         {
1204             flags = 0;
1205             pUsedDef = NULL;
1206         }
1207
1208         const size_t len = ::WideCharToMultiByte
1209                              (
1210                                 m_CodePage,     // code page
1211                                 flags,          // either none or no best fit
1212                                 pwz,            // input string
1213                                 -1,             // it is (wide) NUL-terminated
1214                                 buf,            // output buffer
1215                                 buf ? n : 0,    // and its size
1216                                 NULL,           // default "replacement" char
1217                                 pUsedDef        // [out] was it used?
1218                              );
1219
1220         if ( !len )
1221         {
1222             // function totally failed
1223             return (size_t)-1;
1224         }
1225
1226         // if we were really converting, check if we succeeded
1227         if ( buf )
1228         {
1229             if ( flags )
1230             {
1231                 // check if the conversion failed, i.e. if any replacements
1232                 // were done
1233                 if ( usedDef )
1234                     return (size_t)-1;
1235             }
1236             else // we must resort to double tripping...
1237             {
1238                 wxWCharBuffer wcBuf(n);
1239                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1240                         wcscmp(wcBuf, pwz) != 0 )
1241                 {
1242                     // we didn't obtain the same thing we started from, hence
1243                     // the conversion was lossy and we consider that it failed
1244                     return (size_t)-1;
1245                 }
1246             }
1247         }
1248
1249         // see the comment above for the reason of "len - 1"
1250         return len - 1;
1251     }
1252
1253     bool IsOk() const { return m_CodePage != -1; }
1254
1255 private:
1256     static bool CanUseNoBestFit()
1257     {
1258         static int s_isWin98Or2k = -1;
1259
1260         if ( s_isWin98Or2k == -1 )
1261         {
1262             int verMaj, verMin;
1263             switch ( wxGetOsVersion(&verMaj, &verMin) )
1264             {
1265                 case wxWIN95:
1266                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1267                     break;
1268
1269                 case wxWINDOWS_NT:
1270                     s_isWin98Or2k = verMaj >= 5;
1271                     break;
1272
1273                 default:
1274                     // unknown, be conseravtive by default
1275                     s_isWin98Or2k = 0;
1276             }
1277
1278             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1279         }
1280
1281         return s_isWin98Or2k == 1;
1282     }
1283
1284     long m_CodePage;
1285 };
1286
1287 #endif // wxHAVE_WIN32_MB2WC
1288
1289 // ============================================================================
1290 // Mac conversion classes
1291 // ============================================================================
1292
1293 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1294
1295 class wxMBConv_mac : public wxMBConv
1296 {
1297 public:
1298     wxMBConv_mac()
1299     {
1300         Init(CFStringGetSystemEncoding()) ;
1301     }
1302
1303     wxMBConv_mac(const wxChar* name)
1304     {
1305         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1306     }
1307
1308     wxMBConv_mac(wxFontEncoding encoding)
1309     {
1310         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1311     }
1312
1313         ~wxMBConv_mac()
1314         {
1315             OSStatus status = noErr ;
1316             status = TECDisposeConverter(m_MB2WC_converter);
1317             status = TECDisposeConverter(m_WC2MB_converter);
1318         }
1319
1320
1321         void Init( TextEncodingBase encoding)
1322         {
1323             OSStatus status = noErr ;
1324                 m_char_encoding = encoding ;
1325                 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1326
1327             status = TECCreateConverter(&m_MB2WC_converter,
1328                                         m_char_encoding,
1329                                         m_unicode_encoding);
1330             status = TECCreateConverter(&m_WC2MB_converter,
1331                                         m_unicode_encoding,
1332                                         m_char_encoding);
1333         }
1334
1335     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1336     {
1337             OSStatus status = noErr ;
1338             ByteCount byteOutLen ;
1339             ByteCount byteInLen = strlen(psz) ;
1340                 wchar_t *tbuf = NULL ;
1341                 UniChar* ubuf = NULL ;
1342                 size_t res = 0 ;
1343
1344                 if (buf == NULL)
1345                 {
1346                         n = byteInLen ;
1347                         tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1348                 }
1349             ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1350 #if SIZEOF_WCHAR_T == 4
1351                 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1352 #else
1353                 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1354 #endif
1355             status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1356               (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1357 #if SIZEOF_WCHAR_T == 4
1358         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1359         // is not properly terminated we get random characters at the end
1360         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
1361                 wxMBConvUTF16BE converter ;
1362                 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1363                 free( ubuf ) ;
1364 #else
1365                 res = byteOutLen / sizeof( UniChar ) ;
1366 #endif
1367                 if ( buf == NULL )
1368                         free(tbuf) ;
1369
1370         if ( buf  && res < n)
1371             buf[res] = 0;
1372
1373                 return res ;
1374     }
1375
1376     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1377     {
1378             OSStatus status = noErr ;
1379             ByteCount byteOutLen ;
1380             ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1381
1382                 char *tbuf = NULL ;
1383
1384                 if (buf == NULL)
1385                 {
1386                         // worst case
1387                         n = byteInLen * 2 ;
1388                         tbuf = (char*) malloc( n ) ;
1389                 }
1390
1391             ByteCount byteBufferLen = n ;
1392                 UniChar* ubuf = NULL ;
1393 #if SIZEOF_WCHAR_T == 4
1394                 wxMBConvUTF16BE converter ;
1395                 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1396                 byteInLen = unicharlen ;
1397                 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1398                 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
1399 #else
1400                 ubuf = (UniChar*) psz ;
1401 #endif
1402             status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1403                (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1404 #if SIZEOF_WCHAR_T == 4
1405                 free( ubuf ) ;
1406 #endif
1407                 if ( buf == NULL )
1408                         free(tbuf) ;
1409
1410                 size_t res = byteOutLen ;
1411         if ( buf  && res < n)
1412             buf[res] = 0;
1413
1414                 return res ;
1415     }
1416
1417     bool IsOk() const
1418         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
1419
1420 private:
1421         TECObjectRef m_MB2WC_converter ;
1422         TECObjectRef m_WC2MB_converter ;
1423
1424         TextEncodingBase m_char_encoding ;
1425         TextEncodingBase m_unicode_encoding ;
1426 };
1427
1428 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1429
1430 // ============================================================================
1431 // wxEncodingConverter based conversion classes
1432 // ============================================================================
1433
1434 #if wxUSE_FONTMAP
1435
1436 class wxMBConv_wxwin : public wxMBConv
1437 {
1438 private:
1439     void Init()
1440     {
1441         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1442                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1443     }
1444
1445 public:
1446     // temporarily just use wxEncodingConverter stuff,
1447     // so that it works while a better implementation is built
1448     wxMBConv_wxwin(const wxChar* name)
1449     {
1450         if (name)
1451             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1452         else
1453             m_enc = wxFONTENCODING_SYSTEM;
1454
1455         Init();
1456     }
1457
1458     wxMBConv_wxwin(wxFontEncoding enc)
1459     {
1460         m_enc = enc;
1461
1462         Init();
1463     }
1464
1465     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1466     {
1467         size_t inbuf = strlen(psz);
1468         if (buf)
1469             m2w.Convert(psz,buf);
1470         return inbuf;
1471     }
1472
1473     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1474     {
1475         const size_t inbuf = wxWcslen(psz);
1476         if (buf)
1477             w2m.Convert(psz,buf);
1478
1479         return inbuf;
1480     }
1481
1482     bool IsOk() const { return m_ok; }
1483
1484 public:
1485     wxFontEncoding m_enc;
1486     wxEncodingConverter m2w, w2m;
1487
1488     // were we initialized successfully?
1489     bool m_ok;
1490
1491     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1492 };
1493
1494 #endif // wxUSE_FONTMAP
1495
1496 // ============================================================================
1497 // wxCSConv implementation
1498 // ============================================================================
1499
1500 void wxCSConv::Init()
1501 {
1502     m_name = NULL;
1503     m_convReal =  NULL;
1504     m_deferred = true;
1505 }
1506
1507 wxCSConv::wxCSConv(const wxChar *charset)
1508 {
1509     Init();
1510
1511     if ( charset )
1512     {
1513         SetName(charset);
1514     }
1515
1516     m_encoding = wxFONTENCODING_SYSTEM;
1517 }
1518
1519 wxCSConv::wxCSConv(wxFontEncoding encoding)
1520 {
1521     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1522     {
1523         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1524
1525         encoding = wxFONTENCODING_SYSTEM;
1526     }
1527
1528     Init();
1529
1530     m_encoding = encoding;
1531 }
1532
// Dtor: Clear() frees the charset name and deletes the real converter.
wxCSConv::~wxCSConv()
{
    Clear();
}
1537
1538 wxCSConv::wxCSConv(const wxCSConv& conv)
1539         : wxMBConv()
1540 {
1541     Init();
1542
1543     SetName(conv.m_name);
1544     m_encoding = conv.m_encoding;
1545 }
1546
1547 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1548 {
1549     Clear();
1550
1551     SetName(conv.m_name);
1552     m_encoding = conv.m_encoding;
1553
1554     return *this;
1555 }
1556
1557 void wxCSConv::Clear()
1558 {
1559     free(m_name);
1560     delete m_convReal;
1561
1562     m_name = NULL;
1563     m_convReal = NULL;
1564 }
1565
1566 void wxCSConv::SetName(const wxChar *charset)
1567 {
1568     if (charset)
1569     {
1570         m_name = wxStrdup(charset);
1571         m_deferred = true;
1572     }
1573 }
1574
1575 wxMBConv *wxCSConv::DoCreate() const
1576 {
1577     // check for the special case of ASCII or ISO8859-1 charset: as we have
1578     // special knowledge of it anyhow, we don't need to create a special
1579     // conversion object
1580     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1581     {
1582         // don't convert at all
1583         return NULL;
1584     }
1585
1586     // we trust OS to do conversion better than we can so try external
1587     // conversion methods first
1588     //
1589     // the full order is:
1590     //      1. OS conversion (iconv() under Unix or Win32 API)
1591     //      2. hard coded conversions for UTF
1592     //      3. wxEncodingConverter as fall back
1593
1594     // step (1)
1595 #ifdef HAVE_ICONV
1596 #if !wxUSE_FONTMAP
1597     if ( m_name )
1598 #endif // !wxUSE_FONTMAP
1599     {
1600         wxString name(m_name);
1601
1602 #if wxUSE_FONTMAP
1603         if ( name.empty() )
1604             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1605 #endif // wxUSE_FONTMAP
1606
1607         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1608         if ( conv->IsOk() )
1609             return conv;
1610
1611         delete conv;
1612     }
1613 #endif // HAVE_ICONV
1614
1615 #ifdef wxHAVE_WIN32_MB2WC
1616     {
1617 #if wxUSE_FONTMAP
1618         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1619                                       : new wxMBConv_win32(m_encoding);
1620         if ( conv->IsOk() )
1621             return conv;
1622
1623         delete conv;
1624 #else
1625         return NULL;
1626 #endif
1627     }
1628 #endif // wxHAVE_WIN32_MB2WC
1629 #if defined(__WXMAC__)
1630     {
1631         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1632         {
1633
1634                 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1635                                             : new wxMBConv_mac(m_encoding);
1636                 if ( conv->IsOk() )
1637                     return conv;
1638
1639                 delete conv;
1640         }
1641     }
1642 #endif
1643     // step (2)
1644     wxFontEncoding enc = m_encoding;
1645 #if wxUSE_FONTMAP
1646     if ( enc == wxFONTENCODING_SYSTEM && m_name )
1647     {
1648         // use "false" to suppress interactive dialogs -- we can be called from
1649         // anywhere and popping up a dialog from here is the last thing we want to
1650         // do
1651         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1652     }
1653 #endif // wxUSE_FONTMAP
1654
1655     switch ( enc )
1656     {
1657         case wxFONTENCODING_UTF7:
1658              return new wxMBConvUTF7;
1659
1660         case wxFONTENCODING_UTF8:
1661              return new wxMBConvUTF8;
1662
1663         case wxFONTENCODING_UTF16BE:
1664              return new wxMBConvUTF16BE;
1665
1666         case wxFONTENCODING_UTF16LE:
1667              return new wxMBConvUTF16LE;
1668
1669         case wxFONTENCODING_UTF32BE:
1670              return new wxMBConvUTF32BE;
1671
1672         case wxFONTENCODING_UTF32LE:
1673              return new wxMBConvUTF32LE;
1674
1675         default:
1676              // nothing to do but put here to suppress gcc warnings
1677              ;
1678     }
1679
1680     // step (3)
1681 #if wxUSE_FONTMAP
1682     {
1683         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1684                                       : new wxMBConv_wxwin(m_encoding);
1685         if ( conv->IsOk() )
1686             return conv;
1687
1688         delete conv;
1689     }
1690 #endif // wxUSE_FONTMAP
1691
1692     // NB: This is a hack to prevent deadlock. What could otherwise happen
1693     //     in Unicode build: wxConvLocal creation ends up being here
1694     //     because of some failure and logs the error. But wxLog will try to
1695     //     attach timestamp, for which it will need wxConvLocal (to convert
1696     //     time to char* and then wchar_t*), but that fails, tries to log
1697     //     error, but wxLog has a (already locked) critical section that
1698     //     guards static buffer.
1699     static bool alreadyLoggingError = false;
1700     if (!alreadyLoggingError)
1701     {
1702         alreadyLoggingError = true;
1703         wxLogError(_("Cannot convert from the charset '%s'!"),
1704                    m_name ? m_name
1705                       :
1706 #if wxUSE_FONTMAP
1707                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1708 #else // !wxUSE_FONTMAP
1709                          wxString::Format(_("encoding %s"), m_encoding).c_str()
1710 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1711               );
1712         alreadyLoggingError = false;
1713     }
1714
1715     return NULL;
1716 }
1717
1718 void wxCSConv::CreateConvIfNeeded() const
1719 {
1720     if ( m_deferred )
1721     {
1722         wxCSConv *self = (wxCSConv *)this; // const_cast
1723
1724 #if wxUSE_INTL
1725         // if we don't have neither the name nor the encoding, use the default
1726         // encoding for this system
1727         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1728         {
1729             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1730         }
1731 #endif // wxUSE_INTL
1732
1733         self->m_convReal = DoCreate();
1734         self->m_deferred = false;
1735     }
1736 }
1737
1738 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1739 {
1740     CreateConvIfNeeded();
1741
1742     if (m_convReal)
1743         return m_convReal->MB2WC(buf, psz, n);
1744
1745     // latin-1 (direct)
1746     size_t len = strlen(psz);
1747
1748     if (buf)
1749     {
1750         for (size_t c = 0; c <= len; c++)
1751             buf[c] = (unsigned char)(psz[c]);
1752     }
1753
1754     return len;
1755 }
1756
1757 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1758 {
1759     CreateConvIfNeeded();
1760
1761     if (m_convReal)
1762         return m_convReal->WC2MB(buf, psz, n);
1763
1764     // latin-1 (direct)
1765     const size_t len = wxWcslen(psz);
1766     if (buf)
1767     {
1768         for (size_t c = 0; c <= len; c++)
1769         {
1770             if (psz[c] > 0xFF)
1771                 return (size_t)-1;
1772             buf[c] = psz[c];
1773         }
1774     }
1775     else
1776     {
1777         for (size_t c = 0; c <= len; c++)
1778         {
1779             if (psz[c] > 0xFF)
1780                 return (size_t)-1;
1781         }
1782     }
1783
1784     return len;
1785 }
1786
1787 // ----------------------------------------------------------------------------
1788 // globals
1789 // ----------------------------------------------------------------------------
1790
// the object behind wxConvLibc: its class depends on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;


// the exported converters are just references to the static objects above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent initially points to the same object as wxConvLibc
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1811
1812 #else // !wxUSE_WCHAR_T
1813
// stand-ins in absence of wchar_t: the predefined converters are defined as
// plain wxMBConv objects in this case
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
1819
1820 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1821
1822