src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
   9 //              (c) 2000-2003 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ============================================================================
  14 // declarations
  15 // ============================================================================
  16
  17 // ----------------------------------------------------------------------------
  18 // headers
  19 // ----------------------------------------------------------------------------
  20
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  22   #pragma implementation "strconv.h"
  23 #endif
  24
  25 // For compilers that support precompilation, includes "wx.h".
  26 #include "wx/wxprec.h"
  27
  28 #ifdef __BORLANDC__
  29   #pragma hdrstop
  30 #endif
  31
  32 #ifndef WX_PRECOMP
  33     #include "wx/intl.h"
  34     #include "wx/log.h"
  35 #endif // WX_PRECOMP
  36
  37 #include "wx/strconv.h"
  38
  39 #if wxUSE_WCHAR_T
  40
  41 #ifdef __WXMSW__
  42     #include "wx/msw/private.h"
  43     #include "wx/msw/missing.h"
  44 #endif
  45
  46 #ifndef __WXWINCE__
  47 #include <errno.h>
  48 #endif
  49
  50 #include <ctype.h>
  51 #include <string.h>
  52 #include <stdlib.h>
  53
  54 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  55     #define wxHAVE_WIN32_MB2WC
  56 #endif // __WIN32__ but !__WXMICROWIN__
  57
  58 // ----------------------------------------------------------------------------
  59 // headers
  60 // ----------------------------------------------------------------------------
  61
  62 #ifdef __SALFORDC__
  63     #include <clib.h>
  64 #endif
  65
  66 #ifdef HAVE_ICONV
  67     #include <iconv.h>
  68 #endif
  69
  70 #include "wx/encconv.h"
  71 #include "wx/fontmap.h"
  72
  73 #ifdef __WXMAC__
  74 #include <ATSUnicode.h>
  75 #include <TextCommon.h>
  76 #include <TextEncodingConverter.h>
  77
  78 #include  "wx/mac/private.h"  // includes mac headers
  79 #endif
  80 // ----------------------------------------------------------------------------
  81 // macros
  82 // ----------------------------------------------------------------------------
  83
  84 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  85 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  86
  87 #if SIZEOF_WCHAR_T == 4
  88     #define WC_NAME         "UCS4"
  89     #define WC_BSWAP         BSWAP_UCS4
  90     #ifdef WORDS_BIGENDIAN
  91       #define WC_NAME_BEST  "UCS-4BE"
  92     #else
  93       #define WC_NAME_BEST  "UCS-4LE"
  94     #endif
  95 #elif SIZEOF_WCHAR_T == 2
  96     #define WC_NAME         "UTF16"
  97     #define WC_BSWAP         BSWAP_UTF16
  98     #define WC_UTF16
  99     #ifdef WORDS_BIGENDIAN
 100       #define WC_NAME_BEST  "UTF-16BE"
 101     #else
 102       #define WC_NAME_BEST  "UTF-16LE"
 103     #endif
 104 #else // sizeof(wchar_t) != 2 nor 4
 105     // does this ever happen?
 106     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 107 #endif
 108
 109 // ============================================================================
 110 // implementation
 111 // ============================================================================
 112
 113 // ----------------------------------------------------------------------------
 114 // UTF-16 en/decoding to/from UCS-4
 115 // ----------------------------------------------------------------------------
 116
 117
 118 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 119 {
 120     if (input<=0xffff)
 121     {
 122         if (output)
 123             *output = (wxUint16) input;
 124         return 1;
 125     }
 126     else if (input>=0x110000)
 127     {
 128         return (size_t)-1;
 129     }
 130     else
 131     {
 132         if (output)
 133         {
 134             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 135             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 136         }
 137         return 2;
 138     }
 139 }
 140
 141 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 142 {
 143     if ((*input<0xd800) || (*input>0xdfff))
 144     {
 145         output = *input;
 146         return 1;
 147     }
 148     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 149     {
 150         output = *input;
 151         return (size_t)-1;
 152     }
 153     else
 154     {
 155         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 156         return 2;
 157     }
 158 }
 159
 160
 161 // ----------------------------------------------------------------------------
 162 // wxMBConv
 163 // ----------------------------------------------------------------------------
 164
 165 wxMBConv::~wxMBConv()
 166 {
 167     // nothing to do here
 168 }
 169
 170 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 171 {
 172     if ( psz )
 173     {
 174         // calculate the length of the buffer needed first
 175         size_t nLen = MB2WC(NULL, psz, 0);
 176         if ( nLen != (size_t)-1 )
 177         {
 178             // now do the actual conversion
 179             wxWCharBuffer buf(nLen);
 180             nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
 181             if ( nLen != (size_t)-1 )
 182             {
 183                 return buf;
 184             }
 185         }
 186     }
 187
 188     wxWCharBuffer buf((wchar_t *)NULL);
 189
 190     return buf;
 191 }
 192
 193 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 194 {
 195     if ( pwz )
 196     {
 197         size_t nLen = WC2MB(NULL, pwz, 0);
 198         if ( nLen != (size_t)-1 )
 199         {
 200             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 201             nLen = WC2MB(buf.data(), pwz, nLen + 4);
 202             if ( nLen != (size_t)-1 )
 203             {
 204                 return buf;
 205             }
 206         }
 207     }
 208
 209     wxCharBuffer buf((char *)NULL);
 210
 211     return buf;
 212 }
 213
 214 // ----------------------------------------------------------------------------
 215 // wxMBConvLibc
 216 // ----------------------------------------------------------------------------
 217
 218 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 219 {
 220     return wxMB2WC(buf, psz, n);
 221 }
 222
 223 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 224 {
 225     return wxWC2MB(buf, psz, n);
 226 }
 227
 228 // ----------------------------------------------------------------------------
 229 // UTF-7
 230 // ----------------------------------------------------------------------------
 231
 232 #if 0
 233 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 234                         "abcdefghijklmnopqrstuvwxyz"
 235                         "0123456789'(),-./:?";
 236 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 237 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 238                         "abcdefghijklmnopqrstuvwxyz"
 239                         "0123456789+/";
 240 #endif
 241
 242 // TODO: write actual implementations of UTF-7 here
 243 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 244                            const char * WXUNUSED(psz),
 245                            size_t WXUNUSED(n)) const
 246 {
 247   return 0;
 248 }
 249
 250 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 251                            const wchar_t * WXUNUSED(psz),
 252                            size_t WXUNUSED(n)) const
 253 {
 254   return 0;
 255 }
 256
 257 // ----------------------------------------------------------------------------
 258 // UTF-8
 259 // ----------------------------------------------------------------------------
 260
 261 static wxUint32 utf8_max[]=
 262     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 263
 264 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 265 {
 266     size_t len = 0;
 267
 268     while (*psz && ((!buf) || (len < n)))
 269     {
 270         unsigned char cc = *psz++, fc = cc;
 271         unsigned cnt;
 272         for (cnt = 0; fc & 0x80; cnt++)
 273             fc <<= 1;
 274         if (!cnt)
 275         {
 276             // plain ASCII char
 277             if (buf)
 278                 *buf++ = cc;
 279             len++;
 280         }
 281         else
 282         {
 283             cnt--;
 284             if (!cnt)
 285             {
 286                 // invalid UTF-8 sequence
 287                 return (size_t)-1;
 288             }
 289             else
 290             {
 291                 unsigned ocnt = cnt - 1;
 292                 wxUint32 res = cc & (0x3f >> cnt);
 293                 while (cnt--)
 294                 {
 295                     cc = *psz++;
 296                     if ((cc & 0xC0) != 0x80)
 297                     {
 298                         // invalid UTF-8 sequence
 299                         return (size_t)-1;
 300                     }
 301                     res = (res << 6) | (cc & 0x3f);
 302                 }
 303                 if (res <= utf8_max[ocnt])
 304                 {
 305                     // illegal UTF-8 encoding
 306                     return (size_t)-1;
 307                 }
 308 #ifdef WC_UTF16
 309                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 310                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 311                 if (pa == (size_t)-1)
 312                   return (size_t)-1;
 313                 if (buf)
 314                     buf += pa;
 315                 len += pa;
 316 #else // !WC_UTF16
 317                 if (buf)
 318                     *buf++ = res;
 319                 len++;
 320 #endif // WC_UTF16/!WC_UTF16
 321             }
 322         }
 323     }
 324     if (buf && (len < n))
 325         *buf = 0;
 326     return len;
 327 }
 328
 329 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 330 {
 331     size_t len = 0;
 332
 333     while (*psz && ((!buf) || (len < n)))
 334     {
 335         wxUint32 cc;
 336 #ifdef WC_UTF16
 337         // cast is ok for WC_UTF16
 338         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 339         psz += (pa == (size_t)-1) ? 1 : pa;
 340 #else
 341         cc=(*psz++) & 0x7fffffff;
 342 #endif
 343         unsigned cnt;
 344         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 345         if (!cnt)
 346         {
 347             // plain ASCII char
 348             if (buf)
 349                 *buf++ = (char) cc;
 350             len++;
 351         }
 352
 353         else
 354         {
 355             len += cnt + 1;
 356             if (buf)
 357             {
 358                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 359                 while (cnt--)
 360                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 361             }
 362         }
 363     }
 364
 365     if (buf && (len<n)) *buf = 0;
 366
 367     return len;
 368 }
 369
 370
 371
 372
 373 // ----------------------------------------------------------------------------
 374 // UTF-16
 375 // ----------------------------------------------------------------------------
 376
 377 #ifdef WORDS_BIGENDIAN
 378     #define wxMBConvUTF16straight wxMBConvUTF16BE
 379     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 380 #else
 381     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 382     #define wxMBConvUTF16straight wxMBConvUTF16LE
 383 #endif
 384
 385
 386 #ifdef WC_UTF16
 387
 388 // copy 16bit MB to 16bit String
 389 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 390 {
 391     size_t len=0;
 392
 393     while (*(wxUint16*)psz && (!buf || len < n))
 394     {
 395         if (buf)
 396             *buf++ = *(wxUint16*)psz;
 397         len++;
 398
 399         psz += sizeof(wxUint16);
 400     }
 401     if (buf && len<n)   *buf=0;
 402
 403     return len;
 404 }
 405
 406
 407 // copy 16bit String to 16bit MB
 408 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 409 {
 410     size_t len=0;
 411
 412     while (*psz && (!buf || len < n))
 413     {
 414         if (buf)
 415         {
 416             *(wxUint16*)buf = *psz;
 417             buf += sizeof(wxUint16);
 418         }
 419         len += sizeof(wxUint16);
 420         psz++;
 421     }
 422     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 423
 424     return len;
 425 }
 426
 427
 428 // swap 16bit MB to 16bit String
 429 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 430 {
 431     size_t len=0;
 432
 433     while (*(wxUint16*)psz && (!buf || len < n))
 434     {
 435         if (buf)
 436         {
 437             ((char *)buf)[0] = psz[1];
 438             ((char *)buf)[1] = psz[0];
 439             buf++;
 440         }
 441         len++;
 442         psz += sizeof(wxUint16);
 443     }
 444     if (buf && len<n)   *buf=0;
 445
 446     return len;
 447 }
 448
 449
 450 // swap 16bit MB to 16bit String
 451 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 452 {
 453     size_t len=0;
 454
 455     while (*psz && (!buf || len < n))
 456     {
 457         if (buf)
 458         {
 459             *buf++ = ((char*)psz)[1];
 460             *buf++ = ((char*)psz)[0];
 461         }
 462         len += sizeof(wxUint16);
 463         psz++;
 464     }
 465     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 466
 467     return len;
 468 }
 469
 470
 471 #else // WC_UTF16
 472
 473
 474 // copy 16bit MB to 32bit String
 475 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 476 {
 477     size_t len=0;
 478
 479     while (*(wxUint16*)psz && (!buf || len < n))
 480     {
 481         wxUint32 cc;
 482         size_t pa=decode_utf16((wxUint16*)psz, cc);
 483         if (pa == (size_t)-1)
 484             return pa;
 485
 486         if (buf)
 487             *buf++ = cc;
 488         len++;
 489         psz += pa * sizeof(wxUint16);
 490     }
 491     if (buf && len<n)   *buf=0;
 492
 493     return len;
 494 }
 495
 496
 497 // copy 32bit String to 16bit MB
 498 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 499 {
 500     size_t len=0;
 501
 502     while (*psz && (!buf || len < n))
 503     {
 504         wxUint16 cc[2];
 505         size_t pa=encode_utf16(*psz, cc);
 506
 507         if (pa == (size_t)-1)
 508             return pa;
 509
 510         if (buf)
 511         {
 512             *(wxUint16*)buf = cc[0];
 513             buf += sizeof(wxUint16);
 514             if (pa > 1)
 515             {
 516                 *(wxUint16*)buf = cc[1];
 517                 buf += sizeof(wxUint16);
 518             }
 519         }
 520
 521         len += pa*sizeof(wxUint16);
 522         psz++;
 523     }
 524     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 525
 526     return len;
 527 }
 528
 529
 530 // swap 16bit MB to 32bit String
 531 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 532 {
 533     size_t len=0;
 534
 535     while (*(wxUint16*)psz && (!buf || len < n))
 536     {
 537         wxUint32 cc;
 538         char tmp[4];
 539         tmp[0]=psz[1];  tmp[1]=psz[0];
 540         tmp[2]=psz[3];  tmp[3]=psz[2];
 541
 542         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 543         if (pa == (size_t)-1)
 544             return pa;
 545
 546         if (buf)
 547             *buf++ = cc;
 548
 549         len++;
 550         psz += pa * sizeof(wxUint16);
 551     }
 552     if (buf && len<n)   *buf=0;
 553
 554     return len;
 555 }
 556
 557
 558 // swap 32bit String to 16bit MB
 559 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 560 {
 561     size_t len=0;
 562
 563     while (*psz && (!buf || len < n))
 564     {
 565         wxUint16 cc[2];
 566         size_t pa=encode_utf16(*psz, cc);
 567
 568         if (pa == (size_t)-1)
 569             return pa;
 570
 571         if (buf)
 572         {
 573             *buf++ = ((char*)cc)[1];
 574             *buf++ = ((char*)cc)[0];
 575             if (pa > 1)
 576             {
 577                 *buf++ = ((char*)cc)[3];
 578                 *buf++ = ((char*)cc)[2];
 579             }
 580         }
 581
 582         len += pa*sizeof(wxUint16);
 583         psz++;
 584     }
 585     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 586
 587     return len;
 588 }
 589
 590 #endif // WC_UTF16
 591
 592
 593 // ----------------------------------------------------------------------------
 594 // UTF-32
 595 // ----------------------------------------------------------------------------
 596
 597 #ifdef WORDS_BIGENDIAN
 598 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 599 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 600 #else
 601 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 602 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 603 #endif
 604
 605
 606 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
 607 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 608
 609
 610 #ifdef WC_UTF16
 611
 612 // copy 32bit MB to 16bit String
 613 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 614 {
 615     size_t len=0;
 616
 617     while (*(wxUint32*)psz && (!buf || len < n))
 618     {
 619         wxUint16 cc[2];
 620
 621         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 622         if (pa == (size_t)-1)
 623             return pa;
 624
 625         if (buf)
 626         {
 627             *buf++ = cc[0];
 628             if (pa > 1)
 629                 *buf++ = cc[1];
 630         }
 631         len += pa;
 632         psz += sizeof(wxUint32);
 633     }
 634     if (buf && len<n)   *buf=0;
 635
 636     return len;
 637 }
 638
 639
 640 // copy 16bit String to 32bit MB
 641 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 642 {
 643     size_t len=0;
 644
 645     while (*psz && (!buf || len < n))
 646     {
 647         wxUint32 cc;
 648
 649         // cast is ok for WC_UTF16
 650         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 651         if (pa == (size_t)-1)
 652             return pa;
 653
 654         if (buf)
 655         {
 656             *(wxUint32*)buf = cc;
 657             buf += sizeof(wxUint32);
 658         }
 659         len += sizeof(wxUint32);
 660         psz += pa;
 661     }
 662
 663     if (buf && len<=n-sizeof(wxUint32))
 664         *(wxUint32*)buf=0;
 665
 666     return len;
 667 }
 668
 669
 670
 671 // swap 32bit MB to 16bit String
 672 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 673 {
 674     size_t len=0;
 675
 676     while (*(wxUint32*)psz && (!buf || len < n))
 677     {
 678         char tmp[4];
 679         tmp[0] = psz[3];   tmp[1] = psz[2];
 680         tmp[2] = psz[1];   tmp[3] = psz[0];
 681
 682
 683         wxUint16 cc[2];
 684
 685         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 686         if (pa == (size_t)-1)
 687             return pa;
 688
 689         if (buf)
 690         {
 691             *buf++ = cc[0];
 692             if (pa > 1)
 693                 *buf++ = cc[1];
 694         }
 695         len += pa;
 696         psz += sizeof(wxUint32);
 697     }
 698
 699     if (buf && len<n)
 700         *buf=0;
 701
 702     return len;
 703 }
 704
 705
 706 // swap 16bit String to 32bit MB
 707 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 708 {
 709     size_t len=0;
 710
 711     while (*psz && (!buf || len < n))
 712     {
 713         char cc[4];
 714
 715         // cast is ok for WC_UTF16
 716         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 717         if (pa == (size_t)-1)
 718             return pa;
 719
 720         if (buf)
 721         {
 722             *buf++ = cc[3];
 723             *buf++ = cc[2];
 724             *buf++ = cc[1];
 725             *buf++ = cc[0];
 726         }
 727         len += sizeof(wxUint32);
 728         psz += pa;
 729     }
 730
 731     if (buf && len<=n-sizeof(wxUint32))
 732         *(wxUint32*)buf=0;
 733
 734     return len;
 735 }
 736
 737 #else // WC_UTF16
 738
 739
 740 // copy 32bit MB to 32bit String
 741 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 742 {
 743     size_t len=0;
 744
 745     while (*(wxUint32*)psz && (!buf || len < n))
 746     {
 747         if (buf)
 748             *buf++ = *(wxUint32*)psz;
 749         len++;
 750         psz += sizeof(wxUint32);
 751     }
 752
 753     if (buf && len<n)
 754         *buf=0;
 755
 756     return len;
 757 }
 758
 759
 760 // copy 32bit String to 32bit MB
 761 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 762 {
 763     size_t len=0;
 764
 765     while (*psz && (!buf || len < n))
 766     {
 767         if (buf)
 768         {
 769             *(wxUint32*)buf = *psz;
 770             buf += sizeof(wxUint32);
 771         }
 772
 773         len += sizeof(wxUint32);
 774         psz++;
 775     }
 776
 777     if (buf && len<=n-sizeof(wxUint32))
 778         *(wxUint32*)buf=0;
 779
 780     return len;
 781 }
 782
 783
 784 // swap 32bit MB to 32bit String
 785 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 786 {
 787     size_t len=0;
 788
 789     while (*(wxUint32*)psz && (!buf || len < n))
 790     {
 791         if (buf)
 792         {
 793             ((char *)buf)[0] = psz[3];
 794             ((char *)buf)[1] = psz[2];
 795             ((char *)buf)[2] = psz[1];
 796             ((char *)buf)[3] = psz[0];
 797             buf++;
 798         }
 799         len++;
 800         psz += sizeof(wxUint32);
 801     }
 802
 803     if (buf && len<n)
 804         *buf=0;
 805
 806     return len;
 807 }
 808
 809
 810 // swap 32bit String to 32bit MB
 811 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 812 {
 813     size_t len=0;
 814
 815     while (*psz && (!buf || len < n))
 816     {
 817         if (buf)
 818         {
 819             *buf++ = ((char *)psz)[3];
 820             *buf++ = ((char *)psz)[2];
 821             *buf++ = ((char *)psz)[1];
 822             *buf++ = ((char *)psz)[0];
 823         }
 824         len += sizeof(wxUint32);
 825         psz++;
 826     }
 827
 828     if (buf && len<=n-sizeof(wxUint32))
 829         *(wxUint32*)buf=0;
 830
 831     return len;
 832 }
 833
 834
 835 #endif // WC_UTF16
 836
 837
 838 // ============================================================================
 839 // The classes doing conversion using the iconv_xxx() functions
 840 // ============================================================================
 841
 842 #ifdef HAVE_ICONV
 843
 844 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 845 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 846 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 847 //     (which means error) and says there are 0 bytes left in the input buffer --
 848 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 849 //     this alternative test for iconv() failure.
 850 //     [This bug does not appear in glibc 2.2.]
 851 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 852 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
 853                                      (errno != E2BIG || bufLeft != 0))
 854 #else
 855 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
 856 #endif
 857
 858 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
 859
 860 // ----------------------------------------------------------------------------
 861 // wxMBConv_iconv: encapsulates an iconv character set
 862 // ----------------------------------------------------------------------------
 863
 864 class wxMBConv_iconv : public wxMBConv
 865 {
 866 public:
 867     wxMBConv_iconv(const wxChar *name);
 868     virtual ~wxMBConv_iconv();
 869
 870     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
 871     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
 872
 873     bool IsOk() const
 874         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
 875
 876 protected:
 877     // the iconv handlers used to translate from multibyte to wide char and in
 878     // the other direction
 879     iconv_t m2w,
 880             w2m;
 881
 882 private:
 883     // the name (for iconv_open()) of a wide char charset -- if none is
 884     // available on this machine, it will remain NULL
 885     static const char *ms_wcCharsetName;
 886
 887     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
 888     // different endian-ness than the native one
 889     static bool ms_wcNeedsSwap;
 890 };
 891
 892 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
 893 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
 894
 895 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
 896 {
 897     // Do it the hard way
 898     char cname[100];
 899     for (size_t i = 0; i < wxStrlen(name)+1; i++)
 900         cname[i] = (char) name[i];
 901
 902     // check for charset that represents wchar_t:
 903     if (ms_wcCharsetName == NULL)
 904     {
 905         ms_wcNeedsSwap = false;
 906
 907         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
 908         ms_wcCharsetName = WC_NAME_BEST;
 909         m2w = iconv_open(ms_wcCharsetName, cname);
 910
 911         if (m2w == (iconv_t)-1)
 912         {
 913             // try charset w/o bytesex info (e.g. "UCS4")
 914             // and check for bytesex ourselves:
 915             ms_wcCharsetName = WC_NAME;
 916             m2w = iconv_open(ms_wcCharsetName, cname);
 917
 918             // last bet, try if it knows WCHAR_T pseudo-charset
 919             if (m2w == (iconv_t)-1)
 920             {
 921                 ms_wcCharsetName = "WCHAR_T";
 922                 m2w = iconv_open(ms_wcCharsetName, cname);
 923             }
 924
 925             if (m2w != (iconv_t)-1)
 926             {
 927                 char    buf[2], *bufPtr;
 928                 wchar_t wbuf[2], *wbufPtr;
 929                 size_t  insz, outsz;
 930                 size_t  res;
 931
 932                 buf[0] = 'A';
 933                 buf[1] = 0;
 934                 wbuf[0] = 0;
 935                 insz = 2;
 936                 outsz = SIZEOF_WCHAR_T * 2;
 937                 wbufPtr = wbuf;
 938                 bufPtr = buf;
 939
 940                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
 941                             (char**)&wbufPtr, &outsz);
 942
 943                 if (ICONV_FAILED(res, insz))
 944                 {
 945                     ms_wcCharsetName = NULL;
 946                     wxLogLastError(wxT("iconv"));
 947                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
 948                 }
 949                 else
 950                 {
 951                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
 952                 }
 953             }
 954             else
 955             {
 956                 ms_wcCharsetName = NULL;
 957
 958                 // VS: we must not output an error here, since wxWindows will safely
 959                 //     fall back to using wxEncodingConverter.
 960                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
 961                 //wxLogError(
 962             }
 963         }
 964         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
 965     }
 966     else // we already have ms_wcCharsetName
 967     {
 968         m2w = iconv_open(ms_wcCharsetName, cname);
 969     }
 970
 971     // NB: don't ever pass NULL to iconv_open(), it may crash!
 972     if ( ms_wcCharsetName )
 973     {
 974         w2m = iconv_open( cname, ms_wcCharsetName);
 975     }
 976     else
 977     {
 978         w2m = (iconv_t)-1;
 979     }
 980 }
 981
 982 wxMBConv_iconv::~wxMBConv_iconv()
 983 {
 984     if ( m2w != (iconv_t)-1 )
 985         iconv_close(m2w);
 986     if ( w2m != (iconv_t)-1 )
 987         iconv_close(w2m);
 988 }
 989
 990 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 991 {
 992     size_t inbuf = strlen(psz);
 993     size_t outbuf = n * SIZEOF_WCHAR_T;
 994     size_t res, cres;
 995     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
 996     wchar_t *bufPtr = buf;
 997     const char *pszPtr = psz;
 998
 999     if (buf)
1000     {
1001         // have destination buffer, convert there
1002         cres = iconv(m2w,
1003                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1004                      (char**)&bufPtr, &outbuf);
1005         res = n - (outbuf / SIZEOF_WCHAR_T);
1006
1007         if (ms_wcNeedsSwap)
1008         {
1009             // convert to native endianness
1010             WC_BSWAP(buf /* _not_ bufPtr */, res)
1011         }
1012
1013         // NB: iconv was given only strlen(psz) characters on input, and so
1014         //     it couldn't convert the trailing zero. Let's do it ourselves
1015         //     if there's some room left for it in the output buffer.
1016         if (res < n)
1017             buf[res] = 0;
1018     }
1019     else
1020     {
1021         // no destination buffer... convert using temp buffer
1022         // to calculate destination buffer requirement
1023         wchar_t tbuf[8];
1024         res = 0;
1025         do {
1026             bufPtr = tbuf;
1027             outbuf = 8*SIZEOF_WCHAR_T;
1028
1029             cres = iconv(m2w,
1030                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1031                          (char**)&bufPtr, &outbuf );
1032
1033             res += 8-(outbuf/SIZEOF_WCHAR_T);
1034         } while ((cres==(size_t)-1) && (errno==E2BIG));
1035     }
1036
1037     if (ICONV_FAILED(cres, inbuf))
1038     {
1039         //VS: it is ok if iconv fails, hence trace only
1040         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1041         return (size_t)-1;
1042     }
1043
1044     return res;
1045 }
1046
1047 // Convert the NUL-terminated wide string psz to the multibyte encoding
1048 // handled by the w2m iconv descriptor.
1049 //
1050 // If buf is not NULL, at most n bytes are written to it and a terminating NUL
1051 // is appended only if there is still room for it. If buf is NULL nothing is
1052 // stored and only the number of bytes the result needs is computed.
1053 //
1054 // Returns the number of bytes produced (the terminating NUL not included) or
1055 // (size_t)-1 if the conversion failed or no memory was available.
1056 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1057 {
1058     // iconv() counts its input in bytes while the byte swapping below works
1059     // on whole characters, so keep both lengths around
1060     const size_t inlen = wxWcslen(psz);
1061     size_t inbuf = inlen * SIZEOF_WCHAR_T;
1062     size_t outbuf = n;
1063     size_t res, cres;
1064
1065     wchar_t *tmpbuf = 0;
1066
1067     if (ms_wcNeedsSwap)
1068     {
1069         // need to copy to temp buffer to switch endianness
1070         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1071         //  could be in read-only memory, or be accessed in some other thread)
1072         //
1073         // NB: inbuf is already a size in bytes, so the string together with
1074         //     its terminating NUL occupies inbuf + SIZEOF_WCHAR_T bytes;
1075         //     scaling it by SIZEOF_WCHAR_T once more would make memcpy() read
1076         //     past the end of psz
1077         const size_t tmpsize = inbuf + SIZEOF_WCHAR_T;
1078         tmpbuf = (wchar_t *)malloc(tmpsize);
1079         if (!tmpbuf)
1080             return (size_t)-1;
1081
1082         memcpy(tmpbuf, psz, tmpsize);
1083
1084         // NOTE(review): WC_BSWAP is defined outside this function; it is
1085         // assumed to take the number of characters to swap -- confirm
1086         WC_BSWAP(tmpbuf, inlen)
1087         psz = tmpbuf;
1088     }
1089
1090     if (buf)
1091     {
1092         // have destination buffer, convert there
1093         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1094
1095         res = n-outbuf;
1096
1097         // NB: iconv was given only wcslen(psz) characters on input, and so
1098         //     it couldn't convert the trailing zero. Let's do it ourselves
1099         //     if there's some room left for it in the output buffer. As
1100         //     iconv() has advanced buf past the bytes it wrote, the NUL
1101         //     goes to buf[0] and not to buf[res].
1102         if (res < n)
1103             buf[0] = 0;
1104     }
1105     else
1106     {
1107         // no destination buffer... convert using temp buffer
1108         // to calculate destination buffer requirement
1109         char tbuf[16];
1110         res = 0;
1111         do {
1112             buf = tbuf; outbuf = 16;
1113
1114             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1115
1116             res += 16 - outbuf;
1117         } while ((cres==(size_t)-1) && (errno==E2BIG));
1118     }
1119
1120     // tmpbuf is NULL unless the swapped copy was made and free(NULL) does
1121     // nothing, so there is no need to test ms_wcNeedsSwap again
1122     free(tmpbuf);
1123
1124     if (ICONV_FAILED(cres, inbuf))
1125     {
1126         //VS: it is ok if iconv fails, hence trace only
1127         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1128         return (size_t)-1;
1129     }
1130
1131     return res;
1132 }
1109
1110 #endif // HAVE_ICONV
1111
1112
1113 // ============================================================================
1114 // Win32 conversion classes
1115 // ============================================================================
1116
1117 #ifdef wxHAVE_WIN32_MB2WC
1118
1119 // from utils.cpp: look up the Windows code page for a charset name or a font encoding, wxMBConv_win32 below stores the result and treats -1 as "not ok"
1120 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1121 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1122
1123 // Converter implemented with the Win32 MultiByteToWideChar() and
1124 // WideCharToMultiByte() functions for the code page fixed at construction.
1125 class wxMBConv_win32 : public wxMBConv
1126 {
1127 public:
1128     // convert using the CP_ACP code page
1129     wxMBConv_win32()
1130         : m_CodePage(CP_ACP)
1131     {
1132     }
1133
1134     // convert using the code page wxCharsetToCodepage() finds for name
1135     wxMBConv_win32(const wxChar* name)
1136         : m_CodePage(wxCharsetToCodepage(name))
1137     {
1138     }
1139
1140     // convert using the code page wxEncodingToCodepage() finds for encoding
1141     wxMBConv_win32(wxFontEncoding encoding)
1142         : m_CodePage(wxEncodingToCodepage(encoding))
1143     {
1144     }
1145
1146     // Convert the NUL-terminated string psz to wide characters, storing them
1147     // in buf (of n characters) or only computing their number if buf is
1148     // NULL. Returns the length of the result, without the terminating NUL,
1149     // or (size_t)-1 if the Win32 function failed.
1150     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1151     {
1152         // a zero output size makes the function return the needed size
1153         const size_t dstSize = buf ? n : 0;
1154
1155         const size_t len = ::MultiByteToWideChar
1156                              (
1157                                 m_CodePage,     // code page
1158                                 0,              // flags (none)
1159                                 psz,            // input string
1160                                 -1,             // its length (NUL-terminated)
1161                                 buf,            // output string
1162                                 dstSize         // size of output buffer
1163                              );
1164         if ( !len )
1165             return (size_t)-1;
1166
1167         // note that len is the count of written chars for buf != NULL and
1168         // the size of the needed buffer for buf == NULL, so in either case
1169         // the length of the string (which never includes the terminating
1170         // NUL) is one less
1171         return len - 1;
1172     }
1173
1174     // Convert the NUL-terminated wide string pwz to the code page, storing
1175     // the result in buf (of n bytes) or only computing its size if buf is
1176     // NULL. Returns the length of the result, without the terminating NUL,
1177     // or (size_t)-1 if the function failed or if the conversion into buf
1178     // turned out to be lossy.
1179     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1180     {
1181         /*
1182             The difficulty here is that WideCharToMultiByte() may, by
1183             default, replace the characters which can't be represented in the
1184             target code page with poor approximations, e.g. turn the "1/2"
1185             symbol (U+00BD) into "1" for the code pages without it, and this
1186             is something we want to avoid at any price.
1187
1188             Worse, it does so _silently_ and doesn't tell us whether it
1189             happened. Win98/2000 and later have WC_NO_BEST_FIT_CHARS but it
1190             doesn't work on the older systems, where we have to do a round
1191             trip instead, i.e. check that converting the result back gives
1192             the string we started from. This is expensive, of course, but
1193             there is no other way to be sure that the data wasn't garbled.
1194          */
1195
1196         // can WC_NO_BEST_FIT_CHARS be relied upon? According to MSDN it
1197         // works neither with CJK encodings (tested for rather roughly
1198         // here...) nor with UTF-7/8 nor, of course, with the Windows
1199         // versions not supporting it
1200         const bool noBestFit = CanUseNoBestFit() && m_CodePage < 50000;
1201
1202         // with no best fit: it's our lucky day, the function tells us about
1203         // replacements itself; without: old system or unsupported encoding
1204         BOOL usedDef wxDUMMY_INITIALIZE(false);
1205         BOOL * const pUsedDef = noBestFit ? &usedDef : NULL;
1206         const int flags = noBestFit ? WC_NO_BEST_FIT_CHARS : 0;
1207
1208         const size_t dstSize = buf ? n : 0;
1209
1210         const size_t len = ::WideCharToMultiByte
1211                              (
1212                                 m_CodePage,     // code page
1213                                 flags,          // either none or no best fit
1214                                 pwz,            // input string
1215                                 -1,             // it is (wide) NUL-terminated
1216                                 buf,            // output buffer
1217                                 dstSize,        // and its size
1218                                 NULL,           // default "replacement" char
1219                                 pUsedDef        // [out] was it used?
1220                              );
1221
1222         // the function totally failed
1223         if ( !len )
1224             return (size_t)-1;
1225
1226         // as in MB2WC(), the length of the string is one less than len
1227         const size_t result = len - 1;
1228
1229         // nothing to verify if we were only asked for the size
1230         if ( !buf )
1231             return result;
1232
1233         if ( flags )
1234         {
1235             // any replacement done means that the conversion failed
1236             if ( usedDef )
1237                 return (size_t)-1;
1238
1239             return result;
1240         }
1241
1242         // we must resort to double tripping: if converting back fails or
1243         // doesn't give the string we started from, the conversion was lossy
1244         // and we consider that it failed
1245         wxWCharBuffer wcBuf(n);
1246         if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 )
1247             return (size_t)-1;
1248
1249         if ( wcscmp(wcBuf, pwz) != 0 )
1250             return (size_t)-1;
1251
1252         return result;
1253     }
1254
1255     // the code page lookup functions signal an unknown charset with -1
1256     bool IsOk() const { return m_CodePage != -1; }
1257
1258 private:
1259     // Can WC_NO_BEST_FIT_CHARS be used on the running system? The answer
1260     // is computed on the first call and cached in a static afterwards.
1261     static bool CanUseNoBestFit()
1262     {
1263         // -1: not determined yet, 0: no, 1: yes
1264         static int s_isWin98Or2k = -1;
1265
1266         if ( s_isWin98Or2k != -1 )
1267             return s_isWin98Or2k == 1;
1268
1269         int verMaj, verMin;
1270         const int os = wxGetOsVersion(&verMaj, &verMin);
1271         if ( os == wxWIN95 )
1272         {
1273             s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1274         }
1275         else if ( os == wxWINDOWS_NT )
1276         {
1277             s_isWin98Or2k = verMaj >= 5;
1278         }
1279         else
1280         {
1281             // unknown, be conservative by default
1282             s_isWin98Or2k = 0;
1283         }
1284
1285         wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1286
1287         return s_isWin98Or2k == 1;
1288     }
1289
1290     // the code page used for the conversions or -1 if it is unknown
1291     long m_CodePage;
1292 };
1273
1274 #endif // wxHAVE_WIN32_MB2WC
1275
1276 // ============================================================================
1277 // Mac conversion classes
1278 // ============================================================================
1279
1280 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1281
1282 // Converter built on the Text Encoding Converter functions: one TEC
1283 // converter per direction between the chosen character encoding and 16 bit
1284 // Unicode.
1285 //
1286 // NOTE(review): the class owns two TEC converters but can still be copied,
1287 // a copy would dispose of them twice; consider DECLARE_NO_COPY_CLASS.
1288 class wxMBConv_mac : public wxMBConv
1289 {
1290 public:
1291     // use the encoding returned by CFStringGetSystemEncoding()
1292     wxMBConv_mac()
1293     {
1294         Init(CFStringGetSystemEncoding()) ;
1295     }
1296
1297     // use the encoding the font mapper associates with this charset name
1298     wxMBConv_mac(const wxChar* name)
1299     {
1300         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, false) ) ) ;
1301     }
1302
1303     // use the encoding corresponding to this wx font encoding
1304     wxMBConv_mac(wxFontEncoding encoding)
1305     {
1306         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1307     }
1308
1309     ~wxMBConv_mac()
1310     {
1311         // a converter which couldn't be created is left as NULL by Init()
1312         if ( m_MB2WC_converter != NULL )
1313             TECDisposeConverter(m_MB2WC_converter);
1314         if ( m_WC2MB_converter != NULL )
1315             TECDisposeConverter(m_WC2MB_converter);
1316     }
1317
1318     // Create both converters for the given encoding. If one of them can't
1319     // be created it is NULL afterwards, which is what IsOk() tests for.
1320     void Init( TextEncodingBase encoding)
1321     {
1322         // make sure that IsOk() and the dtor never see indeterminate values
1323         m_MB2WC_converter = NULL ;
1324         m_WC2MB_converter = NULL ;
1325
1326         m_char_encoding = encoding ;
1327         m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1328
1329         OSStatus status = TECCreateConverter(&m_MB2WC_converter,
1330                                              m_char_encoding,
1331                                              m_unicode_encoding);
1332         if ( status != noErr )
1333             m_MB2WC_converter = NULL ;
1334
1335         status = TECCreateConverter(&m_WC2MB_converter,
1336                                     m_unicode_encoding,
1337                                     m_char_encoding);
1338         if ( status != noErr )
1339             m_WC2MB_converter = NULL ;
1340     }
1341
1342     // Convert the NUL-terminated string psz to wide characters. If buf is
1343     // NULL the conversion is done into a temporary buffer just to find out
1344     // the length of the result, otherwise n is the size of buf in
1345     // characters and a NUL is appended if there is room for it. Returns the
1346     // number of characters of the result or (size_t)-1 if a temporary
1347     // buffer couldn't be allocated.
1348     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1349     {
1350         OSStatus status = noErr ;
1351         // preset to 0 so that the result is well defined even if
1352         // TECConvertText() fails without storing the output length
1353         ByteCount byteOutLen = 0 ;
1354         ByteCount byteInLen = strlen(psz) ;
1355         wchar_t *tbuf = NULL ;
1356         UniChar* ubuf = NULL ;
1357         size_t res = 0 ;
1358
1359         if (buf == NULL)
1360         {
1361             n = byteInLen ;
1362             // one extra element so that an empty input doesn't result in a
1363             // zero-sized (and possibly NULL) allocation
1364             tbuf = (wchar_t*) malloc( (n + 1) * SIZEOF_WCHAR_T) ;
1365             if ( tbuf == NULL )
1366                 return (size_t)-1 ;
1367         }
1368
1369         ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1370 #if SIZEOF_WCHAR_T == 4
1371         // TEC produces 16 bit units, they are widened in a second step
1372         ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1373         if ( ubuf == NULL )
1374         {
1375             free(tbuf) ;
1376             return (size_t)-1 ;
1377         }
1378 #else
1379         ubuf = (UniChar*) (buf ? buf : tbuf) ;
1380 #endif
1381         // NOTE(review): status is still not examined here, as before
1382         status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1383           (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1384 #if SIZEOF_WCHAR_T == 4
1385         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1386         // is not properly terminated we get random characters at the end
1387         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
1388         wxMBConvUTF16BE converter ;
1389         res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1390         free( ubuf ) ;
1391 #else
1392         res = byteOutLen / sizeof( UniChar ) ;
1393 #endif
1394         // tbuf is NULL if the caller supplied the buffer
1395         free(tbuf) ;
1396
1397         if ( buf  && res < n)
1398             buf[res] = 0;
1399
1400         return res ;
1401     }
1402
1403     // Convert the NUL-terminated wide string psz to the character encoding.
1404     // If buf is NULL the conversion is done into a temporary buffer just to
1405     // find out the length of the result, otherwise n is the size of buf in
1406     // bytes and a NUL is appended if there is room for it. Returns the
1407     // number of bytes of the result or (size_t)-1 if a temporary buffer
1408     // couldn't be allocated or the UTF-16 step failed.
1409     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1410     {
1411         OSStatus status = noErr ;
1412         // see MB2WC() for the reason of the initialization
1413         ByteCount byteOutLen = 0 ;
1414         ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1415
1416         char *tbuf = NULL ;
1417
1418         if (buf == NULL)
1419         {
1420             // worst case
1421             n = byteInLen * 2 ;
1422             // + 1 to never ask for a zero-sized block for an empty string
1423             tbuf = (char*) malloc( n + 1 ) ;
1424             if ( tbuf == NULL )
1425                 return (size_t)-1 ;
1426         }
1427
1428         ByteCount byteBufferLen = n ;
1429         UniChar* ubuf = NULL ;
1430 #if SIZEOF_WCHAR_T == 4
1431         // TEC consumes 16 bit units so the input is narrowed first
1432         wxMBConvUTF16BE converter ;
1433         size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1434         if ( unicharlen == (size_t)-1 )
1435         {
1436             // presumably the UTF-16 converter reports errors with
1437             // (size_t)-1 like the other converters in this file: it must
1438             // not be used as a length then
1439             free(tbuf) ;
1440             return (size_t)-1 ;
1441         }
1442
1443         byteInLen = unicharlen ;
1444         ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1445         if ( ubuf == NULL )
1446         {
1447             free(tbuf) ;
1448             return (size_t)-1 ;
1449         }
1450         converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
1451 #else
1452         ubuf = (UniChar*) psz ;
1453 #endif
1454         // NOTE(review): status is still not examined here, as before
1455         status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1456            (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1457 #if SIZEOF_WCHAR_T == 4
1458         free( ubuf ) ;
1459 #endif
1460         // tbuf is NULL if the caller supplied the buffer
1461         free(tbuf) ;
1462
1463         size_t res = byteOutLen ;
1464         if ( buf  && res < n)
1465             buf[res] = 0;
1466
1467         return res ;
1468     }
1469
1470     // both converters must have been created successfully by Init()
1471     bool IsOk() const
1472         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
1473
1474 private:
1475     // the converters for the two directions, NULL if not created
1476     TECObjectRef m_MB2WC_converter ;
1477     TECObjectRef m_WC2MB_converter ;
1478
1479     // the encodings the converters were created for
1480     TextEncodingBase m_char_encoding ;
1481     TextEncodingBase m_unicode_encoding ;
1482 };
1414
1415 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1416
1417 // ============================================================================
1418 // wxEncodingConverter based conversion classes
1419 // ============================================================================
1420
1421 #if wxUSE_FONTMAP
1422
1423 // Converter delegating to a pair of wxEncodingConverter objects, one for
1424 // each direction between m_enc and wxFONTENCODING_UNICODE.
1425 class wxMBConv_wxwin : public wxMBConv
1426 {
1427 private:
1428     // set up both converters for m_enc and remember whether this worked
1429     void Init()
1430     {
1431         const bool toWideOk = m2w.Init(m_enc, wxFONTENCODING_UNICODE);
1432
1433         // as with &&, the second converter is left alone if the first failed
1434         m_ok = toWideOk ? w2m.Init(wxFONTENCODING_UNICODE, m_enc) : false;
1435     }
1436
1437 public:
1438     // temporarily just use wxEncodingConverter stuff,
1439     // so that it works while a better implementation is built
1440
1441     // a NULL name selects wxFONTENCODING_SYSTEM, otherwise the font mapper
1442     // is asked (non interactively) for the encoding of the charset
1443     wxMBConv_wxwin(const wxChar* name)
1444     {
1445         m_enc = name ? wxFontMapper::Get()->CharsetToEncoding(name, false)
1446                      : wxFONTENCODING_SYSTEM;
1447
1448         Init();
1449     }
1450
1451     wxMBConv_wxwin(wxFontEncoding enc)
1452     {
1453         m_enc = enc;
1454
1455         Init();
1456     }
1457
1458     // Convert psz into buf unless buf is NULL. The size of the buffer is
1459     // not taken into account and the value returned is always strlen(psz).
1460     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1461     {
1462         const size_t srcLen = strlen(psz);
1463
1464         if ( buf != NULL )
1465         {
1466             m2w.Convert(psz, buf);
1467         }
1468
1469         return srcLen;
1470     }
1471
1472     // Convert psz into buf unless buf is NULL. The size of the buffer is
1473     // not taken into account and the value returned is always the length
1474     // of psz in characters.
1475     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1476     {
1477         const size_t srcLen = wxWcslen(psz);
1478
1479         if ( buf != NULL )
1480         {
1481             w2m.Convert(psz, buf);
1482         }
1483
1484         return srcLen;
1485     }
1486
1487     bool IsOk() const { return m_ok; }
1488
1489 public:
1490     // the encoding converted from/to and the converters for each direction
1491     wxFontEncoding m_enc;
1492     wxEncodingConverter m2w, w2m;
1493
1494     // were we initialized successfully?
1495     bool m_ok;
1496
1497     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1498 };
1480
1481 #endif // wxUSE_FONTMAP
1482
1483 // ============================================================================
1484 // wxCSConv implementation
1485 // ============================================================================
1486
1487 // Common part of all ctors: there is neither a charset name nor a real
1488 // converter yet and the creation of the latter is still pending.
1489 void wxCSConv::Init()
1490 {
1491     m_deferred = true;
1492
1493     m_convReal = NULL;
1494     m_name = NULL;
1495 }
1493
1494 // Create a converter for the given charset name; the encoding is left as
1495 // wxFONTENCODING_SYSTEM whether the name is NULL or not.
1496 wxCSConv::wxCSConv(const wxChar *charset)
1497 {
1498     Init();
1499
1500     m_encoding = wxFONTENCODING_SYSTEM;
1501
1502     if ( charset != NULL )
1503         SetName(charset);
1504 }
1505
1506 // Create a converter for the given encoding. wxFONTENCODING_MAX and
1507 // wxFONTENCODING_DEFAULT are not accepted: they trigger a debug failure
1508 // and are replaced with wxFONTENCODING_SYSTEM.
1509 wxCSConv::wxCSConv(wxFontEncoding encoding)
1510 {
1511     const bool isInvalid = encoding == wxFONTENCODING_MAX ||
1512                            encoding == wxFONTENCODING_DEFAULT;
1513     if ( isInvalid )
1514     {
1515         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1516     }
1517
1518     Init();
1519
1520     m_encoding = isInvalid ? wxFONTENCODING_SYSTEM : encoding;
1521 }
1519
1520 wxCSConv::~wxCSConv()
1521 {
1522     Clear(); // frees the charset name and deletes the real converter, if any
1523 }
1524
1525 // Copy ctor: only the charset name and the encoding are copied, the new
1526 // object starts without a real converter of its own.
1527 wxCSConv::wxCSConv(const wxCSConv& conv)
1528         : wxMBConv()
1529 {
1530     Init();
1531
1532     m_encoding = conv.m_encoding;
1533
1534     // does nothing if conv has no name
1535     SetName(conv.m_name);
1536 }
1533
1534 // Assignment: take over the charset name and the encoding of conv.
1535 //
1536 // The real converter is not copied, it is recreated on demand by
1537 // CreateConvIfNeeded().
1538 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1539 {
1540     // Clear() frees m_name, so for self-assignment SetName() below would
1541     // be left with nothing to copy and the name would be lost
1542     if ( this == &conv )
1543         return *this;
1544
1545     Clear();
1546
1547     SetName(conv.m_name);
1548     m_encoding = conv.m_encoding;
1549
1550     // SetName() only re-arms the deferred creation when the name is not
1551     // NULL, but the converter deleted by Clear() must be recreated for the
1552     // new encoding in any case, otherwise MB2WC()/WC2MB() would silently
1553     // fall back to the direct latin-1 code
1554     m_deferred = true;
1555
1556     return *this;
1557 }
1543
1544 // Release what the object owns, i.e. the real converter and the copy of
1545 // the charset name, and reset both pointers to NULL.
1546 void wxCSConv::Clear()
1547 {
1548     delete m_convReal;
1549     m_convReal = NULL;
1550
1551     free(m_name);
1552     m_name = NULL;
1553 }
1552
1553 // Remember a copy of the charset name and mark the real converter as
1554 // still to be created. A NULL charset is ignored and leaves the object
1555 // unchanged.
1556 void wxCSConv::SetName(const wxChar *charset)
1557 {
1558     if (charset)
1559     {
1560         // duplicate first and release the old name only afterwards: this
1561         // way a name set previously is not leaked and the call still works
1562         // if charset is m_name itself
1563         wxChar *name = wxStrdup(charset);
1564         free(m_name);
1565
1566         m_name = name;
1567         m_deferred = true;
1568     }
1569 }
1561
1562 // Create the converter doing the real work for this object.
1563 //
1564 // Returns a new wxMBConv allocated with new or NULL. NULL means either that
1565 // no conversion object is needed (ISO8859-1, which MB2WC() and WC2MB()
1566 // handle directly) or that no converter could be created at all, in which
1567 // case an error is logged.
1568 wxMBConv *wxCSConv::DoCreate() const
1569 {
1570     // check for the special case of ISO8859-1 charset: as we have special
1571     // knowledge of it anyhow, we don't need to create a special conversion
1572     // object
1573     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1574     {
1575         // don't convert at all
1576         return NULL;
1577     }
1578
1579     // we trust OS to do conversion better than we can so try external
1580     // conversion methods first
1581     //
1582     // the full order is:
1583     //      1. OS conversion (iconv() under Unix or Win32 API)
1584     //      2. hard coded conversions for UTF
1585     //      3. wxEncodingConverter as fall back
1586
1587     // step (1)
1588 #ifdef HAVE_ICONV
1589 #if !wxUSE_FONTMAP
1590     if ( m_name )
1591 #endif // !wxUSE_FONTMAP
1592     {
1593         wxString name(m_name);
1594
1595 #if wxUSE_FONTMAP
1596         // without a name the font mapper can give us one for the encoding
1597         if ( name.empty() )
1598             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1599 #endif // wxUSE_FONTMAP
1600
1601         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1602         if ( conv->IsOk() )
1603             return conv;
1604
1605         delete conv;
1606     }
1607 #endif // HAVE_ICONV
1608
1609 #ifdef wxHAVE_WIN32_MB2WC
1610     {
1611         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1612                                       : new wxMBConv_win32(m_encoding);
1613         if ( conv->IsOk() )
1614             return conv;
1615
1616         delete conv;
1617     }
1618 #endif // wxHAVE_WIN32_MB2WC
1619     // NB: this must be the same condition as the one wxMBConv_mac is defined
1620     //     under, testing for __WXMAC__ alone would refer to a class which
1621     //     doesn't exist in non Carbon builds
1622 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1623     {
1624         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1625         {
1626             wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1627                                         : new wxMBConv_mac(m_encoding);
1628             if ( conv->IsOk() )
1629                 return conv;
1630
1631             delete conv;
1632         }
1633     }
1634 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1635     // step (2)
1636     wxFontEncoding enc = m_encoding;
1637 #if wxUSE_FONTMAP
1638     if ( enc == wxFONTENCODING_SYSTEM && m_name )
1639     {
1640         // use "false" to suppress interactive dialogs -- we can be called from
1641         // anywhere and popping up a dialog from here is the last thing we want to
1642         // do
1643         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1644     }
1645 #endif // wxUSE_FONTMAP
1646
1647     switch ( enc )
1648     {
1649         case wxFONTENCODING_UTF7:
1650              return new wxMBConvUTF7;
1651
1652         case wxFONTENCODING_UTF8:
1653              return new wxMBConvUTF8;
1654
1655         case wxFONTENCODING_UTF16BE:
1656              return new wxMBConvUTF16BE;
1657
1658         case wxFONTENCODING_UTF16LE:
1659              return new wxMBConvUTF16LE;
1660
1661         case wxFONTENCODING_UTF32BE:
1662              return new wxMBConvUTF32BE;
1663
1664         case wxFONTENCODING_UTF32LE:
1665              return new wxMBConvUTF32LE;
1666
1667         default:
1668              // nothing to do but put here to suppress gcc warnings
1669              ;
1670     }
1671
1672     // step (3)
1673 #if wxUSE_FONTMAP
1674     {
1675         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1676                                       : new wxMBConv_wxwin(m_encoding);
1677         if ( conv->IsOk() )
1678             return conv;
1679
1680         delete conv;
1681     }
1682 #endif // wxUSE_FONTMAP
1683
1684     // NB: This is a hack to prevent deadlock. What could otherwise happen
1685     //     in Unicode build: wxConvLocal creation ends up being here
1686     //     because of some failure and logs the error. But wxLog will try to
1687     //     attach timestamp, for which it will need wxConvLocal (to convert
1688     //     time to char* and then wchar_t*), but that fails, tries to log
1689     //     error, but wxLog has a (already locked) critical section that
1690     //     guards static buffer.
1691     static bool alreadyLoggingError = false;
1692     if (!alreadyLoggingError)
1693     {
1694         alreadyLoggingError = true;
1695         wxLogError(_("Cannot convert from the charset '%s'!"),
1696                    m_name ? m_name
1697                       :
1698 #if wxUSE_FONTMAP
1699                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1700 #else // !wxUSE_FONTMAP
1701                          // the encoding is an enum value and not a string,
1702                          // so it has to be formatted as a number
1703                          wxString::Format(_("encoding %d"), (int)m_encoding).c_str()
1704 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1705               );
1706         alreadyLoggingError = false;
1707     }
1708
1709     return NULL;
1710 }
1700
1701 // Create the real converter on first use: nothing is done unless its
1702 // creation is still pending. Although the method is const it updates
1703 // m_name (possibly), m_convReal and m_deferred.
1704 void wxCSConv::CreateConvIfNeeded() const
1705 {
1706     if ( !m_deferred )
1707         return;
1708
1709     // const_cast
1710     wxCSConv * const self = (wxCSConv *)this;
1711
1712 #if wxUSE_INTL
1713     // if we have neither the name nor the encoding, use the default
1714     // encoding for this system
1715     const bool nothingSpecified = !m_name &&
1716                                   m_encoding == wxFONTENCODING_SYSTEM;
1717     if ( nothingSpecified )
1718     {
1719         self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1720     }
1721 #endif // wxUSE_INTL
1722
1723     self->m_convReal = DoCreate();
1724     self->m_deferred = false;
1725 }
1720
1721 // Convert the string psz to wide characters using the real converter if
1722 // there is one and by plain widening of each byte (latin-1) otherwise.
1723 //
1724 // NB: the latin-1 code doesn't look at n: if buf is not NULL it must have
1725 //     room for strlen(psz) + 1 characters. The value returned in this
1726 //     case is strlen(psz).
1727 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1728 {
1729     CreateConvIfNeeded();
1730
1731     if ( m_convReal != NULL )
1732         return m_convReal->MB2WC(buf, psz, n);
1733
1734     // latin-1 (direct)
1735     const size_t length = strlen(psz);
1736     if ( buf == NULL )
1737         return length;
1738
1739     // the terminating NUL at psz[length] is copied as well
1740     size_t i = 0;
1741     do
1742     {
1743         // go through unsigned char: the bytes above 0x7F mustn't become
1744         // negative values
1745         buf[i] = (unsigned char)(psz[i]);
1746     }
1747     while ( i++ < length );
1748
1749     return length;
1750 }
1739
1740 // Convert the wide string psz to multibyte using the real converter if
1741 // there is one and by plain narrowing (latin-1) otherwise.
1742 //
1743 // In the latin-1 case the function fails with (size_t)-1 as soon as it
1744 // meets a character above 0xFF and n is not consulted. buf may be NULL to
1745 // only check the string and get its length.
1746 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1747 {
1748     CreateConvIfNeeded();
1749
1750     if ( m_convReal != NULL )
1751         return m_convReal->WC2MB(buf, psz, n);
1752
1753     // latin-1 (direct): the only difference between the two modes is
1754     // whether the checked characters are stored, so one loop serves both
1755     const size_t length = wxWcslen(psz);
1756     for ( size_t i = 0; i <= length; ++i )   // <= to process the NUL too
1757     {
1758         const wchar_t wc = psz[i];
1759         if ( wc > 0xFF )
1760             return (size_t)-1;
1761
1762         if ( buf )
1763             buf[i] = wc;
1764     }
1765
1766     return length;
1767 }
1769
1770 // ----------------------------------------------------------------------------
1771 // globals
1772 // ----------------------------------------------------------------------------
1773
1774 #ifdef __WINDOWS__ // NOTE(review): wxMBConv_win32 itself is only defined #ifdef wxHAVE_WIN32_MB2WC, confirm that the two conditions always agree
1775     static wxMBConv_win32 wxConvLibcObj; // default constructed, i.e. uses the CP_ACP code page
1776 #else // !__WINDOWS__
1777     static wxMBConvLibc wxConvLibcObj;
1778 #endif // __WINDOWS__/!__WINDOWS__
1779 // the objects behind the other global converters, they are local to this file
1780 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM); // its real converter is only created when it is first used
1781 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1); // DoCreate() returns NULL for this encoding: direct latin-1 conversion
1782 static wxMBConvUTF7 wxConvUTF7Obj;
1783 static wxMBConvUTF8 wxConvUTF8Obj;
1784
1785 // the exported names: references bound to the objects above
1786 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1787 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1788 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1789 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1790 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1791 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj; // a pointer, unlike the others, initially pointing to the libc converter
1792
1793 #else // !wxUSE_WCHAR_T
1794
1795 // stand-ins in absence of wchar_t: all of them are plain wxMBConv objects here
1796 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1797                                 wxConvISO8859_1,
1798                                 wxConvLocal,
1799                                 wxConvUTF8; // NOTE(review): wxConvUTF7 and wxConvCurrent, defined in the wchar_t branch, have no stand-in here
1800
1801 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1802
1803