src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
   9 //              (c) 2000-2003 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ============================================================================
  14 // declarations
  15 // ============================================================================
  16
  17 // ----------------------------------------------------------------------------
  18 // headers
  19 // ----------------------------------------------------------------------------
  20
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  22   #pragma implementation "strconv.h"
  23 #endif
  24
  25 // For compilers that support precompilation, includes "wx.h".
  26 #include "wx/wxprec.h"
  27
  28 #ifdef __BORLANDC__
  29   #pragma hdrstop
  30 #endif
  31
  32 #ifndef WX_PRECOMP
  33     #include "wx/intl.h"
  34     #include "wx/log.h"
  35 #endif // WX_PRECOMP
  36
  37 #include "wx/strconv.h"
  38
  39 #if wxUSE_WCHAR_T
  40
  41 #ifdef __WXMSW__
  42     #include "wx/msw/private.h"
  43     #include "wx/msw/missing.h"
  44 #endif
  45
  46 #ifndef __WXWINCE__
  47 #include <errno.h>
  48 #endif
  49
  50 #include <ctype.h>
  51 #include <string.h>
  52 #include <stdlib.h>
  53
  54 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  55     #define wxHAVE_WIN32_MB2WC
  56 #endif // __WIN32__ but !__WXMICROWIN__
  57
  58 // ----------------------------------------------------------------------------
  59 // headers
  60 // ----------------------------------------------------------------------------
  61
  62 #ifdef __SALFORDC__
  63     #include <clib.h>
  64 #endif
  65
  66 #ifdef HAVE_ICONV
  67     #include <iconv.h>
  68 #endif
  69
  70 #include "wx/encconv.h"
  71 #include "wx/fontmap.h"
  72
  73 #ifdef __WXMAC__
  74 #include "ATSUnicode.h"
  75 #include "TextCommon.h"
  76 #include "TextEncodingConverter.h"
  77
  78 #include  "wx/mac/private.h"  // includes mac headers
  79 #endif
  80 // ----------------------------------------------------------------------------
  81 // macros
  82 // ----------------------------------------------------------------------------
  83
  84 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  85 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  86
  87 #if SIZEOF_WCHAR_T == 4
  88     #define WC_NAME         "UCS4"
  89     #define WC_BSWAP         BSWAP_UCS4
  90     #ifdef WORDS_BIGENDIAN
  91       #define WC_NAME_BEST  "UCS-4BE"
  92     #else
  93       #define WC_NAME_BEST  "UCS-4LE"
  94     #endif
  95 #elif SIZEOF_WCHAR_T == 2
  96     #define WC_NAME         "UTF16"
  97     #define WC_BSWAP         BSWAP_UTF16
  98     #define WC_UTF16
  99     #ifdef WORDS_BIGENDIAN
 100       #define WC_NAME_BEST  "UTF-16BE"
 101     #else
 102       #define WC_NAME_BEST  "UTF-16LE"
 103     #endif
 104 #else // sizeof(wchar_t) != 2 nor 4
 105     // does this ever happen?
 106     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 107 #endif
 108
 109 // ============================================================================
 110 // implementation
 111 // ============================================================================
 112
 113 // ----------------------------------------------------------------------------
 114 // UTF-16 en/decoding to/from UCS-4
 115 // ----------------------------------------------------------------------------
 116
 117
 118 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 119 {
 120     if (input<=0xffff)
 121     {
 122         if (output)
 123             *output = (wxUint16) input;
 124         return 1;
 125     }
 126     else if (input>=0x110000)
 127     {
 128         return (size_t)-1;
 129     }
 130     else
 131     {
 132         if (output)
 133         {
 134             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 135             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 136         }
 137         return 2;
 138     }
 139 }
 140
 141 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 142 {
 143     if ((*input<0xd800) || (*input>0xdfff))
 144     {
 145         output = *input;
 146         return 1;
 147     }
 148     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 149     {
 150         output = *input;
 151         return (size_t)-1;
 152     }
 153     else
 154     {
 155         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 156         return 2;
 157     }
 158 }
 159
 160
 161 // ----------------------------------------------------------------------------
 162 // wxMBConv
 163 // ----------------------------------------------------------------------------
 164
 165 wxMBConv::~wxMBConv()
 166 {
 167     // nothing to do here
 168 }
 169
 170 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 171 {
 172     if ( psz )
 173     {
 174         // calculate the length of the buffer needed first
 175         size_t nLen = MB2WC(NULL, psz, 0);
 176         if ( nLen != (size_t)-1 )
 177         {
 178             // now do the actual conversion
 179             wxWCharBuffer buf(nLen);
 180             MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
 181
 182             return buf;
 183         }
 184     }
 185
 186     wxWCharBuffer buf((wchar_t *)NULL);
 187
 188     return buf;
 189 }
 190
 191 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 192 {
 193     if ( pwz )
 194     {
 195         size_t nLen = WC2MB(NULL, pwz, 0);
 196         if ( nLen != (size_t)-1 )
 197         {
 198             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 199             WC2MB(buf.data(), pwz, nLen + 4);
 200
 201             return buf;
 202         }
 203     }
 204
 205     wxCharBuffer buf((char *)NULL);
 206
 207     return buf;
 208 }
 209
 210 // ----------------------------------------------------------------------------
 211 // wxMBConvLibc
 212 // ----------------------------------------------------------------------------
 213
 214 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 215 {
 216     return wxMB2WC(buf, psz, n);
 217 }
 218
 219 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 220 {
 221     return wxWC2MB(buf, psz, n);
 222 }
 223
 224 // ----------------------------------------------------------------------------
 225 // UTF-7
 226 // ----------------------------------------------------------------------------
 227
 228 #if 0
 229 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 230                         "abcdefghijklmnopqrstuvwxyz"
 231                         "0123456789'(),-./:?";
 232 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 233 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 234                         "abcdefghijklmnopqrstuvwxyz"
 235                         "0123456789+/";
 236 #endif
 237
 238 // TODO: write actual implementations of UTF-7 here
 239 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 240                            const char * WXUNUSED(psz),
 241                            size_t WXUNUSED(n)) const
 242 {
 243   return 0;
 244 }
 245
 246 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 247                            const wchar_t * WXUNUSED(psz),
 248                            size_t WXUNUSED(n)) const
 249 {
 250   return 0;
 251 }
 252
 253 // ----------------------------------------------------------------------------
 254 // UTF-8
 255 // ----------------------------------------------------------------------------
 256
 257 static wxUint32 utf8_max[]=
 258     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 259
 260 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 261 {
 262     size_t len = 0;
 263
 264     while (*psz && ((!buf) || (len < n)))
 265     {
 266         unsigned char cc = *psz++, fc = cc;
 267         unsigned cnt;
 268         for (cnt = 0; fc & 0x80; cnt++)
 269             fc <<= 1;
 270         if (!cnt)
 271         {
 272             // plain ASCII char
 273             if (buf)
 274                 *buf++ = cc;
 275             len++;
 276         }
 277         else
 278         {
 279             cnt--;
 280             if (!cnt)
 281             {
 282                 // invalid UTF-8 sequence
 283                 return (size_t)-1;
 284             }
 285             else
 286             {
 287                 unsigned ocnt = cnt - 1;
 288                 wxUint32 res = cc & (0x3f >> cnt);
 289                 while (cnt--)
 290                 {
 291                     cc = *psz++;
 292                     if ((cc & 0xC0) != 0x80)
 293                     {
 294                         // invalid UTF-8 sequence
 295                         return (size_t)-1;
 296                     }
 297                     res = (res << 6) | (cc & 0x3f);
 298                 }
 299                 if (res <= utf8_max[ocnt])
 300                 {
 301                     // illegal UTF-8 encoding
 302                     return (size_t)-1;
 303                 }
 304 #ifdef WC_UTF16
 305                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 306                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 307                 if (pa == (size_t)-1)
 308                   return (size_t)-1;
 309                 if (buf)
 310                     buf += pa;
 311                 len += pa;
 312 #else // !WC_UTF16
 313                 if (buf)
 314                     *buf++ = res;
 315                 len++;
 316 #endif // WC_UTF16/!WC_UTF16
 317             }
 318         }
 319     }
 320     if (buf && (len < n))
 321         *buf = 0;
 322     return len;
 323 }
 324
 325 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 326 {
 327     size_t len = 0;
 328
 329     while (*psz && ((!buf) || (len < n)))
 330     {
 331         wxUint32 cc;
 332 #ifdef WC_UTF16
 333         // cast is ok for WC_UTF16
 334         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 335         psz += (pa == (size_t)-1) ? 1 : pa;
 336 #else
 337         cc=(*psz++) & 0x7fffffff;
 338 #endif
 339         unsigned cnt;
 340         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 341         if (!cnt)
 342         {
 343             // plain ASCII char
 344             if (buf)
 345                 *buf++ = (char) cc;
 346             len++;
 347         }
 348
 349         else
 350         {
 351             len += cnt + 1;
 352             if (buf)
 353             {
 354                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 355                 while (cnt--)
 356                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 357             }
 358         }
 359     }
 360
 361     if (buf && (len<n)) *buf = 0;
 362
 363     return len;
 364 }
 365
 366
 367
 368
 369 // ----------------------------------------------------------------------------
 370 // UTF-16
 371 // ----------------------------------------------------------------------------
 372
 373 #ifdef WORDS_BIGENDIAN
 374     #define wxMBConvUTF16straight wxMBConvUTF16BE
 375     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 376 #else
 377     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 378     #define wxMBConvUTF16straight wxMBConvUTF16LE
 379 #endif
 380
 381
 382 #ifdef WC_UTF16
 383
 384 // copy 16bit MB to 16bit String
 385 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 386 {
 387     size_t len=0;
 388
 389     while (*(wxUint16*)psz && (!buf || len < n))
 390     {
 391         if (buf)
 392             *buf++ = *(wxUint16*)psz;
 393         len++;
 394
 395         psz += sizeof(wxUint16);
 396     }
 397     if (buf && len<n)   *buf=0;
 398
 399     return len;
 400 }
 401
 402
 403 // copy 16bit String to 16bit MB
 404 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 405 {
 406     size_t len=0;
 407
 408     while (*psz && (!buf || len < n))
 409     {
 410         if (buf)
 411         {
 412             *(wxUint16*)buf = *psz;
 413             buf += sizeof(wxUint16);
 414         }
 415         len += sizeof(wxUint16);
 416         psz++;
 417     }
 418     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 419
 420     return len;
 421 }
 422
 423
 424 // swap 16bit MB to 16bit String
 425 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 426 {
 427     size_t len=0;
 428
 429     while (*(wxUint16*)psz && (!buf || len < n))
 430     {
 431         if (buf)
 432         {
 433             ((char *)buf)[0] = psz[1];
 434             ((char *)buf)[1] = psz[0];
 435             buf++;
 436         }
 437         len++;
 438         psz += sizeof(wxUint16);
 439     }
 440     if (buf && len<n)   *buf=0;
 441
 442     return len;
 443 }
 444
 445
 446 // swap 16bit MB to 16bit String
 447 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 448 {
 449     size_t len=0;
 450
 451     while (*psz && (!buf || len < n))
 452     {
 453         if (buf)
 454         {
 455             *buf++ = ((char*)psz)[1];
 456             *buf++ = ((char*)psz)[0];
 457         }
 458         len += sizeof(wxUint16);
 459         psz++;
 460     }
 461     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 462
 463     return len;
 464 }
 465
 466
 467 #else // WC_UTF16
 468
 469
 470 // copy 16bit MB to 32bit String
 471 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 472 {
 473     size_t len=0;
 474
 475     while (*(wxUint16*)psz && (!buf || len < n))
 476     {
 477         wxUint32 cc;
 478         size_t pa=decode_utf16((wxUint16*)psz, cc);
 479         if (pa == (size_t)-1)
 480             return pa;
 481
 482         if (buf)
 483             *buf++ = cc;
 484         len++;
 485         psz += pa * sizeof(wxUint16);
 486     }
 487     if (buf && len<n)   *buf=0;
 488
 489     return len;
 490 }
 491
 492
 493 // copy 32bit String to 16bit MB
 494 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 495 {
 496     size_t len=0;
 497
 498     while (*psz && (!buf || len < n))
 499     {
 500         wxUint16 cc[2];
 501         size_t pa=encode_utf16(*psz, cc);
 502
 503         if (pa == (size_t)-1)
 504             return pa;
 505
 506         if (buf)
 507         {
 508             *(wxUint16*)buf = cc[0];
 509             buf += sizeof(wxUint16);
 510             if (pa > 1)
 511             {
 512                 *(wxUint16*)buf = cc[1];
 513                 buf += sizeof(wxUint16);
 514             }
 515         }
 516
 517         len += pa*sizeof(wxUint16);
 518         psz++;
 519     }
 520     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 521
 522     return len;
 523 }
 524
 525
 526 // swap 16bit MB to 32bit String
 527 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 528 {
 529     size_t len=0;
 530
 531     while (*(wxUint16*)psz && (!buf || len < n))
 532     {
 533         wxUint32 cc;
 534         char tmp[4];
 535         tmp[0]=psz[1];  tmp[1]=psz[0];
 536         tmp[2]=psz[3];  tmp[3]=psz[2];
 537
 538         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 539         if (pa == (size_t)-1)
 540             return pa;
 541
 542         if (buf)
 543             *buf++ = cc;
 544
 545         len++;
 546         psz += pa * sizeof(wxUint16);
 547     }
 548     if (buf && len<n)   *buf=0;
 549
 550     return len;
 551 }
 552
 553
 554 // swap 32bit String to 16bit MB
 555 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 556 {
 557     size_t len=0;
 558
 559     while (*psz && (!buf || len < n))
 560     {
 561         wxUint16 cc[2];
 562         size_t pa=encode_utf16(*psz, cc);
 563
 564         if (pa == (size_t)-1)
 565             return pa;
 566
 567         if (buf)
 568         {
 569             *buf++ = ((char*)cc)[1];
 570             *buf++ = ((char*)cc)[0];
 571             if (pa > 1)
 572             {
 573                 *buf++ = ((char*)cc)[3];
 574                 *buf++ = ((char*)cc)[2];
 575             }
 576         }
 577
 578         len += pa*sizeof(wxUint16);
 579         psz++;
 580     }
 581     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 582
 583     return len;
 584 }
 585
 586 #endif // WC_UTF16
 587
 588
 589 // ----------------------------------------------------------------------------
 590 // UTF-32
 591 // ----------------------------------------------------------------------------
 592
 593 #ifdef WORDS_BIGENDIAN
 594 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 595 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 596 #else
 597 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 598 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 599 #endif
 600
 601
 602 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
 603 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 604
 605
 606 #ifdef WC_UTF16
 607
 608 // copy 32bit MB to 16bit String
 609 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 610 {
 611     size_t len=0;
 612
 613     while (*(wxUint32*)psz && (!buf || len < n))
 614     {
 615         wxUint16 cc[2];
 616
 617         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 618         if (pa == (size_t)-1)
 619             return pa;
 620
 621         if (buf)
 622         {
 623             *buf++ = cc[0];
 624             if (pa > 1)
 625                 *buf++ = cc[1];
 626         }
 627         len += pa;
 628         psz += sizeof(wxUint32);
 629     }
 630     if (buf && len<n)   *buf=0;
 631
 632     return len;
 633 }
 634
 635
 636 // copy 16bit String to 32bit MB
 637 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 638 {
 639     size_t len=0;
 640
 641     while (*psz && (!buf || len < n))
 642     {
 643         wxUint32 cc;
 644
 645         // cast is ok for WC_UTF16
 646         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 647         if (pa == (size_t)-1)
 648             return pa;
 649
 650         if (buf)
 651         {
 652             *(wxUint32*)buf = cc;
 653             buf += sizeof(wxUint32);
 654         }
 655         len += sizeof(wxUint32);
 656         psz += pa;
 657     }
 658
 659     if (buf && len<=n-sizeof(wxUint32))
 660         *(wxUint32*)buf=0;
 661
 662     return len;
 663 }
 664
 665
 666
 667 // swap 32bit MB to 16bit String
 668 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 669 {
 670     size_t len=0;
 671
 672     while (*(wxUint32*)psz && (!buf || len < n))
 673     {
 674         char tmp[4];
 675         tmp[0] = psz[3];   tmp[1] = psz[2];
 676         tmp[2] = psz[1];   tmp[3] = psz[0];
 677
 678
 679         wxUint16 cc[2];
 680
 681         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 682         if (pa == (size_t)-1)
 683             return pa;
 684
 685         if (buf)
 686         {
 687             *buf++ = cc[0];
 688             if (pa > 1)
 689                 *buf++ = cc[1];
 690         }
 691         len += pa;
 692         psz += sizeof(wxUint32);
 693     }
 694
 695     if (buf && len<n)
 696         *buf=0;
 697
 698     return len;
 699 }
 700
 701
 702 // swap 16bit String to 32bit MB
 703 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 704 {
 705     size_t len=0;
 706
 707     while (*psz && (!buf || len < n))
 708     {
 709         char cc[4];
 710
 711         // cast is ok for WC_UTF16
 712         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 713         if (pa == (size_t)-1)
 714             return pa;
 715
 716         if (buf)
 717         {
 718             *buf++ = cc[3];
 719             *buf++ = cc[2];
 720             *buf++ = cc[1];
 721             *buf++ = cc[0];
 722         }
 723         len += sizeof(wxUint32);
 724         psz += pa;
 725     }
 726
 727     if (buf && len<=n-sizeof(wxUint32))
 728         *(wxUint32*)buf=0;
 729
 730     return len;
 731 }
 732
 733 #else // WC_UTF16
 734
 735
 736 // copy 32bit MB to 32bit String
 737 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 738 {
 739     size_t len=0;
 740
 741     while (*(wxUint32*)psz && (!buf || len < n))
 742     {
 743         if (buf)
 744             *buf++ = *(wxUint32*)psz;
 745         len++;
 746         psz += sizeof(wxUint32);
 747     }
 748
 749     if (buf && len<n)
 750         *buf=0;
 751
 752     return len;
 753 }
 754
 755
 756 // copy 32bit String to 32bit MB
 757 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 758 {
 759     size_t len=0;
 760
 761     while (*psz && (!buf || len < n))
 762     {
 763         if (buf)
 764         {
 765             *(wxUint32*)buf = *psz;
 766             buf += sizeof(wxUint32);
 767         }
 768
 769         len += sizeof(wxUint32);
 770         psz++;
 771     }
 772
 773     if (buf && len<=n-sizeof(wxUint32))
 774         *(wxUint32*)buf=0;
 775
 776     return len;
 777 }
 778
 779
 780 // swap 32bit MB to 32bit String
 781 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 782 {
 783     size_t len=0;
 784
 785     while (*(wxUint32*)psz && (!buf || len < n))
 786     {
 787         if (buf)
 788         {
 789             ((char *)buf)[0] = psz[3];
 790             ((char *)buf)[1] = psz[2];
 791             ((char *)buf)[2] = psz[1];
 792             ((char *)buf)[3] = psz[0];
 793             buf++;
 794         }
 795         len++;
 796         psz += sizeof(wxUint32);
 797     }
 798
 799     if (buf && len<n)
 800         *buf=0;
 801
 802     return len;
 803 }
 804
 805
 806 // swap 32bit String to 32bit MB
 807 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 808 {
 809     size_t len=0;
 810
 811     while (*psz && (!buf || len < n))
 812     {
 813         if (buf)
 814         {
 815             *buf++ = ((char *)psz)[3];
 816             *buf++ = ((char *)psz)[2];
 817             *buf++ = ((char *)psz)[1];
 818             *buf++ = ((char *)psz)[0];
 819         }
 820         len += sizeof(wxUint32);
 821         psz++;
 822     }
 823
 824     if (buf && len<=n-sizeof(wxUint32))
 825         *(wxUint32*)buf=0;
 826
 827     return len;
 828 }
 829
 830
 831 #endif // WC_UTF16
 832
 833
 834 // ============================================================================
 835 // The classes doing conversion using the iconv_xxx() functions
 836 // ============================================================================
 837
 838 #ifdef HAVE_ICONV
 839
 840 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 841 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 842 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 843 //     (which means error) and says there are 0 bytes left in the input buffer --
 844 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 845 //     this alternative test for iconv() failure.
 846 //     [This bug does not appear in glibc 2.2.]
 847 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 848 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
 849                                      (errno != E2BIG || bufLeft != 0))
 850 #else
 851 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
 852 #endif
 853
 854 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
 855
 856 // ----------------------------------------------------------------------------
 857 // wxMBConv_iconv: encapsulates an iconv character set
 858 // ----------------------------------------------------------------------------
 859
 860 class wxMBConv_iconv : public wxMBConv
 861 {
 862 public:
 863     wxMBConv_iconv(const wxChar *name);
 864     virtual ~wxMBConv_iconv();
 865
 866     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
 867     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
 868
 869     bool IsOk() const
 870         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
 871
 872 protected:
 873     // the iconv handlers used to translate from multibyte to wide char and in
 874     // the other direction
 875     iconv_t m2w,
 876             w2m;
 877
 878 private:
 879     // the name (for iconv_open()) of a wide char charset -- if none is
 880     // available on this machine, it will remain NULL
 881     static const char *ms_wcCharsetName;
 882
 883     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
 884     // different endian-ness than the native one
 885     static bool ms_wcNeedsSwap;
 886 };
 887
 888 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
 889 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
 890
 891 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
 892 {
 893     // Do it the hard way
 894     char cname[100];
 895     for (size_t i = 0; i < wxStrlen(name)+1; i++)
 896         cname[i] = (char) name[i];
 897
 898     // check for charset that represents wchar_t:
 899     if (ms_wcCharsetName == NULL)
 900     {
 901         ms_wcNeedsSwap = false;
 902
 903         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
 904         ms_wcCharsetName = WC_NAME_BEST;
 905         m2w = iconv_open(ms_wcCharsetName, cname);
 906
 907         if (m2w == (iconv_t)-1)
 908         {
 909             // try charset w/o bytesex info (e.g. "UCS4")
 910             // and check for bytesex ourselves:
 911             ms_wcCharsetName = WC_NAME;
 912             m2w = iconv_open(ms_wcCharsetName, cname);
 913
 914             // last bet, try if it knows WCHAR_T pseudo-charset
 915             if (m2w == (iconv_t)-1)
 916             {
 917                 ms_wcCharsetName = "WCHAR_T";
 918                 m2w = iconv_open(ms_wcCharsetName, cname);
 919             }
 920
 921             if (m2w != (iconv_t)-1)
 922             {
 923                 char    buf[2], *bufPtr;
 924                 wchar_t wbuf[2], *wbufPtr;
 925                 size_t  insz, outsz;
 926                 size_t  res;
 927
 928                 buf[0] = 'A';
 929                 buf[1] = 0;
 930                 wbuf[0] = 0;
 931                 insz = 2;
 932                 outsz = SIZEOF_WCHAR_T * 2;
 933                 wbufPtr = wbuf;
 934                 bufPtr = buf;
 935
 936                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
 937                             (char**)&wbufPtr, &outsz);
 938
 939                 if (ICONV_FAILED(res, insz))
 940                 {
 941                     ms_wcCharsetName = NULL;
 942                     wxLogLastError(wxT("iconv"));
 943                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
 944                 }
 945                 else
 946                 {
 947                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
 948                 }
 949             }
 950             else
 951             {
 952                 ms_wcCharsetName = NULL;
 953
 954                 // VS: we must not output an error here, since wxWindows will safely
 955                 //     fall back to using wxEncodingConverter.
 956                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
 957                 //wxLogError(
 958             }
 959         }
 960         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
 961     }
 962     else // we already have ms_wcCharsetName
 963     {
 964         m2w = iconv_open(ms_wcCharsetName, cname);
 965     }
 966
 967     // NB: don't ever pass NULL to iconv_open(), it may crash!
 968     if ( ms_wcCharsetName )
 969     {
 970         w2m = iconv_open( cname, ms_wcCharsetName);
 971     }
 972     else
 973     {
 974         w2m = (iconv_t)-1;
 975     }
 976 }
 977
 978 wxMBConv_iconv::~wxMBConv_iconv()
 979 {
 980     if ( m2w != (iconv_t)-1 )
 981         iconv_close(m2w);
 982     if ( w2m != (iconv_t)-1 )
 983         iconv_close(w2m);
 984 }
 985
 986 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 987 {
 988     size_t inbuf = strlen(psz);
 989     size_t outbuf = n * SIZEOF_WCHAR_T;
 990     size_t res, cres;
 991     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
 992     wchar_t *bufPtr = buf;
 993     const char *pszPtr = psz;
 994
 995     if (buf)
 996     {
 997         // have destination buffer, convert there
 998         cres = iconv(m2w,
 999                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1000                      (char**)&bufPtr, &outbuf);
1001         res = n - (outbuf / SIZEOF_WCHAR_T);
1002
1003         if (ms_wcNeedsSwap)
1004         {
1005             // convert to native endianness
1006             WC_BSWAP(buf /* _not_ bufPtr */, res)
1007         }
1008
1009         // NB: iconv was given only strlen(psz) characters on input, and so
1010         //     it couldn't convert the trailing zero. Let's do it ourselves
1011         //     if there's some room left for it in the output buffer.
1012         if (res < n)
1013             buf[res] = 0;
1014     }
1015     else
1016     {
1017         // no destination buffer... convert using temp buffer
1018         // to calculate destination buffer requirement
1019         wchar_t tbuf[8];
1020         res = 0;
1021         do {
1022             bufPtr = tbuf;
1023             outbuf = 8*SIZEOF_WCHAR_T;
1024
1025             cres = iconv(m2w,
1026                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1027                          (char**)&bufPtr, &outbuf );
1028
1029             res += 8-(outbuf/SIZEOF_WCHAR_T);
1030         } while ((cres==(size_t)-1) && (errno==E2BIG));
1031     }
1032
1033     if (ICONV_FAILED(cres, inbuf))
1034     {
1035         //VS: it is ok if iconv fails, hence trace only
1036         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1037         return (size_t)-1;
1038     }
1039
1040     return res;
1041 }
1042
1043 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044 {
1045     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1046     size_t outbuf = n;
1047     size_t res, cres;
1048
1049     wchar_t *tmpbuf = 0;
1050
1051     if (ms_wcNeedsSwap)
1052     {
1053         // need to copy to temp buffer to switch endianness
1054         // this absolutely doesn't rock!
1055         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1056         //  could be in read-only memory, or be accessed in some other thread)
1057         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1058         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1059         WC_BSWAP(tmpbuf, inbuf)
1060         psz=tmpbuf;
1061     }
1062
1063     if (buf)
1064     {
1065         // have destination buffer, convert there
1066         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1067
1068         res = n-outbuf;
1069
1070         // NB: iconv was given only wcslen(psz) characters on input, and so
1071         //     it couldn't convert the trailing zero. Let's do it ourselves
1072         //     if there's some room left for it in the output buffer.
1073         if (res < n)
1074             buf[0] = 0;
1075     }
1076     else
1077     {
1078         // no destination buffer... convert using temp buffer
1079         // to calculate destination buffer requirement
1080         char tbuf[16];
1081         res = 0;
1082         do {
1083             buf = tbuf; outbuf = 16;
1084
1085             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1086
1087             res += 16 - outbuf;
1088         } while ((cres==(size_t)-1) && (errno==E2BIG));
1089     }
1090
1091     if (ms_wcNeedsSwap)
1092     {
1093         free(tmpbuf);
1094     }
1095
1096     if (ICONV_FAILED(cres, inbuf))
1097     {
1098         //VS: it is ok if iconv fails, hence trace only
1099         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1100         return (size_t)-1;
1101     }
1102
1103     return res;
1104 }
1105
1106 #endif // HAVE_ICONV
1107
1108
1109 // ============================================================================
1110 // Win32 conversion classes
1111 // ============================================================================
1112
1113 #ifdef wxHAVE_WIN32_MB2WC
1114
1115 // from utils.cpp
1116 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1117 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1118
1119 class wxMBConv_win32 : public wxMBConv
1120 {
1121 public:
1122     wxMBConv_win32()
1123     {
1124         m_CodePage = CP_ACP;
1125     }
1126
1127     wxMBConv_win32(const wxChar* name)
1128     {
1129         m_CodePage = wxCharsetToCodepage(name);
1130     }
1131
1132     wxMBConv_win32(wxFontEncoding encoding)
1133     {
1134         m_CodePage = wxEncodingToCodepage(encoding);
1135     }
1136
1137     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1138     {
1139         const size_t len = ::MultiByteToWideChar
1140                              (
1141                                 m_CodePage,     // code page
1142                                 0,              // flags (none)
1143                                 psz,            // input string
1144                                 -1,             // its length (NUL-terminated)
1145                                 buf,            // output string
1146                                 buf ? n : 0     // size of output buffer
1147                              );
1148
1149         // note that it returns count of written chars for buf != NULL and size
1150         // of the needed buffer for buf == NULL so in either case the length of
1151         // the string (which never includes the terminating NUL) is one less
1152         return len ? len - 1 : (size_t)-1;
1153     }
1154
1155     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1156     {
1157         /*
1158             we have a problem here: by default, WideCharToMultiByte() may
1159             replace characters unrepresentable in the target code page with bad
1160             quality approximations such as turning "1/2" symbol (U+00BD) into
1161             "1" for the code pages which don't have it and we, obviously, want
1162             to avoid this at any price
1163
1164             the trouble is that this function does it _silently_, i.e. it won't
1165             even tell us whether it did or not... Win98/2000 and higher provide
1166             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1167             we have to resort to a round trip, i.e. check that converting back
1168             results in the same string -- this is, of course, expensive but
1169             otherwise we simply can't be sure to not garble the data.
1170          */
1171
1172         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1173         // it doesn't work with CJK encodings (which we test for rather roughly
1174         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1175         // supporting it
1176         BOOL usedDef wxDUMMY_INITIALIZE(false),
1177              *pUsedDef;
1178         int flags;
1179         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1180         {
1181             // it's our lucky day
1182             flags = WC_NO_BEST_FIT_CHARS;
1183             pUsedDef = &usedDef;
1184         }
1185         else // old system or unsupported encoding
1186         {
1187             flags = 0;
1188             pUsedDef = NULL;
1189         }
1190
1191         const size_t len = ::WideCharToMultiByte
1192                              (
1193                                 m_CodePage,     // code page
1194                                 flags,          // either none or no best fit
1195                                 pwz,            // input string
1196                                 -1,             // it is (wide) NUL-terminated
1197                                 buf,            // output buffer
1198                                 buf ? n : 0,    // and its size
1199                                 NULL,           // default "replacement" char
1200                                 pUsedDef        // [out] was it used?
1201                              );
1202
1203         if ( !len )
1204         {
1205             // function totally failed
1206             return (size_t)-1;
1207         }
1208
1209         // if we were really converting, check if we succeeded
1210         if ( buf )
1211         {
1212             if ( flags )
1213             {
1214                 // check if the conversion failed, i.e. if any replacements
1215                 // were done
1216                 if ( usedDef )
1217                     return (size_t)-1;
1218             }
1219             else // we must resort to double tripping...
1220             {
1221                 wxWCharBuffer wcBuf(n);
1222                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1223                         wcscmp(wcBuf, pwz) != 0 )
1224                 {
1225                     // we didn't obtain the same thing we started from, hence
1226                     // the conversion was lossy and we consider that it failed
1227                     return (size_t)-1;
1228                 }
1229             }
1230         }
1231
1232         // see the comment above for the reason of "len - 1"
1233         return len - 1;
1234     }
1235
1236     bool IsOk() const { return m_CodePage != -1; }
1237
1238 private:
1239     static bool CanUseNoBestFit()
1240     {
1241         static int s_isWin98Or2k = -1;
1242
1243         if ( s_isWin98Or2k == -1 )
1244         {
1245             int verMaj, verMin;
1246             switch ( wxGetOsVersion(&verMaj, &verMin) )
1247             {
1248                 case wxWIN95:
1249                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1250                     break;
1251
1252                 case wxWINDOWS_NT:
1253                     s_isWin98Or2k = verMaj >= 5;
1254                     break;
1255
1256                 default:
1257                     // unknown, be conseravtive by default
1258                     s_isWin98Or2k = 0;
1259             }
1260
1261             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1262         }
1263
1264         return s_isWin98Or2k == 1;
1265     }
1266
1267     long m_CodePage;
1268 };
1269
1270 #endif // wxHAVE_WIN32_MB2WC
1271
1272 // ============================================================================
1273 // Mac conversion classes
1274 // ============================================================================
1275
1276 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1277
1278 class wxMBConv_mac : public wxMBConv
1279 {
1280 public:
1281     wxMBConv_mac()
1282     {
1283         Init(CFStringGetSystemEncoding()) ;
1284     }
1285
1286     wxMBConv_mac(const wxChar* name)
1287     {
1288         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1289     }
1290
1291     wxMBConv_mac(wxFontEncoding encoding)
1292     {
1293         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1294     }
1295
1296         ~wxMBConv_mac()
1297         {
1298             OSStatus status = noErr ;
1299             status = TECDisposeConverter(m_MB2WC_converter);
1300             status = TECDisposeConverter(m_WC2MB_converter);
1301         }
1302
1303
1304         void Init( TextEncodingBase encoding)
1305         {
1306             OSStatus status = noErr ;
1307                 m_char_encoding = encoding ;
1308                 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1309
1310             status = TECCreateConverter(&m_MB2WC_converter,
1311                                         m_char_encoding,
1312                                         m_unicode_encoding);
1313             status = TECCreateConverter(&m_WC2MB_converter,
1314                                         m_unicode_encoding,
1315                                         m_char_encoding);
1316         }
1317
1318     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1319     {
1320             OSStatus status = noErr ;
1321             ByteCount byteOutLen ;
1322             ByteCount byteInLen = strlen(psz) ;
1323                 wchar_t *tbuf = NULL ;
1324                 UniChar* ubuf = NULL ;
1325                 size_t res = 0 ;
1326
1327                 if (buf == NULL)
1328                 {
1329                         n = byteInLen ;
1330                         tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1331                 }
1332             ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1333 #if SIZEOF_WCHAR_T == 4
1334                 ubuf = (UniChar*) malloc( byteBufferLen ) ;
1335 #else
1336                 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1337 #endif
1338             status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1339               (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1340 #if SIZEOF_WCHAR_T == 4
1341                 wxMBConvUTF16BE converter ;
1342                 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1343                 free( ubuf ) ;
1344 #else
1345                 res = byteOutLen / sizeof( UniChar ) ;
1346 #endif
1347                 if ( buf == NULL )
1348                         free(tbuf) ;
1349
1350         if ( buf  && res < n)
1351             buf[res] = 0;
1352
1353                 return res ;
1354     }
1355
1356     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1357     {
1358             OSStatus status = noErr ;
1359             ByteCount byteOutLen ;
1360             ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1361
1362                 char *tbuf = NULL ;
1363
1364                 if (buf == NULL)
1365                 {
1366                         // worst case
1367                         n = byteInLen * 2 ;
1368                         tbuf = (char*) malloc( n ) ;
1369                 }
1370
1371             ByteCount byteBufferLen = n ;
1372                 UniChar* ubuf = NULL ;
1373 #if SIZEOF_WCHAR_T == 4
1374                 wxMBConvUTF16BE converter ;
1375                 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1376                 byteBufferLen = unicharlen ;
1377                 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1378                 converter.WC2MB( (char*) ubuf , psz, unicharlen ) ;
1379 #else
1380                 ubuf = (UniChar*) psz ;
1381 #endif
1382             status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1383                (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1384 #if SIZEOF_WCHAR_T == 4
1385                 free( ubuf ) ;
1386 #endif
1387                 if ( buf == NULL )
1388                         free(tbuf) ;
1389
1390                 size_t res = byteOutLen ;
1391         if ( buf  && res < n)
1392             buf[res] = 0;
1393
1394                 return res ;
1395     }
1396
1397     bool IsOk() const
1398         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
1399
1400 private:
1401         TECObjectRef m_MB2WC_converter ;
1402         TECObjectRef m_WC2MB_converter ;
1403
1404         TextEncodingBase m_char_encoding ;
1405         TextEncodingBase m_unicode_encoding ;
1406 };
1407
1408 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1409
1410 // ============================================================================
1411 // wxEncodingConverter based conversion classes
1412 // ============================================================================
1413
1414 #if wxUSE_FONTMAP
1415
1416 class wxMBConv_wxwin : public wxMBConv
1417 {
1418 private:
1419     void Init()
1420     {
1421         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1422                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1423     }
1424
1425 public:
1426     // temporarily just use wxEncodingConverter stuff,
1427     // so that it works while a better implementation is built
1428     wxMBConv_wxwin(const wxChar* name)
1429     {
1430         if (name)
1431             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1432         else
1433             m_enc = wxFONTENCODING_SYSTEM;
1434
1435         Init();
1436     }
1437
1438     wxMBConv_wxwin(wxFontEncoding enc)
1439     {
1440         m_enc = enc;
1441
1442         Init();
1443     }
1444
1445     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1446     {
1447         size_t inbuf = strlen(psz);
1448         if (buf)
1449             m2w.Convert(psz,buf);
1450         return inbuf;
1451     }
1452
1453     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1454     {
1455         const size_t inbuf = wxWcslen(psz);
1456         if (buf)
1457             w2m.Convert(psz,buf);
1458
1459         return inbuf;
1460     }
1461
1462     bool IsOk() const { return m_ok; }
1463
1464 public:
1465     wxFontEncoding m_enc;
1466     wxEncodingConverter m2w, w2m;
1467
1468     // were we initialized successfully?
1469     bool m_ok;
1470
1471     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1472 };
1473
1474 #endif // wxUSE_FONTMAP
1475
1476 // ============================================================================
1477 // wxCSConv implementation
1478 // ============================================================================
1479
1480 void wxCSConv::Init()
1481 {
1482     m_name = NULL;
1483     m_convReal =  NULL;
1484     m_deferred = true;
1485 }
1486
1487 wxCSConv::wxCSConv(const wxChar *charset)
1488 {
1489     Init();
1490
1491     if ( charset )
1492     {
1493         SetName(charset);
1494     }
1495
1496     m_encoding = wxFONTENCODING_SYSTEM;
1497 }
1498
1499 wxCSConv::wxCSConv(wxFontEncoding encoding)
1500 {
1501     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1502     {
1503         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1504
1505         encoding = wxFONTENCODING_SYSTEM;
1506     }
1507
1508     Init();
1509
1510     m_encoding = encoding;
1511 }
1512
1513 wxCSConv::~wxCSConv()
1514 {
1515     Clear();
1516 }
1517
1518 wxCSConv::wxCSConv(const wxCSConv& conv)
1519         : wxMBConv()
1520 {
1521     Init();
1522
1523     SetName(conv.m_name);
1524     m_encoding = conv.m_encoding;
1525 }
1526
1527 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1528 {
1529     Clear();
1530
1531     SetName(conv.m_name);
1532     m_encoding = conv.m_encoding;
1533
1534     return *this;
1535 }
1536
1537 void wxCSConv::Clear()
1538 {
1539     free(m_name);
1540     delete m_convReal;
1541
1542     m_name = NULL;
1543     m_convReal = NULL;
1544 }
1545
1546 void wxCSConv::SetName(const wxChar *charset)
1547 {
1548     if (charset)
1549     {
1550         m_name = wxStrdup(charset);
1551         m_deferred = true;
1552     }
1553 }
1554
1555 wxMBConv *wxCSConv::DoCreate() const
1556 {
1557     // check for the special case of ASCII or ISO8859-1 charset: as we have
1558     // special knowledge of it anyhow, we don't need to create a special
1559     // conversion object
1560     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1561     {
1562         // don't convert at all
1563         return NULL;
1564     }
1565
1566     // we trust OS to do conversion better than we can so try external
1567     // conversion methods first
1568     //
1569     // the full order is:
1570     //      1. OS conversion (iconv() under Unix or Win32 API)
1571     //      2. hard coded conversions for UTF
1572     //      3. wxEncodingConverter as fall back
1573
1574     // step (1)
1575 #ifdef HAVE_ICONV
1576 #if !wxUSE_FONTMAP
1577     if ( m_name )
1578 #endif // !wxUSE_FONTMAP
1579     {
1580         wxString name(m_name);
1581
1582 #if wxUSE_FONTMAP
1583         if ( name.empty() )
1584             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1585 #endif // wxUSE_FONTMAP
1586
1587         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1588         if ( conv->IsOk() )
1589             return conv;
1590
1591         delete conv;
1592     }
1593 #endif // HAVE_ICONV
1594
1595 #ifdef wxHAVE_WIN32_MB2WC
1596     {
1597         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1598                                       : new wxMBConv_win32(m_encoding);
1599         if ( conv->IsOk() )
1600             return conv;
1601
1602         delete conv;
1603     }
1604 #endif // wxHAVE_WIN32_MB2WC
1605 #if defined(__WXMAC__)
1606     {
1607         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1608         {
1609
1610                 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1611                                             : new wxMBConv_mac(m_encoding);
1612                 if ( conv->IsOk() )
1613                     return conv;
1614
1615                 delete conv;
1616         }
1617     }
1618 #endif
1619     // step (2)
1620     wxFontEncoding enc = m_encoding;
1621 #if wxUSE_FONTMAP
1622     if ( enc == wxFONTENCODING_SYSTEM && m_name )
1623     {
1624         // use "false" to suppress interactive dialogs -- we can be called from
1625         // anywhere and popping up a dialog from here is the last thing we want to
1626         // do
1627         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1628     }
1629 #endif // wxUSE_FONTMAP
1630
1631     switch ( enc )
1632     {
1633         case wxFONTENCODING_UTF7:
1634              return new wxMBConvUTF7;
1635
1636         case wxFONTENCODING_UTF8:
1637              return new wxMBConvUTF8;
1638
1639         case wxFONTENCODING_UTF16BE:
1640              return new wxMBConvUTF16BE;
1641
1642         case wxFONTENCODING_UTF16LE:
1643              return new wxMBConvUTF16LE;
1644
1645         case wxFONTENCODING_UTF32BE:
1646              return new wxMBConvUTF32BE;
1647
1648         case wxFONTENCODING_UTF32LE:
1649              return new wxMBConvUTF32LE;
1650
1651         default:
1652              // nothing to do but put here to suppress gcc warnings
1653              ;
1654     }
1655
1656     // step (3)
1657 #if wxUSE_FONTMAP
1658     {
1659         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1660                                       : new wxMBConv_wxwin(m_encoding);
1661         if ( conv->IsOk() )
1662             return conv;
1663
1664         delete conv;
1665     }
1666 #endif // wxUSE_FONTMAP
1667
1668     // NB: This is a hack to prevent deadlock. What could otherwise happen
1669     //     in Unicode build: wxConvLocal creation ends up being here
1670     //     because of some failure and logs the error. But wxLog will try to
1671     //     attach timestamp, for which it will need wxConvLocal (to convert
1672     //     time to char* and then wchar_t*), but that fails, tries to log
1673     //     error, but wxLog has a (already locked) critical section that
1674     //     guards static buffer.
1675     static bool alreadyLoggingError = false;
1676     if (!alreadyLoggingError)
1677     {
1678         alreadyLoggingError = true;
1679         wxLogError(_("Cannot convert from the charset '%s'!"),
1680                    m_name ? m_name
1681                       :
1682 #if wxUSE_FONTMAP
1683                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1684 #else // !wxUSE_FONTMAP
1685                          wxString::Format(_("encoding %s"), m_encoding).c_str()
1686 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1687               );
1688         alreadyLoggingError = false;
1689     }
1690
1691     return NULL;
1692 }
1693
1694 void wxCSConv::CreateConvIfNeeded() const
1695 {
1696     if ( m_deferred )
1697     {
1698         wxCSConv *self = (wxCSConv *)this; // const_cast
1699
1700 #if wxUSE_INTL
1701         // if we don't have neither the name nor the encoding, use the default
1702         // encoding for this system
1703         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1704         {
1705             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1706         }
1707 #endif // wxUSE_INTL
1708
1709         self->m_convReal = DoCreate();
1710         self->m_deferred = false;
1711     }
1712 }
1713
1714 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1715 {
1716     CreateConvIfNeeded();
1717
1718     if (m_convReal)
1719         return m_convReal->MB2WC(buf, psz, n);
1720
1721     // latin-1 (direct)
1722     size_t len = strlen(psz);
1723
1724     if (buf)
1725     {
1726         for (size_t c = 0; c <= len; c++)
1727             buf[c] = (unsigned char)(psz[c]);
1728     }
1729
1730     return len;
1731 }
1732
1733 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1734 {
1735     CreateConvIfNeeded();
1736
1737     if (m_convReal)
1738         return m_convReal->WC2MB(buf, psz, n);
1739
1740     // latin-1 (direct)
1741     const size_t len = wxWcslen(psz);
1742     if (buf)
1743     {
1744         for (size_t c = 0; c <= len; c++)
1745         {
1746             if (psz[c] > 0xFF)
1747                 return (size_t)-1;
1748             buf[c] = psz[c];
1749         }
1750     }
1751     else
1752     {
1753         for (size_t c = 0; c <= len; c++)
1754         {
1755             if (psz[c] > 0xFF)
1756                 return (size_t)-1;
1757         }
1758     }
1759
1760     return len;
1761 }
1762
1763 // ----------------------------------------------------------------------------
1764 // globals
1765 // ----------------------------------------------------------------------------
1766
1767 #ifdef __WINDOWS__
1768     static wxMBConv_win32 wxConvLibcObj;
1769 #else
1770     static wxMBConvLibc wxConvLibcObj;
1771 #endif
1772
1773 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1774 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1775 static wxMBConvUTF7 wxConvUTF7Obj;
1776 static wxMBConvUTF8 wxConvUTF8Obj;
1777
1778
1779 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1780 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1781 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1782 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1783 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1784 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1785
1786 #else // !wxUSE_WCHAR_T
1787
1788 // stand-ins in absence of wchar_t
1789 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1790                                 wxConvISO8859_1,
1791                                 wxConvLocal,
1792                                 wxConvUTF8;
1793
1794 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1795
1796