src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
   9 //              (c) 2000-2003 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ============================================================================
  14 // declarations
  15 // ============================================================================
  16
  17 // ----------------------------------------------------------------------------
  18 // headers
  19 // ----------------------------------------------------------------------------
  20
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  22   #pragma implementation "strconv.h"
  23 #endif
  24
  25 // For compilers that support precompilation, includes "wx.h".
  26 #include "wx/wxprec.h"
  27
  28 #ifdef __BORLANDC__
  29   #pragma hdrstop
  30 #endif
  31
  32 #ifndef WX_PRECOMP
  33     #include "wx/intl.h"
  34     #include "wx/log.h"
  35 #endif // WX_PRECOMP
  36
  37 #include "wx/strconv.h"
  38
  39 #if wxUSE_WCHAR_T
  40
  41 #ifdef __WXMSW__
  42     #include "wx/msw/private.h"
  43     #include "wx/msw/missing.h"
  44 #endif
  45
  46 #ifndef __WXWINCE__
  47 #include <errno.h>
  48 #endif
  49
  50 #include <ctype.h>
  51 #include <string.h>
  52 #include <stdlib.h>
  53
  54 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  55     #define wxHAVE_WIN32_MB2WC
  56 #endif // __WIN32__ but !__WXMICROWIN__
  57
  58 // ----------------------------------------------------------------------------
  59 // headers
  60 // ----------------------------------------------------------------------------
  61
  62 #ifdef __SALFORDC__
  63     #include <clib.h>
  64 #endif
  65
  66 #ifdef HAVE_ICONV
  67     #include <iconv.h>
  68 #endif
  69
  70 #include "wx/encconv.h"
  71 #include "wx/fontmap.h"
  72
  73 #ifdef __WXMAC__
  74 #include <ATSUnicode.h>
  75 #include <TextCommon.h>
  76 #include <TextEncodingConverter.h>
  77
  78 #include  "wx/mac/private.h"  // includes mac headers
  79 #endif
  80 // ----------------------------------------------------------------------------
  81 // macros
  82 // ----------------------------------------------------------------------------
  83
  84 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  85 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  86
  87 #if SIZEOF_WCHAR_T == 4
  88     #define WC_NAME         "UCS4"
  89     #define WC_BSWAP         BSWAP_UCS4
  90     #ifdef WORDS_BIGENDIAN
  91       #define WC_NAME_BEST  "UCS-4BE"
  92     #else
  93       #define WC_NAME_BEST  "UCS-4LE"
  94     #endif
  95 #elif SIZEOF_WCHAR_T == 2
  96     #define WC_NAME         "UTF16"
  97     #define WC_BSWAP         BSWAP_UTF16
  98     #define WC_UTF16
  99     #ifdef WORDS_BIGENDIAN
 100       #define WC_NAME_BEST  "UTF-16BE"
 101     #else
 102       #define WC_NAME_BEST  "UTF-16LE"
 103     #endif
 104 #else // sizeof(wchar_t) != 2 nor 4
 105     // does this ever happen?
 106     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 107 #endif
 108
 109 // ============================================================================
 110 // implementation
 111 // ============================================================================
 112
 113 // ----------------------------------------------------------------------------
 114 // UTF-16 en/decoding to/from UCS-4
 115 // ----------------------------------------------------------------------------
 116
 117
 118 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 119 {
 120     if (input<=0xffff)
 121     {
 122         if (output)
 123             *output = (wxUint16) input;
 124         return 1;
 125     }
 126     else if (input>=0x110000)
 127     {
 128         return (size_t)-1;
 129     }
 130     else
 131     {
 132         if (output)
 133         {
 134             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 135             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 136         }
 137         return 2;
 138     }
 139 }
 140
 141 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 142 {
 143     if ((*input<0xd800) || (*input>0xdfff))
 144     {
 145         output = *input;
 146         return 1;
 147     }
 148     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 149     {
 150         output = *input;
 151         return (size_t)-1;
 152     }
 153     else
 154     {
 155         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 156         return 2;
 157     }
 158 }
 159
 160
 161 // ----------------------------------------------------------------------------
 162 // wxMBConv
 163 // ----------------------------------------------------------------------------
 164
 165 wxMBConv::~wxMBConv()
 166 {
 167     // nothing to do here
 168 }
 169
 170 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 171 {
 172     if ( psz )
 173     {
 174         // calculate the length of the buffer needed first
 175         size_t nLen = MB2WC(NULL, psz, 0);
 176         if ( nLen != (size_t)-1 )
 177         {
 178             // now do the actual conversion
 179             wxWCharBuffer buf(nLen);
 180             MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
 181
 182             return buf;
 183         }
 184     }
 185
 186     wxWCharBuffer buf((wchar_t *)NULL);
 187
 188     return buf;
 189 }
 190
 191 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 192 {
 193     if ( pwz )
 194     {
 195         size_t nLen = WC2MB(NULL, pwz, 0);
 196         if ( nLen != (size_t)-1 )
 197         {
 198             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 199             WC2MB(buf.data(), pwz, nLen + 4);
 200
 201             return buf;
 202         }
 203     }
 204
 205     wxCharBuffer buf((char *)NULL);
 206
 207     return buf;
 208 }
 209
 210 // ----------------------------------------------------------------------------
 211 // wxMBConvLibc
 212 // ----------------------------------------------------------------------------
 213
 214 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 215 {
 216     return wxMB2WC(buf, psz, n);
 217 }
 218
 219 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 220 {
 221     return wxWC2MB(buf, psz, n);
 222 }
 223
 224 // ----------------------------------------------------------------------------
 225 // UTF-7
 226 // ----------------------------------------------------------------------------
 227
 228 #if 0
 229 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 230                         "abcdefghijklmnopqrstuvwxyz"
 231                         "0123456789'(),-./:?";
 232 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 233 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 234                         "abcdefghijklmnopqrstuvwxyz"
 235                         "0123456789+/";
 236 #endif
 237
 238 // TODO: write actual implementations of UTF-7 here
 239 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 240                            const char * WXUNUSED(psz),
 241                            size_t WXUNUSED(n)) const
 242 {
 243   return 0;
 244 }
 245
 246 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 247                            const wchar_t * WXUNUSED(psz),
 248                            size_t WXUNUSED(n)) const
 249 {
 250   return 0;
 251 }
 252
 253 // ----------------------------------------------------------------------------
 254 // UTF-8
 255 // ----------------------------------------------------------------------------
 256
 257 static wxUint32 utf8_max[]=
 258     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 259
 260 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 261 {
 262     size_t len = 0;
 263
 264     while (*psz && ((!buf) || (len < n)))
 265     {
 266         unsigned char cc = *psz++, fc = cc;
 267         unsigned cnt;
 268         for (cnt = 0; fc & 0x80; cnt++)
 269             fc <<= 1;
 270         if (!cnt)
 271         {
 272             // plain ASCII char
 273             if (buf)
 274                 *buf++ = cc;
 275             len++;
 276         }
 277         else
 278         {
 279             cnt--;
 280             if (!cnt)
 281             {
 282                 // invalid UTF-8 sequence
 283                 return (size_t)-1;
 284             }
 285             else
 286             {
 287                 unsigned ocnt = cnt - 1;
 288                 wxUint32 res = cc & (0x3f >> cnt);
 289                 while (cnt--)
 290                 {
 291                     cc = *psz++;
 292                     if ((cc & 0xC0) != 0x80)
 293                     {
 294                         // invalid UTF-8 sequence
 295                         return (size_t)-1;
 296                     }
 297                     res = (res << 6) | (cc & 0x3f);
 298                 }
 299                 if (res <= utf8_max[ocnt])
 300                 {
 301                     // illegal UTF-8 encoding
 302                     return (size_t)-1;
 303                 }
 304 #ifdef WC_UTF16
 305                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 306                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 307                 if (pa == (size_t)-1)
 308                   return (size_t)-1;
 309                 if (buf)
 310                     buf += pa;
 311                 len += pa;
 312 #else // !WC_UTF16
 313                 if (buf)
 314                     *buf++ = res;
 315                 len++;
 316 #endif // WC_UTF16/!WC_UTF16
 317             }
 318         }
 319     }
 320     if (buf && (len < n))
 321         *buf = 0;
 322     return len;
 323 }
 324
 325 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 326 {
 327     size_t len = 0;
 328
 329     while (*psz && ((!buf) || (len < n)))
 330     {
 331         wxUint32 cc;
 332 #ifdef WC_UTF16
 333         // cast is ok for WC_UTF16
 334         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 335         psz += (pa == (size_t)-1) ? 1 : pa;
 336 #else
 337         cc=(*psz++) & 0x7fffffff;
 338 #endif
 339         unsigned cnt;
 340         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 341         if (!cnt)
 342         {
 343             // plain ASCII char
 344             if (buf)
 345                 *buf++ = (char) cc;
 346             len++;
 347         }
 348
 349         else
 350         {
 351             len += cnt + 1;
 352             if (buf)
 353             {
 354                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 355                 while (cnt--)
 356                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 357             }
 358         }
 359     }
 360
 361     if (buf && (len<n)) *buf = 0;
 362
 363     return len;
 364 }
 365
 366
 367
 368
 369 // ----------------------------------------------------------------------------
 370 // UTF-16
 371 // ----------------------------------------------------------------------------
 372
 373 #ifdef WORDS_BIGENDIAN
 374     #define wxMBConvUTF16straight wxMBConvUTF16BE
 375     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 376 #else
 377     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 378     #define wxMBConvUTF16straight wxMBConvUTF16LE
 379 #endif
 380
 381
 382 #ifdef WC_UTF16
 383
 384 // copy 16bit MB to 16bit String
 385 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 386 {
 387     size_t len=0;
 388
 389     while (*(wxUint16*)psz && (!buf || len < n))
 390     {
 391         if (buf)
 392             *buf++ = *(wxUint16*)psz;
 393         len++;
 394
 395         psz += sizeof(wxUint16);
 396     }
 397     if (buf && len<n)   *buf=0;
 398
 399     return len;
 400 }
 401
 402
 403 // copy 16bit String to 16bit MB
 404 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 405 {
 406     size_t len=0;
 407
 408     while (*psz && (!buf || len < n))
 409     {
 410         if (buf)
 411         {
 412             *(wxUint16*)buf = *psz;
 413             buf += sizeof(wxUint16);
 414         }
 415         len += sizeof(wxUint16);
 416         psz++;
 417     }
 418     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 419
 420     return len;
 421 }
 422
 423
 424 // swap 16bit MB to 16bit String
 425 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 426 {
 427     size_t len=0;
 428
 429     while (*(wxUint16*)psz && (!buf || len < n))
 430     {
 431         if (buf)
 432         {
 433             ((char *)buf)[0] = psz[1];
 434             ((char *)buf)[1] = psz[0];
 435             buf++;
 436         }
 437         len++;
 438         psz += sizeof(wxUint16);
 439     }
 440     if (buf && len<n)   *buf=0;
 441
 442     return len;
 443 }
 444
 445
 446 // swap 16bit MB to 16bit String
 447 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 448 {
 449     size_t len=0;
 450
 451     while (*psz && (!buf || len < n))
 452     {
 453         if (buf)
 454         {
 455             *buf++ = ((char*)psz)[1];
 456             *buf++ = ((char*)psz)[0];
 457         }
 458         len += sizeof(wxUint16);
 459         psz++;
 460     }
 461     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 462
 463     return len;
 464 }
 465
 466
 467 #else // WC_UTF16
 468
 469
 470 // copy 16bit MB to 32bit String
 471 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 472 {
 473     size_t len=0;
 474
 475     while (*(wxUint16*)psz && (!buf || len < n))
 476     {
 477         wxUint32 cc;
 478         size_t pa=decode_utf16((wxUint16*)psz, cc);
 479         if (pa == (size_t)-1)
 480             return pa;
 481
 482         if (buf)
 483             *buf++ = cc;
 484         len++;
 485         psz += pa * sizeof(wxUint16);
 486     }
 487     if (buf && len<n)   *buf=0;
 488
 489     return len;
 490 }
 491
 492
 493 // copy 32bit String to 16bit MB
 494 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 495 {
 496     size_t len=0;
 497
 498     while (*psz && (!buf || len < n))
 499     {
 500         wxUint16 cc[2];
 501         size_t pa=encode_utf16(*psz, cc);
 502
 503         if (pa == (size_t)-1)
 504             return pa;
 505
 506         if (buf)
 507         {
 508             *(wxUint16*)buf = cc[0];
 509             buf += sizeof(wxUint16);
 510             if (pa > 1)
 511             {
 512                 *(wxUint16*)buf = cc[1];
 513                 buf += sizeof(wxUint16);
 514             }
 515         }
 516
 517         len += pa*sizeof(wxUint16);
 518         psz++;
 519     }
 520     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 521
 522     return len;
 523 }
 524
 525
 526 // swap 16bit MB to 32bit String
 527 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 528 {
 529     size_t len=0;
 530
 531     while (*(wxUint16*)psz && (!buf || len < n))
 532     {
 533         wxUint32 cc;
 534         char tmp[4];
 535         tmp[0]=psz[1];  tmp[1]=psz[0];
 536         tmp[2]=psz[3];  tmp[3]=psz[2];
 537
 538         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 539         if (pa == (size_t)-1)
 540             return pa;
 541
 542         if (buf)
 543             *buf++ = cc;
 544
 545         len++;
 546         psz += pa * sizeof(wxUint16);
 547     }
 548     if (buf && len<n)   *buf=0;
 549
 550     return len;
 551 }
 552
 553
 554 // swap 32bit String to 16bit MB
 555 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 556 {
 557     size_t len=0;
 558
 559     while (*psz && (!buf || len < n))
 560     {
 561         wxUint16 cc[2];
 562         size_t pa=encode_utf16(*psz, cc);
 563
 564         if (pa == (size_t)-1)
 565             return pa;
 566
 567         if (buf)
 568         {
 569             *buf++ = ((char*)cc)[1];
 570             *buf++ = ((char*)cc)[0];
 571             if (pa > 1)
 572             {
 573                 *buf++ = ((char*)cc)[3];
 574                 *buf++ = ((char*)cc)[2];
 575             }
 576         }
 577
 578         len += pa*sizeof(wxUint16);
 579         psz++;
 580     }
 581     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 582
 583     return len;
 584 }
 585
 586 #endif // WC_UTF16
 587
 588
 589 // ----------------------------------------------------------------------------
 590 // UTF-32
 591 // ----------------------------------------------------------------------------
 592
 593 #ifdef WORDS_BIGENDIAN
 594 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 595 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 596 #else
 597 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 598 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 599 #endif
 600
 601
 602 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
 603 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 604
 605
 606 #ifdef WC_UTF16
 607
 608 // copy 32bit MB to 16bit String
 609 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 610 {
 611     size_t len=0;
 612
 613     while (*(wxUint32*)psz && (!buf || len < n))
 614     {
 615         wxUint16 cc[2];
 616
 617         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 618         if (pa == (size_t)-1)
 619             return pa;
 620
 621         if (buf)
 622         {
 623             *buf++ = cc[0];
 624             if (pa > 1)
 625                 *buf++ = cc[1];
 626         }
 627         len += pa;
 628         psz += sizeof(wxUint32);
 629     }
 630     if (buf && len<n)   *buf=0;
 631
 632     return len;
 633 }
 634
 635
 636 // copy 16bit String to 32bit MB
 637 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 638 {
 639     size_t len=0;
 640
 641     while (*psz && (!buf || len < n))
 642     {
 643         wxUint32 cc;
 644
 645         // cast is ok for WC_UTF16
 646         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 647         if (pa == (size_t)-1)
 648             return pa;
 649
 650         if (buf)
 651         {
 652             *(wxUint32*)buf = cc;
 653             buf += sizeof(wxUint32);
 654         }
 655         len += sizeof(wxUint32);
 656         psz += pa;
 657     }
 658
 659     if (buf && len<=n-sizeof(wxUint32))
 660         *(wxUint32*)buf=0;
 661
 662     return len;
 663 }
 664
 665
 666
 667 // swap 32bit MB to 16bit String
 668 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 669 {
 670     size_t len=0;
 671
 672     while (*(wxUint32*)psz && (!buf || len < n))
 673     {
 674         char tmp[4];
 675         tmp[0] = psz[3];   tmp[1] = psz[2];
 676         tmp[2] = psz[1];   tmp[3] = psz[0];
 677
 678
 679         wxUint16 cc[2];
 680
 681         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 682         if (pa == (size_t)-1)
 683             return pa;
 684
 685         if (buf)
 686         {
 687             *buf++ = cc[0];
 688             if (pa > 1)
 689                 *buf++ = cc[1];
 690         }
 691         len += pa;
 692         psz += sizeof(wxUint32);
 693     }
 694
 695     if (buf && len<n)
 696         *buf=0;
 697
 698     return len;
 699 }
 700
 701
 702 // swap 16bit String to 32bit MB
 703 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 704 {
 705     size_t len=0;
 706
 707     while (*psz && (!buf || len < n))
 708     {
 709         char cc[4];
 710
 711         // cast is ok for WC_UTF16
 712         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 713         if (pa == (size_t)-1)
 714             return pa;
 715
 716         if (buf)
 717         {
 718             *buf++ = cc[3];
 719             *buf++ = cc[2];
 720             *buf++ = cc[1];
 721             *buf++ = cc[0];
 722         }
 723         len += sizeof(wxUint32);
 724         psz += pa;
 725     }
 726
 727     if (buf && len<=n-sizeof(wxUint32))
 728         *(wxUint32*)buf=0;
 729
 730     return len;
 731 }
 732
 733 #else // WC_UTF16
 734
 735
 736 // copy 32bit MB to 32bit String
 737 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 738 {
 739     size_t len=0;
 740
 741     while (*(wxUint32*)psz && (!buf || len < n))
 742     {
 743         if (buf)
 744             *buf++ = *(wxUint32*)psz;
 745         len++;
 746         psz += sizeof(wxUint32);
 747     }
 748
 749     if (buf && len<n)
 750         *buf=0;
 751
 752     return len;
 753 }
 754
 755
 756 // copy 32bit String to 32bit MB
 757 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 758 {
 759     size_t len=0;
 760
 761     while (*psz && (!buf || len < n))
 762     {
 763         if (buf)
 764         {
 765             *(wxUint32*)buf = *psz;
 766             buf += sizeof(wxUint32);
 767         }
 768
 769         len += sizeof(wxUint32);
 770         psz++;
 771     }
 772
 773     if (buf && len<=n-sizeof(wxUint32))
 774         *(wxUint32*)buf=0;
 775
 776     return len;
 777 }
 778
 779
 780 // swap 32bit MB to 32bit String
 781 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 782 {
 783     size_t len=0;
 784
 785     while (*(wxUint32*)psz && (!buf || len < n))
 786     {
 787         if (buf)
 788         {
 789             ((char *)buf)[0] = psz[3];
 790             ((char *)buf)[1] = psz[2];
 791             ((char *)buf)[2] = psz[1];
 792             ((char *)buf)[3] = psz[0];
 793             buf++;
 794         }
 795         len++;
 796         psz += sizeof(wxUint32);
 797     }
 798
 799     if (buf && len<n)
 800         *buf=0;
 801
 802     return len;
 803 }
 804
 805
 806 // swap 32bit String to 32bit MB
 807 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 808 {
 809     size_t len=0;
 810
 811     while (*psz && (!buf || len < n))
 812     {
 813         if (buf)
 814         {
 815             *buf++ = ((char *)psz)[3];
 816             *buf++ = ((char *)psz)[2];
 817             *buf++ = ((char *)psz)[1];
 818             *buf++ = ((char *)psz)[0];
 819         }
 820         len += sizeof(wxUint32);
 821         psz++;
 822     }
 823
 824     if (buf && len<=n-sizeof(wxUint32))
 825         *(wxUint32*)buf=0;
 826
 827     return len;
 828 }
 829
 830
 831 #endif // WC_UTF16
 832
 833
 834 // ============================================================================
 835 // The classes doing conversion using the iconv_xxx() functions
 836 // ============================================================================
 837
 838 #ifdef HAVE_ICONV
 839
 840 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 841 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 842 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 843 //     (which means error) and says there are 0 bytes left in the input buffer --
 844 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 845 //     this alternative test for iconv() failure.
 846 //     [This bug does not appear in glibc 2.2.]
 847 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 848 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
 849                                      (errno != E2BIG || bufLeft != 0))
 850 #else
 851 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
 852 #endif
 853
 854 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
 855
 856 // ----------------------------------------------------------------------------
 857 // wxMBConv_iconv: encapsulates an iconv character set
 858 // ----------------------------------------------------------------------------
 859
 860 class wxMBConv_iconv : public wxMBConv
 861 {
 862 public:
 863     wxMBConv_iconv(const wxChar *name);
 864     virtual ~wxMBConv_iconv();
 865
 866     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
 867     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
 868
 869     bool IsOk() const
 870         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
 871
 872 protected:
 873     // the iconv handlers used to translate from multibyte to wide char and in
 874     // the other direction
 875     iconv_t m2w,
 876             w2m;
 877
 878 private:
 879     // the name (for iconv_open()) of a wide char charset -- if none is
 880     // available on this machine, it will remain NULL
 881     static const char *ms_wcCharsetName;
 882
 883     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
 884     // different endian-ness than the native one
 885     static bool ms_wcNeedsSwap;
 886 };
 887
 888 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
 889 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
 890
 891 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
 892 {
 893     // Do it the hard way
 894     char cname[100];
 895     for (size_t i = 0; i < wxStrlen(name)+1; i++)
 896         cname[i] = (char) name[i];
 897
 898     // check for charset that represents wchar_t:
 899     if (ms_wcCharsetName == NULL)
 900     {
 901         ms_wcNeedsSwap = false;
 902
 903         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
 904         ms_wcCharsetName = WC_NAME_BEST;
 905         m2w = iconv_open(ms_wcCharsetName, cname);
 906
 907         if (m2w == (iconv_t)-1)
 908         {
 909             // try charset w/o bytesex info (e.g. "UCS4")
 910             // and check for bytesex ourselves:
 911             ms_wcCharsetName = WC_NAME;
 912             m2w = iconv_open(ms_wcCharsetName, cname);
 913
 914             // last bet, try if it knows WCHAR_T pseudo-charset
 915             if (m2w == (iconv_t)-1)
 916             {
 917                 ms_wcCharsetName = "WCHAR_T";
 918                 m2w = iconv_open(ms_wcCharsetName, cname);
 919             }
 920
 921             if (m2w != (iconv_t)-1)
 922             {
 923                 char    buf[2], *bufPtr;
 924                 wchar_t wbuf[2], *wbufPtr;
 925                 size_t  insz, outsz;
 926                 size_t  res;
 927
 928                 buf[0] = 'A';
 929                 buf[1] = 0;
 930                 wbuf[0] = 0;
 931                 insz = 2;
 932                 outsz = SIZEOF_WCHAR_T * 2;
 933                 wbufPtr = wbuf;
 934                 bufPtr = buf;
 935
 936                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
 937                             (char**)&wbufPtr, &outsz);
 938
 939                 if (ICONV_FAILED(res, insz))
 940                 {
 941                     ms_wcCharsetName = NULL;
 942                     wxLogLastError(wxT("iconv"));
 943                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
 944                 }
 945                 else
 946                 {
 947                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
 948                 }
 949             }
 950             else
 951             {
 952                 ms_wcCharsetName = NULL;
 953
 954                 // VS: we must not output an error here, since wxWindows will safely
 955                 //     fall back to using wxEncodingConverter.
 956                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
 957                 //wxLogError(
 958             }
 959         }
 960         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
 961     }
 962     else // we already have ms_wcCharsetName
 963     {
 964         m2w = iconv_open(ms_wcCharsetName, cname);
 965     }
 966
 967     // NB: don't ever pass NULL to iconv_open(), it may crash!
 968     if ( ms_wcCharsetName )
 969     {
 970         w2m = iconv_open( cname, ms_wcCharsetName);
 971     }
 972     else
 973     {
 974         w2m = (iconv_t)-1;
 975     }
 976 }
 977
 978 wxMBConv_iconv::~wxMBConv_iconv()
 979 {
 980     if ( m2w != (iconv_t)-1 )
 981         iconv_close(m2w);
 982     if ( w2m != (iconv_t)-1 )
 983         iconv_close(w2m);
 984 }
 985
 986 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 987 {
 988     size_t inbuf = strlen(psz);
 989     size_t outbuf = n * SIZEOF_WCHAR_T;
 990     size_t res, cres;
 991     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
 992     wchar_t *bufPtr = buf;
 993     const char *pszPtr = psz;
 994
 995     if (buf)
 996     {
 997         // have destination buffer, convert there
 998         cres = iconv(m2w,
 999                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1000                      (char**)&bufPtr, &outbuf);
1001         res = n - (outbuf / SIZEOF_WCHAR_T);
1002
1003         if (ms_wcNeedsSwap)
1004         {
1005             // convert to native endianness
1006             WC_BSWAP(buf /* _not_ bufPtr */, res)
1007         }
1008
1009         // NB: iconv was given only strlen(psz) characters on input, and so
1010         //     it couldn't convert the trailing zero. Let's do it ourselves
1011         //     if there's some room left for it in the output buffer.
1012         if (res < n)
1013             buf[res] = 0;
1014     }
1015     else
1016     {
1017         // no destination buffer... convert using temp buffer
1018         // to calculate destination buffer requirement
1019         wchar_t tbuf[8];
1020         res = 0;
1021         do {
1022             bufPtr = tbuf;
1023             outbuf = 8*SIZEOF_WCHAR_T;
1024
1025             cres = iconv(m2w,
1026                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1027                          (char**)&bufPtr, &outbuf );
1028
1029             res += 8-(outbuf/SIZEOF_WCHAR_T);
1030         } while ((cres==(size_t)-1) && (errno==E2BIG));
1031     }
1032
1033     if (ICONV_FAILED(cres, inbuf))
1034     {
1035         //VS: it is ok if iconv fails, hence trace only
1036         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1037         return (size_t)-1;
1038     }
1039
1040     return res;
1041 }
1042
1043 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044 {
1045     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1046     size_t outbuf = n;
1047     size_t res, cres;
1048
1049     wchar_t *tmpbuf = 0;
1050
1051     if (ms_wcNeedsSwap)
1052     {
1053         // need to copy to temp buffer to switch endianness
1054         // this absolutely doesn't rock!
1055         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1056         //  could be in read-only memory, or be accessed in some other thread)
1057         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1058         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1059         WC_BSWAP(tmpbuf, inbuf)
1060         psz=tmpbuf;
1061     }
1062
1063     if (buf)
1064     {
1065         // have destination buffer, convert there
1066         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1067
1068         res = n-outbuf;
1069
1070         // NB: iconv was given only wcslen(psz) characters on input, and so
1071         //     it couldn't convert the trailing zero. Let's do it ourselves
1072         //     if there's some room left for it in the output buffer.
1073         if (res < n)
1074             buf[0] = 0;
1075     }
1076     else
1077     {
1078         // no destination buffer... convert using temp buffer
1079         // to calculate destination buffer requirement
1080         char tbuf[16];
1081         res = 0;
1082         do {
1083             buf = tbuf; outbuf = 16;
1084
1085             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1086
1087             res += 16 - outbuf;
1088         } while ((cres==(size_t)-1) && (errno==E2BIG));
1089     }
1090
1091     if (ms_wcNeedsSwap)
1092     {
1093         free(tmpbuf);
1094     }
1095
1096     if (ICONV_FAILED(cres, inbuf))
1097     {
1098         //VS: it is ok if iconv fails, hence trace only
1099         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1100         return (size_t)-1;
1101     }
1102
1103     return res;
1104 }
1105
1106 #endif // HAVE_ICONV
1107
1108
1109 // ============================================================================
1110 // Win32 conversion classes
1111 // ============================================================================
1112
1113 #ifdef wxHAVE_WIN32_MB2WC
1114
1115 // from utils.cpp
1116 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1117 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1118
1119 class wxMBConv_win32 : public wxMBConv
1120 {
1121 public:
1122     wxMBConv_win32()
1123     {
1124         m_CodePage = CP_ACP;
1125     }
1126
1127     wxMBConv_win32(const wxChar* name)
1128     {
1129         m_CodePage = wxCharsetToCodepage(name);
1130     }
1131
1132     wxMBConv_win32(wxFontEncoding encoding)
1133     {
1134         m_CodePage = wxEncodingToCodepage(encoding);
1135     }
1136
1137     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1138     {
1139         const size_t len = ::MultiByteToWideChar
1140                              (
1141                                 m_CodePage,     // code page
1142                                 0,              // flags (none)
1143                                 psz,            // input string
1144                                 -1,             // its length (NUL-terminated)
1145                                 buf,            // output string
1146                                 buf ? n : 0     // size of output buffer
1147                              );
1148
1149         // note that it returns count of written chars for buf != NULL and size
1150         // of the needed buffer for buf == NULL so in either case the length of
1151         // the string (which never includes the terminating NUL) is one less
1152         return len ? len - 1 : (size_t)-1;
1153     }
1154
1155     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1156     {
1157         /*
1158             we have a problem here: by default, WideCharToMultiByte() may
1159             replace characters unrepresentable in the target code page with bad
1160             quality approximations such as turning "1/2" symbol (U+00BD) into
1161             "1" for the code pages which don't have it and we, obviously, want
1162             to avoid this at any price
1163
1164             the trouble is that this function does it _silently_, i.e. it won't
1165             even tell us whether it did or not... Win98/2000 and higher provide
1166             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1167             we have to resort to a round trip, i.e. check that converting back
1168             results in the same string -- this is, of course, expensive but
1169             otherwise we simply can't be sure to not garble the data.
1170          */
1171
1172         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1173         // it doesn't work with CJK encodings (which we test for rather roughly
1174         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1175         // supporting it
1176         BOOL usedDef wxDUMMY_INITIALIZE(false),
1177              *pUsedDef;
1178         int flags;
1179         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1180         {
1181             // it's our lucky day
1182             flags = WC_NO_BEST_FIT_CHARS;
1183             pUsedDef = &usedDef;
1184         }
1185         else // old system or unsupported encoding
1186         {
1187             flags = 0;
1188             pUsedDef = NULL;
1189         }
1190
1191         const size_t len = ::WideCharToMultiByte
1192                              (
1193                                 m_CodePage,     // code page
1194                                 flags,          // either none or no best fit
1195                                 pwz,            // input string
1196                                 -1,             // it is (wide) NUL-terminated
1197                                 buf,            // output buffer
1198                                 buf ? n : 0,    // and its size
1199                                 NULL,           // default "replacement" char
1200                                 pUsedDef        // [out] was it used?
1201                              );
1202
1203         if ( !len )
1204         {
1205             // function totally failed
1206             return (size_t)-1;
1207         }
1208
1209         // if we were really converting, check if we succeeded
1210         if ( buf )
1211         {
1212             if ( flags )
1213             {
1214                 // check if the conversion failed, i.e. if any replacements
1215                 // were done
1216                 if ( usedDef )
1217                     return (size_t)-1;
1218             }
1219             else // we must resort to double tripping...
1220             {
1221                 wxWCharBuffer wcBuf(n);
1222                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1223                         wcscmp(wcBuf, pwz) != 0 )
1224                 {
1225                     // we didn't obtain the same thing we started from, hence
1226                     // the conversion was lossy and we consider that it failed
1227                     return (size_t)-1;
1228                 }
1229             }
1230         }
1231
1232         // see the comment above for the reason of "len - 1"
1233         return len - 1;
1234     }
1235
1236     bool IsOk() const { return m_CodePage != -1; }
1237
1238 private:
1239     static bool CanUseNoBestFit()
1240     {
1241         static int s_isWin98Or2k = -1;
1242
1243         if ( s_isWin98Or2k == -1 )
1244         {
1245             int verMaj, verMin;
1246             switch ( wxGetOsVersion(&verMaj, &verMin) )
1247             {
1248                 case wxWIN95:
1249                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1250                     break;
1251
1252                 case wxWINDOWS_NT:
1253                     s_isWin98Or2k = verMaj >= 5;
1254                     break;
1255
1256                 default:
1257                     // unknown, be conseravtive by default
1258                     s_isWin98Or2k = 0;
1259             }
1260
1261             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1262         }
1263
1264         return s_isWin98Or2k == 1;
1265     }
1266
1267     long m_CodePage;
1268 };
1269
1270 #endif // wxHAVE_WIN32_MB2WC
1271
1272 // ============================================================================
1273 // Mac conversion classes
1274 // ============================================================================
1275
1276 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1277
1278 class wxMBConv_mac : public wxMBConv
1279 {
1280 public:
1281     wxMBConv_mac()
1282     {
1283         Init(CFStringGetSystemEncoding()) ;
1284     }
1285
1286     wxMBConv_mac(const wxChar* name)
1287     {
1288         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1289     }
1290
1291     wxMBConv_mac(wxFontEncoding encoding)
1292     {
1293         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1294     }
1295
1296         ~wxMBConv_mac()
1297         {
1298             OSStatus status = noErr ;
1299             status = TECDisposeConverter(m_MB2WC_converter);
1300             status = TECDisposeConverter(m_WC2MB_converter);
1301         }
1302
1303
1304         void Init( TextEncodingBase encoding)
1305         {
1306             OSStatus status = noErr ;
1307                 m_char_encoding = encoding ;
1308                 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1309
1310             status = TECCreateConverter(&m_MB2WC_converter,
1311                                         m_char_encoding,
1312                                         m_unicode_encoding);
1313             status = TECCreateConverter(&m_WC2MB_converter,
1314                                         m_unicode_encoding,
1315                                         m_char_encoding);
1316         }
1317
1318     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1319     {
1320             OSStatus status = noErr ;
1321             ByteCount byteOutLen ;
1322             ByteCount byteInLen = strlen(psz) ;
1323                 wchar_t *tbuf = NULL ;
1324                 UniChar* ubuf = NULL ;
1325                 size_t res = 0 ;
1326
1327                 if (buf == NULL)
1328                 {
1329                         n = byteInLen ;
1330                         tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1331                 }
1332             ByteCount byteBufferLen = n * sizeof( UniChar ) ;
1333 #if SIZEOF_WCHAR_T == 4
1334                 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
1335 #else
1336                 ubuf = (UniChar*) (buf ? buf : tbuf) ;
1337 #endif
1338             status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1339               (TextPtr) ubuf , byteBufferLen, &byteOutLen);
1340 #if SIZEOF_WCHAR_T == 4
1341         // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
1342         // is not properly terminated we get random characters at the end
1343         ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
1344                 wxMBConvUTF16BE converter ;
1345                 res = converter.MB2WC( (buf ? buf : tbuf) , (const char*)ubuf , n ) ;
1346                 free( ubuf ) ;
1347 #else
1348                 res = byteOutLen / sizeof( UniChar ) ;
1349 #endif
1350                 if ( buf == NULL )
1351                         free(tbuf) ;
1352
1353         if ( buf  && res < n)
1354             buf[res] = 0;
1355
1356                 return res ;
1357     }
1358
1359     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1360     {
1361             OSStatus status = noErr ;
1362             ByteCount byteOutLen ;
1363             ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1364
1365                 char *tbuf = NULL ;
1366
1367                 if (buf == NULL)
1368                 {
1369                         // worst case
1370                         n = byteInLen * 2 ;
1371                         tbuf = (char*) malloc( n ) ;
1372                 }
1373
1374             ByteCount byteBufferLen = n ;
1375                 UniChar* ubuf = NULL ;
1376 #if SIZEOF_WCHAR_T == 4
1377                 wxMBConvUTF16BE converter ;
1378                 size_t unicharlen = converter.WC2MB( NULL , psz , 0 ) ;
1379                 byteInLen = unicharlen ;
1380                 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
1381                 converter.WC2MB( (char*) ubuf , psz, unicharlen + 2 ) ;
1382 #else
1383                 ubuf = (UniChar*) psz ;
1384 #endif
1385             status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) ubuf , byteInLen, &byteInLen,
1386                (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1387 #if SIZEOF_WCHAR_T == 4
1388                 free( ubuf ) ;
1389 #endif
1390                 if ( buf == NULL )
1391                         free(tbuf) ;
1392
1393                 size_t res = byteOutLen ;
1394         if ( buf  && res < n)
1395             buf[res] = 0;
1396
1397                 return res ;
1398     }
1399
1400     bool IsOk() const
1401         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
1402
1403 private:
1404         TECObjectRef m_MB2WC_converter ;
1405         TECObjectRef m_WC2MB_converter ;
1406
1407         TextEncodingBase m_char_encoding ;
1408         TextEncodingBase m_unicode_encoding ;
1409 };
1410
1411 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1412
1413 // ============================================================================
1414 // wxEncodingConverter based conversion classes
1415 // ============================================================================
1416
1417 #if wxUSE_FONTMAP
1418
1419 class wxMBConv_wxwin : public wxMBConv
1420 {
1421 private:
1422     void Init()
1423     {
1424         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1425                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1426     }
1427
1428 public:
1429     // temporarily just use wxEncodingConverter stuff,
1430     // so that it works while a better implementation is built
1431     wxMBConv_wxwin(const wxChar* name)
1432     {
1433         if (name)
1434             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1435         else
1436             m_enc = wxFONTENCODING_SYSTEM;
1437
1438         Init();
1439     }
1440
1441     wxMBConv_wxwin(wxFontEncoding enc)
1442     {
1443         m_enc = enc;
1444
1445         Init();
1446     }
1447
1448     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1449     {
1450         size_t inbuf = strlen(psz);
1451         if (buf)
1452             m2w.Convert(psz,buf);
1453         return inbuf;
1454     }
1455
1456     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1457     {
1458         const size_t inbuf = wxWcslen(psz);
1459         if (buf)
1460             w2m.Convert(psz,buf);
1461
1462         return inbuf;
1463     }
1464
1465     bool IsOk() const { return m_ok; }
1466
1467 public:
1468     wxFontEncoding m_enc;
1469     wxEncodingConverter m2w, w2m;
1470
1471     // were we initialized successfully?
1472     bool m_ok;
1473
1474     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1475 };
1476
1477 #endif // wxUSE_FONTMAP
1478
1479 // ============================================================================
1480 // wxCSConv implementation
1481 // ============================================================================
1482
1483 void wxCSConv::Init()
1484 {
1485     m_name = NULL;
1486     m_convReal =  NULL;
1487     m_deferred = true;
1488 }
1489
1490 wxCSConv::wxCSConv(const wxChar *charset)
1491 {
1492     Init();
1493
1494     if ( charset )
1495     {
1496         SetName(charset);
1497     }
1498
1499     m_encoding = wxFONTENCODING_SYSTEM;
1500 }
1501
1502 wxCSConv::wxCSConv(wxFontEncoding encoding)
1503 {
1504     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1505     {
1506         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1507
1508         encoding = wxFONTENCODING_SYSTEM;
1509     }
1510
1511     Init();
1512
1513     m_encoding = encoding;
1514 }
1515
1516 wxCSConv::~wxCSConv()
1517 {
1518     Clear();
1519 }
1520
1521 wxCSConv::wxCSConv(const wxCSConv& conv)
1522         : wxMBConv()
1523 {
1524     Init();
1525
1526     SetName(conv.m_name);
1527     m_encoding = conv.m_encoding;
1528 }
1529
1530 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1531 {
1532     Clear();
1533
1534     SetName(conv.m_name);
1535     m_encoding = conv.m_encoding;
1536
1537     return *this;
1538 }
1539
1540 void wxCSConv::Clear()
1541 {
1542     free(m_name);
1543     delete m_convReal;
1544
1545     m_name = NULL;
1546     m_convReal = NULL;
1547 }
1548
1549 void wxCSConv::SetName(const wxChar *charset)
1550 {
1551     if (charset)
1552     {
1553         m_name = wxStrdup(charset);
1554         m_deferred = true;
1555     }
1556 }
1557
1558 wxMBConv *wxCSConv::DoCreate() const
1559 {
1560     // check for the special case of ASCII or ISO8859-1 charset: as we have
1561     // special knowledge of it anyhow, we don't need to create a special
1562     // conversion object
1563     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1564     {
1565         // don't convert at all
1566         return NULL;
1567     }
1568
1569     // we trust OS to do conversion better than we can so try external
1570     // conversion methods first
1571     //
1572     // the full order is:
1573     //      1. OS conversion (iconv() under Unix or Win32 API)
1574     //      2. hard coded conversions for UTF
1575     //      3. wxEncodingConverter as fall back
1576
1577     // step (1)
1578 #ifdef HAVE_ICONV
1579 #if !wxUSE_FONTMAP
1580     if ( m_name )
1581 #endif // !wxUSE_FONTMAP
1582     {
1583         wxString name(m_name);
1584
1585 #if wxUSE_FONTMAP
1586         if ( name.empty() )
1587             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1588 #endif // wxUSE_FONTMAP
1589
1590         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1591         if ( conv->IsOk() )
1592             return conv;
1593
1594         delete conv;
1595     }
1596 #endif // HAVE_ICONV
1597
1598 #ifdef wxHAVE_WIN32_MB2WC
1599     {
1600         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1601                                       : new wxMBConv_win32(m_encoding);
1602         if ( conv->IsOk() )
1603             return conv;
1604
1605         delete conv;
1606     }
1607 #endif // wxHAVE_WIN32_MB2WC
1608 #if defined(__WXMAC__)
1609     {
1610         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1611         {
1612
1613                 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1614                                             : new wxMBConv_mac(m_encoding);
1615                 if ( conv->IsOk() )
1616                     return conv;
1617
1618                 delete conv;
1619         }
1620     }
1621 #endif
1622     // step (2)
1623     wxFontEncoding enc = m_encoding;
1624 #if wxUSE_FONTMAP
1625     if ( enc == wxFONTENCODING_SYSTEM && m_name )
1626     {
1627         // use "false" to suppress interactive dialogs -- we can be called from
1628         // anywhere and popping up a dialog from here is the last thing we want to
1629         // do
1630         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1631     }
1632 #endif // wxUSE_FONTMAP
1633
1634     switch ( enc )
1635     {
1636         case wxFONTENCODING_UTF7:
1637              return new wxMBConvUTF7;
1638
1639         case wxFONTENCODING_UTF8:
1640              return new wxMBConvUTF8;
1641
1642         case wxFONTENCODING_UTF16BE:
1643              return new wxMBConvUTF16BE;
1644
1645         case wxFONTENCODING_UTF16LE:
1646              return new wxMBConvUTF16LE;
1647
1648         case wxFONTENCODING_UTF32BE:
1649              return new wxMBConvUTF32BE;
1650
1651         case wxFONTENCODING_UTF32LE:
1652              return new wxMBConvUTF32LE;
1653
1654         default:
1655              // nothing to do but put here to suppress gcc warnings
1656              ;
1657     }
1658
1659     // step (3)
1660 #if wxUSE_FONTMAP
1661     {
1662         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1663                                       : new wxMBConv_wxwin(m_encoding);
1664         if ( conv->IsOk() )
1665             return conv;
1666
1667         delete conv;
1668     }
1669 #endif // wxUSE_FONTMAP
1670
1671     // NB: This is a hack to prevent deadlock. What could otherwise happen
1672     //     in Unicode build: wxConvLocal creation ends up being here
1673     //     because of some failure and logs the error. But wxLog will try to
1674     //     attach timestamp, for which it will need wxConvLocal (to convert
1675     //     time to char* and then wchar_t*), but that fails, tries to log
1676     //     error, but wxLog has a (already locked) critical section that
1677     //     guards static buffer.
1678     static bool alreadyLoggingError = false;
1679     if (!alreadyLoggingError)
1680     {
1681         alreadyLoggingError = true;
1682         wxLogError(_("Cannot convert from the charset '%s'!"),
1683                    m_name ? m_name
1684                       :
1685 #if wxUSE_FONTMAP
1686                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1687 #else // !wxUSE_FONTMAP
1688                          wxString::Format(_("encoding %s"), m_encoding).c_str()
1689 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1690               );
1691         alreadyLoggingError = false;
1692     }
1693
1694     return NULL;
1695 }
1696
1697 void wxCSConv::CreateConvIfNeeded() const
1698 {
1699     if ( m_deferred )
1700     {
1701         wxCSConv *self = (wxCSConv *)this; // const_cast
1702
1703 #if wxUSE_INTL
1704         // if we don't have neither the name nor the encoding, use the default
1705         // encoding for this system
1706         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1707         {
1708             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1709         }
1710 #endif // wxUSE_INTL
1711
1712         self->m_convReal = DoCreate();
1713         self->m_deferred = false;
1714     }
1715 }
1716
1717 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1718 {
1719     CreateConvIfNeeded();
1720
1721     if (m_convReal)
1722         return m_convReal->MB2WC(buf, psz, n);
1723
1724     // latin-1 (direct)
1725     size_t len = strlen(psz);
1726
1727     if (buf)
1728     {
1729         for (size_t c = 0; c <= len; c++)
1730             buf[c] = (unsigned char)(psz[c]);
1731     }
1732
1733     return len;
1734 }
1735
1736 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1737 {
1738     CreateConvIfNeeded();
1739
1740     if (m_convReal)
1741         return m_convReal->WC2MB(buf, psz, n);
1742
1743     // latin-1 (direct)
1744     const size_t len = wxWcslen(psz);
1745     if (buf)
1746     {
1747         for (size_t c = 0; c <= len; c++)
1748         {
1749             if (psz[c] > 0xFF)
1750                 return (size_t)-1;
1751             buf[c] = psz[c];
1752         }
1753     }
1754     else
1755     {
1756         for (size_t c = 0; c <= len; c++)
1757         {
1758             if (psz[c] > 0xFF)
1759                 return (size_t)-1;
1760         }
1761     }
1762
1763     return len;
1764 }
1765
1766 // ----------------------------------------------------------------------------
1767 // globals
1768 // ----------------------------------------------------------------------------
1769
1770 #ifdef __WINDOWS__
1771     static wxMBConv_win32 wxConvLibcObj;
1772 #else
1773     static wxMBConvLibc wxConvLibcObj;
1774 #endif
1775
1776 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
1777 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
1778 static wxMBConvUTF7 wxConvUTF7Obj;
1779 static wxMBConvUTF8 wxConvUTF8Obj;
1780
1781
1782 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
1783 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
1784 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
1785 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
1786 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
1787 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1788
1789 #else // !wxUSE_WCHAR_T
1790
1791 // stand-ins in absence of wchar_t
1792 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
1793                                 wxConvISO8859_1,
1794                                 wxConvLocal,
1795                                 wxConvUTF8;
1796
1797 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1798
1799