src/common/strconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        strconv.cpp
   3 // Purpose:     Unicode conversion classes
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
   9 //              (c) 2000-2003 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ============================================================================
  14 // declarations
  15 // ============================================================================
  16
  17 // ----------------------------------------------------------------------------
  18 // headers
  19 // ----------------------------------------------------------------------------
  20
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  22   #pragma implementation "strconv.h"
  23 #endif
  24
  25 // For compilers that support precompilation, includes "wx.h".
  26 #include "wx/wxprec.h"
  27
  28 #ifdef __BORLANDC__
  29   #pragma hdrstop
  30 #endif
  31
  32 #ifndef WX_PRECOMP
  33     #include "wx/intl.h"
  34     #include "wx/log.h"
  35 #endif // WX_PRECOMP
  36
  37 #include "wx/strconv.h"
  38
  39 #if wxUSE_WCHAR_T
  40
  41 #ifdef __WXMSW__
  42     #include "wx/msw/private.h"
  43     #include "wx/msw/missing.h"
  44 #endif
  45
  46 #ifndef __WXWINCE__
  47 #include <errno.h>
  48 #endif
  49
  50 #include <ctype.h>
  51 #include <string.h>
  52 #include <stdlib.h>
  53
  54 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
  55     #define wxHAVE_WIN32_MB2WC
  56 #endif // __WIN32__ but !__WXMICROWIN__
  57
  58 // ----------------------------------------------------------------------------
  59 // headers
  60 // ----------------------------------------------------------------------------
  61
  62 #ifdef __SALFORDC__
  63     #include <clib.h>
  64 #endif
  65
  66 #ifdef HAVE_ICONV
  67     #include <iconv.h>
  68 #endif
  69
  70 #include "wx/encconv.h"
  71 #include "wx/fontmap.h"
  72
  73 #ifdef __WXMAC__
  74 #include "ATSUnicode.h"
  75 #include "TextCommon.h"
  76 #include "TextEncodingConverter.h"
  77
  78 #include  "wx/mac/private.h"  // includes mac headers
  79 #endif
  80 // ----------------------------------------------------------------------------
  81 // macros
  82 // ----------------------------------------------------------------------------
  83
  84 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); }
  85 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); }
  86
  87 #if SIZEOF_WCHAR_T == 4
  88     #define WC_NAME         "UCS4"
  89     #define WC_BSWAP         BSWAP_UCS4
  90     #ifdef WORDS_BIGENDIAN
  91       #define WC_NAME_BEST  "UCS-4BE"
  92     #else
  93       #define WC_NAME_BEST  "UCS-4LE"
  94     #endif
  95 #elif SIZEOF_WCHAR_T == 2
  96     #define WC_NAME         "UTF16"
  97     #define WC_BSWAP         BSWAP_UTF16
  98     #define WC_UTF16
  99     #ifdef WORDS_BIGENDIAN
 100       #define WC_NAME_BEST  "UTF-16BE"
 101     #else
 102       #define WC_NAME_BEST  "UTF-16LE"
 103     #endif
 104 #else // sizeof(wchar_t) != 2 nor 4
 105     // does this ever happen?
 106     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
 107 #endif
 108
 109 // ============================================================================
 110 // implementation
 111 // ============================================================================
 112
 113 // ----------------------------------------------------------------------------
 114 // UTF-16 en/decoding to/from UCS-4
 115 // ----------------------------------------------------------------------------
 116
 117
 118 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 119 {
 120     if (input<=0xffff)
 121     {
 122         if (output)
 123             *output = (wxUint16) input;
 124         return 1;
 125     }
 126     else if (input>=0x110000)
 127     {
 128         return (size_t)-1;
 129     }
 130     else
 131     {
 132         if (output)
 133         {
 134             *output++ = (wxUint16) ((input >> 10)+0xd7c0);
 135             *output = (wxUint16) ((input&0x3ff)+0xdc00);
 136         }
 137         return 2;
 138     }
 139 }
 140
 141 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
 142 {
 143     if ((*input<0xd800) || (*input>0xdfff))
 144     {
 145         output = *input;
 146         return 1;
 147     }
 148     else if ((input[1]<0xdc00) || (input[1]>=0xdfff))
 149     {
 150         output = *input;
 151         return (size_t)-1;
 152     }
 153     else
 154     {
 155         output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
 156         return 2;
 157     }
 158 }
 159
 160
 161 // ----------------------------------------------------------------------------
 162 // wxMBConv
 163 // ----------------------------------------------------------------------------
 164
 165 wxMBConv::~wxMBConv()
 166 {
 167     // nothing to do here
 168 }
 169
 170 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
 171 {
 172     if ( psz )
 173     {
 174         // calculate the length of the buffer needed first
 175         size_t nLen = MB2WC(NULL, psz, 0);
 176         if ( nLen != (size_t)-1 )
 177         {
 178             // now do the actual conversion
 179             wxWCharBuffer buf(nLen);
 180             MB2WC(buf.data(), psz, nLen + 1); // with the trailing NUL
 181
 182             return buf;
 183         }
 184     }
 185
 186     wxWCharBuffer buf((wchar_t *)NULL);
 187
 188     return buf;
 189 }
 190
 191 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
 192 {
 193     if ( pwz )
 194     {
 195         size_t nLen = WC2MB(NULL, pwz, 0);
 196         if ( nLen != (size_t)-1 )
 197         {
 198             wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
 199             WC2MB(buf.data(), pwz, nLen + 4);
 200
 201             return buf;
 202         }
 203     }
 204
 205     wxCharBuffer buf((char *)NULL);
 206
 207     return buf;
 208 }
 209
 210 // ----------------------------------------------------------------------------
 211 // wxMBConvLibc
 212 // ----------------------------------------------------------------------------
 213
 214 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 215 {
 216     return wxMB2WC(buf, psz, n);
 217 }
 218
 219 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 220 {
 221     return wxWC2MB(buf, psz, n);
 222 }
 223
 224 // ----------------------------------------------------------------------------
 225 // UTF-7
 226 // ----------------------------------------------------------------------------
 227
 228 #if 0
 229 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 230                         "abcdefghijklmnopqrstuvwxyz"
 231                         "0123456789'(),-./:?";
 232 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
 233 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 234                         "abcdefghijklmnopqrstuvwxyz"
 235                         "0123456789+/";
 236 #endif
 237
 238 // TODO: write actual implementations of UTF-7 here
 239 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
 240                            const char * WXUNUSED(psz),
 241                            size_t WXUNUSED(n)) const
 242 {
 243   return 0;
 244 }
 245
 246 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
 247                            const wchar_t * WXUNUSED(psz),
 248                            size_t WXUNUSED(n)) const
 249 {
 250   return 0;
 251 }
 252
 253 // ----------------------------------------------------------------------------
 254 // UTF-8
 255 // ----------------------------------------------------------------------------
 256
 257 static wxUint32 utf8_max[]=
 258     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
 259
 260 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 261 {
 262     size_t len = 0;
 263
 264     while (*psz && ((!buf) || (len < n)))
 265     {
 266         unsigned char cc = *psz++, fc = cc;
 267         unsigned cnt;
 268         for (cnt = 0; fc & 0x80; cnt++)
 269             fc <<= 1;
 270         if (!cnt)
 271         {
 272             // plain ASCII char
 273             if (buf)
 274                 *buf++ = cc;
 275             len++;
 276         }
 277         else
 278         {
 279             cnt--;
 280             if (!cnt)
 281             {
 282                 // invalid UTF-8 sequence
 283                 return (size_t)-1;
 284             }
 285             else
 286             {
 287                 unsigned ocnt = cnt - 1;
 288                 wxUint32 res = cc & (0x3f >> cnt);
 289                 while (cnt--)
 290                 {
 291                     cc = *psz++;
 292                     if ((cc & 0xC0) != 0x80)
 293                     {
 294                         // invalid UTF-8 sequence
 295                         return (size_t)-1;
 296                     }
 297                     res = (res << 6) | (cc & 0x3f);
 298                 }
 299                 if (res <= utf8_max[ocnt])
 300                 {
 301                     // illegal UTF-8 encoding
 302                     return (size_t)-1;
 303                 }
 304 #ifdef WC_UTF16
 305                 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
 306                 size_t pa = encode_utf16(res, (wxUint16 *)buf);
 307                 if (pa == (size_t)-1)
 308                   return (size_t)-1;
 309                 if (buf)
 310                     buf += pa;
 311                 len += pa;
 312 #else // !WC_UTF16
 313                 if (buf)
 314                     *buf++ = res;
 315                 len++;
 316 #endif // WC_UTF16/!WC_UTF16
 317             }
 318         }
 319     }
 320     if (buf && (len < n))
 321         *buf = 0;
 322     return len;
 323 }
 324
 325 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 326 {
 327     size_t len = 0;
 328
 329     while (*psz && ((!buf) || (len < n)))
 330     {
 331         wxUint32 cc;
 332 #ifdef WC_UTF16
 333         // cast is ok for WC_UTF16
 334         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 335         psz += (pa == (size_t)-1) ? 1 : pa;
 336 #else
 337         cc=(*psz++) & 0x7fffffff;
 338 #endif
 339         unsigned cnt;
 340         for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}
 341         if (!cnt)
 342         {
 343             // plain ASCII char
 344             if (buf)
 345                 *buf++ = (char) cc;
 346             len++;
 347         }
 348
 349         else
 350         {
 351             len += cnt + 1;
 352             if (buf)
 353             {
 354                 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
 355                 while (cnt--)
 356                     *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
 357             }
 358         }
 359     }
 360
 361     if (buf && (len<n)) *buf = 0;
 362
 363     return len;
 364 }
 365
 366
 367
 368
 369 // ----------------------------------------------------------------------------
 370 // UTF-16
 371 // ----------------------------------------------------------------------------
 372
 373 #ifdef WORDS_BIGENDIAN
 374     #define wxMBConvUTF16straight wxMBConvUTF16BE
 375     #define wxMBConvUTF16swap     wxMBConvUTF16LE
 376 #else
 377     #define wxMBConvUTF16swap     wxMBConvUTF16BE
 378     #define wxMBConvUTF16straight wxMBConvUTF16LE
 379 #endif
 380
 381
 382 #ifdef WC_UTF16
 383
 384 // copy 16bit MB to 16bit String
 385 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 386 {
 387     size_t len=0;
 388
 389     while (*(wxUint16*)psz && (!buf || len < n))
 390     {
 391         if (buf)
 392             *buf++ = *(wxUint16*)psz;
 393         len++;
 394
 395         psz += sizeof(wxUint16);
 396     }
 397     if (buf && len<n)   *buf=0;
 398
 399     return len;
 400 }
 401
 402
 403 // copy 16bit String to 16bit MB
 404 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 405 {
 406     size_t len=0;
 407
 408     while (*psz && (!buf || len < n))
 409     {
 410         if (buf)
 411         {
 412             *(wxUint16*)buf = *psz;
 413             buf += sizeof(wxUint16);
 414         }
 415         len += sizeof(wxUint16);
 416         psz++;
 417     }
 418     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 419
 420     return len;
 421 }
 422
 423
 424 // swap 16bit MB to 16bit String
 425 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 426 {
 427     size_t len=0;
 428
 429     while (*(wxUint16*)psz && (!buf || len < n))
 430     {
 431         if (buf)
 432         {
 433             ((char *)buf)[0] = psz[1];
 434             ((char *)buf)[1] = psz[0];
 435             buf++;
 436         }
 437         len++;
 438         psz += sizeof(wxUint16);
 439     }
 440     if (buf && len<n)   *buf=0;
 441
 442     return len;
 443 }
 444
 445
 446 // swap 16bit MB to 16bit String
 447 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 448 {
 449     size_t len=0;
 450
 451     while (*psz && (!buf || len < n))
 452     {
 453         if (buf)
 454         {
 455             *buf++ = ((char*)psz)[1];
 456             *buf++ = ((char*)psz)[0];
 457         }
 458         len += sizeof(wxUint16);
 459         psz++;
 460     }
 461     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 462
 463     return len;
 464 }
 465
 466
 467 #else // WC_UTF16
 468
 469
 470 // copy 16bit MB to 32bit String
 471 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 472 {
 473     size_t len=0;
 474
 475     while (*(wxUint16*)psz && (!buf || len < n))
 476     {
 477         wxUint32 cc;
 478         size_t pa=decode_utf16((wxUint16*)psz, cc);
 479         if (pa == (size_t)-1)
 480             return pa;
 481
 482         if (buf)
 483             *buf++ = cc;
 484         len++;
 485         psz += pa * sizeof(wxUint16);
 486     }
 487     if (buf && len<n)   *buf=0;
 488
 489     return len;
 490 }
 491
 492
 493 // copy 32bit String to 16bit MB
 494 size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 495 {
 496     size_t len=0;
 497
 498     while (*psz && (!buf || len < n))
 499     {
 500         wxUint16 cc[2];
 501         size_t pa=encode_utf16(*psz, cc);
 502
 503         if (pa == (size_t)-1)
 504             return pa;
 505
 506         if (buf)
 507         {
 508             *(wxUint16*)buf = cc[0];
 509             buf += sizeof(wxUint16);
 510             if (pa > 1)
 511             {
 512                 *(wxUint16*)buf = cc[1];
 513                 buf += sizeof(wxUint16);
 514             }
 515         }
 516
 517         len += pa*sizeof(wxUint16);
 518         psz++;
 519     }
 520     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 521
 522     return len;
 523 }
 524
 525
 526 // swap 16bit MB to 32bit String
 527 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 528 {
 529     size_t len=0;
 530
 531     while (*(wxUint16*)psz && (!buf || len < n))
 532     {
 533         wxUint32 cc;
 534         char tmp[4];
 535         tmp[0]=psz[1];  tmp[1]=psz[0];
 536         tmp[2]=psz[3];  tmp[3]=psz[2];
 537
 538         size_t pa=decode_utf16((wxUint16*)tmp, cc);
 539         if (pa == (size_t)-1)
 540             return pa;
 541
 542         if (buf)
 543             *buf++ = cc;
 544
 545         len++;
 546         psz += pa * sizeof(wxUint16);
 547     }
 548     if (buf && len<n)   *buf=0;
 549
 550     return len;
 551 }
 552
 553
 554 // swap 32bit String to 16bit MB
 555 size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 556 {
 557     size_t len=0;
 558
 559     while (*psz && (!buf || len < n))
 560     {
 561         wxUint16 cc[2];
 562         size_t pa=encode_utf16(*psz, cc);
 563
 564         if (pa == (size_t)-1)
 565             return pa;
 566
 567         if (buf)
 568         {
 569             *buf++ = ((char*)cc)[1];
 570             *buf++ = ((char*)cc)[0];
 571             if (pa > 1)
 572             {
 573                 *buf++ = ((char*)cc)[3];
 574                 *buf++ = ((char*)cc)[2];
 575             }
 576         }
 577
 578         len += pa*sizeof(wxUint16);
 579         psz++;
 580     }
 581     if (buf && len<=n-sizeof(wxUint16))   *(wxUint16*)buf=0;
 582
 583     return len;
 584 }
 585
 586 #endif // WC_UTF16
 587
 588
 589 // ----------------------------------------------------------------------------
 590 // UTF-32
 591 // ----------------------------------------------------------------------------
 592
 593 #ifdef WORDS_BIGENDIAN
 594 #define wxMBConvUTF32straight  wxMBConvUTF32BE
 595 #define wxMBConvUTF32swap      wxMBConvUTF32LE
 596 #else
 597 #define wxMBConvUTF32swap      wxMBConvUTF32BE
 598 #define wxMBConvUTF32straight  wxMBConvUTF32LE
 599 #endif
 600
 601
// definitions of the global little and big endian UTF-32 converter objects
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
 604
 605
 606 #ifdef WC_UTF16
 607
 608 // copy 32bit MB to 16bit String
 609 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 610 {
 611     size_t len=0;
 612
 613     while (*(wxUint32*)psz && (!buf || len < n))
 614     {
 615         wxUint16 cc[2];
 616
 617         size_t pa=encode_utf16(*(wxUint32*)psz, cc);
 618         if (pa == (size_t)-1)
 619             return pa;
 620
 621         if (buf)
 622         {
 623             *buf++ = cc[0];
 624             if (pa > 1)
 625                 *buf++ = cc[1];
 626         }
 627         len += pa;
 628         psz += sizeof(wxUint32);
 629     }
 630     if (buf && len<n)   *buf=0;
 631
 632     return len;
 633 }
 634
 635
 636 // copy 16bit String to 32bit MB
 637 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 638 {
 639     size_t len=0;
 640
 641     while (*psz && (!buf || len < n))
 642     {
 643         wxUint32 cc;
 644
 645         // cast is ok for WC_UTF16
 646         size_t pa = decode_utf16((const wxUint16 *)psz, cc);
 647         if (pa == (size_t)-1)
 648             return pa;
 649
 650         if (buf)
 651         {
 652             *(wxUint32*)buf = cc;
 653             buf += sizeof(wxUint32);
 654         }
 655         len += sizeof(wxUint32);
 656         psz += pa;
 657     }
 658
 659     if (buf && len<=n-sizeof(wxUint32))
 660         *(wxUint32*)buf=0;
 661
 662     return len;
 663 }
 664
 665
 666
 667 // swap 32bit MB to 16bit String
 668 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 669 {
 670     size_t len=0;
 671
 672     while (*(wxUint32*)psz && (!buf || len < n))
 673     {
 674         char tmp[4];
 675         tmp[0] = psz[3];   tmp[1] = psz[2];
 676         tmp[2] = psz[1];   tmp[3] = psz[0];
 677
 678
 679         wxUint16 cc[2];
 680
 681         size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
 682         if (pa == (size_t)-1)
 683             return pa;
 684
 685         if (buf)
 686         {
 687             *buf++ = cc[0];
 688             if (pa > 1)
 689                 *buf++ = cc[1];
 690         }
 691         len += pa;
 692         psz += sizeof(wxUint32);
 693     }
 694
 695     if (buf && len<n)
 696         *buf=0;
 697
 698     return len;
 699 }
 700
 701
 702 // swap 16bit String to 32bit MB
 703 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 704 {
 705     size_t len=0;
 706
 707     while (*psz && (!buf || len < n))
 708     {
 709         char cc[4];
 710
 711         // cast is ok for WC_UTF16
 712         size_t pa=decode_utf16((const wxUint16 *)psz, *(wxUint32*)cc);
 713         if (pa == (size_t)-1)
 714             return pa;
 715
 716         if (buf)
 717         {
 718             *buf++ = cc[3];
 719             *buf++ = cc[2];
 720             *buf++ = cc[1];
 721             *buf++ = cc[0];
 722         }
 723         len += sizeof(wxUint32);
 724         psz += pa;
 725     }
 726
 727     if (buf && len<=n-sizeof(wxUint32))
 728         *(wxUint32*)buf=0;
 729
 730     return len;
 731 }
 732
 733 #else // WC_UTF16
 734
 735
 736 // copy 32bit MB to 32bit String
 737 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 738 {
 739     size_t len=0;
 740
 741     while (*(wxUint32*)psz && (!buf || len < n))
 742     {
 743         if (buf)
 744             *buf++ = *(wxUint32*)psz;
 745         len++;
 746         psz += sizeof(wxUint32);
 747     }
 748
 749     if (buf && len<n)
 750         *buf=0;
 751
 752     return len;
 753 }
 754
 755
 756 // copy 32bit String to 32bit MB
 757 size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 758 {
 759     size_t len=0;
 760
 761     while (*psz && (!buf || len < n))
 762     {
 763         if (buf)
 764         {
 765             *(wxUint32*)buf = *psz;
 766             buf += sizeof(wxUint32);
 767         }
 768
 769         len += sizeof(wxUint32);
 770         psz++;
 771     }
 772
 773     if (buf && len<=n-sizeof(wxUint32))
 774         *(wxUint32*)buf=0;
 775
 776     return len;
 777 }
 778
 779
 780 // swap 32bit MB to 32bit String
 781 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 782 {
 783     size_t len=0;
 784
 785     while (*(wxUint32*)psz && (!buf || len < n))
 786     {
 787         if (buf)
 788         {
 789             ((char *)buf)[0] = psz[3];
 790             ((char *)buf)[1] = psz[2];
 791             ((char *)buf)[2] = psz[1];
 792             ((char *)buf)[3] = psz[0];
 793             buf++;
 794         }
 795         len++;
 796         psz += sizeof(wxUint32);
 797     }
 798
 799     if (buf && len<n)
 800         *buf=0;
 801
 802     return len;
 803 }
 804
 805
 806 // swap 32bit String to 32bit MB
 807 size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
 808 {
 809     size_t len=0;
 810
 811     while (*psz && (!buf || len < n))
 812     {
 813         if (buf)
 814         {
 815             *buf++ = ((char *)psz)[3];
 816             *buf++ = ((char *)psz)[2];
 817             *buf++ = ((char *)psz)[1];
 818             *buf++ = ((char *)psz)[0];
 819         }
 820         len += sizeof(wxUint32);
 821         psz++;
 822     }
 823
 824     if (buf && len<=n-sizeof(wxUint32))
 825         *(wxUint32*)buf=0;
 826
 827     return len;
 828 }
 829
 830
 831 #endif // WC_UTF16
 832
 833
 834 // ============================================================================
 835 // The classes doing conversion using the iconv_xxx() functions
 836 // ============================================================================
 837
 838 #ifdef HAVE_ICONV
 839
 840 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG
 841 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's
 842 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1
 843 //     (which means error) and says there are 0 bytes left in the input buffer --
 844 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence,
 845 //     this alternative test for iconv() failure.
 846 //     [This bug does not appear in glibc 2.2.]
 847 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
 848 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
 849                                      (errno != E2BIG || bufLeft != 0))
 850 #else
 851 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
 852 #endif
 853
 854 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
 855
 856 // ----------------------------------------------------------------------------
 857 // wxMBConv_iconv: encapsulates an iconv character set
 858 // ----------------------------------------------------------------------------
 859
 860 class wxMBConv_iconv : public wxMBConv
 861 {
 862 public:
 863     wxMBConv_iconv(const wxChar *name);
 864     virtual ~wxMBConv_iconv();
 865
 866     virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
 867     virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
 868
 869     bool IsOk() const
 870         { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }
 871
 872 protected:
 873     // the iconv handlers used to translate from multibyte to wide char and in
 874     // the other direction
 875     iconv_t m2w,
 876             w2m;
 877
 878 private:
 879     // the name (for iconv_open()) of a wide char charset -- if none is
 880     // available on this machine, it will remain NULL
 881     static const char *ms_wcCharsetName;
 882
 883     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
 884     // different endian-ness than the native one
 885     static bool ms_wcNeedsSwap;
 886 };
 887
 888 const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
 889 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
 890
 891 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
 892 {
 893     // Do it the hard way
 894     char cname[100];
 895     for (size_t i = 0; i < wxStrlen(name)+1; i++)
 896         cname[i] = (char) name[i];
 897
 898     // check for charset that represents wchar_t:
 899     if (ms_wcCharsetName == NULL)
 900     {
 901         ms_wcNeedsSwap = false;
 902
 903         // try charset with explicit bytesex info (e.g. "UCS-4LE"):
 904         ms_wcCharsetName = WC_NAME_BEST;
 905         m2w = iconv_open(ms_wcCharsetName, cname);
 906
 907         if (m2w == (iconv_t)-1)
 908         {
 909             // try charset w/o bytesex info (e.g. "UCS4")
 910             // and check for bytesex ourselves:
 911             ms_wcCharsetName = WC_NAME;
 912             m2w = iconv_open(ms_wcCharsetName, cname);
 913
 914             // last bet, try if it knows WCHAR_T pseudo-charset
 915             if (m2w == (iconv_t)-1)
 916             {
 917                 ms_wcCharsetName = "WCHAR_T";
 918                 m2w = iconv_open(ms_wcCharsetName, cname);
 919             }
 920
 921             if (m2w != (iconv_t)-1)
 922             {
 923                 char    buf[2], *bufPtr;
 924                 wchar_t wbuf[2], *wbufPtr;
 925                 size_t  insz, outsz;
 926                 size_t  res;
 927
 928                 buf[0] = 'A';
 929                 buf[1] = 0;
 930                 wbuf[0] = 0;
 931                 insz = 2;
 932                 outsz = SIZEOF_WCHAR_T * 2;
 933                 wbufPtr = wbuf;
 934                 bufPtr = buf;
 935
 936                 res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
 937                             (char**)&wbufPtr, &outsz);
 938
 939                 if (ICONV_FAILED(res, insz))
 940                 {
 941                     ms_wcCharsetName = NULL;
 942                     wxLogLastError(wxT("iconv"));
 943                     wxLogError(_("Conversion to charset '%s' doesn't work."), name);
 944                 }
 945                 else
 946                 {
 947                     ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
 948                 }
 949             }
 950             else
 951             {
 952                 ms_wcCharsetName = NULL;
 953
 954                 // VS: we must not output an error here, since wxWindows will safely
 955                 //     fall back to using wxEncodingConverter.
 956                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
 957                 //wxLogError(
 958             }
 959         }
 960         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName, ms_wcNeedsSwap);
 961     }
 962     else // we already have ms_wcCharsetName
 963     {
 964         m2w = iconv_open(ms_wcCharsetName, cname);
 965     }
 966
 967     // NB: don't ever pass NULL to iconv_open(), it may crash!
 968     if ( ms_wcCharsetName )
 969     {
 970         w2m = iconv_open( cname, ms_wcCharsetName);
 971     }
 972     else
 973     {
 974         w2m = (iconv_t)-1;
 975     }
 976 }
 977
 978 wxMBConv_iconv::~wxMBConv_iconv()
 979 {
 980     if ( m2w != (iconv_t)-1 )
 981         iconv_close(m2w);
 982     if ( w2m != (iconv_t)-1 )
 983         iconv_close(w2m);
 984 }
 985
 986 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
 987 {
 988     size_t inbuf = strlen(psz);
 989     size_t outbuf = n * SIZEOF_WCHAR_T;
 990     size_t res, cres;
 991     // VS: Use these instead of psz, buf because iconv() modifies its arguments:
 992     wchar_t *bufPtr = buf;
 993     const char *pszPtr = psz;
 994
 995     if (buf)
 996     {
 997         // have destination buffer, convert there
 998         cres = iconv(m2w,
 999                      ICONV_CHAR_CAST(&pszPtr), &inbuf,
1000                      (char**)&bufPtr, &outbuf);
1001         res = n - (outbuf / SIZEOF_WCHAR_T);
1002
1003         if (ms_wcNeedsSwap)
1004         {
1005             // convert to native endianness
1006             WC_BSWAP(buf /* _not_ bufPtr */, res)
1007         }
1008
1009         // NB: iconv was given only strlen(psz) characters on input, and so
1010         //     it couldn't convert the trailing zero. Let's do it ourselves
1011         //     if there's some room left for it in the output buffer.
1012         if (res < n)
1013             buf[res] = 0;
1014     }
1015     else
1016     {
1017         // no destination buffer... convert using temp buffer
1018         // to calculate destination buffer requirement
1019         wchar_t tbuf[8];
1020         res = 0;
1021         do {
1022             bufPtr = tbuf;
1023             outbuf = 8*SIZEOF_WCHAR_T;
1024
1025             cres = iconv(m2w,
1026                          ICONV_CHAR_CAST(&pszPtr), &inbuf,
1027                          (char**)&bufPtr, &outbuf );
1028
1029             res += 8-(outbuf/SIZEOF_WCHAR_T);
1030         } while ((cres==(size_t)-1) && (errno==E2BIG));
1031     }
1032
1033     if (ICONV_FAILED(cres, inbuf))
1034     {
1035         //VS: it is ok if iconv fails, hence trace only
1036         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1037         return (size_t)-1;
1038     }
1039
1040     return res;
1041 }
1042
1043 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1044 {
1045     size_t inbuf = wxWcslen(psz) * SIZEOF_WCHAR_T;
1046     size_t outbuf = n;
1047     size_t res, cres;
1048
1049     wchar_t *tmpbuf = 0;
1050
1051     if (ms_wcNeedsSwap)
1052     {
1053         // need to copy to temp buffer to switch endianness
1054         // this absolutely doesn't rock!
1055         // (no, doing WC_BSWAP twice on the original buffer won't help, as it
1056         //  could be in read-only memory, or be accessed in some other thread)
1057         tmpbuf=(wchar_t*)malloc((inbuf+1)*SIZEOF_WCHAR_T);
1058         memcpy(tmpbuf,psz,(inbuf+1)*SIZEOF_WCHAR_T);
1059         WC_BSWAP(tmpbuf, inbuf)
1060         psz=tmpbuf;
1061     }
1062
1063     if (buf)
1064     {
1065         // have destination buffer, convert there
1066         cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1067
1068         res = n-outbuf;
1069
1070         // NB: iconv was given only wcslen(psz) characters on input, and so
1071         //     it couldn't convert the trailing zero. Let's do it ourselves
1072         //     if there's some room left for it in the output buffer.
1073         if (res < n)
1074             buf[0] = 0;
1075     }
1076     else
1077     {
1078         // no destination buffer... convert using temp buffer
1079         // to calculate destination buffer requirement
1080         char tbuf[16];
1081         res = 0;
1082         do {
1083             buf = tbuf; outbuf = 16;
1084
1085             cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1086
1087             res += 16 - outbuf;
1088         } while ((cres==(size_t)-1) && (errno==E2BIG));
1089     }
1090
1091     if (ms_wcNeedsSwap)
1092     {
1093         free(tmpbuf);
1094     }
1095
1096     if (ICONV_FAILED(cres, inbuf))
1097     {
1098         //VS: it is ok if iconv fails, hence trace only
1099         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1100         return (size_t)-1;
1101     }
1102
1103     return res;
1104 }
1105
1106 #endif // HAVE_ICONV
1107
1108
1109 // ============================================================================
1110 // Win32 conversion classes
1111 // ============================================================================
1112
1113 #ifdef wxHAVE_WIN32_MB2WC
1114
// Code page lookup helpers implemented in utils.cpp.
//
// They are used below to initialize wxMBConv_win32::m_CodePage from either a
// charset name or a wxFontEncoding value; wxMBConv_win32::IsOk() treats a
// result of -1 as "no usable code page".
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1118
1119 class wxMBConv_win32 : public wxMBConv
1120 {
1121 public:
1122     wxMBConv_win32()
1123     {
1124         m_CodePage = CP_ACP;
1125     }
1126
1127     wxMBConv_win32(const wxChar* name)
1128     {
1129         m_CodePage = wxCharsetToCodepage(name);
1130     }
1131
1132     wxMBConv_win32(wxFontEncoding encoding)
1133     {
1134         m_CodePage = wxEncodingToCodepage(encoding);
1135     }
1136
1137     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1138     {
1139         const size_t len = ::MultiByteToWideChar
1140                              (
1141                                 m_CodePage,     // code page
1142                                 0,              // flags (none)
1143                                 psz,            // input string
1144                                 -1,             // its length (NUL-terminated)
1145                                 buf,            // output string
1146                                 buf ? n : 0     // size of output buffer
1147                              );
1148
1149         // note that it returns count of written chars for buf != NULL and size
1150         // of the needed buffer for buf == NULL so in either case the length of
1151         // the string (which never includes the terminating NUL) is one less
1152         return len ? len - 1 : (size_t)-1;
1153     }
1154
1155     size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
1156     {
1157         /*
1158             we have a problem here: by default, WideCharToMultiByte() may
1159             replace characters unrepresentable in the target code page with bad
1160             quality approximations such as turning "1/2" symbol (U+00BD) into
1161             "1" for the code pages which don't have it and we, obviously, want
1162             to avoid this at any price
1163
1164             the trouble is that this function does it _silently_, i.e. it won't
1165             even tell us whether it did or not... Win98/2000 and higher provide
1166             WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
1167             we have to resort to a round trip, i.e. check that converting back
1168             results in the same string -- this is, of course, expensive but
1169             otherwise we simply can't be sure to not garble the data.
1170          */
1171
1172         // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
1173         // it doesn't work with CJK encodings (which we test for rather roughly
1174         // here...) nor with UTF-7/8 nor, of course, with Windows versions not
1175         // supporting it
1176         BOOL usedDef wxDUMMY_INITIALIZE(false),
1177              *pUsedDef;
1178         int flags;
1179         if ( CanUseNoBestFit() && m_CodePage < 50000 )
1180         {
1181             // it's our lucky day
1182             flags = WC_NO_BEST_FIT_CHARS;
1183             pUsedDef = &usedDef;
1184         }
1185         else // old system or unsupported encoding
1186         {
1187             flags = 0;
1188             pUsedDef = NULL;
1189         }
1190
1191         const size_t len = ::WideCharToMultiByte
1192                              (
1193                                 m_CodePage,     // code page
1194                                 flags,          // either none or no best fit
1195                                 pwz,            // input string
1196                                 -1,             // it is (wide) NUL-terminated
1197                                 buf,            // output buffer
1198                                 buf ? n : 0,    // and its size
1199                                 NULL,           // default "replacement" char
1200                                 pUsedDef        // [out] was it used?
1201                              );
1202
1203         if ( !len )
1204         {
1205             // function totally failed
1206             return (size_t)-1;
1207         }
1208
1209         // if we were really converting, check if we succeeded
1210         if ( buf )
1211         {
1212             if ( flags )
1213             {
1214                 // check if the conversion failed, i.e. if any replacements
1215                 // were done
1216                 if ( usedDef )
1217                     return (size_t)-1;
1218             }
1219             else // we must resort to double tripping...
1220             {
1221                 wxWCharBuffer wcBuf(n);
1222                 if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
1223                         wcscmp(wcBuf, pwz) != 0 )
1224                 {
1225                     // we didn't obtain the same thing we started from, hence
1226                     // the conversion was lossy and we consider that it failed
1227                     return (size_t)-1;
1228                 }
1229             }
1230         }
1231
1232         // see the comment above for the reason of "len - 1"
1233         return len - 1;
1234     }
1235
1236     bool IsOk() const { return m_CodePage != -1; }
1237
1238 private:
1239     static bool CanUseNoBestFit()
1240     {
1241         static int s_isWin98Or2k = -1;
1242
1243         if ( s_isWin98Or2k == -1 )
1244         {
1245             int verMaj, verMin;
1246             switch ( wxGetOsVersion(&verMaj, &verMin) )
1247             {
1248                 case wxWIN95:
1249                     s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
1250                     break;
1251
1252                 case wxWINDOWS_NT:
1253                     s_isWin98Or2k = verMaj >= 5;
1254                     break;
1255
1256                 default:
1257                     // unknown, be conseravtive by default
1258                     s_isWin98Or2k = 0;
1259             }
1260
1261             wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
1262         }
1263
1264         return s_isWin98Or2k == 1;
1265     }
1266
1267     long m_CodePage;
1268 };
1269
1270 #endif // wxHAVE_WIN32_MB2WC
1271
1272 // ============================================================================
1273 // Mac conversion classes
1274 // ============================================================================
1275
1276 #if defined(__WXMAC__) && defined(TARGET_CARBON)
1277
1278 class wxMBConv_mac : public wxMBConv
1279 {
1280 public:
1281     wxMBConv_mac()
1282     {
1283         Init(CFStringGetSystemEncoding()) ;
1284     }
1285
1286     wxMBConv_mac(const wxChar* name)
1287     {
1288         Init( wxMacGetSystemEncFromFontEnc(wxFontMapper::Get()->CharsetToEncoding(name, FALSE) ) ) ;
1289     }
1290
1291     wxMBConv_mac(wxFontEncoding encoding)
1292     {
1293         Init( wxMacGetSystemEncFromFontEnc(encoding) );
1294     }
1295
1296         ~wxMBConv_mac()
1297         {
1298             OSStatus status = noErr ;
1299             status = TECDisposeConverter(m_MB2WC_converter);
1300             status = TECDisposeConverter(m_WC2MB_converter);
1301         }
1302
1303
1304         void Init( TextEncodingBase encoding)
1305         {
1306             OSStatus status = noErr ;
1307                 m_char_encoding = encoding ;
1308 #if SIZEOF_WCHAR_T == 4
1309                 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode32BitFormat) ;
1310 #else
1311                 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
1312 #endif
1313             status = TECCreateConverter(&m_MB2WC_converter,
1314                                         m_char_encoding,
1315                                         m_unicode_encoding);
1316             status = TECCreateConverter(&m_WC2MB_converter,
1317                                         m_unicode_encoding,
1318                                         m_char_encoding);
1319         }
1320
1321     size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
1322     {
1323             OSStatus status = noErr ;
1324             ByteCount byteOutLen ;
1325             ByteCount byteInLen = strlen(psz) ;
1326                 wchar_t *tbuf = NULL ;
1327
1328                 if (buf == NULL)
1329                 {
1330                         n = byteInLen ;
1331                         tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T) ;
1332                 }
1333
1334             ByteCount byteBufferLen = n * SIZEOF_WCHAR_T ;
1335             status = TECConvertText(m_MB2WC_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1336               (TextPtr) (buf ? buf : tbuf) , byteBufferLen, &byteOutLen);
1337
1338                 if ( buf == NULL )
1339                         free(tbuf) ;
1340
1341                 size_t res = byteOutLen / SIZEOF_WCHAR_T ;
1342         if ( buf  && res < n)
1343             buf[res] = 0;
1344
1345                 return res ;
1346     }
1347
1348     size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
1349     {
1350             OSStatus status = noErr ;
1351             ByteCount byteOutLen ;
1352             ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
1353
1354                 char *tbuf = NULL ;
1355
1356                 if (buf == NULL)
1357                 {
1358                         // worst case
1359                         n = byteInLen * 2 ;
1360                         tbuf = (char*) malloc( n ) ;
1361                 }
1362
1363             ByteCount byteBufferLen = n ;
1364             status = TECConvertText(m_WC2MB_converter, (ConstTextPtr) psz , byteInLen, &byteInLen,
1365                (TextPtr) ( buf ? buf : tbuf ) , byteBufferLen, &byteOutLen);
1366
1367                 if ( buf == NULL )
1368                         free(tbuf) ;
1369
1370                 size_t res = byteOutLen ;
1371         if ( buf  && res < n)
1372             buf[res] = 0;
1373
1374                 return res ;
1375     }
1376
1377     bool IsOk() const
1378         { return m_MB2WC_converter !=  NULL && m_WC2MB_converter != NULL  ; }
1379
1380 private:
1381         TECObjectRef m_MB2WC_converter ;
1382         TECObjectRef m_WC2MB_converter ;
1383
1384         TextEncodingBase m_char_encoding ;
1385         TextEncodingBase m_unicode_encoding ;
1386 };
1387
1388 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
1389
1390 // ============================================================================
1391 // wxEncodingConverter based conversion classes
1392 // ============================================================================
1393
1394 #if wxUSE_FONTMAP
1395
1396 class wxMBConv_wxwin : public wxMBConv
1397 {
1398 private:
1399     void Init()
1400     {
1401         m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
1402                w2m.Init(wxFONTENCODING_UNICODE, m_enc);
1403     }
1404
1405 public:
1406     // temporarily just use wxEncodingConverter stuff,
1407     // so that it works while a better implementation is built
1408     wxMBConv_wxwin(const wxChar* name)
1409     {
1410         if (name)
1411             m_enc = wxFontMapper::Get()->CharsetToEncoding(name, false);
1412         else
1413             m_enc = wxFONTENCODING_SYSTEM;
1414
1415         Init();
1416     }
1417
1418     wxMBConv_wxwin(wxFontEncoding enc)
1419     {
1420         m_enc = enc;
1421
1422         Init();
1423     }
1424
1425     size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
1426     {
1427         size_t inbuf = strlen(psz);
1428         if (buf)
1429             m2w.Convert(psz,buf);
1430         return inbuf;
1431     }
1432
1433     size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
1434     {
1435         const size_t inbuf = wxWcslen(psz);
1436         if (buf)
1437             w2m.Convert(psz,buf);
1438
1439         return inbuf;
1440     }
1441
1442     bool IsOk() const { return m_ok; }
1443
1444 public:
1445     wxFontEncoding m_enc;
1446     wxEncodingConverter m2w, w2m;
1447
1448     // were we initialized successfully?
1449     bool m_ok;
1450
1451     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
1452 };
1453
1454 #endif // wxUSE_FONTMAP
1455
1456 // ============================================================================
1457 // wxCSConv implementation
1458 // ============================================================================
1459
1460 void wxCSConv::Init()
1461 {
1462     m_name = NULL;
1463     m_convReal =  NULL;
1464     m_deferred = true;
1465 }
1466
1467 wxCSConv::wxCSConv(const wxChar *charset)
1468 {
1469     Init();
1470
1471     if ( charset )
1472     {
1473         SetName(charset);
1474     }
1475
1476     m_encoding = wxFONTENCODING_SYSTEM;
1477 }
1478
1479 wxCSConv::wxCSConv(wxFontEncoding encoding)
1480 {
1481     if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
1482     {
1483         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
1484
1485         encoding = wxFONTENCODING_SYSTEM;
1486     }
1487
1488     Init();
1489
1490     m_encoding = encoding;
1491 }
1492
1493 wxCSConv::~wxCSConv()
1494 {
1495     Clear();
1496 }
1497
1498 wxCSConv::wxCSConv(const wxCSConv& conv)
1499         : wxMBConv()
1500 {
1501     Init();
1502
1503     SetName(conv.m_name);
1504     m_encoding = conv.m_encoding;
1505 }
1506
1507 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
1508 {
1509     Clear();
1510
1511     SetName(conv.m_name);
1512     m_encoding = conv.m_encoding;
1513
1514     return *this;
1515 }
1516
1517 void wxCSConv::Clear()
1518 {
1519     free(m_name);
1520     delete m_convReal;
1521
1522     m_name = NULL;
1523     m_convReal = NULL;
1524 }
1525
1526 void wxCSConv::SetName(const wxChar *charset)
1527 {
1528     if (charset)
1529     {
1530         m_name = wxStrdup(charset);
1531         m_deferred = true;
1532     }
1533 }
1534
1535 wxMBConv *wxCSConv::DoCreate() const
1536 {
1537     // check for the special case of ASCII or ISO8859-1 charset: as we have
1538     // special knowledge of it anyhow, we don't need to create a special
1539     // conversion object
1540     if ( m_encoding == wxFONTENCODING_ISO8859_1 )
1541     {
1542         // don't convert at all
1543         return NULL;
1544     }
1545
1546     // we trust OS to do conversion better than we can so try external
1547     // conversion methods first
1548     //
1549     // the full order is:
1550     //      1. OS conversion (iconv() under Unix or Win32 API)
1551     //      2. hard coded conversions for UTF
1552     //      3. wxEncodingConverter as fall back
1553
1554     // step (1)
1555 #ifdef HAVE_ICONV
1556 #if !wxUSE_FONTMAP
1557     if ( m_name )
1558 #endif // !wxUSE_FONTMAP
1559     {
1560         wxString name(m_name);
1561
1562 #if wxUSE_FONTMAP
1563         if ( name.empty() )
1564             name = wxFontMapper::Get()->GetEncodingName(m_encoding);
1565 #endif // wxUSE_FONTMAP
1566
1567         wxMBConv_iconv *conv = new wxMBConv_iconv(name);
1568         if ( conv->IsOk() )
1569             return conv;
1570
1571         delete conv;
1572     }
1573 #endif // HAVE_ICONV
1574
1575 #ifdef wxHAVE_WIN32_MB2WC
1576     {
1577         wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
1578                                       : new wxMBConv_win32(m_encoding);
1579         if ( conv->IsOk() )
1580             return conv;
1581
1582         delete conv;
1583     }
1584 #endif // wxHAVE_WIN32_MB2WC
1585 #if defined(__WXMAC__)
1586     {
1587         if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ) )
1588         {
1589
1590                 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
1591                                             : new wxMBConv_mac(m_encoding);
1592                 if ( conv->IsOk() )
1593                     return conv;
1594
1595                 delete conv;
1596         }
1597     }
1598 #endif
1599     // step (2)
1600     wxFontEncoding enc = m_encoding;
1601 #if wxUSE_FONTMAP
1602     if ( enc == wxFONTENCODING_SYSTEM && m_name )
1603     {
1604         // use "false" to suppress interactive dialogs -- we can be called from
1605         // anywhere and popping up a dialog from here is the last thing we want to
1606         // do
1607         enc = wxFontMapper::Get()->CharsetToEncoding(m_name, false);
1608     }
1609 #endif // wxUSE_FONTMAP
1610
1611     switch ( enc )
1612     {
1613         case wxFONTENCODING_UTF7:
1614              return new wxMBConvUTF7;
1615
1616         case wxFONTENCODING_UTF8:
1617              return new wxMBConvUTF8;
1618
1619         case wxFONTENCODING_UTF16BE:
1620              return new wxMBConvUTF16BE;
1621
1622         case wxFONTENCODING_UTF16LE:
1623              return new wxMBConvUTF16LE;
1624
1625         case wxFONTENCODING_UTF32BE:
1626              return new wxMBConvUTF32BE;
1627
1628         case wxFONTENCODING_UTF32LE:
1629              return new wxMBConvUTF32LE;
1630
1631         default:
1632              // nothing to do but put here to suppress gcc warnings
1633              ;
1634     }
1635
1636     // step (3)
1637 #if wxUSE_FONTMAP
1638     {
1639         wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
1640                                       : new wxMBConv_wxwin(m_encoding);
1641         if ( conv->IsOk() )
1642             return conv;
1643
1644         delete conv;
1645     }
1646 #endif // wxUSE_FONTMAP
1647
1648     // NB: This is a hack to prevent deadlock. What could otherwise happen
1649     //     in Unicode build: wxConvLocal creation ends up being here
1650     //     because of some failure and logs the error. But wxLog will try to
1651     //     attach timestamp, for which it will need wxConvLocal (to convert
1652     //     time to char* and then wchar_t*), but that fails, tries to log
1653     //     error, but wxLog has a (already locked) critical section that
1654     //     guards static buffer.
1655     static bool alreadyLoggingError = false;
1656     if (!alreadyLoggingError)
1657     {
1658         alreadyLoggingError = true;
1659         wxLogError(_("Cannot convert from the charset '%s'!"),
1660                    m_name ? m_name
1661                       :
1662 #if wxUSE_FONTMAP
1663                          wxFontMapper::GetEncodingDescription(m_encoding).c_str()
1664 #else // !wxUSE_FONTMAP
1665                          wxString::Format(_("encoding %s"), m_encoding).c_str()
1666 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1667               );
1668         alreadyLoggingError = false;
1669     }
1670
1671     return NULL;
1672 }
1673
1674 void wxCSConv::CreateConvIfNeeded() const
1675 {
1676     if ( m_deferred )
1677     {
1678         wxCSConv *self = (wxCSConv *)this; // const_cast
1679
1680 #if wxUSE_INTL
1681         // if we don't have neither the name nor the encoding, use the default
1682         // encoding for this system
1683         if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
1684         {
1685             self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
1686         }
1687 #endif // wxUSE_INTL
1688
1689         self->m_convReal = DoCreate();
1690         self->m_deferred = false;
1691     }
1692 }
1693
1694 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1695 {
1696     CreateConvIfNeeded();
1697
1698     if (m_convReal)
1699         return m_convReal->MB2WC(buf, psz, n);
1700
1701     // latin-1 (direct)
1702     size_t len = strlen(psz);
1703
1704     if (buf)
1705     {
1706         for (size_t c = 0; c <= len; c++)
1707             buf[c] = (unsigned char)(psz[c]);
1708     }
1709
1710     return len;
1711 }
1712
1713 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1714 {
1715     CreateConvIfNeeded();
1716
1717     if (m_convReal)
1718         return m_convReal->WC2MB(buf, psz, n);
1719
1720     // latin-1 (direct)
1721     const size_t len = wxWcslen(psz);
1722     if (buf)
1723     {
1724         for (size_t c = 0; c <= len; c++)
1725         {
1726             if (psz[c] > 0xFF)
1727                 return (size_t)-1;
1728             buf[c] = psz[c];
1729         }
1730     }
1731     else
1732     {
1733         for (size_t c = 0; c <= len; c++)
1734         {
1735             if (psz[c] > 0xFF)
1736                 return (size_t)-1;
1737         }
1738     }
1739
1740     return len;
1741 }
1742
1743 // ----------------------------------------------------------------------------
1744 // globals
1745 // ----------------------------------------------------------------------------
1746
// The object behind wxConvLibc: under Windows it is a default-constructed
// wxMBConv_win32, i.e. the one using the CP_ACP code page, elsewhere it is a
// wxMBConvLibc.
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// The objects behind the other predefined converters. Both wxCSConv objects
// only remember their encoding here: wxCSConv creates the real converter
// when it is first used for a conversion.
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;


// The exported names are references to the static objects above.
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
// wxConvCurrent is a pointer and initially points to the wxConvLibc object.
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
1765
1766 #else // !wxUSE_WCHAR_T
1767
// stand-ins in absence of wchar_t: the same names are still defined when
// wxUSE_WCHAR_T is 0, but as plain wxMBConv objects and not as references
// to the specialized converters
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
1773
1774 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
1775
1776