1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     Unicode conversion classes 
   4 // Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik 
   8 // Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik 
   9 //              (c) 2000-2003 Vadim Zeitlin 
  10 // Licence:     wxWindows licence 
  11 ///////////////////////////////////////////////////////////////////////////// 
  13 // ============================================================================ 
  15 // ============================================================================ 
  17 // ---------------------------------------------------------------------------- 
  19 // ---------------------------------------------------------------------------- 
  21 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) 
  22   #pragma implementation "strconv.h" 
  25 // For compilers that support precompilation, includes "wx.h". 
  26 #include "wx/wxprec.h" 
  37 #include "wx/strconv.h" 
  42     #include "wx/msw/private.h" 
  53 #if defined(__WIN32__) && !defined(__WXMICROWIN__) 
  54     #define wxHAVE_WIN32_MB2WC 
  55 #endif // __WIN32__ but !__WXMICROWIN__ 
  57 // ---------------------------------------------------------------------------- 
  59 // ---------------------------------------------------------------------------- 
  69 #include "wx/encconv.h" 
  70 #include "wx/fontmap.h" 
  72 // ---------------------------------------------------------------------------- 
  74 // ---------------------------------------------------------------------------- 
  76 #define BSWAP_UCS4(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT32_SWAP_ALWAYS(str[_c]); } 
  77 #define BSWAP_UTF16(str, len) { unsigned _c; for (_c=0; _c<len; _c++) str[_c]=wxUINT16_SWAP_ALWAYS(str[_c]); } 
  79 #if SIZEOF_WCHAR_T == 4 
  80     #define WC_NAME         "UCS4" 
  81     #define WC_BSWAP         BSWAP_UCS4 
  82     #ifdef WORDS_BIGENDIAN 
  83       #define WC_NAME_BEST  "UCS-4BE" 
  85       #define WC_NAME_BEST  "UCS-4LE" 
  87 #elif SIZEOF_WCHAR_T == 2 
  88     #define WC_NAME         "UTF16" 
  89     #define WC_BSWAP         BSWAP_UTF16 
  91     #ifdef WORDS_BIGENDIAN 
  92       #define WC_NAME_BEST  "UTF-16BE" 
  94       #define WC_NAME_BEST  "UTF-16LE" 
  96 #else // sizeof(wchar_t) != 2 nor 4 
  97     // does this ever happen? 
  98     #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" 
 101 // ============================================================================ 
 103 // ============================================================================ 
 105 // ---------------------------------------------------------------------------- 
 106 // UTF-16 en/decoding to/from UCS-4 
 107 // ---------------------------------------------------------------------------- 
 110 static size_t encode_utf16(wxUint32 input
, wxUint16 
*output
) 
 115             *output 
= (wxUint16
) input
; 
 118     else if (input
>=0x110000) 
 126             *output
++ = (wxUint16
) ((input 
>> 10)+0xd7c0); 
 127             *output 
= (wxUint16
) ((input
&0x3ff)+0xdc00); 
 133 static size_t decode_utf16(const wxUint16
* input
, wxUint32
& output
) 
 135     if ((*input
<0xd800) || (*input
>0xdfff)) 
 140     else if ((input
[1]<0xdc00) || (input
[1]>=0xdfff)) 
 147         output 
= ((input
[0] - 0xd7c0) << 10) + (input
[1] - 0xdc00); 
 153 // ---------------------------------------------------------------------------- 
 155 // ---------------------------------------------------------------------------- 
 157 wxMBConv::~wxMBConv() 
 159     // nothing to do here 
 162 const wxWCharBuffer 
wxMBConv::cMB2WC(const char *psz
) const 
 166         // calculate the length of the buffer needed first 
 167         size_t nLen 
= MB2WC(NULL
, psz
, 0); 
 168         if ( nLen 
!= (size_t)-1 ) 
 170             // now do the actual conversion 
 171             wxWCharBuffer 
buf(nLen
); 
 172             MB2WC(buf
.data(), psz
, nLen 
+ 1); // with the trailing NUL 
 178     wxWCharBuffer 
buf((wchar_t *)NULL
); 
 183 const wxCharBuffer 
wxMBConv::cWC2MB(const wchar_t *pwz
) const 
 187         size_t nLen 
= WC2MB(NULL
, pwz
, 0); 
 188         if ( nLen 
!= (size_t)-1 ) 
 190             wxCharBuffer 
buf(nLen
+3);       // space for a wxUint32 trailing zero 
 191             WC2MB(buf
.data(), pwz
, nLen 
+ 4); 
 197     wxCharBuffer 
buf((char *)NULL
); 
 202 // ---------------------------------------------------------------------------- 
 204 // ---------------------------------------------------------------------------- 
 206 size_t wxMBConvLibc::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 208     return wxMB2WC(buf
, psz
, n
); 
 211 size_t wxMBConvLibc::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 213     return wxWC2MB(buf
, psz
, n
); 
 216 // ---------------------------------------------------------------------------- 
 218 // ---------------------------------------------------------------------------- 
 221 static char utf7_setD
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ" 
 222                         "abcdefghijklmnopqrstuvwxyz" 
 223                         "0123456789'(),-./:?"; 
 224 static char utf7_setO
[]="!\"#$%&*;<=>@[]^_`{|}"; 
 225 static char utf7_setB
[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ" 
 226                         "abcdefghijklmnopqrstuvwxyz" 
 230 // TODO: write actual implementations of UTF-7 here 
 231 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf
), 
 232                            const char * WXUNUSED(psz
), 
 233                            size_t WXUNUSED(n
)) const 
 238 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf
), 
 239                            const wchar_t * WXUNUSED(psz
), 
 240                            size_t WXUNUSED(n
)) const 
 245 // ---------------------------------------------------------------------------- 
 247 // ---------------------------------------------------------------------------- 
 249 static wxUint32 utf8_max
[]= 
 250     { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; 
 252 size_t wxMBConvUTF8::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 256     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 258         unsigned char cc 
= *psz
++, fc 
= cc
; 
 260         for (cnt 
= 0; fc 
& 0x80; cnt
++) 
 274                 // invalid UTF-8 sequence 
 279                 unsigned ocnt 
= cnt 
- 1; 
 280                 wxUint32 res 
= cc 
& (0x3f >> cnt
); 
 284                     if ((cc 
& 0xC0) != 0x80) 
 286                         // invalid UTF-8 sequence 
 289                     res 
= (res 
<< 6) | (cc 
& 0x3f); 
 291                 if (res 
<= utf8_max
[ocnt
]) 
 293                     // illegal UTF-8 encoding 
 297                 // cast is ok because wchar_t == wxUint16 if WC_UTF16
 298                 size_t pa 
= encode_utf16(res
, (wxUint16 
*)buf
); 
 299                 if (pa 
== (size_t)-1) 
 308 #endif // WC_UTF16/!WC_UTF16 
 312     if (buf 
&& (len 
< n
)) 
 317 size_t wxMBConvUTF8::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 321     while (*psz 
&& ((!buf
) || (len 
< n
))) 
 325         // cast is ok for WC_UTF16 
 326         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 327         psz 
+= (pa 
== (size_t)-1) ? 1 : pa
; 
 329         cc
=(*psz
++) & 0x7fffffff; 
 332         for (cnt 
= 0; cc 
> utf8_max
[cnt
]; cnt
++) {} 
 346                 *buf
++ = (char) ((-128 >> cnt
) | ((cc 
>> (cnt 
* 6)) & (0x3f >> cnt
))); 
 348                     *buf
++ = (char) (0x80 | ((cc 
>> (cnt 
* 6)) & 0x3f)); 
 353     if (buf 
&& (len
<n
)) *buf 
= 0; 
 361 // ---------------------------------------------------------------------------- 
 363 // ---------------------------------------------------------------------------- 
 365 #ifdef WORDS_BIGENDIAN 
 366     #define wxMBConvUTF16straight wxMBConvUTF16BE 
 367     #define wxMBConvUTF16swap     wxMBConvUTF16LE 
 369     #define wxMBConvUTF16swap     wxMBConvUTF16BE 
 370     #define wxMBConvUTF16straight wxMBConvUTF16LE 
 376 // copy 16bit MB to 16bit String 
 377 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 381     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 384             *buf
++ = *(wxUint16
*)psz
; 
 387         psz 
+= sizeof(wxUint16
); 
 389     if (buf 
&& len
<n
)   *buf
=0; 
 395 // copy 16bit String to 16bit MB 
 396 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 400     while (*psz 
&& (!buf 
|| len 
< n
)) 
 404             *(wxUint16
*)buf 
= *psz
; 
 405             buf 
+= sizeof(wxUint16
); 
 407         len 
+= sizeof(wxUint16
); 
 410     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 416 // swap 16bit MB to 16bit String 
 417 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 421     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 425             ((char *)buf
)[0] = psz
[1]; 
 426             ((char *)buf
)[1] = psz
[0]; 
 430         psz 
+= sizeof(wxUint16
); 
 432     if (buf 
&& len
<n
)   *buf
=0; 
 438 // swap 16bit String to 16bit MB
 439 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 443     while (*psz 
&& (!buf 
|| len 
< n
)) 
 447             *buf
++ = ((char*)psz
)[1]; 
 448             *buf
++ = ((char*)psz
)[0]; 
 450         len 
+= sizeof(wxUint16
); 
 453     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 462 // copy 16bit MB to 32bit String 
 463 size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 467     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 470         size_t pa
=decode_utf16((wxUint16
*)psz
, cc
); 
 471         if (pa 
== (size_t)-1) 
 477         psz 
+= pa 
* sizeof(wxUint16
); 
 479     if (buf 
&& len
<n
)   *buf
=0; 
 485 // copy 32bit String to 16bit MB 
 486 size_t wxMBConvUTF16straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 490     while (*psz 
&& (!buf 
|| len 
< n
)) 
 493         size_t pa
=encode_utf16(*psz
, cc
); 
 495         if (pa 
== (size_t)-1) 
 500             *(wxUint16
*)buf 
= cc
[0]; 
 501             buf 
+= sizeof(wxUint16
); 
 504                 *(wxUint16
*)buf 
= cc
[1]; 
 505                 buf 
+= sizeof(wxUint16
); 
 509         len 
+= pa
*sizeof(wxUint16
); 
 512     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 518 // swap 16bit MB to 32bit String 
 519 size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 523     while (*(wxUint16
*)psz 
&& (!buf 
|| len 
< n
)) 
 527         tmp
[0]=psz
[1];  tmp
[1]=psz
[0]; 
 528         tmp
[2]=psz
[3];  tmp
[3]=psz
[2]; 
 530         size_t pa
=decode_utf16((wxUint16
*)tmp
, cc
); 
 531         if (pa 
== (size_t)-1) 
 538         psz 
+= pa 
* sizeof(wxUint16
); 
 540     if (buf 
&& len
<n
)   *buf
=0; 
 546 // swap 32bit String to 16bit MB 
 547 size_t wxMBConvUTF16swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 551     while (*psz 
&& (!buf 
|| len 
< n
)) 
 554         size_t pa
=encode_utf16(*psz
, cc
); 
 556         if (pa 
== (size_t)-1) 
 561             *buf
++ = ((char*)cc
)[1]; 
 562             *buf
++ = ((char*)cc
)[0]; 
 565                 *buf
++ = ((char*)cc
)[3]; 
 566                 *buf
++ = ((char*)cc
)[2]; 
 570         len 
+= pa
*sizeof(wxUint16
); 
 573     if (buf 
&& len
<=n
-sizeof(wxUint16
))   *(wxUint16
*)buf
=0; 
 581 // ---------------------------------------------------------------------------- 
 583 // ---------------------------------------------------------------------------- 
 585 #ifdef WORDS_BIGENDIAN 
 586 #define wxMBConvUTF32straight  wxMBConvUTF32BE 
 587 #define wxMBConvUTF32swap      wxMBConvUTF32LE 
 589 #define wxMBConvUTF32swap      wxMBConvUTF32BE 
 590 #define wxMBConvUTF32straight  wxMBConvUTF32LE 
 594 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE
) wxConvUTF32LE
; 
 595 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE
) wxConvUTF32BE
; 
 600 // copy 32bit MB to 16bit String 
 601 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 605     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 609         size_t pa
=encode_utf16(*(wxUint32
*)psz
, cc
); 
 610         if (pa 
== (size_t)-1) 
 620         psz 
+= sizeof(wxUint32
); 
 622     if (buf 
&& len
<n
)   *buf
=0; 
 628 // copy 16bit String to 32bit MB 
 629 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 633     while (*psz 
&& (!buf 
|| len 
< n
)) 
 637         // cast is ok for WC_UTF16 
 638         size_t pa 
= decode_utf16((const wxUint16 
*)psz
, cc
); 
 639         if (pa 
== (size_t)-1) 
 644             *(wxUint32
*)buf 
= cc
; 
 645             buf 
+= sizeof(wxUint32
); 
 647         len 
+= sizeof(wxUint32
); 
 651     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 659 // swap 32bit MB to 16bit String 
 660 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 664     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 667         tmp
[0] = psz
[3];   tmp
[1] = psz
[2]; 
 668         tmp
[2] = psz
[1];   tmp
[3] = psz
[0]; 
 673         size_t pa
=encode_utf16(*(wxUint32
*)tmp
, cc
); 
 674         if (pa 
== (size_t)-1) 
 684         psz 
+= sizeof(wxUint32
); 
 694 // swap 16bit String to 32bit MB 
 695 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 699     while (*psz 
&& (!buf 
|| len 
< n
)) 
 703         // cast is ok for WC_UTF16 
 704         size_t pa
=decode_utf16((const wxUint16 
*)psz
, *(wxUint32
*)cc
); 
 705         if (pa 
== (size_t)-1) 
 715         len 
+= sizeof(wxUint32
); 
 719     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 728 // copy 32bit MB to 32bit String 
 729 size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 733     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 736             *buf
++ = *(wxUint32
*)psz
; 
 738         psz 
+= sizeof(wxUint32
); 
 748 // copy 32bit String to 32bit MB 
 749 size_t wxMBConvUTF32straight::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 753     while (*psz 
&& (!buf 
|| len 
< n
)) 
 757             *(wxUint32
*)buf 
= *psz
; 
 758             buf 
+= sizeof(wxUint32
); 
 761         len 
+= sizeof(wxUint32
); 
 765     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 772 // swap 32bit MB to 32bit String 
 773 size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 777     while (*(wxUint32
*)psz 
&& (!buf 
|| len 
< n
)) 
 781             ((char *)buf
)[0] = psz
[3]; 
 782             ((char *)buf
)[1] = psz
[2]; 
 783             ((char *)buf
)[2] = psz
[1]; 
 784             ((char *)buf
)[3] = psz
[0]; 
 788         psz 
+= sizeof(wxUint32
); 
 798 // swap 32bit String to 32bit MB 
 799 size_t wxMBConvUTF32swap::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
 803     while (*psz 
&& (!buf 
|| len 
< n
)) 
 807             *buf
++ = ((char *)psz
)[3]; 
 808             *buf
++ = ((char *)psz
)[2]; 
 809             *buf
++ = ((char *)psz
)[1]; 
 810             *buf
++ = ((char *)psz
)[0]; 
 812         len 
+= sizeof(wxUint32
); 
 816     if (buf 
&& len
<=n
-sizeof(wxUint32
)) 
 826 // ============================================================================ 
 827 // The classes doing conversion using the iconv_xxx() functions 
 828 // ============================================================================ 
 832 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with E2BIG 
 833 //     if output buffer is _exactly_ as big as needed. Such case is (unless there's 
 834 //     yet another bug in glibc) the only case when iconv() returns with (size_t)-1 
 835 //     (which means error) and says there are 0 bytes left in the input buffer -- 
 836 //     when _real_ error occurs, bytes-left-in-input buffer is non-zero. Hence, 
 837 //     this alternative test for iconv() failure. 
 838 //     [This bug does not appear in glibc 2.2.] 
 839 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 
 840 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ 
 841                                      (errno != E2BIG || bufLeft != 0)) 
 843 #define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1) 
 846 #define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x)) 
 848 // ---------------------------------------------------------------------------- 
 849 // wxMBConv_iconv: encapsulates an iconv character set 
 850 // ---------------------------------------------------------------------------- 
 852 class wxMBConv_iconv 
: public wxMBConv
 
 855     wxMBConv_iconv(const wxChar 
*name
); 
 856     virtual ~wxMBConv_iconv(); 
 858     virtual size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const; 
 859     virtual size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const; 
 862         { return (m2w 
!= (iconv_t
)-1) && (w2m 
!= (iconv_t
)-1); } 
 865     // the iconv handlers used to translate from multibyte to wide char and in 
 866     // the other direction 
 871     // the name (for iconv_open()) of a wide char charset -- if none is 
 872     // available on this machine, it will remain NULL 
 873     static const char *ms_wcCharsetName
; 
 875     // true if the wide char encoding we use (i.e. ms_wcCharsetName) has 
 876     // different endian-ness than the native one 
 877     static bool ms_wcNeedsSwap
; 
 880 const char *wxMBConv_iconv::ms_wcCharsetName 
= NULL
; 
 881 bool wxMBConv_iconv::ms_wcNeedsSwap 
= false; 
 883 wxMBConv_iconv::wxMBConv_iconv(const wxChar 
*name
) 
 885     // Do it the hard way 
 887     for (size_t i 
= 0; i 
< wxStrlen(name
)+1; i
++) 
 888         cname
[i
] = (char) name
[i
]; 
 890     // check for charset that represents wchar_t: 
 891     if (ms_wcCharsetName 
== NULL
) 
 893         ms_wcNeedsSwap 
= false; 
 895         // try charset with explicit bytesex info (e.g. "UCS-4LE"): 
 896         ms_wcCharsetName 
= WC_NAME_BEST
; 
 897         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 899         if (m2w 
== (iconv_t
)-1) 
 901             // try charset w/o bytesex info (e.g. "UCS4") 
 902             // and check for bytesex ourselves: 
 903             ms_wcCharsetName 
= WC_NAME
; 
 904             m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 906             // last resort: check whether iconv knows the WCHAR_T pseudo-charset
 907             if (m2w 
== (iconv_t
)-1) 
 909                 ms_wcCharsetName 
= "WCHAR_T"; 
 910                 m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 913             if (m2w 
!= (iconv_t
)-1) 
 915                 char    buf
[2], *bufPtr
; 
 916                 wchar_t wbuf
[2], *wbufPtr
; 
 924                 outsz 
= SIZEOF_WCHAR_T 
* 2; 
 928                 res 
= iconv(m2w
, ICONV_CHAR_CAST(&bufPtr
), &insz
, 
 929                             (char**)&wbufPtr
, &outsz
); 
 931                 if (ICONV_FAILED(res
, insz
)) 
 933                     ms_wcCharsetName 
= NULL
; 
 934                     wxLogLastError(wxT("iconv")); 
 935                     wxLogError(_("Conversion to charset '%s' doesn't work."), name
); 
 939                     ms_wcNeedsSwap 
= wbuf
[0] != (wchar_t)buf
[0]; 
 944                 ms_wcCharsetName 
= NULL
; 
 946                 // VS: we must not output an error here, since wxWindows will safely 
 947                 //     fall back to using wxEncodingConverter. 
 948                 wxLogTrace(wxT("strconv"), wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name
); 
 952         wxLogTrace(wxT("strconv"), wxT("wchar_t charset is '%s', needs swap: %i"), ms_wcCharsetName
, ms_wcNeedsSwap
); 
 954     else // we already have ms_wcCharsetName 
 956         m2w 
= iconv_open(ms_wcCharsetName
, cname
); 
 959     // NB: don't ever pass NULL to iconv_open(), it may crash! 
 960     if ( ms_wcCharsetName 
) 
 962         w2m 
= iconv_open( cname
, ms_wcCharsetName
); 
 970 wxMBConv_iconv::~wxMBConv_iconv() 
 972     if ( m2w 
!= (iconv_t
)-1 ) 
 974     if ( w2m 
!= (iconv_t
)-1 ) 
 978 size_t wxMBConv_iconv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
 980     size_t inbuf 
= strlen(psz
); 
 981     size_t outbuf 
= n 
* SIZEOF_WCHAR_T
; 
 983     // VS: Use these instead of psz, buf because iconv() modifies its arguments: 
 984     wchar_t *bufPtr 
= buf
; 
 985     const char *pszPtr 
= psz
; 
 989         // have destination buffer, convert there 
 991                      ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
 992                      (char**)&bufPtr
, &outbuf
); 
 993         res 
= n 
- (outbuf 
/ SIZEOF_WCHAR_T
); 
 997             // convert to native endianness 
 998             WC_BSWAP(buf 
/* _not_ bufPtr */, res
) 
1001         // NB: iconv was given only strlen(psz) characters on input, and so 
1002         //     it couldn't convert the trailing zero. Let's do it ourselves 
1003         //     if there's some room left for it in the output buffer. 
1009         // no destination buffer... convert using temp buffer 
1010         // to calculate destination buffer requirement 
1015             outbuf 
= 8*SIZEOF_WCHAR_T
; 
1018                          ICONV_CHAR_CAST(&pszPtr
), &inbuf
, 
1019                          (char**)&bufPtr
, &outbuf 
); 
1021             res 
+= 8-(outbuf
/SIZEOF_WCHAR_T
); 
1022         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1025     if (ICONV_FAILED(cres
, inbuf
)) 
1027         //VS: it is ok if iconv fails, hence trace only 
1028         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1035 size_t wxMBConv_iconv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1037     size_t inbuf 
= wxWcslen(psz
) * SIZEOF_WCHAR_T
; 
1041     wchar_t *tmpbuf 
= 0; 
1045         // need to copy to temp buffer to switch endianness 
1046         // this absolutely doesn't rock! 
1047         // (no, doing WC_BSWAP twice on the original buffer won't help, as it 
1048         //  could be in read-only memory, or be accessed in some other thread) 
1049         tmpbuf
=(wchar_t*)malloc((inbuf
+1)*SIZEOF_WCHAR_T
); 
1050         memcpy(tmpbuf
,psz
,(inbuf
+1)*SIZEOF_WCHAR_T
); 
1051         WC_BSWAP(tmpbuf
, inbuf
) 
1057         // have destination buffer, convert there 
1058         cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1062         // NB: iconv was given only wcslen(psz) characters on input, and so 
1063         //     it couldn't convert the trailing zero. Let's do it ourselves 
1064         //     if there's some room left for it in the output buffer. 
1070         // no destination buffer... convert using temp buffer 
1071         // to calculate destination buffer requirement 
1075             buf 
= tbuf
; outbuf 
= 16; 
1077             cres 
= iconv( w2m
, ICONV_CHAR_CAST(&psz
), &inbuf
, &buf
, &outbuf 
); 
1080         } while ((cres
==(size_t)-1) && (errno
==E2BIG
)); 
1088     if (ICONV_FAILED(cres
, inbuf
)) 
1090         //VS: it is ok if iconv fails, hence trace only 
1091         wxLogTrace(wxT("strconv"), wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); 
1098 #endif // HAVE_ICONV 
1101 // ============================================================================ 
1102 // Win32 conversion classes 
1103 // ============================================================================ 
1105 #ifdef wxHAVE_WIN32_MB2WC 
1108 extern WXDLLIMPEXP_BASE 
long wxCharsetToCodepage(const wxChar 
*charset
); 
1109 extern WXDLLIMPEXP_BASE 
long wxEncodingToCodepage(wxFontEncoding encoding
); 
1111 class wxMBConv_win32 
: public wxMBConv
 
1116         m_CodePage 
= CP_ACP
; 
1119     wxMBConv_win32(const wxChar
* name
) 
1121         m_CodePage 
= wxCharsetToCodepage(name
); 
1124     wxMBConv_win32(wxFontEncoding encoding
) 
1126         m_CodePage 
= wxEncodingToCodepage(encoding
); 
1129     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1131         const size_t len 
= ::MultiByteToWideChar
 
1133                                 m_CodePage
,     // code page 
1135                                 psz
,            // input string 
1136                                 -1,             // its length (NUL-terminated) 
1137                                 buf
,            // output string 
1138                                 buf 
? n 
: 0     // size of output buffer 
1141         // note that it returns count of written chars for buf != NULL and size 
1142         // of the needed buffer for buf == NULL so in either case the length of 
1143         // the string (which never includes the terminating NUL) is one less 
1144         return len 
? len 
- 1 : (size_t)-1; 
1147     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1149         const size_t len 
= ::WideCharToMultiByte
 
1151                                 m_CodePage
,     // code page 
1153                                 psz
,            // input string 
1154                                 -1,             // it is (wide) NUL-terminated 
1155                                 buf
,            // output buffer 
1156                                 buf 
? n 
: 0,    // and its size 
1157                                 NULL
,           // default "replacement" char 
1158                                 NULL            
// [out] was it used? 
1161         // see the comment above for the reason of "len - 1" 
1162         return len 
? len 
- 1 : (size_t)-1; 
1166         { return m_CodePage 
!= -1; } 
1172 #endif // wxHAVE_WIN32_MB2WC 
1175 // ============================================================================ 
1176 // wxEncodingConverter based conversion classes 
1177 // ============================================================================ 
1181 class wxMBConv_wxwin 
: public wxMBConv
 
1186         m_ok 
= m2w
.Init(m_enc
, wxFONTENCODING_UNICODE
) && 
1187                w2m
.Init(wxFONTENCODING_UNICODE
, m_enc
); 
1191     // temporarily just use wxEncodingConverter stuff, 
1192     // so that it works while a better implementation is built 
1193     wxMBConv_wxwin(const wxChar
* name
) 
1196             m_enc 
= wxFontMapper::Get()->CharsetToEncoding(name
, false); 
1198             m_enc 
= wxFONTENCODING_SYSTEM
; 
1203     wxMBConv_wxwin(wxFontEncoding enc
) 
1210     size_t MB2WC(wchar_t *buf
, const char *psz
, size_t WXUNUSED(n
)) const 
1212         size_t inbuf 
= strlen(psz
); 
1214             m2w
.Convert(psz
,buf
); 
1218     size_t WC2MB(char *buf
, const wchar_t *psz
, size_t WXUNUSED(n
)) const 
1220         const size_t inbuf 
= wxWcslen(psz
); 
1222             w2m
.Convert(psz
,buf
); 
1227     bool IsOk() const { return m_ok
; } 
1230     wxFontEncoding m_enc
; 
1231     wxEncodingConverter m2w
, w2m
; 
1233     // were we initialized successfully? 
1236     DECLARE_NO_COPY_CLASS(wxMBConv_wxwin
) 
1239 #endif // wxUSE_FONTMAP 
1241 // ============================================================================ 
1242 // wxCSConv implementation 
1243 // ============================================================================ 
1245 void wxCSConv::Init() 
1252 wxCSConv::wxCSConv(const wxChar 
*charset
) 
1261     m_encoding 
= wxFONTENCODING_SYSTEM
; 
1264 wxCSConv::wxCSConv(wxFontEncoding encoding
) 
1266     if ( encoding 
== wxFONTENCODING_MAX 
|| encoding 
== wxFONTENCODING_DEFAULT 
) 
1268         wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") ); 
1270         encoding 
= wxFONTENCODING_SYSTEM
; 
1275     m_encoding 
= encoding
; 
1278 wxCSConv::~wxCSConv() 
1283 wxCSConv::wxCSConv(const wxCSConv
& conv
) 
1288     SetName(conv
.m_name
); 
1289     m_encoding 
= conv
.m_encoding
; 
1292 wxCSConv
& wxCSConv::operator=(const wxCSConv
& conv
) 
1296     SetName(conv
.m_name
); 
1297     m_encoding 
= conv
.m_encoding
; 
1302 void wxCSConv::Clear() 
1311 void wxCSConv::SetName(const wxChar 
*charset
) 
1315         m_name 
= wxStrdup(charset
); 
1320 static inline bool DoesntNeedConv(wxFontEncoding enc
) 
1322     return enc 
== wxFONTENCODING_DEFAULT 
|| 
1323             enc 
== wxFONTENCODING_SYSTEM 
|| 
1324              enc 
== wxFONTENCODING_ISO8859_1
; 
1327 wxMBConv 
*wxCSConv::DoCreate() const 
1330     wxFontMapper 
* const fontMapper 
= wxFontMapper::Get(); 
1332     wxFontEncoding encFromName 
= m_name 
? fontMapper
->CharsetToEncoding(m_name
) 
1333                                         : wxFONTENCODING_SYSTEM
; 
1334 #endif // wxUSE_FONTMAP 
1336     // check for the special case of ASCII charset 
1337     if ( (!m_name 
&& DoesntNeedConv(m_encoding
)) 
1339             || (m_name 
&& DoesntNeedConv(encFromName
)) 
1340 #endif // wxUSE_FONTMAP 
1343         // don't convert at all 
1347     // we trust OS to do conversion better than we can so try external 
1348     // conversion methods first 
1350     // the full order is: 
1351     //      1. OS conversion (iconv() under Unix or Win32 API) 
1352     //      2. hard coded conversions for UTF 
1353     //      3. wxEncodingConverter as fall back 
1359         wxMBConv_iconv 
*conv 
= new wxMBConv_iconv(m_name
); 
1365 #endif // HAVE_ICONV 
1367 #ifdef wxHAVE_WIN32_MB2WC 
1369         wxMBConv_win32 
*conv 
= m_name 
? new wxMBConv_win32(m_name
) 
1370                                       : new wxMBConv_win32(m_encoding
); 
1376 #endif // wxHAVE_WIN32_MB2WC 
1379     wxFontEncoding enc 
= m_encoding
; 
1381     if ( enc 
== wxFONTENCODING_SYSTEM 
) 
1383 #endif // wxUSE_FONTMAP 
1387         case wxFONTENCODING_UTF7
: 
1388              return new wxMBConvUTF7
; 
1390         case wxFONTENCODING_UTF8
: 
1391              return new wxMBConvUTF8
; 
1393         case wxFONTENCODING_UTF16BE
: 
1394              return new wxMBConvUTF16BE
; 
1396         case wxFONTENCODING_UTF16LE
: 
1397              return new wxMBConvUTF16LE
; 
1399         case wxFONTENCODING_UTF32BE
: 
1400              return new wxMBConvUTF32BE
; 
1402         case wxFONTENCODING_UTF32LE
: 
1403              return new wxMBConvUTF32LE
; 
1406              // nothing to do but put here to suppress gcc warnings 
1413         wxMBConv_wxwin 
*conv 
= m_name 
? new wxMBConv_wxwin(m_name
) 
1414                                       : new wxMBConv_wxwin(m_encoding
); 
1420 #endif // wxUSE_FONTMAP 
1422     wxLogError(_("Cannot convert from the charset '%s'!"), 
1426                          wxFontMapper::GetEncodingDescription(m_encoding
).c_str() 
1427 #else // !wxUSE_FONTMAP 
1428                          wxString::Format(_("encoding %s"), m_encoding
).c_str() 
1429 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP 
1435 void wxCSConv::CreateConvIfNeeded() const 
1439         wxCSConv 
*self 
= (wxCSConv 
*)this; // const_cast 
1442         // if we have neither the name nor the encoding, use the default
1443         // encoding for this system 
1444         if ( !m_name 
&& m_encoding 
== wxFONTENCODING_SYSTEM 
) 
1446             self
->m_encoding 
= wxLocale::GetSystemEncoding(); 
1448 #endif // wxUSE_INTL 
1450         self
->m_convReal 
= DoCreate(); 
1451         self
->m_deferred 
= false; 
1455 size_t wxCSConv::MB2WC(wchar_t *buf
, const char *psz
, size_t n
) const 
1457     CreateConvIfNeeded(); 
1460         return m_convReal
->MB2WC(buf
, psz
, n
); 
1463     size_t len 
= strlen(psz
); 
1467         for (size_t c 
= 0; c 
<= len
; c
++) 
1468             buf
[c
] = (unsigned char)(psz
[c
]); 
1474 size_t wxCSConv::WC2MB(char *buf
, const wchar_t *psz
, size_t n
) const 
1476     CreateConvIfNeeded(); 
1479         return m_convReal
->WC2MB(buf
, psz
, n
); 
1482     const size_t len 
= wxWcslen(psz
); 
1485         for (size_t c 
= 0; c 
<= len
; c
++) 
1486             buf
[c
] = (psz
[c
] > 0xff) ? '?' : psz
[c
]; 
1492 // ---------------------------------------------------------------------------- 
1494 // ---------------------------------------------------------------------------- 
1497     static wxMBConv_win32 wxConvLibcObj
; 
1499     static wxMBConvLibc wxConvLibcObj
; 
1502 static wxCSConv 
wxConvLocalObj(wxFONTENCODING_SYSTEM
); 
1503 static wxCSConv 
wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1
); 
1504 static wxMBConvUTF7 wxConvUTF7Obj
; 
1505 static wxMBConvUTF8 wxConvUTF8Obj
; 
1508 WXDLLIMPEXP_DATA_BASE(wxMBConv
&) wxConvLibc 
= wxConvLibcObj
; 
1509 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvLocal 
= wxConvLocalObj
; 
1510 WXDLLIMPEXP_DATA_BASE(wxCSConv
&) wxConvISO8859_1 
= wxConvISO8859_1Obj
; 
1511 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7
&) wxConvUTF7 
= wxConvUTF7Obj
; 
1512 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8
&) wxConvUTF8 
= wxConvUTF8Obj
; 
1513 WXDLLIMPEXP_DATA_BASE(wxMBConv 
*) wxConvCurrent 
= &wxConvLibcObj
; 
1515 #else // !wxUSE_WCHAR_T 
1517 // stand-ins in absence of wchar_t 
1518 WXDLLIMPEXP_DATA_BASE(wxMBConv
) wxConvLibc
, 
1523 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T